{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.932860169163922, "eval_steps": 5000000.0, "global_step": 500000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.865720338327844e-05, "grad_norm": 0.144638329744339, "learning_rate": 2e-06, "loss": 2.3034, "step": 10 }, { "epoch": 7.731440676655688e-05, "grad_norm": 0.11194495111703873, "learning_rate": 4e-06, "loss": 2.309, "step": 20 }, { "epoch": 0.00011597161014983532, "grad_norm": 0.10528688132762909, "learning_rate": 6e-06, "loss": 2.2992, "step": 30 }, { "epoch": 0.00015462881353311376, "grad_norm": 0.09728169441223145, "learning_rate": 8e-06, "loss": 2.3064, "step": 40 }, { "epoch": 0.0001932860169163922, "grad_norm": 0.09796875715255737, "learning_rate": 1e-05, "loss": 2.3046, "step": 50 }, { "epoch": 0.00023194322029967065, "grad_norm": 0.09509941190481186, "learning_rate": 1.2e-05, "loss": 2.2819, "step": 60 }, { "epoch": 0.0002706004236829491, "grad_norm": 0.09584011882543564, "learning_rate": 1.4e-05, "loss": 2.3002, "step": 70 }, { "epoch": 0.0003092576270662275, "grad_norm": 0.09579554200172424, "learning_rate": 1.6e-05, "loss": 2.279, "step": 80 }, { "epoch": 0.000347914830449506, "grad_norm": 0.09664827585220337, "learning_rate": 1.8e-05, "loss": 2.3028, "step": 90 }, { "epoch": 0.0003865720338327844, "grad_norm": 0.0957031100988388, "learning_rate": 2e-05, "loss": 2.2977, "step": 100 }, { "epoch": 0.0004252292372160628, "grad_norm": 0.09676313400268555, "learning_rate": 2.2e-05, "loss": 2.2983, "step": 110 }, { "epoch": 0.0004638864405993413, "grad_norm": 0.0992269441485405, "learning_rate": 2.4e-05, "loss": 2.2717, "step": 120 }, { "epoch": 0.0005025436439826198, "grad_norm": 0.11956988275051117, "learning_rate": 2.6e-05, "loss": 2.2923, "step": 130 }, { "epoch": 0.0005412008473658982, "grad_norm": 0.09714414924383163, "learning_rate": 2.8e-05, "loss": 2.2973, "step": 140 }, { "epoch": 0.0005798580507491766, "grad_norm": 0.09981994330883026, "learning_rate": 3e-05, "loss": 2.2948, "step": 150 }, { "epoch": 0.000618515254132455, "grad_norm": 0.09482458233833313, "learning_rate": 3.2e-05, "loss": 2.28, "step": 160 }, { "epoch": 0.0006571724575157334, "grad_norm": 0.10037554055452347, "learning_rate": 3.4000000000000007e-05, "loss": 2.2722, "step": 170 }, { "epoch": 0.000695829660899012, "grad_norm": 0.09889110177755356, "learning_rate": 3.6e-05, "loss": 2.288, "step": 180 }, { "epoch": 0.0007344868642822904, "grad_norm": 0.09977748990058899, "learning_rate": 3.8e-05, "loss": 2.2815, "step": 190 }, { "epoch": 0.0007731440676655688, "grad_norm": 0.10139628499746323, "learning_rate": 4e-05, "loss": 2.2811, "step": 200 }, { "epoch": 0.0008118012710488472, "grad_norm": 0.09862680733203888, "learning_rate": 4.2000000000000004e-05, "loss": 2.2776, "step": 210 }, { "epoch": 0.0008504584744321256, "grad_norm": 0.09958124905824661, "learning_rate": 4.4e-05, "loss": 2.2652, "step": 220 }, { "epoch": 0.0008891156778154042, "grad_norm": 0.10025738924741745, "learning_rate": 4.6e-05, "loss": 2.2808, "step": 230 }, { "epoch": 0.0009277728811986826, "grad_norm": 0.09997476637363434, "learning_rate": 4.8e-05, "loss": 2.2766, "step": 240 }, { "epoch": 0.000966430084581961, "grad_norm": 0.10078670084476471, "learning_rate": 5e-05, "loss": 2.2808, "step": 250 }, { "epoch": 0.0010050872879652395, "grad_norm": 0.09915891289710999, "learning_rate": 5.2e-05, "loss": 2.2705, "step": 260 }, { "epoch": 0.001043744491348518, "grad_norm": 0.09878283739089966, "learning_rate": 5.4e-05, "loss": 2.2642, "step": 270 }, { "epoch": 0.0010824016947317964, "grad_norm": 0.09960974007844925, "learning_rate": 5.6e-05, "loss": 2.2696, "step": 280 }, { "epoch": 0.0011210588981150748, "grad_norm": 0.10109972953796387, "learning_rate": 5.800000000000001e-05, "loss": 2.2802, "step": 290 }, { "epoch": 0.0011597161014983532, "grad_norm": 0.1021229699254036, "learning_rate": 6e-05, "loss": 2.2745, "step": 300 }, { "epoch": 0.0011983733048816316, "grad_norm": 0.10506097227334976, "learning_rate": 6.2e-05, "loss": 2.2848, "step": 310 }, { "epoch": 0.00123703050826491, "grad_norm": 0.10662543773651123, "learning_rate": 6.4e-05, "loss": 2.2669, "step": 320 }, { "epoch": 0.0012756877116481885, "grad_norm": 0.11067084223031998, "learning_rate": 6.6e-05, "loss": 2.2745, "step": 330 }, { "epoch": 0.0013143449150314669, "grad_norm": 0.10717500746250153, "learning_rate": 6.800000000000001e-05, "loss": 2.2709, "step": 340 }, { "epoch": 0.0013530021184147453, "grad_norm": 0.10758475214242935, "learning_rate": 7.000000000000001e-05, "loss": 2.2624, "step": 350 }, { "epoch": 0.001391659321798024, "grad_norm": 0.10585075616836548, "learning_rate": 7.2e-05, "loss": 2.2588, "step": 360 }, { "epoch": 0.0014303165251813024, "grad_norm": 0.10558781772851944, "learning_rate": 7.4e-05, "loss": 2.2673, "step": 370 }, { "epoch": 0.0014689737285645808, "grad_norm": 0.1085430234670639, "learning_rate": 7.6e-05, "loss": 2.2711, "step": 380 }, { "epoch": 0.0015076309319478592, "grad_norm": 0.10785538703203201, "learning_rate": 7.8e-05, "loss": 2.2614, "step": 390 }, { "epoch": 0.0015462881353311376, "grad_norm": 0.10400920361280441, "learning_rate": 8e-05, "loss": 2.2781, "step": 400 }, { "epoch": 0.001584945338714416, "grad_norm": 0.10345766693353653, "learning_rate": 8.2e-05, "loss": 2.2441, "step": 410 }, { "epoch": 0.0016236025420976945, "grad_norm": 0.1062808483839035, "learning_rate": 8.400000000000001e-05, "loss": 2.2522, "step": 420 }, { "epoch": 0.0016622597454809729, "grad_norm": 0.11519643664360046, "learning_rate": 8.599999999999999e-05, "loss": 2.262, "step": 430 }, { "epoch": 0.0017009169488642513, "grad_norm": 0.10753437131643295, "learning_rate": 8.8e-05, "loss": 2.261, "step": 440 }, { "epoch": 0.0017395741522475297, "grad_norm": 0.11565092951059341, "learning_rate": 8.999999999999999e-05, "loss": 2.2629, "step": 450 }, { "epoch": 0.0017782313556308083, "grad_norm": 0.11193150281906128, "learning_rate": 9.2e-05, "loss": 2.2495, "step": 460 }, { "epoch": 0.0018168885590140868, "grad_norm": 0.10999926179647446, "learning_rate": 9.400000000000001e-05, "loss": 2.2677, "step": 470 }, { "epoch": 0.0018555457623973652, "grad_norm": 0.1156141385436058, "learning_rate": 9.6e-05, "loss": 2.2475, "step": 480 }, { "epoch": 0.0018942029657806436, "grad_norm": 0.10972867161035538, "learning_rate": 9.800000000000001e-05, "loss": 2.2666, "step": 490 }, { "epoch": 0.001932860169163922, "grad_norm": 0.1137310117483139, "learning_rate": 0.0001, "loss": 2.2476, "step": 500 }, { "epoch": 0.0019715173725472004, "grad_norm": 0.11586374044418335, "learning_rate": 0.000102, "loss": 2.2718, "step": 510 }, { "epoch": 0.002010174575930479, "grad_norm": 0.10975225269794464, "learning_rate": 0.000104, "loss": 2.2705, "step": 520 }, { "epoch": 0.0020488317793137573, "grad_norm": 0.1140342503786087, "learning_rate": 0.000106, "loss": 2.2574, "step": 530 }, { "epoch": 0.002087488982697036, "grad_norm": 0.11401575058698654, "learning_rate": 0.000108, "loss": 2.2646, "step": 540 }, { "epoch": 0.002126146186080314, "grad_norm": 0.11033317446708679, "learning_rate": 0.00011, "loss": 2.2426, "step": 550 }, { "epoch": 0.0021648033894635928, "grad_norm": 0.12125371396541595, "learning_rate": 0.000112, "loss": 2.2444, "step": 560 }, { "epoch": 0.002203460592846871, "grad_norm": 0.11703993380069733, "learning_rate": 0.000114, "loss": 2.2463, "step": 570 }, { "epoch": 0.0022421177962301496, "grad_norm": 0.11883124709129333, "learning_rate": 0.00011600000000000001, "loss": 2.243, "step": 580 }, { "epoch": 0.002280774999613428, "grad_norm": 0.12158622592687607, "learning_rate": 0.000118, "loss": 2.2576, "step": 590 }, { "epoch": 0.0023194322029967064, "grad_norm": 0.11580552160739899, "learning_rate": 0.00012, "loss": 2.2597, "step": 600 }, { "epoch": 0.0023580894063799846, "grad_norm": 0.12047332525253296, "learning_rate": 0.000122, "loss": 2.2684, "step": 610 }, { "epoch": 0.0023967466097632633, "grad_norm": 0.11461437493562698, "learning_rate": 0.000124, "loss": 2.259, "step": 620 }, { "epoch": 0.002435403813146542, "grad_norm": 0.12357478588819504, "learning_rate": 0.000126, "loss": 2.2549, "step": 630 }, { "epoch": 0.00247406101652982, "grad_norm": 0.11461521685123444, "learning_rate": 0.000128, "loss": 2.2527, "step": 640 }, { "epoch": 0.0025127182199130987, "grad_norm": 0.12529653310775757, "learning_rate": 0.00013000000000000002, "loss": 2.2535, "step": 650 }, { "epoch": 0.002551375423296377, "grad_norm": 0.12418084591627121, "learning_rate": 0.000132, "loss": 2.2578, "step": 660 }, { "epoch": 0.0025900326266796556, "grad_norm": 0.11342219263315201, "learning_rate": 0.000134, "loss": 2.2488, "step": 670 }, { "epoch": 0.0026286898300629338, "grad_norm": 0.12109920382499695, "learning_rate": 0.00013600000000000003, "loss": 2.2513, "step": 680 }, { "epoch": 0.0026673470334462124, "grad_norm": 0.1217515617609024, "learning_rate": 0.00013800000000000002, "loss": 2.2513, "step": 690 }, { "epoch": 0.0027060042368294906, "grad_norm": 0.12609773874282837, "learning_rate": 0.00014000000000000001, "loss": 2.2452, "step": 700 }, { "epoch": 0.0027446614402127693, "grad_norm": 0.12247970700263977, "learning_rate": 0.00014199999999999998, "loss": 2.2418, "step": 710 }, { "epoch": 0.002783318643596048, "grad_norm": 0.12363874912261963, "learning_rate": 0.000144, "loss": 2.2465, "step": 720 }, { "epoch": 0.002821975846979326, "grad_norm": 0.12146373093128204, "learning_rate": 0.000146, "loss": 2.2627, "step": 730 }, { "epoch": 0.0028606330503626047, "grad_norm": 0.12576045095920563, "learning_rate": 0.000148, "loss": 2.2658, "step": 740 }, { "epoch": 0.002899290253745883, "grad_norm": 0.1205977275967598, "learning_rate": 0.00015, "loss": 2.2444, "step": 750 }, { "epoch": 0.0029379474571291616, "grad_norm": 0.12153708934783936, "learning_rate": 0.000152, "loss": 2.2632, "step": 760 }, { "epoch": 0.0029766046605124398, "grad_norm": 0.11591946333646774, "learning_rate": 0.000154, "loss": 2.2524, "step": 770 }, { "epoch": 0.0030152618638957184, "grad_norm": 0.12641796469688416, "learning_rate": 0.000156, "loss": 2.2611, "step": 780 }, { "epoch": 0.0030539190672789966, "grad_norm": 0.12534596025943756, "learning_rate": 0.000158, "loss": 2.2364, "step": 790 }, { "epoch": 0.0030925762706622752, "grad_norm": 0.1186433732509613, "learning_rate": 0.00016, "loss": 2.253, "step": 800 }, { "epoch": 0.0031312334740455534, "grad_norm": 0.12480587512254715, "learning_rate": 0.000162, "loss": 2.2434, "step": 810 }, { "epoch": 0.003169890677428832, "grad_norm": 0.1353996843099594, "learning_rate": 0.000164, "loss": 2.2472, "step": 820 }, { "epoch": 0.0032085478808121107, "grad_norm": 0.12390103936195374, "learning_rate": 0.00016600000000000002, "loss": 2.258, "step": 830 }, { "epoch": 0.003247205084195389, "grad_norm": 0.1263820379972458, "learning_rate": 0.00016800000000000002, "loss": 2.2552, "step": 840 }, { "epoch": 0.0032858622875786675, "grad_norm": 0.12677688896656036, "learning_rate": 0.00017, "loss": 2.233, "step": 850 }, { "epoch": 0.0033245194909619457, "grad_norm": 0.12664929032325745, "learning_rate": 0.00017199999999999998, "loss": 2.2527, "step": 860 }, { "epoch": 0.0033631766943452244, "grad_norm": 0.14130549132823944, "learning_rate": 0.000174, "loss": 2.253, "step": 870 }, { "epoch": 0.0034018338977285026, "grad_norm": 0.13592848181724548, "learning_rate": 0.000176, "loss": 2.2475, "step": 880 }, { "epoch": 0.0034404911011117812, "grad_norm": 0.1275721788406372, "learning_rate": 0.000178, "loss": 2.2585, "step": 890 }, { "epoch": 0.0034791483044950594, "grad_norm": 0.12690509855747223, "learning_rate": 0.00017999999999999998, "loss": 2.2525, "step": 900 }, { "epoch": 0.003517805507878338, "grad_norm": 0.12834276258945465, "learning_rate": 0.000182, "loss": 2.2622, "step": 910 }, { "epoch": 0.0035564627112616167, "grad_norm": 0.13139639794826508, "learning_rate": 0.000184, "loss": 2.2527, "step": 920 }, { "epoch": 0.003595119914644895, "grad_norm": 0.13028880953788757, "learning_rate": 0.000186, "loss": 2.2579, "step": 930 }, { "epoch": 0.0036337771180281735, "grad_norm": 0.14631029963493347, "learning_rate": 0.00018800000000000002, "loss": 2.2659, "step": 940 }, { "epoch": 0.0036724343214114517, "grad_norm": 0.1378420740365982, "learning_rate": 0.00019, "loss": 2.257, "step": 950 }, { "epoch": 0.0037110915247947304, "grad_norm": 0.12957599759101868, "learning_rate": 0.000192, "loss": 2.2533, "step": 960 }, { "epoch": 0.0037497487281780086, "grad_norm": 0.12839531898498535, "learning_rate": 0.000194, "loss": 2.2618, "step": 970 }, { "epoch": 0.003788405931561287, "grad_norm": 0.13206636905670166, "learning_rate": 0.00019600000000000002, "loss": 2.2564, "step": 980 }, { "epoch": 0.0038270631349445654, "grad_norm": 0.13995692133903503, "learning_rate": 0.00019800000000000002, "loss": 2.2503, "step": 990 }, { "epoch": 0.003865720338327844, "grad_norm": 0.13417716324329376, "learning_rate": 0.0002, "loss": 2.2494, "step": 1000 }, { "epoch": 0.0039043775417111222, "grad_norm": 0.13489042222499847, "learning_rate": 0.000202, "loss": 2.2506, "step": 1010 }, { "epoch": 0.003943034745094401, "grad_norm": 0.139221653342247, "learning_rate": 0.000204, "loss": 2.2493, "step": 1020 }, { "epoch": 0.003981691948477679, "grad_norm": 0.14685332775115967, "learning_rate": 0.000206, "loss": 2.252, "step": 1030 }, { "epoch": 0.004020349151860958, "grad_norm": 0.1311921775341034, "learning_rate": 0.000208, "loss": 2.2559, "step": 1040 }, { "epoch": 0.004059006355244236, "grad_norm": 0.13858993351459503, "learning_rate": 0.00021, "loss": 2.2518, "step": 1050 }, { "epoch": 0.0040976635586275146, "grad_norm": 0.15868879854679108, "learning_rate": 0.000212, "loss": 2.2542, "step": 1060 }, { "epoch": 0.004136320762010793, "grad_norm": 0.13859611749649048, "learning_rate": 0.000214, "loss": 2.2512, "step": 1070 }, { "epoch": 0.004174977965394072, "grad_norm": 0.12724171578884125, "learning_rate": 0.000216, "loss": 2.2692, "step": 1080 }, { "epoch": 0.00421363516877735, "grad_norm": 0.13106082379817963, "learning_rate": 0.000218, "loss": 2.2533, "step": 1090 }, { "epoch": 0.004252292372160628, "grad_norm": 0.1482999473810196, "learning_rate": 0.00022, "loss": 2.2379, "step": 1100 }, { "epoch": 0.0042909495755439064, "grad_norm": 0.1275726705789566, "learning_rate": 0.000222, "loss": 2.254, "step": 1110 }, { "epoch": 0.0043296067789271855, "grad_norm": 0.13930654525756836, "learning_rate": 0.000224, "loss": 2.2566, "step": 1120 }, { "epoch": 0.004368263982310464, "grad_norm": 0.13760654628276825, "learning_rate": 0.00022600000000000002, "loss": 2.2536, "step": 1130 }, { "epoch": 0.004406921185693742, "grad_norm": 0.13762839138507843, "learning_rate": 0.000228, "loss": 2.2569, "step": 1140 }, { "epoch": 0.004445578389077021, "grad_norm": 0.14481611549854279, "learning_rate": 0.00023, "loss": 2.2445, "step": 1150 }, { "epoch": 0.004484235592460299, "grad_norm": 0.13288311660289764, "learning_rate": 0.00023200000000000003, "loss": 2.246, "step": 1160 }, { "epoch": 0.004522892795843577, "grad_norm": 0.15648561716079712, "learning_rate": 0.00023400000000000002, "loss": 2.248, "step": 1170 }, { "epoch": 0.004561549999226856, "grad_norm": 0.13135381042957306, "learning_rate": 0.000236, "loss": 2.2464, "step": 1180 }, { "epoch": 0.004600207202610135, "grad_norm": 0.14945846796035767, "learning_rate": 0.00023799999999999998, "loss": 2.2538, "step": 1190 }, { "epoch": 0.004638864405993413, "grad_norm": 0.1418386697769165, "learning_rate": 0.00024, "loss": 2.2351, "step": 1200 }, { "epoch": 0.004677521609376691, "grad_norm": 0.1434290111064911, "learning_rate": 0.000242, "loss": 2.263, "step": 1210 }, { "epoch": 0.004716178812759969, "grad_norm": 0.14787034690380096, "learning_rate": 0.000244, "loss": 2.2561, "step": 1220 }, { "epoch": 0.004754836016143248, "grad_norm": 0.13952895998954773, "learning_rate": 0.000246, "loss": 2.2382, "step": 1230 }, { "epoch": 0.0047934932195265265, "grad_norm": 0.14909480512142181, "learning_rate": 0.000248, "loss": 2.2535, "step": 1240 }, { "epoch": 0.004832150422909805, "grad_norm": 0.1471477746963501, "learning_rate": 0.00025, "loss": 2.2703, "step": 1250 }, { "epoch": 0.004870807626293084, "grad_norm": 0.14406970143318176, "learning_rate": 0.000252, "loss": 2.2445, "step": 1260 }, { "epoch": 0.004909464829676362, "grad_norm": 0.14098721742630005, "learning_rate": 0.000254, "loss": 2.2494, "step": 1270 }, { "epoch": 0.00494812203305964, "grad_norm": 0.14734388887882233, "learning_rate": 0.000256, "loss": 2.2402, "step": 1280 }, { "epoch": 0.004986779236442918, "grad_norm": 0.14964799582958221, "learning_rate": 0.00025800000000000004, "loss": 2.2578, "step": 1290 }, { "epoch": 0.0050254364398261975, "grad_norm": 0.15391208231449127, "learning_rate": 0.00026000000000000003, "loss": 2.2633, "step": 1300 }, { "epoch": 0.005064093643209476, "grad_norm": 0.152483731508255, "learning_rate": 0.000262, "loss": 2.2457, "step": 1310 }, { "epoch": 0.005102750846592754, "grad_norm": 0.1448708474636078, "learning_rate": 0.000264, "loss": 2.2566, "step": 1320 }, { "epoch": 0.005141408049976033, "grad_norm": 0.15382276475429535, "learning_rate": 0.000266, "loss": 2.2623, "step": 1330 }, { "epoch": 0.005180065253359311, "grad_norm": 0.15795128047466278, "learning_rate": 0.000268, "loss": 2.2504, "step": 1340 }, { "epoch": 0.005218722456742589, "grad_norm": 0.14650534093379974, "learning_rate": 0.00027, "loss": 2.2556, "step": 1350 }, { "epoch": 0.0052573796601258676, "grad_norm": 0.15081769227981567, "learning_rate": 0.00027200000000000005, "loss": 2.2551, "step": 1360 }, { "epoch": 0.005296036863509147, "grad_norm": 0.1477501541376114, "learning_rate": 0.00027400000000000005, "loss": 2.2585, "step": 1370 }, { "epoch": 0.005334694066892425, "grad_norm": 0.16164270043373108, "learning_rate": 0.00027600000000000004, "loss": 2.2643, "step": 1380 }, { "epoch": 0.005373351270275703, "grad_norm": 0.15059034526348114, "learning_rate": 0.00027800000000000004, "loss": 2.2505, "step": 1390 }, { "epoch": 0.005412008473658981, "grad_norm": 0.14349956810474396, "learning_rate": 0.00028000000000000003, "loss": 2.264, "step": 1400 }, { "epoch": 0.00545066567704226, "grad_norm": 0.14197014272212982, "learning_rate": 0.00028199999999999997, "loss": 2.2582, "step": 1410 }, { "epoch": 0.0054893228804255385, "grad_norm": 0.1571299135684967, "learning_rate": 0.00028399999999999996, "loss": 2.2598, "step": 1420 }, { "epoch": 0.005527980083808817, "grad_norm": 0.14361920952796936, "learning_rate": 0.00028599999999999996, "loss": 2.2426, "step": 1430 }, { "epoch": 0.005566637287192096, "grad_norm": 0.14507678151130676, "learning_rate": 0.000288, "loss": 2.2576, "step": 1440 }, { "epoch": 0.005605294490575374, "grad_norm": 0.1388143002986908, "learning_rate": 0.00029, "loss": 2.2526, "step": 1450 }, { "epoch": 0.005643951693958652, "grad_norm": 0.15597301721572876, "learning_rate": 0.000292, "loss": 2.2675, "step": 1460 }, { "epoch": 0.00568260889734193, "grad_norm": 0.13945583999156952, "learning_rate": 0.000294, "loss": 2.2717, "step": 1470 }, { "epoch": 0.0057212661007252094, "grad_norm": 0.1599157154560089, "learning_rate": 0.000296, "loss": 2.256, "step": 1480 }, { "epoch": 0.005759923304108488, "grad_norm": 0.14406763017177582, "learning_rate": 0.000298, "loss": 2.2692, "step": 1490 }, { "epoch": 0.005798580507491766, "grad_norm": 0.15903228521347046, "learning_rate": 0.0003, "loss": 2.2734, "step": 1500 }, { "epoch": 0.005837237710875044, "grad_norm": 0.14652226865291595, "learning_rate": 0.000302, "loss": 2.2475, "step": 1510 }, { "epoch": 0.005875894914258323, "grad_norm": 0.16651105880737305, "learning_rate": 0.000304, "loss": 2.2459, "step": 1520 }, { "epoch": 0.005914552117641601, "grad_norm": 0.15857480466365814, "learning_rate": 0.000306, "loss": 2.2536, "step": 1530 }, { "epoch": 0.0059532093210248795, "grad_norm": 0.1656733900308609, "learning_rate": 0.000308, "loss": 2.2597, "step": 1540 }, { "epoch": 0.005991866524408159, "grad_norm": 0.1586008071899414, "learning_rate": 0.00031, "loss": 2.2641, "step": 1550 }, { "epoch": 0.006030523727791437, "grad_norm": 0.15786172449588776, "learning_rate": 0.000312, "loss": 2.2672, "step": 1560 }, { "epoch": 0.006069180931174715, "grad_norm": 0.15883037447929382, "learning_rate": 0.000314, "loss": 2.2475, "step": 1570 }, { "epoch": 0.006107838134557993, "grad_norm": 0.16221074759960175, "learning_rate": 0.000316, "loss": 2.258, "step": 1580 }, { "epoch": 0.006146495337941272, "grad_norm": 0.14376521110534668, "learning_rate": 0.00031800000000000003, "loss": 2.2619, "step": 1590 }, { "epoch": 0.0061851525413245505, "grad_norm": 0.1429993212223053, "learning_rate": 0.00032, "loss": 2.2329, "step": 1600 }, { "epoch": 0.006223809744707829, "grad_norm": 0.15341663360595703, "learning_rate": 0.000322, "loss": 2.2501, "step": 1610 }, { "epoch": 0.006262466948091107, "grad_norm": 0.15319260954856873, "learning_rate": 0.000324, "loss": 2.2727, "step": 1620 }, { "epoch": 0.006301124151474386, "grad_norm": 0.16039767861366272, "learning_rate": 0.000326, "loss": 2.2525, "step": 1630 }, { "epoch": 0.006339781354857664, "grad_norm": 0.1588415652513504, "learning_rate": 0.000328, "loss": 2.2577, "step": 1640 }, { "epoch": 0.006378438558240942, "grad_norm": 0.1502636820077896, "learning_rate": 0.00033, "loss": 2.2542, "step": 1650 }, { "epoch": 0.006417095761624221, "grad_norm": 0.1684446483850479, "learning_rate": 0.00033200000000000005, "loss": 2.2632, "step": 1660 }, { "epoch": 0.0064557529650075, "grad_norm": 0.13977859914302826, "learning_rate": 0.00033400000000000004, "loss": 2.2662, "step": 1670 }, { "epoch": 0.006494410168390778, "grad_norm": 0.14510294795036316, "learning_rate": 0.00033600000000000004, "loss": 2.2705, "step": 1680 }, { "epoch": 0.006533067371774056, "grad_norm": 0.16292671859264374, "learning_rate": 0.00033800000000000003, "loss": 2.2546, "step": 1690 }, { "epoch": 0.006571724575157335, "grad_norm": 0.15019813179969788, "learning_rate": 0.00034, "loss": 2.2561, "step": 1700 }, { "epoch": 0.006610381778540613, "grad_norm": 0.1569780558347702, "learning_rate": 0.000342, "loss": 2.2519, "step": 1710 }, { "epoch": 0.0066490389819238915, "grad_norm": 0.16433066129684448, "learning_rate": 0.00034399999999999996, "loss": 2.2739, "step": 1720 }, { "epoch": 0.006687696185307171, "grad_norm": 0.1681896299123764, "learning_rate": 0.000346, "loss": 2.2628, "step": 1730 }, { "epoch": 0.006726353388690449, "grad_norm": 0.17025180160999298, "learning_rate": 0.000348, "loss": 2.262, "step": 1740 }, { "epoch": 0.006765010592073727, "grad_norm": 0.15026843547821045, "learning_rate": 0.00035, "loss": 2.2624, "step": 1750 }, { "epoch": 0.006803667795457005, "grad_norm": 0.1481882929801941, "learning_rate": 0.000352, "loss": 2.2602, "step": 1760 }, { "epoch": 0.006842324998840284, "grad_norm": 0.16018210351467133, "learning_rate": 0.000354, "loss": 2.2603, "step": 1770 }, { "epoch": 0.0068809822022235624, "grad_norm": 0.15643790364265442, "learning_rate": 0.000356, "loss": 2.2671, "step": 1780 }, { "epoch": 0.006919639405606841, "grad_norm": 0.1506490260362625, "learning_rate": 0.000358, "loss": 2.2785, "step": 1790 }, { "epoch": 0.006958296608990119, "grad_norm": 0.15134088695049286, "learning_rate": 0.00035999999999999997, "loss": 2.2866, "step": 1800 }, { "epoch": 0.006996953812373398, "grad_norm": 0.16592784225940704, "learning_rate": 0.000362, "loss": 2.2632, "step": 1810 }, { "epoch": 0.007035611015756676, "grad_norm": 0.1507008671760559, "learning_rate": 0.000364, "loss": 2.2678, "step": 1820 }, { "epoch": 0.007074268219139954, "grad_norm": 0.15520262718200684, "learning_rate": 0.000366, "loss": 2.2733, "step": 1830 }, { "epoch": 0.007112925422523233, "grad_norm": 0.1662900447845459, "learning_rate": 0.000368, "loss": 2.2644, "step": 1840 }, { "epoch": 0.007151582625906512, "grad_norm": 0.17302747070789337, "learning_rate": 0.00037, "loss": 2.265, "step": 1850 }, { "epoch": 0.00719023982928979, "grad_norm": 0.167618989944458, "learning_rate": 0.000372, "loss": 2.2652, "step": 1860 }, { "epoch": 0.007228897032673068, "grad_norm": 0.1645369678735733, "learning_rate": 0.000374, "loss": 2.2806, "step": 1870 }, { "epoch": 0.007267554236056347, "grad_norm": 0.16747227311134338, "learning_rate": 0.00037600000000000003, "loss": 2.2729, "step": 1880 }, { "epoch": 0.007306211439439625, "grad_norm": 0.17343640327453613, "learning_rate": 0.000378, "loss": 2.2777, "step": 1890 }, { "epoch": 0.0073448686428229035, "grad_norm": 0.1752873957157135, "learning_rate": 0.00038, "loss": 2.2629, "step": 1900 }, { "epoch": 0.007383525846206182, "grad_norm": 0.17244476079940796, "learning_rate": 0.000382, "loss": 2.2598, "step": 1910 }, { "epoch": 0.007422183049589461, "grad_norm": 0.1787695288658142, "learning_rate": 0.000384, "loss": 2.2656, "step": 1920 }, { "epoch": 0.007460840252972739, "grad_norm": 0.15853258967399597, "learning_rate": 0.000386, "loss": 2.2673, "step": 1930 }, { "epoch": 0.007499497456356017, "grad_norm": 0.1486985981464386, "learning_rate": 0.000388, "loss": 2.2751, "step": 1940 }, { "epoch": 0.007538154659739296, "grad_norm": 0.1753663718700409, "learning_rate": 0.00039000000000000005, "loss": 2.2727, "step": 1950 }, { "epoch": 0.007576811863122574, "grad_norm": 0.18662576377391815, "learning_rate": 0.00039200000000000004, "loss": 2.2633, "step": 1960 }, { "epoch": 0.007615469066505853, "grad_norm": 0.1988459974527359, "learning_rate": 0.00039400000000000004, "loss": 2.2634, "step": 1970 }, { "epoch": 0.007654126269889131, "grad_norm": 0.17339687049388885, "learning_rate": 0.00039600000000000003, "loss": 2.2757, "step": 1980 }, { "epoch": 0.00769278347327241, "grad_norm": 0.18046623468399048, "learning_rate": 0.000398, "loss": 2.2807, "step": 1990 }, { "epoch": 0.007731440676655688, "grad_norm": 0.16285806894302368, "learning_rate": 0.0004, "loss": 2.261, "step": 2000 }, { "epoch": 0.007770097880038966, "grad_norm": 0.17096810042858124, "learning_rate": 0.000402, "loss": 2.2811, "step": 2010 }, { "epoch": 0.0078087550834222445, "grad_norm": 0.1719149798154831, "learning_rate": 0.000404, "loss": 2.2868, "step": 2020 }, { "epoch": 0.007847412286805524, "grad_norm": 0.17674361169338226, "learning_rate": 0.00040600000000000006, "loss": 2.2721, "step": 2030 }, { "epoch": 0.007886069490188802, "grad_norm": 0.15781450271606445, "learning_rate": 0.000408, "loss": 2.283, "step": 2040 }, { "epoch": 0.00792472669357208, "grad_norm": 0.19582943618297577, "learning_rate": 0.00041, "loss": 2.2681, "step": 2050 }, { "epoch": 0.007963383896955358, "grad_norm": 0.166877880692482, "learning_rate": 0.000412, "loss": 2.2836, "step": 2060 }, { "epoch": 0.008002041100338636, "grad_norm": 0.17595386505126953, "learning_rate": 0.000414, "loss": 2.2562, "step": 2070 }, { "epoch": 0.008040698303721916, "grad_norm": 0.17867539823055267, "learning_rate": 0.000416, "loss": 2.2676, "step": 2080 }, { "epoch": 0.008079355507105195, "grad_norm": 0.16842317581176758, "learning_rate": 0.00041799999999999997, "loss": 2.2731, "step": 2090 }, { "epoch": 0.008118012710488473, "grad_norm": 0.1676853597164154, "learning_rate": 0.00042, "loss": 2.2834, "step": 2100 }, { "epoch": 0.008156669913871751, "grad_norm": 0.16343624889850616, "learning_rate": 0.000422, "loss": 2.2692, "step": 2110 }, { "epoch": 0.008195327117255029, "grad_norm": 0.18455322086811066, "learning_rate": 0.000424, "loss": 2.2817, "step": 2120 }, { "epoch": 0.008233984320638307, "grad_norm": 0.16690371930599213, "learning_rate": 0.000426, "loss": 2.2858, "step": 2130 }, { "epoch": 0.008272641524021586, "grad_norm": 0.20441211760044098, "learning_rate": 0.000428, "loss": 2.2803, "step": 2140 }, { "epoch": 0.008311298727404865, "grad_norm": 0.1717919260263443, "learning_rate": 0.00043, "loss": 2.2571, "step": 2150 }, { "epoch": 0.008349955930788144, "grad_norm": 0.20094193518161774, "learning_rate": 0.000432, "loss": 2.2691, "step": 2160 }, { "epoch": 0.008388613134171422, "grad_norm": 0.17125524580478668, "learning_rate": 0.00043400000000000003, "loss": 2.2859, "step": 2170 }, { "epoch": 0.0084272703375547, "grad_norm": 0.18455886840820312, "learning_rate": 0.000436, "loss": 2.2732, "step": 2180 }, { "epoch": 0.008465927540937978, "grad_norm": 0.20173197984695435, "learning_rate": 0.000438, "loss": 2.2928, "step": 2190 }, { "epoch": 0.008504584744321256, "grad_norm": 0.18156647682189941, "learning_rate": 0.00044, "loss": 2.2725, "step": 2200 }, { "epoch": 0.008543241947704535, "grad_norm": 0.1819404810667038, "learning_rate": 0.000442, "loss": 2.2693, "step": 2210 }, { "epoch": 0.008581899151087813, "grad_norm": 0.18304607272148132, "learning_rate": 0.000444, "loss": 2.2843, "step": 2220 }, { "epoch": 0.008620556354471093, "grad_norm": 0.15833936631679535, "learning_rate": 0.000446, "loss": 2.2818, "step": 2230 }, { "epoch": 0.008659213557854371, "grad_norm": 0.1906866580247879, "learning_rate": 0.000448, "loss": 2.264, "step": 2240 }, { "epoch": 0.00869787076123765, "grad_norm": 0.16470417380332947, "learning_rate": 0.00045000000000000004, "loss": 2.2943, "step": 2250 }, { "epoch": 0.008736527964620927, "grad_norm": 0.16799645125865936, "learning_rate": 0.00045200000000000004, "loss": 2.2778, "step": 2260 }, { "epoch": 0.008775185168004206, "grad_norm": 0.1809621900320053, "learning_rate": 0.00045400000000000003, "loss": 2.284, "step": 2270 }, { "epoch": 0.008813842371387484, "grad_norm": 0.18328256905078888, "learning_rate": 0.000456, "loss": 2.272, "step": 2280 }, { "epoch": 0.008852499574770762, "grad_norm": 0.22645479440689087, "learning_rate": 0.000458, "loss": 2.2805, "step": 2290 }, { "epoch": 0.008891156778154042, "grad_norm": 0.21596001088619232, "learning_rate": 0.00046, "loss": 2.2835, "step": 2300 }, { "epoch": 0.00892981398153732, "grad_norm": 0.16236698627471924, "learning_rate": 0.000462, "loss": 2.2845, "step": 2310 }, { "epoch": 0.008968471184920598, "grad_norm": 0.16047120094299316, "learning_rate": 0.00046400000000000006, "loss": 2.2648, "step": 2320 }, { "epoch": 0.009007128388303877, "grad_norm": 0.1953253298997879, "learning_rate": 0.00046600000000000005, "loss": 2.2981, "step": 2330 }, { "epoch": 0.009045785591687155, "grad_norm": 0.16764451563358307, "learning_rate": 0.00046800000000000005, "loss": 2.2999, "step": 2340 }, { "epoch": 0.009084442795070433, "grad_norm": 0.18153171241283417, "learning_rate": 0.00047, "loss": 2.2778, "step": 2350 }, { "epoch": 0.009123099998453711, "grad_norm": 0.1875174194574356, "learning_rate": 0.000472, "loss": 2.2867, "step": 2360 }, { "epoch": 0.009161757201836991, "grad_norm": 0.18244752287864685, "learning_rate": 0.000474, "loss": 2.2849, "step": 2370 }, { "epoch": 0.00920041440522027, "grad_norm": 0.15618745982646942, "learning_rate": 0.00047599999999999997, "loss": 2.2917, "step": 2380 }, { "epoch": 0.009239071608603548, "grad_norm": 0.1807391494512558, "learning_rate": 0.00047799999999999996, "loss": 2.2907, "step": 2390 }, { "epoch": 0.009277728811986826, "grad_norm": 0.1593099981546402, "learning_rate": 0.00048, "loss": 2.2669, "step": 2400 }, { "epoch": 0.009316386015370104, "grad_norm": 0.17011666297912598, "learning_rate": 0.000482, "loss": 2.2749, "step": 2410 }, { "epoch": 0.009355043218753382, "grad_norm": 0.19175320863723755, "learning_rate": 0.000484, "loss": 2.2909, "step": 2420 }, { "epoch": 0.00939370042213666, "grad_norm": 0.19758890569210052, "learning_rate": 0.000486, "loss": 2.2949, "step": 2430 }, { "epoch": 0.009432357625519939, "grad_norm": 0.16885755956172943, "learning_rate": 0.000488, "loss": 2.2786, "step": 2440 }, { "epoch": 0.009471014828903218, "grad_norm": 0.16191567480564117, "learning_rate": 0.00049, "loss": 2.2946, "step": 2450 }, { "epoch": 0.009509672032286497, "grad_norm": 0.15698394179344177, "learning_rate": 0.000492, "loss": 2.2832, "step": 2460 }, { "epoch": 0.009548329235669775, "grad_norm": 0.19043174386024475, "learning_rate": 0.000494, "loss": 2.2739, "step": 2470 }, { "epoch": 0.009586986439053053, "grad_norm": 0.18678732216358185, "learning_rate": 0.000496, "loss": 2.2879, "step": 2480 }, { "epoch": 0.009625643642436331, "grad_norm": 0.21114246547222137, "learning_rate": 0.000498, "loss": 2.2851, "step": 2490 }, { "epoch": 0.00966430084581961, "grad_norm": 0.17600609362125397, "learning_rate": 0.0005, "loss": 2.2906, "step": 2500 }, { "epoch": 0.009702958049202888, "grad_norm": 0.1817324459552765, "learning_rate": 0.0005020000000000001, "loss": 2.2937, "step": 2510 }, { "epoch": 0.009741615252586168, "grad_norm": 0.19554492831230164, "learning_rate": 0.000504, "loss": 2.2969, "step": 2520 }, { "epoch": 0.009780272455969446, "grad_norm": 0.1822926104068756, "learning_rate": 0.000506, "loss": 2.2901, "step": 2530 }, { "epoch": 0.009818929659352724, "grad_norm": 0.19118402898311615, "learning_rate": 0.000508, "loss": 2.2959, "step": 2540 }, { "epoch": 0.009857586862736002, "grad_norm": 0.1897551566362381, "learning_rate": 0.00051, "loss": 2.2919, "step": 2550 }, { "epoch": 0.00989624406611928, "grad_norm": 0.17411769926548004, "learning_rate": 0.000512, "loss": 2.2832, "step": 2560 }, { "epoch": 0.009934901269502559, "grad_norm": 0.16486330330371857, "learning_rate": 0.000514, "loss": 2.2867, "step": 2570 }, { "epoch": 0.009973558472885837, "grad_norm": 0.18757574260234833, "learning_rate": 0.0005160000000000001, "loss": 2.3017, "step": 2580 }, { "epoch": 0.010012215676269117, "grad_norm": 0.17420315742492676, "learning_rate": 0.000518, "loss": 2.2916, "step": 2590 }, { "epoch": 0.010050872879652395, "grad_norm": 0.1847243458032608, "learning_rate": 0.0005200000000000001, "loss": 2.2889, "step": 2600 }, { "epoch": 0.010089530083035673, "grad_norm": 0.23134857416152954, "learning_rate": 0.000522, "loss": 2.2994, "step": 2610 }, { "epoch": 0.010128187286418951, "grad_norm": 0.250232458114624, "learning_rate": 0.000524, "loss": 2.2847, "step": 2620 }, { "epoch": 0.01016684448980223, "grad_norm": 0.2091963291168213, "learning_rate": 0.000526, "loss": 2.2928, "step": 2630 }, { "epoch": 0.010205501693185508, "grad_norm": 0.1857314556837082, "learning_rate": 0.000528, "loss": 2.3083, "step": 2640 }, { "epoch": 0.010244158896568786, "grad_norm": 0.20166365802288055, "learning_rate": 0.0005300000000000001, "loss": 2.2853, "step": 2650 }, { "epoch": 0.010282816099952066, "grad_norm": 0.1892065852880478, "learning_rate": 0.000532, "loss": 2.2838, "step": 2660 }, { "epoch": 0.010321473303335344, "grad_norm": 0.20674696564674377, "learning_rate": 0.0005340000000000001, "loss": 2.3009, "step": 2670 }, { "epoch": 0.010360130506718622, "grad_norm": 0.19799582660198212, "learning_rate": 0.000536, "loss": 2.3038, "step": 2680 }, { "epoch": 0.0103987877101019, "grad_norm": 0.19329093396663666, "learning_rate": 0.0005380000000000001, "loss": 2.2932, "step": 2690 }, { "epoch": 0.010437444913485179, "grad_norm": 0.21683421730995178, "learning_rate": 0.00054, "loss": 2.3102, "step": 2700 }, { "epoch": 0.010476102116868457, "grad_norm": 0.16719898581504822, "learning_rate": 0.0005420000000000001, "loss": 2.3057, "step": 2710 }, { "epoch": 0.010514759320251735, "grad_norm": 0.17605605721473694, "learning_rate": 0.0005440000000000001, "loss": 2.2882, "step": 2720 }, { "epoch": 0.010553416523635013, "grad_norm": 0.1690768152475357, "learning_rate": 0.000546, "loss": 2.3003, "step": 2730 }, { "epoch": 0.010592073727018293, "grad_norm": 0.175802081823349, "learning_rate": 0.0005480000000000001, "loss": 2.2874, "step": 2740 }, { "epoch": 0.010630730930401571, "grad_norm": 0.17927300930023193, "learning_rate": 0.00055, "loss": 2.2925, "step": 2750 }, { "epoch": 0.01066938813378485, "grad_norm": 0.19693294167518616, "learning_rate": 0.0005520000000000001, "loss": 2.2915, "step": 2760 }, { "epoch": 0.010708045337168128, "grad_norm": 0.16840171813964844, "learning_rate": 0.000554, "loss": 2.2909, "step": 2770 }, { "epoch": 0.010746702540551406, "grad_norm": 0.23003901541233063, "learning_rate": 0.0005560000000000001, "loss": 2.3001, "step": 2780 }, { "epoch": 0.010785359743934684, "grad_norm": 0.17170651257038116, "learning_rate": 0.000558, "loss": 2.2891, "step": 2790 }, { "epoch": 0.010824016947317962, "grad_norm": 0.19299864768981934, "learning_rate": 0.0005600000000000001, "loss": 2.3097, "step": 2800 }, { "epoch": 0.010862674150701242, "grad_norm": 0.17300739884376526, "learning_rate": 0.0005620000000000001, "loss": 2.2869, "step": 2810 }, { "epoch": 0.01090133135408452, "grad_norm": 0.20269937813282013, "learning_rate": 0.0005639999999999999, "loss": 2.3018, "step": 2820 }, { "epoch": 0.010939988557467799, "grad_norm": 1.11091148853302, "learning_rate": 0.000566, "loss": 2.3178, "step": 2830 }, { "epoch": 0.010978645760851077, "grad_norm": 0.1726454347372055, "learning_rate": 0.0005679999999999999, "loss": 2.3028, "step": 2840 }, { "epoch": 0.011017302964234355, "grad_norm": 0.19507430493831635, "learning_rate": 0.00057, "loss": 2.3144, "step": 2850 }, { "epoch": 0.011055960167617633, "grad_norm": 0.18410713970661163, "learning_rate": 0.0005719999999999999, "loss": 2.2999, "step": 2860 }, { "epoch": 0.011094617371000912, "grad_norm": 0.18434615433216095, "learning_rate": 0.000574, "loss": 2.2988, "step": 2870 }, { "epoch": 0.011133274574384192, "grad_norm": 0.203523188829422, "learning_rate": 0.000576, "loss": 2.3002, "step": 2880 }, { "epoch": 0.01117193177776747, "grad_norm": 0.20426899194717407, "learning_rate": 0.000578, "loss": 2.3113, "step": 2890 }, { "epoch": 0.011210588981150748, "grad_norm": 0.20032745599746704, "learning_rate": 0.00058, "loss": 2.2975, "step": 2900 }, { "epoch": 0.011249246184534026, "grad_norm": 0.1720176339149475, "learning_rate": 0.0005819999999999999, "loss": 2.3101, "step": 2910 }, { "epoch": 0.011287903387917304, "grad_norm": 0.19644276797771454, "learning_rate": 0.000584, "loss": 2.3017, "step": 2920 }, { "epoch": 0.011326560591300583, "grad_norm": 0.17177051305770874, "learning_rate": 0.0005859999999999999, "loss": 2.3089, "step": 2930 }, { "epoch": 0.01136521779468386, "grad_norm": 0.19372963905334473, "learning_rate": 0.000588, "loss": 2.2961, "step": 2940 }, { "epoch": 0.01140387499806714, "grad_norm": 0.17544853687286377, "learning_rate": 0.00059, "loss": 2.3147, "step": 2950 }, { "epoch": 0.011442532201450419, "grad_norm": 0.19046401977539062, "learning_rate": 0.000592, "loss": 2.3084, "step": 2960 }, { "epoch": 0.011481189404833697, "grad_norm": 0.16877801716327667, "learning_rate": 0.000594, "loss": 2.3172, "step": 2970 }, { "epoch": 0.011519846608216975, "grad_norm": 0.18222913146018982, "learning_rate": 0.000596, "loss": 2.3085, "step": 2980 }, { "epoch": 0.011558503811600254, "grad_norm": 0.17426137626171112, "learning_rate": 0.000598, "loss": 2.3167, "step": 2990 }, { "epoch": 0.011597161014983532, "grad_norm": 0.2104695737361908, "learning_rate": 0.0006, "loss": 2.3178, "step": 3000 }, { "epoch": 0.01163581821836681, "grad_norm": 0.1998831331729889, "learning_rate": 0.000602, "loss": 2.3057, "step": 3010 }, { "epoch": 0.011674475421750088, "grad_norm": 0.2088499665260315, "learning_rate": 0.000604, "loss": 2.3215, "step": 3020 }, { "epoch": 0.011713132625133368, "grad_norm": 0.1630593240261078, "learning_rate": 0.000606, "loss": 2.3106, "step": 3030 }, { "epoch": 0.011751789828516646, "grad_norm": 0.19042055308818817, "learning_rate": 0.000608, "loss": 2.3159, "step": 3040 }, { "epoch": 0.011790447031899924, "grad_norm": 0.2393055111169815, "learning_rate": 0.00061, "loss": 2.3008, "step": 3050 }, { "epoch": 0.011829104235283203, "grad_norm": 0.17863787710666656, "learning_rate": 0.000612, "loss": 2.2997, "step": 3060 }, { "epoch": 0.01186776143866648, "grad_norm": 0.17681317031383514, "learning_rate": 0.000614, "loss": 2.295, "step": 3070 }, { "epoch": 0.011906418642049759, "grad_norm": 0.20421043038368225, "learning_rate": 0.000616, "loss": 2.3029, "step": 3080 }, { "epoch": 0.011945075845433037, "grad_norm": 0.1847294718027115, "learning_rate": 0.0006180000000000001, "loss": 2.3119, "step": 3090 }, { "epoch": 0.011983733048816317, "grad_norm": 0.19557037949562073, "learning_rate": 0.00062, "loss": 2.3224, "step": 3100 }, { "epoch": 0.012022390252199595, "grad_norm": 0.1692788153886795, "learning_rate": 0.000622, "loss": 2.3152, "step": 3110 }, { "epoch": 0.012061047455582874, "grad_norm": 0.21557722985744476, "learning_rate": 0.000624, "loss": 2.3147, "step": 3120 }, { "epoch": 0.012099704658966152, "grad_norm": 0.17801079154014587, "learning_rate": 0.000626, "loss": 2.3108, "step": 3130 }, { "epoch": 0.01213836186234943, "grad_norm": 0.18151815235614777, "learning_rate": 0.000628, "loss": 2.3268, "step": 3140 }, { "epoch": 0.012177019065732708, "grad_norm": 0.2068193405866623, "learning_rate": 0.00063, "loss": 2.3084, "step": 3150 }, { "epoch": 0.012215676269115986, "grad_norm": 0.18960556387901306, "learning_rate": 0.000632, "loss": 2.3111, "step": 3160 }, { "epoch": 0.012254333472499266, "grad_norm": 0.2341216653585434, "learning_rate": 0.000634, "loss": 2.3085, "step": 3170 }, { "epoch": 0.012292990675882545, "grad_norm": 0.19195427000522614, "learning_rate": 0.0006360000000000001, "loss": 2.3069, "step": 3180 }, { "epoch": 0.012331647879265823, "grad_norm": 0.16848890483379364, "learning_rate": 0.000638, "loss": 2.2939, "step": 3190 }, { "epoch": 0.012370305082649101, "grad_norm": 0.1788295954465866, "learning_rate": 0.00064, "loss": 2.3208, "step": 3200 }, { "epoch": 0.01240896228603238, "grad_norm": 0.19146698713302612, "learning_rate": 0.000642, "loss": 2.3245, "step": 3210 }, { "epoch": 0.012447619489415657, "grad_norm": 0.18817923963069916, "learning_rate": 0.000644, "loss": 2.3363, "step": 3220 }, { "epoch": 0.012486276692798936, "grad_norm": 0.23819443583488464, "learning_rate": 0.000646, "loss": 2.313, "step": 3230 }, { "epoch": 0.012524933896182214, "grad_norm": 0.22465969622135162, "learning_rate": 0.000648, "loss": 2.3134, "step": 3240 }, { "epoch": 0.012563591099565494, "grad_norm": 0.18059320747852325, "learning_rate": 0.0006500000000000001, "loss": 2.3073, "step": 3250 }, { "epoch": 0.012602248302948772, "grad_norm": 0.22967374324798584, "learning_rate": 0.000652, "loss": 2.3114, "step": 3260 }, { "epoch": 0.01264090550633205, "grad_norm": 0.18112795054912567, "learning_rate": 0.0006540000000000001, "loss": 2.3164, "step": 3270 }, { "epoch": 0.012679562709715328, "grad_norm": 0.2173462063074112, "learning_rate": 0.000656, "loss": 2.3134, "step": 3280 }, { "epoch": 0.012718219913098606, "grad_norm": 0.1925901472568512, "learning_rate": 0.0006580000000000001, "loss": 2.3193, "step": 3290 }, { "epoch": 0.012756877116481885, "grad_norm": 0.18288016319274902, "learning_rate": 0.00066, "loss": 2.3186, "step": 3300 }, { "epoch": 0.012795534319865163, "grad_norm": 0.1826305240392685, "learning_rate": 0.000662, "loss": 2.3374, "step": 3310 }, { "epoch": 0.012834191523248443, "grad_norm": 0.23015402257442474, "learning_rate": 0.0006640000000000001, "loss": 2.3157, "step": 3320 }, { "epoch": 0.012872848726631721, "grad_norm": 0.19382594525814056, "learning_rate": 0.000666, "loss": 2.3106, "step": 3330 }, { "epoch": 0.012911505930015, "grad_norm": 0.18039023876190186, "learning_rate": 0.0006680000000000001, "loss": 2.3175, "step": 3340 }, { "epoch": 0.012950163133398277, "grad_norm": 0.1734837144613266, "learning_rate": 0.00067, "loss": 2.3264, "step": 3350 }, { "epoch": 0.012988820336781556, "grad_norm": 0.2026592344045639, "learning_rate": 0.0006720000000000001, "loss": 2.324, "step": 3360 }, { "epoch": 0.013027477540164834, "grad_norm": 0.17693206667900085, "learning_rate": 0.000674, "loss": 2.3244, "step": 3370 }, { "epoch": 0.013066134743548112, "grad_norm": 0.18763317167758942, "learning_rate": 0.0006760000000000001, "loss": 2.3247, "step": 3380 }, { "epoch": 0.013104791946931392, "grad_norm": 0.21735341846942902, "learning_rate": 0.0006780000000000001, "loss": 2.3155, "step": 3390 }, { "epoch": 0.01314344915031467, "grad_norm": 0.20110860466957092, "learning_rate": 0.00068, "loss": 2.3164, "step": 3400 }, { "epoch": 0.013182106353697948, "grad_norm": 0.20035584270954132, "learning_rate": 0.0006820000000000001, "loss": 2.3293, "step": 3410 }, { "epoch": 0.013220763557081227, "grad_norm": 0.19247788190841675, "learning_rate": 0.000684, "loss": 2.3253, "step": 3420 }, { "epoch": 0.013259420760464505, "grad_norm": 0.2187040001153946, "learning_rate": 0.0006860000000000001, "loss": 2.3348, "step": 3430 }, { "epoch": 0.013298077963847783, "grad_norm": 0.2174689620733261, "learning_rate": 0.0006879999999999999, "loss": 2.3274, "step": 3440 }, { "epoch": 0.013336735167231061, "grad_norm": 0.17934581637382507, "learning_rate": 0.00069, "loss": 2.3291, "step": 3450 }, { "epoch": 0.013375392370614341, "grad_norm": 0.1819523721933365, "learning_rate": 0.000692, "loss": 2.315, "step": 3460 }, { "epoch": 0.01341404957399762, "grad_norm": 0.20084640383720398, "learning_rate": 0.000694, "loss": 2.3244, "step": 3470 }, { "epoch": 0.013452706777380898, "grad_norm": 0.2400597631931305, "learning_rate": 0.000696, "loss": 2.3189, "step": 3480 }, { "epoch": 0.013491363980764176, "grad_norm": 0.19409014284610748, "learning_rate": 0.0006979999999999999, "loss": 2.3209, "step": 3490 }, { "epoch": 0.013530021184147454, "grad_norm": 0.20252855122089386, "learning_rate": 0.0007, "loss": 2.3371, "step": 3500 }, { "epoch": 0.013568678387530732, "grad_norm": 0.21784944832324982, "learning_rate": 0.0007019999999999999, "loss": 2.3313, "step": 3510 }, { "epoch": 0.01360733559091401, "grad_norm": 0.17790041863918304, "learning_rate": 0.000704, "loss": 2.3196, "step": 3520 }, { "epoch": 0.013645992794297289, "grad_norm": 0.19118991494178772, "learning_rate": 0.0007059999999999999, "loss": 2.3475, "step": 3530 }, { "epoch": 0.013684649997680568, "grad_norm": 0.17741745710372925, "learning_rate": 0.000708, "loss": 2.3288, "step": 3540 }, { "epoch": 0.013723307201063847, "grad_norm": 0.20735114812850952, "learning_rate": 0.00071, "loss": 2.3246, "step": 3550 }, { "epoch": 0.013761964404447125, "grad_norm": 0.1806357353925705, "learning_rate": 0.000712, "loss": 2.3364, "step": 3560 }, { "epoch": 0.013800621607830403, "grad_norm": 0.21187496185302734, "learning_rate": 0.000714, "loss": 2.3171, "step": 3570 }, { "epoch": 0.013839278811213681, "grad_norm": 0.22054551541805267, "learning_rate": 0.000716, "loss": 2.3246, "step": 3580 }, { "epoch": 0.01387793601459696, "grad_norm": 0.19840335845947266, "learning_rate": 0.000718, "loss": 2.3202, "step": 3590 }, { "epoch": 0.013916593217980238, "grad_norm": 0.20100551843643188, "learning_rate": 0.0007199999999999999, "loss": 2.3361, "step": 3600 }, { "epoch": 0.013955250421363518, "grad_norm": 0.19318947196006775, "learning_rate": 0.000722, "loss": 2.3387, "step": 3610 }, { "epoch": 0.013993907624746796, "grad_norm": 0.21331772208213806, "learning_rate": 0.000724, "loss": 2.3339, "step": 3620 }, { "epoch": 0.014032564828130074, "grad_norm": 0.17189262807369232, "learning_rate": 0.000726, "loss": 2.3349, "step": 3630 }, { "epoch": 0.014071222031513352, "grad_norm": 0.24295486509799957, "learning_rate": 0.000728, "loss": 2.338, "step": 3640 }, { "epoch": 0.01410987923489663, "grad_norm": 0.16952365636825562, "learning_rate": 0.00073, "loss": 2.3384, "step": 3650 }, { "epoch": 0.014148536438279909, "grad_norm": 0.2218736708164215, "learning_rate": 0.000732, "loss": 2.3221, "step": 3660 }, { "epoch": 0.014187193641663187, "grad_norm": 0.29244163632392883, "learning_rate": 0.000734, "loss": 2.3219, "step": 3670 }, { "epoch": 0.014225850845046467, "grad_norm": 0.21221856772899628, "learning_rate": 0.000736, "loss": 2.3372, "step": 3680 }, { "epoch": 0.014264508048429745, "grad_norm": 0.19032420217990875, "learning_rate": 0.000738, "loss": 2.336, "step": 3690 }, { "epoch": 0.014303165251813023, "grad_norm": 0.1942375898361206, "learning_rate": 0.00074, "loss": 2.3251, "step": 3700 }, { "epoch": 0.014341822455196301, "grad_norm": 0.18120437860488892, "learning_rate": 0.000742, "loss": 2.3325, "step": 3710 }, { "epoch": 0.01438047965857958, "grad_norm": 0.22333329916000366, "learning_rate": 0.000744, "loss": 2.3488, "step": 3720 }, { "epoch": 0.014419136861962858, "grad_norm": 0.21351563930511475, "learning_rate": 0.000746, "loss": 2.3263, "step": 3730 }, { "epoch": 0.014457794065346136, "grad_norm": 0.1748659312725067, "learning_rate": 0.000748, "loss": 2.3232, "step": 3740 }, { "epoch": 0.014496451268729414, "grad_norm": 0.1662777066230774, "learning_rate": 0.00075, "loss": 2.3334, "step": 3750 }, { "epoch": 0.014535108472112694, "grad_norm": 0.19590096175670624, "learning_rate": 0.0007520000000000001, "loss": 2.3427, "step": 3760 }, { "epoch": 0.014573765675495972, "grad_norm": 0.2757102847099304, "learning_rate": 0.000754, "loss": 2.3256, "step": 3770 }, { "epoch": 0.01461242287887925, "grad_norm": 0.1870422512292862, "learning_rate": 0.000756, "loss": 2.3414, "step": 3780 }, { "epoch": 0.014651080082262529, "grad_norm": 0.2014084756374359, "learning_rate": 0.000758, "loss": 2.3311, "step": 3790 }, { "epoch": 0.014689737285645807, "grad_norm": 0.21479026973247528, "learning_rate": 0.00076, "loss": 2.3392, "step": 3800 }, { "epoch": 0.014728394489029085, "grad_norm": 0.21824988722801208, "learning_rate": 0.000762, "loss": 2.3235, "step": 3810 }, { "epoch": 0.014767051692412363, "grad_norm": 0.18084734678268433, "learning_rate": 0.000764, "loss": 2.3392, "step": 3820 }, { "epoch": 0.014805708895795643, "grad_norm": 0.1988394409418106, "learning_rate": 0.0007660000000000001, "loss": 2.3204, "step": 3830 }, { "epoch": 0.014844366099178921, "grad_norm": 0.2254992574453354, "learning_rate": 0.000768, "loss": 2.333, "step": 3840 }, { "epoch": 0.0148830233025622, "grad_norm": 0.2529672384262085, "learning_rate": 0.0007700000000000001, "loss": 2.3312, "step": 3850 }, { "epoch": 0.014921680505945478, "grad_norm": 0.1985018253326416, "learning_rate": 0.000772, "loss": 2.346, "step": 3860 }, { "epoch": 0.014960337709328756, "grad_norm": 0.1995285451412201, "learning_rate": 0.0007740000000000001, "loss": 2.3367, "step": 3870 }, { "epoch": 0.014998994912712034, "grad_norm": 0.2558148205280304, "learning_rate": 0.000776, "loss": 2.3475, "step": 3880 }, { "epoch": 0.015037652116095312, "grad_norm": 0.1844359040260315, "learning_rate": 0.000778, "loss": 2.3576, "step": 3890 }, { "epoch": 0.015076309319478592, "grad_norm": 0.1759207844734192, "learning_rate": 0.0007800000000000001, "loss": 2.3413, "step": 3900 }, { "epoch": 0.01511496652286187, "grad_norm": 0.2178059220314026, "learning_rate": 0.000782, "loss": 2.3271, "step": 3910 }, { "epoch": 0.015153623726245149, "grad_norm": 0.19973725080490112, "learning_rate": 0.0007840000000000001, "loss": 2.338, "step": 3920 }, { "epoch": 0.015192280929628427, "grad_norm": 0.17417024075984955, "learning_rate": 0.000786, "loss": 2.3511, "step": 3930 }, { "epoch": 0.015230938133011705, "grad_norm": 0.20759367942810059, "learning_rate": 0.0007880000000000001, "loss": 2.3419, "step": 3940 }, { "epoch": 0.015269595336394983, "grad_norm": 0.17729806900024414, "learning_rate": 0.00079, "loss": 2.3487, "step": 3950 }, { "epoch": 0.015308252539778262, "grad_norm": 0.16963709890842438, "learning_rate": 0.0007920000000000001, "loss": 2.3435, "step": 3960 }, { "epoch": 0.015346909743161542, "grad_norm": 0.24149620532989502, "learning_rate": 0.0007940000000000001, "loss": 2.3415, "step": 3970 }, { "epoch": 0.01538556694654482, "grad_norm": 0.21152982115745544, "learning_rate": 0.000796, "loss": 2.348, "step": 3980 }, { "epoch": 0.015424224149928098, "grad_norm": 0.18707045912742615, "learning_rate": 0.0007980000000000001, "loss": 2.3503, "step": 3990 }, { "epoch": 0.015462881353311376, "grad_norm": 0.20199261605739594, "learning_rate": 0.0008, "loss": 2.3655, "step": 4000 }, { "epoch": 0.015501538556694654, "grad_norm": 0.2197076380252838, "learning_rate": 0.0008020000000000001, "loss": 2.3468, "step": 4010 }, { "epoch": 0.015540195760077933, "grad_norm": 0.21308410167694092, "learning_rate": 0.000804, "loss": 2.3371, "step": 4020 }, { "epoch": 0.01557885296346121, "grad_norm": 0.25000447034835815, "learning_rate": 0.0008060000000000001, "loss": 2.3436, "step": 4030 }, { "epoch": 0.015617510166844489, "grad_norm": 0.18997006118297577, "learning_rate": 0.000808, "loss": 2.3428, "step": 4040 }, { "epoch": 0.015656167370227767, "grad_norm": 0.1858537495136261, "learning_rate": 0.0008100000000000001, "loss": 2.3409, "step": 4050 }, { "epoch": 0.015694824573611047, "grad_norm": 0.20581378042697906, "learning_rate": 0.0008120000000000001, "loss": 2.3435, "step": 4060 }, { "epoch": 0.015733481776994324, "grad_norm": 0.16140508651733398, "learning_rate": 0.0008139999999999999, "loss": 2.3431, "step": 4070 }, { "epoch": 0.015772138980377604, "grad_norm": 0.20004132390022278, "learning_rate": 0.000816, "loss": 2.3354, "step": 4080 }, { "epoch": 0.015810796183760883, "grad_norm": 0.16737546026706696, "learning_rate": 0.0008179999999999999, "loss": 2.3609, "step": 4090 }, { "epoch": 0.01584945338714416, "grad_norm": 0.2625548243522644, "learning_rate": 0.00082, "loss": 2.3586, "step": 4100 }, { "epoch": 0.01588811059052744, "grad_norm": 0.2821698486804962, "learning_rate": 0.0008219999999999999, "loss": 2.3525, "step": 4110 }, { "epoch": 0.015926767793910716, "grad_norm": 0.21390102803707123, "learning_rate": 0.000824, "loss": 2.3458, "step": 4120 }, { "epoch": 0.015965424997293996, "grad_norm": 0.20981815457344055, "learning_rate": 0.000826, "loss": 2.3465, "step": 4130 }, { "epoch": 0.016004082200677273, "grad_norm": 0.1971135288476944, "learning_rate": 0.000828, "loss": 2.3513, "step": 4140 }, { "epoch": 0.016042739404060553, "grad_norm": 0.32396647334098816, "learning_rate": 0.00083, "loss": 2.3451, "step": 4150 }, { "epoch": 0.016081396607443833, "grad_norm": 0.24782757461071014, "learning_rate": 0.000832, "loss": 2.3713, "step": 4160 }, { "epoch": 0.01612005381082711, "grad_norm": 0.17164631187915802, "learning_rate": 0.000834, "loss": 2.3502, "step": 4170 }, { "epoch": 0.01615871101421039, "grad_norm": 0.16930937767028809, "learning_rate": 0.0008359999999999999, "loss": 2.343, "step": 4180 }, { "epoch": 0.016197368217593665, "grad_norm": 0.18578386306762695, "learning_rate": 0.000838, "loss": 2.3461, "step": 4190 }, { "epoch": 0.016236025420976945, "grad_norm": 0.2054670751094818, "learning_rate": 0.00084, "loss": 2.3541, "step": 4200 }, { "epoch": 0.016274682624360222, "grad_norm": 0.2295895665884018, "learning_rate": 0.000842, "loss": 2.3583, "step": 4210 }, { "epoch": 0.016313339827743502, "grad_norm": 0.2089163213968277, "learning_rate": 0.000844, "loss": 2.3494, "step": 4220 }, { "epoch": 0.016351997031126782, "grad_norm": 0.1846083551645279, "learning_rate": 0.000846, "loss": 2.3604, "step": 4230 }, { "epoch": 0.016390654234510058, "grad_norm": 0.20951008796691895, "learning_rate": 0.000848, "loss": 2.3517, "step": 4240 }, { "epoch": 0.016429311437893338, "grad_norm": 0.18815088272094727, "learning_rate": 0.00085, "loss": 2.3601, "step": 4250 }, { "epoch": 0.016467968641276615, "grad_norm": 0.19509609043598175, "learning_rate": 0.000852, "loss": 2.3516, "step": 4260 }, { "epoch": 0.016506625844659895, "grad_norm": 0.2361738532781601, "learning_rate": 0.000854, "loss": 2.3644, "step": 4270 }, { "epoch": 0.01654528304804317, "grad_norm": 0.23323406279087067, "learning_rate": 0.000856, "loss": 2.36, "step": 4280 }, { "epoch": 0.01658394025142645, "grad_norm": 0.20140711963176727, "learning_rate": 0.000858, "loss": 2.3502, "step": 4290 }, { "epoch": 0.01662259745480973, "grad_norm": 0.22504544258117676, "learning_rate": 0.00086, "loss": 2.3609, "step": 4300 }, { "epoch": 0.016661254658193007, "grad_norm": 0.19168426096439362, "learning_rate": 0.000862, "loss": 2.3573, "step": 4310 }, { "epoch": 0.016699911861576287, "grad_norm": 0.20893554389476776, "learning_rate": 0.000864, "loss": 2.3782, "step": 4320 }, { "epoch": 0.016738569064959564, "grad_norm": 0.19215723872184753, "learning_rate": 0.000866, "loss": 2.3409, "step": 4330 }, { "epoch": 0.016777226268342844, "grad_norm": 0.18347090482711792, "learning_rate": 0.0008680000000000001, "loss": 2.3683, "step": 4340 }, { "epoch": 0.01681588347172612, "grad_norm": 0.19273754954338074, "learning_rate": 0.00087, "loss": 2.3631, "step": 4350 }, { "epoch": 0.0168545406751094, "grad_norm": 0.28873759508132935, "learning_rate": 0.000872, "loss": 2.3594, "step": 4360 }, { "epoch": 0.016893197878492677, "grad_norm": 0.19875763356685638, "learning_rate": 0.000874, "loss": 2.3485, "step": 4370 }, { "epoch": 0.016931855081875957, "grad_norm": 0.2057722955942154, "learning_rate": 0.000876, "loss": 2.3577, "step": 4380 }, { "epoch": 0.016970512285259236, "grad_norm": 0.23414702713489532, "learning_rate": 0.000878, "loss": 2.3543, "step": 4390 }, { "epoch": 0.017009169488642513, "grad_norm": 0.19359450042247772, "learning_rate": 0.00088, "loss": 2.3571, "step": 4400 }, { "epoch": 0.017047826692025793, "grad_norm": 0.19209226965904236, "learning_rate": 0.000882, "loss": 2.3508, "step": 4410 }, { "epoch": 0.01708648389540907, "grad_norm": 0.2071743756532669, "learning_rate": 0.000884, "loss": 2.3479, "step": 4420 }, { "epoch": 0.01712514109879235, "grad_norm": 0.25464510917663574, "learning_rate": 0.0008860000000000001, "loss": 2.3719, "step": 4430 }, { "epoch": 0.017163798302175626, "grad_norm": 0.25327372550964355, "learning_rate": 0.000888, "loss": 2.3576, "step": 4440 }, { "epoch": 0.017202455505558906, "grad_norm": 0.19814546406269073, "learning_rate": 0.0008900000000000001, "loss": 2.351, "step": 4450 }, { "epoch": 0.017241112708942186, "grad_norm": 0.18401065468788147, "learning_rate": 0.000892, "loss": 2.3566, "step": 4460 }, { "epoch": 0.017279769912325462, "grad_norm": 0.1848963350057602, "learning_rate": 0.000894, "loss": 2.3525, "step": 4470 }, { "epoch": 0.017318427115708742, "grad_norm": 0.213156595826149, "learning_rate": 0.000896, "loss": 2.3474, "step": 4480 }, { "epoch": 0.01735708431909202, "grad_norm": 0.1951395869255066, "learning_rate": 0.000898, "loss": 2.367, "step": 4490 }, { "epoch": 0.0173957415224753, "grad_norm": 0.18687497079372406, "learning_rate": 0.0009000000000000001, "loss": 2.3521, "step": 4500 }, { "epoch": 0.017434398725858575, "grad_norm": 0.24728751182556152, "learning_rate": 0.000902, "loss": 2.3744, "step": 4510 }, { "epoch": 0.017473055929241855, "grad_norm": 0.23267033696174622, "learning_rate": 0.0009040000000000001, "loss": 2.3624, "step": 4520 }, { "epoch": 0.017511713132625135, "grad_norm": 0.1700582057237625, "learning_rate": 0.000906, "loss": 2.3719, "step": 4530 }, { "epoch": 0.01755037033600841, "grad_norm": 0.1689836084842682, "learning_rate": 0.0009080000000000001, "loss": 2.3659, "step": 4540 }, { "epoch": 0.01758902753939169, "grad_norm": 0.20966678857803345, "learning_rate": 0.00091, "loss": 2.3525, "step": 4550 }, { "epoch": 0.017627684742774968, "grad_norm": 0.18348130583763123, "learning_rate": 0.000912, "loss": 2.358, "step": 4560 }, { "epoch": 0.017666341946158248, "grad_norm": 0.21361717581748962, "learning_rate": 0.0009140000000000001, "loss": 2.3657, "step": 4570 }, { "epoch": 0.017704999149541524, "grad_norm": 0.19976146519184113, "learning_rate": 0.000916, "loss": 2.3555, "step": 4580 }, { "epoch": 0.017743656352924804, "grad_norm": 0.19655726850032806, "learning_rate": 0.0009180000000000001, "loss": 2.3498, "step": 4590 }, { "epoch": 0.017782313556308084, "grad_norm": 0.19483190774917603, "learning_rate": 0.00092, "loss": 2.3674, "step": 4600 }, { "epoch": 0.01782097075969136, "grad_norm": 0.22144700586795807, "learning_rate": 0.0009220000000000001, "loss": 2.3779, "step": 4610 }, { "epoch": 0.01785962796307464, "grad_norm": 0.2064056396484375, "learning_rate": 0.000924, "loss": 2.3547, "step": 4620 }, { "epoch": 0.017898285166457917, "grad_norm": 0.19249336421489716, "learning_rate": 0.0009260000000000001, "loss": 2.3645, "step": 4630 }, { "epoch": 0.017936942369841197, "grad_norm": 0.16990318894386292, "learning_rate": 0.0009280000000000001, "loss": 2.3738, "step": 4640 }, { "epoch": 0.017975599573224473, "grad_norm": 0.1956743597984314, "learning_rate": 0.00093, "loss": 2.3491, "step": 4650 }, { "epoch": 0.018014256776607753, "grad_norm": 0.4806165099143982, "learning_rate": 0.0009320000000000001, "loss": 2.3526, "step": 4660 }, { "epoch": 0.018052913979991033, "grad_norm": 0.1825423687696457, "learning_rate": 0.000934, "loss": 2.3588, "step": 4670 }, { "epoch": 0.01809157118337431, "grad_norm": 0.23481100797653198, "learning_rate": 0.0009360000000000001, "loss": 2.3541, "step": 4680 }, { "epoch": 0.01813022838675759, "grad_norm": 0.21459338068962097, "learning_rate": 0.0009379999999999999, "loss": 2.3703, "step": 4690 }, { "epoch": 0.018168885590140866, "grad_norm": 0.23080278933048248, "learning_rate": 0.00094, "loss": 2.3711, "step": 4700 }, { "epoch": 0.018207542793524146, "grad_norm": 0.21888788044452667, "learning_rate": 0.000942, "loss": 2.368, "step": 4710 }, { "epoch": 0.018246199996907422, "grad_norm": 0.1902155727148056, "learning_rate": 0.000944, "loss": 2.3778, "step": 4720 }, { "epoch": 0.018284857200290702, "grad_norm": 0.18804939091205597, "learning_rate": 0.000946, "loss": 2.3749, "step": 4730 }, { "epoch": 0.018323514403673982, "grad_norm": 0.23752015829086304, "learning_rate": 0.000948, "loss": 2.3782, "step": 4740 }, { "epoch": 0.01836217160705726, "grad_norm": 0.20950154960155487, "learning_rate": 0.00095, "loss": 2.3584, "step": 4750 }, { "epoch": 0.01840082881044054, "grad_norm": 0.2356835901737213, "learning_rate": 0.0009519999999999999, "loss": 2.3952, "step": 4760 }, { "epoch": 0.018439486013823815, "grad_norm": 0.2130763977766037, "learning_rate": 0.000954, "loss": 2.3659, "step": 4770 }, { "epoch": 0.018478143217207095, "grad_norm": 0.3312756419181824, "learning_rate": 0.0009559999999999999, "loss": 2.3651, "step": 4780 }, { "epoch": 0.01851680042059037, "grad_norm": 0.21410676836967468, "learning_rate": 0.000958, "loss": 2.3887, "step": 4790 }, { "epoch": 0.01855545762397365, "grad_norm": 0.18393675982952118, "learning_rate": 0.00096, "loss": 2.3764, "step": 4800 }, { "epoch": 0.01859411482735693, "grad_norm": 0.22657392919063568, "learning_rate": 0.000962, "loss": 2.3905, "step": 4810 }, { "epoch": 0.018632772030740208, "grad_norm": 0.1854638159275055, "learning_rate": 0.000964, "loss": 2.3755, "step": 4820 }, { "epoch": 0.018671429234123488, "grad_norm": 0.17408819496631622, "learning_rate": 0.000966, "loss": 2.3733, "step": 4830 }, { "epoch": 0.018710086437506764, "grad_norm": 0.21019886434078217, "learning_rate": 0.000968, "loss": 2.3909, "step": 4840 }, { "epoch": 0.018748743640890044, "grad_norm": 0.28492727875709534, "learning_rate": 0.0009699999999999999, "loss": 2.3551, "step": 4850 }, { "epoch": 0.01878740084427332, "grad_norm": 0.3036006689071655, "learning_rate": 0.000972, "loss": 2.3697, "step": 4860 }, { "epoch": 0.0188260580476566, "grad_norm": 0.27126896381378174, "learning_rate": 0.000974, "loss": 2.3682, "step": 4870 }, { "epoch": 0.018864715251039877, "grad_norm": 0.19037535786628723, "learning_rate": 0.000976, "loss": 2.3693, "step": 4880 }, { "epoch": 0.018903372454423157, "grad_norm": 0.17468905448913574, "learning_rate": 0.000978, "loss": 2.3792, "step": 4890 }, { "epoch": 0.018942029657806437, "grad_norm": 0.20166796445846558, "learning_rate": 0.00098, "loss": 2.3735, "step": 4900 }, { "epoch": 0.018980686861189713, "grad_norm": 0.21851827204227448, "learning_rate": 0.000982, "loss": 2.3554, "step": 4910 }, { "epoch": 0.019019344064572993, "grad_norm": 0.2518332898616791, "learning_rate": 0.000984, "loss": 2.3777, "step": 4920 }, { "epoch": 0.01905800126795627, "grad_norm": 0.21647198498249054, "learning_rate": 0.0009860000000000001, "loss": 2.3772, "step": 4930 }, { "epoch": 0.01909665847133955, "grad_norm": 0.19404593110084534, "learning_rate": 0.000988, "loss": 2.3813, "step": 4940 }, { "epoch": 0.019135315674722826, "grad_norm": 0.219878688454628, "learning_rate": 0.00099, "loss": 2.3693, "step": 4950 }, { "epoch": 0.019173972878106106, "grad_norm": 0.18607568740844727, "learning_rate": 0.000992, "loss": 2.3613, "step": 4960 }, { "epoch": 0.019212630081489386, "grad_norm": 0.28682470321655273, "learning_rate": 0.000994, "loss": 2.3921, "step": 4970 }, { "epoch": 0.019251287284872663, "grad_norm": 0.18783779442310333, "learning_rate": 0.000996, "loss": 2.3787, "step": 4980 }, { "epoch": 0.019289944488255942, "grad_norm": 0.21272382140159607, "learning_rate": 0.000998, "loss": 2.3665, "step": 4990 }, { "epoch": 0.01932860169163922, "grad_norm": 0.1703406572341919, "learning_rate": 0.001, "loss": 2.3749, "step": 5000 }, { "epoch": 0.0193672588950225, "grad_norm": 0.23985524475574493, "learning_rate": 0.001002, "loss": 2.3814, "step": 5010 }, { "epoch": 0.019405916098405775, "grad_norm": 0.21343666315078735, "learning_rate": 0.0010040000000000001, "loss": 2.3647, "step": 5020 }, { "epoch": 0.019444573301789055, "grad_norm": 0.18966184556484222, "learning_rate": 0.001006, "loss": 2.3807, "step": 5030 }, { "epoch": 0.019483230505172335, "grad_norm": 0.23002567887306213, "learning_rate": 0.001008, "loss": 2.379, "step": 5040 }, { "epoch": 0.01952188770855561, "grad_norm": 0.1744360476732254, "learning_rate": 0.00101, "loss": 2.3835, "step": 5050 }, { "epoch": 0.01956054491193889, "grad_norm": 0.213888481259346, "learning_rate": 0.001012, "loss": 2.3697, "step": 5060 }, { "epoch": 0.019599202115322168, "grad_norm": 0.2396910935640335, "learning_rate": 0.001014, "loss": 2.3888, "step": 5070 }, { "epoch": 0.019637859318705448, "grad_norm": 0.22268559038639069, "learning_rate": 0.001016, "loss": 2.3851, "step": 5080 }, { "epoch": 0.019676516522088724, "grad_norm": 0.2205670177936554, "learning_rate": 0.001018, "loss": 2.3908, "step": 5090 }, { "epoch": 0.019715173725472004, "grad_norm": 0.2212257832288742, "learning_rate": 0.00102, "loss": 2.3914, "step": 5100 }, { "epoch": 0.019753830928855284, "grad_norm": 0.1979675441980362, "learning_rate": 0.0010220000000000001, "loss": 2.3785, "step": 5110 }, { "epoch": 0.01979248813223856, "grad_norm": 0.275046706199646, "learning_rate": 0.001024, "loss": 2.3741, "step": 5120 }, { "epoch": 0.01983114533562184, "grad_norm": 0.1912265121936798, "learning_rate": 0.001026, "loss": 2.3851, "step": 5130 }, { "epoch": 0.019869802539005117, "grad_norm": 0.2097253054380417, "learning_rate": 0.001028, "loss": 2.3815, "step": 5140 }, { "epoch": 0.019908459742388397, "grad_norm": 0.19920724630355835, "learning_rate": 0.00103, "loss": 2.3834, "step": 5150 }, { "epoch": 0.019947116945771674, "grad_norm": 0.18724262714385986, "learning_rate": 0.0010320000000000001, "loss": 2.3754, "step": 5160 }, { "epoch": 0.019985774149154954, "grad_norm": 0.20353613793849945, "learning_rate": 0.001034, "loss": 2.3732, "step": 5170 }, { "epoch": 0.020024431352538234, "grad_norm": 0.24157506227493286, "learning_rate": 0.001036, "loss": 2.3814, "step": 5180 }, { "epoch": 0.02006308855592151, "grad_norm": 0.2027304321527481, "learning_rate": 0.001038, "loss": 2.3868, "step": 5190 }, { "epoch": 0.02010174575930479, "grad_norm": 0.20146556198596954, "learning_rate": 0.0010400000000000001, "loss": 2.3819, "step": 5200 }, { "epoch": 0.020140402962688066, "grad_norm": 0.1771874725818634, "learning_rate": 0.001042, "loss": 2.3867, "step": 5210 }, { "epoch": 0.020179060166071346, "grad_norm": 0.18880490958690643, "learning_rate": 0.001044, "loss": 2.3862, "step": 5220 }, { "epoch": 0.020217717369454623, "grad_norm": 0.2295592725276947, "learning_rate": 0.001046, "loss": 2.38, "step": 5230 }, { "epoch": 0.020256374572837903, "grad_norm": 0.20400582253932953, "learning_rate": 0.001048, "loss": 2.3833, "step": 5240 }, { "epoch": 0.020295031776221183, "grad_norm": 0.20016399025917053, "learning_rate": 0.0010500000000000002, "loss": 2.3831, "step": 5250 }, { "epoch": 0.02033368897960446, "grad_norm": 0.211043119430542, "learning_rate": 0.001052, "loss": 2.3928, "step": 5260 }, { "epoch": 0.02037234618298774, "grad_norm": 0.16745540499687195, "learning_rate": 0.001054, "loss": 2.3754, "step": 5270 }, { "epoch": 0.020411003386371016, "grad_norm": 0.2527609169483185, "learning_rate": 0.001056, "loss": 2.3707, "step": 5280 }, { "epoch": 0.020449660589754295, "grad_norm": 0.24378624558448792, "learning_rate": 0.0010580000000000001, "loss": 2.3861, "step": 5290 }, { "epoch": 0.020488317793137572, "grad_norm": 0.215751051902771, "learning_rate": 0.0010600000000000002, "loss": 2.3821, "step": 5300 }, { "epoch": 0.020526974996520852, "grad_norm": 0.17920060455799103, "learning_rate": 0.001062, "loss": 2.374, "step": 5310 }, { "epoch": 0.020565632199904132, "grad_norm": 0.19355903565883636, "learning_rate": 0.001064, "loss": 2.4042, "step": 5320 }, { "epoch": 0.02060428940328741, "grad_norm": 0.1970899999141693, "learning_rate": 0.001066, "loss": 2.3778, "step": 5330 }, { "epoch": 0.020642946606670688, "grad_norm": 0.21478936076164246, "learning_rate": 0.0010680000000000002, "loss": 2.3864, "step": 5340 }, { "epoch": 0.020681603810053965, "grad_norm": 0.1935175359249115, "learning_rate": 0.00107, "loss": 2.3968, "step": 5350 }, { "epoch": 0.020720261013437245, "grad_norm": 0.20764757692813873, "learning_rate": 0.001072, "loss": 2.3814, "step": 5360 }, { "epoch": 0.02075891821682052, "grad_norm": 0.2129974067211151, "learning_rate": 0.001074, "loss": 2.377, "step": 5370 }, { "epoch": 0.0207975754202038, "grad_norm": 0.1787700653076172, "learning_rate": 0.0010760000000000001, "loss": 2.3644, "step": 5380 }, { "epoch": 0.02083623262358708, "grad_norm": 0.17959833145141602, "learning_rate": 0.0010780000000000002, "loss": 2.3947, "step": 5390 }, { "epoch": 0.020874889826970357, "grad_norm": 0.1857430785894394, "learning_rate": 0.00108, "loss": 2.3993, "step": 5400 }, { "epoch": 0.020913547030353637, "grad_norm": 0.21030008792877197, "learning_rate": 0.001082, "loss": 2.3942, "step": 5410 }, { "epoch": 0.020952204233736914, "grad_norm": 0.2184520959854126, "learning_rate": 0.0010840000000000001, "loss": 2.3898, "step": 5420 }, { "epoch": 0.020990861437120194, "grad_norm": 0.22430236637592316, "learning_rate": 0.0010860000000000002, "loss": 2.3829, "step": 5430 }, { "epoch": 0.02102951864050347, "grad_norm": 0.20971611142158508, "learning_rate": 0.0010880000000000002, "loss": 2.381, "step": 5440 }, { "epoch": 0.02106817584388675, "grad_norm": 0.19676510989665985, "learning_rate": 0.00109, "loss": 2.3834, "step": 5450 }, { "epoch": 0.021106833047270027, "grad_norm": 0.2264118492603302, "learning_rate": 0.001092, "loss": 2.3947, "step": 5460 }, { "epoch": 0.021145490250653307, "grad_norm": 0.1974431425333023, "learning_rate": 0.0010940000000000001, "loss": 2.3712, "step": 5470 }, { "epoch": 0.021184147454036586, "grad_norm": 0.20495396852493286, "learning_rate": 0.0010960000000000002, "loss": 2.3856, "step": 5480 }, { "epoch": 0.021222804657419863, "grad_norm": 0.19436782598495483, "learning_rate": 0.001098, "loss": 2.3905, "step": 5490 }, { "epoch": 0.021261461860803143, "grad_norm": 0.1938960999250412, "learning_rate": 0.0011, "loss": 2.3965, "step": 5500 }, { "epoch": 0.02130011906418642, "grad_norm": 0.18784378468990326, "learning_rate": 0.0011020000000000001, "loss": 2.3885, "step": 5510 }, { "epoch": 0.0213387762675697, "grad_norm": 0.20048850774765015, "learning_rate": 0.0011040000000000002, "loss": 2.3738, "step": 5520 }, { "epoch": 0.021377433470952976, "grad_norm": 0.2599906623363495, "learning_rate": 0.0011060000000000002, "loss": 2.4158, "step": 5530 }, { "epoch": 0.021416090674336256, "grad_norm": 0.2207055687904358, "learning_rate": 0.001108, "loss": 2.393, "step": 5540 }, { "epoch": 0.021454747877719536, "grad_norm": 0.2058638483285904, "learning_rate": 0.00111, "loss": 2.3804, "step": 5550 }, { "epoch": 0.021493405081102812, "grad_norm": 0.22761179506778717, "learning_rate": 0.0011120000000000001, "loss": 2.3845, "step": 5560 }, { "epoch": 0.021532062284486092, "grad_norm": 0.18034592270851135, "learning_rate": 0.0011140000000000002, "loss": 2.4026, "step": 5570 }, { "epoch": 0.02157071948786937, "grad_norm": 0.21812139451503754, "learning_rate": 0.001116, "loss": 2.3913, "step": 5580 }, { "epoch": 0.02160937669125265, "grad_norm": 0.20300228893756866, "learning_rate": 0.001118, "loss": 2.3818, "step": 5590 }, { "epoch": 0.021648033894635925, "grad_norm": 0.1981932371854782, "learning_rate": 0.0011200000000000001, "loss": 2.407, "step": 5600 }, { "epoch": 0.021686691098019205, "grad_norm": 0.21006079018115997, "learning_rate": 0.0011220000000000002, "loss": 2.3832, "step": 5610 }, { "epoch": 0.021725348301402485, "grad_norm": 0.24926023185253143, "learning_rate": 0.0011240000000000002, "loss": 2.3968, "step": 5620 }, { "epoch": 0.02176400550478576, "grad_norm": 0.21123667061328888, "learning_rate": 0.0011259999999999998, "loss": 2.3916, "step": 5630 }, { "epoch": 0.02180266270816904, "grad_norm": 0.19651569426059723, "learning_rate": 0.0011279999999999999, "loss": 2.4016, "step": 5640 }, { "epoch": 0.021841319911552318, "grad_norm": 0.17947527766227722, "learning_rate": 0.00113, "loss": 2.3748, "step": 5650 }, { "epoch": 0.021879977114935598, "grad_norm": 0.25798648595809937, "learning_rate": 0.001132, "loss": 2.3818, "step": 5660 }, { "epoch": 0.021918634318318874, "grad_norm": 0.2755715548992157, "learning_rate": 0.001134, "loss": 2.3983, "step": 5670 }, { "epoch": 0.021957291521702154, "grad_norm": 0.19706271588802338, "learning_rate": 0.0011359999999999999, "loss": 2.3904, "step": 5680 }, { "epoch": 0.021995948725085434, "grad_norm": 0.2231791466474533, "learning_rate": 0.001138, "loss": 2.4054, "step": 5690 }, { "epoch": 0.02203460592846871, "grad_norm": 0.19517715275287628, "learning_rate": 0.00114, "loss": 2.3792, "step": 5700 }, { "epoch": 0.02207326313185199, "grad_norm": 0.21162500977516174, "learning_rate": 0.001142, "loss": 2.3907, "step": 5710 }, { "epoch": 0.022111920335235267, "grad_norm": 0.24791096150875092, "learning_rate": 0.0011439999999999998, "loss": 2.4084, "step": 5720 }, { "epoch": 0.022150577538618547, "grad_norm": 0.22254496812820435, "learning_rate": 0.0011459999999999999, "loss": 2.4031, "step": 5730 }, { "epoch": 0.022189234742001823, "grad_norm": 0.17276102304458618, "learning_rate": 0.001148, "loss": 2.3873, "step": 5740 }, { "epoch": 0.022227891945385103, "grad_norm": 0.21049363911151886, "learning_rate": 0.00115, "loss": 2.3936, "step": 5750 }, { "epoch": 0.022266549148768383, "grad_norm": 0.21012358367443085, "learning_rate": 0.001152, "loss": 2.3911, "step": 5760 }, { "epoch": 0.02230520635215166, "grad_norm": 0.17598873376846313, "learning_rate": 0.0011539999999999999, "loss": 2.3986, "step": 5770 }, { "epoch": 0.02234386355553494, "grad_norm": 0.21582140028476715, "learning_rate": 0.001156, "loss": 2.385, "step": 5780 }, { "epoch": 0.022382520758918216, "grad_norm": 0.2273397147655487, "learning_rate": 0.001158, "loss": 2.401, "step": 5790 }, { "epoch": 0.022421177962301496, "grad_norm": 0.16410350799560547, "learning_rate": 0.00116, "loss": 2.3746, "step": 5800 }, { "epoch": 0.022459835165684772, "grad_norm": 0.19850672781467438, "learning_rate": 0.0011619999999999998, "loss": 2.3876, "step": 5810 }, { "epoch": 0.022498492369068052, "grad_norm": 0.2051462084054947, "learning_rate": 0.0011639999999999999, "loss": 2.4057, "step": 5820 }, { "epoch": 0.022537149572451332, "grad_norm": 0.19643060863018036, "learning_rate": 0.001166, "loss": 2.3852, "step": 5830 }, { "epoch": 0.02257580677583461, "grad_norm": 0.21040578186511993, "learning_rate": 0.001168, "loss": 2.3928, "step": 5840 }, { "epoch": 0.02261446397921789, "grad_norm": 0.19768066704273224, "learning_rate": 0.00117, "loss": 2.4053, "step": 5850 }, { "epoch": 0.022653121182601165, "grad_norm": 0.21659426391124725, "learning_rate": 0.0011719999999999999, "loss": 2.409, "step": 5860 }, { "epoch": 0.022691778385984445, "grad_norm": 0.23596347868442535, "learning_rate": 0.001174, "loss": 2.4044, "step": 5870 }, { "epoch": 0.02273043558936772, "grad_norm": 0.1964534968137741, "learning_rate": 0.001176, "loss": 2.3805, "step": 5880 }, { "epoch": 0.022769092792751, "grad_norm": 0.15665480494499207, "learning_rate": 0.001178, "loss": 2.3937, "step": 5890 }, { "epoch": 0.02280774999613428, "grad_norm": 0.18110042810440063, "learning_rate": 0.00118, "loss": 2.3911, "step": 5900 }, { "epoch": 0.022846407199517558, "grad_norm": 0.20500323176383972, "learning_rate": 0.0011819999999999999, "loss": 2.3963, "step": 5910 }, { "epoch": 0.022885064402900838, "grad_norm": 0.21157889068126678, "learning_rate": 0.001184, "loss": 2.3959, "step": 5920 }, { "epoch": 0.022923721606284114, "grad_norm": 0.17701420187950134, "learning_rate": 0.001186, "loss": 2.4032, "step": 5930 }, { "epoch": 0.022962378809667394, "grad_norm": 0.20900875329971313, "learning_rate": 0.001188, "loss": 2.3954, "step": 5940 }, { "epoch": 0.02300103601305067, "grad_norm": 0.18997344374656677, "learning_rate": 0.0011899999999999999, "loss": 2.3865, "step": 5950 }, { "epoch": 0.02303969321643395, "grad_norm": 0.2688944935798645, "learning_rate": 0.001192, "loss": 2.4034, "step": 5960 }, { "epoch": 0.023078350419817227, "grad_norm": 0.2342280000448227, "learning_rate": 0.001194, "loss": 2.4029, "step": 5970 }, { "epoch": 0.023117007623200507, "grad_norm": 0.1937403380870819, "learning_rate": 0.001196, "loss": 2.4059, "step": 5980 }, { "epoch": 0.023155664826583787, "grad_norm": 0.1973395198583603, "learning_rate": 0.001198, "loss": 2.3967, "step": 5990 }, { "epoch": 0.023194322029967063, "grad_norm": 0.2075272798538208, "learning_rate": 0.0012, "loss": 2.3909, "step": 6000 }, { "epoch": 0.023232979233350343, "grad_norm": 0.18843838572502136, "learning_rate": 0.001202, "loss": 2.4028, "step": 6010 }, { "epoch": 0.02327163643673362, "grad_norm": 0.20422710478305817, "learning_rate": 0.001204, "loss": 2.3918, "step": 6020 }, { "epoch": 0.0233102936401169, "grad_norm": 0.19518813490867615, "learning_rate": 0.001206, "loss": 2.3905, "step": 6030 }, { "epoch": 0.023348950843500176, "grad_norm": 0.19929325580596924, "learning_rate": 0.001208, "loss": 2.3904, "step": 6040 }, { "epoch": 0.023387608046883456, "grad_norm": 0.20663338899612427, "learning_rate": 0.00121, "loss": 2.4037, "step": 6050 }, { "epoch": 0.023426265250266736, "grad_norm": 0.19592155516147614, "learning_rate": 0.001212, "loss": 2.3941, "step": 6060 }, { "epoch": 0.023464922453650013, "grad_norm": 0.23385976254940033, "learning_rate": 0.001214, "loss": 2.4098, "step": 6070 }, { "epoch": 0.023503579657033292, "grad_norm": 0.1804191768169403, "learning_rate": 0.001216, "loss": 2.4017, "step": 6080 }, { "epoch": 0.02354223686041657, "grad_norm": 0.26508116722106934, "learning_rate": 0.001218, "loss": 2.403, "step": 6090 }, { "epoch": 0.02358089406379985, "grad_norm": 0.23387646675109863, "learning_rate": 0.00122, "loss": 2.41, "step": 6100 }, { "epoch": 0.023619551267183125, "grad_norm": 0.19134730100631714, "learning_rate": 0.001222, "loss": 2.4137, "step": 6110 }, { "epoch": 0.023658208470566405, "grad_norm": 0.18082371354103088, "learning_rate": 0.001224, "loss": 2.4185, "step": 6120 }, { "epoch": 0.023696865673949685, "grad_norm": 0.19267909228801727, "learning_rate": 0.001226, "loss": 2.3945, "step": 6130 }, { "epoch": 0.02373552287733296, "grad_norm": 0.19761332869529724, "learning_rate": 0.001228, "loss": 2.3968, "step": 6140 }, { "epoch": 0.02377418008071624, "grad_norm": 0.20080329477787018, "learning_rate": 0.00123, "loss": 2.4168, "step": 6150 }, { "epoch": 0.023812837284099518, "grad_norm": 0.17178675532341003, "learning_rate": 0.001232, "loss": 2.4024, "step": 6160 }, { "epoch": 0.023851494487482798, "grad_norm": 0.17782160639762878, "learning_rate": 0.001234, "loss": 2.3888, "step": 6170 }, { "epoch": 0.023890151690866075, "grad_norm": 0.2577625513076782, "learning_rate": 0.0012360000000000001, "loss": 2.3935, "step": 6180 }, { "epoch": 0.023928808894249354, "grad_norm": 0.17612770199775696, "learning_rate": 0.001238, "loss": 2.4003, "step": 6190 }, { "epoch": 0.023967466097632634, "grad_norm": 0.24437908828258514, "learning_rate": 0.00124, "loss": 2.3939, "step": 6200 }, { "epoch": 0.02400612330101591, "grad_norm": 0.21090902388095856, "learning_rate": 0.001242, "loss": 2.4046, "step": 6210 }, { "epoch": 0.02404478050439919, "grad_norm": 0.18830551207065582, "learning_rate": 0.001244, "loss": 2.404, "step": 6220 }, { "epoch": 0.024083437707782467, "grad_norm": 0.2057628333568573, "learning_rate": 0.001246, "loss": 2.3923, "step": 6230 }, { "epoch": 0.024122094911165747, "grad_norm": 0.21243716776371002, "learning_rate": 0.001248, "loss": 2.4043, "step": 6240 }, { "epoch": 0.024160752114549024, "grad_norm": 0.24437469244003296, "learning_rate": 0.00125, "loss": 2.3894, "step": 6250 }, { "epoch": 0.024199409317932304, "grad_norm": 0.20179718732833862, "learning_rate": 0.001252, "loss": 2.4006, "step": 6260 }, { "epoch": 0.024238066521315584, "grad_norm": 0.3111397325992584, "learning_rate": 0.0012540000000000001, "loss": 2.4114, "step": 6270 }, { "epoch": 0.02427672372469886, "grad_norm": 0.2358233630657196, "learning_rate": 0.001256, "loss": 2.4033, "step": 6280 }, { "epoch": 0.02431538092808214, "grad_norm": 0.2047593742609024, "learning_rate": 0.001258, "loss": 2.4007, "step": 6290 }, { "epoch": 0.024354038131465416, "grad_norm": 0.18096667528152466, "learning_rate": 0.00126, "loss": 2.3984, "step": 6300 }, { "epoch": 0.024392695334848696, "grad_norm": 0.24265827238559723, "learning_rate": 0.001262, "loss": 2.3881, "step": 6310 }, { "epoch": 0.024431352538231973, "grad_norm": 0.17325134575366974, "learning_rate": 0.001264, "loss": 2.4013, "step": 6320 }, { "epoch": 0.024470009741615253, "grad_norm": 0.19234231114387512, "learning_rate": 0.001266, "loss": 2.4039, "step": 6330 }, { "epoch": 0.024508666944998533, "grad_norm": 0.24274741113185883, "learning_rate": 0.001268, "loss": 2.4045, "step": 6340 }, { "epoch": 0.02454732414838181, "grad_norm": 0.22068293392658234, "learning_rate": 0.00127, "loss": 2.4088, "step": 6350 }, { "epoch": 0.02458598135176509, "grad_norm": 0.5849812626838684, "learning_rate": 0.0012720000000000001, "loss": 2.4016, "step": 6360 }, { "epoch": 0.024624638555148366, "grad_norm": 0.1535091996192932, "learning_rate": 0.001274, "loss": 2.4138, "step": 6370 }, { "epoch": 0.024663295758531645, "grad_norm": 0.25961729884147644, "learning_rate": 0.001276, "loss": 2.4161, "step": 6380 }, { "epoch": 0.024701952961914922, "grad_norm": 0.18405650556087494, "learning_rate": 0.001278, "loss": 2.4102, "step": 6390 }, { "epoch": 0.024740610165298202, "grad_norm": 0.16772936284542084, "learning_rate": 0.00128, "loss": 2.4185, "step": 6400 }, { "epoch": 0.024779267368681482, "grad_norm": 0.18456052243709564, "learning_rate": 0.0012820000000000002, "loss": 2.392, "step": 6410 }, { "epoch": 0.02481792457206476, "grad_norm": 0.2476855367422104, "learning_rate": 0.001284, "loss": 2.4039, "step": 6420 }, { "epoch": 0.024856581775448038, "grad_norm": 0.19195041060447693, "learning_rate": 0.001286, "loss": 2.4161, "step": 6430 }, { "epoch": 0.024895238978831315, "grad_norm": 0.17237518727779388, "learning_rate": 0.001288, "loss": 2.3988, "step": 6440 }, { "epoch": 0.024933896182214595, "grad_norm": 0.21002577245235443, "learning_rate": 0.0012900000000000001, "loss": 2.3972, "step": 6450 }, { "epoch": 0.02497255338559787, "grad_norm": 0.2846126854419708, "learning_rate": 0.001292, "loss": 2.4008, "step": 6460 }, { "epoch": 0.02501121058898115, "grad_norm": 0.18850302696228027, "learning_rate": 0.001294, "loss": 2.4067, "step": 6470 }, { "epoch": 0.025049867792364428, "grad_norm": 0.19248883426189423, "learning_rate": 0.001296, "loss": 2.4116, "step": 6480 }, { "epoch": 0.025088524995747707, "grad_norm": 0.2386600375175476, "learning_rate": 0.0012980000000000001, "loss": 2.412, "step": 6490 }, { "epoch": 0.025127182199130987, "grad_norm": 0.20422658324241638, "learning_rate": 0.0013000000000000002, "loss": 2.3993, "step": 6500 }, { "epoch": 0.025165839402514264, "grad_norm": 0.20139940083026886, "learning_rate": 0.001302, "loss": 2.3956, "step": 6510 }, { "epoch": 0.025204496605897544, "grad_norm": 0.20745849609375, "learning_rate": 0.001304, "loss": 2.4091, "step": 6520 }, { "epoch": 0.02524315380928082, "grad_norm": 0.21816179156303406, "learning_rate": 0.001306, "loss": 2.3967, "step": 6530 }, { "epoch": 0.0252818110126641, "grad_norm": 0.18074050545692444, "learning_rate": 0.0013080000000000001, "loss": 2.402, "step": 6540 }, { "epoch": 0.025320468216047377, "grad_norm": 0.1811356097459793, "learning_rate": 0.0013100000000000002, "loss": 2.4289, "step": 6550 }, { "epoch": 0.025359125419430657, "grad_norm": 0.1797647923231125, "learning_rate": 0.001312, "loss": 2.3928, "step": 6560 }, { "epoch": 0.025397782622813937, "grad_norm": 0.22218936681747437, "learning_rate": 0.001314, "loss": 2.4093, "step": 6570 }, { "epoch": 0.025436439826197213, "grad_norm": 0.19117140769958496, "learning_rate": 0.0013160000000000001, "loss": 2.4005, "step": 6580 }, { "epoch": 0.025475097029580493, "grad_norm": 0.2183677852153778, "learning_rate": 0.0013180000000000002, "loss": 2.4097, "step": 6590 }, { "epoch": 0.02551375423296377, "grad_norm": 0.24778427183628082, "learning_rate": 0.00132, "loss": 2.3962, "step": 6600 }, { "epoch": 0.02555241143634705, "grad_norm": 0.19872575998306274, "learning_rate": 0.001322, "loss": 2.3982, "step": 6610 }, { "epoch": 0.025591068639730326, "grad_norm": 0.1927270144224167, "learning_rate": 0.001324, "loss": 2.408, "step": 6620 }, { "epoch": 0.025629725843113606, "grad_norm": 0.19589394330978394, "learning_rate": 0.0013260000000000001, "loss": 2.4102, "step": 6630 }, { "epoch": 0.025668383046496886, "grad_norm": 0.21557344496250153, "learning_rate": 0.0013280000000000002, "loss": 2.4112, "step": 6640 }, { "epoch": 0.025707040249880162, "grad_norm": 0.23353618383407593, "learning_rate": 0.00133, "loss": 2.4121, "step": 6650 }, { "epoch": 0.025745697453263442, "grad_norm": 0.1943051666021347, "learning_rate": 0.001332, "loss": 2.3952, "step": 6660 }, { "epoch": 0.02578435465664672, "grad_norm": 0.18049027025699615, "learning_rate": 0.0013340000000000001, "loss": 2.4113, "step": 6670 }, { "epoch": 0.02582301186003, "grad_norm": 0.17567066848278046, "learning_rate": 0.0013360000000000002, "loss": 2.4026, "step": 6680 }, { "epoch": 0.025861669063413275, "grad_norm": 0.16737550497055054, "learning_rate": 0.0013380000000000002, "loss": 2.4194, "step": 6690 }, { "epoch": 0.025900326266796555, "grad_norm": 0.1732235997915268, "learning_rate": 0.00134, "loss": 2.4126, "step": 6700 }, { "epoch": 0.025938983470179835, "grad_norm": 0.2096925675868988, "learning_rate": 0.001342, "loss": 2.4245, "step": 6710 }, { "epoch": 0.02597764067356311, "grad_norm": 0.1870320439338684, "learning_rate": 0.0013440000000000001, "loss": 2.3983, "step": 6720 }, { "epoch": 0.02601629787694639, "grad_norm": 0.16722071170806885, "learning_rate": 0.0013460000000000002, "loss": 2.4046, "step": 6730 }, { "epoch": 0.026054955080329668, "grad_norm": 0.25226983428001404, "learning_rate": 0.001348, "loss": 2.4059, "step": 6740 }, { "epoch": 0.026093612283712948, "grad_norm": 0.18488508462905884, "learning_rate": 0.00135, "loss": 2.4231, "step": 6750 }, { "epoch": 0.026132269487096224, "grad_norm": 0.294162392616272, "learning_rate": 0.0013520000000000001, "loss": 2.4038, "step": 6760 }, { "epoch": 0.026170926690479504, "grad_norm": 0.18017978966236115, "learning_rate": 0.0013540000000000002, "loss": 2.4268, "step": 6770 }, { "epoch": 0.026209583893862784, "grad_norm": 0.15233299136161804, "learning_rate": 0.0013560000000000002, "loss": 2.4033, "step": 6780 }, { "epoch": 0.02624824109724606, "grad_norm": 0.25568193197250366, "learning_rate": 0.001358, "loss": 2.4252, "step": 6790 }, { "epoch": 0.02628689830062934, "grad_norm": 0.2158443033695221, "learning_rate": 0.00136, "loss": 2.3924, "step": 6800 }, { "epoch": 0.026325555504012617, "grad_norm": 0.2108936309814453, "learning_rate": 0.0013620000000000001, "loss": 2.3924, "step": 6810 }, { "epoch": 0.026364212707395897, "grad_norm": 0.21832455694675446, "learning_rate": 0.0013640000000000002, "loss": 2.4067, "step": 6820 }, { "epoch": 0.026402869910779173, "grad_norm": 0.19482752680778503, "learning_rate": 0.001366, "loss": 2.4123, "step": 6830 }, { "epoch": 0.026441527114162453, "grad_norm": 0.21157225966453552, "learning_rate": 0.001368, "loss": 2.4023, "step": 6840 }, { "epoch": 0.026480184317545733, "grad_norm": 0.18475206196308136, "learning_rate": 0.0013700000000000001, "loss": 2.4093, "step": 6850 }, { "epoch": 0.02651884152092901, "grad_norm": 0.1895562708377838, "learning_rate": 0.0013720000000000002, "loss": 2.4012, "step": 6860 }, { "epoch": 0.02655749872431229, "grad_norm": 0.20044149458408356, "learning_rate": 0.0013740000000000002, "loss": 2.4186, "step": 6870 }, { "epoch": 0.026596155927695566, "grad_norm": 0.18344005942344666, "learning_rate": 0.0013759999999999998, "loss": 2.401, "step": 6880 }, { "epoch": 0.026634813131078846, "grad_norm": 0.2210623025894165, "learning_rate": 0.0013779999999999999, "loss": 2.4028, "step": 6890 }, { "epoch": 0.026673470334462122, "grad_norm": 0.22009354829788208, "learning_rate": 0.00138, "loss": 2.4031, "step": 6900 }, { "epoch": 0.026712127537845402, "grad_norm": 0.16893121600151062, "learning_rate": 0.001382, "loss": 2.4234, "step": 6910 }, { "epoch": 0.026750784741228682, "grad_norm": 0.19680050015449524, "learning_rate": 0.001384, "loss": 2.3951, "step": 6920 }, { "epoch": 0.02678944194461196, "grad_norm": 0.1974254697561264, "learning_rate": 0.0013859999999999999, "loss": 2.3968, "step": 6930 }, { "epoch": 0.02682809914799524, "grad_norm": 0.17142683267593384, "learning_rate": 0.001388, "loss": 2.4147, "step": 6940 }, { "epoch": 0.026866756351378515, "grad_norm": 0.20399171113967896, "learning_rate": 0.00139, "loss": 2.4172, "step": 6950 }, { "epoch": 0.026905413554761795, "grad_norm": 0.20409271121025085, "learning_rate": 0.001392, "loss": 2.4109, "step": 6960 }, { "epoch": 0.02694407075814507, "grad_norm": 0.20235559344291687, "learning_rate": 0.0013939999999999998, "loss": 2.4143, "step": 6970 }, { "epoch": 0.02698272796152835, "grad_norm": 0.1891755312681198, "learning_rate": 0.0013959999999999999, "loss": 2.423, "step": 6980 }, { "epoch": 0.027021385164911628, "grad_norm": 0.17014022171497345, "learning_rate": 0.001398, "loss": 2.419, "step": 6990 }, { "epoch": 0.027060042368294908, "grad_norm": 0.25941529870033264, "learning_rate": 0.0014, "loss": 2.4007, "step": 7000 }, { "epoch": 0.027098699571678188, "grad_norm": 0.16844941675662994, "learning_rate": 0.001402, "loss": 2.4125, "step": 7010 }, { "epoch": 0.027137356775061464, "grad_norm": 0.16671425104141235, "learning_rate": 0.0014039999999999999, "loss": 2.4036, "step": 7020 }, { "epoch": 0.027176013978444744, "grad_norm": 0.20527182519435883, "learning_rate": 0.001406, "loss": 2.4013, "step": 7030 }, { "epoch": 0.02721467118182802, "grad_norm": 0.17263086140155792, "learning_rate": 0.001408, "loss": 2.4145, "step": 7040 }, { "epoch": 0.0272533283852113, "grad_norm": 0.20254895091056824, "learning_rate": 0.00141, "loss": 2.418, "step": 7050 }, { "epoch": 0.027291985588594577, "grad_norm": 0.1766177862882614, "learning_rate": 0.0014119999999999998, "loss": 2.4029, "step": 7060 }, { "epoch": 0.027330642791977857, "grad_norm": 0.2355898916721344, "learning_rate": 0.001414, "loss": 2.4092, "step": 7070 }, { "epoch": 0.027369299995361137, "grad_norm": 0.1853450983762741, "learning_rate": 0.001416, "loss": 2.4102, "step": 7080 }, { "epoch": 0.027407957198744413, "grad_norm": 0.1911727339029312, "learning_rate": 0.001418, "loss": 2.4095, "step": 7090 }, { "epoch": 0.027446614402127693, "grad_norm": 0.19476091861724854, "learning_rate": 0.00142, "loss": 2.3967, "step": 7100 }, { "epoch": 0.02748527160551097, "grad_norm": 0.1929769665002823, "learning_rate": 0.0014219999999999999, "loss": 2.4075, "step": 7110 }, { "epoch": 0.02752392880889425, "grad_norm": 0.189010888338089, "learning_rate": 0.001424, "loss": 2.4088, "step": 7120 }, { "epoch": 0.027562586012277526, "grad_norm": 0.21539878845214844, "learning_rate": 0.001426, "loss": 2.4258, "step": 7130 }, { "epoch": 0.027601243215660806, "grad_norm": 0.17167668044567108, "learning_rate": 0.001428, "loss": 2.4128, "step": 7140 }, { "epoch": 0.027639900419044086, "grad_norm": 0.257478266954422, "learning_rate": 0.00143, "loss": 2.4038, "step": 7150 }, { "epoch": 0.027678557622427363, "grad_norm": 0.1884448379278183, "learning_rate": 0.001432, "loss": 2.4186, "step": 7160 }, { "epoch": 0.027717214825810643, "grad_norm": 0.18251746892929077, "learning_rate": 0.001434, "loss": 2.4233, "step": 7170 }, { "epoch": 0.02775587202919392, "grad_norm": 0.1865987330675125, "learning_rate": 0.001436, "loss": 2.3951, "step": 7180 }, { "epoch": 0.0277945292325772, "grad_norm": 0.17819331586360931, "learning_rate": 0.001438, "loss": 2.4152, "step": 7190 }, { "epoch": 0.027833186435960475, "grad_norm": 0.16948436200618744, "learning_rate": 0.0014399999999999999, "loss": 2.4266, "step": 7200 }, { "epoch": 0.027871843639343755, "grad_norm": 0.1923123002052307, "learning_rate": 0.001442, "loss": 2.4095, "step": 7210 }, { "epoch": 0.027910500842727035, "grad_norm": 0.18973374366760254, "learning_rate": 0.001444, "loss": 2.4033, "step": 7220 }, { "epoch": 0.027949158046110312, "grad_norm": 0.1747596561908722, "learning_rate": 0.001446, "loss": 2.4203, "step": 7230 }, { "epoch": 0.02798781524949359, "grad_norm": 0.20812322199344635, "learning_rate": 0.001448, "loss": 2.408, "step": 7240 }, { "epoch": 0.028026472452876868, "grad_norm": 0.19561204314231873, "learning_rate": 0.00145, "loss": 2.4228, "step": 7250 }, { "epoch": 0.028065129656260148, "grad_norm": 0.2217772752046585, "learning_rate": 0.001452, "loss": 2.404, "step": 7260 }, { "epoch": 0.028103786859643425, "grad_norm": 0.16729161143302917, "learning_rate": 0.001454, "loss": 2.405, "step": 7270 }, { "epoch": 0.028142444063026704, "grad_norm": 0.20534683763980865, "learning_rate": 0.001456, "loss": 2.4125, "step": 7280 }, { "epoch": 0.028181101266409984, "grad_norm": 0.20710591971874237, "learning_rate": 0.001458, "loss": 2.4264, "step": 7290 }, { "epoch": 0.02821975846979326, "grad_norm": 0.1754801869392395, "learning_rate": 0.00146, "loss": 2.4193, "step": 7300 }, { "epoch": 0.02825841567317654, "grad_norm": 0.20404274761676788, "learning_rate": 0.001462, "loss": 2.4108, "step": 7310 }, { "epoch": 0.028297072876559817, "grad_norm": 0.2507224380970001, "learning_rate": 0.001464, "loss": 2.4159, "step": 7320 }, { "epoch": 0.028335730079943097, "grad_norm": 0.16564474999904633, "learning_rate": 0.001466, "loss": 2.4109, "step": 7330 }, { "epoch": 0.028374387283326374, "grad_norm": 0.17911064624786377, "learning_rate": 0.001468, "loss": 2.4013, "step": 7340 }, { "epoch": 0.028413044486709654, "grad_norm": 0.2049718201160431, "learning_rate": 0.00147, "loss": 2.4342, "step": 7350 }, { "epoch": 0.028451701690092934, "grad_norm": 0.18454407155513763, "learning_rate": 0.001472, "loss": 2.4239, "step": 7360 }, { "epoch": 0.02849035889347621, "grad_norm": 0.17931893467903137, "learning_rate": 0.001474, "loss": 2.4239, "step": 7370 }, { "epoch": 0.02852901609685949, "grad_norm": 0.1730000078678131, "learning_rate": 0.001476, "loss": 2.4134, "step": 7380 }, { "epoch": 0.028567673300242766, "grad_norm": 0.18235714733600616, "learning_rate": 0.001478, "loss": 2.4198, "step": 7390 }, { "epoch": 0.028606330503626046, "grad_norm": 0.1695987433195114, "learning_rate": 0.00148, "loss": 2.4202, "step": 7400 }, { "epoch": 0.028644987707009323, "grad_norm": 0.2011159062385559, "learning_rate": 0.001482, "loss": 2.4123, "step": 7410 }, { "epoch": 0.028683644910392603, "grad_norm": 0.20711570978164673, "learning_rate": 0.001484, "loss": 2.4094, "step": 7420 }, { "epoch": 0.028722302113775883, "grad_norm": 0.1724478304386139, "learning_rate": 0.0014860000000000001, "loss": 2.4249, "step": 7430 }, { "epoch": 0.02876095931715916, "grad_norm": 0.18605752289295197, "learning_rate": 0.001488, "loss": 2.4244, "step": 7440 }, { "epoch": 0.02879961652054244, "grad_norm": 0.1809733510017395, "learning_rate": 0.00149, "loss": 2.4163, "step": 7450 }, { "epoch": 0.028838273723925716, "grad_norm": 0.174998477101326, "learning_rate": 0.001492, "loss": 2.4125, "step": 7460 }, { "epoch": 0.028876930927308996, "grad_norm": 0.2179594784975052, "learning_rate": 0.001494, "loss": 2.4089, "step": 7470 }, { "epoch": 0.028915588130692272, "grad_norm": 0.1974450945854187, "learning_rate": 0.001496, "loss": 2.4168, "step": 7480 }, { "epoch": 0.028954245334075552, "grad_norm": 0.18147790431976318, "learning_rate": 0.001498, "loss": 2.4196, "step": 7490 }, { "epoch": 0.02899290253745883, "grad_norm": 0.24443376064300537, "learning_rate": 0.0015, "loss": 2.425, "step": 7500 }, { "epoch": 0.02903155974084211, "grad_norm": 0.1688818484544754, "learning_rate": 0.001502, "loss": 2.4076, "step": 7510 }, { "epoch": 0.02907021694422539, "grad_norm": 0.18681873381137848, "learning_rate": 0.0015040000000000001, "loss": 2.4089, "step": 7520 }, { "epoch": 0.029108874147608665, "grad_norm": 0.1955171674489975, "learning_rate": 0.001506, "loss": 2.4153, "step": 7530 }, { "epoch": 0.029147531350991945, "grad_norm": 0.1625387966632843, "learning_rate": 0.001508, "loss": 2.4194, "step": 7540 }, { "epoch": 0.02918618855437522, "grad_norm": 0.21609428524971008, "learning_rate": 0.00151, "loss": 2.4088, "step": 7550 }, { "epoch": 0.0292248457577585, "grad_norm": 0.1740817129611969, "learning_rate": 0.001512, "loss": 2.4304, "step": 7560 }, { "epoch": 0.029263502961141778, "grad_norm": 0.2153731882572174, "learning_rate": 0.001514, "loss": 2.4228, "step": 7570 }, { "epoch": 0.029302160164525057, "grad_norm": 0.15823526680469513, "learning_rate": 0.001516, "loss": 2.408, "step": 7580 }, { "epoch": 0.029340817367908337, "grad_norm": 0.23106537759304047, "learning_rate": 0.001518, "loss": 2.412, "step": 7590 }, { "epoch": 0.029379474571291614, "grad_norm": 0.17345808446407318, "learning_rate": 0.00152, "loss": 2.4121, "step": 7600 }, { "epoch": 0.029418131774674894, "grad_norm": 0.1867612898349762, "learning_rate": 0.0015220000000000001, "loss": 2.4084, "step": 7610 }, { "epoch": 0.02945678897805817, "grad_norm": 0.18916818499565125, "learning_rate": 0.001524, "loss": 2.419, "step": 7620 }, { "epoch": 0.02949544618144145, "grad_norm": 0.18724285066127777, "learning_rate": 0.001526, "loss": 2.4043, "step": 7630 }, { "epoch": 0.029534103384824727, "grad_norm": 0.18231108784675598, "learning_rate": 0.001528, "loss": 2.4129, "step": 7640 }, { "epoch": 0.029572760588208007, "grad_norm": 0.21330668032169342, "learning_rate": 0.0015300000000000001, "loss": 2.4074, "step": 7650 }, { "epoch": 0.029611417791591287, "grad_norm": 0.16480223834514618, "learning_rate": 0.0015320000000000002, "loss": 2.4249, "step": 7660 }, { "epoch": 0.029650074994974563, "grad_norm": 0.1870049387216568, "learning_rate": 0.001534, "loss": 2.4123, "step": 7670 }, { "epoch": 0.029688732198357843, "grad_norm": 0.17868660390377045, "learning_rate": 0.001536, "loss": 2.427, "step": 7680 }, { "epoch": 0.02972738940174112, "grad_norm": 0.2201504111289978, "learning_rate": 0.001538, "loss": 2.4225, "step": 7690 }, { "epoch": 0.0297660466051244, "grad_norm": 0.18824969232082367, "learning_rate": 0.0015400000000000001, "loss": 2.4198, "step": 7700 }, { "epoch": 0.029804703808507676, "grad_norm": 0.18889504671096802, "learning_rate": 0.001542, "loss": 2.41, "step": 7710 }, { "epoch": 0.029843361011890956, "grad_norm": 0.20518645644187927, "learning_rate": 0.001544, "loss": 2.4286, "step": 7720 }, { "epoch": 0.029882018215274236, "grad_norm": 0.2309216409921646, "learning_rate": 0.001546, "loss": 2.4223, "step": 7730 }, { "epoch": 0.029920675418657512, "grad_norm": 0.1851509064435959, "learning_rate": 0.0015480000000000001, "loss": 2.4122, "step": 7740 }, { "epoch": 0.029959332622040792, "grad_norm": 0.16746215522289276, "learning_rate": 0.0015500000000000002, "loss": 2.4176, "step": 7750 }, { "epoch": 0.02999798982542407, "grad_norm": 0.17244480550289154, "learning_rate": 0.001552, "loss": 2.4138, "step": 7760 }, { "epoch": 0.03003664702880735, "grad_norm": 0.22287195920944214, "learning_rate": 0.001554, "loss": 2.4368, "step": 7770 }, { "epoch": 0.030075304232190625, "grad_norm": 0.19575530290603638, "learning_rate": 0.001556, "loss": 2.4253, "step": 7780 }, { "epoch": 0.030113961435573905, "grad_norm": 0.1770184487104416, "learning_rate": 0.0015580000000000001, "loss": 2.4148, "step": 7790 }, { "epoch": 0.030152618638957185, "grad_norm": 0.2342929095029831, "learning_rate": 0.0015600000000000002, "loss": 2.423, "step": 7800 }, { "epoch": 0.03019127584234046, "grad_norm": 0.16442647576332092, "learning_rate": 0.001562, "loss": 2.4262, "step": 7810 }, { "epoch": 0.03022993304572374, "grad_norm": 0.18403716385364532, "learning_rate": 0.001564, "loss": 2.4315, "step": 7820 }, { "epoch": 0.030268590249107018, "grad_norm": 0.15624719858169556, "learning_rate": 0.0015660000000000001, "loss": 2.4143, "step": 7830 }, { "epoch": 0.030307247452490298, "grad_norm": 0.19365200400352478, "learning_rate": 0.0015680000000000002, "loss": 2.4198, "step": 7840 }, { "epoch": 0.030345904655873574, "grad_norm": 0.19102121889591217, "learning_rate": 0.00157, "loss": 2.4338, "step": 7850 }, { "epoch": 0.030384561859256854, "grad_norm": 0.17510868608951569, "learning_rate": 0.001572, "loss": 2.4047, "step": 7860 }, { "epoch": 0.030423219062640134, "grad_norm": 0.18565328419208527, "learning_rate": 0.001574, "loss": 2.4435, "step": 7870 }, { "epoch": 0.03046187626602341, "grad_norm": 0.18640480935573578, "learning_rate": 0.0015760000000000001, "loss": 2.4197, "step": 7880 }, { "epoch": 0.03050053346940669, "grad_norm": 0.1854429990053177, "learning_rate": 0.0015780000000000002, "loss": 2.4143, "step": 7890 }, { "epoch": 0.030539190672789967, "grad_norm": 0.20129820704460144, "learning_rate": 0.00158, "loss": 2.4205, "step": 7900 }, { "epoch": 0.030577847876173247, "grad_norm": 0.18941105902194977, "learning_rate": 0.001582, "loss": 2.4216, "step": 7910 }, { "epoch": 0.030616505079556523, "grad_norm": 0.18483154475688934, "learning_rate": 0.0015840000000000001, "loss": 2.4178, "step": 7920 }, { "epoch": 0.030655162282939803, "grad_norm": 0.18414242565631866, "learning_rate": 0.0015860000000000002, "loss": 2.4261, "step": 7930 }, { "epoch": 0.030693819486323083, "grad_norm": 0.20815731585025787, "learning_rate": 0.0015880000000000002, "loss": 2.4169, "step": 7940 }, { "epoch": 0.03073247668970636, "grad_norm": 0.1711905598640442, "learning_rate": 0.00159, "loss": 2.4194, "step": 7950 }, { "epoch": 0.03077113389308964, "grad_norm": 0.1658889353275299, "learning_rate": 0.001592, "loss": 2.4217, "step": 7960 }, { "epoch": 0.030809791096472916, "grad_norm": 0.20899216830730438, "learning_rate": 0.0015940000000000001, "loss": 2.4159, "step": 7970 }, { "epoch": 0.030848448299856196, "grad_norm": 0.18888837099075317, "learning_rate": 0.0015960000000000002, "loss": 2.4203, "step": 7980 }, { "epoch": 0.030887105503239472, "grad_norm": 0.20210076868534088, "learning_rate": 0.001598, "loss": 2.4151, "step": 7990 }, { "epoch": 0.030925762706622752, "grad_norm": 0.16781243681907654, "learning_rate": 0.0016, "loss": 2.4053, "step": 8000 }, { "epoch": 0.03096441991000603, "grad_norm": 0.16579119861125946, "learning_rate": 0.0016020000000000001, "loss": 2.4033, "step": 8010 }, { "epoch": 0.03100307711338931, "grad_norm": 0.19240018725395203, "learning_rate": 0.0016040000000000002, "loss": 2.4245, "step": 8020 }, { "epoch": 0.03104173431677259, "grad_norm": 0.19058342278003693, "learning_rate": 0.0016060000000000002, "loss": 2.4219, "step": 8030 }, { "epoch": 0.031080391520155865, "grad_norm": 0.16241350769996643, "learning_rate": 0.001608, "loss": 2.4243, "step": 8040 }, { "epoch": 0.031119048723539145, "grad_norm": 0.17071078717708588, "learning_rate": 0.00161, "loss": 2.413, "step": 8050 }, { "epoch": 0.03115770592692242, "grad_norm": 0.1861797720193863, "learning_rate": 0.0016120000000000002, "loss": 2.4404, "step": 8060 }, { "epoch": 0.0311963631303057, "grad_norm": 0.1876525580883026, "learning_rate": 0.0016140000000000002, "loss": 2.4313, "step": 8070 }, { "epoch": 0.031235020333688978, "grad_norm": 0.22800956666469574, "learning_rate": 0.001616, "loss": 2.4158, "step": 8080 }, { "epoch": 0.03127367753707226, "grad_norm": 0.20102789998054504, "learning_rate": 0.001618, "loss": 2.4297, "step": 8090 }, { "epoch": 0.031312334740455534, "grad_norm": 0.1998523324728012, "learning_rate": 0.0016200000000000001, "loss": 2.4265, "step": 8100 }, { "epoch": 0.031350991943838814, "grad_norm": 0.21659059822559357, "learning_rate": 0.0016220000000000002, "loss": 2.4388, "step": 8110 }, { "epoch": 0.031389649147222094, "grad_norm": 0.20605839788913727, "learning_rate": 0.0016240000000000002, "loss": 2.4294, "step": 8120 }, { "epoch": 0.031428306350605374, "grad_norm": 0.19702279567718506, "learning_rate": 0.0016259999999999998, "loss": 2.4315, "step": 8130 }, { "epoch": 0.03146696355398865, "grad_norm": 0.1821749359369278, "learning_rate": 0.0016279999999999999, "loss": 2.4176, "step": 8140 }, { "epoch": 0.03150562075737193, "grad_norm": 0.22235362231731415, "learning_rate": 0.00163, "loss": 2.4267, "step": 8150 }, { "epoch": 0.03154427796075521, "grad_norm": 0.16252587735652924, "learning_rate": 0.001632, "loss": 2.4193, "step": 8160 }, { "epoch": 0.03158293516413849, "grad_norm": 0.3427959680557251, "learning_rate": 0.001634, "loss": 2.4284, "step": 8170 }, { "epoch": 0.03162159236752177, "grad_norm": 0.16242393851280212, "learning_rate": 0.0016359999999999999, "loss": 2.4143, "step": 8180 }, { "epoch": 0.03166024957090504, "grad_norm": 0.2046222984790802, "learning_rate": 0.001638, "loss": 2.4372, "step": 8190 }, { "epoch": 0.03169890677428832, "grad_norm": 0.1593078374862671, "learning_rate": 0.00164, "loss": 2.4244, "step": 8200 }, { "epoch": 0.0317375639776716, "grad_norm": 0.17311494052410126, "learning_rate": 0.001642, "loss": 2.4249, "step": 8210 }, { "epoch": 0.03177622118105488, "grad_norm": 0.17711615562438965, "learning_rate": 0.0016439999999999998, "loss": 2.4288, "step": 8220 }, { "epoch": 0.03181487838443815, "grad_norm": 0.16730278730392456, "learning_rate": 0.001646, "loss": 2.4265, "step": 8230 }, { "epoch": 0.03185353558782143, "grad_norm": 0.24033501744270325, "learning_rate": 0.001648, "loss": 2.4275, "step": 8240 }, { "epoch": 0.03189219279120471, "grad_norm": 0.18279847502708435, "learning_rate": 0.00165, "loss": 2.4232, "step": 8250 }, { "epoch": 0.03193084999458799, "grad_norm": 0.1985992193222046, "learning_rate": 0.001652, "loss": 2.4315, "step": 8260 }, { "epoch": 0.03196950719797127, "grad_norm": 0.15565842390060425, "learning_rate": 0.0016539999999999999, "loss": 2.4191, "step": 8270 }, { "epoch": 0.032008164401354545, "grad_norm": 0.21724532544612885, "learning_rate": 0.001656, "loss": 2.4069, "step": 8280 }, { "epoch": 0.032046821604737825, "grad_norm": 0.16617335379123688, "learning_rate": 0.001658, "loss": 2.4075, "step": 8290 }, { "epoch": 0.032085478808121105, "grad_norm": 0.15554694831371307, "learning_rate": 0.00166, "loss": 2.4068, "step": 8300 }, { "epoch": 0.032124136011504385, "grad_norm": 0.2197619378566742, "learning_rate": 0.0016619999999999998, "loss": 2.4262, "step": 8310 }, { "epoch": 0.032162793214887665, "grad_norm": 0.23088595271110535, "learning_rate": 0.001664, "loss": 2.421, "step": 8320 }, { "epoch": 0.03220145041827094, "grad_norm": 0.1569271832704544, "learning_rate": 0.001666, "loss": 2.4344, "step": 8330 }, { "epoch": 0.03224010762165422, "grad_norm": 0.21316871047019958, "learning_rate": 0.001668, "loss": 2.424, "step": 8340 }, { "epoch": 0.0322787648250375, "grad_norm": 0.199044868350029, "learning_rate": 0.00167, "loss": 2.4284, "step": 8350 }, { "epoch": 0.03231742202842078, "grad_norm": 0.1673698127269745, "learning_rate": 0.0016719999999999999, "loss": 2.4104, "step": 8360 }, { "epoch": 0.03235607923180405, "grad_norm": 0.21653950214385986, "learning_rate": 0.001674, "loss": 2.4272, "step": 8370 }, { "epoch": 0.03239473643518733, "grad_norm": 0.17064034938812256, "learning_rate": 0.001676, "loss": 2.427, "step": 8380 }, { "epoch": 0.03243339363857061, "grad_norm": 0.18788346648216248, "learning_rate": 0.001678, "loss": 2.4276, "step": 8390 }, { "epoch": 0.03247205084195389, "grad_norm": 0.24243298172950745, "learning_rate": 0.00168, "loss": 2.4277, "step": 8400 }, { "epoch": 0.03251070804533717, "grad_norm": 0.21477310359477997, "learning_rate": 0.001682, "loss": 2.4281, "step": 8410 }, { "epoch": 0.032549365248720444, "grad_norm": 0.15949584543704987, "learning_rate": 0.001684, "loss": 2.4278, "step": 8420 }, { "epoch": 0.032588022452103724, "grad_norm": 0.1640334576368332, "learning_rate": 0.001686, "loss": 2.4266, "step": 8430 }, { "epoch": 0.032626679655487004, "grad_norm": 0.21765173971652985, "learning_rate": 0.001688, "loss": 2.416, "step": 8440 }, { "epoch": 0.032665336858870284, "grad_norm": 0.2018786370754242, "learning_rate": 0.0016899999999999999, "loss": 2.4235, "step": 8450 }, { "epoch": 0.032703994062253564, "grad_norm": 0.16875681281089783, "learning_rate": 0.001692, "loss": 2.4347, "step": 8460 }, { "epoch": 0.03274265126563684, "grad_norm": 0.2098286747932434, "learning_rate": 0.001694, "loss": 2.4153, "step": 8470 }, { "epoch": 0.032781308469020116, "grad_norm": 0.1699836701154709, "learning_rate": 0.001696, "loss": 2.431, "step": 8480 }, { "epoch": 0.032819965672403396, "grad_norm": 0.16255663335323334, "learning_rate": 0.001698, "loss": 2.4396, "step": 8490 }, { "epoch": 0.032858622875786676, "grad_norm": 0.1716599017381668, "learning_rate": 0.0017, "loss": 2.4215, "step": 8500 }, { "epoch": 0.03289728007916995, "grad_norm": 0.16164468228816986, "learning_rate": 0.001702, "loss": 2.4358, "step": 8510 }, { "epoch": 0.03293593728255323, "grad_norm": 0.23469357192516327, "learning_rate": 0.001704, "loss": 2.4325, "step": 8520 }, { "epoch": 0.03297459448593651, "grad_norm": 0.19947314262390137, "learning_rate": 0.001706, "loss": 2.4306, "step": 8530 }, { "epoch": 0.03301325168931979, "grad_norm": 0.16452614963054657, "learning_rate": 0.001708, "loss": 2.4251, "step": 8540 }, { "epoch": 0.03305190889270307, "grad_norm": 0.16000273823738098, "learning_rate": 0.00171, "loss": 2.4243, "step": 8550 }, { "epoch": 0.03309056609608634, "grad_norm": 0.19534049928188324, "learning_rate": 0.001712, "loss": 2.4204, "step": 8560 }, { "epoch": 0.03312922329946962, "grad_norm": 0.17872479557991028, "learning_rate": 0.001714, "loss": 2.4244, "step": 8570 }, { "epoch": 0.0331678805028529, "grad_norm": 0.17934873700141907, "learning_rate": 0.001716, "loss": 2.4344, "step": 8580 }, { "epoch": 0.03320653770623618, "grad_norm": 0.14556285738945007, "learning_rate": 0.001718, "loss": 2.4166, "step": 8590 }, { "epoch": 0.03324519490961946, "grad_norm": 0.17934808135032654, "learning_rate": 0.00172, "loss": 2.4329, "step": 8600 }, { "epoch": 0.033283852113002735, "grad_norm": 0.1684950292110443, "learning_rate": 0.001722, "loss": 2.424, "step": 8610 }, { "epoch": 0.033322509316386015, "grad_norm": 0.20048771798610687, "learning_rate": 0.001724, "loss": 2.4272, "step": 8620 }, { "epoch": 0.033361166519769295, "grad_norm": 0.16629527509212494, "learning_rate": 0.001726, "loss": 2.4267, "step": 8630 }, { "epoch": 0.033399823723152575, "grad_norm": 0.21294847130775452, "learning_rate": 0.001728, "loss": 2.4217, "step": 8640 }, { "epoch": 0.03343848092653585, "grad_norm": 0.15192854404449463, "learning_rate": 0.00173, "loss": 2.4235, "step": 8650 }, { "epoch": 0.03347713812991913, "grad_norm": 0.16855189204216003, "learning_rate": 0.001732, "loss": 2.4115, "step": 8660 }, { "epoch": 0.03351579533330241, "grad_norm": 0.1933770477771759, "learning_rate": 0.001734, "loss": 2.4419, "step": 8670 }, { "epoch": 0.03355445253668569, "grad_norm": 0.16225305199623108, "learning_rate": 0.0017360000000000001, "loss": 2.435, "step": 8680 }, { "epoch": 0.03359310974006897, "grad_norm": 0.1811498999595642, "learning_rate": 0.001738, "loss": 2.4223, "step": 8690 }, { "epoch": 0.03363176694345224, "grad_norm": 0.18613067269325256, "learning_rate": 0.00174, "loss": 2.4329, "step": 8700 }, { "epoch": 0.03367042414683552, "grad_norm": 0.2437407523393631, "learning_rate": 0.001742, "loss": 2.4193, "step": 8710 }, { "epoch": 0.0337090813502188, "grad_norm": 0.18288400769233704, "learning_rate": 0.001744, "loss": 2.4332, "step": 8720 }, { "epoch": 0.03374773855360208, "grad_norm": 0.18807104229927063, "learning_rate": 0.001746, "loss": 2.4134, "step": 8730 }, { "epoch": 0.03378639575698535, "grad_norm": 0.9005038738250732, "learning_rate": 0.001748, "loss": 2.425, "step": 8740 }, { "epoch": 0.03382505296036863, "grad_norm": 0.19479355216026306, "learning_rate": 0.00175, "loss": 2.4594, "step": 8750 }, { "epoch": 0.03386371016375191, "grad_norm": 0.13804388046264648, "learning_rate": 0.001752, "loss": 2.4404, "step": 8760 }, { "epoch": 0.03390236736713519, "grad_norm": 0.17309629917144775, "learning_rate": 0.0017540000000000001, "loss": 2.425, "step": 8770 }, { "epoch": 0.03394102457051847, "grad_norm": 0.15053080022335052, "learning_rate": 0.001756, "loss": 2.4196, "step": 8780 }, { "epoch": 0.033979681773901746, "grad_norm": 0.16412365436553955, "learning_rate": 0.001758, "loss": 2.4119, "step": 8790 }, { "epoch": 0.034018338977285026, "grad_norm": 0.16084375977516174, "learning_rate": 0.00176, "loss": 2.4199, "step": 8800 }, { "epoch": 0.034056996180668306, "grad_norm": 0.19307050108909607, "learning_rate": 0.0017620000000000001, "loss": 2.428, "step": 8810 }, { "epoch": 0.034095653384051586, "grad_norm": 0.18793387711048126, "learning_rate": 0.001764, "loss": 2.4226, "step": 8820 }, { "epoch": 0.034134310587434866, "grad_norm": 0.1403844803571701, "learning_rate": 0.001766, "loss": 2.4284, "step": 8830 }, { "epoch": 0.03417296779081814, "grad_norm": 0.19078412652015686, "learning_rate": 0.001768, "loss": 2.4223, "step": 8840 }, { "epoch": 0.03421162499420142, "grad_norm": 0.21158762276172638, "learning_rate": 0.00177, "loss": 2.4318, "step": 8850 }, { "epoch": 0.0342502821975847, "grad_norm": 0.14591366052627563, "learning_rate": 0.0017720000000000001, "loss": 2.4221, "step": 8860 }, { "epoch": 0.03428893940096798, "grad_norm": 0.15341006219387054, "learning_rate": 0.001774, "loss": 2.4106, "step": 8870 }, { "epoch": 0.03432759660435125, "grad_norm": 0.20539985597133636, "learning_rate": 0.001776, "loss": 2.4242, "step": 8880 }, { "epoch": 0.03436625380773453, "grad_norm": 0.20937784016132355, "learning_rate": 0.001778, "loss": 2.4348, "step": 8890 }, { "epoch": 0.03440491101111781, "grad_norm": 0.16762758791446686, "learning_rate": 0.0017800000000000001, "loss": 2.4272, "step": 8900 }, { "epoch": 0.03444356821450109, "grad_norm": 0.1493655890226364, "learning_rate": 0.0017820000000000002, "loss": 2.4304, "step": 8910 }, { "epoch": 0.03448222541788437, "grad_norm": 0.15680846571922302, "learning_rate": 0.001784, "loss": 2.4474, "step": 8920 }, { "epoch": 0.034520882621267644, "grad_norm": 0.2038678675889969, "learning_rate": 0.001786, "loss": 2.4312, "step": 8930 }, { "epoch": 0.034559539824650924, "grad_norm": 0.1632731705904007, "learning_rate": 0.001788, "loss": 2.4169, "step": 8940 }, { "epoch": 0.034598197028034204, "grad_norm": 0.1767711639404297, "learning_rate": 0.0017900000000000001, "loss": 2.4342, "step": 8950 }, { "epoch": 0.034636854231417484, "grad_norm": 0.19585298001766205, "learning_rate": 0.001792, "loss": 2.4278, "step": 8960 }, { "epoch": 0.034675511434800764, "grad_norm": 0.1603458970785141, "learning_rate": 0.001794, "loss": 2.4413, "step": 8970 }, { "epoch": 0.03471416863818404, "grad_norm": 0.20633605122566223, "learning_rate": 0.001796, "loss": 2.4298, "step": 8980 }, { "epoch": 0.03475282584156732, "grad_norm": 0.2077600210905075, "learning_rate": 0.0017980000000000001, "loss": 2.4253, "step": 8990 }, { "epoch": 0.0347914830449506, "grad_norm": 0.1785653680562973, "learning_rate": 0.0018000000000000002, "loss": 2.4297, "step": 9000 }, { "epoch": 0.03483014024833388, "grad_norm": 0.19785434007644653, "learning_rate": 0.001802, "loss": 2.4188, "step": 9010 }, { "epoch": 0.03486879745171715, "grad_norm": 0.16691158711910248, "learning_rate": 0.001804, "loss": 2.4313, "step": 9020 }, { "epoch": 0.03490745465510043, "grad_norm": 0.16858600080013275, "learning_rate": 0.001806, "loss": 2.4427, "step": 9030 }, { "epoch": 0.03494611185848371, "grad_norm": 0.2232050895690918, "learning_rate": 0.0018080000000000001, "loss": 2.4238, "step": 9040 }, { "epoch": 0.03498476906186699, "grad_norm": 0.1576065868139267, "learning_rate": 0.0018100000000000002, "loss": 2.4303, "step": 9050 }, { "epoch": 0.03502342626525027, "grad_norm": 0.24189849197864532, "learning_rate": 0.001812, "loss": 2.4297, "step": 9060 }, { "epoch": 0.03506208346863354, "grad_norm": 0.18903590738773346, "learning_rate": 0.001814, "loss": 2.4246, "step": 9070 }, { "epoch": 0.03510074067201682, "grad_norm": 0.14904828369617462, "learning_rate": 0.0018160000000000001, "loss": 2.4339, "step": 9080 }, { "epoch": 0.0351393978754001, "grad_norm": 0.1678786277770996, "learning_rate": 0.0018180000000000002, "loss": 2.4337, "step": 9090 }, { "epoch": 0.03517805507878338, "grad_norm": 0.16725444793701172, "learning_rate": 0.00182, "loss": 2.4289, "step": 9100 }, { "epoch": 0.03521671228216666, "grad_norm": 0.19741575419902802, "learning_rate": 0.001822, "loss": 2.4519, "step": 9110 }, { "epoch": 0.035255369485549935, "grad_norm": 0.188306525349617, "learning_rate": 0.001824, "loss": 2.4358, "step": 9120 }, { "epoch": 0.035294026688933215, "grad_norm": 0.16728952527046204, "learning_rate": 0.0018260000000000001, "loss": 2.4164, "step": 9130 }, { "epoch": 0.035332683892316495, "grad_norm": 0.16376613080501556, "learning_rate": 0.0018280000000000002, "loss": 2.4224, "step": 9140 }, { "epoch": 0.035371341095699775, "grad_norm": 0.16212789714336395, "learning_rate": 0.00183, "loss": 2.4404, "step": 9150 }, { "epoch": 0.03540999829908305, "grad_norm": 0.21214650571346283, "learning_rate": 0.001832, "loss": 2.4279, "step": 9160 }, { "epoch": 0.03544865550246633, "grad_norm": 0.1481925994157791, "learning_rate": 0.0018340000000000001, "loss": 2.4317, "step": 9170 }, { "epoch": 0.03548731270584961, "grad_norm": 0.18432292342185974, "learning_rate": 0.0018360000000000002, "loss": 2.4182, "step": 9180 }, { "epoch": 0.03552596990923289, "grad_norm": 0.1933586597442627, "learning_rate": 0.0018380000000000002, "loss": 2.4391, "step": 9190 }, { "epoch": 0.03556462711261617, "grad_norm": 0.15520641207695007, "learning_rate": 0.00184, "loss": 2.4261, "step": 9200 }, { "epoch": 0.03560328431599944, "grad_norm": 0.1683807224035263, "learning_rate": 0.001842, "loss": 2.4161, "step": 9210 }, { "epoch": 0.03564194151938272, "grad_norm": 0.23813362419605255, "learning_rate": 0.0018440000000000002, "loss": 2.4359, "step": 9220 }, { "epoch": 0.035680598722766, "grad_norm": 0.16843190789222717, "learning_rate": 0.0018460000000000002, "loss": 2.4372, "step": 9230 }, { "epoch": 0.03571925592614928, "grad_norm": 0.15540075302124023, "learning_rate": 0.001848, "loss": 2.4411, "step": 9240 }, { "epoch": 0.035757913129532554, "grad_norm": 0.17383131384849548, "learning_rate": 0.00185, "loss": 2.4345, "step": 9250 }, { "epoch": 0.035796570332915834, "grad_norm": 0.4560840129852295, "learning_rate": 0.0018520000000000001, "loss": 2.4234, "step": 9260 }, { "epoch": 0.035835227536299114, "grad_norm": 0.17813929915428162, "learning_rate": 0.0018540000000000002, "loss": 2.4411, "step": 9270 }, { "epoch": 0.03587388473968239, "grad_norm": 0.17561641335487366, "learning_rate": 0.0018560000000000002, "loss": 2.4331, "step": 9280 }, { "epoch": 0.03591254194306567, "grad_norm": 0.17263886332511902, "learning_rate": 0.001858, "loss": 2.4285, "step": 9290 }, { "epoch": 0.035951199146448946, "grad_norm": 0.1532730907201767, "learning_rate": 0.00186, "loss": 2.4331, "step": 9300 }, { "epoch": 0.035989856349832226, "grad_norm": 0.15847301483154297, "learning_rate": 0.0018620000000000002, "loss": 2.4265, "step": 9310 }, { "epoch": 0.036028513553215506, "grad_norm": 0.1659020036458969, "learning_rate": 0.0018640000000000002, "loss": 2.4202, "step": 9320 }, { "epoch": 0.036067170756598786, "grad_norm": 0.19295507669448853, "learning_rate": 0.001866, "loss": 2.4331, "step": 9330 }, { "epoch": 0.036105827959982066, "grad_norm": 0.17685222625732422, "learning_rate": 0.001868, "loss": 2.4454, "step": 9340 }, { "epoch": 0.03614448516336534, "grad_norm": 0.1478830724954605, "learning_rate": 0.0018700000000000001, "loss": 2.424, "step": 9350 }, { "epoch": 0.03618314236674862, "grad_norm": 0.1698874682188034, "learning_rate": 0.0018720000000000002, "loss": 2.4359, "step": 9360 }, { "epoch": 0.0362217995701319, "grad_norm": 0.18125659227371216, "learning_rate": 0.0018740000000000002, "loss": 2.4275, "step": 9370 }, { "epoch": 0.03626045677351518, "grad_norm": 0.18805450201034546, "learning_rate": 0.0018759999999999998, "loss": 2.4392, "step": 9380 }, { "epoch": 0.03629911397689845, "grad_norm": 0.1469375342130661, "learning_rate": 0.001878, "loss": 2.4291, "step": 9390 }, { "epoch": 0.03633777118028173, "grad_norm": 0.18483032286167145, "learning_rate": 0.00188, "loss": 2.4193, "step": 9400 }, { "epoch": 0.03637642838366501, "grad_norm": 0.16580356657505035, "learning_rate": 0.001882, "loss": 2.4092, "step": 9410 }, { "epoch": 0.03641508558704829, "grad_norm": 0.1719176173210144, "learning_rate": 0.001884, "loss": 2.4338, "step": 9420 }, { "epoch": 0.03645374279043157, "grad_norm": 0.15128475427627563, "learning_rate": 0.0018859999999999999, "loss": 2.4231, "step": 9430 }, { "epoch": 0.036492399993814845, "grad_norm": 0.16530759632587433, "learning_rate": 0.001888, "loss": 2.4234, "step": 9440 }, { "epoch": 0.036531057197198125, "grad_norm": 0.20639896392822266, "learning_rate": 0.00189, "loss": 2.423, "step": 9450 }, { "epoch": 0.036569714400581405, "grad_norm": 0.1547228842973709, "learning_rate": 0.001892, "loss": 2.426, "step": 9460 }, { "epoch": 0.036608371603964684, "grad_norm": 0.14532585442066193, "learning_rate": 0.0018939999999999999, "loss": 2.4296, "step": 9470 }, { "epoch": 0.036647028807347964, "grad_norm": 0.16781659424304962, "learning_rate": 0.001896, "loss": 2.4236, "step": 9480 }, { "epoch": 0.03668568601073124, "grad_norm": 0.18817219138145447, "learning_rate": 0.001898, "loss": 2.4281, "step": 9490 }, { "epoch": 0.03672434321411452, "grad_norm": 0.17814214527606964, "learning_rate": 0.0019, "loss": 2.4434, "step": 9500 }, { "epoch": 0.0367630004174978, "grad_norm": 0.19819213449954987, "learning_rate": 0.001902, "loss": 2.4396, "step": 9510 }, { "epoch": 0.03680165762088108, "grad_norm": 0.17489992082118988, "learning_rate": 0.0019039999999999999, "loss": 2.428, "step": 9520 }, { "epoch": 0.03684031482426435, "grad_norm": 0.19796450436115265, "learning_rate": 0.001906, "loss": 2.4439, "step": 9530 }, { "epoch": 0.03687897202764763, "grad_norm": 0.14627830684185028, "learning_rate": 0.001908, "loss": 2.4234, "step": 9540 }, { "epoch": 0.03691762923103091, "grad_norm": 0.2152869999408722, "learning_rate": 0.00191, "loss": 2.4284, "step": 9550 }, { "epoch": 0.03695628643441419, "grad_norm": 0.16523297131061554, "learning_rate": 0.0019119999999999999, "loss": 2.4311, "step": 9560 }, { "epoch": 0.03699494363779747, "grad_norm": 0.1422155797481537, "learning_rate": 0.001914, "loss": 2.4374, "step": 9570 }, { "epoch": 0.03703360084118074, "grad_norm": 0.18466395139694214, "learning_rate": 0.001916, "loss": 2.4371, "step": 9580 }, { "epoch": 0.03707225804456402, "grad_norm": 0.16606661677360535, "learning_rate": 0.001918, "loss": 2.4267, "step": 9590 }, { "epoch": 0.0371109152479473, "grad_norm": 0.15255603194236755, "learning_rate": 0.00192, "loss": 2.4182, "step": 9600 }, { "epoch": 0.03714957245133058, "grad_norm": 0.18158304691314697, "learning_rate": 0.0019219999999999999, "loss": 2.4277, "step": 9610 }, { "epoch": 0.03718822965471386, "grad_norm": 0.1524418443441391, "learning_rate": 0.001924, "loss": 2.4341, "step": 9620 }, { "epoch": 0.037226886858097136, "grad_norm": 0.16808250546455383, "learning_rate": 0.001926, "loss": 2.4383, "step": 9630 }, { "epoch": 0.037265544061480416, "grad_norm": 0.229302316904068, "learning_rate": 0.001928, "loss": 2.4321, "step": 9640 }, { "epoch": 0.037304201264863696, "grad_norm": 0.1693592667579651, "learning_rate": 0.00193, "loss": 2.4251, "step": 9650 }, { "epoch": 0.037342858468246976, "grad_norm": 0.1731642633676529, "learning_rate": 0.001932, "loss": 2.4437, "step": 9660 }, { "epoch": 0.03738151567163025, "grad_norm": 0.1623421460390091, "learning_rate": 0.001934, "loss": 2.4378, "step": 9670 }, { "epoch": 0.03742017287501353, "grad_norm": 0.12941968441009521, "learning_rate": 0.001936, "loss": 2.4288, "step": 9680 }, { "epoch": 0.03745883007839681, "grad_norm": 0.16926322877407074, "learning_rate": 0.001938, "loss": 2.4358, "step": 9690 }, { "epoch": 0.03749748728178009, "grad_norm": 0.18344028294086456, "learning_rate": 0.0019399999999999999, "loss": 2.4294, "step": 9700 }, { "epoch": 0.03753614448516337, "grad_norm": 0.1774645298719406, "learning_rate": 0.001942, "loss": 2.4438, "step": 9710 }, { "epoch": 0.03757480168854664, "grad_norm": 0.17909649014472961, "learning_rate": 0.001944, "loss": 2.4473, "step": 9720 }, { "epoch": 0.03761345889192992, "grad_norm": 0.15844593942165375, "learning_rate": 0.001946, "loss": 2.4257, "step": 9730 }, { "epoch": 0.0376521160953132, "grad_norm": 0.18717212975025177, "learning_rate": 0.001948, "loss": 2.4333, "step": 9740 }, { "epoch": 0.03769077329869648, "grad_norm": 0.163429856300354, "learning_rate": 0.00195, "loss": 2.4375, "step": 9750 }, { "epoch": 0.037729430502079754, "grad_norm": 0.15213638544082642, "learning_rate": 0.001952, "loss": 2.4264, "step": 9760 }, { "epoch": 0.037768087705463034, "grad_norm": 0.18855980038642883, "learning_rate": 0.001954, "loss": 2.4371, "step": 9770 }, { "epoch": 0.037806744908846314, "grad_norm": 0.1842171996831894, "learning_rate": 0.001956, "loss": 2.4326, "step": 9780 }, { "epoch": 0.037845402112229594, "grad_norm": 0.18620212376117706, "learning_rate": 0.001958, "loss": 2.4259, "step": 9790 }, { "epoch": 0.037884059315612874, "grad_norm": 0.15474924445152283, "learning_rate": 0.00196, "loss": 2.4241, "step": 9800 }, { "epoch": 0.03792271651899615, "grad_norm": 0.15727892518043518, "learning_rate": 0.001962, "loss": 2.416, "step": 9810 }, { "epoch": 0.03796137372237943, "grad_norm": 0.17438285052776337, "learning_rate": 0.001964, "loss": 2.4224, "step": 9820 }, { "epoch": 0.03800003092576271, "grad_norm": 0.1774299591779709, "learning_rate": 0.001966, "loss": 2.423, "step": 9830 }, { "epoch": 0.03803868812914599, "grad_norm": 0.13991932570934296, "learning_rate": 0.001968, "loss": 2.4208, "step": 9840 }, { "epoch": 0.03807734533252927, "grad_norm": 0.1998598724603653, "learning_rate": 0.00197, "loss": 2.4263, "step": 9850 }, { "epoch": 0.03811600253591254, "grad_norm": 0.18491345643997192, "learning_rate": 0.0019720000000000002, "loss": 2.4354, "step": 9860 }, { "epoch": 0.03815465973929582, "grad_norm": 0.16325430572032928, "learning_rate": 0.001974, "loss": 2.4436, "step": 9870 }, { "epoch": 0.0381933169426791, "grad_norm": 0.209353506565094, "learning_rate": 0.001976, "loss": 2.4268, "step": 9880 }, { "epoch": 0.03823197414606238, "grad_norm": 0.16570289433002472, "learning_rate": 0.001978, "loss": 2.4323, "step": 9890 }, { "epoch": 0.03827063134944565, "grad_norm": 0.17465120553970337, "learning_rate": 0.00198, "loss": 2.4478, "step": 9900 }, { "epoch": 0.03830928855282893, "grad_norm": 0.20848344266414642, "learning_rate": 0.001982, "loss": 2.4321, "step": 9910 }, { "epoch": 0.03834794575621221, "grad_norm": 0.17331746220588684, "learning_rate": 0.001984, "loss": 2.4321, "step": 9920 }, { "epoch": 0.03838660295959549, "grad_norm": 0.1472446173429489, "learning_rate": 0.001986, "loss": 2.4397, "step": 9930 }, { "epoch": 0.03842526016297877, "grad_norm": 0.18850277364253998, "learning_rate": 0.001988, "loss": 2.4395, "step": 9940 }, { "epoch": 0.038463917366362045, "grad_norm": 0.2426530122756958, "learning_rate": 0.00199, "loss": 2.4367, "step": 9950 }, { "epoch": 0.038502574569745325, "grad_norm": 0.15660598874092102, "learning_rate": 0.001992, "loss": 2.4304, "step": 9960 }, { "epoch": 0.038541231773128605, "grad_norm": 0.16251277923583984, "learning_rate": 0.001994, "loss": 2.4295, "step": 9970 }, { "epoch": 0.038579888976511885, "grad_norm": 0.20397990942001343, "learning_rate": 0.001996, "loss": 2.4516, "step": 9980 }, { "epoch": 0.038618546179895165, "grad_norm": 0.16767215728759766, "learning_rate": 0.001998, "loss": 2.4455, "step": 9990 }, { "epoch": 0.03865720338327844, "grad_norm": 0.16770479083061218, "learning_rate": 0.002, "loss": 2.4284, "step": 10000 }, { "epoch": 0.03869586058666172, "grad_norm": 0.1625438928604126, "learning_rate": 0.002, "loss": 2.4331, "step": 10010 }, { "epoch": 0.038734517790045, "grad_norm": 0.15517131984233856, "learning_rate": 0.002, "loss": 2.4339, "step": 10020 }, { "epoch": 0.03877317499342828, "grad_norm": 0.1596555858850479, "learning_rate": 0.002, "loss": 2.4216, "step": 10030 }, { "epoch": 0.03881183219681155, "grad_norm": 0.12880107760429382, "learning_rate": 0.002, "loss": 2.4336, "step": 10040 }, { "epoch": 0.03885048940019483, "grad_norm": 0.14824466407299042, "learning_rate": 0.002, "loss": 2.4342, "step": 10050 }, { "epoch": 0.03888914660357811, "grad_norm": 0.18370555341243744, "learning_rate": 0.002, "loss": 2.4277, "step": 10060 }, { "epoch": 0.03892780380696139, "grad_norm": 0.17111271619796753, "learning_rate": 0.002, "loss": 2.4253, "step": 10070 }, { "epoch": 0.03896646101034467, "grad_norm": 0.14722810685634613, "learning_rate": 0.002, "loss": 2.4024, "step": 10080 }, { "epoch": 0.03900511821372794, "grad_norm": 0.16978606581687927, "learning_rate": 0.002, "loss": 2.4232, "step": 10090 }, { "epoch": 0.03904377541711122, "grad_norm": 0.18143069744110107, "learning_rate": 0.002, "loss": 2.4441, "step": 10100 }, { "epoch": 0.0390824326204945, "grad_norm": 0.19893452525138855, "learning_rate": 0.002, "loss": 2.4417, "step": 10110 }, { "epoch": 0.03912108982387778, "grad_norm": 0.15450480580329895, "learning_rate": 0.002, "loss": 2.4324, "step": 10120 }, { "epoch": 0.03915974702726106, "grad_norm": 0.153803288936615, "learning_rate": 0.002, "loss": 2.4373, "step": 10130 }, { "epoch": 0.039198404230644336, "grad_norm": 0.14763586223125458, "learning_rate": 0.002, "loss": 2.4316, "step": 10140 }, { "epoch": 0.039237061434027616, "grad_norm": 0.15948010981082916, "learning_rate": 0.002, "loss": 2.4285, "step": 10150 }, { "epoch": 0.039275718637410896, "grad_norm": 0.16599905490875244, "learning_rate": 0.002, "loss": 2.4303, "step": 10160 }, { "epoch": 0.039314375840794176, "grad_norm": 0.16918151080608368, "learning_rate": 0.002, "loss": 2.4334, "step": 10170 }, { "epoch": 0.03935303304417745, "grad_norm": 0.1887025535106659, "learning_rate": 0.002, "loss": 2.417, "step": 10180 }, { "epoch": 0.03939169024756073, "grad_norm": 0.18586301803588867, "learning_rate": 0.002, "loss": 2.4424, "step": 10190 }, { "epoch": 0.03943034745094401, "grad_norm": 0.12432118505239487, "learning_rate": 0.002, "loss": 2.4189, "step": 10200 }, { "epoch": 0.03946900465432729, "grad_norm": 0.16509106755256653, "learning_rate": 0.002, "loss": 2.4423, "step": 10210 }, { "epoch": 0.03950766185771057, "grad_norm": 0.1833307445049286, "learning_rate": 0.002, "loss": 2.436, "step": 10220 }, { "epoch": 0.03954631906109384, "grad_norm": 0.15685471892356873, "learning_rate": 0.002, "loss": 2.4376, "step": 10230 }, { "epoch": 0.03958497626447712, "grad_norm": 0.15386788547039032, "learning_rate": 0.002, "loss": 2.4248, "step": 10240 }, { "epoch": 0.0396236334678604, "grad_norm": 0.16778796911239624, "learning_rate": 0.002, "loss": 2.4473, "step": 10250 }, { "epoch": 0.03966229067124368, "grad_norm": 0.143438458442688, "learning_rate": 0.002, "loss": 2.4363, "step": 10260 }, { "epoch": 0.03970094787462696, "grad_norm": 0.1613985002040863, "learning_rate": 0.002, "loss": 2.4474, "step": 10270 }, { "epoch": 0.039739605078010234, "grad_norm": 0.15061679482460022, "learning_rate": 0.002, "loss": 2.437, "step": 10280 }, { "epoch": 0.039778262281393514, "grad_norm": 0.16388659179210663, "learning_rate": 0.002, "loss": 2.45, "step": 10290 }, { "epoch": 0.039816919484776794, "grad_norm": 0.13617081940174103, "learning_rate": 0.002, "loss": 2.4473, "step": 10300 }, { "epoch": 0.039855576688160074, "grad_norm": 0.20315887033939362, "learning_rate": 0.002, "loss": 2.4285, "step": 10310 }, { "epoch": 0.03989423389154335, "grad_norm": 0.15514788031578064, "learning_rate": 0.002, "loss": 2.4379, "step": 10320 }, { "epoch": 0.03993289109492663, "grad_norm": 0.16580229997634888, "learning_rate": 0.002, "loss": 2.4383, "step": 10330 }, { "epoch": 0.03997154829830991, "grad_norm": 0.16074010729789734, "learning_rate": 0.002, "loss": 2.4335, "step": 10340 }, { "epoch": 0.04001020550169319, "grad_norm": 0.1457606554031372, "learning_rate": 0.002, "loss": 2.4201, "step": 10350 }, { "epoch": 0.04004886270507647, "grad_norm": 0.18716587126255035, "learning_rate": 0.002, "loss": 2.4333, "step": 10360 }, { "epoch": 0.04008751990845974, "grad_norm": 0.13773228228092194, "learning_rate": 0.002, "loss": 2.4283, "step": 10370 }, { "epoch": 0.04012617711184302, "grad_norm": 0.15122456848621368, "learning_rate": 0.002, "loss": 2.433, "step": 10380 }, { "epoch": 0.0401648343152263, "grad_norm": 0.16871850192546844, "learning_rate": 0.002, "loss": 2.4313, "step": 10390 }, { "epoch": 0.04020349151860958, "grad_norm": 0.15135620534420013, "learning_rate": 0.002, "loss": 2.4404, "step": 10400 }, { "epoch": 0.04024214872199285, "grad_norm": 0.1498553603887558, "learning_rate": 0.002, "loss": 2.4224, "step": 10410 }, { "epoch": 0.04028080592537613, "grad_norm": 0.18582572042942047, "learning_rate": 0.002, "loss": 2.4413, "step": 10420 }, { "epoch": 0.04031946312875941, "grad_norm": 0.14045722782611847, "learning_rate": 0.002, "loss": 2.429, "step": 10430 }, { "epoch": 0.04035812033214269, "grad_norm": 0.1526753455400467, "learning_rate": 0.002, "loss": 2.4451, "step": 10440 }, { "epoch": 0.04039677753552597, "grad_norm": 0.17722046375274658, "learning_rate": 0.002, "loss": 2.4205, "step": 10450 }, { "epoch": 0.040435434738909246, "grad_norm": 0.18134362995624542, "learning_rate": 0.002, "loss": 2.4421, "step": 10460 }, { "epoch": 0.040474091942292525, "grad_norm": 0.15063491463661194, "learning_rate": 0.002, "loss": 2.4135, "step": 10470 }, { "epoch": 0.040512749145675805, "grad_norm": 0.11587220430374146, "learning_rate": 0.002, "loss": 2.4407, "step": 10480 }, { "epoch": 0.040551406349059085, "grad_norm": 0.14504306018352509, "learning_rate": 0.002, "loss": 2.434, "step": 10490 }, { "epoch": 0.040590063552442365, "grad_norm": 0.15471212565898895, "learning_rate": 0.002, "loss": 2.4264, "step": 10500 }, { "epoch": 0.04062872075582564, "grad_norm": 0.15481720864772797, "learning_rate": 0.002, "loss": 2.4233, "step": 10510 }, { "epoch": 0.04066737795920892, "grad_norm": 0.14765945076942444, "learning_rate": 0.002, "loss": 2.425, "step": 10520 }, { "epoch": 0.0407060351625922, "grad_norm": 0.16183441877365112, "learning_rate": 0.002, "loss": 2.4253, "step": 10530 }, { "epoch": 0.04074469236597548, "grad_norm": 0.14015688002109528, "learning_rate": 0.002, "loss": 2.43, "step": 10540 }, { "epoch": 0.04078334956935875, "grad_norm": 0.1484346091747284, "learning_rate": 0.002, "loss": 2.4191, "step": 10550 }, { "epoch": 0.04082200677274203, "grad_norm": 0.14880962669849396, "learning_rate": 0.002, "loss": 2.4379, "step": 10560 }, { "epoch": 0.04086066397612531, "grad_norm": 0.16797910630702972, "learning_rate": 0.002, "loss": 2.4472, "step": 10570 }, { "epoch": 0.04089932117950859, "grad_norm": 0.15298014879226685, "learning_rate": 0.002, "loss": 2.4088, "step": 10580 }, { "epoch": 0.04093797838289187, "grad_norm": 0.13158120214939117, "learning_rate": 0.002, "loss": 2.4229, "step": 10590 }, { "epoch": 0.040976635586275144, "grad_norm": 0.13972997665405273, "learning_rate": 0.002, "loss": 2.4211, "step": 10600 }, { "epoch": 0.041015292789658424, "grad_norm": 0.14697618782520294, "learning_rate": 0.002, "loss": 2.4384, "step": 10610 }, { "epoch": 0.041053949993041704, "grad_norm": 0.20341096818447113, "learning_rate": 0.002, "loss": 2.428, "step": 10620 }, { "epoch": 0.041092607196424984, "grad_norm": 0.14016160368919373, "learning_rate": 0.002, "loss": 2.4391, "step": 10630 }, { "epoch": 0.041131264399808264, "grad_norm": 0.12385547906160355, "learning_rate": 0.002, "loss": 2.4199, "step": 10640 }, { "epoch": 0.04116992160319154, "grad_norm": 0.15503078699111938, "learning_rate": 0.002, "loss": 2.4098, "step": 10650 }, { "epoch": 0.04120857880657482, "grad_norm": 0.13293243944644928, "learning_rate": 0.002, "loss": 2.4147, "step": 10660 }, { "epoch": 0.041247236009958096, "grad_norm": 0.14536736905574799, "learning_rate": 0.002, "loss": 2.4226, "step": 10670 }, { "epoch": 0.041285893213341376, "grad_norm": 0.14836855232715607, "learning_rate": 0.002, "loss": 2.4319, "step": 10680 }, { "epoch": 0.04132455041672465, "grad_norm": 0.15207520127296448, "learning_rate": 0.002, "loss": 2.4189, "step": 10690 }, { "epoch": 0.04136320762010793, "grad_norm": 0.15331625938415527, "learning_rate": 0.002, "loss": 2.4086, "step": 10700 }, { "epoch": 0.04140186482349121, "grad_norm": 0.16277608275413513, "learning_rate": 0.002, "loss": 2.4285, "step": 10710 }, { "epoch": 0.04144052202687449, "grad_norm": 0.13291038572788239, "learning_rate": 0.002, "loss": 2.4092, "step": 10720 }, { "epoch": 0.04147917923025777, "grad_norm": 0.13805748522281647, "learning_rate": 0.002, "loss": 2.413, "step": 10730 }, { "epoch": 0.04151783643364104, "grad_norm": 0.16852214932441711, "learning_rate": 0.002, "loss": 2.4142, "step": 10740 }, { "epoch": 0.04155649363702432, "grad_norm": 0.1524326652288437, "learning_rate": 0.002, "loss": 2.4165, "step": 10750 }, { "epoch": 0.0415951508404076, "grad_norm": 0.17319968342781067, "learning_rate": 0.002, "loss": 2.4216, "step": 10760 }, { "epoch": 0.04163380804379088, "grad_norm": 0.1453074961900711, "learning_rate": 0.002, "loss": 2.4352, "step": 10770 }, { "epoch": 0.04167246524717416, "grad_norm": 0.1632017195224762, "learning_rate": 0.002, "loss": 2.3946, "step": 10780 }, { "epoch": 0.041711122450557435, "grad_norm": 0.15487101674079895, "learning_rate": 0.002, "loss": 2.4321, "step": 10790 }, { "epoch": 0.041749779653940715, "grad_norm": 0.14366376399993896, "learning_rate": 0.002, "loss": 2.4209, "step": 10800 }, { "epoch": 0.041788436857323995, "grad_norm": 0.1451006531715393, "learning_rate": 0.002, "loss": 2.4191, "step": 10810 }, { "epoch": 0.041827094060707275, "grad_norm": 0.18275074660778046, "learning_rate": 0.002, "loss": 2.4141, "step": 10820 }, { "epoch": 0.04186575126409055, "grad_norm": 0.14227454364299774, "learning_rate": 0.002, "loss": 2.4291, "step": 10830 }, { "epoch": 0.04190440846747383, "grad_norm": 0.14518596231937408, "learning_rate": 0.002, "loss": 2.4179, "step": 10840 }, { "epoch": 0.04194306567085711, "grad_norm": 0.17375314235687256, "learning_rate": 0.002, "loss": 2.4294, "step": 10850 }, { "epoch": 0.04198172287424039, "grad_norm": 0.1337478905916214, "learning_rate": 0.002, "loss": 2.4362, "step": 10860 }, { "epoch": 0.04202038007762367, "grad_norm": 0.16233932971954346, "learning_rate": 0.002, "loss": 2.3947, "step": 10870 }, { "epoch": 0.04205903728100694, "grad_norm": 0.1352071315050125, "learning_rate": 0.002, "loss": 2.4216, "step": 10880 }, { "epoch": 0.04209769448439022, "grad_norm": 0.14264950156211853, "learning_rate": 0.002, "loss": 2.4258, "step": 10890 }, { "epoch": 0.0421363516877735, "grad_norm": 0.14911723136901855, "learning_rate": 0.002, "loss": 2.4161, "step": 10900 }, { "epoch": 0.04217500889115678, "grad_norm": 0.1300041824579239, "learning_rate": 0.002, "loss": 2.4483, "step": 10910 }, { "epoch": 0.04221366609454005, "grad_norm": 0.14911404252052307, "learning_rate": 0.002, "loss": 2.4284, "step": 10920 }, { "epoch": 0.04225232329792333, "grad_norm": 0.14238053560256958, "learning_rate": 0.002, "loss": 2.4336, "step": 10930 }, { "epoch": 0.04229098050130661, "grad_norm": 0.19582848250865936, "learning_rate": 0.002, "loss": 2.4228, "step": 10940 }, { "epoch": 0.04232963770468989, "grad_norm": 0.1654808223247528, "learning_rate": 0.002, "loss": 2.4154, "step": 10950 }, { "epoch": 0.04236829490807317, "grad_norm": 0.15952356159687042, "learning_rate": 0.002, "loss": 2.4204, "step": 10960 }, { "epoch": 0.042406952111456446, "grad_norm": 0.1382412314414978, "learning_rate": 0.002, "loss": 2.4181, "step": 10970 }, { "epoch": 0.042445609314839726, "grad_norm": 0.1486886739730835, "learning_rate": 0.002, "loss": 2.4178, "step": 10980 }, { "epoch": 0.042484266518223006, "grad_norm": 0.1526196449995041, "learning_rate": 0.002, "loss": 2.4201, "step": 10990 }, { "epoch": 0.042522923721606286, "grad_norm": 0.1615477204322815, "learning_rate": 0.002, "loss": 2.4353, "step": 11000 }, { "epoch": 0.042561580924989566, "grad_norm": 0.15322604775428772, "learning_rate": 0.002, "loss": 2.4149, "step": 11010 }, { "epoch": 0.04260023812837284, "grad_norm": 0.1561826914548874, "learning_rate": 0.002, "loss": 2.4404, "step": 11020 }, { "epoch": 0.04263889533175612, "grad_norm": 0.13902229070663452, "learning_rate": 0.002, "loss": 2.4336, "step": 11030 }, { "epoch": 0.0426775525351394, "grad_norm": 0.14555348455905914, "learning_rate": 0.002, "loss": 2.4139, "step": 11040 }, { "epoch": 0.04271620973852268, "grad_norm": 0.13210159540176392, "learning_rate": 0.002, "loss": 2.4143, "step": 11050 }, { "epoch": 0.04275486694190595, "grad_norm": 0.15930576622486115, "learning_rate": 0.002, "loss": 2.4186, "step": 11060 }, { "epoch": 0.04279352414528923, "grad_norm": 0.14124396443367004, "learning_rate": 0.002, "loss": 2.4213, "step": 11070 }, { "epoch": 0.04283218134867251, "grad_norm": 0.15243567526340485, "learning_rate": 0.002, "loss": 2.4228, "step": 11080 }, { "epoch": 0.04287083855205579, "grad_norm": 0.1674254685640335, "learning_rate": 0.002, "loss": 2.4153, "step": 11090 }, { "epoch": 0.04290949575543907, "grad_norm": 0.1357090175151825, "learning_rate": 0.002, "loss": 2.4278, "step": 11100 }, { "epoch": 0.042948152958822344, "grad_norm": 0.16976813971996307, "learning_rate": 0.002, "loss": 2.4142, "step": 11110 }, { "epoch": 0.042986810162205624, "grad_norm": 0.14292575418949127, "learning_rate": 0.002, "loss": 2.4299, "step": 11120 }, { "epoch": 0.043025467365588904, "grad_norm": 0.13828745484352112, "learning_rate": 0.002, "loss": 2.4205, "step": 11130 }, { "epoch": 0.043064124568972184, "grad_norm": 0.1772737056016922, "learning_rate": 0.002, "loss": 2.4211, "step": 11140 }, { "epoch": 0.043102781772355464, "grad_norm": 0.15409092605113983, "learning_rate": 0.002, "loss": 2.4308, "step": 11150 }, { "epoch": 0.04314143897573874, "grad_norm": 0.14300651848316193, "learning_rate": 0.002, "loss": 2.4007, "step": 11160 }, { "epoch": 0.04318009617912202, "grad_norm": 0.15811540186405182, "learning_rate": 0.002, "loss": 2.4217, "step": 11170 }, { "epoch": 0.0432187533825053, "grad_norm": 0.11724254488945007, "learning_rate": 0.002, "loss": 2.4236, "step": 11180 }, { "epoch": 0.04325741058588858, "grad_norm": 0.15714186429977417, "learning_rate": 0.002, "loss": 2.4145, "step": 11190 }, { "epoch": 0.04329606778927185, "grad_norm": 0.13312242925167084, "learning_rate": 0.002, "loss": 2.4184, "step": 11200 }, { "epoch": 0.04333472499265513, "grad_norm": 0.19631125032901764, "learning_rate": 0.002, "loss": 2.4347, "step": 11210 }, { "epoch": 0.04337338219603841, "grad_norm": 0.12227917462587357, "learning_rate": 0.002, "loss": 2.4227, "step": 11220 }, { "epoch": 0.04341203939942169, "grad_norm": 0.1563911885023117, "learning_rate": 0.002, "loss": 2.4227, "step": 11230 }, { "epoch": 0.04345069660280497, "grad_norm": 0.14006571471691132, "learning_rate": 0.002, "loss": 2.4161, "step": 11240 }, { "epoch": 0.04348935380618824, "grad_norm": 0.1672613024711609, "learning_rate": 0.002, "loss": 2.4058, "step": 11250 }, { "epoch": 0.04352801100957152, "grad_norm": 0.16477328538894653, "learning_rate": 0.002, "loss": 2.4084, "step": 11260 }, { "epoch": 0.0435666682129548, "grad_norm": 0.12407759577035904, "learning_rate": 0.002, "loss": 2.4317, "step": 11270 }, { "epoch": 0.04360532541633808, "grad_norm": 0.15415887534618378, "learning_rate": 0.002, "loss": 2.4296, "step": 11280 }, { "epoch": 0.04364398261972136, "grad_norm": 0.12192781269550323, "learning_rate": 0.002, "loss": 2.3992, "step": 11290 }, { "epoch": 0.043682639823104635, "grad_norm": 0.14742381870746613, "learning_rate": 0.002, "loss": 2.4225, "step": 11300 }, { "epoch": 0.043721297026487915, "grad_norm": 0.15054261684417725, "learning_rate": 0.002, "loss": 2.4257, "step": 11310 }, { "epoch": 0.043759954229871195, "grad_norm": 0.1351064294576645, "learning_rate": 0.002, "loss": 2.4023, "step": 11320 }, { "epoch": 0.043798611433254475, "grad_norm": 0.11836264282464981, "learning_rate": 0.002, "loss": 2.4129, "step": 11330 }, { "epoch": 0.04383726863663775, "grad_norm": 0.1980191022157669, "learning_rate": 0.002, "loss": 2.4123, "step": 11340 }, { "epoch": 0.04387592584002103, "grad_norm": 0.16777260601520538, "learning_rate": 0.002, "loss": 2.3954, "step": 11350 }, { "epoch": 0.04391458304340431, "grad_norm": 0.15461204946041107, "learning_rate": 0.002, "loss": 2.4146, "step": 11360 }, { "epoch": 0.04395324024678759, "grad_norm": 0.12209227681159973, "learning_rate": 0.002, "loss": 2.4198, "step": 11370 }, { "epoch": 0.04399189745017087, "grad_norm": 0.14516107738018036, "learning_rate": 0.002, "loss": 2.4273, "step": 11380 }, { "epoch": 0.04403055465355414, "grad_norm": 0.41329941153526306, "learning_rate": 0.002, "loss": 2.4405, "step": 11390 }, { "epoch": 0.04406921185693742, "grad_norm": 0.18117031455039978, "learning_rate": 0.002, "loss": 2.4214, "step": 11400 }, { "epoch": 0.0441078690603207, "grad_norm": 0.14210830628871918, "learning_rate": 0.002, "loss": 2.4205, "step": 11410 }, { "epoch": 0.04414652626370398, "grad_norm": 0.14302709698677063, "learning_rate": 0.002, "loss": 2.4262, "step": 11420 }, { "epoch": 0.044185183467087254, "grad_norm": 0.13019420206546783, "learning_rate": 0.002, "loss": 2.4177, "step": 11430 }, { "epoch": 0.044223840670470534, "grad_norm": 0.1916237622499466, "learning_rate": 0.002, "loss": 2.4143, "step": 11440 }, { "epoch": 0.044262497873853814, "grad_norm": 0.1374051868915558, "learning_rate": 0.002, "loss": 2.4145, "step": 11450 }, { "epoch": 0.044301155077237094, "grad_norm": 0.13074587285518646, "learning_rate": 0.002, "loss": 2.4126, "step": 11460 }, { "epoch": 0.04433981228062037, "grad_norm": 0.15300016105175018, "learning_rate": 0.002, "loss": 2.4224, "step": 11470 }, { "epoch": 0.044378469484003646, "grad_norm": 0.12504509091377258, "learning_rate": 0.002, "loss": 2.4172, "step": 11480 }, { "epoch": 0.044417126687386926, "grad_norm": 0.13933181762695312, "learning_rate": 0.002, "loss": 2.4227, "step": 11490 }, { "epoch": 0.044455783890770206, "grad_norm": 0.12378118932247162, "learning_rate": 0.002, "loss": 2.428, "step": 11500 }, { "epoch": 0.044494441094153486, "grad_norm": 0.15414609014987946, "learning_rate": 0.002, "loss": 2.3966, "step": 11510 }, { "epoch": 0.044533098297536766, "grad_norm": 0.11848758161067963, "learning_rate": 0.002, "loss": 2.3994, "step": 11520 }, { "epoch": 0.04457175550092004, "grad_norm": 0.1424514651298523, "learning_rate": 0.002, "loss": 2.4035, "step": 11530 }, { "epoch": 0.04461041270430332, "grad_norm": 0.121200330555439, "learning_rate": 0.002, "loss": 2.4232, "step": 11540 }, { "epoch": 0.0446490699076866, "grad_norm": 0.15000300109386444, "learning_rate": 0.002, "loss": 2.4292, "step": 11550 }, { "epoch": 0.04468772711106988, "grad_norm": 0.14829818904399872, "learning_rate": 0.002, "loss": 2.4114, "step": 11560 }, { "epoch": 0.04472638431445315, "grad_norm": 0.12678086757659912, "learning_rate": 0.002, "loss": 2.4287, "step": 11570 }, { "epoch": 0.04476504151783643, "grad_norm": 0.1372426301240921, "learning_rate": 0.002, "loss": 2.4197, "step": 11580 }, { "epoch": 0.04480369872121971, "grad_norm": 0.15566550195217133, "learning_rate": 0.002, "loss": 2.3989, "step": 11590 }, { "epoch": 0.04484235592460299, "grad_norm": 0.13825489580631256, "learning_rate": 0.002, "loss": 2.4205, "step": 11600 }, { "epoch": 0.04488101312798627, "grad_norm": 0.14873427152633667, "learning_rate": 0.002, "loss": 2.4188, "step": 11610 }, { "epoch": 0.044919670331369545, "grad_norm": 0.14977797865867615, "learning_rate": 0.002, "loss": 2.4181, "step": 11620 }, { "epoch": 0.044958327534752825, "grad_norm": 0.13811765611171722, "learning_rate": 0.002, "loss": 2.4092, "step": 11630 }, { "epoch": 0.044996984738136105, "grad_norm": 0.1411823183298111, "learning_rate": 0.002, "loss": 2.4149, "step": 11640 }, { "epoch": 0.045035641941519385, "grad_norm": 0.13481740653514862, "learning_rate": 0.002, "loss": 2.3933, "step": 11650 }, { "epoch": 0.045074299144902664, "grad_norm": 0.12790456414222717, "learning_rate": 0.002, "loss": 2.4176, "step": 11660 }, { "epoch": 0.04511295634828594, "grad_norm": 0.16515801846981049, "learning_rate": 0.002, "loss": 2.4253, "step": 11670 }, { "epoch": 0.04515161355166922, "grad_norm": 0.1256054937839508, "learning_rate": 0.002, "loss": 2.4085, "step": 11680 }, { "epoch": 0.0451902707550525, "grad_norm": 0.14855296909809113, "learning_rate": 0.002, "loss": 2.3995, "step": 11690 }, { "epoch": 0.04522892795843578, "grad_norm": 0.1553608626127243, "learning_rate": 0.002, "loss": 2.4157, "step": 11700 }, { "epoch": 0.04526758516181905, "grad_norm": 0.12629066407680511, "learning_rate": 0.002, "loss": 2.4115, "step": 11710 }, { "epoch": 0.04530624236520233, "grad_norm": 0.17862139642238617, "learning_rate": 0.002, "loss": 2.423, "step": 11720 }, { "epoch": 0.04534489956858561, "grad_norm": 0.1498710960149765, "learning_rate": 0.002, "loss": 2.421, "step": 11730 }, { "epoch": 0.04538355677196889, "grad_norm": 0.11899344623088837, "learning_rate": 0.002, "loss": 2.4171, "step": 11740 }, { "epoch": 0.04542221397535217, "grad_norm": 0.1494816094636917, "learning_rate": 0.002, "loss": 2.4111, "step": 11750 }, { "epoch": 0.04546087117873544, "grad_norm": 0.13252732157707214, "learning_rate": 0.002, "loss": 2.4128, "step": 11760 }, { "epoch": 0.04549952838211872, "grad_norm": 0.14148776233196259, "learning_rate": 0.002, "loss": 2.4192, "step": 11770 }, { "epoch": 0.045538185585502, "grad_norm": 0.15762783586978912, "learning_rate": 0.002, "loss": 2.4446, "step": 11780 }, { "epoch": 0.04557684278888528, "grad_norm": 0.16230829060077667, "learning_rate": 0.002, "loss": 2.4388, "step": 11790 }, { "epoch": 0.04561549999226856, "grad_norm": 0.11253784596920013, "learning_rate": 0.002, "loss": 2.4006, "step": 11800 }, { "epoch": 0.045654157195651836, "grad_norm": 0.1605105847120285, "learning_rate": 0.002, "loss": 2.4076, "step": 11810 }, { "epoch": 0.045692814399035116, "grad_norm": 0.1744198352098465, "learning_rate": 0.002, "loss": 2.3956, "step": 11820 }, { "epoch": 0.045731471602418396, "grad_norm": 0.6021363139152527, "learning_rate": 0.002, "loss": 2.4181, "step": 11830 }, { "epoch": 0.045770128805801676, "grad_norm": 0.13516347110271454, "learning_rate": 0.002, "loss": 2.4205, "step": 11840 }, { "epoch": 0.04580878600918495, "grad_norm": 0.1612042635679245, "learning_rate": 0.002, "loss": 2.4085, "step": 11850 }, { "epoch": 0.04584744321256823, "grad_norm": 0.11100103706121445, "learning_rate": 0.002, "loss": 2.4116, "step": 11860 }, { "epoch": 0.04588610041595151, "grad_norm": 0.1537276953458786, "learning_rate": 0.002, "loss": 2.4081, "step": 11870 }, { "epoch": 0.04592475761933479, "grad_norm": 0.1528160721063614, "learning_rate": 0.002, "loss": 2.4149, "step": 11880 }, { "epoch": 0.04596341482271807, "grad_norm": 0.12817005813121796, "learning_rate": 0.002, "loss": 2.4131, "step": 11890 }, { "epoch": 0.04600207202610134, "grad_norm": 0.15865662693977356, "learning_rate": 0.002, "loss": 2.4008, "step": 11900 }, { "epoch": 0.04604072922948462, "grad_norm": 0.1338355392217636, "learning_rate": 0.002, "loss": 2.4311, "step": 11910 }, { "epoch": 0.0460793864328679, "grad_norm": 0.11291113495826721, "learning_rate": 0.002, "loss": 2.4127, "step": 11920 }, { "epoch": 0.04611804363625118, "grad_norm": 0.1465831995010376, "learning_rate": 0.002, "loss": 2.4008, "step": 11930 }, { "epoch": 0.046156700839634454, "grad_norm": 0.13816089928150177, "learning_rate": 0.002, "loss": 2.4221, "step": 11940 }, { "epoch": 0.046195358043017734, "grad_norm": 0.13582506775856018, "learning_rate": 0.002, "loss": 2.4248, "step": 11950 }, { "epoch": 0.046234015246401014, "grad_norm": 0.15690980851650238, "learning_rate": 0.002, "loss": 2.4055, "step": 11960 }, { "epoch": 0.046272672449784294, "grad_norm": 0.1282709687948227, "learning_rate": 0.002, "loss": 2.4258, "step": 11970 }, { "epoch": 0.046311329653167574, "grad_norm": 0.12684884667396545, "learning_rate": 0.002, "loss": 2.4119, "step": 11980 }, { "epoch": 0.04634998685655085, "grad_norm": 0.17777405679225922, "learning_rate": 0.002, "loss": 2.4131, "step": 11990 }, { "epoch": 0.04638864405993413, "grad_norm": 0.13052219152450562, "learning_rate": 0.002, "loss": 2.3977, "step": 12000 }, { "epoch": 0.04642730126331741, "grad_norm": 0.14413100481033325, "learning_rate": 0.002, "loss": 2.4178, "step": 12010 }, { "epoch": 0.04646595846670069, "grad_norm": 0.14420953392982483, "learning_rate": 0.002, "loss": 2.4071, "step": 12020 }, { "epoch": 0.04650461567008397, "grad_norm": 0.14926595985889435, "learning_rate": 0.002, "loss": 2.4098, "step": 12030 }, { "epoch": 0.04654327287346724, "grad_norm": 0.12959520518779755, "learning_rate": 0.002, "loss": 2.4039, "step": 12040 }, { "epoch": 0.04658193007685052, "grad_norm": 0.1247779130935669, "learning_rate": 0.002, "loss": 2.411, "step": 12050 }, { "epoch": 0.0466205872802338, "grad_norm": 0.1487448364496231, "learning_rate": 0.002, "loss": 2.4128, "step": 12060 }, { "epoch": 0.04665924448361708, "grad_norm": 0.1244499534368515, "learning_rate": 0.002, "loss": 2.4115, "step": 12070 }, { "epoch": 0.04669790168700035, "grad_norm": 0.12959147989749908, "learning_rate": 0.002, "loss": 2.4005, "step": 12080 }, { "epoch": 0.04673655889038363, "grad_norm": 0.1252502202987671, "learning_rate": 0.002, "loss": 2.4081, "step": 12090 }, { "epoch": 0.04677521609376691, "grad_norm": 0.11847937107086182, "learning_rate": 0.002, "loss": 2.4062, "step": 12100 }, { "epoch": 0.04681387329715019, "grad_norm": 0.13584916293621063, "learning_rate": 0.002, "loss": 2.402, "step": 12110 }, { "epoch": 0.04685253050053347, "grad_norm": 0.1508847028017044, "learning_rate": 0.002, "loss": 2.4048, "step": 12120 }, { "epoch": 0.046891187703916745, "grad_norm": 0.12016989290714264, "learning_rate": 0.002, "loss": 2.4023, "step": 12130 }, { "epoch": 0.046929844907300025, "grad_norm": 0.1213717982172966, "learning_rate": 0.002, "loss": 2.4051, "step": 12140 }, { "epoch": 0.046968502110683305, "grad_norm": 0.14406681060791016, "learning_rate": 0.002, "loss": 2.3983, "step": 12150 }, { "epoch": 0.047007159314066585, "grad_norm": 0.16102002561092377, "learning_rate": 0.002, "loss": 2.3987, "step": 12160 }, { "epoch": 0.047045816517449865, "grad_norm": 0.11670637875795364, "learning_rate": 0.002, "loss": 2.4169, "step": 12170 }, { "epoch": 0.04708447372083314, "grad_norm": 0.1357085108757019, "learning_rate": 0.002, "loss": 2.4074, "step": 12180 }, { "epoch": 0.04712313092421642, "grad_norm": 0.15811742842197418, "learning_rate": 0.002, "loss": 2.3974, "step": 12190 }, { "epoch": 0.0471617881275997, "grad_norm": 0.12981584668159485, "learning_rate": 0.002, "loss": 2.4265, "step": 12200 }, { "epoch": 0.04720044533098298, "grad_norm": 0.1324855387210846, "learning_rate": 0.002, "loss": 2.4064, "step": 12210 }, { "epoch": 0.04723910253436625, "grad_norm": 0.14152173697948456, "learning_rate": 0.002, "loss": 2.4178, "step": 12220 }, { "epoch": 0.04727775973774953, "grad_norm": 0.12873715162277222, "learning_rate": 0.002, "loss": 2.4109, "step": 12230 }, { "epoch": 0.04731641694113281, "grad_norm": 0.12940950691699982, "learning_rate": 0.002, "loss": 2.4215, "step": 12240 }, { "epoch": 0.04735507414451609, "grad_norm": 0.1321355104446411, "learning_rate": 0.002, "loss": 2.4134, "step": 12250 }, { "epoch": 0.04739373134789937, "grad_norm": 0.16091695427894592, "learning_rate": 0.002, "loss": 2.3976, "step": 12260 }, { "epoch": 0.047432388551282643, "grad_norm": 0.14886890351772308, "learning_rate": 0.002, "loss": 2.4064, "step": 12270 }, { "epoch": 0.04747104575466592, "grad_norm": 0.13141034543514252, "learning_rate": 0.002, "loss": 2.3986, "step": 12280 }, { "epoch": 0.0475097029580492, "grad_norm": 0.1348421722650528, "learning_rate": 0.002, "loss": 2.4091, "step": 12290 }, { "epoch": 0.04754836016143248, "grad_norm": 0.1288251429796219, "learning_rate": 0.002, "loss": 2.4095, "step": 12300 }, { "epoch": 0.04758701736481576, "grad_norm": 0.16734634339809418, "learning_rate": 0.002, "loss": 2.4137, "step": 12310 }, { "epoch": 0.047625674568199036, "grad_norm": 0.1711134910583496, "learning_rate": 0.002, "loss": 2.4112, "step": 12320 }, { "epoch": 0.047664331771582316, "grad_norm": 0.12048466503620148, "learning_rate": 0.002, "loss": 2.4302, "step": 12330 }, { "epoch": 0.047702988974965596, "grad_norm": 0.13013233244419098, "learning_rate": 0.002, "loss": 2.4024, "step": 12340 }, { "epoch": 0.047741646178348876, "grad_norm": 0.14628617465496063, "learning_rate": 0.002, "loss": 2.4207, "step": 12350 }, { "epoch": 0.04778030338173215, "grad_norm": 0.11881358176469803, "learning_rate": 0.002, "loss": 2.4089, "step": 12360 }, { "epoch": 0.04781896058511543, "grad_norm": 0.14722537994384766, "learning_rate": 0.002, "loss": 2.4204, "step": 12370 }, { "epoch": 0.04785761778849871, "grad_norm": 0.1352567970752716, "learning_rate": 0.002, "loss": 2.4002, "step": 12380 }, { "epoch": 0.04789627499188199, "grad_norm": 0.13809798657894135, "learning_rate": 0.002, "loss": 2.4103, "step": 12390 }, { "epoch": 0.04793493219526527, "grad_norm": 0.134647399187088, "learning_rate": 0.002, "loss": 2.4146, "step": 12400 }, { "epoch": 0.04797358939864854, "grad_norm": 0.13529935479164124, "learning_rate": 0.002, "loss": 2.4162, "step": 12410 }, { "epoch": 0.04801224660203182, "grad_norm": 0.14426128566265106, "learning_rate": 0.002, "loss": 2.4014, "step": 12420 }, { "epoch": 0.0480509038054151, "grad_norm": 0.1167021095752716, "learning_rate": 0.002, "loss": 2.425, "step": 12430 }, { "epoch": 0.04808956100879838, "grad_norm": 0.16464290022850037, "learning_rate": 0.002, "loss": 2.4127, "step": 12440 }, { "epoch": 0.048128218212181655, "grad_norm": 0.1276850402355194, "learning_rate": 0.002, "loss": 2.4103, "step": 12450 }, { "epoch": 0.048166875415564935, "grad_norm": 0.14743928611278534, "learning_rate": 0.002, "loss": 2.4335, "step": 12460 }, { "epoch": 0.048205532618948214, "grad_norm": 0.11716633290052414, "learning_rate": 0.002, "loss": 2.4171, "step": 12470 }, { "epoch": 0.048244189822331494, "grad_norm": 0.12930072844028473, "learning_rate": 0.002, "loss": 2.4085, "step": 12480 }, { "epoch": 0.048282847025714774, "grad_norm": 0.16280917823314667, "learning_rate": 0.002, "loss": 2.4118, "step": 12490 }, { "epoch": 0.04832150422909805, "grad_norm": 0.1280793398618698, "learning_rate": 0.002, "loss": 2.3931, "step": 12500 }, { "epoch": 0.04836016143248133, "grad_norm": 0.12769965827465057, "learning_rate": 0.002, "loss": 2.4027, "step": 12510 }, { "epoch": 0.04839881863586461, "grad_norm": 0.21294499933719635, "learning_rate": 0.002, "loss": 2.4061, "step": 12520 }, { "epoch": 0.04843747583924789, "grad_norm": 0.13654080033302307, "learning_rate": 0.002, "loss": 2.4085, "step": 12530 }, { "epoch": 0.04847613304263117, "grad_norm": 0.1464715600013733, "learning_rate": 0.002, "loss": 2.3967, "step": 12540 }, { "epoch": 0.04851479024601444, "grad_norm": 0.17120212316513062, "learning_rate": 0.002, "loss": 2.3977, "step": 12550 }, { "epoch": 0.04855344744939772, "grad_norm": 0.12013162672519684, "learning_rate": 0.002, "loss": 2.4128, "step": 12560 }, { "epoch": 0.048592104652781, "grad_norm": 0.12653093039989471, "learning_rate": 0.002, "loss": 2.4259, "step": 12570 }, { "epoch": 0.04863076185616428, "grad_norm": 0.12446767836809158, "learning_rate": 0.002, "loss": 2.4048, "step": 12580 }, { "epoch": 0.04866941905954755, "grad_norm": 0.12352242320775986, "learning_rate": 0.002, "loss": 2.3932, "step": 12590 }, { "epoch": 0.04870807626293083, "grad_norm": 0.1217745840549469, "learning_rate": 0.002, "loss": 2.4092, "step": 12600 }, { "epoch": 0.04874673346631411, "grad_norm": 0.11448900401592255, "learning_rate": 0.002, "loss": 2.4002, "step": 12610 }, { "epoch": 0.04878539066969739, "grad_norm": 0.16743631660938263, "learning_rate": 0.002, "loss": 2.4037, "step": 12620 }, { "epoch": 0.04882404787308067, "grad_norm": 0.1504344940185547, "learning_rate": 0.002, "loss": 2.4046, "step": 12630 }, { "epoch": 0.048862705076463946, "grad_norm": 0.12713049352169037, "learning_rate": 0.002, "loss": 2.4006, "step": 12640 }, { "epoch": 0.048901362279847226, "grad_norm": 0.15592017769813538, "learning_rate": 0.002, "loss": 2.3979, "step": 12650 }, { "epoch": 0.048940019483230505, "grad_norm": 0.13741640746593475, "learning_rate": 0.002, "loss": 2.406, "step": 12660 }, { "epoch": 0.048978676686613785, "grad_norm": 0.11524946987628937, "learning_rate": 0.002, "loss": 2.4036, "step": 12670 }, { "epoch": 0.049017333889997065, "grad_norm": 0.15274588763713837, "learning_rate": 0.002, "loss": 2.4096, "step": 12680 }, { "epoch": 0.04905599109338034, "grad_norm": 0.12144805490970612, "learning_rate": 0.002, "loss": 2.4143, "step": 12690 }, { "epoch": 0.04909464829676362, "grad_norm": 0.12664386630058289, "learning_rate": 0.002, "loss": 2.3967, "step": 12700 }, { "epoch": 0.0491333055001469, "grad_norm": 0.15074361860752106, "learning_rate": 0.002, "loss": 2.3987, "step": 12710 }, { "epoch": 0.04917196270353018, "grad_norm": 0.12998323142528534, "learning_rate": 0.002, "loss": 2.4158, "step": 12720 }, { "epoch": 0.04921061990691345, "grad_norm": 0.12403959035873413, "learning_rate": 0.002, "loss": 2.4115, "step": 12730 }, { "epoch": 0.04924927711029673, "grad_norm": 0.12004856020212173, "learning_rate": 0.002, "loss": 2.398, "step": 12740 }, { "epoch": 0.04928793431368001, "grad_norm": 0.12569855153560638, "learning_rate": 0.002, "loss": 2.414, "step": 12750 }, { "epoch": 0.04932659151706329, "grad_norm": 0.10845328122377396, "learning_rate": 0.002, "loss": 2.4085, "step": 12760 }, { "epoch": 0.04936524872044657, "grad_norm": 0.12075639516115189, "learning_rate": 0.002, "loss": 2.4115, "step": 12770 }, { "epoch": 0.049403905923829844, "grad_norm": 0.1443193107843399, "learning_rate": 0.002, "loss": 2.4136, "step": 12780 }, { "epoch": 0.049442563127213124, "grad_norm": 0.131768599152565, "learning_rate": 0.002, "loss": 2.3755, "step": 12790 }, { "epoch": 0.049481220330596404, "grad_norm": 0.12630602717399597, "learning_rate": 0.002, "loss": 2.4034, "step": 12800 }, { "epoch": 0.049519877533979684, "grad_norm": 0.13021297752857208, "learning_rate": 0.002, "loss": 2.3968, "step": 12810 }, { "epoch": 0.049558534737362964, "grad_norm": 0.12939077615737915, "learning_rate": 0.002, "loss": 2.4204, "step": 12820 }, { "epoch": 0.04959719194074624, "grad_norm": 0.13959570229053497, "learning_rate": 0.002, "loss": 2.4009, "step": 12830 }, { "epoch": 0.04963584914412952, "grad_norm": 0.12185297906398773, "learning_rate": 0.002, "loss": 2.4016, "step": 12840 }, { "epoch": 0.0496745063475128, "grad_norm": 0.12302763015031815, "learning_rate": 0.002, "loss": 2.4199, "step": 12850 }, { "epoch": 0.049713163550896076, "grad_norm": 0.13255852460861206, "learning_rate": 0.002, "loss": 2.3924, "step": 12860 }, { "epoch": 0.04975182075427935, "grad_norm": 0.1301736682653427, "learning_rate": 0.002, "loss": 2.4136, "step": 12870 }, { "epoch": 0.04979047795766263, "grad_norm": 0.12346645444631577, "learning_rate": 0.002, "loss": 2.4028, "step": 12880 }, { "epoch": 0.04982913516104591, "grad_norm": 0.12624023854732513, "learning_rate": 0.002, "loss": 2.4112, "step": 12890 }, { "epoch": 0.04986779236442919, "grad_norm": 0.13940802216529846, "learning_rate": 0.002, "loss": 2.4061, "step": 12900 }, { "epoch": 0.04990644956781247, "grad_norm": 0.12112889438867569, "learning_rate": 0.002, "loss": 2.405, "step": 12910 }, { "epoch": 0.04994510677119574, "grad_norm": 0.1299026608467102, "learning_rate": 0.002, "loss": 2.3977, "step": 12920 }, { "epoch": 0.04998376397457902, "grad_norm": 0.1561487466096878, "learning_rate": 0.002, "loss": 2.4104, "step": 12930 }, { "epoch": 0.0500224211779623, "grad_norm": 0.11744363605976105, "learning_rate": 0.002, "loss": 2.3941, "step": 12940 }, { "epoch": 0.05006107838134558, "grad_norm": 0.1262972503900528, "learning_rate": 0.002, "loss": 2.4013, "step": 12950 }, { "epoch": 0.050099735584728855, "grad_norm": 0.1282545030117035, "learning_rate": 0.002, "loss": 2.4071, "step": 12960 }, { "epoch": 0.050138392788112135, "grad_norm": 0.1636199951171875, "learning_rate": 0.002, "loss": 2.3944, "step": 12970 }, { "epoch": 0.050177049991495415, "grad_norm": 0.11423587054014206, "learning_rate": 0.002, "loss": 2.4059, "step": 12980 }, { "epoch": 0.050215707194878695, "grad_norm": 0.1306290328502655, "learning_rate": 0.002, "loss": 2.4006, "step": 12990 }, { "epoch": 0.050254364398261975, "grad_norm": 0.1477252095937729, "learning_rate": 0.002, "loss": 2.3894, "step": 13000 }, { "epoch": 0.05029302160164525, "grad_norm": 0.1320551335811615, "learning_rate": 0.002, "loss": 2.4272, "step": 13010 }, { "epoch": 0.05033167880502853, "grad_norm": 0.14027470350265503, "learning_rate": 0.002, "loss": 2.4151, "step": 13020 }, { "epoch": 0.05037033600841181, "grad_norm": 0.1384015679359436, "learning_rate": 0.002, "loss": 2.4055, "step": 13030 }, { "epoch": 0.05040899321179509, "grad_norm": 0.13307714462280273, "learning_rate": 0.002, "loss": 2.3914, "step": 13040 }, { "epoch": 0.05044765041517837, "grad_norm": 0.11577140539884567, "learning_rate": 0.002, "loss": 2.3969, "step": 13050 }, { "epoch": 0.05048630761856164, "grad_norm": 0.12980316579341888, "learning_rate": 0.002, "loss": 2.4087, "step": 13060 }, { "epoch": 0.05052496482194492, "grad_norm": 0.13999392092227936, "learning_rate": 0.002, "loss": 2.3943, "step": 13070 }, { "epoch": 0.0505636220253282, "grad_norm": 0.10867933183908463, "learning_rate": 0.002, "loss": 2.4152, "step": 13080 }, { "epoch": 0.05060227922871148, "grad_norm": 0.11368278414011002, "learning_rate": 0.002, "loss": 2.4101, "step": 13090 }, { "epoch": 0.05064093643209475, "grad_norm": 0.11560127884149551, "learning_rate": 0.002, "loss": 2.3818, "step": 13100 }, { "epoch": 0.05067959363547803, "grad_norm": 0.11178620159626007, "learning_rate": 0.002, "loss": 2.3975, "step": 13110 }, { "epoch": 0.05071825083886131, "grad_norm": 0.13433168828487396, "learning_rate": 0.002, "loss": 2.3876, "step": 13120 }, { "epoch": 0.05075690804224459, "grad_norm": 0.1378285437822342, "learning_rate": 0.002, "loss": 2.3905, "step": 13130 }, { "epoch": 0.05079556524562787, "grad_norm": 0.11138379573822021, "learning_rate": 0.002, "loss": 2.4058, "step": 13140 }, { "epoch": 0.050834222449011146, "grad_norm": 0.1196100190281868, "learning_rate": 0.002, "loss": 2.3945, "step": 13150 }, { "epoch": 0.050872879652394426, "grad_norm": 0.14321012794971466, "learning_rate": 0.002, "loss": 2.3982, "step": 13160 }, { "epoch": 0.050911536855777706, "grad_norm": 0.12484076619148254, "learning_rate": 0.002, "loss": 2.4067, "step": 13170 }, { "epoch": 0.050950194059160986, "grad_norm": 0.14980606734752655, "learning_rate": 0.002, "loss": 2.407, "step": 13180 }, { "epoch": 0.050988851262544266, "grad_norm": 0.13169430196285248, "learning_rate": 0.002, "loss": 2.4015, "step": 13190 }, { "epoch": 0.05102750846592754, "grad_norm": 0.1352589726448059, "learning_rate": 0.002, "loss": 2.4024, "step": 13200 }, { "epoch": 0.05106616566931082, "grad_norm": 0.14622355997562408, "learning_rate": 0.002, "loss": 2.4073, "step": 13210 }, { "epoch": 0.0511048228726941, "grad_norm": 0.1075468584895134, "learning_rate": 0.002, "loss": 2.4132, "step": 13220 }, { "epoch": 0.05114348007607738, "grad_norm": 0.11611749976873398, "learning_rate": 0.002, "loss": 2.4027, "step": 13230 }, { "epoch": 0.05118213727946065, "grad_norm": 0.13044321537017822, "learning_rate": 0.002, "loss": 2.4114, "step": 13240 }, { "epoch": 0.05122079448284393, "grad_norm": 0.1160300001502037, "learning_rate": 0.002, "loss": 2.4056, "step": 13250 }, { "epoch": 0.05125945168622721, "grad_norm": 0.12946556508541107, "learning_rate": 0.002, "loss": 2.397, "step": 13260 }, { "epoch": 0.05129810888961049, "grad_norm": 0.11253096908330917, "learning_rate": 0.002, "loss": 2.3882, "step": 13270 }, { "epoch": 0.05133676609299377, "grad_norm": 0.11658414453268051, "learning_rate": 0.002, "loss": 2.3931, "step": 13280 }, { "epoch": 0.051375423296377044, "grad_norm": 0.13752421736717224, "learning_rate": 0.002, "loss": 2.4166, "step": 13290 }, { "epoch": 0.051414080499760324, "grad_norm": 0.1143462136387825, "learning_rate": 0.002, "loss": 2.414, "step": 13300 }, { "epoch": 0.051452737703143604, "grad_norm": 0.1488148272037506, "learning_rate": 0.002, "loss": 2.3871, "step": 13310 }, { "epoch": 0.051491394906526884, "grad_norm": 0.1388746201992035, "learning_rate": 0.002, "loss": 2.3956, "step": 13320 }, { "epoch": 0.051530052109910164, "grad_norm": 0.1489126980304718, "learning_rate": 0.002, "loss": 2.4011, "step": 13330 }, { "epoch": 0.05156870931329344, "grad_norm": 0.14923395216464996, "learning_rate": 0.002, "loss": 2.4017, "step": 13340 }, { "epoch": 0.05160736651667672, "grad_norm": 0.1405162364244461, "learning_rate": 0.002, "loss": 2.3972, "step": 13350 }, { "epoch": 0.05164602372006, "grad_norm": 0.1152169480919838, "learning_rate": 0.002, "loss": 2.3877, "step": 13360 }, { "epoch": 0.05168468092344328, "grad_norm": 0.11067835241556168, "learning_rate": 0.002, "loss": 2.3946, "step": 13370 }, { "epoch": 0.05172333812682655, "grad_norm": 0.13168556988239288, "learning_rate": 0.002, "loss": 2.4041, "step": 13380 }, { "epoch": 0.05176199533020983, "grad_norm": 0.1383281648159027, "learning_rate": 0.002, "loss": 2.3964, "step": 13390 }, { "epoch": 0.05180065253359311, "grad_norm": 0.11505181342363358, "learning_rate": 0.002, "loss": 2.3998, "step": 13400 }, { "epoch": 0.05183930973697639, "grad_norm": 0.13439294695854187, "learning_rate": 0.002, "loss": 2.4019, "step": 13410 }, { "epoch": 0.05187796694035967, "grad_norm": 0.12931932508945465, "learning_rate": 0.002, "loss": 2.4123, "step": 13420 }, { "epoch": 0.05191662414374294, "grad_norm": 0.12582558393478394, "learning_rate": 0.002, "loss": 2.408, "step": 13430 }, { "epoch": 0.05195528134712622, "grad_norm": 0.12109485268592834, "learning_rate": 0.002, "loss": 2.4017, "step": 13440 }, { "epoch": 0.0519939385505095, "grad_norm": 0.15276485681533813, "learning_rate": 0.002, "loss": 2.4023, "step": 13450 }, { "epoch": 0.05203259575389278, "grad_norm": 0.12507130205631256, "learning_rate": 0.002, "loss": 2.3829, "step": 13460 }, { "epoch": 0.052071252957276055, "grad_norm": 0.14027948677539825, "learning_rate": 0.002, "loss": 2.4167, "step": 13470 }, { "epoch": 0.052109910160659335, "grad_norm": 0.10799466818571091, "learning_rate": 0.002, "loss": 2.3788, "step": 13480 }, { "epoch": 0.052148567364042615, "grad_norm": 0.12428688257932663, "learning_rate": 0.002, "loss": 2.3893, "step": 13490 }, { "epoch": 0.052187224567425895, "grad_norm": 0.11244092136621475, "learning_rate": 0.002, "loss": 2.3953, "step": 13500 }, { "epoch": 0.052225881770809175, "grad_norm": 0.14743168652057648, "learning_rate": 0.002, "loss": 2.3916, "step": 13510 }, { "epoch": 0.05226453897419245, "grad_norm": 0.11982240527868271, "learning_rate": 0.002, "loss": 2.4215, "step": 13520 }, { "epoch": 0.05230319617757573, "grad_norm": 0.12308619916439056, "learning_rate": 0.002, "loss": 2.3991, "step": 13530 }, { "epoch": 0.05234185338095901, "grad_norm": 0.12184516340494156, "learning_rate": 0.002, "loss": 2.3979, "step": 13540 }, { "epoch": 0.05238051058434229, "grad_norm": 0.13491886854171753, "learning_rate": 0.002, "loss": 2.3994, "step": 13550 }, { "epoch": 0.05241916778772557, "grad_norm": 0.12219549715518951, "learning_rate": 0.002, "loss": 2.3918, "step": 13560 }, { "epoch": 0.05245782499110884, "grad_norm": 0.11105125397443771, "learning_rate": 0.002, "loss": 2.4074, "step": 13570 }, { "epoch": 0.05249648219449212, "grad_norm": 0.16937963664531708, "learning_rate": 0.002, "loss": 2.4001, "step": 13580 }, { "epoch": 0.0525351393978754, "grad_norm": 0.10928713530302048, "learning_rate": 0.002, "loss": 2.4093, "step": 13590 }, { "epoch": 0.05257379660125868, "grad_norm": 0.16527491807937622, "learning_rate": 0.002, "loss": 2.4012, "step": 13600 }, { "epoch": 0.052612453804641954, "grad_norm": 0.13442641496658325, "learning_rate": 0.002, "loss": 2.4026, "step": 13610 }, { "epoch": 0.052651111008025234, "grad_norm": 0.12295237183570862, "learning_rate": 0.002, "loss": 2.3935, "step": 13620 }, { "epoch": 0.052689768211408514, "grad_norm": 0.13979977369308472, "learning_rate": 0.002, "loss": 2.3918, "step": 13630 }, { "epoch": 0.052728425414791794, "grad_norm": 0.12009885162115097, "learning_rate": 0.002, "loss": 2.4035, "step": 13640 }, { "epoch": 0.052767082618175074, "grad_norm": 0.14299508929252625, "learning_rate": 0.002, "loss": 2.4086, "step": 13650 }, { "epoch": 0.052805739821558347, "grad_norm": 0.14301536977291107, "learning_rate": 0.002, "loss": 2.4084, "step": 13660 }, { "epoch": 0.052844397024941626, "grad_norm": 0.12862665951251984, "learning_rate": 0.002, "loss": 2.4025, "step": 13670 }, { "epoch": 0.052883054228324906, "grad_norm": 0.162658229470253, "learning_rate": 0.002, "loss": 2.3974, "step": 13680 }, { "epoch": 0.052921711431708186, "grad_norm": 0.10682334005832672, "learning_rate": 0.002, "loss": 2.411, "step": 13690 }, { "epoch": 0.052960368635091466, "grad_norm": 0.13332685828208923, "learning_rate": 0.002, "loss": 2.4163, "step": 13700 }, { "epoch": 0.05299902583847474, "grad_norm": 0.11928480863571167, "learning_rate": 0.002, "loss": 2.3914, "step": 13710 }, { "epoch": 0.05303768304185802, "grad_norm": 0.13431620597839355, "learning_rate": 0.002, "loss": 2.4059, "step": 13720 }, { "epoch": 0.0530763402452413, "grad_norm": 0.14743177592754364, "learning_rate": 0.002, "loss": 2.3915, "step": 13730 }, { "epoch": 0.05311499744862458, "grad_norm": 0.14384421706199646, "learning_rate": 0.002, "loss": 2.4026, "step": 13740 }, { "epoch": 0.05315365465200785, "grad_norm": 0.11944720149040222, "learning_rate": 0.002, "loss": 2.3983, "step": 13750 }, { "epoch": 0.05319231185539113, "grad_norm": 0.11968901008367538, "learning_rate": 0.002, "loss": 2.4023, "step": 13760 }, { "epoch": 0.05323096905877441, "grad_norm": 0.1279086172580719, "learning_rate": 0.002, "loss": 2.4049, "step": 13770 }, { "epoch": 0.05326962626215769, "grad_norm": 0.11430168896913528, "learning_rate": 0.002, "loss": 2.3911, "step": 13780 }, { "epoch": 0.05330828346554097, "grad_norm": 0.13571108877658844, "learning_rate": 0.002, "loss": 2.3965, "step": 13790 }, { "epoch": 0.053346940668924245, "grad_norm": 0.1266448199748993, "learning_rate": 0.002, "loss": 2.3854, "step": 13800 }, { "epoch": 0.053385597872307525, "grad_norm": 0.13343386352062225, "learning_rate": 0.002, "loss": 2.4135, "step": 13810 }, { "epoch": 0.053424255075690805, "grad_norm": 0.14449051022529602, "learning_rate": 0.002, "loss": 2.4074, "step": 13820 }, { "epoch": 0.053462912279074085, "grad_norm": 0.12064560502767563, "learning_rate": 0.002, "loss": 2.3903, "step": 13830 }, { "epoch": 0.053501569482457365, "grad_norm": 0.13178220391273499, "learning_rate": 0.002, "loss": 2.387, "step": 13840 }, { "epoch": 0.05354022668584064, "grad_norm": 0.13305504620075226, "learning_rate": 0.002, "loss": 2.3928, "step": 13850 }, { "epoch": 0.05357888388922392, "grad_norm": 0.14914706349372864, "learning_rate": 0.002, "loss": 2.3985, "step": 13860 }, { "epoch": 0.0536175410926072, "grad_norm": 0.13752660155296326, "learning_rate": 0.002, "loss": 2.3925, "step": 13870 }, { "epoch": 0.05365619829599048, "grad_norm": 0.1223013773560524, "learning_rate": 0.002, "loss": 2.3944, "step": 13880 }, { "epoch": 0.05369485549937375, "grad_norm": 0.13957269489765167, "learning_rate": 0.002, "loss": 2.3994, "step": 13890 }, { "epoch": 0.05373351270275703, "grad_norm": 0.159816175699234, "learning_rate": 0.002, "loss": 2.3863, "step": 13900 }, { "epoch": 0.05377216990614031, "grad_norm": 0.10616747289896011, "learning_rate": 0.002, "loss": 2.389, "step": 13910 }, { "epoch": 0.05381082710952359, "grad_norm": 0.1293746680021286, "learning_rate": 0.002, "loss": 2.3972, "step": 13920 }, { "epoch": 0.05384948431290687, "grad_norm": 0.13460808992385864, "learning_rate": 0.002, "loss": 2.3938, "step": 13930 }, { "epoch": 0.05388814151629014, "grad_norm": 0.13545729219913483, "learning_rate": 0.002, "loss": 2.3951, "step": 13940 }, { "epoch": 0.05392679871967342, "grad_norm": 0.11745970696210861, "learning_rate": 0.002, "loss": 2.4063, "step": 13950 }, { "epoch": 0.0539654559230567, "grad_norm": 0.11512023210525513, "learning_rate": 0.002, "loss": 2.3982, "step": 13960 }, { "epoch": 0.05400411312643998, "grad_norm": 0.12416357547044754, "learning_rate": 0.002, "loss": 2.3919, "step": 13970 }, { "epoch": 0.054042770329823256, "grad_norm": 0.12409238517284393, "learning_rate": 0.002, "loss": 2.4102, "step": 13980 }, { "epoch": 0.054081427533206536, "grad_norm": 0.10481012612581253, "learning_rate": 0.002, "loss": 2.4039, "step": 13990 }, { "epoch": 0.054120084736589816, "grad_norm": 0.11006207764148712, "learning_rate": 0.002, "loss": 2.4024, "step": 14000 }, { "epoch": 0.054158741939973096, "grad_norm": 0.13748984038829803, "learning_rate": 0.002, "loss": 2.3995, "step": 14010 }, { "epoch": 0.054197399143356376, "grad_norm": 0.13295917212963104, "learning_rate": 0.002, "loss": 2.3914, "step": 14020 }, { "epoch": 0.05423605634673965, "grad_norm": 0.10712581872940063, "learning_rate": 0.002, "loss": 2.392, "step": 14030 }, { "epoch": 0.05427471355012293, "grad_norm": 0.17704936861991882, "learning_rate": 0.002, "loss": 2.3972, "step": 14040 }, { "epoch": 0.05431337075350621, "grad_norm": 0.11886173486709595, "learning_rate": 0.002, "loss": 2.3952, "step": 14050 }, { "epoch": 0.05435202795688949, "grad_norm": 0.12546727061271667, "learning_rate": 0.002, "loss": 2.397, "step": 14060 }, { "epoch": 0.05439068516027277, "grad_norm": 0.16801774501800537, "learning_rate": 0.002, "loss": 2.4181, "step": 14070 }, { "epoch": 0.05442934236365604, "grad_norm": 0.10857007652521133, "learning_rate": 0.002, "loss": 2.4094, "step": 14080 }, { "epoch": 0.05446799956703932, "grad_norm": 0.12261257320642471, "learning_rate": 0.002, "loss": 2.4064, "step": 14090 }, { "epoch": 0.0545066567704226, "grad_norm": 0.12848199903964996, "learning_rate": 0.002, "loss": 2.3937, "step": 14100 }, { "epoch": 0.05454531397380588, "grad_norm": 0.10281208902597427, "learning_rate": 0.002, "loss": 2.3913, "step": 14110 }, { "epoch": 0.054583971177189154, "grad_norm": 0.14198198914527893, "learning_rate": 0.002, "loss": 2.4063, "step": 14120 }, { "epoch": 0.054622628380572434, "grad_norm": 0.1394672691822052, "learning_rate": 0.002, "loss": 2.3922, "step": 14130 }, { "epoch": 0.054661285583955714, "grad_norm": 0.13187329471111298, "learning_rate": 0.002, "loss": 2.3743, "step": 14140 }, { "epoch": 0.054699942787338994, "grad_norm": 0.14645549654960632, "learning_rate": 0.002, "loss": 2.4134, "step": 14150 }, { "epoch": 0.054738599990722274, "grad_norm": 0.13064813613891602, "learning_rate": 0.002, "loss": 2.3849, "step": 14160 }, { "epoch": 0.05477725719410555, "grad_norm": 0.12041107565164566, "learning_rate": 0.002, "loss": 2.3814, "step": 14170 }, { "epoch": 0.05481591439748883, "grad_norm": 0.11944100260734558, "learning_rate": 0.002, "loss": 2.3875, "step": 14180 }, { "epoch": 0.05485457160087211, "grad_norm": 0.13119396567344666, "learning_rate": 0.002, "loss": 2.3757, "step": 14190 }, { "epoch": 0.05489322880425539, "grad_norm": 0.12450587749481201, "learning_rate": 0.002, "loss": 2.3975, "step": 14200 }, { "epoch": 0.05493188600763867, "grad_norm": 0.1268710047006607, "learning_rate": 0.002, "loss": 2.3874, "step": 14210 }, { "epoch": 0.05497054321102194, "grad_norm": 0.12374629825353622, "learning_rate": 0.002, "loss": 2.3886, "step": 14220 }, { "epoch": 0.05500920041440522, "grad_norm": 0.14392772316932678, "learning_rate": 0.002, "loss": 2.4025, "step": 14230 }, { "epoch": 0.0550478576177885, "grad_norm": 0.11412364989519119, "learning_rate": 0.002, "loss": 2.3998, "step": 14240 }, { "epoch": 0.05508651482117178, "grad_norm": 0.11980044096708298, "learning_rate": 0.002, "loss": 2.3938, "step": 14250 }, { "epoch": 0.05512517202455505, "grad_norm": 0.13415683805942535, "learning_rate": 0.002, "loss": 2.4014, "step": 14260 }, { "epoch": 0.05516382922793833, "grad_norm": 0.1138518825173378, "learning_rate": 0.002, "loss": 2.3901, "step": 14270 }, { "epoch": 0.05520248643132161, "grad_norm": 0.12392842769622803, "learning_rate": 0.002, "loss": 2.3895, "step": 14280 }, { "epoch": 0.05524114363470489, "grad_norm": 0.1531682163476944, "learning_rate": 0.002, "loss": 2.3988, "step": 14290 }, { "epoch": 0.05527980083808817, "grad_norm": 0.1261725127696991, "learning_rate": 0.002, "loss": 2.3926, "step": 14300 }, { "epoch": 0.055318458041471445, "grad_norm": 0.10306981950998306, "learning_rate": 0.002, "loss": 2.3912, "step": 14310 }, { "epoch": 0.055357115244854725, "grad_norm": 0.13406990468502045, "learning_rate": 0.002, "loss": 2.416, "step": 14320 }, { "epoch": 0.055395772448238005, "grad_norm": 0.12140738219022751, "learning_rate": 0.002, "loss": 2.3945, "step": 14330 }, { "epoch": 0.055434429651621285, "grad_norm": 0.12369288504123688, "learning_rate": 0.002, "loss": 2.3788, "step": 14340 }, { "epoch": 0.055473086855004565, "grad_norm": 0.12816184759140015, "learning_rate": 0.002, "loss": 2.3931, "step": 14350 }, { "epoch": 0.05551174405838784, "grad_norm": 0.12383025139570236, "learning_rate": 0.002, "loss": 2.3911, "step": 14360 }, { "epoch": 0.05555040126177112, "grad_norm": 0.2093370109796524, "learning_rate": 0.002, "loss": 2.3928, "step": 14370 }, { "epoch": 0.0555890584651544, "grad_norm": 0.12548458576202393, "learning_rate": 0.002, "loss": 2.4142, "step": 14380 }, { "epoch": 0.05562771566853768, "grad_norm": 0.11871566623449326, "learning_rate": 0.002, "loss": 2.3986, "step": 14390 }, { "epoch": 0.05566637287192095, "grad_norm": 0.12631046772003174, "learning_rate": 0.002, "loss": 2.3936, "step": 14400 }, { "epoch": 0.05570503007530423, "grad_norm": 0.16620250046253204, "learning_rate": 0.002, "loss": 2.4023, "step": 14410 }, { "epoch": 0.05574368727868751, "grad_norm": 0.13363073766231537, "learning_rate": 0.002, "loss": 2.3915, "step": 14420 }, { "epoch": 0.05578234448207079, "grad_norm": 0.1280767321586609, "learning_rate": 0.002, "loss": 2.3954, "step": 14430 }, { "epoch": 0.05582100168545407, "grad_norm": 0.11046691983938217, "learning_rate": 0.002, "loss": 2.3925, "step": 14440 }, { "epoch": 0.055859658888837344, "grad_norm": 0.13728322088718414, "learning_rate": 0.002, "loss": 2.3992, "step": 14450 }, { "epoch": 0.055898316092220623, "grad_norm": 0.1518576294183731, "learning_rate": 0.002, "loss": 2.3964, "step": 14460 }, { "epoch": 0.0559369732956039, "grad_norm": 0.12583044171333313, "learning_rate": 0.002, "loss": 2.3944, "step": 14470 }, { "epoch": 0.05597563049898718, "grad_norm": 0.11695846915245056, "learning_rate": 0.002, "loss": 2.3858, "step": 14480 }, { "epoch": 0.056014287702370456, "grad_norm": 0.11865654587745667, "learning_rate": 0.002, "loss": 2.3907, "step": 14490 }, { "epoch": 0.056052944905753736, "grad_norm": 0.13754767179489136, "learning_rate": 0.002, "loss": 2.3897, "step": 14500 }, { "epoch": 0.056091602109137016, "grad_norm": 0.13682836294174194, "learning_rate": 0.002, "loss": 2.3889, "step": 14510 }, { "epoch": 0.056130259312520296, "grad_norm": 0.11361128836870193, "learning_rate": 0.002, "loss": 2.4157, "step": 14520 }, { "epoch": 0.056168916515903576, "grad_norm": 0.14468665421009064, "learning_rate": 0.002, "loss": 2.3852, "step": 14530 }, { "epoch": 0.05620757371928685, "grad_norm": 0.13871458172798157, "learning_rate": 0.002, "loss": 2.3996, "step": 14540 }, { "epoch": 0.05624623092267013, "grad_norm": 0.11031243205070496, "learning_rate": 0.002, "loss": 2.4044, "step": 14550 }, { "epoch": 0.05628488812605341, "grad_norm": 0.10833417624235153, "learning_rate": 0.002, "loss": 2.3902, "step": 14560 }, { "epoch": 0.05632354532943669, "grad_norm": 0.12488801032304764, "learning_rate": 0.002, "loss": 2.4009, "step": 14570 }, { "epoch": 0.05636220253281997, "grad_norm": 0.11902682483196259, "learning_rate": 0.002, "loss": 2.4042, "step": 14580 }, { "epoch": 0.05640085973620324, "grad_norm": 0.15277214348316193, "learning_rate": 0.002, "loss": 2.3996, "step": 14590 }, { "epoch": 0.05643951693958652, "grad_norm": 0.12215857207775116, "learning_rate": 0.002, "loss": 2.4015, "step": 14600 }, { "epoch": 0.0564781741429698, "grad_norm": 0.10888016223907471, "learning_rate": 0.002, "loss": 2.3991, "step": 14610 }, { "epoch": 0.05651683134635308, "grad_norm": 0.12086877226829529, "learning_rate": 0.002, "loss": 2.3884, "step": 14620 }, { "epoch": 0.056555488549736355, "grad_norm": 0.12756405770778656, "learning_rate": 0.002, "loss": 2.402, "step": 14630 }, { "epoch": 0.056594145753119635, "grad_norm": 0.11681199818849564, "learning_rate": 0.002, "loss": 2.3941, "step": 14640 }, { "epoch": 0.056632802956502915, "grad_norm": 0.15769197046756744, "learning_rate": 0.002, "loss": 2.3885, "step": 14650 }, { "epoch": 0.056671460159886194, "grad_norm": 0.14188191294670105, "learning_rate": 0.002, "loss": 2.4049, "step": 14660 }, { "epoch": 0.056710117363269474, "grad_norm": 0.12859699130058289, "learning_rate": 0.002, "loss": 2.3893, "step": 14670 }, { "epoch": 0.05674877456665275, "grad_norm": 0.11465740203857422, "learning_rate": 0.002, "loss": 2.3832, "step": 14680 }, { "epoch": 0.05678743177003603, "grad_norm": 0.15560325980186462, "learning_rate": 0.002, "loss": 2.3911, "step": 14690 }, { "epoch": 0.05682608897341931, "grad_norm": 0.1308726817369461, "learning_rate": 0.002, "loss": 2.3926, "step": 14700 }, { "epoch": 0.05686474617680259, "grad_norm": 0.14174123108386993, "learning_rate": 0.002, "loss": 2.3805, "step": 14710 }, { "epoch": 0.05690340338018587, "grad_norm": 0.12352757155895233, "learning_rate": 0.002, "loss": 2.41, "step": 14720 }, { "epoch": 0.05694206058356914, "grad_norm": 0.11764993518590927, "learning_rate": 0.002, "loss": 2.3907, "step": 14730 }, { "epoch": 0.05698071778695242, "grad_norm": 0.12702719867229462, "learning_rate": 0.002, "loss": 2.394, "step": 14740 }, { "epoch": 0.0570193749903357, "grad_norm": 0.11749805510044098, "learning_rate": 0.002, "loss": 2.3948, "step": 14750 }, { "epoch": 0.05705803219371898, "grad_norm": 0.10064171999692917, "learning_rate": 0.002, "loss": 2.3877, "step": 14760 }, { "epoch": 0.05709668939710225, "grad_norm": 0.12813323736190796, "learning_rate": 0.002, "loss": 2.3953, "step": 14770 }, { "epoch": 0.05713534660048553, "grad_norm": 0.11712568998336792, "learning_rate": 0.002, "loss": 2.3969, "step": 14780 }, { "epoch": 0.05717400380386881, "grad_norm": 0.1386580765247345, "learning_rate": 0.002, "loss": 2.3848, "step": 14790 }, { "epoch": 0.05721266100725209, "grad_norm": 0.13500967621803284, "learning_rate": 0.002, "loss": 2.3887, "step": 14800 }, { "epoch": 0.05725131821063537, "grad_norm": 0.12583929300308228, "learning_rate": 0.002, "loss": 2.3996, "step": 14810 }, { "epoch": 0.057289975414018646, "grad_norm": 0.10110796242952347, "learning_rate": 0.002, "loss": 2.3934, "step": 14820 }, { "epoch": 0.057328632617401926, "grad_norm": 0.11224600672721863, "learning_rate": 0.002, "loss": 2.397, "step": 14830 }, { "epoch": 0.057367289820785206, "grad_norm": 0.1203756108880043, "learning_rate": 0.002, "loss": 2.4008, "step": 14840 }, { "epoch": 0.057405947024168485, "grad_norm": 0.14332318305969238, "learning_rate": 0.002, "loss": 2.4115, "step": 14850 }, { "epoch": 0.057444604227551765, "grad_norm": 0.13124069571495056, "learning_rate": 0.002, "loss": 2.3937, "step": 14860 }, { "epoch": 0.05748326143093504, "grad_norm": 0.11798430979251862, "learning_rate": 0.002, "loss": 2.3706, "step": 14870 }, { "epoch": 0.05752191863431832, "grad_norm": 0.12214729934930801, "learning_rate": 0.002, "loss": 2.3888, "step": 14880 }, { "epoch": 0.0575605758377016, "grad_norm": 0.1360270380973816, "learning_rate": 0.002, "loss": 2.4036, "step": 14890 }, { "epoch": 0.05759923304108488, "grad_norm": 0.12253168225288391, "learning_rate": 0.002, "loss": 2.3981, "step": 14900 }, { "epoch": 0.05763789024446815, "grad_norm": 0.12843644618988037, "learning_rate": 0.002, "loss": 2.4008, "step": 14910 }, { "epoch": 0.05767654744785143, "grad_norm": 0.10690762847661972, "learning_rate": 0.002, "loss": 2.381, "step": 14920 }, { "epoch": 0.05771520465123471, "grad_norm": 0.12708573043346405, "learning_rate": 0.002, "loss": 2.3987, "step": 14930 }, { "epoch": 0.05775386185461799, "grad_norm": 0.11386797577142715, "learning_rate": 0.002, "loss": 2.3873, "step": 14940 }, { "epoch": 0.05779251905800127, "grad_norm": 0.1272570788860321, "learning_rate": 0.002, "loss": 2.3772, "step": 14950 }, { "epoch": 0.057831176261384544, "grad_norm": 0.1215393990278244, "learning_rate": 0.002, "loss": 2.3701, "step": 14960 }, { "epoch": 0.057869833464767824, "grad_norm": 0.11468160897493362, "learning_rate": 0.002, "loss": 2.4048, "step": 14970 }, { "epoch": 0.057908490668151104, "grad_norm": 0.12078743427991867, "learning_rate": 0.002, "loss": 2.3905, "step": 14980 }, { "epoch": 0.057947147871534384, "grad_norm": 0.1239713579416275, "learning_rate": 0.002, "loss": 2.406, "step": 14990 }, { "epoch": 0.05798580507491766, "grad_norm": 0.14568138122558594, "learning_rate": 0.002, "loss": 2.3732, "step": 15000 }, { "epoch": 0.05802446227830094, "grad_norm": 0.1350502371788025, "learning_rate": 0.002, "loss": 2.4036, "step": 15010 }, { "epoch": 0.05806311948168422, "grad_norm": 0.12064608931541443, "learning_rate": 0.002, "loss": 2.4013, "step": 15020 }, { "epoch": 0.0581017766850675, "grad_norm": 0.13205865025520325, "learning_rate": 0.002, "loss": 2.3941, "step": 15030 }, { "epoch": 0.05814043388845078, "grad_norm": 0.11407764256000519, "learning_rate": 0.002, "loss": 2.3833, "step": 15040 }, { "epoch": 0.05817909109183405, "grad_norm": 0.1086033508181572, "learning_rate": 0.002, "loss": 2.3905, "step": 15050 }, { "epoch": 0.05821774829521733, "grad_norm": 0.11027983576059341, "learning_rate": 0.002, "loss": 2.3785, "step": 15060 }, { "epoch": 0.05825640549860061, "grad_norm": 0.12719698250293732, "learning_rate": 0.002, "loss": 2.387, "step": 15070 }, { "epoch": 0.05829506270198389, "grad_norm": 0.12091485410928726, "learning_rate": 0.002, "loss": 2.3861, "step": 15080 }, { "epoch": 0.05833371990536717, "grad_norm": 0.11346925050020218, "learning_rate": 0.002, "loss": 2.3921, "step": 15090 }, { "epoch": 0.05837237710875044, "grad_norm": 0.12581966817378998, "learning_rate": 0.002, "loss": 2.3889, "step": 15100 }, { "epoch": 0.05841103431213372, "grad_norm": 0.12088809907436371, "learning_rate": 0.002, "loss": 2.3979, "step": 15110 }, { "epoch": 0.058449691515517, "grad_norm": 0.11056520789861679, "learning_rate": 0.002, "loss": 2.3911, "step": 15120 }, { "epoch": 0.05848834871890028, "grad_norm": 0.11620938032865524, "learning_rate": 0.002, "loss": 2.3843, "step": 15130 }, { "epoch": 0.058527005922283555, "grad_norm": 0.11881408095359802, "learning_rate": 0.002, "loss": 2.4168, "step": 15140 }, { "epoch": 0.058565663125666835, "grad_norm": 0.1285400390625, "learning_rate": 0.002, "loss": 2.3895, "step": 15150 }, { "epoch": 0.058604320329050115, "grad_norm": 0.11662331968545914, "learning_rate": 0.002, "loss": 2.399, "step": 15160 }, { "epoch": 0.058642977532433395, "grad_norm": 0.15776588022708893, "learning_rate": 0.002, "loss": 2.3772, "step": 15170 }, { "epoch": 0.058681634735816675, "grad_norm": 0.12372921407222748, "learning_rate": 0.002, "loss": 2.4061, "step": 15180 }, { "epoch": 0.05872029193919995, "grad_norm": 0.1367715746164322, "learning_rate": 0.002, "loss": 2.3926, "step": 15190 }, { "epoch": 0.05875894914258323, "grad_norm": 0.11921197175979614, "learning_rate": 0.002, "loss": 2.4003, "step": 15200 }, { "epoch": 0.05879760634596651, "grad_norm": 0.12707039713859558, "learning_rate": 0.002, "loss": 2.3901, "step": 15210 }, { "epoch": 0.05883626354934979, "grad_norm": 0.11815090477466583, "learning_rate": 0.002, "loss": 2.4102, "step": 15220 }, { "epoch": 0.05887492075273307, "grad_norm": 0.1395442932844162, "learning_rate": 0.002, "loss": 2.3962, "step": 15230 }, { "epoch": 0.05891357795611634, "grad_norm": 0.10509292781352997, "learning_rate": 0.002, "loss": 2.3918, "step": 15240 }, { "epoch": 0.05895223515949962, "grad_norm": 0.13025058805942535, "learning_rate": 0.002, "loss": 2.3921, "step": 15250 }, { "epoch": 0.0589908923628829, "grad_norm": 0.17052233219146729, "learning_rate": 0.002, "loss": 2.381, "step": 15260 }, { "epoch": 0.05902954956626618, "grad_norm": 0.1250266581773758, "learning_rate": 0.002, "loss": 2.3949, "step": 15270 }, { "epoch": 0.05906820676964945, "grad_norm": 0.11990693211555481, "learning_rate": 0.002, "loss": 2.3892, "step": 15280 }, { "epoch": 0.05910686397303273, "grad_norm": 0.11827023327350616, "learning_rate": 0.002, "loss": 2.4032, "step": 15290 }, { "epoch": 0.05914552117641601, "grad_norm": 0.12932279706001282, "learning_rate": 0.002, "loss": 2.4075, "step": 15300 }, { "epoch": 0.05918417837979929, "grad_norm": 0.14034594595432281, "learning_rate": 0.002, "loss": 2.3799, "step": 15310 }, { "epoch": 0.05922283558318257, "grad_norm": 0.1353245973587036, "learning_rate": 0.002, "loss": 2.3751, "step": 15320 }, { "epoch": 0.059261492786565846, "grad_norm": 0.11280287802219391, "learning_rate": 0.002, "loss": 2.3859, "step": 15330 }, { "epoch": 0.059300149989949126, "grad_norm": 0.10668610036373138, "learning_rate": 0.002, "loss": 2.3887, "step": 15340 }, { "epoch": 0.059338807193332406, "grad_norm": 0.16798001527786255, "learning_rate": 0.002, "loss": 2.3912, "step": 15350 }, { "epoch": 0.059377464396715686, "grad_norm": 0.1263061910867691, "learning_rate": 0.002, "loss": 2.3778, "step": 15360 }, { "epoch": 0.059416121600098966, "grad_norm": 0.11349482089281082, "learning_rate": 0.002, "loss": 2.4078, "step": 15370 }, { "epoch": 0.05945477880348224, "grad_norm": 0.142970010638237, "learning_rate": 0.002, "loss": 2.4013, "step": 15380 }, { "epoch": 0.05949343600686552, "grad_norm": 0.1031302735209465, "learning_rate": 0.002, "loss": 2.3776, "step": 15390 }, { "epoch": 0.0595320932102488, "grad_norm": 0.11524147540330887, "learning_rate": 0.002, "loss": 2.3778, "step": 15400 }, { "epoch": 0.05957075041363208, "grad_norm": 0.12222982197999954, "learning_rate": 0.002, "loss": 2.3905, "step": 15410 }, { "epoch": 0.05960940761701535, "grad_norm": 0.10952379554510117, "learning_rate": 0.002, "loss": 2.3993, "step": 15420 }, { "epoch": 0.05964806482039863, "grad_norm": 0.1340419203042984, "learning_rate": 0.002, "loss": 2.3869, "step": 15430 }, { "epoch": 0.05968672202378191, "grad_norm": 0.17222769558429718, "learning_rate": 0.002, "loss": 2.3963, "step": 15440 }, { "epoch": 0.05972537922716519, "grad_norm": 0.11056956648826599, "learning_rate": 0.002, "loss": 2.39, "step": 15450 }, { "epoch": 0.05976403643054847, "grad_norm": 0.11266839504241943, "learning_rate": 0.002, "loss": 2.391, "step": 15460 }, { "epoch": 0.059802693633931744, "grad_norm": 0.14877283573150635, "learning_rate": 0.002, "loss": 2.3763, "step": 15470 }, { "epoch": 0.059841350837315024, "grad_norm": 0.1140676960349083, "learning_rate": 0.002, "loss": 2.4032, "step": 15480 }, { "epoch": 0.059880008040698304, "grad_norm": 0.11659525334835052, "learning_rate": 0.002, "loss": 2.3889, "step": 15490 }, { "epoch": 0.059918665244081584, "grad_norm": 0.0978536531329155, "learning_rate": 0.002, "loss": 2.3965, "step": 15500 }, { "epoch": 0.05995732244746486, "grad_norm": 0.13969437777996063, "learning_rate": 0.002, "loss": 2.4046, "step": 15510 }, { "epoch": 0.05999597965084814, "grad_norm": 0.1132916733622551, "learning_rate": 0.002, "loss": 2.3867, "step": 15520 }, { "epoch": 0.06003463685423142, "grad_norm": 0.12317940592765808, "learning_rate": 0.002, "loss": 2.3947, "step": 15530 }, { "epoch": 0.0600732940576147, "grad_norm": 0.14708033204078674, "learning_rate": 0.002, "loss": 2.3983, "step": 15540 }, { "epoch": 0.06011195126099798, "grad_norm": 0.12788653373718262, "learning_rate": 0.002, "loss": 2.3984, "step": 15550 }, { "epoch": 0.06015060846438125, "grad_norm": 0.12532587349414825, "learning_rate": 0.002, "loss": 2.3927, "step": 15560 }, { "epoch": 0.06018926566776453, "grad_norm": 0.10975198447704315, "learning_rate": 0.002, "loss": 2.3771, "step": 15570 }, { "epoch": 0.06022792287114781, "grad_norm": 0.13159969449043274, "learning_rate": 0.002, "loss": 2.3946, "step": 15580 }, { "epoch": 0.06026658007453109, "grad_norm": 0.13560065627098083, "learning_rate": 0.002, "loss": 2.3945, "step": 15590 }, { "epoch": 0.06030523727791437, "grad_norm": 0.11611117422580719, "learning_rate": 0.002, "loss": 2.3913, "step": 15600 }, { "epoch": 0.06034389448129764, "grad_norm": 0.1417783945798874, "learning_rate": 0.002, "loss": 2.391, "step": 15610 }, { "epoch": 0.06038255168468092, "grad_norm": 0.0967651829123497, "learning_rate": 0.002, "loss": 2.3956, "step": 15620 }, { "epoch": 0.0604212088880642, "grad_norm": 0.1179899126291275, "learning_rate": 0.002, "loss": 2.3779, "step": 15630 }, { "epoch": 0.06045986609144748, "grad_norm": 0.10877932608127594, "learning_rate": 0.002, "loss": 2.3889, "step": 15640 }, { "epoch": 0.060498523294830756, "grad_norm": 0.11503571271896362, "learning_rate": 0.002, "loss": 2.3812, "step": 15650 }, { "epoch": 0.060537180498214035, "grad_norm": 0.1177992895245552, "learning_rate": 0.002, "loss": 2.4062, "step": 15660 }, { "epoch": 0.060575837701597315, "grad_norm": 0.14880454540252686, "learning_rate": 0.002, "loss": 2.3968, "step": 15670 }, { "epoch": 0.060614494904980595, "grad_norm": 0.1386214941740036, "learning_rate": 0.002, "loss": 2.384, "step": 15680 }, { "epoch": 0.060653152108363875, "grad_norm": 0.1116911992430687, "learning_rate": 0.002, "loss": 2.3854, "step": 15690 }, { "epoch": 0.06069180931174715, "grad_norm": 0.12330832332372665, "learning_rate": 0.002, "loss": 2.41, "step": 15700 }, { "epoch": 0.06073046651513043, "grad_norm": 0.11281420290470123, "learning_rate": 0.002, "loss": 2.3865, "step": 15710 }, { "epoch": 0.06076912371851371, "grad_norm": 0.18309475481510162, "learning_rate": 0.002, "loss": 2.3846, "step": 15720 }, { "epoch": 0.06080778092189699, "grad_norm": 0.10819264501333237, "learning_rate": 0.002, "loss": 2.3907, "step": 15730 }, { "epoch": 0.06084643812528027, "grad_norm": 0.10897762328386307, "learning_rate": 0.002, "loss": 2.3887, "step": 15740 }, { "epoch": 0.06088509532866354, "grad_norm": 0.12542861700057983, "learning_rate": 0.002, "loss": 2.3743, "step": 15750 }, { "epoch": 0.06092375253204682, "grad_norm": 0.11837367713451385, "learning_rate": 0.002, "loss": 2.3838, "step": 15760 }, { "epoch": 0.0609624097354301, "grad_norm": 0.11848590523004532, "learning_rate": 0.002, "loss": 2.3833, "step": 15770 }, { "epoch": 0.06100106693881338, "grad_norm": 0.1487286239862442, "learning_rate": 0.002, "loss": 2.3879, "step": 15780 }, { "epoch": 0.061039724142196654, "grad_norm": 0.12716227769851685, "learning_rate": 0.002, "loss": 2.3774, "step": 15790 }, { "epoch": 0.061078381345579934, "grad_norm": 0.1302156001329422, "learning_rate": 0.002, "loss": 2.3881, "step": 15800 }, { "epoch": 0.061117038548963214, "grad_norm": 0.11784037947654724, "learning_rate": 0.002, "loss": 2.402, "step": 15810 }, { "epoch": 0.061155695752346494, "grad_norm": 0.11263030022382736, "learning_rate": 0.002, "loss": 2.3859, "step": 15820 }, { "epoch": 0.061194352955729774, "grad_norm": 0.1392696052789688, "learning_rate": 0.002, "loss": 2.3846, "step": 15830 }, { "epoch": 0.06123301015911305, "grad_norm": 0.22238633036613464, "learning_rate": 0.002, "loss": 2.4209, "step": 15840 }, { "epoch": 0.061271667362496327, "grad_norm": 0.1388339102268219, "learning_rate": 0.002, "loss": 2.3937, "step": 15850 }, { "epoch": 0.061310324565879606, "grad_norm": 0.13323858380317688, "learning_rate": 0.002, "loss": 2.3897, "step": 15860 }, { "epoch": 0.061348981769262886, "grad_norm": 0.15302272140979767, "learning_rate": 0.002, "loss": 2.3889, "step": 15870 }, { "epoch": 0.061387638972646166, "grad_norm": 0.17193780839443207, "learning_rate": 0.002, "loss": 2.3962, "step": 15880 }, { "epoch": 0.06142629617602944, "grad_norm": 0.11914423853158951, "learning_rate": 0.002, "loss": 2.3723, "step": 15890 }, { "epoch": 0.06146495337941272, "grad_norm": 0.12326352298259735, "learning_rate": 0.002, "loss": 2.3754, "step": 15900 }, { "epoch": 0.061503610582796, "grad_norm": 0.10487114638090134, "learning_rate": 0.002, "loss": 2.3965, "step": 15910 }, { "epoch": 0.06154226778617928, "grad_norm": 0.11379309743642807, "learning_rate": 0.002, "loss": 2.4036, "step": 15920 }, { "epoch": 0.06158092498956255, "grad_norm": 0.1775919795036316, "learning_rate": 0.002, "loss": 2.4072, "step": 15930 }, { "epoch": 0.06161958219294583, "grad_norm": 0.13293004035949707, "learning_rate": 0.002, "loss": 2.3827, "step": 15940 }, { "epoch": 0.06165823939632911, "grad_norm": 0.11617787182331085, "learning_rate": 0.002, "loss": 2.3892, "step": 15950 }, { "epoch": 0.06169689659971239, "grad_norm": 0.1525639146566391, "learning_rate": 0.002, "loss": 2.3895, "step": 15960 }, { "epoch": 0.06173555380309567, "grad_norm": 0.114244744181633, "learning_rate": 0.002, "loss": 2.389, "step": 15970 }, { "epoch": 0.061774211006478945, "grad_norm": 0.10915686190128326, "learning_rate": 0.002, "loss": 2.3901, "step": 15980 }, { "epoch": 0.061812868209862225, "grad_norm": 0.11189400404691696, "learning_rate": 0.002, "loss": 2.391, "step": 15990 }, { "epoch": 0.061851525413245505, "grad_norm": 0.13927581906318665, "learning_rate": 0.002, "loss": 2.3952, "step": 16000 }, { "epoch": 0.061890182616628785, "grad_norm": 0.12263130396604538, "learning_rate": 0.002, "loss": 2.3818, "step": 16010 }, { "epoch": 0.06192883982001206, "grad_norm": 0.14536434412002563, "learning_rate": 0.002, "loss": 2.3894, "step": 16020 }, { "epoch": 0.06196749702339534, "grad_norm": 0.12297393381595612, "learning_rate": 0.002, "loss": 2.4019, "step": 16030 }, { "epoch": 0.06200615422677862, "grad_norm": 0.11197999864816666, "learning_rate": 0.002, "loss": 2.4026, "step": 16040 }, { "epoch": 0.0620448114301619, "grad_norm": 0.0987369641661644, "learning_rate": 0.002, "loss": 2.3922, "step": 16050 }, { "epoch": 0.06208346863354518, "grad_norm": 0.12647844851016998, "learning_rate": 0.002, "loss": 2.3969, "step": 16060 }, { "epoch": 0.06212212583692845, "grad_norm": 0.1253211498260498, "learning_rate": 0.002, "loss": 2.3797, "step": 16070 }, { "epoch": 0.06216078304031173, "grad_norm": 0.17184260487556458, "learning_rate": 0.002, "loss": 2.3922, "step": 16080 }, { "epoch": 0.06219944024369501, "grad_norm": 0.12006668746471405, "learning_rate": 0.002, "loss": 2.3838, "step": 16090 }, { "epoch": 0.06223809744707829, "grad_norm": 0.1277729719877243, "learning_rate": 0.002, "loss": 2.391, "step": 16100 }, { "epoch": 0.06227675465046157, "grad_norm": 0.13591617345809937, "learning_rate": 0.002, "loss": 2.3843, "step": 16110 }, { "epoch": 0.06231541185384484, "grad_norm": 0.0973956510424614, "learning_rate": 0.002, "loss": 2.392, "step": 16120 }, { "epoch": 0.06235406905722812, "grad_norm": 0.11103206872940063, "learning_rate": 0.002, "loss": 2.3901, "step": 16130 }, { "epoch": 0.0623927262606114, "grad_norm": 0.1389506459236145, "learning_rate": 0.002, "loss": 2.3829, "step": 16140 }, { "epoch": 0.06243138346399468, "grad_norm": 0.12428136169910431, "learning_rate": 0.002, "loss": 2.3939, "step": 16150 }, { "epoch": 0.062470040667377956, "grad_norm": 0.11365336179733276, "learning_rate": 0.002, "loss": 2.3806, "step": 16160 }, { "epoch": 0.06250869787076124, "grad_norm": 0.1254616379737854, "learning_rate": 0.002, "loss": 2.3777, "step": 16170 }, { "epoch": 0.06254735507414452, "grad_norm": 0.1262783706188202, "learning_rate": 0.002, "loss": 2.3789, "step": 16180 }, { "epoch": 0.06258601227752779, "grad_norm": 0.10713041573762894, "learning_rate": 0.002, "loss": 2.3823, "step": 16190 }, { "epoch": 0.06262466948091107, "grad_norm": 0.10575401782989502, "learning_rate": 0.002, "loss": 2.3831, "step": 16200 }, { "epoch": 0.06266332668429435, "grad_norm": 0.11362715810537338, "learning_rate": 0.002, "loss": 2.3933, "step": 16210 }, { "epoch": 0.06270198388767763, "grad_norm": 0.11229918152093887, "learning_rate": 0.002, "loss": 2.3845, "step": 16220 }, { "epoch": 0.06274064109106091, "grad_norm": 0.1316540241241455, "learning_rate": 0.002, "loss": 2.3891, "step": 16230 }, { "epoch": 0.06277929829444419, "grad_norm": 0.10296753793954849, "learning_rate": 0.002, "loss": 2.3797, "step": 16240 }, { "epoch": 0.06281795549782747, "grad_norm": 0.12756431102752686, "learning_rate": 0.002, "loss": 2.3934, "step": 16250 }, { "epoch": 0.06285661270121075, "grad_norm": 0.11889172345399857, "learning_rate": 0.002, "loss": 2.3757, "step": 16260 }, { "epoch": 0.06289526990459403, "grad_norm": 0.1506144106388092, "learning_rate": 0.002, "loss": 2.3856, "step": 16270 }, { "epoch": 0.0629339271079773, "grad_norm": 0.11867709457874298, "learning_rate": 0.002, "loss": 2.3827, "step": 16280 }, { "epoch": 0.06297258431136057, "grad_norm": 0.10856199264526367, "learning_rate": 0.002, "loss": 2.389, "step": 16290 }, { "epoch": 0.06301124151474385, "grad_norm": 0.1227448359131813, "learning_rate": 0.002, "loss": 2.399, "step": 16300 }, { "epoch": 0.06304989871812713, "grad_norm": 0.11154075711965561, "learning_rate": 0.002, "loss": 2.3776, "step": 16310 }, { "epoch": 0.06308855592151041, "grad_norm": 0.13388383388519287, "learning_rate": 0.002, "loss": 2.3954, "step": 16320 }, { "epoch": 0.0631272131248937, "grad_norm": 0.14490358531475067, "learning_rate": 0.002, "loss": 2.3833, "step": 16330 }, { "epoch": 0.06316587032827697, "grad_norm": 0.12403707951307297, "learning_rate": 0.002, "loss": 2.3907, "step": 16340 }, { "epoch": 0.06320452753166025, "grad_norm": 0.6374202370643616, "learning_rate": 0.002, "loss": 2.3763, "step": 16350 }, { "epoch": 0.06324318473504353, "grad_norm": 0.1515141725540161, "learning_rate": 0.002, "loss": 2.3872, "step": 16360 }, { "epoch": 0.0632818419384268, "grad_norm": 0.12579821050167084, "learning_rate": 0.002, "loss": 2.3828, "step": 16370 }, { "epoch": 0.06332049914181008, "grad_norm": 0.11145832389593124, "learning_rate": 0.002, "loss": 2.4088, "step": 16380 }, { "epoch": 0.06335915634519336, "grad_norm": 0.11568121612071991, "learning_rate": 0.002, "loss": 2.3804, "step": 16390 }, { "epoch": 0.06339781354857664, "grad_norm": 0.1275532841682434, "learning_rate": 0.002, "loss": 2.3909, "step": 16400 }, { "epoch": 0.06343647075195992, "grad_norm": 0.1336016207933426, "learning_rate": 0.002, "loss": 2.3805, "step": 16410 }, { "epoch": 0.0634751279553432, "grad_norm": 0.1019514799118042, "learning_rate": 0.002, "loss": 2.393, "step": 16420 }, { "epoch": 0.06351378515872648, "grad_norm": 0.12241934984922409, "learning_rate": 0.002, "loss": 2.3918, "step": 16430 }, { "epoch": 0.06355244236210976, "grad_norm": 0.13257557153701782, "learning_rate": 0.002, "loss": 2.3734, "step": 16440 }, { "epoch": 0.06359109956549304, "grad_norm": 0.12866371870040894, "learning_rate": 0.002, "loss": 2.3911, "step": 16450 }, { "epoch": 0.0636297567688763, "grad_norm": 0.10742007941007614, "learning_rate": 0.002, "loss": 2.3838, "step": 16460 }, { "epoch": 0.06366841397225959, "grad_norm": 0.11636551469564438, "learning_rate": 0.002, "loss": 2.3968, "step": 16470 }, { "epoch": 0.06370707117564287, "grad_norm": 0.11096464097499847, "learning_rate": 0.002, "loss": 2.3836, "step": 16480 }, { "epoch": 0.06374572837902615, "grad_norm": 0.10752718150615692, "learning_rate": 0.002, "loss": 2.3778, "step": 16490 }, { "epoch": 0.06378438558240943, "grad_norm": 0.11533330380916595, "learning_rate": 0.002, "loss": 2.3954, "step": 16500 }, { "epoch": 0.0638230427857927, "grad_norm": 0.14629951119422913, "learning_rate": 0.002, "loss": 2.4096, "step": 16510 }, { "epoch": 0.06386169998917599, "grad_norm": 0.11980853974819183, "learning_rate": 0.002, "loss": 2.3902, "step": 16520 }, { "epoch": 0.06390035719255927, "grad_norm": 0.14841502904891968, "learning_rate": 0.002, "loss": 2.397, "step": 16530 }, { "epoch": 0.06393901439594254, "grad_norm": 0.11659996956586838, "learning_rate": 0.002, "loss": 2.3851, "step": 16540 }, { "epoch": 0.06397767159932582, "grad_norm": 0.1393616944551468, "learning_rate": 0.002, "loss": 2.3913, "step": 16550 }, { "epoch": 0.06401632880270909, "grad_norm": 0.12829765677452087, "learning_rate": 0.002, "loss": 2.3959, "step": 16560 }, { "epoch": 0.06405498600609237, "grad_norm": 0.10121653974056244, "learning_rate": 0.002, "loss": 2.3809, "step": 16570 }, { "epoch": 0.06409364320947565, "grad_norm": 0.130237877368927, "learning_rate": 0.002, "loss": 2.4068, "step": 16580 }, { "epoch": 0.06413230041285893, "grad_norm": 0.12805424630641937, "learning_rate": 0.002, "loss": 2.4003, "step": 16590 }, { "epoch": 0.06417095761624221, "grad_norm": 0.13839778304100037, "learning_rate": 0.002, "loss": 2.3667, "step": 16600 }, { "epoch": 0.06420961481962549, "grad_norm": 0.10394293069839478, "learning_rate": 0.002, "loss": 2.4008, "step": 16610 }, { "epoch": 0.06424827202300877, "grad_norm": 0.1352817714214325, "learning_rate": 0.002, "loss": 2.3922, "step": 16620 }, { "epoch": 0.06428692922639205, "grad_norm": 0.12022780627012253, "learning_rate": 0.002, "loss": 2.397, "step": 16630 }, { "epoch": 0.06432558642977533, "grad_norm": 0.15451140701770782, "learning_rate": 0.002, "loss": 2.391, "step": 16640 }, { "epoch": 0.0643642436331586, "grad_norm": 0.12435924261808395, "learning_rate": 0.002, "loss": 2.3897, "step": 16650 }, { "epoch": 0.06440290083654188, "grad_norm": 0.09402734041213989, "learning_rate": 0.002, "loss": 2.3853, "step": 16660 }, { "epoch": 0.06444155803992516, "grad_norm": 0.15301144123077393, "learning_rate": 0.002, "loss": 2.3899, "step": 16670 }, { "epoch": 0.06448021524330844, "grad_norm": 0.12951329350471497, "learning_rate": 0.002, "loss": 2.3867, "step": 16680 }, { "epoch": 0.06451887244669172, "grad_norm": 0.10923568904399872, "learning_rate": 0.002, "loss": 2.3841, "step": 16690 }, { "epoch": 0.064557529650075, "grad_norm": 0.10874821990728378, "learning_rate": 0.002, "loss": 2.392, "step": 16700 }, { "epoch": 0.06459618685345828, "grad_norm": 0.12163813412189484, "learning_rate": 0.002, "loss": 2.3847, "step": 16710 }, { "epoch": 0.06463484405684156, "grad_norm": 0.11804139614105225, "learning_rate": 0.002, "loss": 2.3988, "step": 16720 }, { "epoch": 0.06467350126022484, "grad_norm": 0.11225691437721252, "learning_rate": 0.002, "loss": 2.3918, "step": 16730 }, { "epoch": 0.0647121584636081, "grad_norm": 0.12440955638885498, "learning_rate": 0.002, "loss": 2.4036, "step": 16740 }, { "epoch": 0.06475081566699138, "grad_norm": 0.11335624009370804, "learning_rate": 0.002, "loss": 2.3812, "step": 16750 }, { "epoch": 0.06478947287037466, "grad_norm": 0.13928361237049103, "learning_rate": 0.002, "loss": 2.3788, "step": 16760 }, { "epoch": 0.06482813007375794, "grad_norm": 0.12787578999996185, "learning_rate": 0.002, "loss": 2.3784, "step": 16770 }, { "epoch": 0.06486678727714122, "grad_norm": 0.12436560541391373, "learning_rate": 0.002, "loss": 2.3998, "step": 16780 }, { "epoch": 0.0649054444805245, "grad_norm": 0.12330484390258789, "learning_rate": 0.002, "loss": 2.3882, "step": 16790 }, { "epoch": 0.06494410168390778, "grad_norm": 0.12958766520023346, "learning_rate": 0.002, "loss": 2.3753, "step": 16800 }, { "epoch": 0.06498275888729106, "grad_norm": 0.12021058052778244, "learning_rate": 0.002, "loss": 2.4026, "step": 16810 }, { "epoch": 0.06502141609067434, "grad_norm": 0.11794283241033554, "learning_rate": 0.002, "loss": 2.373, "step": 16820 }, { "epoch": 0.06506007329405762, "grad_norm": 0.1235567033290863, "learning_rate": 0.002, "loss": 2.4002, "step": 16830 }, { "epoch": 0.06509873049744089, "grad_norm": 0.11046295613050461, "learning_rate": 0.002, "loss": 2.397, "step": 16840 }, { "epoch": 0.06513738770082417, "grad_norm": 0.10009758919477463, "learning_rate": 0.002, "loss": 2.3961, "step": 16850 }, { "epoch": 0.06517604490420745, "grad_norm": 0.13078537583351135, "learning_rate": 0.002, "loss": 2.3632, "step": 16860 }, { "epoch": 0.06521470210759073, "grad_norm": 0.11758499592542648, "learning_rate": 0.002, "loss": 2.3928, "step": 16870 }, { "epoch": 0.06525335931097401, "grad_norm": 0.12084563821554184, "learning_rate": 0.002, "loss": 2.3944, "step": 16880 }, { "epoch": 0.06529201651435729, "grad_norm": 0.1334090530872345, "learning_rate": 0.002, "loss": 2.402, "step": 16890 }, { "epoch": 0.06533067371774057, "grad_norm": 0.1250433474779129, "learning_rate": 0.002, "loss": 2.387, "step": 16900 }, { "epoch": 0.06536933092112385, "grad_norm": 0.1300637125968933, "learning_rate": 0.002, "loss": 2.3748, "step": 16910 }, { "epoch": 0.06540798812450713, "grad_norm": 0.09776023030281067, "learning_rate": 0.002, "loss": 2.3909, "step": 16920 }, { "epoch": 0.0654466453278904, "grad_norm": 0.11161220073699951, "learning_rate": 0.002, "loss": 2.387, "step": 16930 }, { "epoch": 0.06548530253127367, "grad_norm": 0.16109618544578552, "learning_rate": 0.002, "loss": 2.3706, "step": 16940 }, { "epoch": 0.06552395973465695, "grad_norm": 0.10100391507148743, "learning_rate": 0.002, "loss": 2.3986, "step": 16950 }, { "epoch": 0.06556261693804023, "grad_norm": 0.12305068969726562, "learning_rate": 0.002, "loss": 2.3967, "step": 16960 }, { "epoch": 0.06560127414142351, "grad_norm": 0.1368272602558136, "learning_rate": 0.002, "loss": 2.3813, "step": 16970 }, { "epoch": 0.06563993134480679, "grad_norm": 0.11573801189661026, "learning_rate": 0.002, "loss": 2.3807, "step": 16980 }, { "epoch": 0.06567858854819007, "grad_norm": 0.15364046394824982, "learning_rate": 0.002, "loss": 2.3807, "step": 16990 }, { "epoch": 0.06571724575157335, "grad_norm": 0.1141384094953537, "learning_rate": 0.002, "loss": 2.4019, "step": 17000 }, { "epoch": 0.06575590295495663, "grad_norm": 0.12331130355596542, "learning_rate": 0.002, "loss": 2.392, "step": 17010 }, { "epoch": 0.0657945601583399, "grad_norm": 0.10936921834945679, "learning_rate": 0.002, "loss": 2.4014, "step": 17020 }, { "epoch": 0.06583321736172318, "grad_norm": 0.112908273935318, "learning_rate": 0.002, "loss": 2.3769, "step": 17030 }, { "epoch": 0.06587187456510646, "grad_norm": 0.10883111506700516, "learning_rate": 0.002, "loss": 2.3939, "step": 17040 }, { "epoch": 0.06591053176848974, "grad_norm": 0.1455990970134735, "learning_rate": 0.002, "loss": 2.3862, "step": 17050 }, { "epoch": 0.06594918897187302, "grad_norm": 0.11590580642223358, "learning_rate": 0.002, "loss": 2.4026, "step": 17060 }, { "epoch": 0.0659878461752563, "grad_norm": 0.10473395138978958, "learning_rate": 0.002, "loss": 2.3899, "step": 17070 }, { "epoch": 0.06602650337863958, "grad_norm": 0.13969959318637848, "learning_rate": 0.002, "loss": 2.3741, "step": 17080 }, { "epoch": 0.06606516058202286, "grad_norm": 0.10769690573215485, "learning_rate": 0.002, "loss": 2.3938, "step": 17090 }, { "epoch": 0.06610381778540614, "grad_norm": 0.10526862740516663, "learning_rate": 0.002, "loss": 2.4033, "step": 17100 }, { "epoch": 0.0661424749887894, "grad_norm": 0.10747144371271133, "learning_rate": 0.002, "loss": 2.3765, "step": 17110 }, { "epoch": 0.06618113219217268, "grad_norm": 0.1294855922460556, "learning_rate": 0.002, "loss": 2.3993, "step": 17120 }, { "epoch": 0.06621978939555596, "grad_norm": 0.13381676375865936, "learning_rate": 0.002, "loss": 2.3977, "step": 17130 }, { "epoch": 0.06625844659893924, "grad_norm": 0.0924403965473175, "learning_rate": 0.002, "loss": 2.3917, "step": 17140 }, { "epoch": 0.06629710380232252, "grad_norm": 0.12378328293561935, "learning_rate": 0.002, "loss": 2.4003, "step": 17150 }, { "epoch": 0.0663357610057058, "grad_norm": 0.11626161634922028, "learning_rate": 0.002, "loss": 2.3676, "step": 17160 }, { "epoch": 0.06637441820908908, "grad_norm": 0.11259350925683975, "learning_rate": 0.002, "loss": 2.3858, "step": 17170 }, { "epoch": 0.06641307541247236, "grad_norm": 0.10405412316322327, "learning_rate": 0.002, "loss": 2.3879, "step": 17180 }, { "epoch": 0.06645173261585564, "grad_norm": 0.11657468974590302, "learning_rate": 0.002, "loss": 2.3949, "step": 17190 }, { "epoch": 0.06649038981923892, "grad_norm": 0.1102895587682724, "learning_rate": 0.002, "loss": 2.3933, "step": 17200 }, { "epoch": 0.06652904702262219, "grad_norm": 0.12945479154586792, "learning_rate": 0.002, "loss": 2.3829, "step": 17210 }, { "epoch": 0.06656770422600547, "grad_norm": 0.1273641437292099, "learning_rate": 0.002, "loss": 2.3812, "step": 17220 }, { "epoch": 0.06660636142938875, "grad_norm": 0.1088409423828125, "learning_rate": 0.002, "loss": 2.3705, "step": 17230 }, { "epoch": 0.06664501863277203, "grad_norm": 0.10263818502426147, "learning_rate": 0.002, "loss": 2.3848, "step": 17240 }, { "epoch": 0.06668367583615531, "grad_norm": 0.12657175958156586, "learning_rate": 0.002, "loss": 2.4051, "step": 17250 }, { "epoch": 0.06672233303953859, "grad_norm": 0.13320571184158325, "learning_rate": 0.002, "loss": 2.3783, "step": 17260 }, { "epoch": 0.06676099024292187, "grad_norm": 0.10921880602836609, "learning_rate": 0.002, "loss": 2.3814, "step": 17270 }, { "epoch": 0.06679964744630515, "grad_norm": 0.1231456771492958, "learning_rate": 0.002, "loss": 2.3794, "step": 17280 }, { "epoch": 0.06683830464968843, "grad_norm": 0.1348007470369339, "learning_rate": 0.002, "loss": 2.4037, "step": 17290 }, { "epoch": 0.0668769618530717, "grad_norm": 0.10821889340877533, "learning_rate": 0.002, "loss": 2.3881, "step": 17300 }, { "epoch": 0.06691561905645498, "grad_norm": 0.1109113097190857, "learning_rate": 0.002, "loss": 2.3837, "step": 17310 }, { "epoch": 0.06695427625983826, "grad_norm": 0.1150231882929802, "learning_rate": 0.002, "loss": 2.3893, "step": 17320 }, { "epoch": 0.06699293346322154, "grad_norm": 0.10427354276180267, "learning_rate": 0.002, "loss": 2.3738, "step": 17330 }, { "epoch": 0.06703159066660482, "grad_norm": 0.10868360102176666, "learning_rate": 0.002, "loss": 2.3905, "step": 17340 }, { "epoch": 0.0670702478699881, "grad_norm": 0.10528600960969925, "learning_rate": 0.002, "loss": 2.3925, "step": 17350 }, { "epoch": 0.06710890507337137, "grad_norm": 0.12943026423454285, "learning_rate": 0.002, "loss": 2.3786, "step": 17360 }, { "epoch": 0.06714756227675465, "grad_norm": 0.11128256469964981, "learning_rate": 0.002, "loss": 2.3826, "step": 17370 }, { "epoch": 0.06718621948013793, "grad_norm": 0.13703569769859314, "learning_rate": 0.002, "loss": 2.3882, "step": 17380 }, { "epoch": 0.0672248766835212, "grad_norm": 0.11831612139940262, "learning_rate": 0.002, "loss": 2.3854, "step": 17390 }, { "epoch": 0.06726353388690448, "grad_norm": 0.13398098945617676, "learning_rate": 0.002, "loss": 2.3936, "step": 17400 }, { "epoch": 0.06730219109028776, "grad_norm": 0.10794021934270859, "learning_rate": 0.002, "loss": 2.3853, "step": 17410 }, { "epoch": 0.06734084829367104, "grad_norm": 0.14635983109474182, "learning_rate": 0.002, "loss": 2.3847, "step": 17420 }, { "epoch": 0.06737950549705432, "grad_norm": 0.12673452496528625, "learning_rate": 0.002, "loss": 2.3811, "step": 17430 }, { "epoch": 0.0674181627004376, "grad_norm": 0.10255026817321777, "learning_rate": 0.002, "loss": 2.3835, "step": 17440 }, { "epoch": 0.06745681990382088, "grad_norm": 0.12468329817056656, "learning_rate": 0.002, "loss": 2.3732, "step": 17450 }, { "epoch": 0.06749547710720416, "grad_norm": 0.11799022555351257, "learning_rate": 0.002, "loss": 2.389, "step": 17460 }, { "epoch": 0.06753413431058744, "grad_norm": 0.1194017082452774, "learning_rate": 0.002, "loss": 2.3726, "step": 17470 }, { "epoch": 0.0675727915139707, "grad_norm": 0.11861254274845123, "learning_rate": 0.002, "loss": 2.3836, "step": 17480 }, { "epoch": 0.06761144871735399, "grad_norm": 0.12071160227060318, "learning_rate": 0.002, "loss": 2.3997, "step": 17490 }, { "epoch": 0.06765010592073727, "grad_norm": 0.12513267993927002, "learning_rate": 0.002, "loss": 2.3934, "step": 17500 }, { "epoch": 0.06768876312412055, "grad_norm": 0.13575059175491333, "learning_rate": 0.002, "loss": 2.3795, "step": 17510 }, { "epoch": 0.06772742032750383, "grad_norm": 0.10630622506141663, "learning_rate": 0.002, "loss": 2.3892, "step": 17520 }, { "epoch": 0.0677660775308871, "grad_norm": 0.10673921555280685, "learning_rate": 0.002, "loss": 2.3989, "step": 17530 }, { "epoch": 0.06780473473427039, "grad_norm": 0.14430269598960876, "learning_rate": 0.002, "loss": 2.3882, "step": 17540 }, { "epoch": 0.06784339193765367, "grad_norm": 0.144783154129982, "learning_rate": 0.002, "loss": 2.3756, "step": 17550 }, { "epoch": 0.06788204914103695, "grad_norm": 0.1043168380856514, "learning_rate": 0.002, "loss": 2.3838, "step": 17560 }, { "epoch": 0.06792070634442023, "grad_norm": 0.12369021773338318, "learning_rate": 0.002, "loss": 2.3702, "step": 17570 }, { "epoch": 0.06795936354780349, "grad_norm": 0.10070551931858063, "learning_rate": 0.002, "loss": 2.388, "step": 17580 }, { "epoch": 0.06799802075118677, "grad_norm": 0.13216231763362885, "learning_rate": 0.002, "loss": 2.3996, "step": 17590 }, { "epoch": 0.06803667795457005, "grad_norm": 0.10784224420785904, "learning_rate": 0.002, "loss": 2.3898, "step": 17600 }, { "epoch": 0.06807533515795333, "grad_norm": 0.1283050924539566, "learning_rate": 0.002, "loss": 2.3763, "step": 17610 }, { "epoch": 0.06811399236133661, "grad_norm": 0.12371443212032318, "learning_rate": 0.002, "loss": 2.3892, "step": 17620 }, { "epoch": 0.06815264956471989, "grad_norm": 0.13803911209106445, "learning_rate": 0.002, "loss": 2.3898, "step": 17630 }, { "epoch": 0.06819130676810317, "grad_norm": 0.10237491130828857, "learning_rate": 0.002, "loss": 2.3932, "step": 17640 }, { "epoch": 0.06822996397148645, "grad_norm": 0.11532513797283173, "learning_rate": 0.002, "loss": 2.3939, "step": 17650 }, { "epoch": 0.06826862117486973, "grad_norm": 0.14253586530685425, "learning_rate": 0.002, "loss": 2.3822, "step": 17660 }, { "epoch": 0.068307278378253, "grad_norm": 0.14113810658454895, "learning_rate": 0.002, "loss": 2.395, "step": 17670 }, { "epoch": 0.06834593558163628, "grad_norm": 0.1139352023601532, "learning_rate": 0.002, "loss": 2.3939, "step": 17680 }, { "epoch": 0.06838459278501956, "grad_norm": 0.09490272402763367, "learning_rate": 0.002, "loss": 2.3967, "step": 17690 }, { "epoch": 0.06842324998840284, "grad_norm": 0.1430560201406479, "learning_rate": 0.002, "loss": 2.3849, "step": 17700 }, { "epoch": 0.06846190719178612, "grad_norm": 0.12731243669986725, "learning_rate": 0.002, "loss": 2.3915, "step": 17710 }, { "epoch": 0.0685005643951694, "grad_norm": 0.12364219129085541, "learning_rate": 0.002, "loss": 2.3925, "step": 17720 }, { "epoch": 0.06853922159855268, "grad_norm": 0.12770256400108337, "learning_rate": 0.002, "loss": 2.3768, "step": 17730 }, { "epoch": 0.06857787880193596, "grad_norm": 0.13035711646080017, "learning_rate": 0.002, "loss": 2.3817, "step": 17740 }, { "epoch": 0.06861653600531924, "grad_norm": 0.1317276805639267, "learning_rate": 0.002, "loss": 2.3914, "step": 17750 }, { "epoch": 0.0686551932087025, "grad_norm": 0.11247614026069641, "learning_rate": 0.002, "loss": 2.376, "step": 17760 }, { "epoch": 0.06869385041208578, "grad_norm": 0.10378215461969376, "learning_rate": 0.002, "loss": 2.3852, "step": 17770 }, { "epoch": 0.06873250761546906, "grad_norm": 0.1200520247220993, "learning_rate": 0.002, "loss": 2.3735, "step": 17780 }, { "epoch": 0.06877116481885234, "grad_norm": 0.11229659616947174, "learning_rate": 0.002, "loss": 2.3765, "step": 17790 }, { "epoch": 0.06880982202223562, "grad_norm": 0.11006788909435272, "learning_rate": 0.002, "loss": 2.3764, "step": 17800 }, { "epoch": 0.0688484792256189, "grad_norm": 0.11270550638437271, "learning_rate": 0.002, "loss": 2.3864, "step": 17810 }, { "epoch": 0.06888713642900218, "grad_norm": 0.11977479606866837, "learning_rate": 0.002, "loss": 2.3838, "step": 17820 }, { "epoch": 0.06892579363238546, "grad_norm": 0.11547745764255524, "learning_rate": 0.002, "loss": 2.3658, "step": 17830 }, { "epoch": 0.06896445083576874, "grad_norm": 0.13601486384868622, "learning_rate": 0.002, "loss": 2.3968, "step": 17840 }, { "epoch": 0.06900310803915202, "grad_norm": 0.10460959374904633, "learning_rate": 0.002, "loss": 2.371, "step": 17850 }, { "epoch": 0.06904176524253529, "grad_norm": 0.10961294919252396, "learning_rate": 0.002, "loss": 2.3767, "step": 17860 }, { "epoch": 0.06908042244591857, "grad_norm": 0.1438174694776535, "learning_rate": 0.002, "loss": 2.3806, "step": 17870 }, { "epoch": 0.06911907964930185, "grad_norm": 0.1140781119465828, "learning_rate": 0.002, "loss": 2.3883, "step": 17880 }, { "epoch": 0.06915773685268513, "grad_norm": 0.12019556760787964, "learning_rate": 0.002, "loss": 2.3824, "step": 17890 }, { "epoch": 0.06919639405606841, "grad_norm": 0.11578171700239182, "learning_rate": 0.002, "loss": 2.392, "step": 17900 }, { "epoch": 0.06923505125945169, "grad_norm": 0.10989246517419815, "learning_rate": 0.002, "loss": 2.3813, "step": 17910 }, { "epoch": 0.06927370846283497, "grad_norm": 0.12332355231046677, "learning_rate": 0.002, "loss": 2.3807, "step": 17920 }, { "epoch": 0.06931236566621825, "grad_norm": 0.1192823126912117, "learning_rate": 0.002, "loss": 2.3796, "step": 17930 }, { "epoch": 0.06935102286960153, "grad_norm": 0.14072348177433014, "learning_rate": 0.002, "loss": 2.3855, "step": 17940 }, { "epoch": 0.0693896800729848, "grad_norm": 0.13811370730400085, "learning_rate": 0.002, "loss": 2.3638, "step": 17950 }, { "epoch": 0.06942833727636807, "grad_norm": 0.09993165731430054, "learning_rate": 0.002, "loss": 2.3806, "step": 17960 }, { "epoch": 0.06946699447975135, "grad_norm": 0.11219470202922821, "learning_rate": 0.002, "loss": 2.3887, "step": 17970 }, { "epoch": 0.06950565168313463, "grad_norm": 0.30075761675834656, "learning_rate": 0.002, "loss": 2.395, "step": 17980 }, { "epoch": 0.06954430888651791, "grad_norm": 0.1219414696097374, "learning_rate": 0.002, "loss": 2.3822, "step": 17990 }, { "epoch": 0.0695829660899012, "grad_norm": 0.1216287612915039, "learning_rate": 0.002, "loss": 2.3913, "step": 18000 }, { "epoch": 0.06962162329328447, "grad_norm": 0.12025120854377747, "learning_rate": 0.002, "loss": 2.3864, "step": 18010 }, { "epoch": 0.06966028049666775, "grad_norm": 0.12074564397335052, "learning_rate": 0.002, "loss": 2.3916, "step": 18020 }, { "epoch": 0.06969893770005103, "grad_norm": 0.097334124147892, "learning_rate": 0.002, "loss": 2.3883, "step": 18030 }, { "epoch": 0.0697375949034343, "grad_norm": 0.12103715538978577, "learning_rate": 0.002, "loss": 2.3791, "step": 18040 }, { "epoch": 0.06977625210681758, "grad_norm": 0.12247490882873535, "learning_rate": 0.002, "loss": 2.3775, "step": 18050 }, { "epoch": 0.06981490931020086, "grad_norm": 0.12782634794712067, "learning_rate": 0.002, "loss": 2.3822, "step": 18060 }, { "epoch": 0.06985356651358414, "grad_norm": 0.12588289380073547, "learning_rate": 0.002, "loss": 2.3981, "step": 18070 }, { "epoch": 0.06989222371696742, "grad_norm": 0.11344539374113083, "learning_rate": 0.002, "loss": 2.3856, "step": 18080 }, { "epoch": 0.0699308809203507, "grad_norm": 0.11065513640642166, "learning_rate": 0.002, "loss": 2.3883, "step": 18090 }, { "epoch": 0.06996953812373398, "grad_norm": 0.11105602234601974, "learning_rate": 0.002, "loss": 2.3854, "step": 18100 }, { "epoch": 0.07000819532711726, "grad_norm": 0.12619774043560028, "learning_rate": 0.002, "loss": 2.3977, "step": 18110 }, { "epoch": 0.07004685253050054, "grad_norm": 0.1004246398806572, "learning_rate": 0.002, "loss": 2.3947, "step": 18120 }, { "epoch": 0.0700855097338838, "grad_norm": 0.12445876747369766, "learning_rate": 0.002, "loss": 2.3691, "step": 18130 }, { "epoch": 0.07012416693726709, "grad_norm": 0.11129860579967499, "learning_rate": 0.002, "loss": 2.3959, "step": 18140 }, { "epoch": 0.07016282414065036, "grad_norm": 0.11270508915185928, "learning_rate": 0.002, "loss": 2.37, "step": 18150 }, { "epoch": 0.07020148134403364, "grad_norm": 0.10359988361597061, "learning_rate": 0.002, "loss": 2.3893, "step": 18160 }, { "epoch": 0.07024013854741692, "grad_norm": 0.11541220545768738, "learning_rate": 0.002, "loss": 2.3829, "step": 18170 }, { "epoch": 0.0702787957508002, "grad_norm": 0.1319669783115387, "learning_rate": 0.002, "loss": 2.3914, "step": 18180 }, { "epoch": 0.07031745295418348, "grad_norm": 0.12286441773176193, "learning_rate": 0.002, "loss": 2.3807, "step": 18190 }, { "epoch": 0.07035611015756676, "grad_norm": 0.12172552198171616, "learning_rate": 0.002, "loss": 2.3836, "step": 18200 }, { "epoch": 0.07039476736095004, "grad_norm": 0.11214584857225418, "learning_rate": 0.002, "loss": 2.3964, "step": 18210 }, { "epoch": 0.07043342456433332, "grad_norm": 0.14144858717918396, "learning_rate": 0.002, "loss": 2.3822, "step": 18220 }, { "epoch": 0.07047208176771659, "grad_norm": 0.1375395655632019, "learning_rate": 0.002, "loss": 2.3905, "step": 18230 }, { "epoch": 0.07051073897109987, "grad_norm": 0.1334933638572693, "learning_rate": 0.002, "loss": 2.3646, "step": 18240 }, { "epoch": 0.07054939617448315, "grad_norm": 0.11723175644874573, "learning_rate": 0.002, "loss": 2.3907, "step": 18250 }, { "epoch": 0.07058805337786643, "grad_norm": 0.1289069652557373, "learning_rate": 0.002, "loss": 2.3745, "step": 18260 }, { "epoch": 0.07062671058124971, "grad_norm": 0.10577051341533661, "learning_rate": 0.002, "loss": 2.3802, "step": 18270 }, { "epoch": 0.07066536778463299, "grad_norm": 0.09948733448982239, "learning_rate": 0.002, "loss": 2.3758, "step": 18280 }, { "epoch": 0.07070402498801627, "grad_norm": 0.12849846482276917, "learning_rate": 0.002, "loss": 2.3949, "step": 18290 }, { "epoch": 0.07074268219139955, "grad_norm": 0.10710543394088745, "learning_rate": 0.002, "loss": 2.391, "step": 18300 }, { "epoch": 0.07078133939478283, "grad_norm": 0.12671582400798798, "learning_rate": 0.002, "loss": 2.3914, "step": 18310 }, { "epoch": 0.0708199965981661, "grad_norm": 0.12102266401052475, "learning_rate": 0.002, "loss": 2.3722, "step": 18320 }, { "epoch": 0.07085865380154938, "grad_norm": 0.12042495608329773, "learning_rate": 0.002, "loss": 2.3793, "step": 18330 }, { "epoch": 0.07089731100493266, "grad_norm": 0.10737396776676178, "learning_rate": 0.002, "loss": 2.3856, "step": 18340 }, { "epoch": 0.07093596820831594, "grad_norm": 0.11897099018096924, "learning_rate": 0.002, "loss": 2.394, "step": 18350 }, { "epoch": 0.07097462541169922, "grad_norm": 0.10696249455213547, "learning_rate": 0.002, "loss": 2.3766, "step": 18360 }, { "epoch": 0.0710132826150825, "grad_norm": 0.12204062193632126, "learning_rate": 0.002, "loss": 2.3878, "step": 18370 }, { "epoch": 0.07105193981846578, "grad_norm": 0.09256469458341599, "learning_rate": 0.002, "loss": 2.3706, "step": 18380 }, { "epoch": 0.07109059702184906, "grad_norm": 0.12206672877073288, "learning_rate": 0.002, "loss": 2.3939, "step": 18390 }, { "epoch": 0.07112925422523234, "grad_norm": 0.10235872119665146, "learning_rate": 0.002, "loss": 2.3769, "step": 18400 }, { "epoch": 0.0711679114286156, "grad_norm": 0.11231094598770142, "learning_rate": 0.002, "loss": 2.3802, "step": 18410 }, { "epoch": 0.07120656863199888, "grad_norm": 0.15380600094795227, "learning_rate": 0.002, "loss": 2.3886, "step": 18420 }, { "epoch": 0.07124522583538216, "grad_norm": 0.11261408776044846, "learning_rate": 0.002, "loss": 2.3826, "step": 18430 }, { "epoch": 0.07128388303876544, "grad_norm": 0.11684536933898926, "learning_rate": 0.002, "loss": 2.3979, "step": 18440 }, { "epoch": 0.07132254024214872, "grad_norm": 0.12610866129398346, "learning_rate": 0.002, "loss": 2.3922, "step": 18450 }, { "epoch": 0.071361197445532, "grad_norm": 0.11578212678432465, "learning_rate": 0.002, "loss": 2.3781, "step": 18460 }, { "epoch": 0.07139985464891528, "grad_norm": 0.12588560581207275, "learning_rate": 0.002, "loss": 2.3944, "step": 18470 }, { "epoch": 0.07143851185229856, "grad_norm": 0.10358493030071259, "learning_rate": 0.002, "loss": 2.3734, "step": 18480 }, { "epoch": 0.07147716905568184, "grad_norm": 0.10771401971578598, "learning_rate": 0.002, "loss": 2.3879, "step": 18490 }, { "epoch": 0.07151582625906511, "grad_norm": 0.5038573145866394, "learning_rate": 0.002, "loss": 2.3918, "step": 18500 }, { "epoch": 0.07155448346244839, "grad_norm": 0.11553716659545898, "learning_rate": 0.002, "loss": 2.3749, "step": 18510 }, { "epoch": 0.07159314066583167, "grad_norm": 0.10773370414972305, "learning_rate": 0.002, "loss": 2.3721, "step": 18520 }, { "epoch": 0.07163179786921495, "grad_norm": 0.11228738725185394, "learning_rate": 0.002, "loss": 2.3854, "step": 18530 }, { "epoch": 0.07167045507259823, "grad_norm": 0.12033947557210922, "learning_rate": 0.002, "loss": 2.3696, "step": 18540 }, { "epoch": 0.07170911227598151, "grad_norm": 0.10592275112867355, "learning_rate": 0.002, "loss": 2.3684, "step": 18550 }, { "epoch": 0.07174776947936479, "grad_norm": 0.11138555407524109, "learning_rate": 0.002, "loss": 2.3874, "step": 18560 }, { "epoch": 0.07178642668274807, "grad_norm": 0.1372644156217575, "learning_rate": 0.002, "loss": 2.3902, "step": 18570 }, { "epoch": 0.07182508388613135, "grad_norm": 0.5497710704803467, "learning_rate": 0.002, "loss": 2.3934, "step": 18580 }, { "epoch": 0.07186374108951463, "grad_norm": 0.1122540831565857, "learning_rate": 0.002, "loss": 2.3824, "step": 18590 }, { "epoch": 0.07190239829289789, "grad_norm": 0.12664495408535004, "learning_rate": 0.002, "loss": 2.3852, "step": 18600 }, { "epoch": 0.07194105549628117, "grad_norm": 0.10843063145875931, "learning_rate": 0.002, "loss": 2.3861, "step": 18610 }, { "epoch": 0.07197971269966445, "grad_norm": 0.11812781542539597, "learning_rate": 0.002, "loss": 2.3912, "step": 18620 }, { "epoch": 0.07201836990304773, "grad_norm": 0.10957538336515427, "learning_rate": 0.002, "loss": 2.3889, "step": 18630 }, { "epoch": 0.07205702710643101, "grad_norm": 0.11784140020608902, "learning_rate": 0.002, "loss": 2.378, "step": 18640 }, { "epoch": 0.07209568430981429, "grad_norm": 0.12863163650035858, "learning_rate": 0.002, "loss": 2.3956, "step": 18650 }, { "epoch": 0.07213434151319757, "grad_norm": 0.1179456114768982, "learning_rate": 0.002, "loss": 2.377, "step": 18660 }, { "epoch": 0.07217299871658085, "grad_norm": 0.12140975892543793, "learning_rate": 0.002, "loss": 2.3956, "step": 18670 }, { "epoch": 0.07221165591996413, "grad_norm": 0.10543189942836761, "learning_rate": 0.002, "loss": 2.3775, "step": 18680 }, { "epoch": 0.0722503131233474, "grad_norm": 0.1302126944065094, "learning_rate": 0.002, "loss": 2.4016, "step": 18690 }, { "epoch": 0.07228897032673068, "grad_norm": 0.10657652467489243, "learning_rate": 0.002, "loss": 2.3917, "step": 18700 }, { "epoch": 0.07232762753011396, "grad_norm": 0.13003182411193848, "learning_rate": 0.002, "loss": 2.368, "step": 18710 }, { "epoch": 0.07236628473349724, "grad_norm": 0.11850715428590775, "learning_rate": 0.002, "loss": 2.3906, "step": 18720 }, { "epoch": 0.07240494193688052, "grad_norm": 0.1252976953983307, "learning_rate": 0.002, "loss": 2.3816, "step": 18730 }, { "epoch": 0.0724435991402638, "grad_norm": 0.10448279976844788, "learning_rate": 0.002, "loss": 2.3824, "step": 18740 }, { "epoch": 0.07248225634364708, "grad_norm": 0.0973781943321228, "learning_rate": 0.002, "loss": 2.3824, "step": 18750 }, { "epoch": 0.07252091354703036, "grad_norm": 0.11515215784311295, "learning_rate": 0.002, "loss": 2.3861, "step": 18760 }, { "epoch": 0.07255957075041364, "grad_norm": 0.12835046648979187, "learning_rate": 0.002, "loss": 2.3734, "step": 18770 }, { "epoch": 0.0725982279537969, "grad_norm": 0.09265491366386414, "learning_rate": 0.002, "loss": 2.3818, "step": 18780 }, { "epoch": 0.07263688515718018, "grad_norm": 0.12700672447681427, "learning_rate": 0.002, "loss": 2.3805, "step": 18790 }, { "epoch": 0.07267554236056346, "grad_norm": 0.10078644007444382, "learning_rate": 0.002, "loss": 2.3719, "step": 18800 }, { "epoch": 0.07271419956394674, "grad_norm": 0.1234968900680542, "learning_rate": 0.002, "loss": 2.3886, "step": 18810 }, { "epoch": 0.07275285676733002, "grad_norm": 0.10350227355957031, "learning_rate": 0.002, "loss": 2.3764, "step": 18820 }, { "epoch": 0.0727915139707133, "grad_norm": 0.10174798220396042, "learning_rate": 0.002, "loss": 2.3644, "step": 18830 }, { "epoch": 0.07283017117409658, "grad_norm": 0.12602569162845612, "learning_rate": 0.002, "loss": 2.3805, "step": 18840 }, { "epoch": 0.07286882837747986, "grad_norm": 0.14451903104782104, "learning_rate": 0.002, "loss": 2.3904, "step": 18850 }, { "epoch": 0.07290748558086314, "grad_norm": 0.10017625987529755, "learning_rate": 0.002, "loss": 2.3826, "step": 18860 }, { "epoch": 0.07294614278424642, "grad_norm": 0.10255925357341766, "learning_rate": 0.002, "loss": 2.3724, "step": 18870 }, { "epoch": 0.07298479998762969, "grad_norm": 0.11854524165391922, "learning_rate": 0.002, "loss": 2.3797, "step": 18880 }, { "epoch": 0.07302345719101297, "grad_norm": 0.0942547544836998, "learning_rate": 0.002, "loss": 2.3766, "step": 18890 }, { "epoch": 0.07306211439439625, "grad_norm": 0.12876085937023163, "learning_rate": 0.002, "loss": 2.378, "step": 18900 }, { "epoch": 0.07310077159777953, "grad_norm": 0.1679118126630783, "learning_rate": 0.002, "loss": 2.3923, "step": 18910 }, { "epoch": 0.07313942880116281, "grad_norm": 0.12198128551244736, "learning_rate": 0.002, "loss": 2.3875, "step": 18920 }, { "epoch": 0.07317808600454609, "grad_norm": 0.10201894491910934, "learning_rate": 0.002, "loss": 2.3889, "step": 18930 }, { "epoch": 0.07321674320792937, "grad_norm": 0.11405906826257706, "learning_rate": 0.002, "loss": 2.3974, "step": 18940 }, { "epoch": 0.07325540041131265, "grad_norm": 0.11819473654031754, "learning_rate": 0.002, "loss": 2.3838, "step": 18950 }, { "epoch": 0.07329405761469593, "grad_norm": 0.09402453154325485, "learning_rate": 0.002, "loss": 2.3788, "step": 18960 }, { "epoch": 0.0733327148180792, "grad_norm": 0.13503125309944153, "learning_rate": 0.002, "loss": 2.3855, "step": 18970 }, { "epoch": 0.07337137202146247, "grad_norm": 0.11238619685173035, "learning_rate": 0.002, "loss": 2.3873, "step": 18980 }, { "epoch": 0.07341002922484575, "grad_norm": 0.15545395016670227, "learning_rate": 0.002, "loss": 2.392, "step": 18990 }, { "epoch": 0.07344868642822903, "grad_norm": 0.13186931610107422, "learning_rate": 0.002, "loss": 2.3852, "step": 19000 }, { "epoch": 0.07348734363161231, "grad_norm": 0.10326071828603745, "learning_rate": 0.002, "loss": 2.3857, "step": 19010 }, { "epoch": 0.0735260008349956, "grad_norm": 0.11343759298324585, "learning_rate": 0.002, "loss": 2.3702, "step": 19020 }, { "epoch": 0.07356465803837887, "grad_norm": 0.13040369749069214, "learning_rate": 0.002, "loss": 2.3711, "step": 19030 }, { "epoch": 0.07360331524176215, "grad_norm": 0.1187443807721138, "learning_rate": 0.002, "loss": 2.3674, "step": 19040 }, { "epoch": 0.07364197244514543, "grad_norm": 0.09433488547801971, "learning_rate": 0.002, "loss": 2.3732, "step": 19050 }, { "epoch": 0.0736806296485287, "grad_norm": 0.12784084677696228, "learning_rate": 0.002, "loss": 2.3789, "step": 19060 }, { "epoch": 0.07371928685191198, "grad_norm": 0.14179980754852295, "learning_rate": 0.002, "loss": 2.3697, "step": 19070 }, { "epoch": 0.07375794405529526, "grad_norm": 0.11444813013076782, "learning_rate": 0.002, "loss": 2.383, "step": 19080 }, { "epoch": 0.07379660125867854, "grad_norm": 0.11803031712770462, "learning_rate": 0.002, "loss": 2.3851, "step": 19090 }, { "epoch": 0.07383525846206182, "grad_norm": 0.13434816896915436, "learning_rate": 0.002, "loss": 2.3878, "step": 19100 }, { "epoch": 0.0738739156654451, "grad_norm": 0.11736548691987991, "learning_rate": 0.002, "loss": 2.4008, "step": 19110 }, { "epoch": 0.07391257286882838, "grad_norm": 0.10972438007593155, "learning_rate": 0.002, "loss": 2.3875, "step": 19120 }, { "epoch": 0.07395123007221166, "grad_norm": 0.09769081324338913, "learning_rate": 0.002, "loss": 2.3922, "step": 19130 }, { "epoch": 0.07398988727559494, "grad_norm": 0.16008608043193817, "learning_rate": 0.002, "loss": 2.384, "step": 19140 }, { "epoch": 0.0740285444789782, "grad_norm": 0.21446815133094788, "learning_rate": 0.002, "loss": 2.379, "step": 19150 }, { "epoch": 0.07406720168236149, "grad_norm": 0.10447430610656738, "learning_rate": 0.002, "loss": 2.3734, "step": 19160 }, { "epoch": 0.07410585888574477, "grad_norm": 0.12838858366012573, "learning_rate": 0.002, "loss": 2.3784, "step": 19170 }, { "epoch": 0.07414451608912805, "grad_norm": 0.11898154765367508, "learning_rate": 0.002, "loss": 2.3846, "step": 19180 }, { "epoch": 0.07418317329251133, "grad_norm": 0.12039216607809067, "learning_rate": 0.002, "loss": 2.3698, "step": 19190 }, { "epoch": 0.0742218304958946, "grad_norm": 0.10423491895198822, "learning_rate": 0.002, "loss": 2.3833, "step": 19200 }, { "epoch": 0.07426048769927789, "grad_norm": 0.11539363116025925, "learning_rate": 0.002, "loss": 2.3702, "step": 19210 }, { "epoch": 0.07429914490266117, "grad_norm": 0.12009965628385544, "learning_rate": 0.002, "loss": 2.3824, "step": 19220 }, { "epoch": 0.07433780210604445, "grad_norm": 0.09702429175376892, "learning_rate": 0.002, "loss": 2.3681, "step": 19230 }, { "epoch": 0.07437645930942773, "grad_norm": 0.11230161041021347, "learning_rate": 0.002, "loss": 2.3861, "step": 19240 }, { "epoch": 0.07441511651281099, "grad_norm": 0.11779018491506577, "learning_rate": 0.002, "loss": 2.3754, "step": 19250 }, { "epoch": 0.07445377371619427, "grad_norm": 0.10018932819366455, "learning_rate": 0.002, "loss": 2.3866, "step": 19260 }, { "epoch": 0.07449243091957755, "grad_norm": 0.10820461064577103, "learning_rate": 0.002, "loss": 2.3727, "step": 19270 }, { "epoch": 0.07453108812296083, "grad_norm": 0.10591787844896317, "learning_rate": 0.002, "loss": 2.3739, "step": 19280 }, { "epoch": 0.07456974532634411, "grad_norm": 0.10695263743400574, "learning_rate": 0.002, "loss": 2.386, "step": 19290 }, { "epoch": 0.07460840252972739, "grad_norm": 0.10405397415161133, "learning_rate": 0.002, "loss": 2.3945, "step": 19300 }, { "epoch": 0.07464705973311067, "grad_norm": 0.121544249355793, "learning_rate": 0.002, "loss": 2.3863, "step": 19310 }, { "epoch": 0.07468571693649395, "grad_norm": 0.1174372136592865, "learning_rate": 0.002, "loss": 2.3663, "step": 19320 }, { "epoch": 0.07472437413987723, "grad_norm": 0.11053747683763504, "learning_rate": 0.002, "loss": 2.3722, "step": 19330 }, { "epoch": 0.0747630313432605, "grad_norm": 0.10836398601531982, "learning_rate": 0.002, "loss": 2.4037, "step": 19340 }, { "epoch": 0.07480168854664378, "grad_norm": 0.11245734244585037, "learning_rate": 0.002, "loss": 2.4008, "step": 19350 }, { "epoch": 0.07484034575002706, "grad_norm": 0.12424355745315552, "learning_rate": 0.002, "loss": 2.3923, "step": 19360 }, { "epoch": 0.07487900295341034, "grad_norm": 0.12254510074853897, "learning_rate": 0.002, "loss": 2.3662, "step": 19370 }, { "epoch": 0.07491766015679362, "grad_norm": 0.10089680552482605, "learning_rate": 0.002, "loss": 2.3808, "step": 19380 }, { "epoch": 0.0749563173601769, "grad_norm": 0.11235611885786057, "learning_rate": 0.002, "loss": 2.3836, "step": 19390 }, { "epoch": 0.07499497456356018, "grad_norm": 0.12315353006124496, "learning_rate": 0.002, "loss": 2.3795, "step": 19400 }, { "epoch": 0.07503363176694346, "grad_norm": 0.10509892553091049, "learning_rate": 0.002, "loss": 2.3804, "step": 19410 }, { "epoch": 0.07507228897032674, "grad_norm": 0.12031447142362595, "learning_rate": 0.002, "loss": 2.3917, "step": 19420 }, { "epoch": 0.07511094617371, "grad_norm": 0.15881817042827606, "learning_rate": 0.002, "loss": 2.3753, "step": 19430 }, { "epoch": 0.07514960337709328, "grad_norm": 0.11184466630220413, "learning_rate": 0.002, "loss": 2.3808, "step": 19440 }, { "epoch": 0.07518826058047656, "grad_norm": 0.12158004194498062, "learning_rate": 0.002, "loss": 2.3818, "step": 19450 }, { "epoch": 0.07522691778385984, "grad_norm": 0.12418025732040405, "learning_rate": 0.002, "loss": 2.3916, "step": 19460 }, { "epoch": 0.07526557498724312, "grad_norm": 0.16225135326385498, "learning_rate": 0.002, "loss": 2.3751, "step": 19470 }, { "epoch": 0.0753042321906264, "grad_norm": 0.11569126695394516, "learning_rate": 0.002, "loss": 2.3616, "step": 19480 }, { "epoch": 0.07534288939400968, "grad_norm": 0.10566077381372452, "learning_rate": 0.002, "loss": 2.3767, "step": 19490 }, { "epoch": 0.07538154659739296, "grad_norm": 0.1229267567396164, "learning_rate": 0.002, "loss": 2.3944, "step": 19500 }, { "epoch": 0.07542020380077624, "grad_norm": 0.13145491480827332, "learning_rate": 0.002, "loss": 2.3895, "step": 19510 }, { "epoch": 0.07545886100415951, "grad_norm": 0.10700027644634247, "learning_rate": 0.002, "loss": 2.357, "step": 19520 }, { "epoch": 0.07549751820754279, "grad_norm": 0.09952887147665024, "learning_rate": 0.002, "loss": 2.3769, "step": 19530 }, { "epoch": 0.07553617541092607, "grad_norm": 0.1233973428606987, "learning_rate": 0.002, "loss": 2.3882, "step": 19540 }, { "epoch": 0.07557483261430935, "grad_norm": 0.13303498923778534, "learning_rate": 0.002, "loss": 2.3857, "step": 19550 }, { "epoch": 0.07561348981769263, "grad_norm": 0.12933650612831116, "learning_rate": 0.002, "loss": 2.3783, "step": 19560 }, { "epoch": 0.07565214702107591, "grad_norm": 0.10663676261901855, "learning_rate": 0.002, "loss": 2.3876, "step": 19570 }, { "epoch": 0.07569080422445919, "grad_norm": 0.10642411559820175, "learning_rate": 0.002, "loss": 2.3733, "step": 19580 }, { "epoch": 0.07572946142784247, "grad_norm": 0.11113075911998749, "learning_rate": 0.002, "loss": 2.3881, "step": 19590 }, { "epoch": 0.07576811863122575, "grad_norm": 0.11284121870994568, "learning_rate": 0.002, "loss": 2.3904, "step": 19600 }, { "epoch": 0.07580677583460903, "grad_norm": 0.14630155265331268, "learning_rate": 0.002, "loss": 2.3907, "step": 19610 }, { "epoch": 0.0758454330379923, "grad_norm": 0.12654368579387665, "learning_rate": 0.002, "loss": 2.3823, "step": 19620 }, { "epoch": 0.07588409024137557, "grad_norm": 0.13922430574893951, "learning_rate": 0.002, "loss": 2.3774, "step": 19630 }, { "epoch": 0.07592274744475885, "grad_norm": 0.12410923093557358, "learning_rate": 0.002, "loss": 2.3789, "step": 19640 }, { "epoch": 0.07596140464814213, "grad_norm": 0.11142056435346603, "learning_rate": 0.002, "loss": 2.3881, "step": 19650 }, { "epoch": 0.07600006185152541, "grad_norm": 0.11221200227737427, "learning_rate": 0.002, "loss": 2.3824, "step": 19660 }, { "epoch": 0.0760387190549087, "grad_norm": 0.12378339469432831, "learning_rate": 0.002, "loss": 2.377, "step": 19670 }, { "epoch": 0.07607737625829197, "grad_norm": 0.11832631379365921, "learning_rate": 0.002, "loss": 2.3731, "step": 19680 }, { "epoch": 0.07611603346167525, "grad_norm": 0.1267072558403015, "learning_rate": 0.002, "loss": 2.3661, "step": 19690 }, { "epoch": 0.07615469066505853, "grad_norm": 0.14015980064868927, "learning_rate": 0.002, "loss": 2.3751, "step": 19700 }, { "epoch": 0.0761933478684418, "grad_norm": 0.11285578459501266, "learning_rate": 0.002, "loss": 2.3809, "step": 19710 }, { "epoch": 0.07623200507182508, "grad_norm": 0.09963081032037735, "learning_rate": 0.002, "loss": 2.3887, "step": 19720 }, { "epoch": 0.07627066227520836, "grad_norm": 0.13140037655830383, "learning_rate": 0.002, "loss": 2.3891, "step": 19730 }, { "epoch": 0.07630931947859164, "grad_norm": 0.11250842362642288, "learning_rate": 0.002, "loss": 2.3826, "step": 19740 }, { "epoch": 0.07634797668197492, "grad_norm": 0.12486717104911804, "learning_rate": 0.002, "loss": 2.3925, "step": 19750 }, { "epoch": 0.0763866338853582, "grad_norm": 0.12867963314056396, "learning_rate": 0.002, "loss": 2.3791, "step": 19760 }, { "epoch": 0.07642529108874148, "grad_norm": 0.10435399413108826, "learning_rate": 0.002, "loss": 2.3749, "step": 19770 }, { "epoch": 0.07646394829212476, "grad_norm": 0.12695525586605072, "learning_rate": 0.002, "loss": 2.3815, "step": 19780 }, { "epoch": 0.07650260549550804, "grad_norm": 0.1107652485370636, "learning_rate": 0.002, "loss": 2.3819, "step": 19790 }, { "epoch": 0.0765412626988913, "grad_norm": 0.14867202937602997, "learning_rate": 0.002, "loss": 2.3828, "step": 19800 }, { "epoch": 0.07657991990227458, "grad_norm": 0.10924647003412247, "learning_rate": 0.002, "loss": 2.3782, "step": 19810 }, { "epoch": 0.07661857710565786, "grad_norm": 0.1168089285492897, "learning_rate": 0.002, "loss": 2.3811, "step": 19820 }, { "epoch": 0.07665723430904114, "grad_norm": 0.12944529950618744, "learning_rate": 0.002, "loss": 2.3882, "step": 19830 }, { "epoch": 0.07669589151242442, "grad_norm": 0.12026828527450562, "learning_rate": 0.002, "loss": 2.3699, "step": 19840 }, { "epoch": 0.0767345487158077, "grad_norm": 0.10217534750699997, "learning_rate": 0.002, "loss": 2.39, "step": 19850 }, { "epoch": 0.07677320591919098, "grad_norm": 0.11586353927850723, "learning_rate": 0.002, "loss": 2.3529, "step": 19860 }, { "epoch": 0.07681186312257426, "grad_norm": 0.09830108284950256, "learning_rate": 0.002, "loss": 2.3736, "step": 19870 }, { "epoch": 0.07685052032595754, "grad_norm": 0.12570495903491974, "learning_rate": 0.002, "loss": 2.3854, "step": 19880 }, { "epoch": 0.07688917752934082, "grad_norm": 0.11000153422355652, "learning_rate": 0.002, "loss": 2.3871, "step": 19890 }, { "epoch": 0.07692783473272409, "grad_norm": 0.10306507349014282, "learning_rate": 0.002, "loss": 2.3868, "step": 19900 }, { "epoch": 0.07696649193610737, "grad_norm": 0.11710715293884277, "learning_rate": 0.002, "loss": 2.386, "step": 19910 }, { "epoch": 0.07700514913949065, "grad_norm": 0.12766055762767792, "learning_rate": 0.002, "loss": 2.4044, "step": 19920 }, { "epoch": 0.07704380634287393, "grad_norm": 0.10362893342971802, "learning_rate": 0.002, "loss": 2.3768, "step": 19930 }, { "epoch": 0.07708246354625721, "grad_norm": 0.11141230165958405, "learning_rate": 0.002, "loss": 2.3681, "step": 19940 }, { "epoch": 0.07712112074964049, "grad_norm": 0.11341533064842224, "learning_rate": 0.002, "loss": 2.3691, "step": 19950 }, { "epoch": 0.07715977795302377, "grad_norm": 0.10786847025156021, "learning_rate": 0.002, "loss": 2.3796, "step": 19960 }, { "epoch": 0.07719843515640705, "grad_norm": 0.09927070885896683, "learning_rate": 0.002, "loss": 2.3832, "step": 19970 }, { "epoch": 0.07723709235979033, "grad_norm": 0.11835852265357971, "learning_rate": 0.002, "loss": 2.3549, "step": 19980 }, { "epoch": 0.0772757495631736, "grad_norm": 0.1367740035057068, "learning_rate": 0.002, "loss": 2.3682, "step": 19990 }, { "epoch": 0.07731440676655688, "grad_norm": 0.10808567702770233, "learning_rate": 0.002, "loss": 2.3704, "step": 20000 }, { "epoch": 0.07735306396994016, "grad_norm": 0.11161024123430252, "learning_rate": 0.002, "loss": 2.3827, "step": 20010 }, { "epoch": 0.07739172117332344, "grad_norm": 0.10708248615264893, "learning_rate": 0.002, "loss": 2.3821, "step": 20020 }, { "epoch": 0.07743037837670672, "grad_norm": 0.1220446527004242, "learning_rate": 0.002, "loss": 2.359, "step": 20030 }, { "epoch": 0.07746903558009, "grad_norm": 0.11131221801042557, "learning_rate": 0.002, "loss": 2.3657, "step": 20040 }, { "epoch": 0.07750769278347328, "grad_norm": 0.14677201211452484, "learning_rate": 0.002, "loss": 2.3867, "step": 20050 }, { "epoch": 0.07754634998685656, "grad_norm": 0.11652742326259613, "learning_rate": 0.002, "loss": 2.3848, "step": 20060 }, { "epoch": 0.07758500719023984, "grad_norm": 0.1017276793718338, "learning_rate": 0.002, "loss": 2.3834, "step": 20070 }, { "epoch": 0.0776236643936231, "grad_norm": 0.11347658932209015, "learning_rate": 0.002, "loss": 2.3785, "step": 20080 }, { "epoch": 0.07766232159700638, "grad_norm": 0.15333813428878784, "learning_rate": 0.002, "loss": 2.3837, "step": 20090 }, { "epoch": 0.07770097880038966, "grad_norm": 0.10666406154632568, "learning_rate": 0.002, "loss": 2.3768, "step": 20100 }, { "epoch": 0.07773963600377294, "grad_norm": 0.115127794444561, "learning_rate": 0.002, "loss": 2.3674, "step": 20110 }, { "epoch": 0.07777829320715622, "grad_norm": 0.10911829769611359, "learning_rate": 0.002, "loss": 2.3789, "step": 20120 }, { "epoch": 0.0778169504105395, "grad_norm": 0.12019343674182892, "learning_rate": 0.002, "loss": 2.3975, "step": 20130 }, { "epoch": 0.07785560761392278, "grad_norm": 0.12885767221450806, "learning_rate": 0.002, "loss": 2.3825, "step": 20140 }, { "epoch": 0.07789426481730606, "grad_norm": 0.11365063488483429, "learning_rate": 0.002, "loss": 2.3814, "step": 20150 }, { "epoch": 0.07793292202068934, "grad_norm": 0.12837842106819153, "learning_rate": 0.002, "loss": 2.3785, "step": 20160 }, { "epoch": 0.0779715792240726, "grad_norm": 0.10112041234970093, "learning_rate": 0.002, "loss": 2.382, "step": 20170 }, { "epoch": 0.07801023642745589, "grad_norm": 0.11926894634962082, "learning_rate": 0.002, "loss": 2.3904, "step": 20180 }, { "epoch": 0.07804889363083917, "grad_norm": 0.13643495738506317, "learning_rate": 0.002, "loss": 2.3716, "step": 20190 }, { "epoch": 0.07808755083422245, "grad_norm": 0.12283652275800705, "learning_rate": 0.002, "loss": 2.3745, "step": 20200 }, { "epoch": 0.07812620803760573, "grad_norm": 0.1476060301065445, "learning_rate": 0.002, "loss": 2.3713, "step": 20210 }, { "epoch": 0.078164865240989, "grad_norm": 0.12512938678264618, "learning_rate": 0.002, "loss": 2.3807, "step": 20220 }, { "epoch": 0.07820352244437229, "grad_norm": 0.10858415067195892, "learning_rate": 0.002, "loss": 2.3908, "step": 20230 }, { "epoch": 0.07824217964775557, "grad_norm": 0.09735766798257828, "learning_rate": 0.002, "loss": 2.374, "step": 20240 }, { "epoch": 0.07828083685113885, "grad_norm": 0.13043081760406494, "learning_rate": 0.002, "loss": 2.3928, "step": 20250 }, { "epoch": 0.07831949405452213, "grad_norm": 0.1227329671382904, "learning_rate": 0.002, "loss": 2.3776, "step": 20260 }, { "epoch": 0.07835815125790539, "grad_norm": 0.1388980597257614, "learning_rate": 0.002, "loss": 2.38, "step": 20270 }, { "epoch": 0.07839680846128867, "grad_norm": 0.10539897531270981, "learning_rate": 0.002, "loss": 2.388, "step": 20280 }, { "epoch": 0.07843546566467195, "grad_norm": 0.10891108214855194, "learning_rate": 0.002, "loss": 2.3833, "step": 20290 }, { "epoch": 0.07847412286805523, "grad_norm": 0.11661384254693985, "learning_rate": 0.002, "loss": 2.3859, "step": 20300 }, { "epoch": 0.07851278007143851, "grad_norm": 0.10258234292268753, "learning_rate": 0.002, "loss": 2.3994, "step": 20310 }, { "epoch": 0.07855143727482179, "grad_norm": 0.11731996387243271, "learning_rate": 0.002, "loss": 2.3898, "step": 20320 }, { "epoch": 0.07859009447820507, "grad_norm": 0.1236349493265152, "learning_rate": 0.002, "loss": 2.3766, "step": 20330 }, { "epoch": 0.07862875168158835, "grad_norm": 0.11339427530765533, "learning_rate": 0.002, "loss": 2.3653, "step": 20340 }, { "epoch": 0.07866740888497163, "grad_norm": 0.10862452536821365, "learning_rate": 0.002, "loss": 2.3856, "step": 20350 }, { "epoch": 0.0787060660883549, "grad_norm": 0.14031213521957397, "learning_rate": 0.002, "loss": 2.373, "step": 20360 }, { "epoch": 0.07874472329173818, "grad_norm": 0.1003158912062645, "learning_rate": 0.002, "loss": 2.3782, "step": 20370 }, { "epoch": 0.07878338049512146, "grad_norm": 0.14314481616020203, "learning_rate": 0.002, "loss": 2.3869, "step": 20380 }, { "epoch": 0.07882203769850474, "grad_norm": 0.1109648272395134, "learning_rate": 0.002, "loss": 2.3802, "step": 20390 }, { "epoch": 0.07886069490188802, "grad_norm": 0.1257660835981369, "learning_rate": 0.002, "loss": 2.3901, "step": 20400 }, { "epoch": 0.0788993521052713, "grad_norm": 0.10887817293405533, "learning_rate": 0.002, "loss": 2.3842, "step": 20410 }, { "epoch": 0.07893800930865458, "grad_norm": 0.13619078695774078, "learning_rate": 0.002, "loss": 2.3846, "step": 20420 }, { "epoch": 0.07897666651203786, "grad_norm": 0.10961946099996567, "learning_rate": 0.002, "loss": 2.3792, "step": 20430 }, { "epoch": 0.07901532371542114, "grad_norm": 0.10585474222898483, "learning_rate": 0.002, "loss": 2.3804, "step": 20440 }, { "epoch": 0.0790539809188044, "grad_norm": 0.10117260366678238, "learning_rate": 0.002, "loss": 2.3979, "step": 20450 }, { "epoch": 0.07909263812218768, "grad_norm": 0.2224205732345581, "learning_rate": 0.002, "loss": 2.3761, "step": 20460 }, { "epoch": 0.07913129532557096, "grad_norm": 0.1441737562417984, "learning_rate": 0.002, "loss": 2.3879, "step": 20470 }, { "epoch": 0.07916995252895424, "grad_norm": 0.09196843951940536, "learning_rate": 0.002, "loss": 2.3774, "step": 20480 }, { "epoch": 0.07920860973233752, "grad_norm": 0.11055919528007507, "learning_rate": 0.002, "loss": 2.3658, "step": 20490 }, { "epoch": 0.0792472669357208, "grad_norm": 0.10769325494766235, "learning_rate": 0.002, "loss": 2.374, "step": 20500 }, { "epoch": 0.07928592413910408, "grad_norm": 0.10402721911668777, "learning_rate": 0.002, "loss": 2.362, "step": 20510 }, { "epoch": 0.07932458134248736, "grad_norm": 0.11635389924049377, "learning_rate": 0.002, "loss": 2.3846, "step": 20520 }, { "epoch": 0.07936323854587064, "grad_norm": 0.10108437389135361, "learning_rate": 0.002, "loss": 2.3873, "step": 20530 }, { "epoch": 0.07940189574925392, "grad_norm": 0.1175009086728096, "learning_rate": 0.002, "loss": 2.3826, "step": 20540 }, { "epoch": 0.07944055295263719, "grad_norm": 0.10868023335933685, "learning_rate": 0.002, "loss": 2.3804, "step": 20550 }, { "epoch": 0.07947921015602047, "grad_norm": 0.11248388141393661, "learning_rate": 0.002, "loss": 2.3777, "step": 20560 }, { "epoch": 0.07951786735940375, "grad_norm": 0.10098931193351746, "learning_rate": 0.002, "loss": 2.3814, "step": 20570 }, { "epoch": 0.07955652456278703, "grad_norm": 0.1326024979352951, "learning_rate": 0.002, "loss": 2.388, "step": 20580 }, { "epoch": 0.07959518176617031, "grad_norm": 0.1198132261633873, "learning_rate": 0.002, "loss": 2.3761, "step": 20590 }, { "epoch": 0.07963383896955359, "grad_norm": 0.10643836855888367, "learning_rate": 0.002, "loss": 2.3892, "step": 20600 }, { "epoch": 0.07967249617293687, "grad_norm": 0.11332546919584274, "learning_rate": 0.002, "loss": 2.3778, "step": 20610 }, { "epoch": 0.07971115337632015, "grad_norm": 0.11369810998439789, "learning_rate": 0.002, "loss": 2.3858, "step": 20620 }, { "epoch": 0.07974981057970343, "grad_norm": 0.1119384691119194, "learning_rate": 0.002, "loss": 2.3825, "step": 20630 }, { "epoch": 0.0797884677830867, "grad_norm": 0.1512312889099121, "learning_rate": 0.002, "loss": 2.3761, "step": 20640 }, { "epoch": 0.07982712498646997, "grad_norm": 0.10424868762493134, "learning_rate": 0.002, "loss": 2.3695, "step": 20650 }, { "epoch": 0.07986578218985325, "grad_norm": 0.4727657437324524, "learning_rate": 0.002, "loss": 2.3769, "step": 20660 }, { "epoch": 0.07990443939323653, "grad_norm": 0.10331464558839798, "learning_rate": 0.002, "loss": 2.3918, "step": 20670 }, { "epoch": 0.07994309659661981, "grad_norm": 0.10814424604177475, "learning_rate": 0.002, "loss": 2.3818, "step": 20680 }, { "epoch": 0.0799817538000031, "grad_norm": 0.10030341893434525, "learning_rate": 0.002, "loss": 2.3826, "step": 20690 }, { "epoch": 0.08002041100338637, "grad_norm": 0.1187674030661583, "learning_rate": 0.002, "loss": 2.3775, "step": 20700 }, { "epoch": 0.08005906820676965, "grad_norm": 0.12968982756137848, "learning_rate": 0.002, "loss": 2.3889, "step": 20710 }, { "epoch": 0.08009772541015293, "grad_norm": 0.11431638896465302, "learning_rate": 0.002, "loss": 2.3793, "step": 20720 }, { "epoch": 0.0801363826135362, "grad_norm": 0.10570517927408218, "learning_rate": 0.002, "loss": 2.3837, "step": 20730 }, { "epoch": 0.08017503981691948, "grad_norm": 0.10601756721735, "learning_rate": 0.002, "loss": 2.3753, "step": 20740 }, { "epoch": 0.08021369702030276, "grad_norm": 0.12536196410655975, "learning_rate": 0.002, "loss": 2.3733, "step": 20750 }, { "epoch": 0.08025235422368604, "grad_norm": 0.11333110183477402, "learning_rate": 0.002, "loss": 2.376, "step": 20760 }, { "epoch": 0.08029101142706932, "grad_norm": 0.11019251495599747, "learning_rate": 0.002, "loss": 2.3752, "step": 20770 }, { "epoch": 0.0803296686304526, "grad_norm": 0.10505245625972748, "learning_rate": 0.002, "loss": 2.3737, "step": 20780 }, { "epoch": 0.08036832583383588, "grad_norm": 0.12179173529148102, "learning_rate": 0.002, "loss": 2.3687, "step": 20790 }, { "epoch": 0.08040698303721916, "grad_norm": 0.11226214468479156, "learning_rate": 0.002, "loss": 2.3851, "step": 20800 }, { "epoch": 0.08044564024060244, "grad_norm": 0.11181282997131348, "learning_rate": 0.002, "loss": 2.3841, "step": 20810 }, { "epoch": 0.0804842974439857, "grad_norm": 0.11984371393918991, "learning_rate": 0.002, "loss": 2.3769, "step": 20820 }, { "epoch": 0.08052295464736899, "grad_norm": 0.12782707810401917, "learning_rate": 0.002, "loss": 2.3776, "step": 20830 }, { "epoch": 0.08056161185075227, "grad_norm": 0.10775158554315567, "learning_rate": 0.002, "loss": 2.376, "step": 20840 }, { "epoch": 0.08060026905413555, "grad_norm": 0.11608025431632996, "learning_rate": 0.002, "loss": 2.3896, "step": 20850 }, { "epoch": 0.08063892625751883, "grad_norm": 0.12389634549617767, "learning_rate": 0.002, "loss": 2.3697, "step": 20860 }, { "epoch": 0.0806775834609021, "grad_norm": 0.10256687551736832, "learning_rate": 0.002, "loss": 2.3705, "step": 20870 }, { "epoch": 0.08071624066428539, "grad_norm": 0.12604756653308868, "learning_rate": 0.002, "loss": 2.3946, "step": 20880 }, { "epoch": 0.08075489786766867, "grad_norm": 0.12273551523685455, "learning_rate": 0.002, "loss": 2.3809, "step": 20890 }, { "epoch": 0.08079355507105195, "grad_norm": 0.11249291896820068, "learning_rate": 0.002, "loss": 2.3885, "step": 20900 }, { "epoch": 0.08083221227443523, "grad_norm": 0.10646382719278336, "learning_rate": 0.002, "loss": 2.3781, "step": 20910 }, { "epoch": 0.08087086947781849, "grad_norm": 0.11489584296941757, "learning_rate": 0.002, "loss": 2.3702, "step": 20920 }, { "epoch": 0.08090952668120177, "grad_norm": 0.11377881467342377, "learning_rate": 0.002, "loss": 2.3819, "step": 20930 }, { "epoch": 0.08094818388458505, "grad_norm": 0.10648036748170853, "learning_rate": 0.002, "loss": 2.387, "step": 20940 }, { "epoch": 0.08098684108796833, "grad_norm": 0.11882845312356949, "learning_rate": 0.002, "loss": 2.3692, "step": 20950 }, { "epoch": 0.08102549829135161, "grad_norm": 0.1182943731546402, "learning_rate": 0.002, "loss": 2.371, "step": 20960 }, { "epoch": 0.08106415549473489, "grad_norm": 0.10753022134304047, "learning_rate": 0.002, "loss": 2.3777, "step": 20970 }, { "epoch": 0.08110281269811817, "grad_norm": 0.11233070492744446, "learning_rate": 0.002, "loss": 2.3831, "step": 20980 }, { "epoch": 0.08114146990150145, "grad_norm": 0.1118980348110199, "learning_rate": 0.002, "loss": 2.3837, "step": 20990 }, { "epoch": 0.08118012710488473, "grad_norm": 0.11435777693986893, "learning_rate": 0.002, "loss": 2.3701, "step": 21000 }, { "epoch": 0.081218784308268, "grad_norm": 0.11617391556501389, "learning_rate": 0.002, "loss": 2.3786, "step": 21010 }, { "epoch": 0.08125744151165128, "grad_norm": 0.09662947803735733, "learning_rate": 0.002, "loss": 2.3676, "step": 21020 }, { "epoch": 0.08129609871503456, "grad_norm": 0.1029861569404602, "learning_rate": 0.002, "loss": 2.395, "step": 21030 }, { "epoch": 0.08133475591841784, "grad_norm": 0.10826099663972855, "learning_rate": 0.002, "loss": 2.3811, "step": 21040 }, { "epoch": 0.08137341312180112, "grad_norm": 0.14398784935474396, "learning_rate": 0.002, "loss": 2.3809, "step": 21050 }, { "epoch": 0.0814120703251844, "grad_norm": 0.11851483583450317, "learning_rate": 0.002, "loss": 2.3787, "step": 21060 }, { "epoch": 0.08145072752856768, "grad_norm": 0.10036487132310867, "learning_rate": 0.002, "loss": 2.3835, "step": 21070 }, { "epoch": 0.08148938473195096, "grad_norm": 0.11577463150024414, "learning_rate": 0.002, "loss": 2.3743, "step": 21080 }, { "epoch": 0.08152804193533424, "grad_norm": 0.10647506266832352, "learning_rate": 0.002, "loss": 2.3804, "step": 21090 }, { "epoch": 0.0815666991387175, "grad_norm": 0.10589282959699631, "learning_rate": 0.002, "loss": 2.3823, "step": 21100 }, { "epoch": 0.08160535634210078, "grad_norm": 0.12870502471923828, "learning_rate": 0.002, "loss": 2.3835, "step": 21110 }, { "epoch": 0.08164401354548406, "grad_norm": 0.14981471002101898, "learning_rate": 0.002, "loss": 2.3903, "step": 21120 }, { "epoch": 0.08168267074886734, "grad_norm": 0.12884055078029633, "learning_rate": 0.002, "loss": 2.375, "step": 21130 }, { "epoch": 0.08172132795225062, "grad_norm": 0.1172378733754158, "learning_rate": 0.002, "loss": 2.3779, "step": 21140 }, { "epoch": 0.0817599851556339, "grad_norm": 0.11848177760839462, "learning_rate": 0.002, "loss": 2.3984, "step": 21150 }, { "epoch": 0.08179864235901718, "grad_norm": 0.35388875007629395, "learning_rate": 0.002, "loss": 2.3874, "step": 21160 }, { "epoch": 0.08183729956240046, "grad_norm": 0.09712839871644974, "learning_rate": 0.002, "loss": 2.3827, "step": 21170 }, { "epoch": 0.08187595676578374, "grad_norm": 0.0974545031785965, "learning_rate": 0.002, "loss": 2.3741, "step": 21180 }, { "epoch": 0.08191461396916701, "grad_norm": 0.1533958464860916, "learning_rate": 0.002, "loss": 2.3749, "step": 21190 }, { "epoch": 0.08195327117255029, "grad_norm": 0.1025494858622551, "learning_rate": 0.002, "loss": 2.3849, "step": 21200 }, { "epoch": 0.08199192837593357, "grad_norm": 0.10949676483869553, "learning_rate": 0.002, "loss": 2.3588, "step": 21210 }, { "epoch": 0.08203058557931685, "grad_norm": 0.10791637003421783, "learning_rate": 0.002, "loss": 2.3686, "step": 21220 }, { "epoch": 0.08206924278270013, "grad_norm": 0.10763223469257355, "learning_rate": 0.002, "loss": 2.3788, "step": 21230 }, { "epoch": 0.08210789998608341, "grad_norm": 0.12494704872369766, "learning_rate": 0.002, "loss": 2.372, "step": 21240 }, { "epoch": 0.08214655718946669, "grad_norm": 0.11371438950300217, "learning_rate": 0.002, "loss": 2.3762, "step": 21250 }, { "epoch": 0.08218521439284997, "grad_norm": 0.11540467292070389, "learning_rate": 0.002, "loss": 2.383, "step": 21260 }, { "epoch": 0.08222387159623325, "grad_norm": 0.1142238900065422, "learning_rate": 0.002, "loss": 2.378, "step": 21270 }, { "epoch": 0.08226252879961653, "grad_norm": 0.13156285881996155, "learning_rate": 0.002, "loss": 2.3813, "step": 21280 }, { "epoch": 0.0823011860029998, "grad_norm": 0.11045181006193161, "learning_rate": 0.002, "loss": 2.3771, "step": 21290 }, { "epoch": 0.08233984320638307, "grad_norm": 0.10513743758201599, "learning_rate": 0.002, "loss": 2.3875, "step": 21300 }, { "epoch": 0.08237850040976635, "grad_norm": 0.08865063637495041, "learning_rate": 0.002, "loss": 2.3762, "step": 21310 }, { "epoch": 0.08241715761314963, "grad_norm": 0.13477951288223267, "learning_rate": 0.002, "loss": 2.3681, "step": 21320 }, { "epoch": 0.08245581481653291, "grad_norm": 0.1116044670343399, "learning_rate": 0.002, "loss": 2.3654, "step": 21330 }, { "epoch": 0.08249447201991619, "grad_norm": 0.10824161022901535, "learning_rate": 0.002, "loss": 2.3846, "step": 21340 }, { "epoch": 0.08253312922329947, "grad_norm": 0.10181602835655212, "learning_rate": 0.002, "loss": 2.3758, "step": 21350 }, { "epoch": 0.08257178642668275, "grad_norm": 0.11037593334913254, "learning_rate": 0.002, "loss": 2.3822, "step": 21360 }, { "epoch": 0.08261044363006603, "grad_norm": 0.1049826517701149, "learning_rate": 0.002, "loss": 2.3849, "step": 21370 }, { "epoch": 0.0826491008334493, "grad_norm": 0.11907021701335907, "learning_rate": 0.002, "loss": 2.3867, "step": 21380 }, { "epoch": 0.08268775803683258, "grad_norm": 0.10949509590864182, "learning_rate": 0.002, "loss": 2.3784, "step": 21390 }, { "epoch": 0.08272641524021586, "grad_norm": 0.16441144049167633, "learning_rate": 0.002, "loss": 2.3817, "step": 21400 }, { "epoch": 0.08276507244359914, "grad_norm": 0.10310792177915573, "learning_rate": 0.002, "loss": 2.3903, "step": 21410 }, { "epoch": 0.08280372964698242, "grad_norm": 0.11294974386692047, "learning_rate": 0.002, "loss": 2.373, "step": 21420 }, { "epoch": 0.0828423868503657, "grad_norm": 0.09527469426393509, "learning_rate": 0.002, "loss": 2.3745, "step": 21430 }, { "epoch": 0.08288104405374898, "grad_norm": 0.10629330575466156, "learning_rate": 0.002, "loss": 2.3775, "step": 21440 }, { "epoch": 0.08291970125713226, "grad_norm": 0.11222105473279953, "learning_rate": 0.002, "loss": 2.3844, "step": 21450 }, { "epoch": 0.08295835846051554, "grad_norm": 0.11182720214128494, "learning_rate": 0.002, "loss": 2.3659, "step": 21460 }, { "epoch": 0.0829970156638988, "grad_norm": 0.10755111277103424, "learning_rate": 0.002, "loss": 2.3759, "step": 21470 }, { "epoch": 0.08303567286728208, "grad_norm": 0.10462916642427444, "learning_rate": 0.002, "loss": 2.3808, "step": 21480 }, { "epoch": 0.08307433007066536, "grad_norm": 0.10938968509435654, "learning_rate": 0.002, "loss": 2.3701, "step": 21490 }, { "epoch": 0.08311298727404864, "grad_norm": 0.19250869750976562, "learning_rate": 0.002, "loss": 2.39, "step": 21500 }, { "epoch": 0.08315164447743192, "grad_norm": 0.10719188302755356, "learning_rate": 0.002, "loss": 2.3638, "step": 21510 }, { "epoch": 0.0831903016808152, "grad_norm": 0.1125466376543045, "learning_rate": 0.002, "loss": 2.361, "step": 21520 }, { "epoch": 0.08322895888419848, "grad_norm": 0.10615231096744537, "learning_rate": 0.002, "loss": 2.3969, "step": 21530 }, { "epoch": 0.08326761608758176, "grad_norm": 0.1436234563589096, "learning_rate": 0.002, "loss": 2.3849, "step": 21540 }, { "epoch": 0.08330627329096504, "grad_norm": 0.1111617162823677, "learning_rate": 0.002, "loss": 2.3709, "step": 21550 }, { "epoch": 0.08334493049434832, "grad_norm": 0.11619032174348831, "learning_rate": 0.002, "loss": 2.3845, "step": 21560 }, { "epoch": 0.08338358769773159, "grad_norm": 0.15463846921920776, "learning_rate": 0.002, "loss": 2.3772, "step": 21570 }, { "epoch": 0.08342224490111487, "grad_norm": 0.11931682378053665, "learning_rate": 0.002, "loss": 2.3624, "step": 21580 }, { "epoch": 0.08346090210449815, "grad_norm": 0.12608924508094788, "learning_rate": 0.002, "loss": 2.3916, "step": 21590 }, { "epoch": 0.08349955930788143, "grad_norm": 0.11588284373283386, "learning_rate": 0.002, "loss": 2.3905, "step": 21600 }, { "epoch": 0.08353821651126471, "grad_norm": 0.1295454353094101, "learning_rate": 0.002, "loss": 2.3814, "step": 21610 }, { "epoch": 0.08357687371464799, "grad_norm": 0.10524514317512512, "learning_rate": 0.002, "loss": 2.372, "step": 21620 }, { "epoch": 0.08361553091803127, "grad_norm": 0.10964474081993103, "learning_rate": 0.002, "loss": 2.3772, "step": 21630 }, { "epoch": 0.08365418812141455, "grad_norm": 0.11840105056762695, "learning_rate": 0.002, "loss": 2.3657, "step": 21640 }, { "epoch": 0.08369284532479783, "grad_norm": 0.12307754904031754, "learning_rate": 0.002, "loss": 2.3889, "step": 21650 }, { "epoch": 0.0837315025281811, "grad_norm": 0.09594234824180603, "learning_rate": 0.002, "loss": 2.3894, "step": 21660 }, { "epoch": 0.08377015973156438, "grad_norm": 0.1343754529953003, "learning_rate": 0.002, "loss": 2.3675, "step": 21670 }, { "epoch": 0.08380881693494766, "grad_norm": 0.10107145458459854, "learning_rate": 0.002, "loss": 2.3706, "step": 21680 }, { "epoch": 0.08384747413833094, "grad_norm": 0.10919588059186935, "learning_rate": 0.002, "loss": 2.3831, "step": 21690 }, { "epoch": 0.08388613134171422, "grad_norm": 0.10154515504837036, "learning_rate": 0.002, "loss": 2.3782, "step": 21700 }, { "epoch": 0.0839247885450975, "grad_norm": 0.11246238648891449, "learning_rate": 0.002, "loss": 2.3888, "step": 21710 }, { "epoch": 0.08396344574848078, "grad_norm": 0.10734483599662781, "learning_rate": 0.002, "loss": 2.3742, "step": 21720 }, { "epoch": 0.08400210295186405, "grad_norm": 0.11789402365684509, "learning_rate": 0.002, "loss": 2.3939, "step": 21730 }, { "epoch": 0.08404076015524733, "grad_norm": 0.10355214029550552, "learning_rate": 0.002, "loss": 2.3824, "step": 21740 }, { "epoch": 0.0840794173586306, "grad_norm": 0.10136032849550247, "learning_rate": 0.002, "loss": 2.3741, "step": 21750 }, { "epoch": 0.08411807456201388, "grad_norm": 0.1416572630405426, "learning_rate": 0.002, "loss": 2.3941, "step": 21760 }, { "epoch": 0.08415673176539716, "grad_norm": 0.11044862866401672, "learning_rate": 0.002, "loss": 2.3807, "step": 21770 }, { "epoch": 0.08419538896878044, "grad_norm": 0.12113585323095322, "learning_rate": 0.002, "loss": 2.3937, "step": 21780 }, { "epoch": 0.08423404617216372, "grad_norm": 0.09596196562051773, "learning_rate": 0.002, "loss": 2.3673, "step": 21790 }, { "epoch": 0.084272703375547, "grad_norm": 0.11969374120235443, "learning_rate": 0.002, "loss": 2.3802, "step": 21800 }, { "epoch": 0.08431136057893028, "grad_norm": 0.11508044600486755, "learning_rate": 0.002, "loss": 2.3805, "step": 21810 }, { "epoch": 0.08435001778231356, "grad_norm": 0.10925393551588058, "learning_rate": 0.002, "loss": 2.3738, "step": 21820 }, { "epoch": 0.08438867498569684, "grad_norm": 0.12044740468263626, "learning_rate": 0.002, "loss": 2.3882, "step": 21830 }, { "epoch": 0.0844273321890801, "grad_norm": 0.10578937828540802, "learning_rate": 0.002, "loss": 2.3847, "step": 21840 }, { "epoch": 0.08446598939246339, "grad_norm": 0.1018751934170723, "learning_rate": 0.002, "loss": 2.3708, "step": 21850 }, { "epoch": 0.08450464659584667, "grad_norm": 0.11004742980003357, "learning_rate": 0.002, "loss": 2.3717, "step": 21860 }, { "epoch": 0.08454330379922995, "grad_norm": 0.12035630643367767, "learning_rate": 0.002, "loss": 2.3819, "step": 21870 }, { "epoch": 0.08458196100261323, "grad_norm": 0.14227500557899475, "learning_rate": 0.002, "loss": 2.3665, "step": 21880 }, { "epoch": 0.0846206182059965, "grad_norm": 0.1038215234875679, "learning_rate": 0.002, "loss": 2.3802, "step": 21890 }, { "epoch": 0.08465927540937979, "grad_norm": 0.10981255024671555, "learning_rate": 0.002, "loss": 2.3748, "step": 21900 }, { "epoch": 0.08469793261276307, "grad_norm": 0.13036499917507172, "learning_rate": 0.002, "loss": 2.3682, "step": 21910 }, { "epoch": 0.08473658981614635, "grad_norm": 0.10847773402929306, "learning_rate": 0.002, "loss": 2.3762, "step": 21920 }, { "epoch": 0.08477524701952963, "grad_norm": 0.11355654150247574, "learning_rate": 0.002, "loss": 2.4061, "step": 21930 }, { "epoch": 0.08481390422291289, "grad_norm": 0.10847753286361694, "learning_rate": 0.002, "loss": 2.3797, "step": 21940 }, { "epoch": 0.08485256142629617, "grad_norm": 0.1272539645433426, "learning_rate": 0.002, "loss": 2.3882, "step": 21950 }, { "epoch": 0.08489121862967945, "grad_norm": 0.10342312604188919, "learning_rate": 0.002, "loss": 2.3669, "step": 21960 }, { "epoch": 0.08492987583306273, "grad_norm": 0.12353398650884628, "learning_rate": 0.002, "loss": 2.3753, "step": 21970 }, { "epoch": 0.08496853303644601, "grad_norm": 0.1458020955324173, "learning_rate": 0.002, "loss": 2.3686, "step": 21980 }, { "epoch": 0.08500719023982929, "grad_norm": 0.11380862444639206, "learning_rate": 0.002, "loss": 2.3678, "step": 21990 }, { "epoch": 0.08504584744321257, "grad_norm": 0.12858135998249054, "learning_rate": 0.002, "loss": 2.391, "step": 22000 }, { "epoch": 0.08508450464659585, "grad_norm": 0.10241194814443588, "learning_rate": 0.002, "loss": 2.3877, "step": 22010 }, { "epoch": 0.08512316184997913, "grad_norm": 0.10861173272132874, "learning_rate": 0.002, "loss": 2.3798, "step": 22020 }, { "epoch": 0.0851618190533624, "grad_norm": 0.09653986990451813, "learning_rate": 0.002, "loss": 2.3825, "step": 22030 }, { "epoch": 0.08520047625674568, "grad_norm": 0.1258213073015213, "learning_rate": 0.002, "loss": 2.379, "step": 22040 }, { "epoch": 0.08523913346012896, "grad_norm": 0.11620525270700455, "learning_rate": 0.002, "loss": 2.3702, "step": 22050 }, { "epoch": 0.08527779066351224, "grad_norm": 0.10526707023382187, "learning_rate": 0.002, "loss": 2.3773, "step": 22060 }, { "epoch": 0.08531644786689552, "grad_norm": 0.11238943785429001, "learning_rate": 0.002, "loss": 2.3702, "step": 22070 }, { "epoch": 0.0853551050702788, "grad_norm": 0.1280938982963562, "learning_rate": 0.002, "loss": 2.3755, "step": 22080 }, { "epoch": 0.08539376227366208, "grad_norm": 0.09797575324773788, "learning_rate": 0.002, "loss": 2.3991, "step": 22090 }, { "epoch": 0.08543241947704536, "grad_norm": 0.1478355973958969, "learning_rate": 0.002, "loss": 2.3741, "step": 22100 }, { "epoch": 0.08547107668042864, "grad_norm": 0.11716704070568085, "learning_rate": 0.002, "loss": 2.3911, "step": 22110 }, { "epoch": 0.0855097338838119, "grad_norm": 0.1012004092335701, "learning_rate": 0.002, "loss": 2.3681, "step": 22120 }, { "epoch": 0.08554839108719518, "grad_norm": 0.09938769787549973, "learning_rate": 0.002, "loss": 2.3866, "step": 22130 }, { "epoch": 0.08558704829057846, "grad_norm": 0.12744750082492828, "learning_rate": 0.002, "loss": 2.3817, "step": 22140 }, { "epoch": 0.08562570549396174, "grad_norm": 0.14632226526737213, "learning_rate": 0.002, "loss": 2.3653, "step": 22150 }, { "epoch": 0.08566436269734502, "grad_norm": 0.10066097229719162, "learning_rate": 0.002, "loss": 2.3861, "step": 22160 }, { "epoch": 0.0857030199007283, "grad_norm": 0.10367601364850998, "learning_rate": 0.002, "loss": 2.3769, "step": 22170 }, { "epoch": 0.08574167710411158, "grad_norm": 0.11595974117517471, "learning_rate": 0.002, "loss": 2.383, "step": 22180 }, { "epoch": 0.08578033430749486, "grad_norm": 0.13266971707344055, "learning_rate": 0.002, "loss": 2.377, "step": 22190 }, { "epoch": 0.08581899151087814, "grad_norm": 0.10529126226902008, "learning_rate": 0.002, "loss": 2.3815, "step": 22200 }, { "epoch": 0.08585764871426141, "grad_norm": 0.10715759545564651, "learning_rate": 0.002, "loss": 2.3778, "step": 22210 }, { "epoch": 0.08589630591764469, "grad_norm": 0.11852872371673584, "learning_rate": 0.002, "loss": 2.3844, "step": 22220 }, { "epoch": 0.08593496312102797, "grad_norm": 0.11074218899011612, "learning_rate": 0.002, "loss": 2.3853, "step": 22230 }, { "epoch": 0.08597362032441125, "grad_norm": 0.1398768573999405, "learning_rate": 0.002, "loss": 2.3765, "step": 22240 }, { "epoch": 0.08601227752779453, "grad_norm": 0.12655308842658997, "learning_rate": 0.002, "loss": 2.3886, "step": 22250 }, { "epoch": 0.08605093473117781, "grad_norm": 0.13221947848796844, "learning_rate": 0.002, "loss": 2.365, "step": 22260 }, { "epoch": 0.08608959193456109, "grad_norm": 0.12071418017148972, "learning_rate": 0.002, "loss": 2.393, "step": 22270 }, { "epoch": 0.08612824913794437, "grad_norm": 0.09518468379974365, "learning_rate": 0.002, "loss": 2.3818, "step": 22280 }, { "epoch": 0.08616690634132765, "grad_norm": 0.11835268884897232, "learning_rate": 0.002, "loss": 2.3637, "step": 22290 }, { "epoch": 0.08620556354471093, "grad_norm": 0.11249658465385437, "learning_rate": 0.002, "loss": 2.3789, "step": 22300 }, { "epoch": 0.0862442207480942, "grad_norm": 0.12210391461849213, "learning_rate": 0.002, "loss": 2.3722, "step": 22310 }, { "epoch": 0.08628287795147747, "grad_norm": 0.10461321473121643, "learning_rate": 0.002, "loss": 2.3923, "step": 22320 }, { "epoch": 0.08632153515486075, "grad_norm": 0.10224251449108124, "learning_rate": 0.002, "loss": 2.3771, "step": 22330 }, { "epoch": 0.08636019235824403, "grad_norm": 0.12130418419837952, "learning_rate": 0.002, "loss": 2.3654, "step": 22340 }, { "epoch": 0.08639884956162731, "grad_norm": 0.1214376837015152, "learning_rate": 0.002, "loss": 2.3913, "step": 22350 }, { "epoch": 0.0864375067650106, "grad_norm": 0.11018189042806625, "learning_rate": 0.002, "loss": 2.376, "step": 22360 }, { "epoch": 0.08647616396839387, "grad_norm": 0.11936061084270477, "learning_rate": 0.002, "loss": 2.39, "step": 22370 }, { "epoch": 0.08651482117177715, "grad_norm": 0.10982014983892441, "learning_rate": 0.002, "loss": 2.3728, "step": 22380 }, { "epoch": 0.08655347837516043, "grad_norm": 0.11572841554880142, "learning_rate": 0.002, "loss": 2.3712, "step": 22390 }, { "epoch": 0.0865921355785437, "grad_norm": 0.11071977764368057, "learning_rate": 0.002, "loss": 2.3692, "step": 22400 }, { "epoch": 0.08663079278192698, "grad_norm": 0.1090395525097847, "learning_rate": 0.002, "loss": 2.3885, "step": 22410 }, { "epoch": 0.08666944998531026, "grad_norm": 0.11444620043039322, "learning_rate": 0.002, "loss": 2.3809, "step": 22420 }, { "epoch": 0.08670810718869354, "grad_norm": 0.11480669677257538, "learning_rate": 0.002, "loss": 2.3794, "step": 22430 }, { "epoch": 0.08674676439207682, "grad_norm": 0.11158749461174011, "learning_rate": 0.002, "loss": 2.3681, "step": 22440 }, { "epoch": 0.0867854215954601, "grad_norm": 0.10097053647041321, "learning_rate": 0.002, "loss": 2.3953, "step": 22450 }, { "epoch": 0.08682407879884338, "grad_norm": 0.12639379501342773, "learning_rate": 0.002, "loss": 2.3656, "step": 22460 }, { "epoch": 0.08686273600222666, "grad_norm": 0.12102729082107544, "learning_rate": 0.002, "loss": 2.3895, "step": 22470 }, { "epoch": 0.08690139320560994, "grad_norm": 0.1355907917022705, "learning_rate": 0.002, "loss": 2.3871, "step": 22480 }, { "epoch": 0.0869400504089932, "grad_norm": 0.11741629987955093, "learning_rate": 0.002, "loss": 2.3796, "step": 22490 }, { "epoch": 0.08697870761237649, "grad_norm": 0.11757150292396545, "learning_rate": 0.002, "loss": 2.3884, "step": 22500 }, { "epoch": 0.08701736481575977, "grad_norm": 0.12271060794591904, "learning_rate": 0.002, "loss": 2.3721, "step": 22510 }, { "epoch": 0.08705602201914305, "grad_norm": 0.10545120388269424, "learning_rate": 0.002, "loss": 2.3806, "step": 22520 }, { "epoch": 0.08709467922252632, "grad_norm": 0.1205563023686409, "learning_rate": 0.002, "loss": 2.3761, "step": 22530 }, { "epoch": 0.0871333364259096, "grad_norm": 0.10538642108440399, "learning_rate": 0.002, "loss": 2.3709, "step": 22540 }, { "epoch": 0.08717199362929288, "grad_norm": 0.13118794560432434, "learning_rate": 0.002, "loss": 2.3767, "step": 22550 }, { "epoch": 0.08721065083267616, "grad_norm": 0.11891195923089981, "learning_rate": 0.002, "loss": 2.3832, "step": 22560 }, { "epoch": 0.08724930803605944, "grad_norm": 0.12343083322048187, "learning_rate": 0.002, "loss": 2.3725, "step": 22570 }, { "epoch": 0.08728796523944272, "grad_norm": 0.10861959308385849, "learning_rate": 0.002, "loss": 2.3763, "step": 22580 }, { "epoch": 0.08732662244282599, "grad_norm": 0.11839080601930618, "learning_rate": 0.002, "loss": 2.3824, "step": 22590 }, { "epoch": 0.08736527964620927, "grad_norm": 0.1273038387298584, "learning_rate": 0.002, "loss": 2.398, "step": 22600 }, { "epoch": 0.08740393684959255, "grad_norm": 0.1179809421300888, "learning_rate": 0.002, "loss": 2.3778, "step": 22610 }, { "epoch": 0.08744259405297583, "grad_norm": 0.27098774909973145, "learning_rate": 0.002, "loss": 2.3773, "step": 22620 }, { "epoch": 0.08748125125635911, "grad_norm": 0.12713086605072021, "learning_rate": 0.002, "loss": 2.3854, "step": 22630 }, { "epoch": 0.08751990845974239, "grad_norm": 0.11043170839548111, "learning_rate": 0.002, "loss": 2.3726, "step": 22640 }, { "epoch": 0.08755856566312567, "grad_norm": 0.11204589903354645, "learning_rate": 0.002, "loss": 2.3635, "step": 22650 }, { "epoch": 0.08759722286650895, "grad_norm": 0.11137638986110687, "learning_rate": 0.002, "loss": 2.3632, "step": 22660 }, { "epoch": 0.08763588006989223, "grad_norm": 0.12163740396499634, "learning_rate": 0.002, "loss": 2.3752, "step": 22670 }, { "epoch": 0.0876745372732755, "grad_norm": 0.14378522336483002, "learning_rate": 0.002, "loss": 2.3786, "step": 22680 }, { "epoch": 0.08771319447665878, "grad_norm": 0.14936842024326324, "learning_rate": 0.002, "loss": 2.3649, "step": 22690 }, { "epoch": 0.08775185168004206, "grad_norm": 0.1017768532037735, "learning_rate": 0.002, "loss": 2.387, "step": 22700 }, { "epoch": 0.08779050888342534, "grad_norm": 0.12708184123039246, "learning_rate": 0.002, "loss": 2.3728, "step": 22710 }, { "epoch": 0.08782916608680862, "grad_norm": 0.1156398355960846, "learning_rate": 0.002, "loss": 2.3878, "step": 22720 }, { "epoch": 0.0878678232901919, "grad_norm": 0.12609833478927612, "learning_rate": 0.002, "loss": 2.3816, "step": 22730 }, { "epoch": 0.08790648049357518, "grad_norm": 0.09571592509746552, "learning_rate": 0.002, "loss": 2.378, "step": 22740 }, { "epoch": 0.08794513769695846, "grad_norm": 0.12027204781770706, "learning_rate": 0.002, "loss": 2.3738, "step": 22750 }, { "epoch": 0.08798379490034174, "grad_norm": 0.11196243017911911, "learning_rate": 0.002, "loss": 2.3789, "step": 22760 }, { "epoch": 0.088022452103725, "grad_norm": 0.10924676060676575, "learning_rate": 0.002, "loss": 2.3787, "step": 22770 }, { "epoch": 0.08806110930710828, "grad_norm": 0.11749155819416046, "learning_rate": 0.002, "loss": 2.4005, "step": 22780 }, { "epoch": 0.08809976651049156, "grad_norm": 0.11545772105455399, "learning_rate": 0.002, "loss": 2.3803, "step": 22790 }, { "epoch": 0.08813842371387484, "grad_norm": 0.11182691156864166, "learning_rate": 0.002, "loss": 2.3754, "step": 22800 }, { "epoch": 0.08817708091725812, "grad_norm": 0.13504658639431, "learning_rate": 0.002, "loss": 2.3736, "step": 22810 }, { "epoch": 0.0882157381206414, "grad_norm": 0.11489430069923401, "learning_rate": 0.002, "loss": 2.3796, "step": 22820 }, { "epoch": 0.08825439532402468, "grad_norm": 0.11193528026342392, "learning_rate": 0.002, "loss": 2.3746, "step": 22830 }, { "epoch": 0.08829305252740796, "grad_norm": 0.13250324130058289, "learning_rate": 0.002, "loss": 2.3806, "step": 22840 }, { "epoch": 0.08833170973079124, "grad_norm": 0.11712247878313065, "learning_rate": 0.002, "loss": 2.3782, "step": 22850 }, { "epoch": 0.08837036693417451, "grad_norm": 0.11358913779258728, "learning_rate": 0.002, "loss": 2.3732, "step": 22860 }, { "epoch": 0.08840902413755779, "grad_norm": 0.10116658359766006, "learning_rate": 0.002, "loss": 2.3685, "step": 22870 }, { "epoch": 0.08844768134094107, "grad_norm": 0.12980881333351135, "learning_rate": 0.002, "loss": 2.3837, "step": 22880 }, { "epoch": 0.08848633854432435, "grad_norm": 0.1002582311630249, "learning_rate": 0.002, "loss": 2.3785, "step": 22890 }, { "epoch": 0.08852499574770763, "grad_norm": 0.09856297075748444, "learning_rate": 0.002, "loss": 2.3962, "step": 22900 }, { "epoch": 0.08856365295109091, "grad_norm": 0.10269007831811905, "learning_rate": 0.002, "loss": 2.3878, "step": 22910 }, { "epoch": 0.08860231015447419, "grad_norm": 0.11187140643596649, "learning_rate": 0.002, "loss": 2.3951, "step": 22920 }, { "epoch": 0.08864096735785747, "grad_norm": 0.11259777843952179, "learning_rate": 0.002, "loss": 2.3704, "step": 22930 }, { "epoch": 0.08867962456124075, "grad_norm": 0.09621060639619827, "learning_rate": 0.002, "loss": 2.3723, "step": 22940 }, { "epoch": 0.08871828176462403, "grad_norm": 0.11501479893922806, "learning_rate": 0.002, "loss": 2.3937, "step": 22950 }, { "epoch": 0.08875693896800729, "grad_norm": 0.17122884094715118, "learning_rate": 0.002, "loss": 2.367, "step": 22960 }, { "epoch": 0.08879559617139057, "grad_norm": 0.10345969349145889, "learning_rate": 0.002, "loss": 2.3783, "step": 22970 }, { "epoch": 0.08883425337477385, "grad_norm": 0.11186736822128296, "learning_rate": 0.002, "loss": 2.3751, "step": 22980 }, { "epoch": 0.08887291057815713, "grad_norm": 0.11556321382522583, "learning_rate": 0.002, "loss": 2.3673, "step": 22990 }, { "epoch": 0.08891156778154041, "grad_norm": 0.10784627497196198, "learning_rate": 0.002, "loss": 2.3777, "step": 23000 }, { "epoch": 0.08895022498492369, "grad_norm": 0.12641456723213196, "learning_rate": 0.002, "loss": 2.3868, "step": 23010 }, { "epoch": 0.08898888218830697, "grad_norm": 0.14297804236412048, "learning_rate": 0.002, "loss": 2.3693, "step": 23020 }, { "epoch": 0.08902753939169025, "grad_norm": 0.12131819874048233, "learning_rate": 0.002, "loss": 2.3849, "step": 23030 }, { "epoch": 0.08906619659507353, "grad_norm": 0.11721863597631454, "learning_rate": 0.002, "loss": 2.3866, "step": 23040 }, { "epoch": 0.0891048537984568, "grad_norm": 0.13657094538211823, "learning_rate": 0.002, "loss": 2.3778, "step": 23050 }, { "epoch": 0.08914351100184008, "grad_norm": 0.15033309161663055, "learning_rate": 0.002, "loss": 2.3971, "step": 23060 }, { "epoch": 0.08918216820522336, "grad_norm": 0.10266692191362381, "learning_rate": 0.002, "loss": 2.3666, "step": 23070 }, { "epoch": 0.08922082540860664, "grad_norm": 0.1215285062789917, "learning_rate": 0.002, "loss": 2.3915, "step": 23080 }, { "epoch": 0.08925948261198992, "grad_norm": 0.12149213999509811, "learning_rate": 0.002, "loss": 2.3718, "step": 23090 }, { "epoch": 0.0892981398153732, "grad_norm": 0.2718287408351898, "learning_rate": 0.002, "loss": 2.3813, "step": 23100 }, { "epoch": 0.08933679701875648, "grad_norm": 0.12585672736167908, "learning_rate": 0.002, "loss": 2.3731, "step": 23110 }, { "epoch": 0.08937545422213976, "grad_norm": 0.1085464283823967, "learning_rate": 0.002, "loss": 2.357, "step": 23120 }, { "epoch": 0.08941411142552304, "grad_norm": 0.12199816852807999, "learning_rate": 0.002, "loss": 2.3805, "step": 23130 }, { "epoch": 0.0894527686289063, "grad_norm": 0.12557195127010345, "learning_rate": 0.002, "loss": 2.3757, "step": 23140 }, { "epoch": 0.08949142583228958, "grad_norm": 0.11516721546649933, "learning_rate": 0.002, "loss": 2.3791, "step": 23150 }, { "epoch": 0.08953008303567286, "grad_norm": 0.11621605604887009, "learning_rate": 0.002, "loss": 2.3718, "step": 23160 }, { "epoch": 0.08956874023905614, "grad_norm": 0.13228261470794678, "learning_rate": 0.002, "loss": 2.3782, "step": 23170 }, { "epoch": 0.08960739744243942, "grad_norm": 0.111338771879673, "learning_rate": 0.002, "loss": 2.3603, "step": 23180 }, { "epoch": 0.0896460546458227, "grad_norm": 0.12004300951957703, "learning_rate": 0.002, "loss": 2.3668, "step": 23190 }, { "epoch": 0.08968471184920598, "grad_norm": 0.1402856856584549, "learning_rate": 0.002, "loss": 2.3698, "step": 23200 }, { "epoch": 0.08972336905258926, "grad_norm": 0.09876136481761932, "learning_rate": 0.002, "loss": 2.3736, "step": 23210 }, { "epoch": 0.08976202625597254, "grad_norm": 0.10694080591201782, "learning_rate": 0.002, "loss": 2.3848, "step": 23220 }, { "epoch": 0.08980068345935581, "grad_norm": 0.1302351951599121, "learning_rate": 0.002, "loss": 2.3882, "step": 23230 }, { "epoch": 0.08983934066273909, "grad_norm": 0.10748574882745743, "learning_rate": 0.002, "loss": 2.3709, "step": 23240 }, { "epoch": 0.08987799786612237, "grad_norm": 0.1744678020477295, "learning_rate": 0.002, "loss": 2.3832, "step": 23250 }, { "epoch": 0.08991665506950565, "grad_norm": 0.09777440130710602, "learning_rate": 0.002, "loss": 2.3729, "step": 23260 }, { "epoch": 0.08995531227288893, "grad_norm": 0.11797336488962173, "learning_rate": 0.002, "loss": 2.3902, "step": 23270 }, { "epoch": 0.08999396947627221, "grad_norm": 0.10277258604764938, "learning_rate": 0.002, "loss": 2.3748, "step": 23280 }, { "epoch": 0.09003262667965549, "grad_norm": 0.12030167877674103, "learning_rate": 0.002, "loss": 2.3812, "step": 23290 }, { "epoch": 0.09007128388303877, "grad_norm": 0.10009963065385818, "learning_rate": 0.002, "loss": 2.3792, "step": 23300 }, { "epoch": 0.09010994108642205, "grad_norm": 0.13436256349086761, "learning_rate": 0.002, "loss": 2.3717, "step": 23310 }, { "epoch": 0.09014859828980533, "grad_norm": 0.11342042684555054, "learning_rate": 0.002, "loss": 2.3698, "step": 23320 }, { "epoch": 0.0901872554931886, "grad_norm": 0.14033156633377075, "learning_rate": 0.002, "loss": 2.3736, "step": 23330 }, { "epoch": 0.09022591269657187, "grad_norm": 0.11601479351520538, "learning_rate": 0.002, "loss": 2.3727, "step": 23340 }, { "epoch": 0.09026456989995515, "grad_norm": 0.1295476257801056, "learning_rate": 0.002, "loss": 2.3816, "step": 23350 }, { "epoch": 0.09030322710333843, "grad_norm": 0.1155814528465271, "learning_rate": 0.002, "loss": 2.3875, "step": 23360 }, { "epoch": 0.09034188430672171, "grad_norm": 0.11913534253835678, "learning_rate": 0.002, "loss": 2.3739, "step": 23370 }, { "epoch": 0.090380541510105, "grad_norm": 0.13233350217342377, "learning_rate": 0.002, "loss": 2.3773, "step": 23380 }, { "epoch": 0.09041919871348827, "grad_norm": 0.1004096195101738, "learning_rate": 0.002, "loss": 2.3746, "step": 23390 }, { "epoch": 0.09045785591687155, "grad_norm": 0.12615826725959778, "learning_rate": 0.002, "loss": 2.3881, "step": 23400 }, { "epoch": 0.09049651312025483, "grad_norm": 0.11457603424787521, "learning_rate": 0.002, "loss": 2.3899, "step": 23410 }, { "epoch": 0.0905351703236381, "grad_norm": 0.11348090320825577, "learning_rate": 0.002, "loss": 2.3757, "step": 23420 }, { "epoch": 0.09057382752702138, "grad_norm": 0.10704642534255981, "learning_rate": 0.002, "loss": 2.3771, "step": 23430 }, { "epoch": 0.09061248473040466, "grad_norm": 0.11089534312486649, "learning_rate": 0.002, "loss": 2.3991, "step": 23440 }, { "epoch": 0.09065114193378794, "grad_norm": 0.1276397705078125, "learning_rate": 0.002, "loss": 2.3582, "step": 23450 }, { "epoch": 0.09068979913717122, "grad_norm": 0.11047758162021637, "learning_rate": 0.002, "loss": 2.3748, "step": 23460 }, { "epoch": 0.0907284563405545, "grad_norm": 0.1099272072315216, "learning_rate": 0.002, "loss": 2.379, "step": 23470 }, { "epoch": 0.09076711354393778, "grad_norm": 0.13830797374248505, "learning_rate": 0.002, "loss": 2.3711, "step": 23480 }, { "epoch": 0.09080577074732106, "grad_norm": 0.1121407300233841, "learning_rate": 0.002, "loss": 2.3686, "step": 23490 }, { "epoch": 0.09084442795070434, "grad_norm": 0.12352314591407776, "learning_rate": 0.002, "loss": 2.3745, "step": 23500 }, { "epoch": 0.0908830851540876, "grad_norm": 0.11088527739048004, "learning_rate": 0.002, "loss": 2.3647, "step": 23510 }, { "epoch": 0.09092174235747089, "grad_norm": 0.11665134131908417, "learning_rate": 0.002, "loss": 2.3767, "step": 23520 }, { "epoch": 0.09096039956085417, "grad_norm": 0.1182350143790245, "learning_rate": 0.002, "loss": 2.3898, "step": 23530 }, { "epoch": 0.09099905676423745, "grad_norm": 0.1312289983034134, "learning_rate": 0.002, "loss": 2.3797, "step": 23540 }, { "epoch": 0.09103771396762073, "grad_norm": 0.0952557697892189, "learning_rate": 0.002, "loss": 2.3767, "step": 23550 }, { "epoch": 0.091076371171004, "grad_norm": 0.11382191628217697, "learning_rate": 0.002, "loss": 2.3732, "step": 23560 }, { "epoch": 0.09111502837438729, "grad_norm": 0.11281711608171463, "learning_rate": 0.002, "loss": 2.3659, "step": 23570 }, { "epoch": 0.09115368557777057, "grad_norm": 0.13775767385959625, "learning_rate": 0.002, "loss": 2.3964, "step": 23580 }, { "epoch": 0.09119234278115385, "grad_norm": 0.11854385584592819, "learning_rate": 0.002, "loss": 2.3732, "step": 23590 }, { "epoch": 0.09123099998453713, "grad_norm": 0.11661964654922485, "learning_rate": 0.002, "loss": 2.3875, "step": 23600 }, { "epoch": 0.09126965718792039, "grad_norm": 0.11538533866405487, "learning_rate": 0.002, "loss": 2.3616, "step": 23610 }, { "epoch": 0.09130831439130367, "grad_norm": 0.12258029729127884, "learning_rate": 0.002, "loss": 2.37, "step": 23620 }, { "epoch": 0.09134697159468695, "grad_norm": 0.10069010406732559, "learning_rate": 0.002, "loss": 2.3553, "step": 23630 }, { "epoch": 0.09138562879807023, "grad_norm": 0.0946861207485199, "learning_rate": 0.002, "loss": 2.383, "step": 23640 }, { "epoch": 0.09142428600145351, "grad_norm": 0.11013893038034439, "learning_rate": 0.002, "loss": 2.3659, "step": 23650 }, { "epoch": 0.09146294320483679, "grad_norm": 0.13029184937477112, "learning_rate": 0.002, "loss": 2.3733, "step": 23660 }, { "epoch": 0.09150160040822007, "grad_norm": 0.11333515495061874, "learning_rate": 0.002, "loss": 2.3695, "step": 23670 }, { "epoch": 0.09154025761160335, "grad_norm": 0.09361569583415985, "learning_rate": 0.002, "loss": 2.3795, "step": 23680 }, { "epoch": 0.09157891481498663, "grad_norm": 0.14618952572345734, "learning_rate": 0.002, "loss": 2.389, "step": 23690 }, { "epoch": 0.0916175720183699, "grad_norm": 0.12504111230373383, "learning_rate": 0.002, "loss": 2.3679, "step": 23700 }, { "epoch": 0.09165622922175318, "grad_norm": 0.11918000131845474, "learning_rate": 0.002, "loss": 2.3891, "step": 23710 }, { "epoch": 0.09169488642513646, "grad_norm": 0.21253357827663422, "learning_rate": 0.002, "loss": 2.379, "step": 23720 }, { "epoch": 0.09173354362851974, "grad_norm": 0.1254817694425583, "learning_rate": 0.002, "loss": 2.3724, "step": 23730 }, { "epoch": 0.09177220083190302, "grad_norm": 0.1153513491153717, "learning_rate": 0.002, "loss": 2.3723, "step": 23740 }, { "epoch": 0.0918108580352863, "grad_norm": 0.11657676100730896, "learning_rate": 0.002, "loss": 2.3675, "step": 23750 }, { "epoch": 0.09184951523866958, "grad_norm": 0.13089793920516968, "learning_rate": 0.002, "loss": 2.3732, "step": 23760 }, { "epoch": 0.09188817244205286, "grad_norm": 0.10609673708677292, "learning_rate": 0.002, "loss": 2.376, "step": 23770 }, { "epoch": 0.09192682964543614, "grad_norm": 0.13091377913951874, "learning_rate": 0.002, "loss": 2.3733, "step": 23780 }, { "epoch": 0.0919654868488194, "grad_norm": 0.10436306893825531, "learning_rate": 0.002, "loss": 2.3703, "step": 23790 }, { "epoch": 0.09200414405220268, "grad_norm": 0.10807617008686066, "learning_rate": 0.002, "loss": 2.3861, "step": 23800 }, { "epoch": 0.09204280125558596, "grad_norm": 0.10745303332805634, "learning_rate": 0.002, "loss": 2.3716, "step": 23810 }, { "epoch": 0.09208145845896924, "grad_norm": 0.11694838106632233, "learning_rate": 0.002, "loss": 2.3826, "step": 23820 }, { "epoch": 0.09212011566235252, "grad_norm": 0.1124061644077301, "learning_rate": 0.002, "loss": 2.3869, "step": 23830 }, { "epoch": 0.0921587728657358, "grad_norm": 0.1196490079164505, "learning_rate": 0.002, "loss": 2.3746, "step": 23840 }, { "epoch": 0.09219743006911908, "grad_norm": 0.10835790634155273, "learning_rate": 0.002, "loss": 2.3577, "step": 23850 }, { "epoch": 0.09223608727250236, "grad_norm": 0.13237883150577545, "learning_rate": 0.002, "loss": 2.3794, "step": 23860 }, { "epoch": 0.09227474447588564, "grad_norm": 0.1156863123178482, "learning_rate": 0.002, "loss": 2.3916, "step": 23870 }, { "epoch": 0.09231340167926891, "grad_norm": 0.10456960648298264, "learning_rate": 0.002, "loss": 2.3813, "step": 23880 }, { "epoch": 0.09235205888265219, "grad_norm": 0.09652217477560043, "learning_rate": 0.002, "loss": 2.3786, "step": 23890 }, { "epoch": 0.09239071608603547, "grad_norm": 0.12134707719087601, "learning_rate": 0.002, "loss": 2.3895, "step": 23900 }, { "epoch": 0.09242937328941875, "grad_norm": 0.10908094793558121, "learning_rate": 0.002, "loss": 2.3767, "step": 23910 }, { "epoch": 0.09246803049280203, "grad_norm": 0.13318543136119843, "learning_rate": 0.002, "loss": 2.3643, "step": 23920 }, { "epoch": 0.09250668769618531, "grad_norm": 0.10980760306119919, "learning_rate": 0.002, "loss": 2.3926, "step": 23930 }, { "epoch": 0.09254534489956859, "grad_norm": 0.10727297514677048, "learning_rate": 0.002, "loss": 2.3828, "step": 23940 }, { "epoch": 0.09258400210295187, "grad_norm": 0.11008038371801376, "learning_rate": 0.002, "loss": 2.3749, "step": 23950 }, { "epoch": 0.09262265930633515, "grad_norm": 0.1304430365562439, "learning_rate": 0.002, "loss": 2.3669, "step": 23960 }, { "epoch": 0.09266131650971843, "grad_norm": 0.10171637684106827, "learning_rate": 0.002, "loss": 2.3614, "step": 23970 }, { "epoch": 0.0926999737131017, "grad_norm": 0.11464967578649521, "learning_rate": 0.002, "loss": 2.3498, "step": 23980 }, { "epoch": 0.09273863091648497, "grad_norm": 0.10553108900785446, "learning_rate": 0.002, "loss": 2.3669, "step": 23990 }, { "epoch": 0.09277728811986825, "grad_norm": 0.12580671906471252, "learning_rate": 0.002, "loss": 2.3844, "step": 24000 }, { "epoch": 0.09281594532325153, "grad_norm": 0.11327177286148071, "learning_rate": 0.002, "loss": 2.38, "step": 24010 }, { "epoch": 0.09285460252663481, "grad_norm": 0.10099143534898758, "learning_rate": 0.002, "loss": 2.3553, "step": 24020 }, { "epoch": 0.0928932597300181, "grad_norm": 0.13738584518432617, "learning_rate": 0.002, "loss": 2.3771, "step": 24030 }, { "epoch": 0.09293191693340137, "grad_norm": 0.11237277090549469, "learning_rate": 0.002, "loss": 2.37, "step": 24040 }, { "epoch": 0.09297057413678465, "grad_norm": 0.11633989959955215, "learning_rate": 0.002, "loss": 2.3777, "step": 24050 }, { "epoch": 0.09300923134016793, "grad_norm": 0.13667155802249908, "learning_rate": 0.002, "loss": 2.3759, "step": 24060 }, { "epoch": 0.0930478885435512, "grad_norm": 0.10400626808404922, "learning_rate": 0.002, "loss": 2.3628, "step": 24070 }, { "epoch": 0.09308654574693448, "grad_norm": 0.12100454419851303, "learning_rate": 0.002, "loss": 2.3741, "step": 24080 }, { "epoch": 0.09312520295031776, "grad_norm": 0.1407627910375595, "learning_rate": 0.002, "loss": 2.3816, "step": 24090 }, { "epoch": 0.09316386015370104, "grad_norm": 0.116483174264431, "learning_rate": 0.002, "loss": 2.3742, "step": 24100 }, { "epoch": 0.09320251735708432, "grad_norm": 0.1100977286696434, "learning_rate": 0.002, "loss": 2.3679, "step": 24110 }, { "epoch": 0.0932411745604676, "grad_norm": 0.10602124035358429, "learning_rate": 0.002, "loss": 2.3631, "step": 24120 }, { "epoch": 0.09327983176385088, "grad_norm": 0.09690314531326294, "learning_rate": 0.002, "loss": 2.3803, "step": 24130 }, { "epoch": 0.09331848896723416, "grad_norm": 0.10744574666023254, "learning_rate": 0.002, "loss": 2.374, "step": 24140 }, { "epoch": 0.09335714617061744, "grad_norm": 0.10048481076955795, "learning_rate": 0.002, "loss": 2.3846, "step": 24150 }, { "epoch": 0.0933958033740007, "grad_norm": 0.13025830686092377, "learning_rate": 0.002, "loss": 2.3869, "step": 24160 }, { "epoch": 0.09343446057738398, "grad_norm": 0.09571056067943573, "learning_rate": 0.002, "loss": 2.3916, "step": 24170 }, { "epoch": 0.09347311778076726, "grad_norm": 0.13225597143173218, "learning_rate": 0.002, "loss": 2.3756, "step": 24180 }, { "epoch": 0.09351177498415054, "grad_norm": 0.11436488479375839, "learning_rate": 0.002, "loss": 2.3829, "step": 24190 }, { "epoch": 0.09355043218753382, "grad_norm": 0.13618864119052887, "learning_rate": 0.002, "loss": 2.3623, "step": 24200 }, { "epoch": 0.0935890893909171, "grad_norm": 0.1187516301870346, "learning_rate": 0.002, "loss": 2.3654, "step": 24210 }, { "epoch": 0.09362774659430038, "grad_norm": 0.09682077914476395, "learning_rate": 0.002, "loss": 2.353, "step": 24220 }, { "epoch": 0.09366640379768366, "grad_norm": 0.10571873933076859, "learning_rate": 0.002, "loss": 2.3683, "step": 24230 }, { "epoch": 0.09370506100106694, "grad_norm": 0.10611064732074738, "learning_rate": 0.002, "loss": 2.3624, "step": 24240 }, { "epoch": 0.09374371820445021, "grad_norm": 0.12139905989170074, "learning_rate": 0.002, "loss": 2.3842, "step": 24250 }, { "epoch": 0.09378237540783349, "grad_norm": 0.1359192132949829, "learning_rate": 0.002, "loss": 2.3828, "step": 24260 }, { "epoch": 0.09382103261121677, "grad_norm": 0.11392809450626373, "learning_rate": 0.002, "loss": 2.383, "step": 24270 }, { "epoch": 0.09385968981460005, "grad_norm": 0.12583954632282257, "learning_rate": 0.002, "loss": 2.3804, "step": 24280 }, { "epoch": 0.09389834701798333, "grad_norm": 0.10504510998725891, "learning_rate": 0.002, "loss": 2.3955, "step": 24290 }, { "epoch": 0.09393700422136661, "grad_norm": 0.12139500677585602, "learning_rate": 0.002, "loss": 2.3828, "step": 24300 }, { "epoch": 0.09397566142474989, "grad_norm": 0.12326785922050476, "learning_rate": 0.002, "loss": 2.3815, "step": 24310 }, { "epoch": 0.09401431862813317, "grad_norm": 0.10977497696876526, "learning_rate": 0.002, "loss": 2.3527, "step": 24320 }, { "epoch": 0.09405297583151645, "grad_norm": 0.10636772215366364, "learning_rate": 0.002, "loss": 2.3661, "step": 24330 }, { "epoch": 0.09409163303489973, "grad_norm": 0.10684899240732193, "learning_rate": 0.002, "loss": 2.3728, "step": 24340 }, { "epoch": 0.094130290238283, "grad_norm": 0.09462485462427139, "learning_rate": 0.002, "loss": 2.3854, "step": 24350 }, { "epoch": 0.09416894744166628, "grad_norm": 0.1221616268157959, "learning_rate": 0.002, "loss": 2.3796, "step": 24360 }, { "epoch": 0.09420760464504956, "grad_norm": 0.10672114044427872, "learning_rate": 0.002, "loss": 2.3693, "step": 24370 }, { "epoch": 0.09424626184843284, "grad_norm": 0.11299077421426773, "learning_rate": 0.002, "loss": 2.3716, "step": 24380 }, { "epoch": 0.09428491905181612, "grad_norm": 0.1013621836900711, "learning_rate": 0.002, "loss": 2.3771, "step": 24390 }, { "epoch": 0.0943235762551994, "grad_norm": 0.10756544023752213, "learning_rate": 0.002, "loss": 2.3845, "step": 24400 }, { "epoch": 0.09436223345858268, "grad_norm": 0.12545613944530487, "learning_rate": 0.002, "loss": 2.3611, "step": 24410 }, { "epoch": 0.09440089066196596, "grad_norm": 0.11735380440950394, "learning_rate": 0.002, "loss": 2.3832, "step": 24420 }, { "epoch": 0.09443954786534924, "grad_norm": 0.11142757534980774, "learning_rate": 0.002, "loss": 2.3746, "step": 24430 }, { "epoch": 0.0944782050687325, "grad_norm": 0.11514252424240112, "learning_rate": 0.002, "loss": 2.3782, "step": 24440 }, { "epoch": 0.09451686227211578, "grad_norm": 0.13839270174503326, "learning_rate": 0.002, "loss": 2.3759, "step": 24450 }, { "epoch": 0.09455551947549906, "grad_norm": 0.12157618999481201, "learning_rate": 0.002, "loss": 2.3797, "step": 24460 }, { "epoch": 0.09459417667888234, "grad_norm": 0.10932943224906921, "learning_rate": 0.002, "loss": 2.3786, "step": 24470 }, { "epoch": 0.09463283388226562, "grad_norm": 0.08984825760126114, "learning_rate": 0.002, "loss": 2.3639, "step": 24480 }, { "epoch": 0.0946714910856489, "grad_norm": 0.11526817828416824, "learning_rate": 0.002, "loss": 2.3766, "step": 24490 }, { "epoch": 0.09471014828903218, "grad_norm": 0.10593192279338837, "learning_rate": 0.002, "loss": 2.3792, "step": 24500 }, { "epoch": 0.09474880549241546, "grad_norm": 0.11164966970682144, "learning_rate": 0.002, "loss": 2.3658, "step": 24510 }, { "epoch": 0.09478746269579874, "grad_norm": 0.17076915502548218, "learning_rate": 0.002, "loss": 2.3875, "step": 24520 }, { "epoch": 0.09482611989918201, "grad_norm": 0.11258591711521149, "learning_rate": 0.002, "loss": 2.3771, "step": 24530 }, { "epoch": 0.09486477710256529, "grad_norm": 0.1002669557929039, "learning_rate": 0.002, "loss": 2.3865, "step": 24540 }, { "epoch": 0.09490343430594857, "grad_norm": 0.11262197047472, "learning_rate": 0.002, "loss": 2.3822, "step": 24550 }, { "epoch": 0.09494209150933185, "grad_norm": 0.10771956294775009, "learning_rate": 0.002, "loss": 2.3797, "step": 24560 }, { "epoch": 0.09498074871271513, "grad_norm": 0.1211184710264206, "learning_rate": 0.002, "loss": 2.3775, "step": 24570 }, { "epoch": 0.0950194059160984, "grad_norm": 0.11493786424398422, "learning_rate": 0.002, "loss": 2.3738, "step": 24580 }, { "epoch": 0.09505806311948169, "grad_norm": 0.11493559926748276, "learning_rate": 0.002, "loss": 2.3772, "step": 24590 }, { "epoch": 0.09509672032286497, "grad_norm": 0.11849837005138397, "learning_rate": 0.002, "loss": 2.3848, "step": 24600 }, { "epoch": 0.09513537752624825, "grad_norm": 0.10788939148187637, "learning_rate": 0.002, "loss": 2.3771, "step": 24610 }, { "epoch": 0.09517403472963153, "grad_norm": 0.11454930901527405, "learning_rate": 0.002, "loss": 2.3659, "step": 24620 }, { "epoch": 0.09521269193301479, "grad_norm": 0.1146981343626976, "learning_rate": 0.002, "loss": 2.3793, "step": 24630 }, { "epoch": 0.09525134913639807, "grad_norm": 0.12724652886390686, "learning_rate": 0.002, "loss": 2.3765, "step": 24640 }, { "epoch": 0.09529000633978135, "grad_norm": 0.10866862535476685, "learning_rate": 0.002, "loss": 2.3645, "step": 24650 }, { "epoch": 0.09532866354316463, "grad_norm": 0.14221100509166718, "learning_rate": 0.002, "loss": 2.3588, "step": 24660 }, { "epoch": 0.09536732074654791, "grad_norm": 0.1002168357372284, "learning_rate": 0.002, "loss": 2.3824, "step": 24670 }, { "epoch": 0.09540597794993119, "grad_norm": 0.1249079555273056, "learning_rate": 0.002, "loss": 2.3883, "step": 24680 }, { "epoch": 0.09544463515331447, "grad_norm": 0.10911814868450165, "learning_rate": 0.002, "loss": 2.3669, "step": 24690 }, { "epoch": 0.09548329235669775, "grad_norm": 0.11465930193662643, "learning_rate": 0.002, "loss": 2.3688, "step": 24700 }, { "epoch": 0.09552194956008103, "grad_norm": 0.09612670540809631, "learning_rate": 0.002, "loss": 2.3654, "step": 24710 }, { "epoch": 0.0955606067634643, "grad_norm": 0.11884750425815582, "learning_rate": 0.002, "loss": 2.3605, "step": 24720 }, { "epoch": 0.09559926396684758, "grad_norm": 0.10053402930498123, "learning_rate": 0.002, "loss": 2.3781, "step": 24730 }, { "epoch": 0.09563792117023086, "grad_norm": 0.11036361008882523, "learning_rate": 0.002, "loss": 2.3457, "step": 24740 }, { "epoch": 0.09567657837361414, "grad_norm": 0.09313686192035675, "learning_rate": 0.002, "loss": 2.3787, "step": 24750 }, { "epoch": 0.09571523557699742, "grad_norm": 0.12321452051401138, "learning_rate": 0.002, "loss": 2.374, "step": 24760 }, { "epoch": 0.0957538927803807, "grad_norm": 0.10040561854839325, "learning_rate": 0.002, "loss": 2.3822, "step": 24770 }, { "epoch": 0.09579254998376398, "grad_norm": 0.1582271158695221, "learning_rate": 0.002, "loss": 2.372, "step": 24780 }, { "epoch": 0.09583120718714726, "grad_norm": 0.09811810404062271, "learning_rate": 0.002, "loss": 2.3874, "step": 24790 }, { "epoch": 0.09586986439053054, "grad_norm": 0.12261014431715012, "learning_rate": 0.002, "loss": 2.3522, "step": 24800 }, { "epoch": 0.0959085215939138, "grad_norm": 0.09658876806497574, "learning_rate": 0.002, "loss": 2.3716, "step": 24810 }, { "epoch": 0.09594717879729708, "grad_norm": 0.13847355544567108, "learning_rate": 0.002, "loss": 2.3866, "step": 24820 }, { "epoch": 0.09598583600068036, "grad_norm": 0.09949313849210739, "learning_rate": 0.002, "loss": 2.3887, "step": 24830 }, { "epoch": 0.09602449320406364, "grad_norm": 0.11247383058071136, "learning_rate": 0.002, "loss": 2.3712, "step": 24840 }, { "epoch": 0.09606315040744692, "grad_norm": 0.11727588623762131, "learning_rate": 0.002, "loss": 2.3899, "step": 24850 }, { "epoch": 0.0961018076108302, "grad_norm": 0.12999024987220764, "learning_rate": 0.002, "loss": 2.3846, "step": 24860 }, { "epoch": 0.09614046481421348, "grad_norm": 0.10106495022773743, "learning_rate": 0.002, "loss": 2.3715, "step": 24870 }, { "epoch": 0.09617912201759676, "grad_norm": 0.1225874051451683, "learning_rate": 0.002, "loss": 2.3991, "step": 24880 }, { "epoch": 0.09621777922098004, "grad_norm": 0.11529222130775452, "learning_rate": 0.002, "loss": 2.3767, "step": 24890 }, { "epoch": 0.09625643642436331, "grad_norm": 0.10541075468063354, "learning_rate": 0.002, "loss": 2.3649, "step": 24900 }, { "epoch": 0.09629509362774659, "grad_norm": 0.11571445316076279, "learning_rate": 0.002, "loss": 2.3645, "step": 24910 }, { "epoch": 0.09633375083112987, "grad_norm": 0.10362482815980911, "learning_rate": 0.002, "loss": 2.3687, "step": 24920 }, { "epoch": 0.09637240803451315, "grad_norm": 0.12240533530712128, "learning_rate": 0.002, "loss": 2.3617, "step": 24930 }, { "epoch": 0.09641106523789643, "grad_norm": 0.09573902934789658, "learning_rate": 0.002, "loss": 2.3767, "step": 24940 }, { "epoch": 0.09644972244127971, "grad_norm": 0.098006471991539, "learning_rate": 0.002, "loss": 2.379, "step": 24950 }, { "epoch": 0.09648837964466299, "grad_norm": 0.10664179921150208, "learning_rate": 0.002, "loss": 2.3766, "step": 24960 }, { "epoch": 0.09652703684804627, "grad_norm": 0.12383517622947693, "learning_rate": 0.002, "loss": 2.3668, "step": 24970 }, { "epoch": 0.09656569405142955, "grad_norm": 0.10212542116641998, "learning_rate": 0.002, "loss": 2.37, "step": 24980 }, { "epoch": 0.09660435125481283, "grad_norm": 0.1323486864566803, "learning_rate": 0.002, "loss": 2.3722, "step": 24990 }, { "epoch": 0.0966430084581961, "grad_norm": 0.11106313019990921, "learning_rate": 0.002, "loss": 2.3635, "step": 25000 }, { "epoch": 0.09668166566157937, "grad_norm": 0.10711284726858139, "learning_rate": 0.002, "loss": 2.3777, "step": 25010 }, { "epoch": 0.09672032286496265, "grad_norm": 0.10144255310297012, "learning_rate": 0.002, "loss": 2.365, "step": 25020 }, { "epoch": 0.09675898006834593, "grad_norm": 0.10904534161090851, "learning_rate": 0.002, "loss": 2.3632, "step": 25030 }, { "epoch": 0.09679763727172921, "grad_norm": 0.11617907136678696, "learning_rate": 0.002, "loss": 2.3714, "step": 25040 }, { "epoch": 0.0968362944751125, "grad_norm": 0.11424647271633148, "learning_rate": 0.002, "loss": 2.3798, "step": 25050 }, { "epoch": 0.09687495167849577, "grad_norm": 0.12575869262218475, "learning_rate": 0.002, "loss": 2.3751, "step": 25060 }, { "epoch": 0.09691360888187905, "grad_norm": 0.10736224800348282, "learning_rate": 0.002, "loss": 2.3715, "step": 25070 }, { "epoch": 0.09695226608526233, "grad_norm": 0.1244237944483757, "learning_rate": 0.002, "loss": 2.3686, "step": 25080 }, { "epoch": 0.0969909232886456, "grad_norm": 0.10764366388320923, "learning_rate": 0.002, "loss": 2.3861, "step": 25090 }, { "epoch": 0.09702958049202888, "grad_norm": 0.10259444266557693, "learning_rate": 0.002, "loss": 2.3708, "step": 25100 }, { "epoch": 0.09706823769541216, "grad_norm": 0.11127594113349915, "learning_rate": 0.002, "loss": 2.3769, "step": 25110 }, { "epoch": 0.09710689489879544, "grad_norm": 0.14849691092967987, "learning_rate": 0.002, "loss": 2.3804, "step": 25120 }, { "epoch": 0.09714555210217872, "grad_norm": 0.10462760180234909, "learning_rate": 0.002, "loss": 2.3727, "step": 25130 }, { "epoch": 0.097184209305562, "grad_norm": 0.1071339026093483, "learning_rate": 0.002, "loss": 2.3714, "step": 25140 }, { "epoch": 0.09722286650894528, "grad_norm": 0.10815966129302979, "learning_rate": 0.002, "loss": 2.3653, "step": 25150 }, { "epoch": 0.09726152371232856, "grad_norm": 0.10802339017391205, "learning_rate": 0.002, "loss": 2.3651, "step": 25160 }, { "epoch": 0.09730018091571184, "grad_norm": 0.1159505546092987, "learning_rate": 0.002, "loss": 2.3738, "step": 25170 }, { "epoch": 0.0973388381190951, "grad_norm": 0.10223262757062912, "learning_rate": 0.002, "loss": 2.3825, "step": 25180 }, { "epoch": 0.09737749532247839, "grad_norm": 0.11454407870769501, "learning_rate": 0.002, "loss": 2.3654, "step": 25190 }, { "epoch": 0.09741615252586167, "grad_norm": 0.10623133182525635, "learning_rate": 0.002, "loss": 2.3826, "step": 25200 }, { "epoch": 0.09745480972924495, "grad_norm": 0.10946430265903473, "learning_rate": 0.002, "loss": 2.3736, "step": 25210 }, { "epoch": 0.09749346693262823, "grad_norm": 0.12587489187717438, "learning_rate": 0.002, "loss": 2.3763, "step": 25220 }, { "epoch": 0.0975321241360115, "grad_norm": 0.11556122452020645, "learning_rate": 0.002, "loss": 2.3701, "step": 25230 }, { "epoch": 0.09757078133939479, "grad_norm": 0.11611664295196533, "learning_rate": 0.002, "loss": 2.3719, "step": 25240 }, { "epoch": 0.09760943854277807, "grad_norm": 0.09125930815935135, "learning_rate": 0.002, "loss": 2.3777, "step": 25250 }, { "epoch": 0.09764809574616135, "grad_norm": 0.11490960419178009, "learning_rate": 0.002, "loss": 2.3737, "step": 25260 }, { "epoch": 0.09768675294954461, "grad_norm": 0.11289970576763153, "learning_rate": 0.002, "loss": 2.3687, "step": 25270 }, { "epoch": 0.09772541015292789, "grad_norm": 0.13276606798171997, "learning_rate": 0.002, "loss": 2.3789, "step": 25280 }, { "epoch": 0.09776406735631117, "grad_norm": 0.11637672036886215, "learning_rate": 0.002, "loss": 2.374, "step": 25290 }, { "epoch": 0.09780272455969445, "grad_norm": 0.1237371638417244, "learning_rate": 0.002, "loss": 2.366, "step": 25300 }, { "epoch": 0.09784138176307773, "grad_norm": 0.12679412961006165, "learning_rate": 0.002, "loss": 2.3714, "step": 25310 }, { "epoch": 0.09788003896646101, "grad_norm": 0.12102102488279343, "learning_rate": 0.002, "loss": 2.3854, "step": 25320 }, { "epoch": 0.09791869616984429, "grad_norm": 0.12702231109142303, "learning_rate": 0.002, "loss": 2.3844, "step": 25330 }, { "epoch": 0.09795735337322757, "grad_norm": 0.11291328072547913, "learning_rate": 0.002, "loss": 2.3641, "step": 25340 }, { "epoch": 0.09799601057661085, "grad_norm": 0.1038038358092308, "learning_rate": 0.002, "loss": 2.3743, "step": 25350 }, { "epoch": 0.09803466777999413, "grad_norm": 0.12257646024227142, "learning_rate": 0.002, "loss": 2.3634, "step": 25360 }, { "epoch": 0.0980733249833774, "grad_norm": 0.15314073860645294, "learning_rate": 0.002, "loss": 2.3745, "step": 25370 }, { "epoch": 0.09811198218676068, "grad_norm": 0.104371577501297, "learning_rate": 0.002, "loss": 2.3709, "step": 25380 }, { "epoch": 0.09815063939014396, "grad_norm": 0.10793410986661911, "learning_rate": 0.002, "loss": 2.3812, "step": 25390 }, { "epoch": 0.09818929659352724, "grad_norm": 0.10416117310523987, "learning_rate": 0.002, "loss": 2.3657, "step": 25400 }, { "epoch": 0.09822795379691052, "grad_norm": 0.11453581601381302, "learning_rate": 0.002, "loss": 2.3719, "step": 25410 }, { "epoch": 0.0982666110002938, "grad_norm": 0.12008222192525864, "learning_rate": 0.002, "loss": 2.3621, "step": 25420 }, { "epoch": 0.09830526820367708, "grad_norm": 0.10592380166053772, "learning_rate": 0.002, "loss": 2.3712, "step": 25430 }, { "epoch": 0.09834392540706036, "grad_norm": 0.122405044734478, "learning_rate": 0.002, "loss": 2.375, "step": 25440 }, { "epoch": 0.09838258261044364, "grad_norm": 0.10416276752948761, "learning_rate": 0.002, "loss": 2.3744, "step": 25450 }, { "epoch": 0.0984212398138269, "grad_norm": 0.12326578050851822, "learning_rate": 0.002, "loss": 2.372, "step": 25460 }, { "epoch": 0.09845989701721018, "grad_norm": 0.10835543274879456, "learning_rate": 0.002, "loss": 2.3818, "step": 25470 }, { "epoch": 0.09849855422059346, "grad_norm": 0.10613939166069031, "learning_rate": 0.002, "loss": 2.3752, "step": 25480 }, { "epoch": 0.09853721142397674, "grad_norm": 0.13459204137325287, "learning_rate": 0.002, "loss": 2.3658, "step": 25490 }, { "epoch": 0.09857586862736002, "grad_norm": 0.11468186229467392, "learning_rate": 0.002, "loss": 2.3712, "step": 25500 }, { "epoch": 0.0986145258307433, "grad_norm": 0.10123708844184875, "learning_rate": 0.002, "loss": 2.3669, "step": 25510 }, { "epoch": 0.09865318303412658, "grad_norm": 0.11035001277923584, "learning_rate": 0.002, "loss": 2.3923, "step": 25520 }, { "epoch": 0.09869184023750986, "grad_norm": 0.11449532955884933, "learning_rate": 0.002, "loss": 2.3939, "step": 25530 }, { "epoch": 0.09873049744089314, "grad_norm": 0.1010097786784172, "learning_rate": 0.002, "loss": 2.3726, "step": 25540 }, { "epoch": 0.09876915464427641, "grad_norm": 0.11513739079236984, "learning_rate": 0.002, "loss": 2.3663, "step": 25550 }, { "epoch": 0.09880781184765969, "grad_norm": 0.11579716205596924, "learning_rate": 0.002, "loss": 2.3814, "step": 25560 }, { "epoch": 0.09884646905104297, "grad_norm": 0.11263803392648697, "learning_rate": 0.002, "loss": 2.3841, "step": 25570 }, { "epoch": 0.09888512625442625, "grad_norm": 0.1053340956568718, "learning_rate": 0.002, "loss": 2.368, "step": 25580 }, { "epoch": 0.09892378345780953, "grad_norm": 0.10061566531658173, "learning_rate": 0.002, "loss": 2.3603, "step": 25590 }, { "epoch": 0.09896244066119281, "grad_norm": 0.13848647475242615, "learning_rate": 0.002, "loss": 2.3676, "step": 25600 }, { "epoch": 0.09900109786457609, "grad_norm": 0.10833652317523956, "learning_rate": 0.002, "loss": 2.3827, "step": 25610 }, { "epoch": 0.09903975506795937, "grad_norm": 0.11265638470649719, "learning_rate": 0.002, "loss": 2.3689, "step": 25620 }, { "epoch": 0.09907841227134265, "grad_norm": 0.13258330523967743, "learning_rate": 0.002, "loss": 2.3807, "step": 25630 }, { "epoch": 0.09911706947472593, "grad_norm": 0.1301572173833847, "learning_rate": 0.002, "loss": 2.3648, "step": 25640 }, { "epoch": 0.0991557266781092, "grad_norm": 0.11950825154781342, "learning_rate": 0.002, "loss": 2.3739, "step": 25650 }, { "epoch": 0.09919438388149247, "grad_norm": 0.23389548063278198, "learning_rate": 0.002, "loss": 2.3667, "step": 25660 }, { "epoch": 0.09923304108487575, "grad_norm": 0.12364401668310165, "learning_rate": 0.002, "loss": 2.3635, "step": 25670 }, { "epoch": 0.09927169828825903, "grad_norm": 0.10952173918485641, "learning_rate": 0.002, "loss": 2.3601, "step": 25680 }, { "epoch": 0.09931035549164231, "grad_norm": 0.13647736608982086, "learning_rate": 0.002, "loss": 2.3795, "step": 25690 }, { "epoch": 0.0993490126950256, "grad_norm": 0.11075005680322647, "learning_rate": 0.002, "loss": 2.3759, "step": 25700 }, { "epoch": 0.09938766989840887, "grad_norm": 0.11662223935127258, "learning_rate": 0.002, "loss": 2.3779, "step": 25710 }, { "epoch": 0.09942632710179215, "grad_norm": 0.10770941525697708, "learning_rate": 0.002, "loss": 2.3785, "step": 25720 }, { "epoch": 0.09946498430517543, "grad_norm": 0.13047263026237488, "learning_rate": 0.002, "loss": 2.3693, "step": 25730 }, { "epoch": 0.0995036415085587, "grad_norm": 0.1101449728012085, "learning_rate": 0.002, "loss": 2.3582, "step": 25740 }, { "epoch": 0.09954229871194198, "grad_norm": 0.11350306868553162, "learning_rate": 0.002, "loss": 2.3583, "step": 25750 }, { "epoch": 0.09958095591532526, "grad_norm": 0.12219146639108658, "learning_rate": 0.002, "loss": 2.3644, "step": 25760 }, { "epoch": 0.09961961311870854, "grad_norm": 0.12308717519044876, "learning_rate": 0.002, "loss": 2.3893, "step": 25770 }, { "epoch": 0.09965827032209182, "grad_norm": 0.09611739218235016, "learning_rate": 0.002, "loss": 2.3728, "step": 25780 }, { "epoch": 0.0996969275254751, "grad_norm": 0.1195925772190094, "learning_rate": 0.002, "loss": 2.3671, "step": 25790 }, { "epoch": 0.09973558472885838, "grad_norm": 0.09814034402370453, "learning_rate": 0.002, "loss": 2.3764, "step": 25800 }, { "epoch": 0.09977424193224166, "grad_norm": 0.12110476940870285, "learning_rate": 0.002, "loss": 2.3763, "step": 25810 }, { "epoch": 0.09981289913562494, "grad_norm": 0.11635403335094452, "learning_rate": 0.002, "loss": 2.3859, "step": 25820 }, { "epoch": 0.0998515563390082, "grad_norm": 0.1121845617890358, "learning_rate": 0.002, "loss": 2.3898, "step": 25830 }, { "epoch": 0.09989021354239148, "grad_norm": 0.12140703201293945, "learning_rate": 0.002, "loss": 2.362, "step": 25840 }, { "epoch": 0.09992887074577476, "grad_norm": 0.13646343350410461, "learning_rate": 0.002, "loss": 2.3758, "step": 25850 }, { "epoch": 0.09996752794915804, "grad_norm": 0.12495137006044388, "learning_rate": 0.002, "loss": 2.3611, "step": 25860 }, { "epoch": 0.10000618515254132, "grad_norm": 0.10688120126724243, "learning_rate": 0.002, "loss": 2.3684, "step": 25870 }, { "epoch": 0.1000448423559246, "grad_norm": 0.09174524247646332, "learning_rate": 0.002, "loss": 2.3768, "step": 25880 }, { "epoch": 0.10008349955930788, "grad_norm": 0.11917345225811005, "learning_rate": 0.002, "loss": 2.3653, "step": 25890 }, { "epoch": 0.10012215676269116, "grad_norm": 0.11813360452651978, "learning_rate": 0.002, "loss": 2.3814, "step": 25900 }, { "epoch": 0.10016081396607444, "grad_norm": 0.13693365454673767, "learning_rate": 0.002, "loss": 2.3657, "step": 25910 }, { "epoch": 0.10019947116945771, "grad_norm": 0.12706634402275085, "learning_rate": 0.002, "loss": 2.3603, "step": 25920 }, { "epoch": 0.10023812837284099, "grad_norm": 0.10601233690977097, "learning_rate": 0.002, "loss": 2.382, "step": 25930 }, { "epoch": 0.10027678557622427, "grad_norm": 0.12050879746675491, "learning_rate": 0.002, "loss": 2.3758, "step": 25940 }, { "epoch": 0.10031544277960755, "grad_norm": 0.1077536791563034, "learning_rate": 0.002, "loss": 2.3864, "step": 25950 }, { "epoch": 0.10035409998299083, "grad_norm": 0.11017072945833206, "learning_rate": 0.002, "loss": 2.3675, "step": 25960 }, { "epoch": 0.10039275718637411, "grad_norm": 0.37830984592437744, "learning_rate": 0.002, "loss": 2.3672, "step": 25970 }, { "epoch": 0.10043141438975739, "grad_norm": 0.10002478212118149, "learning_rate": 0.002, "loss": 2.3656, "step": 25980 }, { "epoch": 0.10047007159314067, "grad_norm": 0.12313816696405411, "learning_rate": 0.002, "loss": 2.3723, "step": 25990 }, { "epoch": 0.10050872879652395, "grad_norm": 0.09661564230918884, "learning_rate": 0.002, "loss": 2.3569, "step": 26000 }, { "epoch": 0.10054738599990723, "grad_norm": 0.10133011639118195, "learning_rate": 0.002, "loss": 2.3783, "step": 26010 }, { "epoch": 0.1005860432032905, "grad_norm": 0.14927417039871216, "learning_rate": 0.002, "loss": 2.3835, "step": 26020 }, { "epoch": 0.10062470040667378, "grad_norm": 0.10663340240716934, "learning_rate": 0.002, "loss": 2.3878, "step": 26030 }, { "epoch": 0.10066335761005706, "grad_norm": 0.10607342422008514, "learning_rate": 0.002, "loss": 2.3831, "step": 26040 }, { "epoch": 0.10070201481344034, "grad_norm": 0.09412727504968643, "learning_rate": 0.002, "loss": 2.37, "step": 26050 }, { "epoch": 0.10074067201682362, "grad_norm": 0.11209404468536377, "learning_rate": 0.002, "loss": 2.366, "step": 26060 }, { "epoch": 0.1007793292202069, "grad_norm": 0.1119234636425972, "learning_rate": 0.002, "loss": 2.3792, "step": 26070 }, { "epoch": 0.10081798642359018, "grad_norm": 0.13079524040222168, "learning_rate": 0.002, "loss": 2.3625, "step": 26080 }, { "epoch": 0.10085664362697346, "grad_norm": 0.11952626705169678, "learning_rate": 0.002, "loss": 2.3699, "step": 26090 }, { "epoch": 0.10089530083035674, "grad_norm": 0.10386581718921661, "learning_rate": 0.002, "loss": 2.3655, "step": 26100 }, { "epoch": 0.10093395803374, "grad_norm": 0.12058830261230469, "learning_rate": 0.002, "loss": 2.3627, "step": 26110 }, { "epoch": 0.10097261523712328, "grad_norm": 0.1013653352856636, "learning_rate": 0.002, "loss": 2.3705, "step": 26120 }, { "epoch": 0.10101127244050656, "grad_norm": 0.11395049095153809, "learning_rate": 0.002, "loss": 2.3635, "step": 26130 }, { "epoch": 0.10104992964388984, "grad_norm": 0.10870377719402313, "learning_rate": 0.002, "loss": 2.368, "step": 26140 }, { "epoch": 0.10108858684727312, "grad_norm": 0.1162482276558876, "learning_rate": 0.002, "loss": 2.3757, "step": 26150 }, { "epoch": 0.1011272440506564, "grad_norm": 0.10904961824417114, "learning_rate": 0.002, "loss": 2.376, "step": 26160 }, { "epoch": 0.10116590125403968, "grad_norm": 0.0989343523979187, "learning_rate": 0.002, "loss": 2.3715, "step": 26170 }, { "epoch": 0.10120455845742296, "grad_norm": 0.12322376668453217, "learning_rate": 0.002, "loss": 2.3816, "step": 26180 }, { "epoch": 0.10124321566080624, "grad_norm": 0.13838641345500946, "learning_rate": 0.002, "loss": 2.367, "step": 26190 }, { "epoch": 0.1012818728641895, "grad_norm": 0.11140194535255432, "learning_rate": 0.002, "loss": 2.3789, "step": 26200 }, { "epoch": 0.10132053006757279, "grad_norm": 0.17812778055667877, "learning_rate": 0.002, "loss": 2.3663, "step": 26210 }, { "epoch": 0.10135918727095607, "grad_norm": 0.12174686044454575, "learning_rate": 0.002, "loss": 2.3904, "step": 26220 }, { "epoch": 0.10139784447433935, "grad_norm": 0.11605304479598999, "learning_rate": 0.002, "loss": 2.3813, "step": 26230 }, { "epoch": 0.10143650167772263, "grad_norm": 0.28635290265083313, "learning_rate": 0.002, "loss": 2.3682, "step": 26240 }, { "epoch": 0.1014751588811059, "grad_norm": 0.11292878538370132, "learning_rate": 0.002, "loss": 2.3693, "step": 26250 }, { "epoch": 0.10151381608448919, "grad_norm": 0.10103033483028412, "learning_rate": 0.002, "loss": 2.3806, "step": 26260 }, { "epoch": 0.10155247328787247, "grad_norm": 0.09425285458564758, "learning_rate": 0.002, "loss": 2.3692, "step": 26270 }, { "epoch": 0.10159113049125575, "grad_norm": 0.11024901270866394, "learning_rate": 0.002, "loss": 2.3771, "step": 26280 }, { "epoch": 0.10162978769463901, "grad_norm": 0.16107779741287231, "learning_rate": 0.002, "loss": 2.3832, "step": 26290 }, { "epoch": 0.10166844489802229, "grad_norm": 0.111606165766716, "learning_rate": 0.002, "loss": 2.3728, "step": 26300 }, { "epoch": 0.10170710210140557, "grad_norm": 0.11568954586982727, "learning_rate": 0.002, "loss": 2.3716, "step": 26310 }, { "epoch": 0.10174575930478885, "grad_norm": 0.12137076258659363, "learning_rate": 0.002, "loss": 2.3793, "step": 26320 }, { "epoch": 0.10178441650817213, "grad_norm": 0.12343282252550125, "learning_rate": 0.002, "loss": 2.3727, "step": 26330 }, { "epoch": 0.10182307371155541, "grad_norm": 0.09838341176509857, "learning_rate": 0.002, "loss": 2.3711, "step": 26340 }, { "epoch": 0.10186173091493869, "grad_norm": 0.11724685877561569, "learning_rate": 0.002, "loss": 2.3692, "step": 26350 }, { "epoch": 0.10190038811832197, "grad_norm": 0.10881741344928741, "learning_rate": 0.002, "loss": 2.3679, "step": 26360 }, { "epoch": 0.10193904532170525, "grad_norm": 0.11824636906385422, "learning_rate": 0.002, "loss": 2.3813, "step": 26370 }, { "epoch": 0.10197770252508853, "grad_norm": 0.09991855919361115, "learning_rate": 0.002, "loss": 2.3746, "step": 26380 }, { "epoch": 0.1020163597284718, "grad_norm": 0.13345035910606384, "learning_rate": 0.002, "loss": 2.3714, "step": 26390 }, { "epoch": 0.10205501693185508, "grad_norm": 0.12668660283088684, "learning_rate": 0.002, "loss": 2.3752, "step": 26400 }, { "epoch": 0.10209367413523836, "grad_norm": 0.10875190794467926, "learning_rate": 0.002, "loss": 2.3693, "step": 26410 }, { "epoch": 0.10213233133862164, "grad_norm": 0.10290392488241196, "learning_rate": 0.002, "loss": 2.3662, "step": 26420 }, { "epoch": 0.10217098854200492, "grad_norm": 0.1253070831298828, "learning_rate": 0.002, "loss": 2.3763, "step": 26430 }, { "epoch": 0.1022096457453882, "grad_norm": 0.13730914890766144, "learning_rate": 0.002, "loss": 2.378, "step": 26440 }, { "epoch": 0.10224830294877148, "grad_norm": 0.12453850358724594, "learning_rate": 0.002, "loss": 2.368, "step": 26450 }, { "epoch": 0.10228696015215476, "grad_norm": 0.11111441254615784, "learning_rate": 0.002, "loss": 2.3782, "step": 26460 }, { "epoch": 0.10232561735553804, "grad_norm": 0.11621275544166565, "learning_rate": 0.002, "loss": 2.3639, "step": 26470 }, { "epoch": 0.1023642745589213, "grad_norm": 0.09777642786502838, "learning_rate": 0.002, "loss": 2.388, "step": 26480 }, { "epoch": 0.10240293176230458, "grad_norm": 0.1324297934770584, "learning_rate": 0.002, "loss": 2.386, "step": 26490 }, { "epoch": 0.10244158896568786, "grad_norm": 0.0918188989162445, "learning_rate": 0.002, "loss": 2.386, "step": 26500 }, { "epoch": 0.10248024616907114, "grad_norm": 0.10434520244598389, "learning_rate": 0.002, "loss": 2.3641, "step": 26510 }, { "epoch": 0.10251890337245442, "grad_norm": 0.10555671900510788, "learning_rate": 0.002, "loss": 2.372, "step": 26520 }, { "epoch": 0.1025575605758377, "grad_norm": 0.11064916849136353, "learning_rate": 0.002, "loss": 2.383, "step": 26530 }, { "epoch": 0.10259621777922098, "grad_norm": 0.13481812179088593, "learning_rate": 0.002, "loss": 2.362, "step": 26540 }, { "epoch": 0.10263487498260426, "grad_norm": 0.10597804188728333, "learning_rate": 0.002, "loss": 2.3705, "step": 26550 }, { "epoch": 0.10267353218598754, "grad_norm": 0.11946997791528702, "learning_rate": 0.002, "loss": 2.3842, "step": 26560 }, { "epoch": 0.10271218938937081, "grad_norm": 0.0989639163017273, "learning_rate": 0.002, "loss": 2.367, "step": 26570 }, { "epoch": 0.10275084659275409, "grad_norm": 0.13578683137893677, "learning_rate": 0.002, "loss": 2.3735, "step": 26580 }, { "epoch": 0.10278950379613737, "grad_norm": 0.12367697060108185, "learning_rate": 0.002, "loss": 2.3765, "step": 26590 }, { "epoch": 0.10282816099952065, "grad_norm": 0.14630058407783508, "learning_rate": 0.002, "loss": 2.3826, "step": 26600 }, { "epoch": 0.10286681820290393, "grad_norm": 0.10778837651014328, "learning_rate": 0.002, "loss": 2.3966, "step": 26610 }, { "epoch": 0.10290547540628721, "grad_norm": 0.11188562214374542, "learning_rate": 0.002, "loss": 2.3776, "step": 26620 }, { "epoch": 0.10294413260967049, "grad_norm": 0.1135137602686882, "learning_rate": 0.002, "loss": 2.3804, "step": 26630 }, { "epoch": 0.10298278981305377, "grad_norm": 0.12481187283992767, "learning_rate": 0.002, "loss": 2.3539, "step": 26640 }, { "epoch": 0.10302144701643705, "grad_norm": 0.10805981606245041, "learning_rate": 0.002, "loss": 2.3775, "step": 26650 }, { "epoch": 0.10306010421982033, "grad_norm": 0.11806239187717438, "learning_rate": 0.002, "loss": 2.3775, "step": 26660 }, { "epoch": 0.1030987614232036, "grad_norm": 0.13640879094600677, "learning_rate": 0.002, "loss": 2.3632, "step": 26670 }, { "epoch": 0.10313741862658687, "grad_norm": 0.11290092021226883, "learning_rate": 0.002, "loss": 2.3911, "step": 26680 }, { "epoch": 0.10317607582997015, "grad_norm": 0.11578276753425598, "learning_rate": 0.002, "loss": 2.3671, "step": 26690 }, { "epoch": 0.10321473303335343, "grad_norm": 0.12642863392829895, "learning_rate": 0.002, "loss": 2.3693, "step": 26700 }, { "epoch": 0.10325339023673671, "grad_norm": 0.10927240550518036, "learning_rate": 0.002, "loss": 2.3863, "step": 26710 }, { "epoch": 0.10329204744012, "grad_norm": 0.10512517392635345, "learning_rate": 0.002, "loss": 2.3734, "step": 26720 }, { "epoch": 0.10333070464350327, "grad_norm": 0.11986581981182098, "learning_rate": 0.002, "loss": 2.3728, "step": 26730 }, { "epoch": 0.10336936184688655, "grad_norm": 0.12444537878036499, "learning_rate": 0.002, "loss": 2.3668, "step": 26740 }, { "epoch": 0.10340801905026983, "grad_norm": 0.10115274786949158, "learning_rate": 0.002, "loss": 2.3781, "step": 26750 }, { "epoch": 0.1034466762536531, "grad_norm": 0.10158008337020874, "learning_rate": 0.002, "loss": 2.3721, "step": 26760 }, { "epoch": 0.10348533345703638, "grad_norm": 0.10689114779233932, "learning_rate": 0.002, "loss": 2.368, "step": 26770 }, { "epoch": 0.10352399066041966, "grad_norm": 0.11266842484474182, "learning_rate": 0.002, "loss": 2.373, "step": 26780 }, { "epoch": 0.10356264786380294, "grad_norm": 0.1356581598520279, "learning_rate": 0.002, "loss": 2.3578, "step": 26790 }, { "epoch": 0.10360130506718622, "grad_norm": 0.1164923831820488, "learning_rate": 0.002, "loss": 2.3697, "step": 26800 }, { "epoch": 0.1036399622705695, "grad_norm": 0.09714135527610779, "learning_rate": 0.002, "loss": 2.3678, "step": 26810 }, { "epoch": 0.10367861947395278, "grad_norm": 0.11546872556209564, "learning_rate": 0.002, "loss": 2.3677, "step": 26820 }, { "epoch": 0.10371727667733606, "grad_norm": 0.1366276890039444, "learning_rate": 0.002, "loss": 2.3701, "step": 26830 }, { "epoch": 0.10375593388071934, "grad_norm": 0.10508016496896744, "learning_rate": 0.002, "loss": 2.3674, "step": 26840 }, { "epoch": 0.1037945910841026, "grad_norm": 0.10757789015769958, "learning_rate": 0.002, "loss": 2.3753, "step": 26850 }, { "epoch": 0.10383324828748589, "grad_norm": 0.12697778642177582, "learning_rate": 0.002, "loss": 2.3682, "step": 26860 }, { "epoch": 0.10387190549086917, "grad_norm": 0.12644273042678833, "learning_rate": 0.002, "loss": 2.3775, "step": 26870 }, { "epoch": 0.10391056269425245, "grad_norm": 0.12781940400600433, "learning_rate": 0.002, "loss": 2.38, "step": 26880 }, { "epoch": 0.10394921989763573, "grad_norm": 0.13271625339984894, "learning_rate": 0.002, "loss": 2.3843, "step": 26890 }, { "epoch": 0.103987877101019, "grad_norm": 0.11471915245056152, "learning_rate": 0.002, "loss": 2.3782, "step": 26900 }, { "epoch": 0.10402653430440228, "grad_norm": 0.10376786440610886, "learning_rate": 0.002, "loss": 2.3709, "step": 26910 }, { "epoch": 0.10406519150778556, "grad_norm": 0.15959547460079193, "learning_rate": 0.002, "loss": 2.3843, "step": 26920 }, { "epoch": 0.10410384871116884, "grad_norm": 0.09334205090999603, "learning_rate": 0.002, "loss": 2.3734, "step": 26930 }, { "epoch": 0.10414250591455211, "grad_norm": 0.10745527595281601, "learning_rate": 0.002, "loss": 2.379, "step": 26940 }, { "epoch": 0.10418116311793539, "grad_norm": 0.12216676771640778, "learning_rate": 0.002, "loss": 2.3615, "step": 26950 }, { "epoch": 0.10421982032131867, "grad_norm": 0.10139105468988419, "learning_rate": 0.002, "loss": 2.3829, "step": 26960 }, { "epoch": 0.10425847752470195, "grad_norm": 0.1080985888838768, "learning_rate": 0.002, "loss": 2.3747, "step": 26970 }, { "epoch": 0.10429713472808523, "grad_norm": 0.12080413848161697, "learning_rate": 0.002, "loss": 2.3642, "step": 26980 }, { "epoch": 0.10433579193146851, "grad_norm": 0.1179196834564209, "learning_rate": 0.002, "loss": 2.3848, "step": 26990 }, { "epoch": 0.10437444913485179, "grad_norm": 0.09805526584386826, "learning_rate": 0.002, "loss": 2.3532, "step": 27000 }, { "epoch": 0.10441310633823507, "grad_norm": 0.11344782263040543, "learning_rate": 0.002, "loss": 2.3726, "step": 27010 }, { "epoch": 0.10445176354161835, "grad_norm": 0.10693656653165817, "learning_rate": 0.002, "loss": 2.3612, "step": 27020 }, { "epoch": 0.10449042074500163, "grad_norm": 0.14084559679031372, "learning_rate": 0.002, "loss": 2.3707, "step": 27030 }, { "epoch": 0.1045290779483849, "grad_norm": 0.10055924206972122, "learning_rate": 0.002, "loss": 2.3776, "step": 27040 }, { "epoch": 0.10456773515176818, "grad_norm": 0.11677692085504532, "learning_rate": 0.002, "loss": 2.3757, "step": 27050 }, { "epoch": 0.10460639235515146, "grad_norm": 0.13112536072731018, "learning_rate": 0.002, "loss": 2.3605, "step": 27060 }, { "epoch": 0.10464504955853474, "grad_norm": 0.09956295788288116, "learning_rate": 0.002, "loss": 2.3615, "step": 27070 }, { "epoch": 0.10468370676191802, "grad_norm": 0.10391475260257721, "learning_rate": 0.002, "loss": 2.3682, "step": 27080 }, { "epoch": 0.1047223639653013, "grad_norm": 0.12148088961839676, "learning_rate": 0.002, "loss": 2.3741, "step": 27090 }, { "epoch": 0.10476102116868458, "grad_norm": 0.1088813841342926, "learning_rate": 0.002, "loss": 2.3575, "step": 27100 }, { "epoch": 0.10479967837206786, "grad_norm": 0.10357842594385147, "learning_rate": 0.002, "loss": 2.382, "step": 27110 }, { "epoch": 0.10483833557545114, "grad_norm": 0.12168553471565247, "learning_rate": 0.002, "loss": 2.3573, "step": 27120 }, { "epoch": 0.1048769927788344, "grad_norm": 0.10601655393838882, "learning_rate": 0.002, "loss": 2.3607, "step": 27130 }, { "epoch": 0.10491564998221768, "grad_norm": 0.12945091724395752, "learning_rate": 0.002, "loss": 2.3684, "step": 27140 }, { "epoch": 0.10495430718560096, "grad_norm": 0.11099984496831894, "learning_rate": 0.002, "loss": 2.3709, "step": 27150 }, { "epoch": 0.10499296438898424, "grad_norm": 0.11713527143001556, "learning_rate": 0.002, "loss": 2.3761, "step": 27160 }, { "epoch": 0.10503162159236752, "grad_norm": 0.12044794857501984, "learning_rate": 0.002, "loss": 2.3965, "step": 27170 }, { "epoch": 0.1050702787957508, "grad_norm": 0.14438650012016296, "learning_rate": 0.002, "loss": 2.3664, "step": 27180 }, { "epoch": 0.10510893599913408, "grad_norm": 0.1254081428050995, "learning_rate": 0.002, "loss": 2.3778, "step": 27190 }, { "epoch": 0.10514759320251736, "grad_norm": 0.10578880459070206, "learning_rate": 0.002, "loss": 2.38, "step": 27200 }, { "epoch": 0.10518625040590064, "grad_norm": 0.10591401904821396, "learning_rate": 0.002, "loss": 2.3739, "step": 27210 }, { "epoch": 0.10522490760928391, "grad_norm": 0.11413241922855377, "learning_rate": 0.002, "loss": 2.3788, "step": 27220 }, { "epoch": 0.10526356481266719, "grad_norm": 0.12489752471446991, "learning_rate": 0.002, "loss": 2.3706, "step": 27230 }, { "epoch": 0.10530222201605047, "grad_norm": 0.09213671833276749, "learning_rate": 0.002, "loss": 2.3647, "step": 27240 }, { "epoch": 0.10534087921943375, "grad_norm": 0.11025281995534897, "learning_rate": 0.002, "loss": 2.3652, "step": 27250 }, { "epoch": 0.10537953642281703, "grad_norm": 0.10654711723327637, "learning_rate": 0.002, "loss": 2.382, "step": 27260 }, { "epoch": 0.10541819362620031, "grad_norm": 0.10936938971281052, "learning_rate": 0.002, "loss": 2.361, "step": 27270 }, { "epoch": 0.10545685082958359, "grad_norm": 0.11113861203193665, "learning_rate": 0.002, "loss": 2.3931, "step": 27280 }, { "epoch": 0.10549550803296687, "grad_norm": 0.12228459864854813, "learning_rate": 0.002, "loss": 2.3688, "step": 27290 }, { "epoch": 0.10553416523635015, "grad_norm": 0.10813633352518082, "learning_rate": 0.002, "loss": 2.3704, "step": 27300 }, { "epoch": 0.10557282243973341, "grad_norm": 0.1035778671503067, "learning_rate": 0.002, "loss": 2.3566, "step": 27310 }, { "epoch": 0.10561147964311669, "grad_norm": 0.12268638610839844, "learning_rate": 0.002, "loss": 2.3686, "step": 27320 }, { "epoch": 0.10565013684649997, "grad_norm": 0.11177417635917664, "learning_rate": 0.002, "loss": 2.3682, "step": 27330 }, { "epoch": 0.10568879404988325, "grad_norm": 0.11250213533639908, "learning_rate": 0.002, "loss": 2.376, "step": 27340 }, { "epoch": 0.10572745125326653, "grad_norm": 0.12354373186826706, "learning_rate": 0.002, "loss": 2.3799, "step": 27350 }, { "epoch": 0.10576610845664981, "grad_norm": 0.11750753968954086, "learning_rate": 0.002, "loss": 2.3704, "step": 27360 }, { "epoch": 0.10580476566003309, "grad_norm": 0.11276818066835403, "learning_rate": 0.002, "loss": 2.354, "step": 27370 }, { "epoch": 0.10584342286341637, "grad_norm": 0.1077154353260994, "learning_rate": 0.002, "loss": 2.3649, "step": 27380 }, { "epoch": 0.10588208006679965, "grad_norm": 0.10436894744634628, "learning_rate": 0.002, "loss": 2.3686, "step": 27390 }, { "epoch": 0.10592073727018293, "grad_norm": 0.09669612348079681, "learning_rate": 0.002, "loss": 2.3775, "step": 27400 }, { "epoch": 0.1059593944735662, "grad_norm": 0.12027565389871597, "learning_rate": 0.002, "loss": 2.3637, "step": 27410 }, { "epoch": 0.10599805167694948, "grad_norm": 0.10789991915225983, "learning_rate": 0.002, "loss": 2.3802, "step": 27420 }, { "epoch": 0.10603670888033276, "grad_norm": 0.12353754043579102, "learning_rate": 0.002, "loss": 2.3634, "step": 27430 }, { "epoch": 0.10607536608371604, "grad_norm": 0.1370828002691269, "learning_rate": 0.002, "loss": 2.3822, "step": 27440 }, { "epoch": 0.10611402328709932, "grad_norm": 0.11758144944906235, "learning_rate": 0.002, "loss": 2.3845, "step": 27450 }, { "epoch": 0.1061526804904826, "grad_norm": 0.11821454018354416, "learning_rate": 0.002, "loss": 2.3734, "step": 27460 }, { "epoch": 0.10619133769386588, "grad_norm": 0.1114441379904747, "learning_rate": 0.002, "loss": 2.3687, "step": 27470 }, { "epoch": 0.10622999489724916, "grad_norm": 0.11358706653118134, "learning_rate": 0.002, "loss": 2.3612, "step": 27480 }, { "epoch": 0.10626865210063244, "grad_norm": 0.12629422545433044, "learning_rate": 0.002, "loss": 2.3701, "step": 27490 }, { "epoch": 0.1063073093040157, "grad_norm": 0.11764193326234818, "learning_rate": 0.002, "loss": 2.3774, "step": 27500 }, { "epoch": 0.10634596650739898, "grad_norm": 0.11269880831241608, "learning_rate": 0.002, "loss": 2.3792, "step": 27510 }, { "epoch": 0.10638462371078226, "grad_norm": 0.12829801440238953, "learning_rate": 0.002, "loss": 2.3846, "step": 27520 }, { "epoch": 0.10642328091416554, "grad_norm": 0.11060722172260284, "learning_rate": 0.002, "loss": 2.3699, "step": 27530 }, { "epoch": 0.10646193811754882, "grad_norm": 0.10507179796695709, "learning_rate": 0.002, "loss": 2.3822, "step": 27540 }, { "epoch": 0.1065005953209321, "grad_norm": 0.12616103887557983, "learning_rate": 0.002, "loss": 2.3844, "step": 27550 }, { "epoch": 0.10653925252431538, "grad_norm": 0.10349361598491669, "learning_rate": 0.002, "loss": 2.3808, "step": 27560 }, { "epoch": 0.10657790972769866, "grad_norm": 0.10966850072145462, "learning_rate": 0.002, "loss": 2.3461, "step": 27570 }, { "epoch": 0.10661656693108194, "grad_norm": 0.11329416185617447, "learning_rate": 0.002, "loss": 2.3762, "step": 27580 }, { "epoch": 0.10665522413446521, "grad_norm": 0.11381129920482635, "learning_rate": 0.002, "loss": 2.3685, "step": 27590 }, { "epoch": 0.10669388133784849, "grad_norm": 0.11109792441129684, "learning_rate": 0.002, "loss": 2.3837, "step": 27600 }, { "epoch": 0.10673253854123177, "grad_norm": 0.09967318177223206, "learning_rate": 0.002, "loss": 2.3459, "step": 27610 }, { "epoch": 0.10677119574461505, "grad_norm": 0.10462494939565659, "learning_rate": 0.002, "loss": 2.3719, "step": 27620 }, { "epoch": 0.10680985294799833, "grad_norm": 0.10698872059583664, "learning_rate": 0.002, "loss": 2.3728, "step": 27630 }, { "epoch": 0.10684851015138161, "grad_norm": 0.12059954553842545, "learning_rate": 0.002, "loss": 2.3804, "step": 27640 }, { "epoch": 0.10688716735476489, "grad_norm": 0.10810644924640656, "learning_rate": 0.002, "loss": 2.3526, "step": 27650 }, { "epoch": 0.10692582455814817, "grad_norm": 0.10074674338102341, "learning_rate": 0.002, "loss": 2.3498, "step": 27660 }, { "epoch": 0.10696448176153145, "grad_norm": 0.13629195094108582, "learning_rate": 0.002, "loss": 2.3744, "step": 27670 }, { "epoch": 0.10700313896491473, "grad_norm": 0.13592801988124847, "learning_rate": 0.002, "loss": 2.3746, "step": 27680 }, { "epoch": 0.107041796168298, "grad_norm": 0.11054662615060806, "learning_rate": 0.002, "loss": 2.3869, "step": 27690 }, { "epoch": 0.10708045337168128, "grad_norm": 0.10843642801046371, "learning_rate": 0.002, "loss": 2.378, "step": 27700 }, { "epoch": 0.10711911057506456, "grad_norm": 0.13924673199653625, "learning_rate": 0.002, "loss": 2.3745, "step": 27710 }, { "epoch": 0.10715776777844783, "grad_norm": 0.11734243482351303, "learning_rate": 0.002, "loss": 2.3749, "step": 27720 }, { "epoch": 0.10719642498183111, "grad_norm": 0.1108909323811531, "learning_rate": 0.002, "loss": 2.3631, "step": 27730 }, { "epoch": 0.1072350821852144, "grad_norm": 0.13820059597492218, "learning_rate": 0.002, "loss": 2.385, "step": 27740 }, { "epoch": 0.10727373938859767, "grad_norm": 0.11467090249061584, "learning_rate": 0.002, "loss": 2.3719, "step": 27750 }, { "epoch": 0.10731239659198095, "grad_norm": 0.11033518612384796, "learning_rate": 0.002, "loss": 2.3777, "step": 27760 }, { "epoch": 0.10735105379536423, "grad_norm": 0.11089988052845001, "learning_rate": 0.002, "loss": 2.3747, "step": 27770 }, { "epoch": 0.1073897109987475, "grad_norm": 0.10706806927919388, "learning_rate": 0.002, "loss": 2.3808, "step": 27780 }, { "epoch": 0.10742836820213078, "grad_norm": 0.11792809516191483, "learning_rate": 0.002, "loss": 2.3672, "step": 27790 }, { "epoch": 0.10746702540551406, "grad_norm": 0.11145688593387604, "learning_rate": 0.002, "loss": 2.36, "step": 27800 }, { "epoch": 0.10750568260889734, "grad_norm": 0.11388550698757172, "learning_rate": 0.002, "loss": 2.3835, "step": 27810 }, { "epoch": 0.10754433981228062, "grad_norm": 0.11008848994970322, "learning_rate": 0.002, "loss": 2.3631, "step": 27820 }, { "epoch": 0.1075829970156639, "grad_norm": 0.12791645526885986, "learning_rate": 0.002, "loss": 2.3593, "step": 27830 }, { "epoch": 0.10762165421904718, "grad_norm": 0.13758353888988495, "learning_rate": 0.002, "loss": 2.3734, "step": 27840 }, { "epoch": 0.10766031142243046, "grad_norm": 0.10738374292850494, "learning_rate": 0.002, "loss": 2.3764, "step": 27850 }, { "epoch": 0.10769896862581374, "grad_norm": 0.10648205131292343, "learning_rate": 0.002, "loss": 2.3634, "step": 27860 }, { "epoch": 0.107737625829197, "grad_norm": 0.10689187794923782, "learning_rate": 0.002, "loss": 2.3582, "step": 27870 }, { "epoch": 0.10777628303258029, "grad_norm": 0.10008414834737778, "learning_rate": 0.002, "loss": 2.3754, "step": 27880 }, { "epoch": 0.10781494023596357, "grad_norm": 0.10352285206317902, "learning_rate": 0.002, "loss": 2.3779, "step": 27890 }, { "epoch": 0.10785359743934685, "grad_norm": 0.10048746317625046, "learning_rate": 0.002, "loss": 2.363, "step": 27900 }, { "epoch": 0.10789225464273013, "grad_norm": 0.13023824989795685, "learning_rate": 0.002, "loss": 2.3674, "step": 27910 }, { "epoch": 0.1079309118461134, "grad_norm": 0.1087852418422699, "learning_rate": 0.002, "loss": 2.3777, "step": 27920 }, { "epoch": 0.10796956904949669, "grad_norm": 0.12118271738290787, "learning_rate": 0.002, "loss": 2.3604, "step": 27930 }, { "epoch": 0.10800822625287997, "grad_norm": 0.11113785207271576, "learning_rate": 0.002, "loss": 2.3644, "step": 27940 }, { "epoch": 0.10804688345626325, "grad_norm": 0.12866459786891937, "learning_rate": 0.002, "loss": 2.3743, "step": 27950 }, { "epoch": 0.10808554065964651, "grad_norm": 0.11148317903280258, "learning_rate": 0.002, "loss": 2.381, "step": 27960 }, { "epoch": 0.10812419786302979, "grad_norm": 0.10880941152572632, "learning_rate": 0.002, "loss": 2.3874, "step": 27970 }, { "epoch": 0.10816285506641307, "grad_norm": 0.3135621249675751, "learning_rate": 0.002, "loss": 2.3714, "step": 27980 }, { "epoch": 0.10820151226979635, "grad_norm": 0.11539420485496521, "learning_rate": 0.002, "loss": 2.3676, "step": 27990 }, { "epoch": 0.10824016947317963, "grad_norm": 0.11317495256662369, "learning_rate": 0.002, "loss": 2.3817, "step": 28000 }, { "epoch": 0.10827882667656291, "grad_norm": 0.10599172115325928, "learning_rate": 0.002, "loss": 2.3711, "step": 28010 }, { "epoch": 0.10831748387994619, "grad_norm": 0.11403112858533859, "learning_rate": 0.002, "loss": 2.3728, "step": 28020 }, { "epoch": 0.10835614108332947, "grad_norm": 0.10704049468040466, "learning_rate": 0.002, "loss": 2.3824, "step": 28030 }, { "epoch": 0.10839479828671275, "grad_norm": 0.21458998322486877, "learning_rate": 0.002, "loss": 2.3652, "step": 28040 }, { "epoch": 0.10843345549009603, "grad_norm": 0.1518183797597885, "learning_rate": 0.002, "loss": 2.3745, "step": 28050 }, { "epoch": 0.1084721126934793, "grad_norm": 0.10209496319293976, "learning_rate": 0.002, "loss": 2.3809, "step": 28060 }, { "epoch": 0.10851076989686258, "grad_norm": 0.10443715751171112, "learning_rate": 0.002, "loss": 2.3782, "step": 28070 }, { "epoch": 0.10854942710024586, "grad_norm": 0.12445182353258133, "learning_rate": 0.002, "loss": 2.3698, "step": 28080 }, { "epoch": 0.10858808430362914, "grad_norm": 0.10870563238859177, "learning_rate": 0.002, "loss": 2.3707, "step": 28090 }, { "epoch": 0.10862674150701242, "grad_norm": 0.11430470645427704, "learning_rate": 0.002, "loss": 2.3778, "step": 28100 }, { "epoch": 0.1086653987103957, "grad_norm": 0.1253480315208435, "learning_rate": 0.002, "loss": 2.3829, "step": 28110 }, { "epoch": 0.10870405591377898, "grad_norm": 0.1110760048031807, "learning_rate": 0.002, "loss": 2.3551, "step": 28120 }, { "epoch": 0.10874271311716226, "grad_norm": 0.11550119519233704, "learning_rate": 0.002, "loss": 2.3852, "step": 28130 }, { "epoch": 0.10878137032054554, "grad_norm": 0.1076694130897522, "learning_rate": 0.002, "loss": 2.3724, "step": 28140 }, { "epoch": 0.1088200275239288, "grad_norm": 0.11724784970283508, "learning_rate": 0.002, "loss": 2.3816, "step": 28150 }, { "epoch": 0.10885868472731208, "grad_norm": 0.12491951882839203, "learning_rate": 0.002, "loss": 2.3857, "step": 28160 }, { "epoch": 0.10889734193069536, "grad_norm": 0.11169461160898209, "learning_rate": 0.002, "loss": 2.3626, "step": 28170 }, { "epoch": 0.10893599913407864, "grad_norm": 0.11813009530305862, "learning_rate": 0.002, "loss": 2.3685, "step": 28180 }, { "epoch": 0.10897465633746192, "grad_norm": 0.09784800559282303, "learning_rate": 0.002, "loss": 2.3777, "step": 28190 }, { "epoch": 0.1090133135408452, "grad_norm": 0.12332207709550858, "learning_rate": 0.002, "loss": 2.3732, "step": 28200 }, { "epoch": 0.10905197074422848, "grad_norm": 0.09766773879528046, "learning_rate": 0.002, "loss": 2.3737, "step": 28210 }, { "epoch": 0.10909062794761176, "grad_norm": 0.09964337944984436, "learning_rate": 0.002, "loss": 2.3779, "step": 28220 }, { "epoch": 0.10912928515099504, "grad_norm": 0.09515334665775299, "learning_rate": 0.002, "loss": 2.3783, "step": 28230 }, { "epoch": 0.10916794235437831, "grad_norm": 0.10735496878623962, "learning_rate": 0.002, "loss": 2.3592, "step": 28240 }, { "epoch": 0.10920659955776159, "grad_norm": 0.12152384966611862, "learning_rate": 0.002, "loss": 2.3757, "step": 28250 }, { "epoch": 0.10924525676114487, "grad_norm": 0.1107824295759201, "learning_rate": 0.002, "loss": 2.3595, "step": 28260 }, { "epoch": 0.10928391396452815, "grad_norm": 0.13151098787784576, "learning_rate": 0.002, "loss": 2.3709, "step": 28270 }, { "epoch": 0.10932257116791143, "grad_norm": 0.11157234758138657, "learning_rate": 0.002, "loss": 2.3699, "step": 28280 }, { "epoch": 0.10936122837129471, "grad_norm": 0.10893750935792923, "learning_rate": 0.002, "loss": 2.3833, "step": 28290 }, { "epoch": 0.10939988557467799, "grad_norm": 0.10100287944078445, "learning_rate": 0.002, "loss": 2.3668, "step": 28300 }, { "epoch": 0.10943854277806127, "grad_norm": 0.15186738967895508, "learning_rate": 0.002, "loss": 2.3677, "step": 28310 }, { "epoch": 0.10947719998144455, "grad_norm": 0.09146854281425476, "learning_rate": 0.002, "loss": 2.3659, "step": 28320 }, { "epoch": 0.10951585718482781, "grad_norm": 0.1130962148308754, "learning_rate": 0.002, "loss": 2.3714, "step": 28330 }, { "epoch": 0.1095545143882111, "grad_norm": 0.1127144992351532, "learning_rate": 0.002, "loss": 2.3824, "step": 28340 }, { "epoch": 0.10959317159159437, "grad_norm": 0.12409462779760361, "learning_rate": 0.002, "loss": 2.3826, "step": 28350 }, { "epoch": 0.10963182879497765, "grad_norm": 0.13971257209777832, "learning_rate": 0.002, "loss": 2.3624, "step": 28360 }, { "epoch": 0.10967048599836093, "grad_norm": 0.11502938717603683, "learning_rate": 0.002, "loss": 2.3625, "step": 28370 }, { "epoch": 0.10970914320174421, "grad_norm": 0.12327464669942856, "learning_rate": 0.002, "loss": 2.3735, "step": 28380 }, { "epoch": 0.1097478004051275, "grad_norm": 0.10914456099271774, "learning_rate": 0.002, "loss": 2.3758, "step": 28390 }, { "epoch": 0.10978645760851077, "grad_norm": 0.1268835812807083, "learning_rate": 0.002, "loss": 2.3665, "step": 28400 }, { "epoch": 0.10982511481189405, "grad_norm": 0.11616487801074982, "learning_rate": 0.002, "loss": 2.3611, "step": 28410 }, { "epoch": 0.10986377201527733, "grad_norm": 0.11308058351278305, "learning_rate": 0.002, "loss": 2.3819, "step": 28420 }, { "epoch": 0.1099024292186606, "grad_norm": 0.13024914264678955, "learning_rate": 0.002, "loss": 2.3806, "step": 28430 }, { "epoch": 0.10994108642204388, "grad_norm": 0.11701653152704239, "learning_rate": 0.002, "loss": 2.391, "step": 28440 }, { "epoch": 0.10997974362542716, "grad_norm": 0.1038404330611229, "learning_rate": 0.002, "loss": 2.3622, "step": 28450 }, { "epoch": 0.11001840082881044, "grad_norm": 0.11018650978803635, "learning_rate": 0.002, "loss": 2.3649, "step": 28460 }, { "epoch": 0.11005705803219372, "grad_norm": 0.09936435520648956, "learning_rate": 0.002, "loss": 2.3759, "step": 28470 }, { "epoch": 0.110095715235577, "grad_norm": 0.09147805720567703, "learning_rate": 0.002, "loss": 2.3538, "step": 28480 }, { "epoch": 0.11013437243896028, "grad_norm": 0.11586008965969086, "learning_rate": 0.002, "loss": 2.3748, "step": 28490 }, { "epoch": 0.11017302964234356, "grad_norm": 0.10951730608940125, "learning_rate": 0.002, "loss": 2.3692, "step": 28500 }, { "epoch": 0.11021168684572684, "grad_norm": 0.12234780192375183, "learning_rate": 0.002, "loss": 2.3688, "step": 28510 }, { "epoch": 0.1102503440491101, "grad_norm": 0.11470252275466919, "learning_rate": 0.002, "loss": 2.3524, "step": 28520 }, { "epoch": 0.11028900125249338, "grad_norm": 0.1343831568956375, "learning_rate": 0.002, "loss": 2.3639, "step": 28530 }, { "epoch": 0.11032765845587666, "grad_norm": 0.12148990482091904, "learning_rate": 0.002, "loss": 2.3749, "step": 28540 }, { "epoch": 0.11036631565925994, "grad_norm": 0.09863020479679108, "learning_rate": 0.002, "loss": 2.3836, "step": 28550 }, { "epoch": 0.11040497286264322, "grad_norm": 0.18454931676387787, "learning_rate": 0.002, "loss": 2.3672, "step": 28560 }, { "epoch": 0.1104436300660265, "grad_norm": 0.1118006780743599, "learning_rate": 0.002, "loss": 2.3625, "step": 28570 }, { "epoch": 0.11048228726940978, "grad_norm": 0.09556985646486282, "learning_rate": 0.002, "loss": 2.3625, "step": 28580 }, { "epoch": 0.11052094447279306, "grad_norm": 0.09216835349798203, "learning_rate": 0.002, "loss": 2.3657, "step": 28590 }, { "epoch": 0.11055960167617634, "grad_norm": 0.11700079590082169, "learning_rate": 0.002, "loss": 2.3715, "step": 28600 }, { "epoch": 0.11059825887955961, "grad_norm": 0.11173650622367859, "learning_rate": 0.002, "loss": 2.3687, "step": 28610 }, { "epoch": 0.11063691608294289, "grad_norm": 0.1257830411195755, "learning_rate": 0.002, "loss": 2.3783, "step": 28620 }, { "epoch": 0.11067557328632617, "grad_norm": 0.1310737282037735, "learning_rate": 0.002, "loss": 2.3769, "step": 28630 }, { "epoch": 0.11071423048970945, "grad_norm": 0.12189605087041855, "learning_rate": 0.002, "loss": 2.3852, "step": 28640 }, { "epoch": 0.11075288769309273, "grad_norm": 0.10539402067661285, "learning_rate": 0.002, "loss": 2.3751, "step": 28650 }, { "epoch": 0.11079154489647601, "grad_norm": 0.11254836618900299, "learning_rate": 0.002, "loss": 2.3799, "step": 28660 }, { "epoch": 0.11083020209985929, "grad_norm": 0.11399146914482117, "learning_rate": 0.002, "loss": 2.3758, "step": 28670 }, { "epoch": 0.11086885930324257, "grad_norm": 0.10469246655702591, "learning_rate": 0.002, "loss": 2.3783, "step": 28680 }, { "epoch": 0.11090751650662585, "grad_norm": 0.11052166670560837, "learning_rate": 0.002, "loss": 2.3847, "step": 28690 }, { "epoch": 0.11094617371000913, "grad_norm": 0.10565482825040817, "learning_rate": 0.002, "loss": 2.3679, "step": 28700 }, { "epoch": 0.1109848309133924, "grad_norm": 0.09518107026815414, "learning_rate": 0.002, "loss": 2.3739, "step": 28710 }, { "epoch": 0.11102348811677568, "grad_norm": 0.10598381608724594, "learning_rate": 0.002, "loss": 2.3738, "step": 28720 }, { "epoch": 0.11106214532015896, "grad_norm": 0.095307856798172, "learning_rate": 0.002, "loss": 2.3712, "step": 28730 }, { "epoch": 0.11110080252354224, "grad_norm": 0.12068048864603043, "learning_rate": 0.002, "loss": 2.3752, "step": 28740 }, { "epoch": 0.11113945972692552, "grad_norm": 0.11202985793352127, "learning_rate": 0.002, "loss": 2.3873, "step": 28750 }, { "epoch": 0.1111781169303088, "grad_norm": 0.09940610826015472, "learning_rate": 0.002, "loss": 2.367, "step": 28760 }, { "epoch": 0.11121677413369208, "grad_norm": 0.10500725358724594, "learning_rate": 0.002, "loss": 2.379, "step": 28770 }, { "epoch": 0.11125543133707536, "grad_norm": 0.1390557736158371, "learning_rate": 0.002, "loss": 2.3724, "step": 28780 }, { "epoch": 0.11129408854045864, "grad_norm": 0.11617904156446457, "learning_rate": 0.002, "loss": 2.3693, "step": 28790 }, { "epoch": 0.1113327457438419, "grad_norm": 0.1266857385635376, "learning_rate": 0.002, "loss": 2.3707, "step": 28800 }, { "epoch": 0.11137140294722518, "grad_norm": 0.1094893291592598, "learning_rate": 0.002, "loss": 2.364, "step": 28810 }, { "epoch": 0.11141006015060846, "grad_norm": 0.10328162461519241, "learning_rate": 0.002, "loss": 2.3657, "step": 28820 }, { "epoch": 0.11144871735399174, "grad_norm": 0.13057942688465118, "learning_rate": 0.002, "loss": 2.3837, "step": 28830 }, { "epoch": 0.11148737455737502, "grad_norm": 0.1053711324930191, "learning_rate": 0.002, "loss": 2.3861, "step": 28840 }, { "epoch": 0.1115260317607583, "grad_norm": 0.10615862160921097, "learning_rate": 0.002, "loss": 2.3635, "step": 28850 }, { "epoch": 0.11156468896414158, "grad_norm": 0.11354444175958633, "learning_rate": 0.002, "loss": 2.373, "step": 28860 }, { "epoch": 0.11160334616752486, "grad_norm": 0.10664892941713333, "learning_rate": 0.002, "loss": 2.391, "step": 28870 }, { "epoch": 0.11164200337090814, "grad_norm": 0.12627890706062317, "learning_rate": 0.002, "loss": 2.3647, "step": 28880 }, { "epoch": 0.11168066057429141, "grad_norm": 0.2308432012796402, "learning_rate": 0.002, "loss": 2.3773, "step": 28890 }, { "epoch": 0.11171931777767469, "grad_norm": 0.14682862162590027, "learning_rate": 0.002, "loss": 2.391, "step": 28900 }, { "epoch": 0.11175797498105797, "grad_norm": 0.10037046670913696, "learning_rate": 0.002, "loss": 2.3671, "step": 28910 }, { "epoch": 0.11179663218444125, "grad_norm": 0.11632565408945084, "learning_rate": 0.002, "loss": 2.3667, "step": 28920 }, { "epoch": 0.11183528938782453, "grad_norm": 0.12636606395244598, "learning_rate": 0.002, "loss": 2.3518, "step": 28930 }, { "epoch": 0.1118739465912078, "grad_norm": 0.147725448012352, "learning_rate": 0.002, "loss": 2.367, "step": 28940 }, { "epoch": 0.11191260379459109, "grad_norm": 0.11857064813375473, "learning_rate": 0.002, "loss": 2.3793, "step": 28950 }, { "epoch": 0.11195126099797437, "grad_norm": 0.11522777378559113, "learning_rate": 0.002, "loss": 2.3655, "step": 28960 }, { "epoch": 0.11198991820135765, "grad_norm": 0.12320879846811295, "learning_rate": 0.002, "loss": 2.3909, "step": 28970 }, { "epoch": 0.11202857540474091, "grad_norm": 0.11825113743543625, "learning_rate": 0.002, "loss": 2.3854, "step": 28980 }, { "epoch": 0.11206723260812419, "grad_norm": 0.11741521209478378, "learning_rate": 0.002, "loss": 2.3741, "step": 28990 }, { "epoch": 0.11210588981150747, "grad_norm": 0.10914985835552216, "learning_rate": 0.002, "loss": 2.365, "step": 29000 }, { "epoch": 0.11214454701489075, "grad_norm": 0.10588382929563522, "learning_rate": 0.002, "loss": 2.3745, "step": 29010 }, { "epoch": 0.11218320421827403, "grad_norm": 0.14614279568195343, "learning_rate": 0.002, "loss": 2.3819, "step": 29020 }, { "epoch": 0.11222186142165731, "grad_norm": 0.1200341135263443, "learning_rate": 0.002, "loss": 2.3711, "step": 29030 }, { "epoch": 0.11226051862504059, "grad_norm": 0.11353737115859985, "learning_rate": 0.002, "loss": 2.3842, "step": 29040 }, { "epoch": 0.11229917582842387, "grad_norm": 0.10931562632322311, "learning_rate": 0.002, "loss": 2.3774, "step": 29050 }, { "epoch": 0.11233783303180715, "grad_norm": 0.09870153665542603, "learning_rate": 0.002, "loss": 2.3828, "step": 29060 }, { "epoch": 0.11237649023519043, "grad_norm": 0.10087848454713821, "learning_rate": 0.002, "loss": 2.366, "step": 29070 }, { "epoch": 0.1124151474385737, "grad_norm": 0.14312563836574554, "learning_rate": 0.002, "loss": 2.3698, "step": 29080 }, { "epoch": 0.11245380464195698, "grad_norm": 0.10859497636556625, "learning_rate": 0.002, "loss": 2.3734, "step": 29090 }, { "epoch": 0.11249246184534026, "grad_norm": 0.1190008744597435, "learning_rate": 0.002, "loss": 2.3612, "step": 29100 }, { "epoch": 0.11253111904872354, "grad_norm": 0.10914407670497894, "learning_rate": 0.002, "loss": 2.3763, "step": 29110 }, { "epoch": 0.11256977625210682, "grad_norm": 0.12408732622861862, "learning_rate": 0.002, "loss": 2.3581, "step": 29120 }, { "epoch": 0.1126084334554901, "grad_norm": 0.09813961386680603, "learning_rate": 0.002, "loss": 2.3473, "step": 29130 }, { "epoch": 0.11264709065887338, "grad_norm": 0.13027751445770264, "learning_rate": 0.002, "loss": 2.3779, "step": 29140 }, { "epoch": 0.11268574786225666, "grad_norm": 0.10990479588508606, "learning_rate": 0.002, "loss": 2.3887, "step": 29150 }, { "epoch": 0.11272440506563994, "grad_norm": 0.11599687486886978, "learning_rate": 0.002, "loss": 2.3759, "step": 29160 }, { "epoch": 0.1127630622690232, "grad_norm": 0.1316378116607666, "learning_rate": 0.002, "loss": 2.371, "step": 29170 }, { "epoch": 0.11280171947240648, "grad_norm": 0.1244787871837616, "learning_rate": 0.002, "loss": 2.3822, "step": 29180 }, { "epoch": 0.11284037667578976, "grad_norm": 0.1331619769334793, "learning_rate": 0.002, "loss": 2.3719, "step": 29190 }, { "epoch": 0.11287903387917304, "grad_norm": 0.09210868179798126, "learning_rate": 0.002, "loss": 2.3634, "step": 29200 }, { "epoch": 0.11291769108255632, "grad_norm": 0.5537592172622681, "learning_rate": 0.002, "loss": 2.3652, "step": 29210 }, { "epoch": 0.1129563482859396, "grad_norm": 0.10940191894769669, "learning_rate": 0.002, "loss": 2.3703, "step": 29220 }, { "epoch": 0.11299500548932288, "grad_norm": 0.11873272061347961, "learning_rate": 0.002, "loss": 2.3884, "step": 29230 }, { "epoch": 0.11303366269270616, "grad_norm": 0.12138811498880386, "learning_rate": 0.002, "loss": 2.3839, "step": 29240 }, { "epoch": 0.11307231989608944, "grad_norm": 0.1103893592953682, "learning_rate": 0.002, "loss": 2.3853, "step": 29250 }, { "epoch": 0.11311097709947271, "grad_norm": 0.11036432534456253, "learning_rate": 0.002, "loss": 2.3735, "step": 29260 }, { "epoch": 0.11314963430285599, "grad_norm": 0.2721116244792938, "learning_rate": 0.002, "loss": 2.383, "step": 29270 }, { "epoch": 0.11318829150623927, "grad_norm": 0.1114020049571991, "learning_rate": 0.002, "loss": 2.3765, "step": 29280 }, { "epoch": 0.11322694870962255, "grad_norm": 0.11401335150003433, "learning_rate": 0.002, "loss": 2.3698, "step": 29290 }, { "epoch": 0.11326560591300583, "grad_norm": 0.1049218401312828, "learning_rate": 0.002, "loss": 2.3682, "step": 29300 }, { "epoch": 0.11330426311638911, "grad_norm": 0.16691258549690247, "learning_rate": 0.002, "loss": 2.3514, "step": 29310 }, { "epoch": 0.11334292031977239, "grad_norm": 0.11298374086618423, "learning_rate": 0.002, "loss": 2.3749, "step": 29320 }, { "epoch": 0.11338157752315567, "grad_norm": 0.09407994896173477, "learning_rate": 0.002, "loss": 2.3644, "step": 29330 }, { "epoch": 0.11342023472653895, "grad_norm": 0.12208922952413559, "learning_rate": 0.002, "loss": 2.361, "step": 29340 }, { "epoch": 0.11345889192992221, "grad_norm": 0.1252242773771286, "learning_rate": 0.002, "loss": 2.3775, "step": 29350 }, { "epoch": 0.1134975491333055, "grad_norm": 0.11096751689910889, "learning_rate": 0.002, "loss": 2.3639, "step": 29360 }, { "epoch": 0.11353620633668877, "grad_norm": 0.10881470143795013, "learning_rate": 0.002, "loss": 2.3857, "step": 29370 }, { "epoch": 0.11357486354007205, "grad_norm": 0.1198098361492157, "learning_rate": 0.002, "loss": 2.3748, "step": 29380 }, { "epoch": 0.11361352074345533, "grad_norm": 0.11969012767076492, "learning_rate": 0.002, "loss": 2.3788, "step": 29390 }, { "epoch": 0.11365217794683861, "grad_norm": 0.11216343939304352, "learning_rate": 0.002, "loss": 2.3811, "step": 29400 }, { "epoch": 0.1136908351502219, "grad_norm": 0.09968668967485428, "learning_rate": 0.002, "loss": 2.3849, "step": 29410 }, { "epoch": 0.11372949235360517, "grad_norm": 0.11205536127090454, "learning_rate": 0.002, "loss": 2.3633, "step": 29420 }, { "epoch": 0.11376814955698845, "grad_norm": 0.12463561445474625, "learning_rate": 0.002, "loss": 2.3619, "step": 29430 }, { "epoch": 0.11380680676037173, "grad_norm": 0.10464975237846375, "learning_rate": 0.002, "loss": 2.3892, "step": 29440 }, { "epoch": 0.113845463963755, "grad_norm": 0.109347403049469, "learning_rate": 0.002, "loss": 2.3722, "step": 29450 }, { "epoch": 0.11388412116713828, "grad_norm": 0.11343058943748474, "learning_rate": 0.002, "loss": 2.373, "step": 29460 }, { "epoch": 0.11392277837052156, "grad_norm": 0.1124456450343132, "learning_rate": 0.002, "loss": 2.3769, "step": 29470 }, { "epoch": 0.11396143557390484, "grad_norm": 0.10418668389320374, "learning_rate": 0.002, "loss": 2.375, "step": 29480 }, { "epoch": 0.11400009277728812, "grad_norm": 0.10243808478116989, "learning_rate": 0.002, "loss": 2.3646, "step": 29490 }, { "epoch": 0.1140387499806714, "grad_norm": 0.160277858376503, "learning_rate": 0.002, "loss": 2.3671, "step": 29500 }, { "epoch": 0.11407740718405468, "grad_norm": 0.10653091967105865, "learning_rate": 0.002, "loss": 2.3765, "step": 29510 }, { "epoch": 0.11411606438743796, "grad_norm": 0.19174635410308838, "learning_rate": 0.002, "loss": 2.3828, "step": 29520 }, { "epoch": 0.11415472159082124, "grad_norm": 0.11096177250146866, "learning_rate": 0.002, "loss": 2.387, "step": 29530 }, { "epoch": 0.1141933787942045, "grad_norm": 0.12363119423389435, "learning_rate": 0.002, "loss": 2.3764, "step": 29540 }, { "epoch": 0.11423203599758779, "grad_norm": 0.10024431347846985, "learning_rate": 0.002, "loss": 2.3643, "step": 29550 }, { "epoch": 0.11427069320097107, "grad_norm": 0.10744503140449524, "learning_rate": 0.002, "loss": 2.3682, "step": 29560 }, { "epoch": 0.11430935040435435, "grad_norm": 0.1058807298541069, "learning_rate": 0.002, "loss": 2.3808, "step": 29570 }, { "epoch": 0.11434800760773763, "grad_norm": 0.12204091995954514, "learning_rate": 0.002, "loss": 2.3799, "step": 29580 }, { "epoch": 0.1143866648111209, "grad_norm": 0.10262423008680344, "learning_rate": 0.002, "loss": 2.3861, "step": 29590 }, { "epoch": 0.11442532201450419, "grad_norm": 0.12699533998966217, "learning_rate": 0.002, "loss": 2.3767, "step": 29600 }, { "epoch": 0.11446397921788747, "grad_norm": 0.13344010710716248, "learning_rate": 0.002, "loss": 2.382, "step": 29610 }, { "epoch": 0.11450263642127075, "grad_norm": 0.12079144269227982, "learning_rate": 0.002, "loss": 2.37, "step": 29620 }, { "epoch": 0.11454129362465401, "grad_norm": 0.11895615607500076, "learning_rate": 0.002, "loss": 2.3792, "step": 29630 }, { "epoch": 0.11457995082803729, "grad_norm": 0.1387789100408554, "learning_rate": 0.002, "loss": 2.3699, "step": 29640 }, { "epoch": 0.11461860803142057, "grad_norm": 0.10361293703317642, "learning_rate": 0.002, "loss": 2.3672, "step": 29650 }, { "epoch": 0.11465726523480385, "grad_norm": 0.11020094901323318, "learning_rate": 0.002, "loss": 2.3903, "step": 29660 }, { "epoch": 0.11469592243818713, "grad_norm": 0.09268821775913239, "learning_rate": 0.002, "loss": 2.3756, "step": 29670 }, { "epoch": 0.11473457964157041, "grad_norm": 0.1092313826084137, "learning_rate": 0.002, "loss": 2.3786, "step": 29680 }, { "epoch": 0.11477323684495369, "grad_norm": 0.09730253368616104, "learning_rate": 0.002, "loss": 2.3659, "step": 29690 }, { "epoch": 0.11481189404833697, "grad_norm": 0.12316158413887024, "learning_rate": 0.002, "loss": 2.3865, "step": 29700 }, { "epoch": 0.11485055125172025, "grad_norm": 0.1004888042807579, "learning_rate": 0.002, "loss": 2.3849, "step": 29710 }, { "epoch": 0.11488920845510353, "grad_norm": 0.12836909294128418, "learning_rate": 0.002, "loss": 2.3858, "step": 29720 }, { "epoch": 0.1149278656584868, "grad_norm": 0.10969601571559906, "learning_rate": 0.002, "loss": 2.37, "step": 29730 }, { "epoch": 0.11496652286187008, "grad_norm": 0.11120554059743881, "learning_rate": 0.002, "loss": 2.3799, "step": 29740 }, { "epoch": 0.11500518006525336, "grad_norm": 0.10580164194107056, "learning_rate": 0.002, "loss": 2.378, "step": 29750 }, { "epoch": 0.11504383726863664, "grad_norm": 0.11593388020992279, "learning_rate": 0.002, "loss": 2.3674, "step": 29760 }, { "epoch": 0.11508249447201992, "grad_norm": 0.11328724771738052, "learning_rate": 0.002, "loss": 2.3909, "step": 29770 }, { "epoch": 0.1151211516754032, "grad_norm": 0.10201551020145416, "learning_rate": 0.002, "loss": 2.3538, "step": 29780 }, { "epoch": 0.11515980887878648, "grad_norm": 0.1188369020819664, "learning_rate": 0.002, "loss": 2.3777, "step": 29790 }, { "epoch": 0.11519846608216976, "grad_norm": 0.12928543984889984, "learning_rate": 0.002, "loss": 2.3662, "step": 29800 }, { "epoch": 0.11523712328555304, "grad_norm": 0.10411644726991653, "learning_rate": 0.002, "loss": 2.3724, "step": 29810 }, { "epoch": 0.1152757804889363, "grad_norm": 0.10747519880533218, "learning_rate": 0.002, "loss": 2.3724, "step": 29820 }, { "epoch": 0.11531443769231958, "grad_norm": 0.10192860662937164, "learning_rate": 0.002, "loss": 2.3489, "step": 29830 }, { "epoch": 0.11535309489570286, "grad_norm": 0.10935201495885849, "learning_rate": 0.002, "loss": 2.3714, "step": 29840 }, { "epoch": 0.11539175209908614, "grad_norm": 0.12911826372146606, "learning_rate": 0.002, "loss": 2.364, "step": 29850 }, { "epoch": 0.11543040930246942, "grad_norm": 0.11535267531871796, "learning_rate": 0.002, "loss": 2.3783, "step": 29860 }, { "epoch": 0.1154690665058527, "grad_norm": 0.11060041189193726, "learning_rate": 0.002, "loss": 2.3789, "step": 29870 }, { "epoch": 0.11550772370923598, "grad_norm": 0.10553035140037537, "learning_rate": 0.002, "loss": 2.3614, "step": 29880 }, { "epoch": 0.11554638091261926, "grad_norm": 0.11950163543224335, "learning_rate": 0.002, "loss": 2.3666, "step": 29890 }, { "epoch": 0.11558503811600254, "grad_norm": 0.13742852210998535, "learning_rate": 0.002, "loss": 2.3869, "step": 29900 }, { "epoch": 0.11562369531938581, "grad_norm": 0.10131851583719254, "learning_rate": 0.002, "loss": 2.3744, "step": 29910 }, { "epoch": 0.11566235252276909, "grad_norm": 0.10164907574653625, "learning_rate": 0.002, "loss": 2.3762, "step": 29920 }, { "epoch": 0.11570100972615237, "grad_norm": 0.1058017909526825, "learning_rate": 0.002, "loss": 2.3724, "step": 29930 }, { "epoch": 0.11573966692953565, "grad_norm": 0.13190488517284393, "learning_rate": 0.002, "loss": 2.3788, "step": 29940 }, { "epoch": 0.11577832413291893, "grad_norm": 0.09957809001207352, "learning_rate": 0.002, "loss": 2.3692, "step": 29950 }, { "epoch": 0.11581698133630221, "grad_norm": 0.1056378036737442, "learning_rate": 0.002, "loss": 2.3866, "step": 29960 }, { "epoch": 0.11585563853968549, "grad_norm": 0.10000760108232498, "learning_rate": 0.002, "loss": 2.362, "step": 29970 }, { "epoch": 0.11589429574306877, "grad_norm": 0.10699297487735748, "learning_rate": 0.002, "loss": 2.3649, "step": 29980 }, { "epoch": 0.11593295294645205, "grad_norm": 0.09690152853727341, "learning_rate": 0.002, "loss": 2.3894, "step": 29990 }, { "epoch": 0.11597161014983531, "grad_norm": 0.10324429720640182, "learning_rate": 0.002, "loss": 2.3722, "step": 30000 }, { "epoch": 0.1160102673532186, "grad_norm": 0.10979462414979935, "learning_rate": 0.002, "loss": 2.374, "step": 30010 }, { "epoch": 0.11604892455660187, "grad_norm": 0.10407369583845139, "learning_rate": 0.002, "loss": 2.379, "step": 30020 }, { "epoch": 0.11608758175998515, "grad_norm": 0.10763532668352127, "learning_rate": 0.002, "loss": 2.3733, "step": 30030 }, { "epoch": 0.11612623896336843, "grad_norm": 0.11464933305978775, "learning_rate": 0.002, "loss": 2.3666, "step": 30040 }, { "epoch": 0.11616489616675171, "grad_norm": 0.14590147137641907, "learning_rate": 0.002, "loss": 2.3681, "step": 30050 }, { "epoch": 0.116203553370135, "grad_norm": 0.1058560386300087, "learning_rate": 0.002, "loss": 2.3719, "step": 30060 }, { "epoch": 0.11624221057351827, "grad_norm": 0.11187131702899933, "learning_rate": 0.002, "loss": 2.3648, "step": 30070 }, { "epoch": 0.11628086777690155, "grad_norm": 0.11111967265605927, "learning_rate": 0.002, "loss": 2.357, "step": 30080 }, { "epoch": 0.11631952498028483, "grad_norm": 0.10049404203891754, "learning_rate": 0.002, "loss": 2.3713, "step": 30090 }, { "epoch": 0.1163581821836681, "grad_norm": 0.15781576931476593, "learning_rate": 0.002, "loss": 2.3811, "step": 30100 }, { "epoch": 0.11639683938705138, "grad_norm": 0.13108819723129272, "learning_rate": 0.002, "loss": 2.3682, "step": 30110 }, { "epoch": 0.11643549659043466, "grad_norm": 0.10871430486440659, "learning_rate": 0.002, "loss": 2.3734, "step": 30120 }, { "epoch": 0.11647415379381794, "grad_norm": 0.1290903091430664, "learning_rate": 0.002, "loss": 2.3918, "step": 30130 }, { "epoch": 0.11651281099720122, "grad_norm": 0.11009565740823746, "learning_rate": 0.002, "loss": 2.3595, "step": 30140 }, { "epoch": 0.1165514682005845, "grad_norm": 0.10931471735239029, "learning_rate": 0.002, "loss": 2.3632, "step": 30150 }, { "epoch": 0.11659012540396778, "grad_norm": 0.09931996464729309, "learning_rate": 0.002, "loss": 2.3723, "step": 30160 }, { "epoch": 0.11662878260735106, "grad_norm": 0.1256752461194992, "learning_rate": 0.002, "loss": 2.3699, "step": 30170 }, { "epoch": 0.11666743981073434, "grad_norm": 0.11275876313447952, "learning_rate": 0.002, "loss": 2.3671, "step": 30180 }, { "epoch": 0.1167060970141176, "grad_norm": 0.10061401128768921, "learning_rate": 0.002, "loss": 2.3638, "step": 30190 }, { "epoch": 0.11674475421750088, "grad_norm": 0.09852159768342972, "learning_rate": 0.002, "loss": 2.3675, "step": 30200 }, { "epoch": 0.11678341142088416, "grad_norm": 0.11242441833019257, "learning_rate": 0.002, "loss": 2.3714, "step": 30210 }, { "epoch": 0.11682206862426744, "grad_norm": 0.10587003827095032, "learning_rate": 0.002, "loss": 2.3672, "step": 30220 }, { "epoch": 0.11686072582765072, "grad_norm": 0.10544271022081375, "learning_rate": 0.002, "loss": 2.3748, "step": 30230 }, { "epoch": 0.116899383031034, "grad_norm": 0.09737107902765274, "learning_rate": 0.002, "loss": 2.3804, "step": 30240 }, { "epoch": 0.11693804023441728, "grad_norm": 0.1411416381597519, "learning_rate": 0.002, "loss": 2.3772, "step": 30250 }, { "epoch": 0.11697669743780056, "grad_norm": 0.10318762063980103, "learning_rate": 0.002, "loss": 2.3812, "step": 30260 }, { "epoch": 0.11701535464118384, "grad_norm": 0.12529000639915466, "learning_rate": 0.002, "loss": 2.3734, "step": 30270 }, { "epoch": 0.11705401184456711, "grad_norm": 0.10274723917245865, "learning_rate": 0.002, "loss": 2.3644, "step": 30280 }, { "epoch": 0.11709266904795039, "grad_norm": 0.1322697550058365, "learning_rate": 0.002, "loss": 2.3847, "step": 30290 }, { "epoch": 0.11713132625133367, "grad_norm": 0.11907245963811874, "learning_rate": 0.002, "loss": 2.3704, "step": 30300 }, { "epoch": 0.11716998345471695, "grad_norm": 0.13744261860847473, "learning_rate": 0.002, "loss": 2.3672, "step": 30310 }, { "epoch": 0.11720864065810023, "grad_norm": 0.09685148298740387, "learning_rate": 0.002, "loss": 2.3781, "step": 30320 }, { "epoch": 0.11724729786148351, "grad_norm": 0.11613011360168457, "learning_rate": 0.002, "loss": 2.3809, "step": 30330 }, { "epoch": 0.11728595506486679, "grad_norm": 0.11653503775596619, "learning_rate": 0.002, "loss": 2.3803, "step": 30340 }, { "epoch": 0.11732461226825007, "grad_norm": 0.10787040740251541, "learning_rate": 0.002, "loss": 2.3546, "step": 30350 }, { "epoch": 0.11736326947163335, "grad_norm": 0.10704168677330017, "learning_rate": 0.002, "loss": 2.3786, "step": 30360 }, { "epoch": 0.11740192667501662, "grad_norm": 0.12299351394176483, "learning_rate": 0.002, "loss": 2.3737, "step": 30370 }, { "epoch": 0.1174405838783999, "grad_norm": 0.13366322219371796, "learning_rate": 0.002, "loss": 2.3872, "step": 30380 }, { "epoch": 0.11747924108178318, "grad_norm": 0.11831143498420715, "learning_rate": 0.002, "loss": 2.37, "step": 30390 }, { "epoch": 0.11751789828516646, "grad_norm": 0.11491730809211731, "learning_rate": 0.002, "loss": 2.3682, "step": 30400 }, { "epoch": 0.11755655548854974, "grad_norm": 0.1049700379371643, "learning_rate": 0.002, "loss": 2.3722, "step": 30410 }, { "epoch": 0.11759521269193302, "grad_norm": 0.10995157808065414, "learning_rate": 0.002, "loss": 2.3756, "step": 30420 }, { "epoch": 0.1176338698953163, "grad_norm": 0.09854548424482346, "learning_rate": 0.002, "loss": 2.3676, "step": 30430 }, { "epoch": 0.11767252709869958, "grad_norm": 0.12066702544689178, "learning_rate": 0.002, "loss": 2.3707, "step": 30440 }, { "epoch": 0.11771118430208286, "grad_norm": 0.11020725965499878, "learning_rate": 0.002, "loss": 2.3864, "step": 30450 }, { "epoch": 0.11774984150546614, "grad_norm": 0.09980931878089905, "learning_rate": 0.002, "loss": 2.3581, "step": 30460 }, { "epoch": 0.1177884987088494, "grad_norm": 0.13707521557807922, "learning_rate": 0.002, "loss": 2.3736, "step": 30470 }, { "epoch": 0.11782715591223268, "grad_norm": 0.11138369143009186, "learning_rate": 0.002, "loss": 2.3627, "step": 30480 }, { "epoch": 0.11786581311561596, "grad_norm": 0.10805569589138031, "learning_rate": 0.002, "loss": 2.3759, "step": 30490 }, { "epoch": 0.11790447031899924, "grad_norm": 0.1061464250087738, "learning_rate": 0.002, "loss": 2.3726, "step": 30500 }, { "epoch": 0.11794312752238252, "grad_norm": 0.0994805321097374, "learning_rate": 0.002, "loss": 2.3755, "step": 30510 }, { "epoch": 0.1179817847257658, "grad_norm": 0.3877389430999756, "learning_rate": 0.002, "loss": 2.3762, "step": 30520 }, { "epoch": 0.11802044192914908, "grad_norm": 0.101154625415802, "learning_rate": 0.002, "loss": 2.3767, "step": 30530 }, { "epoch": 0.11805909913253236, "grad_norm": 0.12650534510612488, "learning_rate": 0.002, "loss": 2.3865, "step": 30540 }, { "epoch": 0.11809775633591564, "grad_norm": 0.1112612634897232, "learning_rate": 0.002, "loss": 2.3799, "step": 30550 }, { "epoch": 0.1181364135392989, "grad_norm": 0.10789719223976135, "learning_rate": 0.002, "loss": 2.3826, "step": 30560 }, { "epoch": 0.11817507074268219, "grad_norm": 0.11468793451786041, "learning_rate": 0.002, "loss": 2.3746, "step": 30570 }, { "epoch": 0.11821372794606547, "grad_norm": 0.1100701093673706, "learning_rate": 0.002, "loss": 2.3493, "step": 30580 }, { "epoch": 0.11825238514944875, "grad_norm": 0.09143993258476257, "learning_rate": 0.002, "loss": 2.3703, "step": 30590 }, { "epoch": 0.11829104235283203, "grad_norm": 0.10562839359045029, "learning_rate": 0.002, "loss": 2.3901, "step": 30600 }, { "epoch": 0.1183296995562153, "grad_norm": 0.10740886628627777, "learning_rate": 0.002, "loss": 2.3798, "step": 30610 }, { "epoch": 0.11836835675959859, "grad_norm": 0.15221528708934784, "learning_rate": 0.002, "loss": 2.3636, "step": 30620 }, { "epoch": 0.11840701396298187, "grad_norm": 0.11569618433713913, "learning_rate": 0.002, "loss": 2.3588, "step": 30630 }, { "epoch": 0.11844567116636515, "grad_norm": 0.10287598520517349, "learning_rate": 0.002, "loss": 2.3679, "step": 30640 }, { "epoch": 0.11848432836974841, "grad_norm": 0.10523258149623871, "learning_rate": 0.002, "loss": 2.3692, "step": 30650 }, { "epoch": 0.11852298557313169, "grad_norm": 0.11168524622917175, "learning_rate": 0.002, "loss": 2.3702, "step": 30660 }, { "epoch": 0.11856164277651497, "grad_norm": 0.10336096584796906, "learning_rate": 0.002, "loss": 2.353, "step": 30670 }, { "epoch": 0.11860029997989825, "grad_norm": 0.10352528840303421, "learning_rate": 0.002, "loss": 2.3604, "step": 30680 }, { "epoch": 0.11863895718328153, "grad_norm": 0.14516034722328186, "learning_rate": 0.002, "loss": 2.3588, "step": 30690 }, { "epoch": 0.11867761438666481, "grad_norm": 0.10678558051586151, "learning_rate": 0.002, "loss": 2.3729, "step": 30700 }, { "epoch": 0.11871627159004809, "grad_norm": 0.08876782655715942, "learning_rate": 0.002, "loss": 2.3635, "step": 30710 }, { "epoch": 0.11875492879343137, "grad_norm": 0.11112259328365326, "learning_rate": 0.002, "loss": 2.3879, "step": 30720 }, { "epoch": 0.11879358599681465, "grad_norm": 0.0998547375202179, "learning_rate": 0.002, "loss": 2.3656, "step": 30730 }, { "epoch": 0.11883224320019793, "grad_norm": 0.11167199909687042, "learning_rate": 0.002, "loss": 2.3776, "step": 30740 }, { "epoch": 0.1188709004035812, "grad_norm": 0.09049560129642487, "learning_rate": 0.002, "loss": 2.3668, "step": 30750 }, { "epoch": 0.11890955760696448, "grad_norm": 0.12534171342849731, "learning_rate": 0.002, "loss": 2.3698, "step": 30760 }, { "epoch": 0.11894821481034776, "grad_norm": 0.12534017860889435, "learning_rate": 0.002, "loss": 2.361, "step": 30770 }, { "epoch": 0.11898687201373104, "grad_norm": 0.1101115494966507, "learning_rate": 0.002, "loss": 2.3592, "step": 30780 }, { "epoch": 0.11902552921711432, "grad_norm": 0.11233854293823242, "learning_rate": 0.002, "loss": 2.375, "step": 30790 }, { "epoch": 0.1190641864204976, "grad_norm": 0.09808002412319183, "learning_rate": 0.002, "loss": 2.3478, "step": 30800 }, { "epoch": 0.11910284362388088, "grad_norm": 0.1032966896891594, "learning_rate": 0.002, "loss": 2.3717, "step": 30810 }, { "epoch": 0.11914150082726416, "grad_norm": 0.10848798602819443, "learning_rate": 0.002, "loss": 2.3572, "step": 30820 }, { "epoch": 0.11918015803064744, "grad_norm": 0.13478338718414307, "learning_rate": 0.002, "loss": 2.3874, "step": 30830 }, { "epoch": 0.1192188152340307, "grad_norm": 0.10117039084434509, "learning_rate": 0.002, "loss": 2.3758, "step": 30840 }, { "epoch": 0.11925747243741398, "grad_norm": 0.11757628619670868, "learning_rate": 0.002, "loss": 2.3669, "step": 30850 }, { "epoch": 0.11929612964079726, "grad_norm": 0.11381714046001434, "learning_rate": 0.002, "loss": 2.3744, "step": 30860 }, { "epoch": 0.11933478684418054, "grad_norm": 0.10066360980272293, "learning_rate": 0.002, "loss": 2.3741, "step": 30870 }, { "epoch": 0.11937344404756382, "grad_norm": 0.09848344326019287, "learning_rate": 0.002, "loss": 2.3652, "step": 30880 }, { "epoch": 0.1194121012509471, "grad_norm": 0.13607080280780792, "learning_rate": 0.002, "loss": 2.3809, "step": 30890 }, { "epoch": 0.11945075845433038, "grad_norm": 0.13393841683864594, "learning_rate": 0.002, "loss": 2.3754, "step": 30900 }, { "epoch": 0.11948941565771366, "grad_norm": 0.11103633046150208, "learning_rate": 0.002, "loss": 2.3972, "step": 30910 }, { "epoch": 0.11952807286109694, "grad_norm": 0.13245902955532074, "learning_rate": 0.002, "loss": 2.3893, "step": 30920 }, { "epoch": 0.11956673006448021, "grad_norm": 0.10202698409557343, "learning_rate": 0.002, "loss": 2.3762, "step": 30930 }, { "epoch": 0.11960538726786349, "grad_norm": 0.10218498855829239, "learning_rate": 0.002, "loss": 2.3613, "step": 30940 }, { "epoch": 0.11964404447124677, "grad_norm": 0.09530593454837799, "learning_rate": 0.002, "loss": 2.3563, "step": 30950 }, { "epoch": 0.11968270167463005, "grad_norm": 0.11549419164657593, "learning_rate": 0.002, "loss": 2.3677, "step": 30960 }, { "epoch": 0.11972135887801333, "grad_norm": 0.10871879756450653, "learning_rate": 0.002, "loss": 2.3688, "step": 30970 }, { "epoch": 0.11976001608139661, "grad_norm": 0.12116476148366928, "learning_rate": 0.002, "loss": 2.3649, "step": 30980 }, { "epoch": 0.11979867328477989, "grad_norm": 0.1303841769695282, "learning_rate": 0.002, "loss": 2.3688, "step": 30990 }, { "epoch": 0.11983733048816317, "grad_norm": 0.11049704253673553, "learning_rate": 0.002, "loss": 2.3731, "step": 31000 }, { "epoch": 0.11987598769154645, "grad_norm": 0.10392177104949951, "learning_rate": 0.002, "loss": 2.3764, "step": 31010 }, { "epoch": 0.11991464489492971, "grad_norm": 0.11160852760076523, "learning_rate": 0.002, "loss": 2.3736, "step": 31020 }, { "epoch": 0.119953302098313, "grad_norm": 0.10968372225761414, "learning_rate": 0.002, "loss": 2.377, "step": 31030 }, { "epoch": 0.11999195930169627, "grad_norm": 0.10463325679302216, "learning_rate": 0.002, "loss": 2.3676, "step": 31040 }, { "epoch": 0.12003061650507955, "grad_norm": 0.10578692704439163, "learning_rate": 0.002, "loss": 2.3609, "step": 31050 }, { "epoch": 0.12006927370846283, "grad_norm": 0.13343378901481628, "learning_rate": 0.002, "loss": 2.3681, "step": 31060 }, { "epoch": 0.12010793091184611, "grad_norm": 0.11525391787290573, "learning_rate": 0.002, "loss": 2.3693, "step": 31070 }, { "epoch": 0.1201465881152294, "grad_norm": 0.10206209868192673, "learning_rate": 0.002, "loss": 2.3805, "step": 31080 }, { "epoch": 0.12018524531861267, "grad_norm": 0.11907844245433807, "learning_rate": 0.002, "loss": 2.3645, "step": 31090 }, { "epoch": 0.12022390252199595, "grad_norm": 0.1089697778224945, "learning_rate": 0.002, "loss": 2.3667, "step": 31100 }, { "epoch": 0.12026255972537923, "grad_norm": 0.11410657316446304, "learning_rate": 0.002, "loss": 2.3663, "step": 31110 }, { "epoch": 0.1203012169287625, "grad_norm": 0.12164189666509628, "learning_rate": 0.002, "loss": 2.3701, "step": 31120 }, { "epoch": 0.12033987413214578, "grad_norm": 0.11357486248016357, "learning_rate": 0.002, "loss": 2.3585, "step": 31130 }, { "epoch": 0.12037853133552906, "grad_norm": 0.12656593322753906, "learning_rate": 0.002, "loss": 2.3824, "step": 31140 }, { "epoch": 0.12041718853891234, "grad_norm": 0.11332329362630844, "learning_rate": 0.002, "loss": 2.3637, "step": 31150 }, { "epoch": 0.12045584574229562, "grad_norm": 0.10493875294923782, "learning_rate": 0.002, "loss": 2.3518, "step": 31160 }, { "epoch": 0.1204945029456789, "grad_norm": 0.10366437584161758, "learning_rate": 0.002, "loss": 2.3814, "step": 31170 }, { "epoch": 0.12053316014906218, "grad_norm": 0.12637628614902496, "learning_rate": 0.002, "loss": 2.3794, "step": 31180 }, { "epoch": 0.12057181735244546, "grad_norm": 0.12367497384548187, "learning_rate": 0.002, "loss": 2.361, "step": 31190 }, { "epoch": 0.12061047455582874, "grad_norm": 0.1017698347568512, "learning_rate": 0.002, "loss": 2.3756, "step": 31200 }, { "epoch": 0.120649131759212, "grad_norm": 0.1321551352739334, "learning_rate": 0.002, "loss": 2.3714, "step": 31210 }, { "epoch": 0.12068778896259529, "grad_norm": 0.10494833439588547, "learning_rate": 0.002, "loss": 2.3749, "step": 31220 }, { "epoch": 0.12072644616597857, "grad_norm": 0.11836351454257965, "learning_rate": 0.002, "loss": 2.384, "step": 31230 }, { "epoch": 0.12076510336936185, "grad_norm": 0.12958987057209015, "learning_rate": 0.002, "loss": 2.368, "step": 31240 }, { "epoch": 0.12080376057274513, "grad_norm": 0.10293237864971161, "learning_rate": 0.002, "loss": 2.3696, "step": 31250 }, { "epoch": 0.1208424177761284, "grad_norm": 0.11553335934877396, "learning_rate": 0.002, "loss": 2.3757, "step": 31260 }, { "epoch": 0.12088107497951169, "grad_norm": 0.12096146494150162, "learning_rate": 0.002, "loss": 2.3582, "step": 31270 }, { "epoch": 0.12091973218289497, "grad_norm": 0.11854767799377441, "learning_rate": 0.002, "loss": 2.3766, "step": 31280 }, { "epoch": 0.12095838938627824, "grad_norm": 0.10580070316791534, "learning_rate": 0.002, "loss": 2.3798, "step": 31290 }, { "epoch": 0.12099704658966151, "grad_norm": 0.11417514830827713, "learning_rate": 0.002, "loss": 2.3862, "step": 31300 }, { "epoch": 0.12103570379304479, "grad_norm": 0.11195675283670425, "learning_rate": 0.002, "loss": 2.3705, "step": 31310 }, { "epoch": 0.12107436099642807, "grad_norm": 0.09545913338661194, "learning_rate": 0.002, "loss": 2.3558, "step": 31320 }, { "epoch": 0.12111301819981135, "grad_norm": 0.10411213338375092, "learning_rate": 0.002, "loss": 2.3568, "step": 31330 }, { "epoch": 0.12115167540319463, "grad_norm": 0.11142470687627792, "learning_rate": 0.002, "loss": 2.3691, "step": 31340 }, { "epoch": 0.12119033260657791, "grad_norm": 0.09210135042667389, "learning_rate": 0.002, "loss": 2.359, "step": 31350 }, { "epoch": 0.12122898980996119, "grad_norm": 0.10986145585775375, "learning_rate": 0.002, "loss": 2.3714, "step": 31360 }, { "epoch": 0.12126764701334447, "grad_norm": 0.10226043313741684, "learning_rate": 0.002, "loss": 2.377, "step": 31370 }, { "epoch": 0.12130630421672775, "grad_norm": 0.12476920336484909, "learning_rate": 0.002, "loss": 2.3847, "step": 31380 }, { "epoch": 0.12134496142011102, "grad_norm": 0.10898647457361221, "learning_rate": 0.002, "loss": 2.3723, "step": 31390 }, { "epoch": 0.1213836186234943, "grad_norm": 0.10633208602666855, "learning_rate": 0.002, "loss": 2.362, "step": 31400 }, { "epoch": 0.12142227582687758, "grad_norm": 0.09353228658437729, "learning_rate": 0.002, "loss": 2.3776, "step": 31410 }, { "epoch": 0.12146093303026086, "grad_norm": 0.13745516538619995, "learning_rate": 0.002, "loss": 2.3683, "step": 31420 }, { "epoch": 0.12149959023364414, "grad_norm": 0.11073607206344604, "learning_rate": 0.002, "loss": 2.3646, "step": 31430 }, { "epoch": 0.12153824743702742, "grad_norm": 0.11021778732538223, "learning_rate": 0.002, "loss": 2.3639, "step": 31440 }, { "epoch": 0.1215769046404107, "grad_norm": 0.10833906382322311, "learning_rate": 0.002, "loss": 2.3729, "step": 31450 }, { "epoch": 0.12161556184379398, "grad_norm": 0.1117563247680664, "learning_rate": 0.002, "loss": 2.3796, "step": 31460 }, { "epoch": 0.12165421904717726, "grad_norm": 0.12070424854755402, "learning_rate": 0.002, "loss": 2.3701, "step": 31470 }, { "epoch": 0.12169287625056054, "grad_norm": 0.11928492784500122, "learning_rate": 0.002, "loss": 2.3809, "step": 31480 }, { "epoch": 0.1217315334539438, "grad_norm": 0.10886865854263306, "learning_rate": 0.002, "loss": 2.368, "step": 31490 }, { "epoch": 0.12177019065732708, "grad_norm": 0.09866543859243393, "learning_rate": 0.002, "loss": 2.3648, "step": 31500 }, { "epoch": 0.12180884786071036, "grad_norm": 0.1067083477973938, "learning_rate": 0.002, "loss": 2.3726, "step": 31510 }, { "epoch": 0.12184750506409364, "grad_norm": 0.09977913647890091, "learning_rate": 0.002, "loss": 2.3669, "step": 31520 }, { "epoch": 0.12188616226747692, "grad_norm": 0.11578541994094849, "learning_rate": 0.002, "loss": 2.3748, "step": 31530 }, { "epoch": 0.1219248194708602, "grad_norm": 0.10357668250799179, "learning_rate": 0.002, "loss": 2.3815, "step": 31540 }, { "epoch": 0.12196347667424348, "grad_norm": 0.1520976573228836, "learning_rate": 0.002, "loss": 2.3823, "step": 31550 }, { "epoch": 0.12200213387762676, "grad_norm": 0.0963573008775711, "learning_rate": 0.002, "loss": 2.373, "step": 31560 }, { "epoch": 0.12204079108101004, "grad_norm": 0.20803941786289215, "learning_rate": 0.002, "loss": 2.3607, "step": 31570 }, { "epoch": 0.12207944828439331, "grad_norm": 0.10182123631238937, "learning_rate": 0.002, "loss": 2.3665, "step": 31580 }, { "epoch": 0.12211810548777659, "grad_norm": 0.10604804754257202, "learning_rate": 0.002, "loss": 2.3613, "step": 31590 }, { "epoch": 0.12215676269115987, "grad_norm": 0.11439476162195206, "learning_rate": 0.002, "loss": 2.3596, "step": 31600 }, { "epoch": 0.12219541989454315, "grad_norm": 0.1221204400062561, "learning_rate": 0.002, "loss": 2.3819, "step": 31610 }, { "epoch": 0.12223407709792643, "grad_norm": 0.10415617376565933, "learning_rate": 0.002, "loss": 2.389, "step": 31620 }, { "epoch": 0.12227273430130971, "grad_norm": 0.10428808629512787, "learning_rate": 0.002, "loss": 2.363, "step": 31630 }, { "epoch": 0.12231139150469299, "grad_norm": 0.1038336530327797, "learning_rate": 0.002, "loss": 2.3691, "step": 31640 }, { "epoch": 0.12235004870807627, "grad_norm": 0.10959843546152115, "learning_rate": 0.002, "loss": 2.3639, "step": 31650 }, { "epoch": 0.12238870591145955, "grad_norm": 0.10477408766746521, "learning_rate": 0.002, "loss": 2.3858, "step": 31660 }, { "epoch": 0.12242736311484281, "grad_norm": 0.11104969680309296, "learning_rate": 0.002, "loss": 2.3632, "step": 31670 }, { "epoch": 0.1224660203182261, "grad_norm": 0.2157076746225357, "learning_rate": 0.002, "loss": 2.367, "step": 31680 }, { "epoch": 0.12250467752160937, "grad_norm": 0.10109077394008636, "learning_rate": 0.002, "loss": 2.3637, "step": 31690 }, { "epoch": 0.12254333472499265, "grad_norm": 0.1090427115559578, "learning_rate": 0.002, "loss": 2.3831, "step": 31700 }, { "epoch": 0.12258199192837593, "grad_norm": 0.12032198160886765, "learning_rate": 0.002, "loss": 2.3648, "step": 31710 }, { "epoch": 0.12262064913175921, "grad_norm": 0.1247207447886467, "learning_rate": 0.002, "loss": 2.3588, "step": 31720 }, { "epoch": 0.12265930633514249, "grad_norm": 0.10226906836032867, "learning_rate": 0.002, "loss": 2.3696, "step": 31730 }, { "epoch": 0.12269796353852577, "grad_norm": 0.11415991932153702, "learning_rate": 0.002, "loss": 2.3876, "step": 31740 }, { "epoch": 0.12273662074190905, "grad_norm": 0.14343005418777466, "learning_rate": 0.002, "loss": 2.3698, "step": 31750 }, { "epoch": 0.12277527794529233, "grad_norm": 0.10491285473108292, "learning_rate": 0.002, "loss": 2.3608, "step": 31760 }, { "epoch": 0.1228139351486756, "grad_norm": 0.12787176668643951, "learning_rate": 0.002, "loss": 2.3602, "step": 31770 }, { "epoch": 0.12285259235205888, "grad_norm": 0.1200731173157692, "learning_rate": 0.002, "loss": 2.3807, "step": 31780 }, { "epoch": 0.12289124955544216, "grad_norm": 0.09315181523561478, "learning_rate": 0.002, "loss": 2.371, "step": 31790 }, { "epoch": 0.12292990675882544, "grad_norm": 0.10988342016935349, "learning_rate": 0.002, "loss": 2.3818, "step": 31800 }, { "epoch": 0.12296856396220872, "grad_norm": 0.1178271472454071, "learning_rate": 0.002, "loss": 2.3689, "step": 31810 }, { "epoch": 0.123007221165592, "grad_norm": 0.12532812356948853, "learning_rate": 0.002, "loss": 2.3704, "step": 31820 }, { "epoch": 0.12304587836897528, "grad_norm": 0.1229834333062172, "learning_rate": 0.002, "loss": 2.3669, "step": 31830 }, { "epoch": 0.12308453557235856, "grad_norm": 0.12011890858411789, "learning_rate": 0.002, "loss": 2.3771, "step": 31840 }, { "epoch": 0.12312319277574184, "grad_norm": 0.11288615316152573, "learning_rate": 0.002, "loss": 2.3736, "step": 31850 }, { "epoch": 0.1231618499791251, "grad_norm": 0.13161183893680573, "learning_rate": 0.002, "loss": 2.3883, "step": 31860 }, { "epoch": 0.12320050718250838, "grad_norm": 0.11495167762041092, "learning_rate": 0.002, "loss": 2.3588, "step": 31870 }, { "epoch": 0.12323916438589166, "grad_norm": 0.10141012817621231, "learning_rate": 0.002, "loss": 2.3842, "step": 31880 }, { "epoch": 0.12327782158927494, "grad_norm": 0.13126742839813232, "learning_rate": 0.002, "loss": 2.3657, "step": 31890 }, { "epoch": 0.12331647879265822, "grad_norm": 0.09751483798027039, "learning_rate": 0.002, "loss": 2.3751, "step": 31900 }, { "epoch": 0.1233551359960415, "grad_norm": 0.11303214728832245, "learning_rate": 0.002, "loss": 2.3934, "step": 31910 }, { "epoch": 0.12339379319942478, "grad_norm": 0.10097888857126236, "learning_rate": 0.002, "loss": 2.3749, "step": 31920 }, { "epoch": 0.12343245040280806, "grad_norm": 0.09599561244249344, "learning_rate": 0.002, "loss": 2.3661, "step": 31930 }, { "epoch": 0.12347110760619134, "grad_norm": 0.10423211753368378, "learning_rate": 0.002, "loss": 2.3843, "step": 31940 }, { "epoch": 0.12350976480957461, "grad_norm": 0.09624336659908295, "learning_rate": 0.002, "loss": 2.3567, "step": 31950 }, { "epoch": 0.12354842201295789, "grad_norm": 0.09582757949829102, "learning_rate": 0.002, "loss": 2.3739, "step": 31960 }, { "epoch": 0.12358707921634117, "grad_norm": 0.1117819994688034, "learning_rate": 0.002, "loss": 2.3711, "step": 31970 }, { "epoch": 0.12362573641972445, "grad_norm": 0.10983041673898697, "learning_rate": 0.002, "loss": 2.3788, "step": 31980 }, { "epoch": 0.12366439362310773, "grad_norm": 0.11222923547029495, "learning_rate": 0.002, "loss": 2.3714, "step": 31990 }, { "epoch": 0.12370305082649101, "grad_norm": 0.1040363609790802, "learning_rate": 0.002, "loss": 2.3772, "step": 32000 }, { "epoch": 0.12374170802987429, "grad_norm": 0.12385562807321548, "learning_rate": 0.002, "loss": 2.3777, "step": 32010 }, { "epoch": 0.12378036523325757, "grad_norm": 0.10607069730758667, "learning_rate": 0.002, "loss": 2.3611, "step": 32020 }, { "epoch": 0.12381902243664085, "grad_norm": 0.09980407357215881, "learning_rate": 0.002, "loss": 2.3667, "step": 32030 }, { "epoch": 0.12385767964002412, "grad_norm": 0.12108637392520905, "learning_rate": 0.002, "loss": 2.3735, "step": 32040 }, { "epoch": 0.1238963368434074, "grad_norm": 0.10419555008411407, "learning_rate": 0.002, "loss": 2.374, "step": 32050 }, { "epoch": 0.12393499404679068, "grad_norm": 0.1171611100435257, "learning_rate": 0.002, "loss": 2.3813, "step": 32060 }, { "epoch": 0.12397365125017396, "grad_norm": 0.10272298008203506, "learning_rate": 0.002, "loss": 2.3604, "step": 32070 }, { "epoch": 0.12401230845355724, "grad_norm": 0.11056289076805115, "learning_rate": 0.002, "loss": 2.3634, "step": 32080 }, { "epoch": 0.12405096565694052, "grad_norm": 0.11453638225793839, "learning_rate": 0.002, "loss": 2.3892, "step": 32090 }, { "epoch": 0.1240896228603238, "grad_norm": 0.11036056280136108, "learning_rate": 0.002, "loss": 2.3749, "step": 32100 }, { "epoch": 0.12412828006370707, "grad_norm": 0.11167748272418976, "learning_rate": 0.002, "loss": 2.3914, "step": 32110 }, { "epoch": 0.12416693726709035, "grad_norm": 0.10620572417974472, "learning_rate": 0.002, "loss": 2.3677, "step": 32120 }, { "epoch": 0.12420559447047363, "grad_norm": 0.10564051568508148, "learning_rate": 0.002, "loss": 2.3561, "step": 32130 }, { "epoch": 0.1242442516738569, "grad_norm": 0.1290002316236496, "learning_rate": 0.002, "loss": 2.3683, "step": 32140 }, { "epoch": 0.12428290887724018, "grad_norm": 0.12541048228740692, "learning_rate": 0.002, "loss": 2.3678, "step": 32150 }, { "epoch": 0.12432156608062346, "grad_norm": 0.13069938123226166, "learning_rate": 0.002, "loss": 2.3807, "step": 32160 }, { "epoch": 0.12436022328400674, "grad_norm": 0.10358031839132309, "learning_rate": 0.002, "loss": 2.3674, "step": 32170 }, { "epoch": 0.12439888048739002, "grad_norm": 0.10525479167699814, "learning_rate": 0.002, "loss": 2.3694, "step": 32180 }, { "epoch": 0.1244375376907733, "grad_norm": 0.10335072129964828, "learning_rate": 0.002, "loss": 2.3799, "step": 32190 }, { "epoch": 0.12447619489415658, "grad_norm": 0.10958348959684372, "learning_rate": 0.002, "loss": 2.352, "step": 32200 }, { "epoch": 0.12451485209753986, "grad_norm": 0.1179596409201622, "learning_rate": 0.002, "loss": 2.3749, "step": 32210 }, { "epoch": 0.12455350930092314, "grad_norm": 0.09787681698799133, "learning_rate": 0.002, "loss": 2.3734, "step": 32220 }, { "epoch": 0.1245921665043064, "grad_norm": 0.11009865999221802, "learning_rate": 0.002, "loss": 2.3656, "step": 32230 }, { "epoch": 0.12463082370768969, "grad_norm": 0.12366067618131638, "learning_rate": 0.002, "loss": 2.3761, "step": 32240 }, { "epoch": 0.12466948091107297, "grad_norm": 0.1004662960767746, "learning_rate": 0.002, "loss": 2.3677, "step": 32250 }, { "epoch": 0.12470813811445625, "grad_norm": 0.10903781652450562, "learning_rate": 0.002, "loss": 2.3724, "step": 32260 }, { "epoch": 0.12474679531783953, "grad_norm": 0.11716281622648239, "learning_rate": 0.002, "loss": 2.3687, "step": 32270 }, { "epoch": 0.1247854525212228, "grad_norm": 0.10709843039512634, "learning_rate": 0.002, "loss": 2.36, "step": 32280 }, { "epoch": 0.12482410972460609, "grad_norm": 0.10901437699794769, "learning_rate": 0.002, "loss": 2.3605, "step": 32290 }, { "epoch": 0.12486276692798937, "grad_norm": 0.10774769634008408, "learning_rate": 0.002, "loss": 2.3926, "step": 32300 }, { "epoch": 0.12490142413137265, "grad_norm": 0.10092653334140778, "learning_rate": 0.002, "loss": 2.3666, "step": 32310 }, { "epoch": 0.12494008133475591, "grad_norm": 0.10283713787794113, "learning_rate": 0.002, "loss": 2.3777, "step": 32320 }, { "epoch": 0.12497873853813919, "grad_norm": 0.09721902012825012, "learning_rate": 0.002, "loss": 2.3734, "step": 32330 }, { "epoch": 0.12501739574152249, "grad_norm": 0.11538095027208328, "learning_rate": 0.002, "loss": 2.3662, "step": 32340 }, { "epoch": 0.12505605294490577, "grad_norm": 0.1069326102733612, "learning_rate": 0.002, "loss": 2.3743, "step": 32350 }, { "epoch": 0.12509471014828905, "grad_norm": 0.11422941833734512, "learning_rate": 0.002, "loss": 2.3558, "step": 32360 }, { "epoch": 0.1251333673516723, "grad_norm": 0.10990530997514725, "learning_rate": 0.002, "loss": 2.3703, "step": 32370 }, { "epoch": 0.12517202455505558, "grad_norm": 0.13378532230854034, "learning_rate": 0.002, "loss": 2.3748, "step": 32380 }, { "epoch": 0.12521068175843886, "grad_norm": 0.112189382314682, "learning_rate": 0.002, "loss": 2.38, "step": 32390 }, { "epoch": 0.12524933896182214, "grad_norm": 0.11024999618530273, "learning_rate": 0.002, "loss": 2.3636, "step": 32400 }, { "epoch": 0.12528799616520542, "grad_norm": 0.11159463226795197, "learning_rate": 0.002, "loss": 2.369, "step": 32410 }, { "epoch": 0.1253266533685887, "grad_norm": 0.11366341263055801, "learning_rate": 0.002, "loss": 2.3667, "step": 32420 }, { "epoch": 0.12536531057197198, "grad_norm": 0.10603636503219604, "learning_rate": 0.002, "loss": 2.3731, "step": 32430 }, { "epoch": 0.12540396777535526, "grad_norm": 0.11254012584686279, "learning_rate": 0.002, "loss": 2.3665, "step": 32440 }, { "epoch": 0.12544262497873854, "grad_norm": 0.1268596053123474, "learning_rate": 0.002, "loss": 2.3593, "step": 32450 }, { "epoch": 0.12548128218212182, "grad_norm": 0.1108691468834877, "learning_rate": 0.002, "loss": 2.3463, "step": 32460 }, { "epoch": 0.1255199393855051, "grad_norm": 0.12088431417942047, "learning_rate": 0.002, "loss": 2.3811, "step": 32470 }, { "epoch": 0.12555859658888838, "grad_norm": 0.24373848736286163, "learning_rate": 0.002, "loss": 2.3821, "step": 32480 }, { "epoch": 0.12559725379227166, "grad_norm": 0.11183463037014008, "learning_rate": 0.002, "loss": 2.3791, "step": 32490 }, { "epoch": 0.12563591099565494, "grad_norm": 0.12124831974506378, "learning_rate": 0.002, "loss": 2.3888, "step": 32500 }, { "epoch": 0.12567456819903822, "grad_norm": 0.10651678591966629, "learning_rate": 0.002, "loss": 2.3732, "step": 32510 }, { "epoch": 0.1257132254024215, "grad_norm": 0.12588591873645782, "learning_rate": 0.002, "loss": 2.3721, "step": 32520 }, { "epoch": 0.12575188260580478, "grad_norm": 0.09625239670276642, "learning_rate": 0.002, "loss": 2.3623, "step": 32530 }, { "epoch": 0.12579053980918806, "grad_norm": 0.11514429748058319, "learning_rate": 0.002, "loss": 2.375, "step": 32540 }, { "epoch": 0.1258291970125713, "grad_norm": 0.10993642359972, "learning_rate": 0.002, "loss": 2.3752, "step": 32550 }, { "epoch": 0.1258678542159546, "grad_norm": 0.10194283723831177, "learning_rate": 0.002, "loss": 2.3752, "step": 32560 }, { "epoch": 0.12590651141933787, "grad_norm": 0.09949901700019836, "learning_rate": 0.002, "loss": 2.3809, "step": 32570 }, { "epoch": 0.12594516862272115, "grad_norm": 0.12141376733779907, "learning_rate": 0.002, "loss": 2.3835, "step": 32580 }, { "epoch": 0.12598382582610443, "grad_norm": 0.12764514982700348, "learning_rate": 0.002, "loss": 2.3809, "step": 32590 }, { "epoch": 0.1260224830294877, "grad_norm": 0.11071311682462692, "learning_rate": 0.002, "loss": 2.3729, "step": 32600 }, { "epoch": 0.126061140232871, "grad_norm": 0.10684597492218018, "learning_rate": 0.002, "loss": 2.3815, "step": 32610 }, { "epoch": 0.12609979743625427, "grad_norm": 0.11873281747102737, "learning_rate": 0.002, "loss": 2.3639, "step": 32620 }, { "epoch": 0.12613845463963755, "grad_norm": 0.08808255940675735, "learning_rate": 0.002, "loss": 2.3732, "step": 32630 }, { "epoch": 0.12617711184302083, "grad_norm": 0.10667643696069717, "learning_rate": 0.002, "loss": 2.3831, "step": 32640 }, { "epoch": 0.1262157690464041, "grad_norm": 0.12549149990081787, "learning_rate": 0.002, "loss": 2.351, "step": 32650 }, { "epoch": 0.1262544262497874, "grad_norm": 0.09748613834381104, "learning_rate": 0.002, "loss": 2.3703, "step": 32660 }, { "epoch": 0.12629308345317067, "grad_norm": 0.11375278234481812, "learning_rate": 0.002, "loss": 2.3678, "step": 32670 }, { "epoch": 0.12633174065655395, "grad_norm": 0.10890177637338638, "learning_rate": 0.002, "loss": 2.3647, "step": 32680 }, { "epoch": 0.12637039785993723, "grad_norm": 0.09607157856225967, "learning_rate": 0.002, "loss": 2.3603, "step": 32690 }, { "epoch": 0.1264090550633205, "grad_norm": 0.11426623165607452, "learning_rate": 0.002, "loss": 2.3601, "step": 32700 }, { "epoch": 0.1264477122667038, "grad_norm": 0.09992794692516327, "learning_rate": 0.002, "loss": 2.3703, "step": 32710 }, { "epoch": 0.12648636947008707, "grad_norm": 0.10609929263591766, "learning_rate": 0.002, "loss": 2.3681, "step": 32720 }, { "epoch": 0.12652502667347035, "grad_norm": 0.09984554350376129, "learning_rate": 0.002, "loss": 2.3779, "step": 32730 }, { "epoch": 0.1265636838768536, "grad_norm": 0.09323658049106598, "learning_rate": 0.002, "loss": 2.3834, "step": 32740 }, { "epoch": 0.12660234108023688, "grad_norm": 0.11080362647771835, "learning_rate": 0.002, "loss": 2.3776, "step": 32750 }, { "epoch": 0.12664099828362016, "grad_norm": 0.11429428309202194, "learning_rate": 0.002, "loss": 2.383, "step": 32760 }, { "epoch": 0.12667965548700344, "grad_norm": 0.13485601544380188, "learning_rate": 0.002, "loss": 2.3532, "step": 32770 }, { "epoch": 0.12671831269038672, "grad_norm": 0.10016801953315735, "learning_rate": 0.002, "loss": 2.3766, "step": 32780 }, { "epoch": 0.12675696989377, "grad_norm": 0.11193803697824478, "learning_rate": 0.002, "loss": 2.3729, "step": 32790 }, { "epoch": 0.12679562709715328, "grad_norm": 0.10117737203836441, "learning_rate": 0.002, "loss": 2.3611, "step": 32800 }, { "epoch": 0.12683428430053656, "grad_norm": 0.11539480090141296, "learning_rate": 0.002, "loss": 2.3678, "step": 32810 }, { "epoch": 0.12687294150391984, "grad_norm": 0.1457090973854065, "learning_rate": 0.002, "loss": 2.3737, "step": 32820 }, { "epoch": 0.12691159870730312, "grad_norm": 0.10625103861093521, "learning_rate": 0.002, "loss": 2.3736, "step": 32830 }, { "epoch": 0.1269502559106864, "grad_norm": 0.10272681713104248, "learning_rate": 0.002, "loss": 2.3744, "step": 32840 }, { "epoch": 0.12698891311406968, "grad_norm": 0.13046136498451233, "learning_rate": 0.002, "loss": 2.3756, "step": 32850 }, { "epoch": 0.12702757031745296, "grad_norm": 0.10971887409687042, "learning_rate": 0.002, "loss": 2.3671, "step": 32860 }, { "epoch": 0.12706622752083624, "grad_norm": 0.09908357262611389, "learning_rate": 0.002, "loss": 2.3704, "step": 32870 }, { "epoch": 0.12710488472421952, "grad_norm": 0.09171049296855927, "learning_rate": 0.002, "loss": 2.3701, "step": 32880 }, { "epoch": 0.1271435419276028, "grad_norm": 0.11479821056127548, "learning_rate": 0.002, "loss": 2.3727, "step": 32890 }, { "epoch": 0.12718219913098608, "grad_norm": 0.11469388753175735, "learning_rate": 0.002, "loss": 2.3673, "step": 32900 }, { "epoch": 0.12722085633436936, "grad_norm": 0.10828559100627899, "learning_rate": 0.002, "loss": 2.3588, "step": 32910 }, { "epoch": 0.1272595135377526, "grad_norm": 0.12178273499011993, "learning_rate": 0.002, "loss": 2.3584, "step": 32920 }, { "epoch": 0.1272981707411359, "grad_norm": 0.12015478312969208, "learning_rate": 0.002, "loss": 2.3608, "step": 32930 }, { "epoch": 0.12733682794451917, "grad_norm": 0.1151486188173294, "learning_rate": 0.002, "loss": 2.377, "step": 32940 }, { "epoch": 0.12737548514790245, "grad_norm": 0.10822568833827972, "learning_rate": 0.002, "loss": 2.3673, "step": 32950 }, { "epoch": 0.12741414235128573, "grad_norm": 0.12262659519910812, "learning_rate": 0.002, "loss": 2.3843, "step": 32960 }, { "epoch": 0.127452799554669, "grad_norm": 0.09884728491306305, "learning_rate": 0.002, "loss": 2.3692, "step": 32970 }, { "epoch": 0.1274914567580523, "grad_norm": 0.10737740248441696, "learning_rate": 0.002, "loss": 2.375, "step": 32980 }, { "epoch": 0.12753011396143557, "grad_norm": 0.12268875539302826, "learning_rate": 0.002, "loss": 2.3612, "step": 32990 }, { "epoch": 0.12756877116481885, "grad_norm": 0.108036570250988, "learning_rate": 0.002, "loss": 2.3633, "step": 33000 }, { "epoch": 0.12760742836820213, "grad_norm": 0.12710832059383392, "learning_rate": 0.002, "loss": 2.3744, "step": 33010 }, { "epoch": 0.1276460855715854, "grad_norm": 0.11616093665361404, "learning_rate": 0.002, "loss": 2.3619, "step": 33020 }, { "epoch": 0.1276847427749687, "grad_norm": 0.11192166805267334, "learning_rate": 0.002, "loss": 2.3604, "step": 33030 }, { "epoch": 0.12772339997835197, "grad_norm": 0.09995438903570175, "learning_rate": 0.002, "loss": 2.3661, "step": 33040 }, { "epoch": 0.12776205718173525, "grad_norm": 0.10356901586055756, "learning_rate": 0.002, "loss": 2.3743, "step": 33050 }, { "epoch": 0.12780071438511853, "grad_norm": 0.1217799261212349, "learning_rate": 0.002, "loss": 2.3659, "step": 33060 }, { "epoch": 0.1278393715885018, "grad_norm": 0.14946649968624115, "learning_rate": 0.002, "loss": 2.3579, "step": 33070 }, { "epoch": 0.1278780287918851, "grad_norm": 0.10571971535682678, "learning_rate": 0.002, "loss": 2.3639, "step": 33080 }, { "epoch": 0.12791668599526837, "grad_norm": 0.10848978906869888, "learning_rate": 0.002, "loss": 2.3734, "step": 33090 }, { "epoch": 0.12795534319865165, "grad_norm": 0.10227275639772415, "learning_rate": 0.002, "loss": 2.3595, "step": 33100 }, { "epoch": 0.1279940004020349, "grad_norm": 0.1201995238661766, "learning_rate": 0.002, "loss": 2.3695, "step": 33110 }, { "epoch": 0.12803265760541818, "grad_norm": 0.1080368310213089, "learning_rate": 0.002, "loss": 2.3762, "step": 33120 }, { "epoch": 0.12807131480880146, "grad_norm": 0.10840100049972534, "learning_rate": 0.002, "loss": 2.3614, "step": 33130 }, { "epoch": 0.12810997201218474, "grad_norm": 0.11384178698062897, "learning_rate": 0.002, "loss": 2.372, "step": 33140 }, { "epoch": 0.12814862921556802, "grad_norm": 0.11699331551790237, "learning_rate": 0.002, "loss": 2.3498, "step": 33150 }, { "epoch": 0.1281872864189513, "grad_norm": 0.11681430786848068, "learning_rate": 0.002, "loss": 2.3651, "step": 33160 }, { "epoch": 0.12822594362233458, "grad_norm": 0.1251811385154724, "learning_rate": 0.002, "loss": 2.3572, "step": 33170 }, { "epoch": 0.12826460082571786, "grad_norm": 0.11562751978635788, "learning_rate": 0.002, "loss": 2.3729, "step": 33180 }, { "epoch": 0.12830325802910114, "grad_norm": 0.1195770800113678, "learning_rate": 0.002, "loss": 2.3524, "step": 33190 }, { "epoch": 0.12834191523248442, "grad_norm": 0.09677097201347351, "learning_rate": 0.002, "loss": 2.3604, "step": 33200 }, { "epoch": 0.1283805724358677, "grad_norm": 0.12000081688165665, "learning_rate": 0.002, "loss": 2.3806, "step": 33210 }, { "epoch": 0.12841922963925098, "grad_norm": 0.10882232338190079, "learning_rate": 0.002, "loss": 2.3717, "step": 33220 }, { "epoch": 0.12845788684263426, "grad_norm": 0.11827144026756287, "learning_rate": 0.002, "loss": 2.3595, "step": 33230 }, { "epoch": 0.12849654404601754, "grad_norm": 0.10499045997858047, "learning_rate": 0.002, "loss": 2.3598, "step": 33240 }, { "epoch": 0.12853520124940082, "grad_norm": 0.09814994782209396, "learning_rate": 0.002, "loss": 2.3502, "step": 33250 }, { "epoch": 0.1285738584527841, "grad_norm": 0.09377431869506836, "learning_rate": 0.002, "loss": 2.379, "step": 33260 }, { "epoch": 0.12861251565616738, "grad_norm": 0.09934505075216293, "learning_rate": 0.002, "loss": 2.3637, "step": 33270 }, { "epoch": 0.12865117285955066, "grad_norm": 0.10614591091871262, "learning_rate": 0.002, "loss": 2.3697, "step": 33280 }, { "epoch": 0.12868983006293394, "grad_norm": 0.11912450194358826, "learning_rate": 0.002, "loss": 2.3853, "step": 33290 }, { "epoch": 0.1287284872663172, "grad_norm": 0.10692890733480453, "learning_rate": 0.002, "loss": 2.372, "step": 33300 }, { "epoch": 0.12876714446970047, "grad_norm": 0.10942743718624115, "learning_rate": 0.002, "loss": 2.3593, "step": 33310 }, { "epoch": 0.12880580167308375, "grad_norm": 0.10537812858819962, "learning_rate": 0.002, "loss": 2.3675, "step": 33320 }, { "epoch": 0.12884445887646703, "grad_norm": 0.11302319914102554, "learning_rate": 0.002, "loss": 2.3586, "step": 33330 }, { "epoch": 0.1288831160798503, "grad_norm": 0.13061635196208954, "learning_rate": 0.002, "loss": 2.3708, "step": 33340 }, { "epoch": 0.1289217732832336, "grad_norm": 0.18177761137485504, "learning_rate": 0.002, "loss": 2.3768, "step": 33350 }, { "epoch": 0.12896043048661687, "grad_norm": 0.11147965490818024, "learning_rate": 0.002, "loss": 2.3836, "step": 33360 }, { "epoch": 0.12899908769000015, "grad_norm": 0.10751602053642273, "learning_rate": 0.002, "loss": 2.3687, "step": 33370 }, { "epoch": 0.12903774489338343, "grad_norm": 0.10788480192422867, "learning_rate": 0.002, "loss": 2.3728, "step": 33380 }, { "epoch": 0.1290764020967667, "grad_norm": 0.10402106493711472, "learning_rate": 0.002, "loss": 2.385, "step": 33390 }, { "epoch": 0.12911505930015, "grad_norm": 0.14801332354545593, "learning_rate": 0.002, "loss": 2.3645, "step": 33400 }, { "epoch": 0.12915371650353327, "grad_norm": 0.10262597352266312, "learning_rate": 0.002, "loss": 2.3839, "step": 33410 }, { "epoch": 0.12919237370691655, "grad_norm": 0.11831195652484894, "learning_rate": 0.002, "loss": 2.3663, "step": 33420 }, { "epoch": 0.12923103091029983, "grad_norm": 0.10673588514328003, "learning_rate": 0.002, "loss": 2.3739, "step": 33430 }, { "epoch": 0.1292696881136831, "grad_norm": 0.11378040909767151, "learning_rate": 0.002, "loss": 2.3652, "step": 33440 }, { "epoch": 0.1293083453170664, "grad_norm": 0.10400614142417908, "learning_rate": 0.002, "loss": 2.3727, "step": 33450 }, { "epoch": 0.12934700252044967, "grad_norm": 0.10201553255319595, "learning_rate": 0.002, "loss": 2.368, "step": 33460 }, { "epoch": 0.12938565972383295, "grad_norm": 0.10591879487037659, "learning_rate": 0.002, "loss": 2.3725, "step": 33470 }, { "epoch": 0.1294243169272162, "grad_norm": 0.09862728416919708, "learning_rate": 0.002, "loss": 2.369, "step": 33480 }, { "epoch": 0.12946297413059948, "grad_norm": 0.1223021075129509, "learning_rate": 0.002, "loss": 2.3652, "step": 33490 }, { "epoch": 0.12950163133398276, "grad_norm": 0.11414515972137451, "learning_rate": 0.002, "loss": 2.3806, "step": 33500 }, { "epoch": 0.12954028853736604, "grad_norm": 0.10432834178209305, "learning_rate": 0.002, "loss": 2.3699, "step": 33510 }, { "epoch": 0.12957894574074932, "grad_norm": 0.09742403030395508, "learning_rate": 0.002, "loss": 2.3601, "step": 33520 }, { "epoch": 0.1296176029441326, "grad_norm": 0.13793131709098816, "learning_rate": 0.002, "loss": 2.3757, "step": 33530 }, { "epoch": 0.12965626014751588, "grad_norm": 0.10405918955802917, "learning_rate": 0.002, "loss": 2.3644, "step": 33540 }, { "epoch": 0.12969491735089916, "grad_norm": 0.12017746269702911, "learning_rate": 0.002, "loss": 2.3594, "step": 33550 }, { "epoch": 0.12973357455428244, "grad_norm": 0.16131430864334106, "learning_rate": 0.002, "loss": 2.3602, "step": 33560 }, { "epoch": 0.12977223175766572, "grad_norm": 0.09843146055936813, "learning_rate": 0.002, "loss": 2.366, "step": 33570 }, { "epoch": 0.129810888961049, "grad_norm": 0.09624658524990082, "learning_rate": 0.002, "loss": 2.374, "step": 33580 }, { "epoch": 0.12984954616443228, "grad_norm": 0.14317472279071808, "learning_rate": 0.002, "loss": 2.3789, "step": 33590 }, { "epoch": 0.12988820336781556, "grad_norm": 0.11247939616441727, "learning_rate": 0.002, "loss": 2.3651, "step": 33600 }, { "epoch": 0.12992686057119884, "grad_norm": 0.11236079782247543, "learning_rate": 0.002, "loss": 2.3718, "step": 33610 }, { "epoch": 0.12996551777458212, "grad_norm": 0.09830132871866226, "learning_rate": 0.002, "loss": 2.3682, "step": 33620 }, { "epoch": 0.1300041749779654, "grad_norm": 0.12697303295135498, "learning_rate": 0.002, "loss": 2.368, "step": 33630 }, { "epoch": 0.13004283218134868, "grad_norm": 0.11531970649957657, "learning_rate": 0.002, "loss": 2.3742, "step": 33640 }, { "epoch": 0.13008148938473196, "grad_norm": 0.1151520311832428, "learning_rate": 0.002, "loss": 2.3625, "step": 33650 }, { "epoch": 0.13012014658811524, "grad_norm": 0.11021476984024048, "learning_rate": 0.002, "loss": 2.3832, "step": 33660 }, { "epoch": 0.1301588037914985, "grad_norm": 0.1186911091208458, "learning_rate": 0.002, "loss": 2.3853, "step": 33670 }, { "epoch": 0.13019746099488178, "grad_norm": 0.12309000641107559, "learning_rate": 0.002, "loss": 2.3735, "step": 33680 }, { "epoch": 0.13023611819826506, "grad_norm": 0.11252123862504959, "learning_rate": 0.002, "loss": 2.3567, "step": 33690 }, { "epoch": 0.13027477540164834, "grad_norm": 0.10669244825839996, "learning_rate": 0.002, "loss": 2.3853, "step": 33700 }, { "epoch": 0.13031343260503161, "grad_norm": 0.10406588017940521, "learning_rate": 0.002, "loss": 2.3752, "step": 33710 }, { "epoch": 0.1303520898084149, "grad_norm": 0.11617938429117203, "learning_rate": 0.002, "loss": 2.3589, "step": 33720 }, { "epoch": 0.13039074701179817, "grad_norm": 0.1132294237613678, "learning_rate": 0.002, "loss": 2.3764, "step": 33730 }, { "epoch": 0.13042940421518145, "grad_norm": 0.10331351310014725, "learning_rate": 0.002, "loss": 2.376, "step": 33740 }, { "epoch": 0.13046806141856473, "grad_norm": 0.11811483651399612, "learning_rate": 0.002, "loss": 2.3668, "step": 33750 }, { "epoch": 0.13050671862194801, "grad_norm": 0.11243601888418198, "learning_rate": 0.002, "loss": 2.3582, "step": 33760 }, { "epoch": 0.1305453758253313, "grad_norm": 0.10086555033922195, "learning_rate": 0.002, "loss": 2.3754, "step": 33770 }, { "epoch": 0.13058403302871457, "grad_norm": 0.09739705175161362, "learning_rate": 0.002, "loss": 2.3644, "step": 33780 }, { "epoch": 0.13062269023209785, "grad_norm": 0.11868289113044739, "learning_rate": 0.002, "loss": 2.3613, "step": 33790 }, { "epoch": 0.13066134743548113, "grad_norm": 0.12014929950237274, "learning_rate": 0.002, "loss": 2.368, "step": 33800 }, { "epoch": 0.13070000463886441, "grad_norm": 0.11841581016778946, "learning_rate": 0.002, "loss": 2.3699, "step": 33810 }, { "epoch": 0.1307386618422477, "grad_norm": 0.10554622858762741, "learning_rate": 0.002, "loss": 2.3683, "step": 33820 }, { "epoch": 0.13077731904563097, "grad_norm": 0.11373893916606903, "learning_rate": 0.002, "loss": 2.3634, "step": 33830 }, { "epoch": 0.13081597624901425, "grad_norm": 0.10629983991384506, "learning_rate": 0.002, "loss": 2.3731, "step": 33840 }, { "epoch": 0.1308546334523975, "grad_norm": 0.12039709091186523, "learning_rate": 0.002, "loss": 2.3752, "step": 33850 }, { "epoch": 0.1308932906557808, "grad_norm": 0.10280530154705048, "learning_rate": 0.002, "loss": 2.3574, "step": 33860 }, { "epoch": 0.13093194785916407, "grad_norm": 0.11763978004455566, "learning_rate": 0.002, "loss": 2.3716, "step": 33870 }, { "epoch": 0.13097060506254735, "grad_norm": 0.10091401636600494, "learning_rate": 0.002, "loss": 2.3718, "step": 33880 }, { "epoch": 0.13100926226593063, "grad_norm": 0.09946057200431824, "learning_rate": 0.002, "loss": 2.366, "step": 33890 }, { "epoch": 0.1310479194693139, "grad_norm": 0.09984345734119415, "learning_rate": 0.002, "loss": 2.3772, "step": 33900 }, { "epoch": 0.13108657667269719, "grad_norm": 0.10241945832967758, "learning_rate": 0.002, "loss": 2.3587, "step": 33910 }, { "epoch": 0.13112523387608047, "grad_norm": 0.12130527943372726, "learning_rate": 0.002, "loss": 2.379, "step": 33920 }, { "epoch": 0.13116389107946375, "grad_norm": 0.1324186772108078, "learning_rate": 0.002, "loss": 2.3719, "step": 33930 }, { "epoch": 0.13120254828284703, "grad_norm": 0.11365784704685211, "learning_rate": 0.002, "loss": 2.3628, "step": 33940 }, { "epoch": 0.1312412054862303, "grad_norm": 0.1000702902674675, "learning_rate": 0.002, "loss": 2.361, "step": 33950 }, { "epoch": 0.13127986268961359, "grad_norm": 0.12024486064910889, "learning_rate": 0.002, "loss": 2.3513, "step": 33960 }, { "epoch": 0.13131851989299687, "grad_norm": 0.11599510163068771, "learning_rate": 0.002, "loss": 2.3722, "step": 33970 }, { "epoch": 0.13135717709638015, "grad_norm": 0.11001361906528473, "learning_rate": 0.002, "loss": 2.3495, "step": 33980 }, { "epoch": 0.13139583429976343, "grad_norm": 0.12406077235937119, "learning_rate": 0.002, "loss": 2.3865, "step": 33990 }, { "epoch": 0.1314344915031467, "grad_norm": 0.1052245944738388, "learning_rate": 0.002, "loss": 2.3723, "step": 34000 }, { "epoch": 0.13147314870652999, "grad_norm": 0.10168890655040741, "learning_rate": 0.002, "loss": 2.368, "step": 34010 }, { "epoch": 0.13151180590991327, "grad_norm": 0.107086680829525, "learning_rate": 0.002, "loss": 2.3638, "step": 34020 }, { "epoch": 0.13155046311329655, "grad_norm": 0.12155095487833023, "learning_rate": 0.002, "loss": 2.3671, "step": 34030 }, { "epoch": 0.1315891203166798, "grad_norm": 0.14013329148292542, "learning_rate": 0.002, "loss": 2.3699, "step": 34040 }, { "epoch": 0.13162777752006308, "grad_norm": 0.0939648449420929, "learning_rate": 0.002, "loss": 2.3815, "step": 34050 }, { "epoch": 0.13166643472344636, "grad_norm": 0.10857540369033813, "learning_rate": 0.002, "loss": 2.3779, "step": 34060 }, { "epoch": 0.13170509192682964, "grad_norm": 0.12482918798923492, "learning_rate": 0.002, "loss": 2.3753, "step": 34070 }, { "epoch": 0.13174374913021292, "grad_norm": 0.11149606853723526, "learning_rate": 0.002, "loss": 2.3555, "step": 34080 }, { "epoch": 0.1317824063335962, "grad_norm": 0.09495566040277481, "learning_rate": 0.002, "loss": 2.3716, "step": 34090 }, { "epoch": 0.13182106353697948, "grad_norm": 0.11346203833818436, "learning_rate": 0.002, "loss": 2.3701, "step": 34100 }, { "epoch": 0.13185972074036276, "grad_norm": 0.09801855683326721, "learning_rate": 0.002, "loss": 2.3629, "step": 34110 }, { "epoch": 0.13189837794374604, "grad_norm": 0.11547388881444931, "learning_rate": 0.002, "loss": 2.3624, "step": 34120 }, { "epoch": 0.13193703514712932, "grad_norm": 0.09938013553619385, "learning_rate": 0.002, "loss": 2.3743, "step": 34130 }, { "epoch": 0.1319756923505126, "grad_norm": 0.11353269219398499, "learning_rate": 0.002, "loss": 2.3713, "step": 34140 }, { "epoch": 0.13201434955389588, "grad_norm": 0.10928058624267578, "learning_rate": 0.002, "loss": 2.3703, "step": 34150 }, { "epoch": 0.13205300675727916, "grad_norm": 0.09228232502937317, "learning_rate": 0.002, "loss": 2.3598, "step": 34160 }, { "epoch": 0.13209166396066244, "grad_norm": 0.10695263743400574, "learning_rate": 0.002, "loss": 2.3641, "step": 34170 }, { "epoch": 0.13213032116404572, "grad_norm": 0.10757897049188614, "learning_rate": 0.002, "loss": 2.3602, "step": 34180 }, { "epoch": 0.132168978367429, "grad_norm": 0.11652503907680511, "learning_rate": 0.002, "loss": 2.3705, "step": 34190 }, { "epoch": 0.13220763557081228, "grad_norm": 0.11906265467405319, "learning_rate": 0.002, "loss": 2.3776, "step": 34200 }, { "epoch": 0.13224629277419556, "grad_norm": 0.10782443732023239, "learning_rate": 0.002, "loss": 2.3779, "step": 34210 }, { "epoch": 0.1322849499775788, "grad_norm": 0.11037446558475494, "learning_rate": 0.002, "loss": 2.3699, "step": 34220 }, { "epoch": 0.1323236071809621, "grad_norm": 0.10838919878005981, "learning_rate": 0.002, "loss": 2.3632, "step": 34230 }, { "epoch": 0.13236226438434537, "grad_norm": 0.11310845613479614, "learning_rate": 0.002, "loss": 2.3702, "step": 34240 }, { "epoch": 0.13240092158772865, "grad_norm": 0.10031761229038239, "learning_rate": 0.002, "loss": 2.385, "step": 34250 }, { "epoch": 0.13243957879111193, "grad_norm": 0.11770909279584885, "learning_rate": 0.002, "loss": 2.3791, "step": 34260 }, { "epoch": 0.1324782359944952, "grad_norm": 0.12400855869054794, "learning_rate": 0.002, "loss": 2.3792, "step": 34270 }, { "epoch": 0.1325168931978785, "grad_norm": 0.10364599525928497, "learning_rate": 0.002, "loss": 2.3756, "step": 34280 }, { "epoch": 0.13255555040126177, "grad_norm": 0.11015373468399048, "learning_rate": 0.002, "loss": 2.3638, "step": 34290 }, { "epoch": 0.13259420760464505, "grad_norm": 0.09881162643432617, "learning_rate": 0.002, "loss": 2.3707, "step": 34300 }, { "epoch": 0.13263286480802833, "grad_norm": 0.11332736909389496, "learning_rate": 0.002, "loss": 2.3836, "step": 34310 }, { "epoch": 0.1326715220114116, "grad_norm": 0.11150939762592316, "learning_rate": 0.002, "loss": 2.3677, "step": 34320 }, { "epoch": 0.1327101792147949, "grad_norm": 0.10195937752723694, "learning_rate": 0.002, "loss": 2.3621, "step": 34330 }, { "epoch": 0.13274883641817817, "grad_norm": 0.12010291963815689, "learning_rate": 0.002, "loss": 2.3634, "step": 34340 }, { "epoch": 0.13278749362156145, "grad_norm": 0.09696830064058304, "learning_rate": 0.002, "loss": 2.3634, "step": 34350 }, { "epoch": 0.13282615082494473, "grad_norm": 0.14391209185123444, "learning_rate": 0.002, "loss": 2.3654, "step": 34360 }, { "epoch": 0.132864808028328, "grad_norm": 0.11253627389669418, "learning_rate": 0.002, "loss": 2.3669, "step": 34370 }, { "epoch": 0.1329034652317113, "grad_norm": 0.10166656225919724, "learning_rate": 0.002, "loss": 2.3727, "step": 34380 }, { "epoch": 0.13294212243509457, "grad_norm": 0.0988810807466507, "learning_rate": 0.002, "loss": 2.3694, "step": 34390 }, { "epoch": 0.13298077963847785, "grad_norm": 0.10384784638881683, "learning_rate": 0.002, "loss": 2.3751, "step": 34400 }, { "epoch": 0.1330194368418611, "grad_norm": 0.14408060908317566, "learning_rate": 0.002, "loss": 2.3568, "step": 34410 }, { "epoch": 0.13305809404524438, "grad_norm": 0.11995476484298706, "learning_rate": 0.002, "loss": 2.3735, "step": 34420 }, { "epoch": 0.13309675124862766, "grad_norm": 0.10418880730867386, "learning_rate": 0.002, "loss": 2.3632, "step": 34430 }, { "epoch": 0.13313540845201094, "grad_norm": 0.10377342253923416, "learning_rate": 0.002, "loss": 2.3705, "step": 34440 }, { "epoch": 0.13317406565539422, "grad_norm": 0.09623515605926514, "learning_rate": 0.002, "loss": 2.369, "step": 34450 }, { "epoch": 0.1332127228587775, "grad_norm": 0.11711203306913376, "learning_rate": 0.002, "loss": 2.3659, "step": 34460 }, { "epoch": 0.13325138006216078, "grad_norm": 0.12866175174713135, "learning_rate": 0.002, "loss": 2.3761, "step": 34470 }, { "epoch": 0.13329003726554406, "grad_norm": 0.10368845611810684, "learning_rate": 0.002, "loss": 2.3693, "step": 34480 }, { "epoch": 0.13332869446892734, "grad_norm": 0.0972469374537468, "learning_rate": 0.002, "loss": 2.3631, "step": 34490 }, { "epoch": 0.13336735167231062, "grad_norm": 0.09679017961025238, "learning_rate": 0.002, "loss": 2.3524, "step": 34500 }, { "epoch": 0.1334060088756939, "grad_norm": 0.11398512870073318, "learning_rate": 0.002, "loss": 2.3746, "step": 34510 }, { "epoch": 0.13344466607907718, "grad_norm": 0.10731250047683716, "learning_rate": 0.002, "loss": 2.3725, "step": 34520 }, { "epoch": 0.13348332328246046, "grad_norm": 0.12601740658283234, "learning_rate": 0.002, "loss": 2.3568, "step": 34530 }, { "epoch": 0.13352198048584374, "grad_norm": 0.10348492115736008, "learning_rate": 0.002, "loss": 2.3512, "step": 34540 }, { "epoch": 0.13356063768922702, "grad_norm": 0.09467864781618118, "learning_rate": 0.002, "loss": 2.383, "step": 34550 }, { "epoch": 0.1335992948926103, "grad_norm": 0.1319250464439392, "learning_rate": 0.002, "loss": 2.3697, "step": 34560 }, { "epoch": 0.13363795209599358, "grad_norm": 0.09204111993312836, "learning_rate": 0.002, "loss": 2.3801, "step": 34570 }, { "epoch": 0.13367660929937686, "grad_norm": 0.09467501938343048, "learning_rate": 0.002, "loss": 2.3692, "step": 34580 }, { "epoch": 0.1337152665027601, "grad_norm": 0.1087702140212059, "learning_rate": 0.002, "loss": 2.3724, "step": 34590 }, { "epoch": 0.1337539237061434, "grad_norm": 0.10382219403982162, "learning_rate": 0.002, "loss": 2.3799, "step": 34600 }, { "epoch": 0.13379258090952667, "grad_norm": 0.14096559584140778, "learning_rate": 0.002, "loss": 2.3712, "step": 34610 }, { "epoch": 0.13383123811290995, "grad_norm": 0.10706502944231033, "learning_rate": 0.002, "loss": 2.3521, "step": 34620 }, { "epoch": 0.13386989531629323, "grad_norm": 0.10187694430351257, "learning_rate": 0.002, "loss": 2.3647, "step": 34630 }, { "epoch": 0.1339085525196765, "grad_norm": 0.10945390164852142, "learning_rate": 0.002, "loss": 2.3572, "step": 34640 }, { "epoch": 0.1339472097230598, "grad_norm": 0.10661990195512772, "learning_rate": 0.002, "loss": 2.3674, "step": 34650 }, { "epoch": 0.13398586692644307, "grad_norm": 0.12462542206048965, "learning_rate": 0.002, "loss": 2.3747, "step": 34660 }, { "epoch": 0.13402452412982635, "grad_norm": 0.13518981635570526, "learning_rate": 0.002, "loss": 2.3543, "step": 34670 }, { "epoch": 0.13406318133320963, "grad_norm": 0.11695235967636108, "learning_rate": 0.002, "loss": 2.374, "step": 34680 }, { "epoch": 0.1341018385365929, "grad_norm": 0.10137923061847687, "learning_rate": 0.002, "loss": 2.3561, "step": 34690 }, { "epoch": 0.1341404957399762, "grad_norm": 0.09696846455335617, "learning_rate": 0.002, "loss": 2.3584, "step": 34700 }, { "epoch": 0.13417915294335947, "grad_norm": 0.10143442451953888, "learning_rate": 0.002, "loss": 2.3766, "step": 34710 }, { "epoch": 0.13421781014674275, "grad_norm": 0.11306238174438477, "learning_rate": 0.002, "loss": 2.3537, "step": 34720 }, { "epoch": 0.13425646735012603, "grad_norm": 0.10919847339391708, "learning_rate": 0.002, "loss": 2.3618, "step": 34730 }, { "epoch": 0.1342951245535093, "grad_norm": 0.09910539537668228, "learning_rate": 0.002, "loss": 2.3818, "step": 34740 }, { "epoch": 0.1343337817568926, "grad_norm": 0.1374046355485916, "learning_rate": 0.002, "loss": 2.3749, "step": 34750 }, { "epoch": 0.13437243896027587, "grad_norm": 0.10045044869184494, "learning_rate": 0.002, "loss": 2.3878, "step": 34760 }, { "epoch": 0.13441109616365915, "grad_norm": 0.11504543572664261, "learning_rate": 0.002, "loss": 2.38, "step": 34770 }, { "epoch": 0.1344497533670424, "grad_norm": 0.10948486626148224, "learning_rate": 0.002, "loss": 2.3734, "step": 34780 }, { "epoch": 0.13448841057042568, "grad_norm": 0.10025618225336075, "learning_rate": 0.002, "loss": 2.3583, "step": 34790 }, { "epoch": 0.13452706777380896, "grad_norm": 0.0966828241944313, "learning_rate": 0.002, "loss": 2.3668, "step": 34800 }, { "epoch": 0.13456572497719224, "grad_norm": 0.11460334807634354, "learning_rate": 0.002, "loss": 2.371, "step": 34810 }, { "epoch": 0.13460438218057552, "grad_norm": 0.11981458216905594, "learning_rate": 0.002, "loss": 2.3651, "step": 34820 }, { "epoch": 0.1346430393839588, "grad_norm": 0.09813759475946426, "learning_rate": 0.002, "loss": 2.3641, "step": 34830 }, { "epoch": 0.13468169658734208, "grad_norm": 0.14167159795761108, "learning_rate": 0.002, "loss": 2.365, "step": 34840 }, { "epoch": 0.13472035379072536, "grad_norm": 0.11069276183843613, "learning_rate": 0.002, "loss": 2.3705, "step": 34850 }, { "epoch": 0.13475901099410864, "grad_norm": 0.11259116232395172, "learning_rate": 0.002, "loss": 2.382, "step": 34860 }, { "epoch": 0.13479766819749192, "grad_norm": 0.10852546244859695, "learning_rate": 0.002, "loss": 2.3708, "step": 34870 }, { "epoch": 0.1348363254008752, "grad_norm": 0.11632886528968811, "learning_rate": 0.002, "loss": 2.3615, "step": 34880 }, { "epoch": 0.13487498260425848, "grad_norm": 0.09457679092884064, "learning_rate": 0.002, "loss": 2.3743, "step": 34890 }, { "epoch": 0.13491363980764176, "grad_norm": 0.09845416992902756, "learning_rate": 0.002, "loss": 2.3732, "step": 34900 }, { "epoch": 0.13495229701102504, "grad_norm": 0.11023177206516266, "learning_rate": 0.002, "loss": 2.3742, "step": 34910 }, { "epoch": 0.13499095421440832, "grad_norm": 0.09931118786334991, "learning_rate": 0.002, "loss": 2.3652, "step": 34920 }, { "epoch": 0.1350296114177916, "grad_norm": 0.10401184856891632, "learning_rate": 0.002, "loss": 2.3664, "step": 34930 }, { "epoch": 0.13506826862117488, "grad_norm": 0.1214630976319313, "learning_rate": 0.002, "loss": 2.3821, "step": 34940 }, { "epoch": 0.13510692582455816, "grad_norm": 0.10948102176189423, "learning_rate": 0.002, "loss": 2.3682, "step": 34950 }, { "epoch": 0.1351455830279414, "grad_norm": 0.10995184630155563, "learning_rate": 0.002, "loss": 2.3545, "step": 34960 }, { "epoch": 0.1351842402313247, "grad_norm": 0.10805307328701019, "learning_rate": 0.002, "loss": 2.369, "step": 34970 }, { "epoch": 0.13522289743470797, "grad_norm": 0.10571695864200592, "learning_rate": 0.002, "loss": 2.365, "step": 34980 }, { "epoch": 0.13526155463809125, "grad_norm": 0.11283328384160995, "learning_rate": 0.002, "loss": 2.3714, "step": 34990 }, { "epoch": 0.13530021184147453, "grad_norm": 0.12748633325099945, "learning_rate": 0.002, "loss": 2.3749, "step": 35000 }, { "epoch": 0.1353388690448578, "grad_norm": 0.11866314709186554, "learning_rate": 0.002, "loss": 2.3589, "step": 35010 }, { "epoch": 0.1353775262482411, "grad_norm": 0.11615738272666931, "learning_rate": 0.002, "loss": 2.3731, "step": 35020 }, { "epoch": 0.13541618345162437, "grad_norm": 0.09199300408363342, "learning_rate": 0.002, "loss": 2.3664, "step": 35030 }, { "epoch": 0.13545484065500765, "grad_norm": 0.10658486187458038, "learning_rate": 0.002, "loss": 2.3699, "step": 35040 }, { "epoch": 0.13549349785839093, "grad_norm": 0.1143282800912857, "learning_rate": 0.002, "loss": 2.352, "step": 35050 }, { "epoch": 0.1355321550617742, "grad_norm": 0.11966723948717117, "learning_rate": 0.002, "loss": 2.3598, "step": 35060 }, { "epoch": 0.1355708122651575, "grad_norm": 0.10665853321552277, "learning_rate": 0.002, "loss": 2.3687, "step": 35070 }, { "epoch": 0.13560946946854077, "grad_norm": 0.10373278707265854, "learning_rate": 0.002, "loss": 2.3714, "step": 35080 }, { "epoch": 0.13564812667192405, "grad_norm": 0.12308533489704132, "learning_rate": 0.002, "loss": 2.3651, "step": 35090 }, { "epoch": 0.13568678387530733, "grad_norm": 0.09434260427951813, "learning_rate": 0.002, "loss": 2.3532, "step": 35100 }, { "epoch": 0.1357254410786906, "grad_norm": 0.108939990401268, "learning_rate": 0.002, "loss": 2.3862, "step": 35110 }, { "epoch": 0.1357640982820739, "grad_norm": 0.09926486015319824, "learning_rate": 0.002, "loss": 2.3693, "step": 35120 }, { "epoch": 0.13580275548545717, "grad_norm": 0.10864213854074478, "learning_rate": 0.002, "loss": 2.3706, "step": 35130 }, { "epoch": 0.13584141268884045, "grad_norm": 0.10281189531087875, "learning_rate": 0.002, "loss": 2.3615, "step": 35140 }, { "epoch": 0.1358800698922237, "grad_norm": 0.11445096880197525, "learning_rate": 0.002, "loss": 2.3807, "step": 35150 }, { "epoch": 0.13591872709560698, "grad_norm": 0.10199040919542313, "learning_rate": 0.002, "loss": 2.3579, "step": 35160 }, { "epoch": 0.13595738429899026, "grad_norm": 0.10905351489782333, "learning_rate": 0.002, "loss": 2.3754, "step": 35170 }, { "epoch": 0.13599604150237354, "grad_norm": 0.09866225719451904, "learning_rate": 0.002, "loss": 2.3523, "step": 35180 }, { "epoch": 0.13603469870575682, "grad_norm": 0.12357515841722488, "learning_rate": 0.002, "loss": 2.363, "step": 35190 }, { "epoch": 0.1360733559091401, "grad_norm": 0.1316716969013214, "learning_rate": 0.002, "loss": 2.366, "step": 35200 }, { "epoch": 0.13611201311252338, "grad_norm": 0.11268094182014465, "learning_rate": 0.002, "loss": 2.3734, "step": 35210 }, { "epoch": 0.13615067031590666, "grad_norm": 0.1085466742515564, "learning_rate": 0.002, "loss": 2.3703, "step": 35220 }, { "epoch": 0.13618932751928994, "grad_norm": 0.11580248922109604, "learning_rate": 0.002, "loss": 2.3818, "step": 35230 }, { "epoch": 0.13622798472267322, "grad_norm": 0.09953868389129639, "learning_rate": 0.002, "loss": 2.3716, "step": 35240 }, { "epoch": 0.1362666419260565, "grad_norm": 0.10986287146806717, "learning_rate": 0.002, "loss": 2.3807, "step": 35250 }, { "epoch": 0.13630529912943978, "grad_norm": 0.13953445851802826, "learning_rate": 0.002, "loss": 2.3574, "step": 35260 }, { "epoch": 0.13634395633282306, "grad_norm": 0.10694142431020737, "learning_rate": 0.002, "loss": 2.3793, "step": 35270 }, { "epoch": 0.13638261353620634, "grad_norm": 0.1019204631447792, "learning_rate": 0.002, "loss": 2.3661, "step": 35280 }, { "epoch": 0.13642127073958962, "grad_norm": 0.11643867939710617, "learning_rate": 0.002, "loss": 2.3519, "step": 35290 }, { "epoch": 0.1364599279429729, "grad_norm": 0.10843092203140259, "learning_rate": 0.002, "loss": 2.3634, "step": 35300 }, { "epoch": 0.13649858514635618, "grad_norm": 0.11738748848438263, "learning_rate": 0.002, "loss": 2.365, "step": 35310 }, { "epoch": 0.13653724234973946, "grad_norm": 0.10633944720029831, "learning_rate": 0.002, "loss": 2.3665, "step": 35320 }, { "epoch": 0.13657589955312274, "grad_norm": 0.11410286277532578, "learning_rate": 0.002, "loss": 2.3729, "step": 35330 }, { "epoch": 0.136614556756506, "grad_norm": 0.11511071771383286, "learning_rate": 0.002, "loss": 2.3578, "step": 35340 }, { "epoch": 0.13665321395988927, "grad_norm": 0.10633667558431625, "learning_rate": 0.002, "loss": 2.3657, "step": 35350 }, { "epoch": 0.13669187116327255, "grad_norm": 0.12423171103000641, "learning_rate": 0.002, "loss": 2.3771, "step": 35360 }, { "epoch": 0.13673052836665583, "grad_norm": 0.10752137005329132, "learning_rate": 0.002, "loss": 2.3747, "step": 35370 }, { "epoch": 0.13676918557003911, "grad_norm": 0.1084589809179306, "learning_rate": 0.002, "loss": 2.3683, "step": 35380 }, { "epoch": 0.1368078427734224, "grad_norm": 0.0935010313987732, "learning_rate": 0.002, "loss": 2.3678, "step": 35390 }, { "epoch": 0.13684649997680567, "grad_norm": 0.1256430596113205, "learning_rate": 0.002, "loss": 2.3655, "step": 35400 }, { "epoch": 0.13688515718018895, "grad_norm": 0.11605731397867203, "learning_rate": 0.002, "loss": 2.3828, "step": 35410 }, { "epoch": 0.13692381438357223, "grad_norm": 0.12058790028095245, "learning_rate": 0.002, "loss": 2.3725, "step": 35420 }, { "epoch": 0.13696247158695551, "grad_norm": 0.12661702930927277, "learning_rate": 0.002, "loss": 2.3644, "step": 35430 }, { "epoch": 0.1370011287903388, "grad_norm": 0.09687843918800354, "learning_rate": 0.002, "loss": 2.3704, "step": 35440 }, { "epoch": 0.13703978599372207, "grad_norm": 0.11837822943925858, "learning_rate": 0.002, "loss": 2.3673, "step": 35450 }, { "epoch": 0.13707844319710535, "grad_norm": 0.11290821433067322, "learning_rate": 0.002, "loss": 2.3684, "step": 35460 }, { "epoch": 0.13711710040048863, "grad_norm": 0.11151868104934692, "learning_rate": 0.002, "loss": 2.3849, "step": 35470 }, { "epoch": 0.13715575760387191, "grad_norm": 0.13255830109119415, "learning_rate": 0.002, "loss": 2.3654, "step": 35480 }, { "epoch": 0.1371944148072552, "grad_norm": 0.09891264140605927, "learning_rate": 0.002, "loss": 2.3509, "step": 35490 }, { "epoch": 0.13723307201063847, "grad_norm": 0.11972101032733917, "learning_rate": 0.002, "loss": 2.3627, "step": 35500 }, { "epoch": 0.13727172921402175, "grad_norm": 0.11530592292547226, "learning_rate": 0.002, "loss": 2.3613, "step": 35510 }, { "epoch": 0.137310386417405, "grad_norm": 0.10842709243297577, "learning_rate": 0.002, "loss": 2.3698, "step": 35520 }, { "epoch": 0.13734904362078829, "grad_norm": 0.11317627876996994, "learning_rate": 0.002, "loss": 2.379, "step": 35530 }, { "epoch": 0.13738770082417157, "grad_norm": 0.13239452242851257, "learning_rate": 0.002, "loss": 2.3725, "step": 35540 }, { "epoch": 0.13742635802755485, "grad_norm": 0.12214444577693939, "learning_rate": 0.002, "loss": 2.3694, "step": 35550 }, { "epoch": 0.13746501523093813, "grad_norm": 0.12387856096029282, "learning_rate": 0.002, "loss": 2.39, "step": 35560 }, { "epoch": 0.1375036724343214, "grad_norm": 0.10341054946184158, "learning_rate": 0.002, "loss": 2.3709, "step": 35570 }, { "epoch": 0.13754232963770469, "grad_norm": 0.1387908160686493, "learning_rate": 0.002, "loss": 2.3667, "step": 35580 }, { "epoch": 0.13758098684108797, "grad_norm": 0.11028929054737091, "learning_rate": 0.002, "loss": 2.3686, "step": 35590 }, { "epoch": 0.13761964404447125, "grad_norm": 0.10659206658601761, "learning_rate": 0.002, "loss": 2.3689, "step": 35600 }, { "epoch": 0.13765830124785453, "grad_norm": 0.1167743057012558, "learning_rate": 0.002, "loss": 2.3729, "step": 35610 }, { "epoch": 0.1376969584512378, "grad_norm": 0.1232951208949089, "learning_rate": 0.002, "loss": 2.3617, "step": 35620 }, { "epoch": 0.13773561565462109, "grad_norm": 0.118013396859169, "learning_rate": 0.002, "loss": 2.3732, "step": 35630 }, { "epoch": 0.13777427285800437, "grad_norm": 0.1103489100933075, "learning_rate": 0.002, "loss": 2.3585, "step": 35640 }, { "epoch": 0.13781293006138765, "grad_norm": 0.10421431809663773, "learning_rate": 0.002, "loss": 2.3725, "step": 35650 }, { "epoch": 0.13785158726477093, "grad_norm": 0.11698539555072784, "learning_rate": 0.002, "loss": 2.3649, "step": 35660 }, { "epoch": 0.1378902444681542, "grad_norm": 0.08978178352117538, "learning_rate": 0.002, "loss": 2.3687, "step": 35670 }, { "epoch": 0.13792890167153748, "grad_norm": 0.12872745096683502, "learning_rate": 0.002, "loss": 2.3584, "step": 35680 }, { "epoch": 0.13796755887492076, "grad_norm": 0.10976526886224747, "learning_rate": 0.002, "loss": 2.3677, "step": 35690 }, { "epoch": 0.13800621607830404, "grad_norm": 0.13008564710617065, "learning_rate": 0.002, "loss": 2.3638, "step": 35700 }, { "epoch": 0.1380448732816873, "grad_norm": 0.10386268049478531, "learning_rate": 0.002, "loss": 2.3568, "step": 35710 }, { "epoch": 0.13808353048507058, "grad_norm": 0.11616958677768707, "learning_rate": 0.002, "loss": 2.3685, "step": 35720 }, { "epoch": 0.13812218768845386, "grad_norm": 0.13968725502490997, "learning_rate": 0.002, "loss": 2.3657, "step": 35730 }, { "epoch": 0.13816084489183714, "grad_norm": 0.11268781870603561, "learning_rate": 0.002, "loss": 2.3713, "step": 35740 }, { "epoch": 0.13819950209522042, "grad_norm": 0.1011965274810791, "learning_rate": 0.002, "loss": 2.3644, "step": 35750 }, { "epoch": 0.1382381592986037, "grad_norm": 0.09758837521076202, "learning_rate": 0.002, "loss": 2.3621, "step": 35760 }, { "epoch": 0.13827681650198698, "grad_norm": 0.11418548226356506, "learning_rate": 0.002, "loss": 2.3667, "step": 35770 }, { "epoch": 0.13831547370537026, "grad_norm": 0.0981953963637352, "learning_rate": 0.002, "loss": 2.348, "step": 35780 }, { "epoch": 0.13835413090875354, "grad_norm": 0.10302402824163437, "learning_rate": 0.002, "loss": 2.3665, "step": 35790 }, { "epoch": 0.13839278811213682, "grad_norm": 0.14570669829845428, "learning_rate": 0.002, "loss": 2.3531, "step": 35800 }, { "epoch": 0.1384314453155201, "grad_norm": 0.1252729445695877, "learning_rate": 0.002, "loss": 2.3825, "step": 35810 }, { "epoch": 0.13847010251890338, "grad_norm": 0.10477358847856522, "learning_rate": 0.002, "loss": 2.3634, "step": 35820 }, { "epoch": 0.13850875972228666, "grad_norm": 0.10391885787248611, "learning_rate": 0.002, "loss": 2.3738, "step": 35830 }, { "epoch": 0.13854741692566994, "grad_norm": 0.11140663921833038, "learning_rate": 0.002, "loss": 2.3673, "step": 35840 }, { "epoch": 0.13858607412905322, "grad_norm": 0.11339595913887024, "learning_rate": 0.002, "loss": 2.3738, "step": 35850 }, { "epoch": 0.1386247313324365, "grad_norm": 0.11354203522205353, "learning_rate": 0.002, "loss": 2.3675, "step": 35860 }, { "epoch": 0.13866338853581978, "grad_norm": 0.10377524793148041, "learning_rate": 0.002, "loss": 2.3651, "step": 35870 }, { "epoch": 0.13870204573920306, "grad_norm": 0.11121828854084015, "learning_rate": 0.002, "loss": 2.394, "step": 35880 }, { "epoch": 0.1387407029425863, "grad_norm": 0.107803113758564, "learning_rate": 0.002, "loss": 2.3554, "step": 35890 }, { "epoch": 0.1387793601459696, "grad_norm": 0.12699300050735474, "learning_rate": 0.002, "loss": 2.3631, "step": 35900 }, { "epoch": 0.13881801734935287, "grad_norm": 0.11092286556959152, "learning_rate": 0.002, "loss": 2.376, "step": 35910 }, { "epoch": 0.13885667455273615, "grad_norm": 0.10627099871635437, "learning_rate": 0.002, "loss": 2.3685, "step": 35920 }, { "epoch": 0.13889533175611943, "grad_norm": 0.09759150445461273, "learning_rate": 0.002, "loss": 2.3427, "step": 35930 }, { "epoch": 0.1389339889595027, "grad_norm": 0.134559765458107, "learning_rate": 0.002, "loss": 2.3538, "step": 35940 }, { "epoch": 0.138972646162886, "grad_norm": 0.10734544694423676, "learning_rate": 0.002, "loss": 2.3753, "step": 35950 }, { "epoch": 0.13901130336626927, "grad_norm": 0.09406106173992157, "learning_rate": 0.002, "loss": 2.3614, "step": 35960 }, { "epoch": 0.13904996056965255, "grad_norm": 0.09491714090108871, "learning_rate": 0.002, "loss": 2.3613, "step": 35970 }, { "epoch": 0.13908861777303583, "grad_norm": 0.1088794469833374, "learning_rate": 0.002, "loss": 2.3645, "step": 35980 }, { "epoch": 0.1391272749764191, "grad_norm": 0.11808019876480103, "learning_rate": 0.002, "loss": 2.3747, "step": 35990 }, { "epoch": 0.1391659321798024, "grad_norm": 0.11798539012670517, "learning_rate": 0.002, "loss": 2.3708, "step": 36000 }, { "epoch": 0.13920458938318567, "grad_norm": 0.11974812299013138, "learning_rate": 0.002, "loss": 2.373, "step": 36010 }, { "epoch": 0.13924324658656895, "grad_norm": 0.10560175031423569, "learning_rate": 0.002, "loss": 2.3733, "step": 36020 }, { "epoch": 0.13928190378995223, "grad_norm": 0.10788323730230331, "learning_rate": 0.002, "loss": 2.3629, "step": 36030 }, { "epoch": 0.1393205609933355, "grad_norm": 0.10671960562467575, "learning_rate": 0.002, "loss": 2.3655, "step": 36040 }, { "epoch": 0.1393592181967188, "grad_norm": 0.10810962319374084, "learning_rate": 0.002, "loss": 2.3586, "step": 36050 }, { "epoch": 0.13939787540010207, "grad_norm": 0.14038975536823273, "learning_rate": 0.002, "loss": 2.3456, "step": 36060 }, { "epoch": 0.13943653260348535, "grad_norm": 0.11312098801136017, "learning_rate": 0.002, "loss": 2.3605, "step": 36070 }, { "epoch": 0.1394751898068686, "grad_norm": 0.11241593956947327, "learning_rate": 0.002, "loss": 2.3753, "step": 36080 }, { "epoch": 0.13951384701025188, "grad_norm": 0.11558341234922409, "learning_rate": 0.002, "loss": 2.3671, "step": 36090 }, { "epoch": 0.13955250421363516, "grad_norm": 0.11133956909179688, "learning_rate": 0.002, "loss": 2.3725, "step": 36100 }, { "epoch": 0.13959116141701844, "grad_norm": 0.10750514268875122, "learning_rate": 0.002, "loss": 2.3552, "step": 36110 }, { "epoch": 0.13962981862040172, "grad_norm": 0.1164507046341896, "learning_rate": 0.002, "loss": 2.3802, "step": 36120 }, { "epoch": 0.139668475823785, "grad_norm": 0.10103576630353928, "learning_rate": 0.002, "loss": 2.3707, "step": 36130 }, { "epoch": 0.13970713302716828, "grad_norm": 0.12037768959999084, "learning_rate": 0.002, "loss": 2.372, "step": 36140 }, { "epoch": 0.13974579023055156, "grad_norm": 0.0958688035607338, "learning_rate": 0.002, "loss": 2.3747, "step": 36150 }, { "epoch": 0.13978444743393484, "grad_norm": 0.1011674776673317, "learning_rate": 0.002, "loss": 2.3549, "step": 36160 }, { "epoch": 0.13982310463731812, "grad_norm": 0.12649409472942352, "learning_rate": 0.002, "loss": 2.366, "step": 36170 }, { "epoch": 0.1398617618407014, "grad_norm": 0.10558871924877167, "learning_rate": 0.002, "loss": 2.3749, "step": 36180 }, { "epoch": 0.13990041904408468, "grad_norm": 0.1151675283908844, "learning_rate": 0.002, "loss": 2.3606, "step": 36190 }, { "epoch": 0.13993907624746796, "grad_norm": 0.09600713849067688, "learning_rate": 0.002, "loss": 2.3597, "step": 36200 }, { "epoch": 0.13997773345085124, "grad_norm": 0.1276940405368805, "learning_rate": 0.002, "loss": 2.3732, "step": 36210 }, { "epoch": 0.14001639065423452, "grad_norm": 0.11435941606760025, "learning_rate": 0.002, "loss": 2.3576, "step": 36220 }, { "epoch": 0.1400550478576178, "grad_norm": 0.11217609792947769, "learning_rate": 0.002, "loss": 2.3648, "step": 36230 }, { "epoch": 0.14009370506100108, "grad_norm": 0.09375467151403427, "learning_rate": 0.002, "loss": 2.3704, "step": 36240 }, { "epoch": 0.14013236226438436, "grad_norm": 0.1384739875793457, "learning_rate": 0.002, "loss": 2.3657, "step": 36250 }, { "epoch": 0.1401710194677676, "grad_norm": 0.11851988732814789, "learning_rate": 0.002, "loss": 2.3706, "step": 36260 }, { "epoch": 0.1402096766711509, "grad_norm": 0.1143161803483963, "learning_rate": 0.002, "loss": 2.3688, "step": 36270 }, { "epoch": 0.14024833387453417, "grad_norm": 0.09635128825902939, "learning_rate": 0.002, "loss": 2.3536, "step": 36280 }, { "epoch": 0.14028699107791745, "grad_norm": 0.092675119638443, "learning_rate": 0.002, "loss": 2.3677, "step": 36290 }, { "epoch": 0.14032564828130073, "grad_norm": 0.10740847885608673, "learning_rate": 0.002, "loss": 2.3663, "step": 36300 }, { "epoch": 0.140364305484684, "grad_norm": 0.1300441473722458, "learning_rate": 0.002, "loss": 2.3706, "step": 36310 }, { "epoch": 0.1404029626880673, "grad_norm": 0.11544227600097656, "learning_rate": 0.002, "loss": 2.3693, "step": 36320 }, { "epoch": 0.14044161989145057, "grad_norm": 0.12911774218082428, "learning_rate": 0.002, "loss": 2.3575, "step": 36330 }, { "epoch": 0.14048027709483385, "grad_norm": 0.10501087456941605, "learning_rate": 0.002, "loss": 2.3596, "step": 36340 }, { "epoch": 0.14051893429821713, "grad_norm": 0.11758385598659515, "learning_rate": 0.002, "loss": 2.363, "step": 36350 }, { "epoch": 0.1405575915016004, "grad_norm": 0.11279705911874771, "learning_rate": 0.002, "loss": 2.363, "step": 36360 }, { "epoch": 0.1405962487049837, "grad_norm": 0.11421018093824387, "learning_rate": 0.002, "loss": 2.3634, "step": 36370 }, { "epoch": 0.14063490590836697, "grad_norm": 0.10318736732006073, "learning_rate": 0.002, "loss": 2.3807, "step": 36380 }, { "epoch": 0.14067356311175025, "grad_norm": 0.10576699674129486, "learning_rate": 0.002, "loss": 2.377, "step": 36390 }, { "epoch": 0.14071222031513353, "grad_norm": 0.10628781467676163, "learning_rate": 0.002, "loss": 2.3837, "step": 36400 }, { "epoch": 0.1407508775185168, "grad_norm": 0.09710478782653809, "learning_rate": 0.002, "loss": 2.3691, "step": 36410 }, { "epoch": 0.1407895347219001, "grad_norm": 0.11761929094791412, "learning_rate": 0.002, "loss": 2.3603, "step": 36420 }, { "epoch": 0.14082819192528337, "grad_norm": 0.11446376144886017, "learning_rate": 0.002, "loss": 2.3745, "step": 36430 }, { "epoch": 0.14086684912866665, "grad_norm": 0.10575418919324875, "learning_rate": 0.002, "loss": 2.3648, "step": 36440 }, { "epoch": 0.1409055063320499, "grad_norm": 0.10004209727048874, "learning_rate": 0.002, "loss": 2.3669, "step": 36450 }, { "epoch": 0.14094416353543318, "grad_norm": 0.11382671445608139, "learning_rate": 0.002, "loss": 2.3631, "step": 36460 }, { "epoch": 0.14098282073881646, "grad_norm": 0.11979065090417862, "learning_rate": 0.002, "loss": 2.3604, "step": 36470 }, { "epoch": 0.14102147794219974, "grad_norm": 0.09754132479429245, "learning_rate": 0.002, "loss": 2.3672, "step": 36480 }, { "epoch": 0.14106013514558302, "grad_norm": 0.1170993447303772, "learning_rate": 0.002, "loss": 2.3549, "step": 36490 }, { "epoch": 0.1410987923489663, "grad_norm": 0.09756185859441757, "learning_rate": 0.002, "loss": 2.3607, "step": 36500 }, { "epoch": 0.14113744955234958, "grad_norm": 0.12066052854061127, "learning_rate": 0.002, "loss": 2.3645, "step": 36510 }, { "epoch": 0.14117610675573286, "grad_norm": 0.09300950914621353, "learning_rate": 0.002, "loss": 2.3533, "step": 36520 }, { "epoch": 0.14121476395911614, "grad_norm": 0.10591083765029907, "learning_rate": 0.002, "loss": 2.3694, "step": 36530 }, { "epoch": 0.14125342116249942, "grad_norm": 0.11248134076595306, "learning_rate": 0.002, "loss": 2.3586, "step": 36540 }, { "epoch": 0.1412920783658827, "grad_norm": 0.1965792328119278, "learning_rate": 0.002, "loss": 2.3724, "step": 36550 }, { "epoch": 0.14133073556926598, "grad_norm": 0.11744797229766846, "learning_rate": 0.002, "loss": 2.3771, "step": 36560 }, { "epoch": 0.14136939277264926, "grad_norm": 0.10491868853569031, "learning_rate": 0.002, "loss": 2.3773, "step": 36570 }, { "epoch": 0.14140804997603254, "grad_norm": 0.11006952077150345, "learning_rate": 0.002, "loss": 2.3676, "step": 36580 }, { "epoch": 0.14144670717941582, "grad_norm": 0.10664433985948563, "learning_rate": 0.002, "loss": 2.3863, "step": 36590 }, { "epoch": 0.1414853643827991, "grad_norm": 0.10997500270605087, "learning_rate": 0.002, "loss": 2.3784, "step": 36600 }, { "epoch": 0.14152402158618238, "grad_norm": 0.10684429109096527, "learning_rate": 0.002, "loss": 2.3668, "step": 36610 }, { "epoch": 0.14156267878956566, "grad_norm": 0.11024966835975647, "learning_rate": 0.002, "loss": 2.3683, "step": 36620 }, { "epoch": 0.1416013359929489, "grad_norm": 0.10300753265619278, "learning_rate": 0.002, "loss": 2.3713, "step": 36630 }, { "epoch": 0.1416399931963322, "grad_norm": 0.2539750635623932, "learning_rate": 0.002, "loss": 2.373, "step": 36640 }, { "epoch": 0.14167865039971547, "grad_norm": 0.11048327386379242, "learning_rate": 0.002, "loss": 2.368, "step": 36650 }, { "epoch": 0.14171730760309875, "grad_norm": 0.10366856306791306, "learning_rate": 0.002, "loss": 2.3658, "step": 36660 }, { "epoch": 0.14175596480648203, "grad_norm": 0.09625563025474548, "learning_rate": 0.002, "loss": 2.3577, "step": 36670 }, { "epoch": 0.1417946220098653, "grad_norm": 0.10819088667631149, "learning_rate": 0.002, "loss": 2.37, "step": 36680 }, { "epoch": 0.1418332792132486, "grad_norm": 0.11163844913244247, "learning_rate": 0.002, "loss": 2.3599, "step": 36690 }, { "epoch": 0.14187193641663187, "grad_norm": 0.13152261078357697, "learning_rate": 0.002, "loss": 2.3553, "step": 36700 }, { "epoch": 0.14191059362001515, "grad_norm": 0.11616198718547821, "learning_rate": 0.002, "loss": 2.3623, "step": 36710 }, { "epoch": 0.14194925082339843, "grad_norm": 0.11571443825960159, "learning_rate": 0.002, "loss": 2.3592, "step": 36720 }, { "epoch": 0.1419879080267817, "grad_norm": 0.1276528388261795, "learning_rate": 0.002, "loss": 2.3616, "step": 36730 }, { "epoch": 0.142026565230165, "grad_norm": 0.11477814614772797, "learning_rate": 0.002, "loss": 2.3555, "step": 36740 }, { "epoch": 0.14206522243354827, "grad_norm": 0.11127634346485138, "learning_rate": 0.002, "loss": 2.3663, "step": 36750 }, { "epoch": 0.14210387963693155, "grad_norm": 0.11378784477710724, "learning_rate": 0.002, "loss": 2.3691, "step": 36760 }, { "epoch": 0.14214253684031483, "grad_norm": 0.09223242849111557, "learning_rate": 0.002, "loss": 2.3694, "step": 36770 }, { "epoch": 0.1421811940436981, "grad_norm": 0.1084481030702591, "learning_rate": 0.002, "loss": 2.3683, "step": 36780 }, { "epoch": 0.1422198512470814, "grad_norm": 0.12222377210855484, "learning_rate": 0.002, "loss": 2.3604, "step": 36790 }, { "epoch": 0.14225850845046467, "grad_norm": 0.10207165777683258, "learning_rate": 0.002, "loss": 2.3654, "step": 36800 }, { "epoch": 0.14229716565384795, "grad_norm": 0.13364824652671814, "learning_rate": 0.002, "loss": 2.348, "step": 36810 }, { "epoch": 0.1423358228572312, "grad_norm": 0.10581555217504501, "learning_rate": 0.002, "loss": 2.3753, "step": 36820 }, { "epoch": 0.14237448006061448, "grad_norm": 0.10019327700138092, "learning_rate": 0.002, "loss": 2.3839, "step": 36830 }, { "epoch": 0.14241313726399776, "grad_norm": 0.11175578832626343, "learning_rate": 0.002, "loss": 2.3673, "step": 36840 }, { "epoch": 0.14245179446738104, "grad_norm": 0.12977084517478943, "learning_rate": 0.002, "loss": 2.3664, "step": 36850 }, { "epoch": 0.14249045167076432, "grad_norm": 0.10484184324741364, "learning_rate": 0.002, "loss": 2.3688, "step": 36860 }, { "epoch": 0.1425291088741476, "grad_norm": 0.11148852109909058, "learning_rate": 0.002, "loss": 2.364, "step": 36870 }, { "epoch": 0.14256776607753088, "grad_norm": 0.12150207161903381, "learning_rate": 0.002, "loss": 2.3605, "step": 36880 }, { "epoch": 0.14260642328091416, "grad_norm": 0.3142203986644745, "learning_rate": 0.002, "loss": 2.3602, "step": 36890 }, { "epoch": 0.14264508048429744, "grad_norm": 0.1186809092760086, "learning_rate": 0.002, "loss": 2.358, "step": 36900 }, { "epoch": 0.14268373768768072, "grad_norm": 0.11978371441364288, "learning_rate": 0.002, "loss": 2.3744, "step": 36910 }, { "epoch": 0.142722394891064, "grad_norm": 0.11863017082214355, "learning_rate": 0.002, "loss": 2.3695, "step": 36920 }, { "epoch": 0.14276105209444728, "grad_norm": 0.09911804646253586, "learning_rate": 0.002, "loss": 2.3605, "step": 36930 }, { "epoch": 0.14279970929783056, "grad_norm": 0.10414470732212067, "learning_rate": 0.002, "loss": 2.3688, "step": 36940 }, { "epoch": 0.14283836650121384, "grad_norm": 0.11176794022321701, "learning_rate": 0.002, "loss": 2.3714, "step": 36950 }, { "epoch": 0.14287702370459712, "grad_norm": 0.09463711827993393, "learning_rate": 0.002, "loss": 2.3636, "step": 36960 }, { "epoch": 0.1429156809079804, "grad_norm": 0.10849887132644653, "learning_rate": 0.002, "loss": 2.3695, "step": 36970 }, { "epoch": 0.14295433811136368, "grad_norm": 0.11662127822637558, "learning_rate": 0.002, "loss": 2.3674, "step": 36980 }, { "epoch": 0.14299299531474696, "grad_norm": 0.10456910729408264, "learning_rate": 0.002, "loss": 2.3852, "step": 36990 }, { "epoch": 0.14303165251813021, "grad_norm": 0.09997577965259552, "learning_rate": 0.002, "loss": 2.3637, "step": 37000 }, { "epoch": 0.1430703097215135, "grad_norm": 0.09770150482654572, "learning_rate": 0.002, "loss": 2.3876, "step": 37010 }, { "epoch": 0.14310896692489677, "grad_norm": 0.10815806686878204, "learning_rate": 0.002, "loss": 2.3694, "step": 37020 }, { "epoch": 0.14314762412828005, "grad_norm": 0.10741622745990753, "learning_rate": 0.002, "loss": 2.3598, "step": 37030 }, { "epoch": 0.14318628133166333, "grad_norm": 0.09064080566167831, "learning_rate": 0.002, "loss": 2.3537, "step": 37040 }, { "epoch": 0.14322493853504661, "grad_norm": 0.12310798466205597, "learning_rate": 0.002, "loss": 2.3606, "step": 37050 }, { "epoch": 0.1432635957384299, "grad_norm": 0.11956330388784409, "learning_rate": 0.002, "loss": 2.3489, "step": 37060 }, { "epoch": 0.14330225294181317, "grad_norm": 0.09280236810445786, "learning_rate": 0.002, "loss": 2.3645, "step": 37070 }, { "epoch": 0.14334091014519645, "grad_norm": 0.10639739781618118, "learning_rate": 0.002, "loss": 2.3883, "step": 37080 }, { "epoch": 0.14337956734857973, "grad_norm": 0.10321924835443497, "learning_rate": 0.002, "loss": 2.3768, "step": 37090 }, { "epoch": 0.14341822455196301, "grad_norm": 0.09975964576005936, "learning_rate": 0.002, "loss": 2.3781, "step": 37100 }, { "epoch": 0.1434568817553463, "grad_norm": 0.11580091714859009, "learning_rate": 0.002, "loss": 2.3709, "step": 37110 }, { "epoch": 0.14349553895872957, "grad_norm": 0.10225492715835571, "learning_rate": 0.002, "loss": 2.3721, "step": 37120 }, { "epoch": 0.14353419616211285, "grad_norm": 0.10724704712629318, "learning_rate": 0.002, "loss": 2.3681, "step": 37130 }, { "epoch": 0.14357285336549613, "grad_norm": 0.09924058616161346, "learning_rate": 0.002, "loss": 2.3727, "step": 37140 }, { "epoch": 0.1436115105688794, "grad_norm": 0.11980696767568588, "learning_rate": 0.002, "loss": 2.3675, "step": 37150 }, { "epoch": 0.1436501677722627, "grad_norm": 0.11811673641204834, "learning_rate": 0.002, "loss": 2.3626, "step": 37160 }, { "epoch": 0.14368882497564597, "grad_norm": 0.11168906837701797, "learning_rate": 0.002, "loss": 2.3658, "step": 37170 }, { "epoch": 0.14372748217902925, "grad_norm": 0.09915437549352646, "learning_rate": 0.002, "loss": 2.3636, "step": 37180 }, { "epoch": 0.1437661393824125, "grad_norm": 0.13308192789554596, "learning_rate": 0.002, "loss": 2.3685, "step": 37190 }, { "epoch": 0.14380479658579579, "grad_norm": 0.11475107818841934, "learning_rate": 0.002, "loss": 2.383, "step": 37200 }, { "epoch": 0.14384345378917907, "grad_norm": 0.09859679639339447, "learning_rate": 0.002, "loss": 2.3706, "step": 37210 }, { "epoch": 0.14388211099256235, "grad_norm": 0.12095901370048523, "learning_rate": 0.002, "loss": 2.3663, "step": 37220 }, { "epoch": 0.14392076819594563, "grad_norm": 0.09938090294599533, "learning_rate": 0.002, "loss": 2.3652, "step": 37230 }, { "epoch": 0.1439594253993289, "grad_norm": 0.1443241834640503, "learning_rate": 0.002, "loss": 2.3554, "step": 37240 }, { "epoch": 0.14399808260271219, "grad_norm": 0.12056610733270645, "learning_rate": 0.002, "loss": 2.3541, "step": 37250 }, { "epoch": 0.14403673980609547, "grad_norm": 0.11923497915267944, "learning_rate": 0.002, "loss": 2.3644, "step": 37260 }, { "epoch": 0.14407539700947875, "grad_norm": 0.1033424586057663, "learning_rate": 0.002, "loss": 2.3597, "step": 37270 }, { "epoch": 0.14411405421286202, "grad_norm": 0.10121098160743713, "learning_rate": 0.002, "loss": 2.3768, "step": 37280 }, { "epoch": 0.1441527114162453, "grad_norm": 0.11377987265586853, "learning_rate": 0.002, "loss": 2.3491, "step": 37290 }, { "epoch": 0.14419136861962858, "grad_norm": 0.09906457364559174, "learning_rate": 0.002, "loss": 2.3795, "step": 37300 }, { "epoch": 0.14423002582301186, "grad_norm": 0.0996997058391571, "learning_rate": 0.002, "loss": 2.3718, "step": 37310 }, { "epoch": 0.14426868302639514, "grad_norm": 0.1009167730808258, "learning_rate": 0.002, "loss": 2.369, "step": 37320 }, { "epoch": 0.14430734022977842, "grad_norm": 0.11943599581718445, "learning_rate": 0.002, "loss": 2.37, "step": 37330 }, { "epoch": 0.1443459974331617, "grad_norm": 0.10313688963651657, "learning_rate": 0.002, "loss": 2.3703, "step": 37340 }, { "epoch": 0.14438465463654498, "grad_norm": 0.11535441875457764, "learning_rate": 0.002, "loss": 2.3588, "step": 37350 }, { "epoch": 0.14442331183992826, "grad_norm": 0.12095227837562561, "learning_rate": 0.002, "loss": 2.3724, "step": 37360 }, { "epoch": 0.14446196904331154, "grad_norm": 0.12098788470029831, "learning_rate": 0.002, "loss": 2.3628, "step": 37370 }, { "epoch": 0.1445006262466948, "grad_norm": 0.0900544747710228, "learning_rate": 0.002, "loss": 2.3721, "step": 37380 }, { "epoch": 0.14453928345007808, "grad_norm": 0.1258208453655243, "learning_rate": 0.002, "loss": 2.366, "step": 37390 }, { "epoch": 0.14457794065346136, "grad_norm": 0.10623008757829666, "learning_rate": 0.002, "loss": 2.3544, "step": 37400 }, { "epoch": 0.14461659785684464, "grad_norm": 0.11886344105005264, "learning_rate": 0.002, "loss": 2.3681, "step": 37410 }, { "epoch": 0.14465525506022792, "grad_norm": 0.12853644788265228, "learning_rate": 0.002, "loss": 2.3613, "step": 37420 }, { "epoch": 0.1446939122636112, "grad_norm": 0.1244492307305336, "learning_rate": 0.002, "loss": 2.3644, "step": 37430 }, { "epoch": 0.14473256946699448, "grad_norm": 0.10314949601888657, "learning_rate": 0.002, "loss": 2.3752, "step": 37440 }, { "epoch": 0.14477122667037776, "grad_norm": 0.1144338920712471, "learning_rate": 0.002, "loss": 2.3702, "step": 37450 }, { "epoch": 0.14480988387376104, "grad_norm": 0.09299468994140625, "learning_rate": 0.002, "loss": 2.3696, "step": 37460 }, { "epoch": 0.14484854107714432, "grad_norm": 0.12412026524543762, "learning_rate": 0.002, "loss": 2.3616, "step": 37470 }, { "epoch": 0.1448871982805276, "grad_norm": 0.12529881298542023, "learning_rate": 0.002, "loss": 2.3554, "step": 37480 }, { "epoch": 0.14492585548391088, "grad_norm": 0.11511372774839401, "learning_rate": 0.002, "loss": 2.3888, "step": 37490 }, { "epoch": 0.14496451268729416, "grad_norm": 0.1147739514708519, "learning_rate": 0.002, "loss": 2.3665, "step": 37500 }, { "epoch": 0.14500316989067744, "grad_norm": 0.08730924129486084, "learning_rate": 0.002, "loss": 2.3678, "step": 37510 }, { "epoch": 0.14504182709406072, "grad_norm": 0.11324899643659592, "learning_rate": 0.002, "loss": 2.3602, "step": 37520 }, { "epoch": 0.145080484297444, "grad_norm": 0.10321440547704697, "learning_rate": 0.002, "loss": 2.3519, "step": 37530 }, { "epoch": 0.14511914150082728, "grad_norm": 0.10298170894384384, "learning_rate": 0.002, "loss": 2.3675, "step": 37540 }, { "epoch": 0.14515779870421056, "grad_norm": 0.10055653750896454, "learning_rate": 0.002, "loss": 2.3652, "step": 37550 }, { "epoch": 0.1451964559075938, "grad_norm": 0.11173214018344879, "learning_rate": 0.002, "loss": 2.3775, "step": 37560 }, { "epoch": 0.1452351131109771, "grad_norm": 0.11200610548257828, "learning_rate": 0.002, "loss": 2.3571, "step": 37570 }, { "epoch": 0.14527377031436037, "grad_norm": 0.0937102884054184, "learning_rate": 0.002, "loss": 2.3699, "step": 37580 }, { "epoch": 0.14531242751774365, "grad_norm": 0.10634052008390427, "learning_rate": 0.002, "loss": 2.354, "step": 37590 }, { "epoch": 0.14535108472112693, "grad_norm": 0.10749258100986481, "learning_rate": 0.002, "loss": 2.36, "step": 37600 }, { "epoch": 0.1453897419245102, "grad_norm": 0.10672356188297272, "learning_rate": 0.002, "loss": 2.3641, "step": 37610 }, { "epoch": 0.1454283991278935, "grad_norm": 0.1124885305762291, "learning_rate": 0.002, "loss": 2.3736, "step": 37620 }, { "epoch": 0.14546705633127677, "grad_norm": 0.144533172249794, "learning_rate": 0.002, "loss": 2.358, "step": 37630 }, { "epoch": 0.14550571353466005, "grad_norm": 0.12376045435667038, "learning_rate": 0.002, "loss": 2.3595, "step": 37640 }, { "epoch": 0.14554437073804333, "grad_norm": 0.11379161477088928, "learning_rate": 0.002, "loss": 2.3439, "step": 37650 }, { "epoch": 0.1455830279414266, "grad_norm": 0.10776171833276749, "learning_rate": 0.002, "loss": 2.3643, "step": 37660 }, { "epoch": 0.1456216851448099, "grad_norm": 0.1258264183998108, "learning_rate": 0.002, "loss": 2.3556, "step": 37670 }, { "epoch": 0.14566034234819317, "grad_norm": 0.10170570015907288, "learning_rate": 0.002, "loss": 2.3609, "step": 37680 }, { "epoch": 0.14569899955157645, "grad_norm": 0.10516185313463211, "learning_rate": 0.002, "loss": 2.353, "step": 37690 }, { "epoch": 0.14573765675495973, "grad_norm": 0.10602464526891708, "learning_rate": 0.002, "loss": 2.3604, "step": 37700 }, { "epoch": 0.145776313958343, "grad_norm": 0.11193682253360748, "learning_rate": 0.002, "loss": 2.3578, "step": 37710 }, { "epoch": 0.1458149711617263, "grad_norm": 0.12408501654863358, "learning_rate": 0.002, "loss": 2.3633, "step": 37720 }, { "epoch": 0.14585362836510957, "grad_norm": 0.09819848090410233, "learning_rate": 0.002, "loss": 2.3627, "step": 37730 }, { "epoch": 0.14589228556849285, "grad_norm": 0.1574973613023758, "learning_rate": 0.002, "loss": 2.3538, "step": 37740 }, { "epoch": 0.1459309427718761, "grad_norm": 0.10473944991827011, "learning_rate": 0.002, "loss": 2.3681, "step": 37750 }, { "epoch": 0.14596959997525938, "grad_norm": 0.10519735515117645, "learning_rate": 0.002, "loss": 2.3528, "step": 37760 }, { "epoch": 0.14600825717864266, "grad_norm": 0.13095340132713318, "learning_rate": 0.002, "loss": 2.3606, "step": 37770 }, { "epoch": 0.14604691438202594, "grad_norm": 0.1454060673713684, "learning_rate": 0.002, "loss": 2.3669, "step": 37780 }, { "epoch": 0.14608557158540922, "grad_norm": 0.10329166799783707, "learning_rate": 0.002, "loss": 2.3674, "step": 37790 }, { "epoch": 0.1461242287887925, "grad_norm": 0.11781110614538193, "learning_rate": 0.002, "loss": 2.3678, "step": 37800 }, { "epoch": 0.14616288599217578, "grad_norm": 0.12178536504507065, "learning_rate": 0.002, "loss": 2.3613, "step": 37810 }, { "epoch": 0.14620154319555906, "grad_norm": 0.10970946401357651, "learning_rate": 0.002, "loss": 2.3826, "step": 37820 }, { "epoch": 0.14624020039894234, "grad_norm": 0.1142861470580101, "learning_rate": 0.002, "loss": 2.3858, "step": 37830 }, { "epoch": 0.14627885760232562, "grad_norm": 0.10663673281669617, "learning_rate": 0.002, "loss": 2.3554, "step": 37840 }, { "epoch": 0.1463175148057089, "grad_norm": 0.10833430290222168, "learning_rate": 0.002, "loss": 2.3589, "step": 37850 }, { "epoch": 0.14635617200909218, "grad_norm": 0.10738880932331085, "learning_rate": 0.002, "loss": 2.3625, "step": 37860 }, { "epoch": 0.14639482921247546, "grad_norm": 0.11424372345209122, "learning_rate": 0.002, "loss": 2.3691, "step": 37870 }, { "epoch": 0.14643348641585874, "grad_norm": 0.10799358040094376, "learning_rate": 0.002, "loss": 2.3637, "step": 37880 }, { "epoch": 0.14647214361924202, "grad_norm": 0.14553822576999664, "learning_rate": 0.002, "loss": 2.3489, "step": 37890 }, { "epoch": 0.1465108008226253, "grad_norm": 0.11662301421165466, "learning_rate": 0.002, "loss": 2.3479, "step": 37900 }, { "epoch": 0.14654945802600858, "grad_norm": 0.11311762779951096, "learning_rate": 0.002, "loss": 2.3588, "step": 37910 }, { "epoch": 0.14658811522939186, "grad_norm": 0.12561675906181335, "learning_rate": 0.002, "loss": 2.3795, "step": 37920 }, { "epoch": 0.1466267724327751, "grad_norm": 0.12258616089820862, "learning_rate": 0.002, "loss": 2.3594, "step": 37930 }, { "epoch": 0.1466654296361584, "grad_norm": 0.09970700740814209, "learning_rate": 0.002, "loss": 2.3749, "step": 37940 }, { "epoch": 0.14670408683954167, "grad_norm": 0.1401025950908661, "learning_rate": 0.002, "loss": 2.365, "step": 37950 }, { "epoch": 0.14674274404292495, "grad_norm": 0.10542020946741104, "learning_rate": 0.002, "loss": 2.3528, "step": 37960 }, { "epoch": 0.14678140124630823, "grad_norm": 0.1307201087474823, "learning_rate": 0.002, "loss": 2.3626, "step": 37970 }, { "epoch": 0.1468200584496915, "grad_norm": 0.1087263748049736, "learning_rate": 0.002, "loss": 2.3709, "step": 37980 }, { "epoch": 0.1468587156530748, "grad_norm": 0.0990528091788292, "learning_rate": 0.002, "loss": 2.3591, "step": 37990 }, { "epoch": 0.14689737285645807, "grad_norm": 0.13766375184059143, "learning_rate": 0.002, "loss": 2.3612, "step": 38000 }, { "epoch": 0.14693603005984135, "grad_norm": 0.10230547934770584, "learning_rate": 0.002, "loss": 2.3692, "step": 38010 }, { "epoch": 0.14697468726322463, "grad_norm": 0.11706888675689697, "learning_rate": 0.002, "loss": 2.3648, "step": 38020 }, { "epoch": 0.1470133444666079, "grad_norm": 0.11129742115736008, "learning_rate": 0.002, "loss": 2.3695, "step": 38030 }, { "epoch": 0.1470520016699912, "grad_norm": 0.09448976814746857, "learning_rate": 0.002, "loss": 2.3813, "step": 38040 }, { "epoch": 0.14709065887337447, "grad_norm": 0.11556555330753326, "learning_rate": 0.002, "loss": 2.3735, "step": 38050 }, { "epoch": 0.14712931607675775, "grad_norm": 0.10370432585477829, "learning_rate": 0.002, "loss": 2.3654, "step": 38060 }, { "epoch": 0.14716797328014103, "grad_norm": 0.16028402745723724, "learning_rate": 0.002, "loss": 2.3788, "step": 38070 }, { "epoch": 0.1472066304835243, "grad_norm": 0.12887880206108093, "learning_rate": 0.002, "loss": 2.3581, "step": 38080 }, { "epoch": 0.1472452876869076, "grad_norm": 0.12080951780080795, "learning_rate": 0.002, "loss": 2.374, "step": 38090 }, { "epoch": 0.14728394489029087, "grad_norm": 0.11555592715740204, "learning_rate": 0.002, "loss": 2.3726, "step": 38100 }, { "epoch": 0.14732260209367415, "grad_norm": 0.09942886233329773, "learning_rate": 0.002, "loss": 2.3841, "step": 38110 }, { "epoch": 0.1473612592970574, "grad_norm": 0.10487055033445358, "learning_rate": 0.002, "loss": 2.3705, "step": 38120 }, { "epoch": 0.14739991650044068, "grad_norm": 0.10219061374664307, "learning_rate": 0.002, "loss": 2.3704, "step": 38130 }, { "epoch": 0.14743857370382396, "grad_norm": 0.10599678754806519, "learning_rate": 0.002, "loss": 2.3651, "step": 38140 }, { "epoch": 0.14747723090720724, "grad_norm": 0.11388130486011505, "learning_rate": 0.002, "loss": 2.3632, "step": 38150 }, { "epoch": 0.14751588811059052, "grad_norm": 0.10307695716619492, "learning_rate": 0.002, "loss": 2.3765, "step": 38160 }, { "epoch": 0.1475545453139738, "grad_norm": 0.10115735977888107, "learning_rate": 0.002, "loss": 2.3798, "step": 38170 }, { "epoch": 0.14759320251735708, "grad_norm": 0.11397730559110641, "learning_rate": 0.002, "loss": 2.3488, "step": 38180 }, { "epoch": 0.14763185972074036, "grad_norm": 0.10884783416986465, "learning_rate": 0.002, "loss": 2.3608, "step": 38190 }, { "epoch": 0.14767051692412364, "grad_norm": 0.10271456837654114, "learning_rate": 0.002, "loss": 2.3809, "step": 38200 }, { "epoch": 0.14770917412750692, "grad_norm": 0.11193260550498962, "learning_rate": 0.002, "loss": 2.3536, "step": 38210 }, { "epoch": 0.1477478313308902, "grad_norm": 0.11704428493976593, "learning_rate": 0.002, "loss": 2.3637, "step": 38220 }, { "epoch": 0.14778648853427348, "grad_norm": 0.10461351275444031, "learning_rate": 0.002, "loss": 2.3551, "step": 38230 }, { "epoch": 0.14782514573765676, "grad_norm": 0.11719801276922226, "learning_rate": 0.002, "loss": 2.3735, "step": 38240 }, { "epoch": 0.14786380294104004, "grad_norm": 0.12601828575134277, "learning_rate": 0.002, "loss": 2.3701, "step": 38250 }, { "epoch": 0.14790246014442332, "grad_norm": 0.1058812215924263, "learning_rate": 0.002, "loss": 2.387, "step": 38260 }, { "epoch": 0.1479411173478066, "grad_norm": 0.11499867588281631, "learning_rate": 0.002, "loss": 2.3765, "step": 38270 }, { "epoch": 0.14797977455118988, "grad_norm": 0.11680841445922852, "learning_rate": 0.002, "loss": 2.3641, "step": 38280 }, { "epoch": 0.14801843175457316, "grad_norm": 0.12532247602939606, "learning_rate": 0.002, "loss": 2.3542, "step": 38290 }, { "epoch": 0.1480570889579564, "grad_norm": 0.11021430045366287, "learning_rate": 0.002, "loss": 2.3665, "step": 38300 }, { "epoch": 0.1480957461613397, "grad_norm": 0.11453874409198761, "learning_rate": 0.002, "loss": 2.3525, "step": 38310 }, { "epoch": 0.14813440336472297, "grad_norm": 0.09985355287790298, "learning_rate": 0.002, "loss": 2.3494, "step": 38320 }, { "epoch": 0.14817306056810625, "grad_norm": 0.11640987545251846, "learning_rate": 0.002, "loss": 2.3591, "step": 38330 }, { "epoch": 0.14821171777148953, "grad_norm": 0.09893188625574112, "learning_rate": 0.002, "loss": 2.3634, "step": 38340 }, { "epoch": 0.1482503749748728, "grad_norm": 0.12688705325126648, "learning_rate": 0.002, "loss": 2.3679, "step": 38350 }, { "epoch": 0.1482890321782561, "grad_norm": 0.12036380916833878, "learning_rate": 0.002, "loss": 2.3825, "step": 38360 }, { "epoch": 0.14832768938163937, "grad_norm": 0.09100010991096497, "learning_rate": 0.002, "loss": 2.3629, "step": 38370 }, { "epoch": 0.14836634658502265, "grad_norm": 0.10775777697563171, "learning_rate": 0.002, "loss": 2.3631, "step": 38380 }, { "epoch": 0.14840500378840593, "grad_norm": 0.12571115791797638, "learning_rate": 0.002, "loss": 2.3757, "step": 38390 }, { "epoch": 0.1484436609917892, "grad_norm": 0.10129281133413315, "learning_rate": 0.002, "loss": 2.3665, "step": 38400 }, { "epoch": 0.1484823181951725, "grad_norm": 0.11698244512081146, "learning_rate": 0.002, "loss": 2.3703, "step": 38410 }, { "epoch": 0.14852097539855577, "grad_norm": 0.09857890754938126, "learning_rate": 0.002, "loss": 2.3628, "step": 38420 }, { "epoch": 0.14855963260193905, "grad_norm": 0.11326782405376434, "learning_rate": 0.002, "loss": 2.3616, "step": 38430 }, { "epoch": 0.14859828980532233, "grad_norm": 0.11370991915464401, "learning_rate": 0.002, "loss": 2.3645, "step": 38440 }, { "epoch": 0.1486369470087056, "grad_norm": 0.10248290747404099, "learning_rate": 0.002, "loss": 2.3828, "step": 38450 }, { "epoch": 0.1486756042120889, "grad_norm": 0.09775995463132858, "learning_rate": 0.002, "loss": 2.3703, "step": 38460 }, { "epoch": 0.14871426141547217, "grad_norm": 0.1176404282450676, "learning_rate": 0.002, "loss": 2.3609, "step": 38470 }, { "epoch": 0.14875291861885545, "grad_norm": 0.10633568465709686, "learning_rate": 0.002, "loss": 2.3406, "step": 38480 }, { "epoch": 0.1487915758222387, "grad_norm": 0.13698548078536987, "learning_rate": 0.002, "loss": 2.3547, "step": 38490 }, { "epoch": 0.14883023302562198, "grad_norm": 0.10824945569038391, "learning_rate": 0.002, "loss": 2.3782, "step": 38500 }, { "epoch": 0.14886889022900526, "grad_norm": 0.0984220951795578, "learning_rate": 0.002, "loss": 2.3661, "step": 38510 }, { "epoch": 0.14890754743238854, "grad_norm": 0.13383355736732483, "learning_rate": 0.002, "loss": 2.3742, "step": 38520 }, { "epoch": 0.14894620463577182, "grad_norm": 0.1034078449010849, "learning_rate": 0.002, "loss": 2.3738, "step": 38530 }, { "epoch": 0.1489848618391551, "grad_norm": 0.10896123200654984, "learning_rate": 0.002, "loss": 2.3531, "step": 38540 }, { "epoch": 0.14902351904253838, "grad_norm": 0.0965457409620285, "learning_rate": 0.002, "loss": 2.3718, "step": 38550 }, { "epoch": 0.14906217624592166, "grad_norm": 0.11075472086668015, "learning_rate": 0.002, "loss": 2.363, "step": 38560 }, { "epoch": 0.14910083344930494, "grad_norm": 0.128397598862648, "learning_rate": 0.002, "loss": 2.3763, "step": 38570 }, { "epoch": 0.14913949065268822, "grad_norm": 0.12091022729873657, "learning_rate": 0.002, "loss": 2.3682, "step": 38580 }, { "epoch": 0.1491781478560715, "grad_norm": 0.09868521243333817, "learning_rate": 0.002, "loss": 2.3706, "step": 38590 }, { "epoch": 0.14921680505945478, "grad_norm": 0.1057891920208931, "learning_rate": 0.002, "loss": 2.3737, "step": 38600 }, { "epoch": 0.14925546226283806, "grad_norm": 0.12372202426195145, "learning_rate": 0.002, "loss": 2.3776, "step": 38610 }, { "epoch": 0.14929411946622134, "grad_norm": 0.11062665283679962, "learning_rate": 0.002, "loss": 2.3553, "step": 38620 }, { "epoch": 0.14933277666960462, "grad_norm": 0.11425059288740158, "learning_rate": 0.002, "loss": 2.3647, "step": 38630 }, { "epoch": 0.1493714338729879, "grad_norm": 0.11539951711893082, "learning_rate": 0.002, "loss": 2.3679, "step": 38640 }, { "epoch": 0.14941009107637118, "grad_norm": 0.11890841275453568, "learning_rate": 0.002, "loss": 2.3762, "step": 38650 }, { "epoch": 0.14944874827975446, "grad_norm": 0.10240473598241806, "learning_rate": 0.002, "loss": 2.3527, "step": 38660 }, { "epoch": 0.14948740548313771, "grad_norm": 0.10921300947666168, "learning_rate": 0.002, "loss": 2.3677, "step": 38670 }, { "epoch": 0.149526062686521, "grad_norm": 0.114039845764637, "learning_rate": 0.002, "loss": 2.3577, "step": 38680 }, { "epoch": 0.14956471988990427, "grad_norm": 0.10230555385351181, "learning_rate": 0.002, "loss": 2.3614, "step": 38690 }, { "epoch": 0.14960337709328755, "grad_norm": 0.11473822593688965, "learning_rate": 0.002, "loss": 2.3749, "step": 38700 }, { "epoch": 0.14964203429667083, "grad_norm": 0.10442385077476501, "learning_rate": 0.002, "loss": 2.3547, "step": 38710 }, { "epoch": 0.1496806915000541, "grad_norm": 0.11906211078166962, "learning_rate": 0.002, "loss": 2.3648, "step": 38720 }, { "epoch": 0.1497193487034374, "grad_norm": 0.1230156272649765, "learning_rate": 0.002, "loss": 2.3692, "step": 38730 }, { "epoch": 0.14975800590682067, "grad_norm": 0.09884335100650787, "learning_rate": 0.002, "loss": 2.3633, "step": 38740 }, { "epoch": 0.14979666311020395, "grad_norm": 0.10977873206138611, "learning_rate": 0.002, "loss": 2.3503, "step": 38750 }, { "epoch": 0.14983532031358723, "grad_norm": 0.12378506362438202, "learning_rate": 0.002, "loss": 2.3517, "step": 38760 }, { "epoch": 0.1498739775169705, "grad_norm": 0.11741282045841217, "learning_rate": 0.002, "loss": 2.3625, "step": 38770 }, { "epoch": 0.1499126347203538, "grad_norm": 0.11203381419181824, "learning_rate": 0.002, "loss": 2.3712, "step": 38780 }, { "epoch": 0.14995129192373707, "grad_norm": 0.09905987977981567, "learning_rate": 0.002, "loss": 2.3638, "step": 38790 }, { "epoch": 0.14998994912712035, "grad_norm": 0.11241578310728073, "learning_rate": 0.002, "loss": 2.3799, "step": 38800 }, { "epoch": 0.15002860633050363, "grad_norm": 0.1121586337685585, "learning_rate": 0.002, "loss": 2.3602, "step": 38810 }, { "epoch": 0.1500672635338869, "grad_norm": 0.11441362649202347, "learning_rate": 0.002, "loss": 2.3758, "step": 38820 }, { "epoch": 0.1501059207372702, "grad_norm": 0.09616050124168396, "learning_rate": 0.002, "loss": 2.3545, "step": 38830 }, { "epoch": 0.15014457794065347, "grad_norm": 0.11325273662805557, "learning_rate": 0.002, "loss": 2.3548, "step": 38840 }, { "epoch": 0.15018323514403675, "grad_norm": 0.11043589562177658, "learning_rate": 0.002, "loss": 2.3601, "step": 38850 }, { "epoch": 0.15022189234742, "grad_norm": 0.11214675009250641, "learning_rate": 0.002, "loss": 2.3692, "step": 38860 }, { "epoch": 0.15026054955080329, "grad_norm": 0.11280752718448639, "learning_rate": 0.002, "loss": 2.376, "step": 38870 }, { "epoch": 0.15029920675418657, "grad_norm": 0.11541387438774109, "learning_rate": 0.002, "loss": 2.3546, "step": 38880 }, { "epoch": 0.15033786395756984, "grad_norm": 0.1194639578461647, "learning_rate": 0.002, "loss": 2.3734, "step": 38890 }, { "epoch": 0.15037652116095312, "grad_norm": 0.10848919302225113, "learning_rate": 0.002, "loss": 2.3627, "step": 38900 }, { "epoch": 0.1504151783643364, "grad_norm": 0.14039918780326843, "learning_rate": 0.002, "loss": 2.3627, "step": 38910 }, { "epoch": 0.15045383556771968, "grad_norm": 0.11318860203027725, "learning_rate": 0.002, "loss": 2.3701, "step": 38920 }, { "epoch": 0.15049249277110296, "grad_norm": 0.10295028239488602, "learning_rate": 0.002, "loss": 2.3633, "step": 38930 }, { "epoch": 0.15053114997448624, "grad_norm": 0.11704400181770325, "learning_rate": 0.002, "loss": 2.3546, "step": 38940 }, { "epoch": 0.15056980717786952, "grad_norm": 0.09262275695800781, "learning_rate": 0.002, "loss": 2.3673, "step": 38950 }, { "epoch": 0.1506084643812528, "grad_norm": 0.11537528783082962, "learning_rate": 0.002, "loss": 2.3598, "step": 38960 }, { "epoch": 0.15064712158463608, "grad_norm": 0.11481942981481552, "learning_rate": 0.002, "loss": 2.3537, "step": 38970 }, { "epoch": 0.15068577878801936, "grad_norm": 0.1113208681344986, "learning_rate": 0.002, "loss": 2.3567, "step": 38980 }, { "epoch": 0.15072443599140264, "grad_norm": 0.12350216507911682, "learning_rate": 0.002, "loss": 2.3764, "step": 38990 }, { "epoch": 0.15076309319478592, "grad_norm": 0.11521659046411514, "learning_rate": 0.002, "loss": 2.3577, "step": 39000 }, { "epoch": 0.1508017503981692, "grad_norm": 0.10397952049970627, "learning_rate": 0.002, "loss": 2.3574, "step": 39010 }, { "epoch": 0.15084040760155248, "grad_norm": 0.11351260542869568, "learning_rate": 0.002, "loss": 2.3561, "step": 39020 }, { "epoch": 0.15087906480493576, "grad_norm": 0.10307390987873077, "learning_rate": 0.002, "loss": 2.3599, "step": 39030 }, { "epoch": 0.15091772200831902, "grad_norm": 0.1473456174135208, "learning_rate": 0.002, "loss": 2.3584, "step": 39040 }, { "epoch": 0.1509563792117023, "grad_norm": 0.10277149826288223, "learning_rate": 0.002, "loss": 2.3721, "step": 39050 }, { "epoch": 0.15099503641508558, "grad_norm": 0.10011863708496094, "learning_rate": 0.002, "loss": 2.3664, "step": 39060 }, { "epoch": 0.15103369361846886, "grad_norm": 0.10471244901418686, "learning_rate": 0.002, "loss": 2.3618, "step": 39070 }, { "epoch": 0.15107235082185214, "grad_norm": 0.11708759516477585, "learning_rate": 0.002, "loss": 2.3721, "step": 39080 }, { "epoch": 0.15111100802523542, "grad_norm": 0.10626459121704102, "learning_rate": 0.002, "loss": 2.357, "step": 39090 }, { "epoch": 0.1511496652286187, "grad_norm": 0.11258683353662491, "learning_rate": 0.002, "loss": 2.3676, "step": 39100 }, { "epoch": 0.15118832243200198, "grad_norm": 0.11116767674684525, "learning_rate": 0.002, "loss": 2.3602, "step": 39110 }, { "epoch": 0.15122697963538526, "grad_norm": 0.10669376701116562, "learning_rate": 0.002, "loss": 2.3604, "step": 39120 }, { "epoch": 0.15126563683876854, "grad_norm": 0.10341840237379074, "learning_rate": 0.002, "loss": 2.3684, "step": 39130 }, { "epoch": 0.15130429404215182, "grad_norm": 0.1296912282705307, "learning_rate": 0.002, "loss": 2.3654, "step": 39140 }, { "epoch": 0.1513429512455351, "grad_norm": 0.15270934998989105, "learning_rate": 0.002, "loss": 2.3728, "step": 39150 }, { "epoch": 0.15138160844891838, "grad_norm": 0.10418158024549484, "learning_rate": 0.002, "loss": 2.3682, "step": 39160 }, { "epoch": 0.15142026565230166, "grad_norm": 0.11090809851884842, "learning_rate": 0.002, "loss": 2.3616, "step": 39170 }, { "epoch": 0.15145892285568494, "grad_norm": 0.10907279700040817, "learning_rate": 0.002, "loss": 2.3793, "step": 39180 }, { "epoch": 0.15149758005906822, "grad_norm": 0.11926936358213425, "learning_rate": 0.002, "loss": 2.3733, "step": 39190 }, { "epoch": 0.1515362372624515, "grad_norm": 0.12747298181056976, "learning_rate": 0.002, "loss": 2.3686, "step": 39200 }, { "epoch": 0.15157489446583478, "grad_norm": 0.11071181297302246, "learning_rate": 0.002, "loss": 2.3554, "step": 39210 }, { "epoch": 0.15161355166921806, "grad_norm": 0.09829133003950119, "learning_rate": 0.002, "loss": 2.3591, "step": 39220 }, { "epoch": 0.1516522088726013, "grad_norm": 0.12237284332513809, "learning_rate": 0.002, "loss": 2.3666, "step": 39230 }, { "epoch": 0.1516908660759846, "grad_norm": 0.10596148669719696, "learning_rate": 0.002, "loss": 2.3729, "step": 39240 }, { "epoch": 0.15172952327936787, "grad_norm": 0.09914745390415192, "learning_rate": 0.002, "loss": 2.3629, "step": 39250 }, { "epoch": 0.15176818048275115, "grad_norm": 0.09774194657802582, "learning_rate": 0.002, "loss": 2.3776, "step": 39260 }, { "epoch": 0.15180683768613443, "grad_norm": 0.12466096132993698, "learning_rate": 0.002, "loss": 2.3699, "step": 39270 }, { "epoch": 0.1518454948895177, "grad_norm": 0.10741476714611053, "learning_rate": 0.002, "loss": 2.3606, "step": 39280 }, { "epoch": 0.151884152092901, "grad_norm": 0.12783187627792358, "learning_rate": 0.002, "loss": 2.3724, "step": 39290 }, { "epoch": 0.15192280929628427, "grad_norm": 0.10841857641935349, "learning_rate": 0.002, "loss": 2.3764, "step": 39300 }, { "epoch": 0.15196146649966755, "grad_norm": 0.09851887822151184, "learning_rate": 0.002, "loss": 2.3561, "step": 39310 }, { "epoch": 0.15200012370305083, "grad_norm": 0.13779272139072418, "learning_rate": 0.002, "loss": 2.3561, "step": 39320 }, { "epoch": 0.1520387809064341, "grad_norm": 0.1023627445101738, "learning_rate": 0.002, "loss": 2.3568, "step": 39330 }, { "epoch": 0.1520774381098174, "grad_norm": 0.10356482118368149, "learning_rate": 0.002, "loss": 2.3619, "step": 39340 }, { "epoch": 0.15211609531320067, "grad_norm": 0.13198953866958618, "learning_rate": 0.002, "loss": 2.3746, "step": 39350 }, { "epoch": 0.15215475251658395, "grad_norm": 0.11287932097911835, "learning_rate": 0.002, "loss": 2.3689, "step": 39360 }, { "epoch": 0.15219340971996723, "grad_norm": 0.107682004570961, "learning_rate": 0.002, "loss": 2.382, "step": 39370 }, { "epoch": 0.1522320669233505, "grad_norm": 0.0983332172036171, "learning_rate": 0.002, "loss": 2.3691, "step": 39380 }, { "epoch": 0.1522707241267338, "grad_norm": 0.10807695239782333, "learning_rate": 0.002, "loss": 2.3722, "step": 39390 }, { "epoch": 0.15230938133011707, "grad_norm": 0.1007051095366478, "learning_rate": 0.002, "loss": 2.3447, "step": 39400 }, { "epoch": 0.15234803853350035, "grad_norm": 0.11607711017131805, "learning_rate": 0.002, "loss": 2.3781, "step": 39410 }, { "epoch": 0.1523866957368836, "grad_norm": 0.10342895984649658, "learning_rate": 0.002, "loss": 2.3628, "step": 39420 }, { "epoch": 0.15242535294026688, "grad_norm": 0.09878204017877579, "learning_rate": 0.002, "loss": 2.362, "step": 39430 }, { "epoch": 0.15246401014365016, "grad_norm": 0.12246676534414291, "learning_rate": 0.002, "loss": 2.3642, "step": 39440 }, { "epoch": 0.15250266734703344, "grad_norm": 0.10982445627450943, "learning_rate": 0.002, "loss": 2.3782, "step": 39450 }, { "epoch": 0.15254132455041672, "grad_norm": 0.10152310132980347, "learning_rate": 0.002, "loss": 2.3669, "step": 39460 }, { "epoch": 0.1525799817538, "grad_norm": 0.11473408341407776, "learning_rate": 0.002, "loss": 2.3732, "step": 39470 }, { "epoch": 0.15261863895718328, "grad_norm": 0.12105746567249298, "learning_rate": 0.002, "loss": 2.37, "step": 39480 }, { "epoch": 0.15265729616056656, "grad_norm": 0.11079411208629608, "learning_rate": 0.002, "loss": 2.3599, "step": 39490 }, { "epoch": 0.15269595336394984, "grad_norm": 0.1170242428779602, "learning_rate": 0.002, "loss": 2.3558, "step": 39500 }, { "epoch": 0.15273461056733312, "grad_norm": 0.09397020190954208, "learning_rate": 0.002, "loss": 2.3534, "step": 39510 }, { "epoch": 0.1527732677707164, "grad_norm": 0.09887134283781052, "learning_rate": 0.002, "loss": 2.3649, "step": 39520 }, { "epoch": 0.15281192497409968, "grad_norm": 0.10110117495059967, "learning_rate": 0.002, "loss": 2.3618, "step": 39530 }, { "epoch": 0.15285058217748296, "grad_norm": 0.09449794888496399, "learning_rate": 0.002, "loss": 2.3545, "step": 39540 }, { "epoch": 0.15288923938086624, "grad_norm": 0.11885342746973038, "learning_rate": 0.002, "loss": 2.3612, "step": 39550 }, { "epoch": 0.15292789658424952, "grad_norm": 0.10996508598327637, "learning_rate": 0.002, "loss": 2.3569, "step": 39560 }, { "epoch": 0.1529665537876328, "grad_norm": 0.1292993724346161, "learning_rate": 0.002, "loss": 2.3612, "step": 39570 }, { "epoch": 0.15300521099101608, "grad_norm": 0.11036507785320282, "learning_rate": 0.002, "loss": 2.3609, "step": 39580 }, { "epoch": 0.15304386819439936, "grad_norm": 0.10646365582942963, "learning_rate": 0.002, "loss": 2.3711, "step": 39590 }, { "epoch": 0.1530825253977826, "grad_norm": 0.11589569598436356, "learning_rate": 0.002, "loss": 2.3682, "step": 39600 }, { "epoch": 0.1531211826011659, "grad_norm": 0.1215120479464531, "learning_rate": 0.002, "loss": 2.3558, "step": 39610 }, { "epoch": 0.15315983980454917, "grad_norm": 0.1089867576956749, "learning_rate": 0.002, "loss": 2.3785, "step": 39620 }, { "epoch": 0.15319849700793245, "grad_norm": 0.11142711341381073, "learning_rate": 0.002, "loss": 2.3571, "step": 39630 }, { "epoch": 0.15323715421131573, "grad_norm": 0.11389360576868057, "learning_rate": 0.002, "loss": 2.3545, "step": 39640 }, { "epoch": 0.153275811414699, "grad_norm": 0.1226469874382019, "learning_rate": 0.002, "loss": 2.372, "step": 39650 }, { "epoch": 0.1533144686180823, "grad_norm": 0.11769437789916992, "learning_rate": 0.002, "loss": 2.3656, "step": 39660 }, { "epoch": 0.15335312582146557, "grad_norm": 0.12982621788978577, "learning_rate": 0.002, "loss": 2.3556, "step": 39670 }, { "epoch": 0.15339178302484885, "grad_norm": 0.11239667981863022, "learning_rate": 0.002, "loss": 2.3632, "step": 39680 }, { "epoch": 0.15343044022823213, "grad_norm": 0.1196182444691658, "learning_rate": 0.002, "loss": 2.3681, "step": 39690 }, { "epoch": 0.1534690974316154, "grad_norm": 0.1112075224518776, "learning_rate": 0.002, "loss": 2.3641, "step": 39700 }, { "epoch": 0.1535077546349987, "grad_norm": 0.10573304444551468, "learning_rate": 0.002, "loss": 2.3763, "step": 39710 }, { "epoch": 0.15354641183838197, "grad_norm": 0.11556225270032883, "learning_rate": 0.002, "loss": 2.3729, "step": 39720 }, { "epoch": 0.15358506904176525, "grad_norm": 0.10684444010257721, "learning_rate": 0.002, "loss": 2.3765, "step": 39730 }, { "epoch": 0.15362372624514853, "grad_norm": 0.1147504448890686, "learning_rate": 0.002, "loss": 2.3613, "step": 39740 }, { "epoch": 0.1536623834485318, "grad_norm": 0.11106063425540924, "learning_rate": 0.002, "loss": 2.3856, "step": 39750 }, { "epoch": 0.1537010406519151, "grad_norm": 0.11069447547197342, "learning_rate": 0.002, "loss": 2.362, "step": 39760 }, { "epoch": 0.15373969785529837, "grad_norm": 0.10675516724586487, "learning_rate": 0.002, "loss": 2.3502, "step": 39770 }, { "epoch": 0.15377835505868165, "grad_norm": 0.11702346801757812, "learning_rate": 0.002, "loss": 2.3593, "step": 39780 }, { "epoch": 0.1538170122620649, "grad_norm": 0.12388580292463303, "learning_rate": 0.002, "loss": 2.3603, "step": 39790 }, { "epoch": 0.15385566946544818, "grad_norm": 0.140107199549675, "learning_rate": 0.002, "loss": 2.3749, "step": 39800 }, { "epoch": 0.15389432666883146, "grad_norm": 0.09637805819511414, "learning_rate": 0.002, "loss": 2.3641, "step": 39810 }, { "epoch": 0.15393298387221474, "grad_norm": 0.10908488184213638, "learning_rate": 0.002, "loss": 2.363, "step": 39820 }, { "epoch": 0.15397164107559802, "grad_norm": 0.09757030755281448, "learning_rate": 0.002, "loss": 2.3756, "step": 39830 }, { "epoch": 0.1540102982789813, "grad_norm": 0.1076817587018013, "learning_rate": 0.002, "loss": 2.3492, "step": 39840 }, { "epoch": 0.15404895548236458, "grad_norm": 0.11481721699237823, "learning_rate": 0.002, "loss": 2.3564, "step": 39850 }, { "epoch": 0.15408761268574786, "grad_norm": 0.09377177059650421, "learning_rate": 0.002, "loss": 2.3621, "step": 39860 }, { "epoch": 0.15412626988913114, "grad_norm": 0.1203828752040863, "learning_rate": 0.002, "loss": 2.3673, "step": 39870 }, { "epoch": 0.15416492709251442, "grad_norm": 0.11628779768943787, "learning_rate": 0.002, "loss": 2.3714, "step": 39880 }, { "epoch": 0.1542035842958977, "grad_norm": 0.10759463161230087, "learning_rate": 0.002, "loss": 2.3732, "step": 39890 }, { "epoch": 0.15424224149928098, "grad_norm": 0.10259198397397995, "learning_rate": 0.002, "loss": 2.373, "step": 39900 }, { "epoch": 0.15428089870266426, "grad_norm": 0.10605776309967041, "learning_rate": 0.002, "loss": 2.3702, "step": 39910 }, { "epoch": 0.15431955590604754, "grad_norm": 0.5578303337097168, "learning_rate": 0.002, "loss": 2.3749, "step": 39920 }, { "epoch": 0.15435821310943082, "grad_norm": 0.13610078394412994, "learning_rate": 0.002, "loss": 2.3571, "step": 39930 }, { "epoch": 0.1543968703128141, "grad_norm": 0.10285218060016632, "learning_rate": 0.002, "loss": 2.3712, "step": 39940 }, { "epoch": 0.15443552751619738, "grad_norm": 0.10524322837591171, "learning_rate": 0.002, "loss": 2.3615, "step": 39950 }, { "epoch": 0.15447418471958066, "grad_norm": 0.10412564873695374, "learning_rate": 0.002, "loss": 2.3677, "step": 39960 }, { "epoch": 0.1545128419229639, "grad_norm": 0.11148788779973984, "learning_rate": 0.002, "loss": 2.3673, "step": 39970 }, { "epoch": 0.1545514991263472, "grad_norm": 0.1292443871498108, "learning_rate": 0.002, "loss": 2.3759, "step": 39980 }, { "epoch": 0.15459015632973047, "grad_norm": 0.08943097293376923, "learning_rate": 0.002, "loss": 2.3746, "step": 39990 }, { "epoch": 0.15462881353311375, "grad_norm": 0.09316378086805344, "learning_rate": 0.002, "loss": 2.3567, "step": 40000 }, { "epoch": 0.15466747073649703, "grad_norm": 0.11126011610031128, "learning_rate": 0.002, "loss": 2.3571, "step": 40010 }, { "epoch": 0.1547061279398803, "grad_norm": 0.11391105502843857, "learning_rate": 0.002, "loss": 2.3559, "step": 40020 }, { "epoch": 0.1547447851432636, "grad_norm": 0.10801863670349121, "learning_rate": 0.002, "loss": 2.3893, "step": 40030 }, { "epoch": 0.15478344234664687, "grad_norm": 0.09264006465673447, "learning_rate": 0.002, "loss": 2.3738, "step": 40040 }, { "epoch": 0.15482209955003015, "grad_norm": 0.10908648371696472, "learning_rate": 0.002, "loss": 2.3516, "step": 40050 }, { "epoch": 0.15486075675341343, "grad_norm": 0.11322391778230667, "learning_rate": 0.002, "loss": 2.37, "step": 40060 }, { "epoch": 0.1548994139567967, "grad_norm": 0.10048907995223999, "learning_rate": 0.002, "loss": 2.3564, "step": 40070 }, { "epoch": 0.15493807116018, "grad_norm": 0.11480753123760223, "learning_rate": 0.002, "loss": 2.3621, "step": 40080 }, { "epoch": 0.15497672836356327, "grad_norm": 0.11866872012615204, "learning_rate": 0.002, "loss": 2.3507, "step": 40090 }, { "epoch": 0.15501538556694655, "grad_norm": 0.12018725275993347, "learning_rate": 0.002, "loss": 2.359, "step": 40100 }, { "epoch": 0.15505404277032983, "grad_norm": 0.11524038016796112, "learning_rate": 0.002, "loss": 2.3739, "step": 40110 }, { "epoch": 0.1550926999737131, "grad_norm": 0.11069086194038391, "learning_rate": 0.002, "loss": 2.3658, "step": 40120 }, { "epoch": 0.1551313571770964, "grad_norm": 0.11821892112493515, "learning_rate": 0.002, "loss": 2.3698, "step": 40130 }, { "epoch": 0.15517001438047967, "grad_norm": 0.11484308540821075, "learning_rate": 0.002, "loss": 2.3557, "step": 40140 }, { "epoch": 0.15520867158386295, "grad_norm": 0.09898848831653595, "learning_rate": 0.002, "loss": 2.3653, "step": 40150 }, { "epoch": 0.1552473287872462, "grad_norm": 0.10578508675098419, "learning_rate": 0.002, "loss": 2.3546, "step": 40160 }, { "epoch": 0.15528598599062948, "grad_norm": 0.09843472391366959, "learning_rate": 0.002, "loss": 2.3538, "step": 40170 }, { "epoch": 0.15532464319401276, "grad_norm": 0.11496349424123764, "learning_rate": 0.002, "loss": 2.3656, "step": 40180 }, { "epoch": 0.15536330039739604, "grad_norm": 0.11298873275518417, "learning_rate": 0.002, "loss": 2.369, "step": 40190 }, { "epoch": 0.15540195760077932, "grad_norm": 0.1040901467204094, "learning_rate": 0.002, "loss": 2.3657, "step": 40200 }, { "epoch": 0.1554406148041626, "grad_norm": 0.1098884865641594, "learning_rate": 0.002, "loss": 2.3633, "step": 40210 }, { "epoch": 0.15547927200754588, "grad_norm": 0.11201644688844681, "learning_rate": 0.002, "loss": 2.3807, "step": 40220 }, { "epoch": 0.15551792921092916, "grad_norm": 0.12052586674690247, "learning_rate": 0.002, "loss": 2.3707, "step": 40230 }, { "epoch": 0.15555658641431244, "grad_norm": 0.09976432472467422, "learning_rate": 0.002, "loss": 2.3726, "step": 40240 }, { "epoch": 0.15559524361769572, "grad_norm": 0.09134536236524582, "learning_rate": 0.002, "loss": 2.3534, "step": 40250 }, { "epoch": 0.155633900821079, "grad_norm": 0.12106562405824661, "learning_rate": 0.002, "loss": 2.3548, "step": 40260 }, { "epoch": 0.15567255802446228, "grad_norm": 0.13005444407463074, "learning_rate": 0.002, "loss": 2.3725, "step": 40270 }, { "epoch": 0.15571121522784556, "grad_norm": 0.11198069900274277, "learning_rate": 0.002, "loss": 2.3671, "step": 40280 }, { "epoch": 0.15574987243122884, "grad_norm": 0.13218308985233307, "learning_rate": 0.002, "loss": 2.3715, "step": 40290 }, { "epoch": 0.15578852963461212, "grad_norm": 0.11125843226909637, "learning_rate": 0.002, "loss": 2.3604, "step": 40300 }, { "epoch": 0.1558271868379954, "grad_norm": 0.10655783861875534, "learning_rate": 0.002, "loss": 2.3667, "step": 40310 }, { "epoch": 0.15586584404137868, "grad_norm": 0.11717306077480316, "learning_rate": 0.002, "loss": 2.3722, "step": 40320 }, { "epoch": 0.15590450124476196, "grad_norm": 0.12472710013389587, "learning_rate": 0.002, "loss": 2.3557, "step": 40330 }, { "epoch": 0.1559431584481452, "grad_norm": 0.10040798783302307, "learning_rate": 0.002, "loss": 2.3578, "step": 40340 }, { "epoch": 0.1559818156515285, "grad_norm": 0.10789766907691956, "learning_rate": 0.002, "loss": 2.3783, "step": 40350 }, { "epoch": 0.15602047285491177, "grad_norm": 0.10105174034833908, "learning_rate": 0.002, "loss": 2.3637, "step": 40360 }, { "epoch": 0.15605913005829505, "grad_norm": 0.10405503958463669, "learning_rate": 0.002, "loss": 2.3646, "step": 40370 }, { "epoch": 0.15609778726167833, "grad_norm": 0.12392150610685349, "learning_rate": 0.002, "loss": 2.357, "step": 40380 }, { "epoch": 0.1561364444650616, "grad_norm": 0.11654236912727356, "learning_rate": 0.002, "loss": 2.3713, "step": 40390 }, { "epoch": 0.1561751016684449, "grad_norm": 0.10785891860723495, "learning_rate": 0.002, "loss": 2.3721, "step": 40400 }, { "epoch": 0.15621375887182817, "grad_norm": 0.10248953849077225, "learning_rate": 0.002, "loss": 2.3831, "step": 40410 }, { "epoch": 0.15625241607521145, "grad_norm": 0.09905094653367996, "learning_rate": 0.002, "loss": 2.3577, "step": 40420 }, { "epoch": 0.15629107327859473, "grad_norm": 0.11263404786586761, "learning_rate": 0.002, "loss": 2.3679, "step": 40430 }, { "epoch": 0.156329730481978, "grad_norm": 0.10626372694969177, "learning_rate": 0.002, "loss": 2.3498, "step": 40440 }, { "epoch": 0.1563683876853613, "grad_norm": 0.10268019884824753, "learning_rate": 0.002, "loss": 2.3629, "step": 40450 }, { "epoch": 0.15640704488874457, "grad_norm": 0.1138499453663826, "learning_rate": 0.002, "loss": 2.3598, "step": 40460 }, { "epoch": 0.15644570209212785, "grad_norm": 0.11354564130306244, "learning_rate": 0.002, "loss": 2.3705, "step": 40470 }, { "epoch": 0.15648435929551113, "grad_norm": 0.10203731805086136, "learning_rate": 0.002, "loss": 2.3622, "step": 40480 }, { "epoch": 0.1565230164988944, "grad_norm": 0.14382880926132202, "learning_rate": 0.002, "loss": 2.3671, "step": 40490 }, { "epoch": 0.1565616737022777, "grad_norm": 0.1070975810289383, "learning_rate": 0.002, "loss": 2.367, "step": 40500 }, { "epoch": 0.15660033090566097, "grad_norm": 0.10172320157289505, "learning_rate": 0.002, "loss": 2.3716, "step": 40510 }, { "epoch": 0.15663898810904425, "grad_norm": 0.13020893931388855, "learning_rate": 0.002, "loss": 2.3725, "step": 40520 }, { "epoch": 0.1566776453124275, "grad_norm": 0.11600089818239212, "learning_rate": 0.002, "loss": 2.3548, "step": 40530 }, { "epoch": 0.15671630251581078, "grad_norm": 0.12256909906864166, "learning_rate": 0.002, "loss": 2.3561, "step": 40540 }, { "epoch": 0.15675495971919406, "grad_norm": 0.11398594826459885, "learning_rate": 0.002, "loss": 2.3611, "step": 40550 }, { "epoch": 0.15679361692257734, "grad_norm": 0.10339022427797318, "learning_rate": 0.002, "loss": 2.3552, "step": 40560 }, { "epoch": 0.15683227412596062, "grad_norm": 0.11490005254745483, "learning_rate": 0.002, "loss": 2.3525, "step": 40570 }, { "epoch": 0.1568709313293439, "grad_norm": 0.10776352137327194, "learning_rate": 0.002, "loss": 2.3722, "step": 40580 }, { "epoch": 0.15690958853272718, "grad_norm": 0.09247356653213501, "learning_rate": 0.002, "loss": 2.3831, "step": 40590 }, { "epoch": 0.15694824573611046, "grad_norm": 0.10917262732982635, "learning_rate": 0.002, "loss": 2.3596, "step": 40600 }, { "epoch": 0.15698690293949374, "grad_norm": 0.12253812700510025, "learning_rate": 0.002, "loss": 2.3792, "step": 40610 }, { "epoch": 0.15702556014287702, "grad_norm": 0.09024526178836823, "learning_rate": 0.002, "loss": 2.3661, "step": 40620 }, { "epoch": 0.1570642173462603, "grad_norm": 0.1180238425731659, "learning_rate": 0.002, "loss": 2.3611, "step": 40630 }, { "epoch": 0.15710287454964358, "grad_norm": 0.10546514391899109, "learning_rate": 0.002, "loss": 2.3545, "step": 40640 }, { "epoch": 0.15714153175302686, "grad_norm": 0.10448755323886871, "learning_rate": 0.002, "loss": 2.3629, "step": 40650 }, { "epoch": 0.15718018895641014, "grad_norm": 0.09461455792188644, "learning_rate": 0.002, "loss": 2.3578, "step": 40660 }, { "epoch": 0.15721884615979342, "grad_norm": 0.11377495527267456, "learning_rate": 0.002, "loss": 2.3757, "step": 40670 }, { "epoch": 0.1572575033631767, "grad_norm": 0.11607875674962997, "learning_rate": 0.002, "loss": 2.3553, "step": 40680 }, { "epoch": 0.15729616056655998, "grad_norm": 0.10454504936933517, "learning_rate": 0.002, "loss": 2.3636, "step": 40690 }, { "epoch": 0.15733481776994326, "grad_norm": 0.10954848676919937, "learning_rate": 0.002, "loss": 2.355, "step": 40700 }, { "epoch": 0.15737347497332652, "grad_norm": 0.18937352299690247, "learning_rate": 0.002, "loss": 2.3644, "step": 40710 }, { "epoch": 0.1574121321767098, "grad_norm": 0.09121294319629669, "learning_rate": 0.002, "loss": 2.3709, "step": 40720 }, { "epoch": 0.15745078938009308, "grad_norm": 0.10851619392633438, "learning_rate": 0.002, "loss": 2.3548, "step": 40730 }, { "epoch": 0.15748944658347636, "grad_norm": 0.1146823838353157, "learning_rate": 0.002, "loss": 2.3761, "step": 40740 }, { "epoch": 0.15752810378685964, "grad_norm": 0.11195283383131027, "learning_rate": 0.002, "loss": 2.3721, "step": 40750 }, { "epoch": 0.15756676099024292, "grad_norm": 0.09316971898078918, "learning_rate": 0.002, "loss": 2.3622, "step": 40760 }, { "epoch": 0.1576054181936262, "grad_norm": 0.11370929330587387, "learning_rate": 0.002, "loss": 2.3672, "step": 40770 }, { "epoch": 0.15764407539700948, "grad_norm": 0.11564512550830841, "learning_rate": 0.002, "loss": 2.371, "step": 40780 }, { "epoch": 0.15768273260039276, "grad_norm": 0.11623459309339523, "learning_rate": 0.002, "loss": 2.3564, "step": 40790 }, { "epoch": 0.15772138980377604, "grad_norm": 0.09869327396154404, "learning_rate": 0.002, "loss": 2.344, "step": 40800 }, { "epoch": 0.15776004700715932, "grad_norm": 0.12094533443450928, "learning_rate": 0.002, "loss": 2.3608, "step": 40810 }, { "epoch": 0.1577987042105426, "grad_norm": 0.10380878299474716, "learning_rate": 0.002, "loss": 2.3584, "step": 40820 }, { "epoch": 0.15783736141392588, "grad_norm": 0.1145130842924118, "learning_rate": 0.002, "loss": 2.3672, "step": 40830 }, { "epoch": 0.15787601861730916, "grad_norm": 0.109002023935318, "learning_rate": 0.002, "loss": 2.3693, "step": 40840 }, { "epoch": 0.15791467582069244, "grad_norm": 0.1000434085726738, "learning_rate": 0.002, "loss": 2.3587, "step": 40850 }, { "epoch": 0.15795333302407571, "grad_norm": 0.1145804226398468, "learning_rate": 0.002, "loss": 2.3698, "step": 40860 }, { "epoch": 0.157991990227459, "grad_norm": 0.09565671533346176, "learning_rate": 0.002, "loss": 2.3603, "step": 40870 }, { "epoch": 0.15803064743084227, "grad_norm": 0.09479733556509018, "learning_rate": 0.002, "loss": 2.3591, "step": 40880 }, { "epoch": 0.15806930463422555, "grad_norm": 0.09191577136516571, "learning_rate": 0.002, "loss": 2.3669, "step": 40890 }, { "epoch": 0.1581079618376088, "grad_norm": 0.10246901214122772, "learning_rate": 0.002, "loss": 2.3655, "step": 40900 }, { "epoch": 0.1581466190409921, "grad_norm": 0.11019384860992432, "learning_rate": 0.002, "loss": 2.3679, "step": 40910 }, { "epoch": 0.15818527624437537, "grad_norm": 0.10496606677770615, "learning_rate": 0.002, "loss": 2.3605, "step": 40920 }, { "epoch": 0.15822393344775865, "grad_norm": 0.12050442397594452, "learning_rate": 0.002, "loss": 2.3632, "step": 40930 }, { "epoch": 0.15826259065114193, "grad_norm": 0.1075400784611702, "learning_rate": 0.002, "loss": 2.3835, "step": 40940 }, { "epoch": 0.1583012478545252, "grad_norm": 0.10765066742897034, "learning_rate": 0.002, "loss": 2.3694, "step": 40950 }, { "epoch": 0.1583399050579085, "grad_norm": 0.1123281940817833, "learning_rate": 0.002, "loss": 2.359, "step": 40960 }, { "epoch": 0.15837856226129177, "grad_norm": 0.12240695208311081, "learning_rate": 0.002, "loss": 2.3685, "step": 40970 }, { "epoch": 0.15841721946467505, "grad_norm": 0.12359564006328583, "learning_rate": 0.002, "loss": 2.3581, "step": 40980 }, { "epoch": 0.15845587666805833, "grad_norm": 0.12420374900102615, "learning_rate": 0.002, "loss": 2.3679, "step": 40990 }, { "epoch": 0.1584945338714416, "grad_norm": 0.10608118772506714, "learning_rate": 0.002, "loss": 2.392, "step": 41000 }, { "epoch": 0.1585331910748249, "grad_norm": 0.11344565451145172, "learning_rate": 0.002, "loss": 2.3708, "step": 41010 }, { "epoch": 0.15857184827820817, "grad_norm": 0.10235242545604706, "learning_rate": 0.002, "loss": 2.3733, "step": 41020 }, { "epoch": 0.15861050548159145, "grad_norm": 0.11939121037721634, "learning_rate": 0.002, "loss": 2.3701, "step": 41030 }, { "epoch": 0.15864916268497473, "grad_norm": 0.1067543774843216, "learning_rate": 0.002, "loss": 2.3662, "step": 41040 }, { "epoch": 0.158687819888358, "grad_norm": 0.10151252895593643, "learning_rate": 0.002, "loss": 2.3769, "step": 41050 }, { "epoch": 0.15872647709174129, "grad_norm": 0.10624859482049942, "learning_rate": 0.002, "loss": 2.3709, "step": 41060 }, { "epoch": 0.15876513429512457, "grad_norm": 0.11090574413537979, "learning_rate": 0.002, "loss": 2.3647, "step": 41070 }, { "epoch": 0.15880379149850785, "grad_norm": 0.10871037095785141, "learning_rate": 0.002, "loss": 2.357, "step": 41080 }, { "epoch": 0.1588424487018911, "grad_norm": 0.094578817486763, "learning_rate": 0.002, "loss": 2.3623, "step": 41090 }, { "epoch": 0.15888110590527438, "grad_norm": 0.12802806496620178, "learning_rate": 0.002, "loss": 2.3714, "step": 41100 }, { "epoch": 0.15891976310865766, "grad_norm": 0.15294228494167328, "learning_rate": 0.002, "loss": 2.3617, "step": 41110 }, { "epoch": 0.15895842031204094, "grad_norm": 0.11911449581384659, "learning_rate": 0.002, "loss": 2.3656, "step": 41120 }, { "epoch": 0.15899707751542422, "grad_norm": 0.10868462920188904, "learning_rate": 0.002, "loss": 2.368, "step": 41130 }, { "epoch": 0.1590357347188075, "grad_norm": 0.1078554317355156, "learning_rate": 0.002, "loss": 2.3671, "step": 41140 }, { "epoch": 0.15907439192219078, "grad_norm": 0.11344462633132935, "learning_rate": 0.002, "loss": 2.3682, "step": 41150 }, { "epoch": 0.15911304912557406, "grad_norm": 0.10913313180208206, "learning_rate": 0.002, "loss": 2.3748, "step": 41160 }, { "epoch": 0.15915170632895734, "grad_norm": 0.094475157558918, "learning_rate": 0.002, "loss": 2.3649, "step": 41170 }, { "epoch": 0.15919036353234062, "grad_norm": 0.10378986597061157, "learning_rate": 0.002, "loss": 2.3711, "step": 41180 }, { "epoch": 0.1592290207357239, "grad_norm": 0.12117094546556473, "learning_rate": 0.002, "loss": 2.3682, "step": 41190 }, { "epoch": 0.15926767793910718, "grad_norm": 0.10164425522089005, "learning_rate": 0.002, "loss": 2.3696, "step": 41200 }, { "epoch": 0.15930633514249046, "grad_norm": 0.11677688360214233, "learning_rate": 0.002, "loss": 2.3519, "step": 41210 }, { "epoch": 0.15934499234587374, "grad_norm": 0.10177718847990036, "learning_rate": 0.002, "loss": 2.3645, "step": 41220 }, { "epoch": 0.15938364954925702, "grad_norm": 0.12472458928823471, "learning_rate": 0.002, "loss": 2.3513, "step": 41230 }, { "epoch": 0.1594223067526403, "grad_norm": 0.09913904964923859, "learning_rate": 0.002, "loss": 2.3657, "step": 41240 }, { "epoch": 0.15946096395602358, "grad_norm": 0.11345645785331726, "learning_rate": 0.002, "loss": 2.3745, "step": 41250 }, { "epoch": 0.15949962115940686, "grad_norm": 0.10212550312280655, "learning_rate": 0.002, "loss": 2.3663, "step": 41260 }, { "epoch": 0.1595382783627901, "grad_norm": 0.09881438314914703, "learning_rate": 0.002, "loss": 2.3634, "step": 41270 }, { "epoch": 0.1595769355661734, "grad_norm": 0.108587846159935, "learning_rate": 0.002, "loss": 2.3587, "step": 41280 }, { "epoch": 0.15961559276955667, "grad_norm": 0.12372417002916336, "learning_rate": 0.002, "loss": 2.3773, "step": 41290 }, { "epoch": 0.15965424997293995, "grad_norm": 0.11009825766086578, "learning_rate": 0.002, "loss": 2.3614, "step": 41300 }, { "epoch": 0.15969290717632323, "grad_norm": 0.09615076333284378, "learning_rate": 0.002, "loss": 2.3773, "step": 41310 }, { "epoch": 0.1597315643797065, "grad_norm": 0.10243486613035202, "learning_rate": 0.002, "loss": 2.3686, "step": 41320 }, { "epoch": 0.1597702215830898, "grad_norm": 0.10789047926664352, "learning_rate": 0.002, "loss": 2.3657, "step": 41330 }, { "epoch": 0.15980887878647307, "grad_norm": 0.10097122937440872, "learning_rate": 0.002, "loss": 2.3678, "step": 41340 }, { "epoch": 0.15984753598985635, "grad_norm": 0.11414719372987747, "learning_rate": 0.002, "loss": 2.3632, "step": 41350 }, { "epoch": 0.15988619319323963, "grad_norm": 0.11628225445747375, "learning_rate": 0.002, "loss": 2.3744, "step": 41360 }, { "epoch": 0.1599248503966229, "grad_norm": 0.10371656715869904, "learning_rate": 0.002, "loss": 2.3691, "step": 41370 }, { "epoch": 0.1599635076000062, "grad_norm": 0.10703284293413162, "learning_rate": 0.002, "loss": 2.3646, "step": 41380 }, { "epoch": 0.16000216480338947, "grad_norm": 0.1234053373336792, "learning_rate": 0.002, "loss": 2.3462, "step": 41390 }, { "epoch": 0.16004082200677275, "grad_norm": 0.11645738780498505, "learning_rate": 0.002, "loss": 2.3677, "step": 41400 }, { "epoch": 0.16007947921015603, "grad_norm": 0.10837571322917938, "learning_rate": 0.002, "loss": 2.3723, "step": 41410 }, { "epoch": 0.1601181364135393, "grad_norm": 0.11016348004341125, "learning_rate": 0.002, "loss": 2.3506, "step": 41420 }, { "epoch": 0.1601567936169226, "grad_norm": 0.11296390742063522, "learning_rate": 0.002, "loss": 2.3667, "step": 41430 }, { "epoch": 0.16019545082030587, "grad_norm": 0.11930079013109207, "learning_rate": 0.002, "loss": 2.3669, "step": 41440 }, { "epoch": 0.16023410802368915, "grad_norm": 0.10097251832485199, "learning_rate": 0.002, "loss": 2.3615, "step": 41450 }, { "epoch": 0.1602727652270724, "grad_norm": 0.10060349106788635, "learning_rate": 0.002, "loss": 2.3556, "step": 41460 }, { "epoch": 0.16031142243045568, "grad_norm": 0.10195745527744293, "learning_rate": 0.002, "loss": 2.3516, "step": 41470 }, { "epoch": 0.16035007963383896, "grad_norm": 0.10063726454973221, "learning_rate": 0.002, "loss": 2.3772, "step": 41480 }, { "epoch": 0.16038873683722224, "grad_norm": 0.11097043007612228, "learning_rate": 0.002, "loss": 2.3642, "step": 41490 }, { "epoch": 0.16042739404060552, "grad_norm": 0.10367852449417114, "learning_rate": 0.002, "loss": 2.3648, "step": 41500 }, { "epoch": 0.1604660512439888, "grad_norm": 0.1086919978260994, "learning_rate": 0.002, "loss": 2.3576, "step": 41510 }, { "epoch": 0.16050470844737208, "grad_norm": 0.10193134099245071, "learning_rate": 0.002, "loss": 2.3573, "step": 41520 }, { "epoch": 0.16054336565075536, "grad_norm": 0.10302462428808212, "learning_rate": 0.002, "loss": 2.3726, "step": 41530 }, { "epoch": 0.16058202285413864, "grad_norm": 0.11917942762374878, "learning_rate": 0.002, "loss": 2.3657, "step": 41540 }, { "epoch": 0.16062068005752192, "grad_norm": 0.11485613137483597, "learning_rate": 0.002, "loss": 2.3567, "step": 41550 }, { "epoch": 0.1606593372609052, "grad_norm": 0.11260788142681122, "learning_rate": 0.002, "loss": 2.3741, "step": 41560 }, { "epoch": 0.16069799446428848, "grad_norm": 0.10732334852218628, "learning_rate": 0.002, "loss": 2.3604, "step": 41570 }, { "epoch": 0.16073665166767176, "grad_norm": 0.12386949360370636, "learning_rate": 0.002, "loss": 2.3662, "step": 41580 }, { "epoch": 0.16077530887105504, "grad_norm": 0.11389769613742828, "learning_rate": 0.002, "loss": 2.3545, "step": 41590 }, { "epoch": 0.16081396607443832, "grad_norm": 0.10484343022108078, "learning_rate": 0.002, "loss": 2.3486, "step": 41600 }, { "epoch": 0.1608526232778216, "grad_norm": 0.11448004841804504, "learning_rate": 0.002, "loss": 2.3647, "step": 41610 }, { "epoch": 0.16089128048120488, "grad_norm": 0.1158195436000824, "learning_rate": 0.002, "loss": 2.3711, "step": 41620 }, { "epoch": 0.16092993768458816, "grad_norm": 0.10200965404510498, "learning_rate": 0.002, "loss": 2.3738, "step": 41630 }, { "epoch": 0.1609685948879714, "grad_norm": 0.1114157885313034, "learning_rate": 0.002, "loss": 2.3716, "step": 41640 }, { "epoch": 0.1610072520913547, "grad_norm": 0.14126089215278625, "learning_rate": 0.002, "loss": 2.3696, "step": 41650 }, { "epoch": 0.16104590929473797, "grad_norm": 0.09708284586668015, "learning_rate": 0.002, "loss": 2.3719, "step": 41660 }, { "epoch": 0.16108456649812125, "grad_norm": 0.10202867537736893, "learning_rate": 0.002, "loss": 2.3575, "step": 41670 }, { "epoch": 0.16112322370150453, "grad_norm": 0.10208814591169357, "learning_rate": 0.002, "loss": 2.3533, "step": 41680 }, { "epoch": 0.1611618809048878, "grad_norm": 0.1131867989897728, "learning_rate": 0.002, "loss": 2.3672, "step": 41690 }, { "epoch": 0.1612005381082711, "grad_norm": 0.12291889637708664, "learning_rate": 0.002, "loss": 2.3683, "step": 41700 }, { "epoch": 0.16123919531165437, "grad_norm": 0.12259145081043243, "learning_rate": 0.002, "loss": 2.3866, "step": 41710 }, { "epoch": 0.16127785251503765, "grad_norm": 0.12137303501367569, "learning_rate": 0.002, "loss": 2.3738, "step": 41720 }, { "epoch": 0.16131650971842093, "grad_norm": 0.09981673955917358, "learning_rate": 0.002, "loss": 2.3658, "step": 41730 }, { "epoch": 0.1613551669218042, "grad_norm": 0.09920236468315125, "learning_rate": 0.002, "loss": 2.3495, "step": 41740 }, { "epoch": 0.1613938241251875, "grad_norm": 0.10953893512487411, "learning_rate": 0.002, "loss": 2.3602, "step": 41750 }, { "epoch": 0.16143248132857077, "grad_norm": 0.12220784276723862, "learning_rate": 0.002, "loss": 2.358, "step": 41760 }, { "epoch": 0.16147113853195405, "grad_norm": 0.11718578636646271, "learning_rate": 0.002, "loss": 2.3576, "step": 41770 }, { "epoch": 0.16150979573533733, "grad_norm": 0.10739953815937042, "learning_rate": 0.002, "loss": 2.3641, "step": 41780 }, { "epoch": 0.1615484529387206, "grad_norm": 0.099624864757061, "learning_rate": 0.002, "loss": 2.3685, "step": 41790 }, { "epoch": 0.1615871101421039, "grad_norm": 0.10801070928573608, "learning_rate": 0.002, "loss": 2.359, "step": 41800 }, { "epoch": 0.16162576734548717, "grad_norm": 0.10810569673776627, "learning_rate": 0.002, "loss": 2.3557, "step": 41810 }, { "epoch": 0.16166442454887045, "grad_norm": 0.09924687445163727, "learning_rate": 0.002, "loss": 2.3627, "step": 41820 }, { "epoch": 0.1617030817522537, "grad_norm": 0.10863161087036133, "learning_rate": 0.002, "loss": 2.3732, "step": 41830 }, { "epoch": 0.16174173895563698, "grad_norm": 0.1171480268239975, "learning_rate": 0.002, "loss": 2.3598, "step": 41840 }, { "epoch": 0.16178039615902026, "grad_norm": 0.12051352858543396, "learning_rate": 0.002, "loss": 2.3733, "step": 41850 }, { "epoch": 0.16181905336240354, "grad_norm": 0.11164912581443787, "learning_rate": 0.002, "loss": 2.361, "step": 41860 }, { "epoch": 0.16185771056578682, "grad_norm": 0.10219510644674301, "learning_rate": 0.002, "loss": 2.3714, "step": 41870 }, { "epoch": 0.1618963677691701, "grad_norm": 0.1181781142950058, "learning_rate": 0.002, "loss": 2.3634, "step": 41880 }, { "epoch": 0.16193502497255338, "grad_norm": 0.10184772312641144, "learning_rate": 0.002, "loss": 2.3652, "step": 41890 }, { "epoch": 0.16197368217593666, "grad_norm": 0.09413854032754898, "learning_rate": 0.002, "loss": 2.3493, "step": 41900 }, { "epoch": 0.16201233937931994, "grad_norm": 0.09799924492835999, "learning_rate": 0.002, "loss": 2.3545, "step": 41910 }, { "epoch": 0.16205099658270322, "grad_norm": 0.09872174263000488, "learning_rate": 0.002, "loss": 2.3568, "step": 41920 }, { "epoch": 0.1620896537860865, "grad_norm": 0.10964304208755493, "learning_rate": 0.002, "loss": 2.367, "step": 41930 }, { "epoch": 0.16212831098946978, "grad_norm": 0.1020277738571167, "learning_rate": 0.002, "loss": 2.3608, "step": 41940 }, { "epoch": 0.16216696819285306, "grad_norm": 0.10835389047861099, "learning_rate": 0.002, "loss": 2.365, "step": 41950 }, { "epoch": 0.16220562539623634, "grad_norm": 0.0997660756111145, "learning_rate": 0.002, "loss": 2.3596, "step": 41960 }, { "epoch": 0.16224428259961962, "grad_norm": 0.11345890164375305, "learning_rate": 0.002, "loss": 2.3569, "step": 41970 }, { "epoch": 0.1622829398030029, "grad_norm": 0.10423390567302704, "learning_rate": 0.002, "loss": 2.3613, "step": 41980 }, { "epoch": 0.16232159700638618, "grad_norm": 0.1084778755903244, "learning_rate": 0.002, "loss": 2.3832, "step": 41990 }, { "epoch": 0.16236025420976946, "grad_norm": 0.09999390691518784, "learning_rate": 0.002, "loss": 2.3576, "step": 42000 }, { "epoch": 0.1623989114131527, "grad_norm": 0.10016658902168274, "learning_rate": 0.002, "loss": 2.3645, "step": 42010 }, { "epoch": 0.162437568616536, "grad_norm": 0.10784460604190826, "learning_rate": 0.002, "loss": 2.3679, "step": 42020 }, { "epoch": 0.16247622581991927, "grad_norm": 0.10587889701128006, "learning_rate": 0.002, "loss": 2.3587, "step": 42030 }, { "epoch": 0.16251488302330255, "grad_norm": 0.1207164004445076, "learning_rate": 0.002, "loss": 2.3601, "step": 42040 }, { "epoch": 0.16255354022668583, "grad_norm": 0.109578438103199, "learning_rate": 0.002, "loss": 2.3661, "step": 42050 }, { "epoch": 0.1625921974300691, "grad_norm": 0.12938733398914337, "learning_rate": 0.002, "loss": 2.3726, "step": 42060 }, { "epoch": 0.1626308546334524, "grad_norm": 0.10855179280042648, "learning_rate": 0.002, "loss": 2.3616, "step": 42070 }, { "epoch": 0.16266951183683567, "grad_norm": 0.1001514419913292, "learning_rate": 0.002, "loss": 2.3605, "step": 42080 }, { "epoch": 0.16270816904021895, "grad_norm": 0.10790702700614929, "learning_rate": 0.002, "loss": 2.3669, "step": 42090 }, { "epoch": 0.16274682624360223, "grad_norm": 0.11273425817489624, "learning_rate": 0.002, "loss": 2.3557, "step": 42100 }, { "epoch": 0.1627854834469855, "grad_norm": 0.12000437825918198, "learning_rate": 0.002, "loss": 2.3623, "step": 42110 }, { "epoch": 0.1628241406503688, "grad_norm": 0.10086032003164291, "learning_rate": 0.002, "loss": 2.3658, "step": 42120 }, { "epoch": 0.16286279785375207, "grad_norm": 0.11639446020126343, "learning_rate": 0.002, "loss": 2.3726, "step": 42130 }, { "epoch": 0.16290145505713535, "grad_norm": 0.1023893803358078, "learning_rate": 0.002, "loss": 2.3565, "step": 42140 }, { "epoch": 0.16294011226051863, "grad_norm": 0.10636291652917862, "learning_rate": 0.002, "loss": 2.3686, "step": 42150 }, { "epoch": 0.1629787694639019, "grad_norm": 0.11687823385000229, "learning_rate": 0.002, "loss": 2.3615, "step": 42160 }, { "epoch": 0.1630174266672852, "grad_norm": 0.11895740032196045, "learning_rate": 0.002, "loss": 2.3734, "step": 42170 }, { "epoch": 0.16305608387066847, "grad_norm": 0.12592259049415588, "learning_rate": 0.002, "loss": 2.3518, "step": 42180 }, { "epoch": 0.16309474107405175, "grad_norm": 0.10185597091913223, "learning_rate": 0.002, "loss": 2.3582, "step": 42190 }, { "epoch": 0.163133398277435, "grad_norm": 0.0969577431678772, "learning_rate": 0.002, "loss": 2.3572, "step": 42200 }, { "epoch": 0.16317205548081828, "grad_norm": 0.1074877604842186, "learning_rate": 0.002, "loss": 2.3637, "step": 42210 }, { "epoch": 0.16321071268420156, "grad_norm": 0.1083407774567604, "learning_rate": 0.002, "loss": 2.3644, "step": 42220 }, { "epoch": 0.16324936988758484, "grad_norm": 0.10659298300743103, "learning_rate": 0.002, "loss": 2.3698, "step": 42230 }, { "epoch": 0.16328802709096812, "grad_norm": 0.12535052001476288, "learning_rate": 0.002, "loss": 2.3748, "step": 42240 }, { "epoch": 0.1633266842943514, "grad_norm": 0.10125640034675598, "learning_rate": 0.002, "loss": 2.3619, "step": 42250 }, { "epoch": 0.16336534149773468, "grad_norm": 0.12252649664878845, "learning_rate": 0.002, "loss": 2.3697, "step": 42260 }, { "epoch": 0.16340399870111796, "grad_norm": 0.10944797098636627, "learning_rate": 0.002, "loss": 2.3532, "step": 42270 }, { "epoch": 0.16344265590450124, "grad_norm": 0.09932339191436768, "learning_rate": 0.002, "loss": 2.3682, "step": 42280 }, { "epoch": 0.16348131310788452, "grad_norm": 0.10842996090650558, "learning_rate": 0.002, "loss": 2.3567, "step": 42290 }, { "epoch": 0.1635199703112678, "grad_norm": 0.10472535341978073, "learning_rate": 0.002, "loss": 2.3742, "step": 42300 }, { "epoch": 0.16355862751465108, "grad_norm": 0.11402362585067749, "learning_rate": 0.002, "loss": 2.3708, "step": 42310 }, { "epoch": 0.16359728471803436, "grad_norm": 0.11580096185207367, "learning_rate": 0.002, "loss": 2.3672, "step": 42320 }, { "epoch": 0.16363594192141764, "grad_norm": 0.1062471866607666, "learning_rate": 0.002, "loss": 2.3597, "step": 42330 }, { "epoch": 0.16367459912480092, "grad_norm": 0.10192801803350449, "learning_rate": 0.002, "loss": 2.3705, "step": 42340 }, { "epoch": 0.1637132563281842, "grad_norm": 0.10680807381868362, "learning_rate": 0.002, "loss": 2.3637, "step": 42350 }, { "epoch": 0.16375191353156748, "grad_norm": 0.1325022429227829, "learning_rate": 0.002, "loss": 2.3685, "step": 42360 }, { "epoch": 0.16379057073495076, "grad_norm": 0.11499712616205215, "learning_rate": 0.002, "loss": 2.363, "step": 42370 }, { "epoch": 0.16382922793833402, "grad_norm": 0.10338990390300751, "learning_rate": 0.002, "loss": 2.3669, "step": 42380 }, { "epoch": 0.1638678851417173, "grad_norm": 0.11115837097167969, "learning_rate": 0.002, "loss": 2.3543, "step": 42390 }, { "epoch": 0.16390654234510058, "grad_norm": 0.09730077534914017, "learning_rate": 0.002, "loss": 2.3491, "step": 42400 }, { "epoch": 0.16394519954848386, "grad_norm": 0.1082802340388298, "learning_rate": 0.002, "loss": 2.3613, "step": 42410 }, { "epoch": 0.16398385675186714, "grad_norm": 0.11049145460128784, "learning_rate": 0.002, "loss": 2.365, "step": 42420 }, { "epoch": 0.16402251395525042, "grad_norm": 0.10516611486673355, "learning_rate": 0.002, "loss": 2.3759, "step": 42430 }, { "epoch": 0.1640611711586337, "grad_norm": 0.11633968353271484, "learning_rate": 0.002, "loss": 2.3634, "step": 42440 }, { "epoch": 0.16409982836201698, "grad_norm": 0.1104530468583107, "learning_rate": 0.002, "loss": 2.3576, "step": 42450 }, { "epoch": 0.16413848556540026, "grad_norm": 0.13360534608364105, "learning_rate": 0.002, "loss": 2.3666, "step": 42460 }, { "epoch": 0.16417714276878353, "grad_norm": 0.11686535179615021, "learning_rate": 0.002, "loss": 2.3521, "step": 42470 }, { "epoch": 0.16421579997216681, "grad_norm": 0.09977176785469055, "learning_rate": 0.002, "loss": 2.3528, "step": 42480 }, { "epoch": 0.1642544571755501, "grad_norm": 0.09932684898376465, "learning_rate": 0.002, "loss": 2.373, "step": 42490 }, { "epoch": 0.16429311437893337, "grad_norm": 0.10714755952358246, "learning_rate": 0.002, "loss": 2.367, "step": 42500 }, { "epoch": 0.16433177158231665, "grad_norm": 0.11574291437864304, "learning_rate": 0.002, "loss": 2.369, "step": 42510 }, { "epoch": 0.16437042878569993, "grad_norm": 0.10226025432348251, "learning_rate": 0.002, "loss": 2.3717, "step": 42520 }, { "epoch": 0.16440908598908321, "grad_norm": 0.13221094012260437, "learning_rate": 0.002, "loss": 2.3716, "step": 42530 }, { "epoch": 0.1644477431924665, "grad_norm": 0.1127496212720871, "learning_rate": 0.002, "loss": 2.3564, "step": 42540 }, { "epoch": 0.16448640039584977, "grad_norm": 0.16812896728515625, "learning_rate": 0.002, "loss": 2.3571, "step": 42550 }, { "epoch": 0.16452505759923305, "grad_norm": 0.11325038224458694, "learning_rate": 0.002, "loss": 2.3892, "step": 42560 }, { "epoch": 0.1645637148026163, "grad_norm": 0.09131856262683868, "learning_rate": 0.002, "loss": 2.3751, "step": 42570 }, { "epoch": 0.1646023720059996, "grad_norm": 0.11105062067508698, "learning_rate": 0.002, "loss": 2.365, "step": 42580 }, { "epoch": 0.16464102920938287, "grad_norm": 0.1120314747095108, "learning_rate": 0.002, "loss": 2.3611, "step": 42590 }, { "epoch": 0.16467968641276615, "grad_norm": 0.119980588555336, "learning_rate": 0.002, "loss": 2.3821, "step": 42600 }, { "epoch": 0.16471834361614943, "grad_norm": 0.08983515202999115, "learning_rate": 0.002, "loss": 2.3771, "step": 42610 }, { "epoch": 0.1647570008195327, "grad_norm": 0.10229698568582535, "learning_rate": 0.002, "loss": 2.352, "step": 42620 }, { "epoch": 0.164795658022916, "grad_norm": 0.10783220827579498, "learning_rate": 0.002, "loss": 2.3656, "step": 42630 }, { "epoch": 0.16483431522629927, "grad_norm": 0.1008199080824852, "learning_rate": 0.002, "loss": 2.3516, "step": 42640 }, { "epoch": 0.16487297242968255, "grad_norm": 0.10238343477249146, "learning_rate": 0.002, "loss": 2.3624, "step": 42650 }, { "epoch": 0.16491162963306583, "grad_norm": 0.11489154398441315, "learning_rate": 0.002, "loss": 2.3752, "step": 42660 }, { "epoch": 0.1649502868364491, "grad_norm": 0.12892086803913116, "learning_rate": 0.002, "loss": 2.373, "step": 42670 }, { "epoch": 0.16498894403983239, "grad_norm": 0.10225165635347366, "learning_rate": 0.002, "loss": 2.3808, "step": 42680 }, { "epoch": 0.16502760124321567, "grad_norm": 0.10173000395298004, "learning_rate": 0.002, "loss": 2.3516, "step": 42690 }, { "epoch": 0.16506625844659895, "grad_norm": 0.11082421243190765, "learning_rate": 0.002, "loss": 2.3713, "step": 42700 }, { "epoch": 0.16510491564998223, "grad_norm": 0.11183352023363113, "learning_rate": 0.002, "loss": 2.3682, "step": 42710 }, { "epoch": 0.1651435728533655, "grad_norm": 0.11176785081624985, "learning_rate": 0.002, "loss": 2.3532, "step": 42720 }, { "epoch": 0.16518223005674879, "grad_norm": 0.10572459548711777, "learning_rate": 0.002, "loss": 2.3621, "step": 42730 }, { "epoch": 0.16522088726013207, "grad_norm": 0.11575083434581757, "learning_rate": 0.002, "loss": 2.3657, "step": 42740 }, { "epoch": 0.16525954446351532, "grad_norm": 0.0935940369963646, "learning_rate": 0.002, "loss": 2.3665, "step": 42750 }, { "epoch": 0.1652982016668986, "grad_norm": 0.12977440655231476, "learning_rate": 0.002, "loss": 2.375, "step": 42760 }, { "epoch": 0.16533685887028188, "grad_norm": 0.10405240952968597, "learning_rate": 0.002, "loss": 2.3699, "step": 42770 }, { "epoch": 0.16537551607366516, "grad_norm": 0.11216165125370026, "learning_rate": 0.002, "loss": 2.356, "step": 42780 }, { "epoch": 0.16541417327704844, "grad_norm": 0.10593120753765106, "learning_rate": 0.002, "loss": 2.3571, "step": 42790 }, { "epoch": 0.16545283048043172, "grad_norm": 0.12728847563266754, "learning_rate": 0.002, "loss": 2.366, "step": 42800 }, { "epoch": 0.165491487683815, "grad_norm": 0.12727369368076324, "learning_rate": 0.002, "loss": 2.3631, "step": 42810 }, { "epoch": 0.16553014488719828, "grad_norm": 0.11364904046058655, "learning_rate": 0.002, "loss": 2.3845, "step": 42820 }, { "epoch": 0.16556880209058156, "grad_norm": 0.09339181333780289, "learning_rate": 0.002, "loss": 2.378, "step": 42830 }, { "epoch": 0.16560745929396484, "grad_norm": 0.14502650499343872, "learning_rate": 0.002, "loss": 2.3592, "step": 42840 }, { "epoch": 0.16564611649734812, "grad_norm": 0.10401785373687744, "learning_rate": 0.002, "loss": 2.364, "step": 42850 }, { "epoch": 0.1656847737007314, "grad_norm": 0.10165320336818695, "learning_rate": 0.002, "loss": 2.3655, "step": 42860 }, { "epoch": 0.16572343090411468, "grad_norm": 0.11069615185260773, "learning_rate": 0.002, "loss": 2.3611, "step": 42870 }, { "epoch": 0.16576208810749796, "grad_norm": 0.11860626935958862, "learning_rate": 0.002, "loss": 2.3584, "step": 42880 }, { "epoch": 0.16580074531088124, "grad_norm": 0.10857722163200378, "learning_rate": 0.002, "loss": 2.3614, "step": 42890 }, { "epoch": 0.16583940251426452, "grad_norm": 0.09501806646585464, "learning_rate": 0.002, "loss": 2.3699, "step": 42900 }, { "epoch": 0.1658780597176478, "grad_norm": 0.13568583130836487, "learning_rate": 0.002, "loss": 2.3774, "step": 42910 }, { "epoch": 0.16591671692103108, "grad_norm": 0.10308828204870224, "learning_rate": 0.002, "loss": 2.3626, "step": 42920 }, { "epoch": 0.16595537412441436, "grad_norm": 0.11815542727708817, "learning_rate": 0.002, "loss": 2.3636, "step": 42930 }, { "epoch": 0.1659940313277976, "grad_norm": 0.11061900109052658, "learning_rate": 0.002, "loss": 2.3538, "step": 42940 }, { "epoch": 0.1660326885311809, "grad_norm": 0.11406237632036209, "learning_rate": 0.002, "loss": 2.3748, "step": 42950 }, { "epoch": 0.16607134573456417, "grad_norm": 0.0945214033126831, "learning_rate": 0.002, "loss": 2.3698, "step": 42960 }, { "epoch": 0.16611000293794745, "grad_norm": 0.11722251772880554, "learning_rate": 0.002, "loss": 2.3619, "step": 42970 }, { "epoch": 0.16614866014133073, "grad_norm": 0.1507737636566162, "learning_rate": 0.002, "loss": 2.372, "step": 42980 }, { "epoch": 0.166187317344714, "grad_norm": 0.11134760081768036, "learning_rate": 0.002, "loss": 2.3787, "step": 42990 }, { "epoch": 0.1662259745480973, "grad_norm": 0.19596156477928162, "learning_rate": 0.002, "loss": 2.3808, "step": 43000 }, { "epoch": 0.16626463175148057, "grad_norm": 0.15305453538894653, "learning_rate": 0.002, "loss": 2.3772, "step": 43010 }, { "epoch": 0.16630328895486385, "grad_norm": 0.11053252220153809, "learning_rate": 0.002, "loss": 2.3756, "step": 43020 }, { "epoch": 0.16634194615824713, "grad_norm": 0.11744187027215958, "learning_rate": 0.002, "loss": 2.3496, "step": 43030 }, { "epoch": 0.1663806033616304, "grad_norm": 0.11429338902235031, "learning_rate": 0.002, "loss": 2.3706, "step": 43040 }, { "epoch": 0.1664192605650137, "grad_norm": 0.12207076698541641, "learning_rate": 0.002, "loss": 2.3546, "step": 43050 }, { "epoch": 0.16645791776839697, "grad_norm": 0.1300329566001892, "learning_rate": 0.002, "loss": 2.3585, "step": 43060 }, { "epoch": 0.16649657497178025, "grad_norm": 0.10230127722024918, "learning_rate": 0.002, "loss": 2.3704, "step": 43070 }, { "epoch": 0.16653523217516353, "grad_norm": 0.11186619848012924, "learning_rate": 0.002, "loss": 2.3643, "step": 43080 }, { "epoch": 0.1665738893785468, "grad_norm": 0.1025962233543396, "learning_rate": 0.002, "loss": 2.379, "step": 43090 }, { "epoch": 0.1666125465819301, "grad_norm": 0.12221942842006683, "learning_rate": 0.002, "loss": 2.3586, "step": 43100 }, { "epoch": 0.16665120378531337, "grad_norm": 0.10172362625598907, "learning_rate": 0.002, "loss": 2.3672, "step": 43110 }, { "epoch": 0.16668986098869665, "grad_norm": 0.09649766981601715, "learning_rate": 0.002, "loss": 2.3567, "step": 43120 }, { "epoch": 0.1667285181920799, "grad_norm": 0.11706840246915817, "learning_rate": 0.002, "loss": 2.3572, "step": 43130 }, { "epoch": 0.16676717539546318, "grad_norm": 0.11006322503089905, "learning_rate": 0.002, "loss": 2.3683, "step": 43140 }, { "epoch": 0.16680583259884646, "grad_norm": 0.1078108474612236, "learning_rate": 0.002, "loss": 2.3621, "step": 43150 }, { "epoch": 0.16684448980222974, "grad_norm": 0.13114474713802338, "learning_rate": 0.002, "loss": 2.3594, "step": 43160 }, { "epoch": 0.16688314700561302, "grad_norm": 0.11985458433628082, "learning_rate": 0.002, "loss": 2.3875, "step": 43170 }, { "epoch": 0.1669218042089963, "grad_norm": 0.10740912705659866, "learning_rate": 0.002, "loss": 2.3573, "step": 43180 }, { "epoch": 0.16696046141237958, "grad_norm": 0.12001767009496689, "learning_rate": 0.002, "loss": 2.3672, "step": 43190 }, { "epoch": 0.16699911861576286, "grad_norm": 0.10463010519742966, "learning_rate": 0.002, "loss": 2.3647, "step": 43200 }, { "epoch": 0.16703777581914614, "grad_norm": 0.09417607635259628, "learning_rate": 0.002, "loss": 2.3724, "step": 43210 }, { "epoch": 0.16707643302252942, "grad_norm": 0.10411380976438522, "learning_rate": 0.002, "loss": 2.3731, "step": 43220 }, { "epoch": 0.1671150902259127, "grad_norm": 0.11311469227075577, "learning_rate": 0.002, "loss": 2.3648, "step": 43230 }, { "epoch": 0.16715374742929598, "grad_norm": 0.1006241962313652, "learning_rate": 0.002, "loss": 2.358, "step": 43240 }, { "epoch": 0.16719240463267926, "grad_norm": 0.09688572585582733, "learning_rate": 0.002, "loss": 2.3982, "step": 43250 }, { "epoch": 0.16723106183606254, "grad_norm": 0.12874460220336914, "learning_rate": 0.002, "loss": 2.3592, "step": 43260 }, { "epoch": 0.16726971903944582, "grad_norm": 0.10218334943056107, "learning_rate": 0.002, "loss": 2.3553, "step": 43270 }, { "epoch": 0.1673083762428291, "grad_norm": 0.11155752837657928, "learning_rate": 0.002, "loss": 2.3742, "step": 43280 }, { "epoch": 0.16734703344621238, "grad_norm": 0.0992024838924408, "learning_rate": 0.002, "loss": 2.3735, "step": 43290 }, { "epoch": 0.16738569064959566, "grad_norm": 0.11269593983888626, "learning_rate": 0.002, "loss": 2.353, "step": 43300 }, { "epoch": 0.1674243478529789, "grad_norm": 0.1077861487865448, "learning_rate": 0.002, "loss": 2.3594, "step": 43310 }, { "epoch": 0.1674630050563622, "grad_norm": 0.11057788133621216, "learning_rate": 0.002, "loss": 2.3724, "step": 43320 }, { "epoch": 0.16750166225974547, "grad_norm": 0.10573381930589676, "learning_rate": 0.002, "loss": 2.3593, "step": 43330 }, { "epoch": 0.16754031946312875, "grad_norm": 0.10935483127832413, "learning_rate": 0.002, "loss": 2.3613, "step": 43340 }, { "epoch": 0.16757897666651203, "grad_norm": 0.13087883591651917, "learning_rate": 0.002, "loss": 2.3551, "step": 43350 }, { "epoch": 0.1676176338698953, "grad_norm": 0.09338037669658661, "learning_rate": 0.002, "loss": 2.3678, "step": 43360 }, { "epoch": 0.1676562910732786, "grad_norm": 0.09488900750875473, "learning_rate": 0.002, "loss": 2.362, "step": 43370 }, { "epoch": 0.16769494827666187, "grad_norm": 0.10798195749521255, "learning_rate": 0.002, "loss": 2.3709, "step": 43380 }, { "epoch": 0.16773360548004515, "grad_norm": 0.10760509222745895, "learning_rate": 0.002, "loss": 2.3659, "step": 43390 }, { "epoch": 0.16777226268342843, "grad_norm": 0.11724671721458435, "learning_rate": 0.002, "loss": 2.3414, "step": 43400 }, { "epoch": 0.1678109198868117, "grad_norm": 0.10204483568668365, "learning_rate": 0.002, "loss": 2.3619, "step": 43410 }, { "epoch": 0.167849577090195, "grad_norm": 0.10492843389511108, "learning_rate": 0.002, "loss": 2.3723, "step": 43420 }, { "epoch": 0.16788823429357827, "grad_norm": 0.10826718807220459, "learning_rate": 0.002, "loss": 2.3577, "step": 43430 }, { "epoch": 0.16792689149696155, "grad_norm": 0.11438310146331787, "learning_rate": 0.002, "loss": 2.3718, "step": 43440 }, { "epoch": 0.16796554870034483, "grad_norm": 0.12006634473800659, "learning_rate": 0.002, "loss": 2.3671, "step": 43450 }, { "epoch": 0.1680042059037281, "grad_norm": 0.10296225547790527, "learning_rate": 0.002, "loss": 2.3653, "step": 43460 }, { "epoch": 0.1680428631071114, "grad_norm": 0.10148349404335022, "learning_rate": 0.002, "loss": 2.3472, "step": 43470 }, { "epoch": 0.16808152031049467, "grad_norm": 0.09886068850755692, "learning_rate": 0.002, "loss": 2.367, "step": 43480 }, { "epoch": 0.16812017751387795, "grad_norm": 0.11167940497398376, "learning_rate": 0.002, "loss": 2.3761, "step": 43490 }, { "epoch": 0.1681588347172612, "grad_norm": 0.12011834233999252, "learning_rate": 0.002, "loss": 2.3462, "step": 43500 }, { "epoch": 0.16819749192064448, "grad_norm": 0.11208701878786087, "learning_rate": 0.002, "loss": 2.3538, "step": 43510 }, { "epoch": 0.16823614912402776, "grad_norm": 0.11228106915950775, "learning_rate": 0.002, "loss": 2.3689, "step": 43520 }, { "epoch": 0.16827480632741104, "grad_norm": 0.10892346501350403, "learning_rate": 0.002, "loss": 2.36, "step": 43530 }, { "epoch": 0.16831346353079432, "grad_norm": 0.09330982714891434, "learning_rate": 0.002, "loss": 2.3555, "step": 43540 }, { "epoch": 0.1683521207341776, "grad_norm": 0.12238658964633942, "learning_rate": 0.002, "loss": 2.3801, "step": 43550 }, { "epoch": 0.16839077793756088, "grad_norm": 0.10222240537405014, "learning_rate": 0.002, "loss": 2.3577, "step": 43560 }, { "epoch": 0.16842943514094416, "grad_norm": 0.10212979465723038, "learning_rate": 0.002, "loss": 2.3566, "step": 43570 }, { "epoch": 0.16846809234432744, "grad_norm": 0.09810635447502136, "learning_rate": 0.002, "loss": 2.355, "step": 43580 }, { "epoch": 0.16850674954771072, "grad_norm": 0.11242741346359253, "learning_rate": 0.002, "loss": 2.3491, "step": 43590 }, { "epoch": 0.168545406751094, "grad_norm": 0.11861951649188995, "learning_rate": 0.002, "loss": 2.3719, "step": 43600 }, { "epoch": 0.16858406395447728, "grad_norm": 0.10650777816772461, "learning_rate": 0.002, "loss": 2.3737, "step": 43610 }, { "epoch": 0.16862272115786056, "grad_norm": 0.10390397906303406, "learning_rate": 0.002, "loss": 2.3479, "step": 43620 }, { "epoch": 0.16866137836124384, "grad_norm": 0.09910054504871368, "learning_rate": 0.002, "loss": 2.3793, "step": 43630 }, { "epoch": 0.16870003556462712, "grad_norm": 0.10709985345602036, "learning_rate": 0.002, "loss": 2.3649, "step": 43640 }, { "epoch": 0.1687386927680104, "grad_norm": 0.22000548243522644, "learning_rate": 0.002, "loss": 2.3755, "step": 43650 }, { "epoch": 0.16877734997139368, "grad_norm": 0.11501609534025192, "learning_rate": 0.002, "loss": 2.3771, "step": 43660 }, { "epoch": 0.16881600717477696, "grad_norm": 0.11920400708913803, "learning_rate": 0.002, "loss": 2.3647, "step": 43670 }, { "epoch": 0.1688546643781602, "grad_norm": 0.1039498895406723, "learning_rate": 0.002, "loss": 2.3817, "step": 43680 }, { "epoch": 0.1688933215815435, "grad_norm": 0.11218732595443726, "learning_rate": 0.002, "loss": 2.3683, "step": 43690 }, { "epoch": 0.16893197878492677, "grad_norm": 0.10131587088108063, "learning_rate": 0.002, "loss": 2.3729, "step": 43700 }, { "epoch": 0.16897063598831005, "grad_norm": 0.14687520265579224, "learning_rate": 0.002, "loss": 2.3653, "step": 43710 }, { "epoch": 0.16900929319169333, "grad_norm": 0.1265539526939392, "learning_rate": 0.002, "loss": 2.3588, "step": 43720 }, { "epoch": 0.1690479503950766, "grad_norm": 0.10759860277175903, "learning_rate": 0.002, "loss": 2.368, "step": 43730 }, { "epoch": 0.1690866075984599, "grad_norm": 0.12066882848739624, "learning_rate": 0.002, "loss": 2.371, "step": 43740 }, { "epoch": 0.16912526480184317, "grad_norm": 0.10885506868362427, "learning_rate": 0.002, "loss": 2.3624, "step": 43750 }, { "epoch": 0.16916392200522645, "grad_norm": 0.09433972835540771, "learning_rate": 0.002, "loss": 2.3576, "step": 43760 }, { "epoch": 0.16920257920860973, "grad_norm": 0.11025030165910721, "learning_rate": 0.002, "loss": 2.3488, "step": 43770 }, { "epoch": 0.169241236411993, "grad_norm": 0.10352890193462372, "learning_rate": 0.002, "loss": 2.3685, "step": 43780 }, { "epoch": 0.1692798936153763, "grad_norm": 0.13102802634239197, "learning_rate": 0.002, "loss": 2.3672, "step": 43790 }, { "epoch": 0.16931855081875957, "grad_norm": 0.09540624916553497, "learning_rate": 0.002, "loss": 2.3567, "step": 43800 }, { "epoch": 0.16935720802214285, "grad_norm": 0.10578183084726334, "learning_rate": 0.002, "loss": 2.3655, "step": 43810 }, { "epoch": 0.16939586522552613, "grad_norm": 0.101011723279953, "learning_rate": 0.002, "loss": 2.3581, "step": 43820 }, { "epoch": 0.1694345224289094, "grad_norm": 0.1189325824379921, "learning_rate": 0.002, "loss": 2.3695, "step": 43830 }, { "epoch": 0.1694731796322927, "grad_norm": 0.11614560335874557, "learning_rate": 0.002, "loss": 2.3755, "step": 43840 }, { "epoch": 0.16951183683567597, "grad_norm": 0.10390368103981018, "learning_rate": 0.002, "loss": 2.358, "step": 43850 }, { "epoch": 0.16955049403905925, "grad_norm": 0.10693172365427017, "learning_rate": 0.002, "loss": 2.3768, "step": 43860 }, { "epoch": 0.1695891512424425, "grad_norm": 0.11329218000173569, "learning_rate": 0.002, "loss": 2.3612, "step": 43870 }, { "epoch": 0.16962780844582578, "grad_norm": 0.08312906324863434, "learning_rate": 0.002, "loss": 2.3677, "step": 43880 }, { "epoch": 0.16966646564920906, "grad_norm": 0.1291521042585373, "learning_rate": 0.002, "loss": 2.3757, "step": 43890 }, { "epoch": 0.16970512285259234, "grad_norm": 0.09508147835731506, "learning_rate": 0.002, "loss": 2.3665, "step": 43900 }, { "epoch": 0.16974378005597562, "grad_norm": 0.11148455739021301, "learning_rate": 0.002, "loss": 2.3641, "step": 43910 }, { "epoch": 0.1697824372593589, "grad_norm": 0.12895052134990692, "learning_rate": 0.002, "loss": 2.3505, "step": 43920 }, { "epoch": 0.16982109446274218, "grad_norm": 0.12822982668876648, "learning_rate": 0.002, "loss": 2.3662, "step": 43930 }, { "epoch": 0.16985975166612546, "grad_norm": 0.089112788438797, "learning_rate": 0.002, "loss": 2.3588, "step": 43940 }, { "epoch": 0.16989840886950874, "grad_norm": 0.10964418947696686, "learning_rate": 0.002, "loss": 2.3517, "step": 43950 }, { "epoch": 0.16993706607289202, "grad_norm": 0.12344907969236374, "learning_rate": 0.002, "loss": 2.3521, "step": 43960 }, { "epoch": 0.1699757232762753, "grad_norm": 0.09824126213788986, "learning_rate": 0.002, "loss": 2.3629, "step": 43970 }, { "epoch": 0.17001438047965858, "grad_norm": 0.10377447307109833, "learning_rate": 0.002, "loss": 2.3658, "step": 43980 }, { "epoch": 0.17005303768304186, "grad_norm": 0.11566921323537827, "learning_rate": 0.002, "loss": 2.3684, "step": 43990 }, { "epoch": 0.17009169488642514, "grad_norm": 0.12002314627170563, "learning_rate": 0.002, "loss": 2.3647, "step": 44000 }, { "epoch": 0.17013035208980842, "grad_norm": 0.09902594238519669, "learning_rate": 0.002, "loss": 2.365, "step": 44010 }, { "epoch": 0.1701690092931917, "grad_norm": 0.0968996211886406, "learning_rate": 0.002, "loss": 2.3608, "step": 44020 }, { "epoch": 0.17020766649657498, "grad_norm": 0.12413863837718964, "learning_rate": 0.002, "loss": 2.3598, "step": 44030 }, { "epoch": 0.17024632369995826, "grad_norm": 0.1103622242808342, "learning_rate": 0.002, "loss": 2.3776, "step": 44040 }, { "epoch": 0.17028498090334152, "grad_norm": 0.11150927096605301, "learning_rate": 0.002, "loss": 2.3638, "step": 44050 }, { "epoch": 0.1703236381067248, "grad_norm": 0.13580836355686188, "learning_rate": 0.002, "loss": 2.3583, "step": 44060 }, { "epoch": 0.17036229531010808, "grad_norm": 0.11713317036628723, "learning_rate": 0.002, "loss": 2.3697, "step": 44070 }, { "epoch": 0.17040095251349135, "grad_norm": 0.11041072010993958, "learning_rate": 0.002, "loss": 2.3637, "step": 44080 }, { "epoch": 0.17043960971687463, "grad_norm": 0.13192050158977509, "learning_rate": 0.002, "loss": 2.3589, "step": 44090 }, { "epoch": 0.17047826692025791, "grad_norm": 0.1301390528678894, "learning_rate": 0.002, "loss": 2.3645, "step": 44100 }, { "epoch": 0.1705169241236412, "grad_norm": 0.12830758094787598, "learning_rate": 0.002, "loss": 2.3565, "step": 44110 }, { "epoch": 0.17055558132702447, "grad_norm": 0.10173708200454712, "learning_rate": 0.002, "loss": 2.3591, "step": 44120 }, { "epoch": 0.17059423853040775, "grad_norm": 0.09899038076400757, "learning_rate": 0.002, "loss": 2.3715, "step": 44130 }, { "epoch": 0.17063289573379103, "grad_norm": 0.10673151165246964, "learning_rate": 0.002, "loss": 2.3595, "step": 44140 }, { "epoch": 0.17067155293717431, "grad_norm": 0.10917104035615921, "learning_rate": 0.002, "loss": 2.3663, "step": 44150 }, { "epoch": 0.1707102101405576, "grad_norm": 0.10550056397914886, "learning_rate": 0.002, "loss": 2.3633, "step": 44160 }, { "epoch": 0.17074886734394087, "grad_norm": 0.10488106310367584, "learning_rate": 0.002, "loss": 2.3667, "step": 44170 }, { "epoch": 0.17078752454732415, "grad_norm": 0.13775895535945892, "learning_rate": 0.002, "loss": 2.3521, "step": 44180 }, { "epoch": 0.17082618175070743, "grad_norm": 0.10303659737110138, "learning_rate": 0.002, "loss": 2.3676, "step": 44190 }, { "epoch": 0.17086483895409071, "grad_norm": 0.11900082975625992, "learning_rate": 0.002, "loss": 2.3606, "step": 44200 }, { "epoch": 0.170903496157474, "grad_norm": 0.11325709521770477, "learning_rate": 0.002, "loss": 2.366, "step": 44210 }, { "epoch": 0.17094215336085727, "grad_norm": 0.09763014316558838, "learning_rate": 0.002, "loss": 2.3482, "step": 44220 }, { "epoch": 0.17098081056424055, "grad_norm": 0.1087694838643074, "learning_rate": 0.002, "loss": 2.3795, "step": 44230 }, { "epoch": 0.1710194677676238, "grad_norm": 0.1253872960805893, "learning_rate": 0.002, "loss": 2.3795, "step": 44240 }, { "epoch": 0.1710581249710071, "grad_norm": 0.16377699375152588, "learning_rate": 0.002, "loss": 2.3605, "step": 44250 }, { "epoch": 0.17109678217439037, "grad_norm": 0.10072610527276993, "learning_rate": 0.002, "loss": 2.3643, "step": 44260 }, { "epoch": 0.17113543937777365, "grad_norm": 0.0969557985663414, "learning_rate": 0.002, "loss": 2.3513, "step": 44270 }, { "epoch": 0.17117409658115693, "grad_norm": 0.11019019037485123, "learning_rate": 0.002, "loss": 2.3524, "step": 44280 }, { "epoch": 0.1712127537845402, "grad_norm": 0.10584430396556854, "learning_rate": 0.002, "loss": 2.3628, "step": 44290 }, { "epoch": 0.17125141098792349, "grad_norm": 0.10273636132478714, "learning_rate": 0.002, "loss": 2.368, "step": 44300 }, { "epoch": 0.17129006819130677, "grad_norm": 0.10688309371471405, "learning_rate": 0.002, "loss": 2.359, "step": 44310 }, { "epoch": 0.17132872539469005, "grad_norm": 0.1351660043001175, "learning_rate": 0.002, "loss": 2.3595, "step": 44320 }, { "epoch": 0.17136738259807333, "grad_norm": 0.12430575489997864, "learning_rate": 0.002, "loss": 2.3487, "step": 44330 }, { "epoch": 0.1714060398014566, "grad_norm": 0.12683185935020447, "learning_rate": 0.002, "loss": 2.3611, "step": 44340 }, { "epoch": 0.17144469700483989, "grad_norm": 0.1072586253285408, "learning_rate": 0.002, "loss": 2.3703, "step": 44350 }, { "epoch": 0.17148335420822317, "grad_norm": 0.10600695013999939, "learning_rate": 0.002, "loss": 2.361, "step": 44360 }, { "epoch": 0.17152201141160645, "grad_norm": 0.10198380798101425, "learning_rate": 0.002, "loss": 2.3608, "step": 44370 }, { "epoch": 0.17156066861498973, "grad_norm": 0.10946477949619293, "learning_rate": 0.002, "loss": 2.3535, "step": 44380 }, { "epoch": 0.171599325818373, "grad_norm": 0.11855118721723557, "learning_rate": 0.002, "loss": 2.3605, "step": 44390 }, { "epoch": 0.17163798302175629, "grad_norm": 0.10252556949853897, "learning_rate": 0.002, "loss": 2.3606, "step": 44400 }, { "epoch": 0.17167664022513957, "grad_norm": 0.10042252391576767, "learning_rate": 0.002, "loss": 2.3578, "step": 44410 }, { "epoch": 0.17171529742852282, "grad_norm": 0.10182294249534607, "learning_rate": 0.002, "loss": 2.3574, "step": 44420 }, { "epoch": 0.1717539546319061, "grad_norm": 0.12930281460285187, "learning_rate": 0.002, "loss": 2.3669, "step": 44430 }, { "epoch": 0.17179261183528938, "grad_norm": 0.10676681250333786, "learning_rate": 0.002, "loss": 2.3692, "step": 44440 }, { "epoch": 0.17183126903867266, "grad_norm": 0.1255345195531845, "learning_rate": 0.002, "loss": 2.3467, "step": 44450 }, { "epoch": 0.17186992624205594, "grad_norm": 0.11223578453063965, "learning_rate": 0.002, "loss": 2.3618, "step": 44460 }, { "epoch": 0.17190858344543922, "grad_norm": 0.09784567356109619, "learning_rate": 0.002, "loss": 2.3534, "step": 44470 }, { "epoch": 0.1719472406488225, "grad_norm": 0.09986116737127304, "learning_rate": 0.002, "loss": 2.3713, "step": 44480 }, { "epoch": 0.17198589785220578, "grad_norm": 0.15577766299247742, "learning_rate": 0.002, "loss": 2.3624, "step": 44490 }, { "epoch": 0.17202455505558906, "grad_norm": 0.10955075919628143, "learning_rate": 0.002, "loss": 2.3783, "step": 44500 }, { "epoch": 0.17206321225897234, "grad_norm": 0.10409995913505554, "learning_rate": 0.002, "loss": 2.3678, "step": 44510 }, { "epoch": 0.17210186946235562, "grad_norm": 0.1049322858452797, "learning_rate": 0.002, "loss": 2.3726, "step": 44520 }, { "epoch": 0.1721405266657389, "grad_norm": 0.09995309263467789, "learning_rate": 0.002, "loss": 2.3593, "step": 44530 }, { "epoch": 0.17217918386912218, "grad_norm": 0.10812091827392578, "learning_rate": 0.002, "loss": 2.358, "step": 44540 }, { "epoch": 0.17221784107250546, "grad_norm": 0.10199900716543198, "learning_rate": 0.002, "loss": 2.3566, "step": 44550 }, { "epoch": 0.17225649827588874, "grad_norm": 0.11742229759693146, "learning_rate": 0.002, "loss": 2.3619, "step": 44560 }, { "epoch": 0.17229515547927202, "grad_norm": 0.13238979876041412, "learning_rate": 0.002, "loss": 2.367, "step": 44570 }, { "epoch": 0.1723338126826553, "grad_norm": 0.11673494428396225, "learning_rate": 0.002, "loss": 2.3744, "step": 44580 }, { "epoch": 0.17237246988603858, "grad_norm": 0.10327861458063126, "learning_rate": 0.002, "loss": 2.375, "step": 44590 }, { "epoch": 0.17241112708942186, "grad_norm": 0.09699682891368866, "learning_rate": 0.002, "loss": 2.3744, "step": 44600 }, { "epoch": 0.1724497842928051, "grad_norm": 0.09948820620775223, "learning_rate": 0.002, "loss": 2.3572, "step": 44610 }, { "epoch": 0.1724884414961884, "grad_norm": 0.12126339972019196, "learning_rate": 0.002, "loss": 2.3508, "step": 44620 }, { "epoch": 0.17252709869957167, "grad_norm": 0.1148219108581543, "learning_rate": 0.002, "loss": 2.3456, "step": 44630 }, { "epoch": 0.17256575590295495, "grad_norm": 0.10499219596385956, "learning_rate": 0.002, "loss": 2.3755, "step": 44640 }, { "epoch": 0.17260441310633823, "grad_norm": 0.10470159351825714, "learning_rate": 0.002, "loss": 2.3718, "step": 44650 }, { "epoch": 0.1726430703097215, "grad_norm": 0.12049949914216995, "learning_rate": 0.002, "loss": 2.3653, "step": 44660 }, { "epoch": 0.1726817275131048, "grad_norm": 0.10587108880281448, "learning_rate": 0.002, "loss": 2.3528, "step": 44670 }, { "epoch": 0.17272038471648807, "grad_norm": 0.15022005140781403, "learning_rate": 0.002, "loss": 2.3622, "step": 44680 }, { "epoch": 0.17275904191987135, "grad_norm": 0.09850125759840012, "learning_rate": 0.002, "loss": 2.358, "step": 44690 }, { "epoch": 0.17279769912325463, "grad_norm": 0.1199873685836792, "learning_rate": 0.002, "loss": 2.3684, "step": 44700 }, { "epoch": 0.1728363563266379, "grad_norm": 0.10073809325695038, "learning_rate": 0.002, "loss": 2.3671, "step": 44710 }, { "epoch": 0.1728750135300212, "grad_norm": 0.10510051250457764, "learning_rate": 0.002, "loss": 2.3666, "step": 44720 }, { "epoch": 0.17291367073340447, "grad_norm": 0.1406104862689972, "learning_rate": 0.002, "loss": 2.3614, "step": 44730 }, { "epoch": 0.17295232793678775, "grad_norm": 0.0993463471531868, "learning_rate": 0.002, "loss": 2.3695, "step": 44740 }, { "epoch": 0.17299098514017103, "grad_norm": 0.11417844891548157, "learning_rate": 0.002, "loss": 2.3679, "step": 44750 }, { "epoch": 0.1730296423435543, "grad_norm": 0.11962078511714935, "learning_rate": 0.002, "loss": 2.3578, "step": 44760 }, { "epoch": 0.1730682995469376, "grad_norm": 0.12286528199911118, "learning_rate": 0.002, "loss": 2.3537, "step": 44770 }, { "epoch": 0.17310695675032087, "grad_norm": 0.09920985251665115, "learning_rate": 0.002, "loss": 2.3647, "step": 44780 }, { "epoch": 0.17314561395370412, "grad_norm": 0.10032475739717484, "learning_rate": 0.002, "loss": 2.3657, "step": 44790 }, { "epoch": 0.1731842711570874, "grad_norm": 0.0911695659160614, "learning_rate": 0.002, "loss": 2.3577, "step": 44800 }, { "epoch": 0.17322292836047068, "grad_norm": 0.11346561461687088, "learning_rate": 0.002, "loss": 2.3561, "step": 44810 }, { "epoch": 0.17326158556385396, "grad_norm": 0.11510348320007324, "learning_rate": 0.002, "loss": 2.3625, "step": 44820 }, { "epoch": 0.17330024276723724, "grad_norm": 0.11334090679883957, "learning_rate": 0.002, "loss": 2.3674, "step": 44830 }, { "epoch": 0.17333889997062052, "grad_norm": 0.1060083732008934, "learning_rate": 0.002, "loss": 2.3537, "step": 44840 }, { "epoch": 0.1733775571740038, "grad_norm": 0.11127477884292603, "learning_rate": 0.002, "loss": 2.3607, "step": 44850 }, { "epoch": 0.17341621437738708, "grad_norm": 0.10036677122116089, "learning_rate": 0.002, "loss": 2.3563, "step": 44860 }, { "epoch": 0.17345487158077036, "grad_norm": 0.12818297743797302, "learning_rate": 0.002, "loss": 2.3867, "step": 44870 }, { "epoch": 0.17349352878415364, "grad_norm": 0.10412877053022385, "learning_rate": 0.002, "loss": 2.3548, "step": 44880 }, { "epoch": 0.17353218598753692, "grad_norm": 0.16107313334941864, "learning_rate": 0.002, "loss": 2.3788, "step": 44890 }, { "epoch": 0.1735708431909202, "grad_norm": 0.10612837225198746, "learning_rate": 0.002, "loss": 2.3609, "step": 44900 }, { "epoch": 0.17360950039430348, "grad_norm": 0.11250829696655273, "learning_rate": 0.002, "loss": 2.3552, "step": 44910 }, { "epoch": 0.17364815759768676, "grad_norm": 0.11362912505865097, "learning_rate": 0.002, "loss": 2.3625, "step": 44920 }, { "epoch": 0.17368681480107004, "grad_norm": 0.10450930148363113, "learning_rate": 0.002, "loss": 2.3627, "step": 44930 }, { "epoch": 0.17372547200445332, "grad_norm": 0.10974158346652985, "learning_rate": 0.002, "loss": 2.3806, "step": 44940 }, { "epoch": 0.1737641292078366, "grad_norm": 0.1050289049744606, "learning_rate": 0.002, "loss": 2.3629, "step": 44950 }, { "epoch": 0.17380278641121988, "grad_norm": 0.10771483927965164, "learning_rate": 0.002, "loss": 2.3543, "step": 44960 }, { "epoch": 0.17384144361460316, "grad_norm": 0.09984450042247772, "learning_rate": 0.002, "loss": 2.3759, "step": 44970 }, { "epoch": 0.1738801008179864, "grad_norm": 0.1271042674779892, "learning_rate": 0.002, "loss": 2.3606, "step": 44980 }, { "epoch": 0.1739187580213697, "grad_norm": 0.10474269837141037, "learning_rate": 0.002, "loss": 2.3614, "step": 44990 }, { "epoch": 0.17395741522475297, "grad_norm": 0.10538092255592346, "learning_rate": 0.002, "loss": 2.3596, "step": 45000 }, { "epoch": 0.17399607242813625, "grad_norm": 0.09209699928760529, "learning_rate": 0.002, "loss": 2.3629, "step": 45010 }, { "epoch": 0.17403472963151953, "grad_norm": 0.11103249341249466, "learning_rate": 0.002, "loss": 2.3698, "step": 45020 }, { "epoch": 0.1740733868349028, "grad_norm": 0.10285907238721848, "learning_rate": 0.002, "loss": 2.3541, "step": 45030 }, { "epoch": 0.1741120440382861, "grad_norm": 0.11306691914796829, "learning_rate": 0.002, "loss": 2.3533, "step": 45040 }, { "epoch": 0.17415070124166937, "grad_norm": 0.12675841152668, "learning_rate": 0.002, "loss": 2.3657, "step": 45050 }, { "epoch": 0.17418935844505265, "grad_norm": 0.11700502038002014, "learning_rate": 0.002, "loss": 2.374, "step": 45060 }, { "epoch": 0.17422801564843593, "grad_norm": 0.1016639992594719, "learning_rate": 0.002, "loss": 2.3648, "step": 45070 }, { "epoch": 0.1742666728518192, "grad_norm": 0.10172295570373535, "learning_rate": 0.002, "loss": 2.3599, "step": 45080 }, { "epoch": 0.1743053300552025, "grad_norm": 0.11756344884634018, "learning_rate": 0.002, "loss": 2.3706, "step": 45090 }, { "epoch": 0.17434398725858577, "grad_norm": 0.11248449981212616, "learning_rate": 0.002, "loss": 2.3615, "step": 45100 }, { "epoch": 0.17438264446196905, "grad_norm": 0.09576928615570068, "learning_rate": 0.002, "loss": 2.3796, "step": 45110 }, { "epoch": 0.17442130166535233, "grad_norm": 0.10466929525136948, "learning_rate": 0.002, "loss": 2.3618, "step": 45120 }, { "epoch": 0.1744599588687356, "grad_norm": 0.10423614829778671, "learning_rate": 0.002, "loss": 2.3675, "step": 45130 }, { "epoch": 0.1744986160721189, "grad_norm": 0.11010349541902542, "learning_rate": 0.002, "loss": 2.3688, "step": 45140 }, { "epoch": 0.17453727327550217, "grad_norm": 0.11372268944978714, "learning_rate": 0.002, "loss": 2.3586, "step": 45150 }, { "epoch": 0.17457593047888545, "grad_norm": 0.12476648390293121, "learning_rate": 0.002, "loss": 2.3658, "step": 45160 }, { "epoch": 0.1746145876822687, "grad_norm": 0.09552644193172455, "learning_rate": 0.002, "loss": 2.3812, "step": 45170 }, { "epoch": 0.17465324488565198, "grad_norm": 0.13805052638053894, "learning_rate": 0.002, "loss": 2.3475, "step": 45180 }, { "epoch": 0.17469190208903526, "grad_norm": 0.11120432615280151, "learning_rate": 0.002, "loss": 2.377, "step": 45190 }, { "epoch": 0.17473055929241854, "grad_norm": 0.10684333741664886, "learning_rate": 0.002, "loss": 2.3829, "step": 45200 }, { "epoch": 0.17476921649580182, "grad_norm": 0.10616767406463623, "learning_rate": 0.002, "loss": 2.3506, "step": 45210 }, { "epoch": 0.1748078736991851, "grad_norm": 0.09469354897737503, "learning_rate": 0.002, "loss": 2.3527, "step": 45220 }, { "epoch": 0.17484653090256838, "grad_norm": 0.10828037559986115, "learning_rate": 0.002, "loss": 2.3713, "step": 45230 }, { "epoch": 0.17488518810595166, "grad_norm": 0.09035887569189072, "learning_rate": 0.002, "loss": 2.3617, "step": 45240 }, { "epoch": 0.17492384530933494, "grad_norm": 0.10850965231657028, "learning_rate": 0.002, "loss": 2.3685, "step": 45250 }, { "epoch": 0.17496250251271822, "grad_norm": 0.11956586688756943, "learning_rate": 0.002, "loss": 2.3619, "step": 45260 }, { "epoch": 0.1750011597161015, "grad_norm": 0.09887845814228058, "learning_rate": 0.002, "loss": 2.3537, "step": 45270 }, { "epoch": 0.17503981691948478, "grad_norm": 0.0914347693324089, "learning_rate": 0.002, "loss": 2.3464, "step": 45280 }, { "epoch": 0.17507847412286806, "grad_norm": 0.09789085388183594, "learning_rate": 0.002, "loss": 2.3609, "step": 45290 }, { "epoch": 0.17511713132625134, "grad_norm": 0.09701121598482132, "learning_rate": 0.002, "loss": 2.3687, "step": 45300 }, { "epoch": 0.17515578852963462, "grad_norm": 0.11873602122068405, "learning_rate": 0.002, "loss": 2.3607, "step": 45310 }, { "epoch": 0.1751944457330179, "grad_norm": 0.1296318918466568, "learning_rate": 0.002, "loss": 2.3563, "step": 45320 }, { "epoch": 0.17523310293640118, "grad_norm": 0.11447659134864807, "learning_rate": 0.002, "loss": 2.3526, "step": 45330 }, { "epoch": 0.17527176013978446, "grad_norm": 0.09558804333209991, "learning_rate": 0.002, "loss": 2.3561, "step": 45340 }, { "epoch": 0.1753104173431677, "grad_norm": 0.108881376683712, "learning_rate": 0.002, "loss": 2.3767, "step": 45350 }, { "epoch": 0.175349074546551, "grad_norm": 0.12144729495048523, "learning_rate": 0.002, "loss": 2.362, "step": 45360 }, { "epoch": 0.17538773174993427, "grad_norm": 0.10852917283773422, "learning_rate": 0.002, "loss": 2.3627, "step": 45370 }, { "epoch": 0.17542638895331755, "grad_norm": 0.091704361140728, "learning_rate": 0.002, "loss": 2.3731, "step": 45380 }, { "epoch": 0.17546504615670083, "grad_norm": 0.11262385547161102, "learning_rate": 0.002, "loss": 2.3503, "step": 45390 }, { "epoch": 0.1755037033600841, "grad_norm": 0.12413255125284195, "learning_rate": 0.002, "loss": 2.3766, "step": 45400 }, { "epoch": 0.1755423605634674, "grad_norm": 0.09488387405872345, "learning_rate": 0.002, "loss": 2.3695, "step": 45410 }, { "epoch": 0.17558101776685067, "grad_norm": 0.10605372488498688, "learning_rate": 0.002, "loss": 2.3578, "step": 45420 }, { "epoch": 0.17561967497023395, "grad_norm": 0.12880057096481323, "learning_rate": 0.002, "loss": 2.3505, "step": 45430 }, { "epoch": 0.17565833217361723, "grad_norm": 0.20423239469528198, "learning_rate": 0.002, "loss": 2.3456, "step": 45440 }, { "epoch": 0.1756969893770005, "grad_norm": 0.09972039610147476, "learning_rate": 0.002, "loss": 2.3701, "step": 45450 }, { "epoch": 0.1757356465803838, "grad_norm": 0.11355508863925934, "learning_rate": 0.002, "loss": 2.3634, "step": 45460 }, { "epoch": 0.17577430378376707, "grad_norm": 0.1071861982345581, "learning_rate": 0.002, "loss": 2.3595, "step": 45470 }, { "epoch": 0.17581296098715035, "grad_norm": 0.1498335301876068, "learning_rate": 0.002, "loss": 2.3578, "step": 45480 }, { "epoch": 0.17585161819053363, "grad_norm": 0.1033952608704567, "learning_rate": 0.002, "loss": 2.3662, "step": 45490 }, { "epoch": 0.1758902753939169, "grad_norm": 0.09816624969244003, "learning_rate": 0.002, "loss": 2.3596, "step": 45500 }, { "epoch": 0.1759289325973002, "grad_norm": 0.13971713185310364, "learning_rate": 0.002, "loss": 2.3549, "step": 45510 }, { "epoch": 0.17596758980068347, "grad_norm": 0.10514900088310242, "learning_rate": 0.002, "loss": 2.3667, "step": 45520 }, { "epoch": 0.17600624700406675, "grad_norm": 0.09521742910146713, "learning_rate": 0.002, "loss": 2.3571, "step": 45530 }, { "epoch": 0.17604490420745, "grad_norm": 0.10389627516269684, "learning_rate": 0.002, "loss": 2.3713, "step": 45540 }, { "epoch": 0.17608356141083328, "grad_norm": 0.11555583775043488, "learning_rate": 0.002, "loss": 2.367, "step": 45550 }, { "epoch": 0.17612221861421656, "grad_norm": 0.10777294635772705, "learning_rate": 0.002, "loss": 2.3771, "step": 45560 }, { "epoch": 0.17616087581759984, "grad_norm": 0.14498485624790192, "learning_rate": 0.002, "loss": 2.3791, "step": 45570 }, { "epoch": 0.17619953302098312, "grad_norm": 0.10857778787612915, "learning_rate": 0.002, "loss": 2.3617, "step": 45580 }, { "epoch": 0.1762381902243664, "grad_norm": 0.10575216263532639, "learning_rate": 0.002, "loss": 2.3677, "step": 45590 }, { "epoch": 0.17627684742774968, "grad_norm": 0.11028721928596497, "learning_rate": 0.002, "loss": 2.363, "step": 45600 }, { "epoch": 0.17631550463113296, "grad_norm": 0.13524943590164185, "learning_rate": 0.002, "loss": 2.3635, "step": 45610 }, { "epoch": 0.17635416183451624, "grad_norm": 0.10539298504590988, "learning_rate": 0.002, "loss": 2.3782, "step": 45620 }, { "epoch": 0.17639281903789952, "grad_norm": 0.10771636664867401, "learning_rate": 0.002, "loss": 2.3677, "step": 45630 }, { "epoch": 0.1764314762412828, "grad_norm": 0.09472755342721939, "learning_rate": 0.002, "loss": 2.3674, "step": 45640 }, { "epoch": 0.17647013344466608, "grad_norm": 0.10767373442649841, "learning_rate": 0.002, "loss": 2.3826, "step": 45650 }, { "epoch": 0.17650879064804936, "grad_norm": 0.11240257322788239, "learning_rate": 0.002, "loss": 2.3501, "step": 45660 }, { "epoch": 0.17654744785143264, "grad_norm": 0.1209542527794838, "learning_rate": 0.002, "loss": 2.3715, "step": 45670 }, { "epoch": 0.17658610505481592, "grad_norm": 0.12508618831634521, "learning_rate": 0.002, "loss": 2.3513, "step": 45680 }, { "epoch": 0.1766247622581992, "grad_norm": 0.10958380252122879, "learning_rate": 0.002, "loss": 2.3555, "step": 45690 }, { "epoch": 0.17666341946158248, "grad_norm": 0.1180795207619667, "learning_rate": 0.002, "loss": 2.3731, "step": 45700 }, { "epoch": 0.17670207666496576, "grad_norm": 0.10940097272396088, "learning_rate": 0.002, "loss": 2.3604, "step": 45710 }, { "epoch": 0.17674073386834901, "grad_norm": 0.11199961602687836, "learning_rate": 0.002, "loss": 2.3531, "step": 45720 }, { "epoch": 0.1767793910717323, "grad_norm": 0.10809573531150818, "learning_rate": 0.002, "loss": 2.3734, "step": 45730 }, { "epoch": 0.17681804827511557, "grad_norm": 0.11269965022802353, "learning_rate": 0.002, "loss": 2.3628, "step": 45740 }, { "epoch": 0.17685670547849885, "grad_norm": 0.10095714032649994, "learning_rate": 0.002, "loss": 2.3676, "step": 45750 }, { "epoch": 0.17689536268188213, "grad_norm": 0.117369644343853, "learning_rate": 0.002, "loss": 2.3759, "step": 45760 }, { "epoch": 0.17693401988526541, "grad_norm": 0.11148129403591156, "learning_rate": 0.002, "loss": 2.3674, "step": 45770 }, { "epoch": 0.1769726770886487, "grad_norm": 0.11547453701496124, "learning_rate": 0.002, "loss": 2.3668, "step": 45780 }, { "epoch": 0.17701133429203197, "grad_norm": 0.10645493119955063, "learning_rate": 0.002, "loss": 2.3822, "step": 45790 }, { "epoch": 0.17704999149541525, "grad_norm": 0.11404749751091003, "learning_rate": 0.002, "loss": 2.3546, "step": 45800 }, { "epoch": 0.17708864869879853, "grad_norm": 0.11249172687530518, "learning_rate": 0.002, "loss": 2.3565, "step": 45810 }, { "epoch": 0.17712730590218181, "grad_norm": 0.10758005082607269, "learning_rate": 0.002, "loss": 2.3531, "step": 45820 }, { "epoch": 0.1771659631055651, "grad_norm": 0.09517668187618256, "learning_rate": 0.002, "loss": 2.3625, "step": 45830 }, { "epoch": 0.17720462030894837, "grad_norm": 0.09979206323623657, "learning_rate": 0.002, "loss": 2.3515, "step": 45840 }, { "epoch": 0.17724327751233165, "grad_norm": 0.12252053618431091, "learning_rate": 0.002, "loss": 2.3398, "step": 45850 }, { "epoch": 0.17728193471571493, "grad_norm": 0.09131057560443878, "learning_rate": 0.002, "loss": 2.3711, "step": 45860 }, { "epoch": 0.1773205919190982, "grad_norm": 0.1101309061050415, "learning_rate": 0.002, "loss": 2.3594, "step": 45870 }, { "epoch": 0.1773592491224815, "grad_norm": 0.10598935186862946, "learning_rate": 0.002, "loss": 2.3646, "step": 45880 }, { "epoch": 0.17739790632586477, "grad_norm": 0.09703782200813293, "learning_rate": 0.002, "loss": 2.368, "step": 45890 }, { "epoch": 0.17743656352924805, "grad_norm": 0.09163852781057358, "learning_rate": 0.002, "loss": 2.3615, "step": 45900 }, { "epoch": 0.1774752207326313, "grad_norm": 0.10155810415744781, "learning_rate": 0.002, "loss": 2.3682, "step": 45910 }, { "epoch": 0.17751387793601459, "grad_norm": 0.0889534205198288, "learning_rate": 0.002, "loss": 2.3802, "step": 45920 }, { "epoch": 0.17755253513939787, "grad_norm": 0.11034919321537018, "learning_rate": 0.002, "loss": 2.3613, "step": 45930 }, { "epoch": 0.17759119234278115, "grad_norm": 0.09865278750658035, "learning_rate": 0.002, "loss": 2.3634, "step": 45940 }, { "epoch": 0.17762984954616443, "grad_norm": 0.14617250859737396, "learning_rate": 0.002, "loss": 2.368, "step": 45950 }, { "epoch": 0.1776685067495477, "grad_norm": 0.10735570639371872, "learning_rate": 0.002, "loss": 2.3444, "step": 45960 }, { "epoch": 0.17770716395293099, "grad_norm": 0.11080707609653473, "learning_rate": 0.002, "loss": 2.3681, "step": 45970 }, { "epoch": 0.17774582115631427, "grad_norm": 0.10667536407709122, "learning_rate": 0.002, "loss": 2.3875, "step": 45980 }, { "epoch": 0.17778447835969755, "grad_norm": 0.11528275907039642, "learning_rate": 0.002, "loss": 2.374, "step": 45990 }, { "epoch": 0.17782313556308083, "grad_norm": 0.10348264873027802, "learning_rate": 0.002, "loss": 2.3593, "step": 46000 }, { "epoch": 0.1778617927664641, "grad_norm": 0.12170681357383728, "learning_rate": 0.002, "loss": 2.3533, "step": 46010 }, { "epoch": 0.17790044996984739, "grad_norm": 0.10371945053339005, "learning_rate": 0.002, "loss": 2.3486, "step": 46020 }, { "epoch": 0.17793910717323067, "grad_norm": 0.10639218986034393, "learning_rate": 0.002, "loss": 2.3729, "step": 46030 }, { "epoch": 0.17797776437661394, "grad_norm": 0.0943278968334198, "learning_rate": 0.002, "loss": 2.3597, "step": 46040 }, { "epoch": 0.17801642157999722, "grad_norm": 0.08742164820432663, "learning_rate": 0.002, "loss": 2.356, "step": 46050 }, { "epoch": 0.1780550787833805, "grad_norm": 0.09669308364391327, "learning_rate": 0.002, "loss": 2.3662, "step": 46060 }, { "epoch": 0.17809373598676378, "grad_norm": 0.41715648770332336, "learning_rate": 0.002, "loss": 2.3591, "step": 46070 }, { "epoch": 0.17813239319014706, "grad_norm": 0.11329425871372223, "learning_rate": 0.002, "loss": 2.3722, "step": 46080 }, { "epoch": 0.17817105039353032, "grad_norm": 0.10821500420570374, "learning_rate": 0.002, "loss": 2.3645, "step": 46090 }, { "epoch": 0.1782097075969136, "grad_norm": 0.12273920327425003, "learning_rate": 0.002, "loss": 2.3652, "step": 46100 }, { "epoch": 0.17824836480029688, "grad_norm": 0.11014291644096375, "learning_rate": 0.002, "loss": 2.3661, "step": 46110 }, { "epoch": 0.17828702200368016, "grad_norm": 0.10393861681222916, "learning_rate": 0.002, "loss": 2.3632, "step": 46120 }, { "epoch": 0.17832567920706344, "grad_norm": 0.10663165152072906, "learning_rate": 0.002, "loss": 2.3576, "step": 46130 }, { "epoch": 0.17836433641044672, "grad_norm": 0.11056865751743317, "learning_rate": 0.002, "loss": 2.3645, "step": 46140 }, { "epoch": 0.17840299361383, "grad_norm": 0.10019081830978394, "learning_rate": 0.002, "loss": 2.3845, "step": 46150 }, { "epoch": 0.17844165081721328, "grad_norm": 0.11017369478940964, "learning_rate": 0.002, "loss": 2.345, "step": 46160 }, { "epoch": 0.17848030802059656, "grad_norm": 0.12934242188930511, "learning_rate": 0.002, "loss": 2.3611, "step": 46170 }, { "epoch": 0.17851896522397984, "grad_norm": 0.09688087552785873, "learning_rate": 0.002, "loss": 2.3609, "step": 46180 }, { "epoch": 0.17855762242736312, "grad_norm": 0.11228056252002716, "learning_rate": 0.002, "loss": 2.3484, "step": 46190 }, { "epoch": 0.1785962796307464, "grad_norm": 0.12104123085737228, "learning_rate": 0.002, "loss": 2.3624, "step": 46200 }, { "epoch": 0.17863493683412968, "grad_norm": 0.11429349333047867, "learning_rate": 0.002, "loss": 2.3522, "step": 46210 }, { "epoch": 0.17867359403751296, "grad_norm": 0.10362027585506439, "learning_rate": 0.002, "loss": 2.3718, "step": 46220 }, { "epoch": 0.17871225124089624, "grad_norm": 0.09616777300834656, "learning_rate": 0.002, "loss": 2.371, "step": 46230 }, { "epoch": 0.17875090844427952, "grad_norm": 0.11989966779947281, "learning_rate": 0.002, "loss": 2.3567, "step": 46240 }, { "epoch": 0.1787895656476628, "grad_norm": 0.11376291513442993, "learning_rate": 0.002, "loss": 2.3521, "step": 46250 }, { "epoch": 0.17882822285104608, "grad_norm": 0.11649604141712189, "learning_rate": 0.002, "loss": 2.3686, "step": 46260 }, { "epoch": 0.17886688005442936, "grad_norm": 0.10671722888946533, "learning_rate": 0.002, "loss": 2.3581, "step": 46270 }, { "epoch": 0.1789055372578126, "grad_norm": 0.10673186182975769, "learning_rate": 0.002, "loss": 2.3434, "step": 46280 }, { "epoch": 0.1789441944611959, "grad_norm": 0.11358071118593216, "learning_rate": 0.002, "loss": 2.3648, "step": 46290 }, { "epoch": 0.17898285166457917, "grad_norm": 0.11956032365560532, "learning_rate": 0.002, "loss": 2.3764, "step": 46300 }, { "epoch": 0.17902150886796245, "grad_norm": 0.10784201323986053, "learning_rate": 0.002, "loss": 2.3652, "step": 46310 }, { "epoch": 0.17906016607134573, "grad_norm": 0.09544270485639572, "learning_rate": 0.002, "loss": 2.3656, "step": 46320 }, { "epoch": 0.179098823274729, "grad_norm": 0.10480832308530807, "learning_rate": 0.002, "loss": 2.3625, "step": 46330 }, { "epoch": 0.1791374804781123, "grad_norm": 0.11912026256322861, "learning_rate": 0.002, "loss": 2.3508, "step": 46340 }, { "epoch": 0.17917613768149557, "grad_norm": 0.11689063161611557, "learning_rate": 0.002, "loss": 2.3817, "step": 46350 }, { "epoch": 0.17921479488487885, "grad_norm": 0.10498364269733429, "learning_rate": 0.002, "loss": 2.3456, "step": 46360 }, { "epoch": 0.17925345208826213, "grad_norm": 0.1057942733168602, "learning_rate": 0.002, "loss": 2.3533, "step": 46370 }, { "epoch": 0.1792921092916454, "grad_norm": 0.12397985905408859, "learning_rate": 0.002, "loss": 2.3725, "step": 46380 }, { "epoch": 0.1793307664950287, "grad_norm": 0.1134156733751297, "learning_rate": 0.002, "loss": 2.3606, "step": 46390 }, { "epoch": 0.17936942369841197, "grad_norm": 0.11008192598819733, "learning_rate": 0.002, "loss": 2.3554, "step": 46400 }, { "epoch": 0.17940808090179525, "grad_norm": 0.10683248937129974, "learning_rate": 0.002, "loss": 2.3706, "step": 46410 }, { "epoch": 0.17944673810517853, "grad_norm": 0.10753653198480606, "learning_rate": 0.002, "loss": 2.3663, "step": 46420 }, { "epoch": 0.1794853953085618, "grad_norm": 0.09765728563070297, "learning_rate": 0.002, "loss": 2.3629, "step": 46430 }, { "epoch": 0.1795240525119451, "grad_norm": 0.11700306832790375, "learning_rate": 0.002, "loss": 2.3502, "step": 46440 }, { "epoch": 0.17956270971532837, "grad_norm": 0.09504656493663788, "learning_rate": 0.002, "loss": 2.3601, "step": 46450 }, { "epoch": 0.17960136691871162, "grad_norm": 0.10488741844892502, "learning_rate": 0.002, "loss": 2.3627, "step": 46460 }, { "epoch": 0.1796400241220949, "grad_norm": 0.09921709448099136, "learning_rate": 0.002, "loss": 2.3673, "step": 46470 }, { "epoch": 0.17967868132547818, "grad_norm": 0.12173950672149658, "learning_rate": 0.002, "loss": 2.3527, "step": 46480 }, { "epoch": 0.17971733852886146, "grad_norm": 0.11671063303947449, "learning_rate": 0.002, "loss": 2.3576, "step": 46490 }, { "epoch": 0.17975599573224474, "grad_norm": 0.12096337229013443, "learning_rate": 0.002, "loss": 2.3528, "step": 46500 }, { "epoch": 0.17979465293562802, "grad_norm": 0.11629269272089005, "learning_rate": 0.002, "loss": 2.3546, "step": 46510 }, { "epoch": 0.1798333101390113, "grad_norm": 0.12332172691822052, "learning_rate": 0.002, "loss": 2.374, "step": 46520 }, { "epoch": 0.17987196734239458, "grad_norm": 0.11764345318078995, "learning_rate": 0.002, "loss": 2.359, "step": 46530 }, { "epoch": 0.17991062454577786, "grad_norm": 0.13840240240097046, "learning_rate": 0.002, "loss": 2.3598, "step": 46540 }, { "epoch": 0.17994928174916114, "grad_norm": 0.1019141748547554, "learning_rate": 0.002, "loss": 2.3504, "step": 46550 }, { "epoch": 0.17998793895254442, "grad_norm": 0.11623389273881912, "learning_rate": 0.002, "loss": 2.3576, "step": 46560 }, { "epoch": 0.1800265961559277, "grad_norm": 0.12610310316085815, "learning_rate": 0.002, "loss": 2.3651, "step": 46570 }, { "epoch": 0.18006525335931098, "grad_norm": 0.10052474588155746, "learning_rate": 0.002, "loss": 2.3619, "step": 46580 }, { "epoch": 0.18010391056269426, "grad_norm": 0.11499066650867462, "learning_rate": 0.002, "loss": 2.3766, "step": 46590 }, { "epoch": 0.18014256776607754, "grad_norm": 0.09875231236219406, "learning_rate": 0.002, "loss": 2.3777, "step": 46600 }, { "epoch": 0.18018122496946082, "grad_norm": 0.10460720211267471, "learning_rate": 0.002, "loss": 2.3632, "step": 46610 }, { "epoch": 0.1802198821728441, "grad_norm": 0.11153114587068558, "learning_rate": 0.002, "loss": 2.3574, "step": 46620 }, { "epoch": 0.18025853937622738, "grad_norm": 0.10913459956645966, "learning_rate": 0.002, "loss": 2.3536, "step": 46630 }, { "epoch": 0.18029719657961066, "grad_norm": 0.10822083801031113, "learning_rate": 0.002, "loss": 2.364, "step": 46640 }, { "epoch": 0.1803358537829939, "grad_norm": 0.09244625270366669, "learning_rate": 0.002, "loss": 2.3686, "step": 46650 }, { "epoch": 0.1803745109863772, "grad_norm": 0.13497793674468994, "learning_rate": 0.002, "loss": 2.3454, "step": 46660 }, { "epoch": 0.18041316818976047, "grad_norm": 0.11675471812486649, "learning_rate": 0.002, "loss": 2.3737, "step": 46670 }, { "epoch": 0.18045182539314375, "grad_norm": 0.12889958918094635, "learning_rate": 0.002, "loss": 2.3477, "step": 46680 }, { "epoch": 0.18049048259652703, "grad_norm": 0.09921512752771378, "learning_rate": 0.002, "loss": 2.3574, "step": 46690 }, { "epoch": 0.1805291397999103, "grad_norm": 0.10539357364177704, "learning_rate": 0.002, "loss": 2.3728, "step": 46700 }, { "epoch": 0.1805677970032936, "grad_norm": 0.13350282609462738, "learning_rate": 0.002, "loss": 2.3583, "step": 46710 }, { "epoch": 0.18060645420667687, "grad_norm": 0.11116880923509598, "learning_rate": 0.002, "loss": 2.3674, "step": 46720 }, { "epoch": 0.18064511141006015, "grad_norm": 0.12459864467382431, "learning_rate": 0.002, "loss": 2.3717, "step": 46730 }, { "epoch": 0.18068376861344343, "grad_norm": 0.1052606999874115, "learning_rate": 0.002, "loss": 2.3522, "step": 46740 }, { "epoch": 0.1807224258168267, "grad_norm": 0.10243360698223114, "learning_rate": 0.002, "loss": 2.3617, "step": 46750 }, { "epoch": 0.18076108302021, "grad_norm": 0.12499461323022842, "learning_rate": 0.002, "loss": 2.3612, "step": 46760 }, { "epoch": 0.18079974022359327, "grad_norm": 0.1343996822834015, "learning_rate": 0.002, "loss": 2.3533, "step": 46770 }, { "epoch": 0.18083839742697655, "grad_norm": 0.11027532070875168, "learning_rate": 0.002, "loss": 2.3476, "step": 46780 }, { "epoch": 0.18087705463035983, "grad_norm": 0.10879873484373093, "learning_rate": 0.002, "loss": 2.3535, "step": 46790 }, { "epoch": 0.1809157118337431, "grad_norm": 0.2611781358718872, "learning_rate": 0.002, "loss": 2.364, "step": 46800 }, { "epoch": 0.1809543690371264, "grad_norm": 0.09826645255088806, "learning_rate": 0.002, "loss": 2.3782, "step": 46810 }, { "epoch": 0.18099302624050967, "grad_norm": 0.09456578642129898, "learning_rate": 0.002, "loss": 2.3633, "step": 46820 }, { "epoch": 0.18103168344389292, "grad_norm": 0.24171464145183563, "learning_rate": 0.002, "loss": 2.3707, "step": 46830 }, { "epoch": 0.1810703406472762, "grad_norm": 0.10748471319675446, "learning_rate": 0.002, "loss": 2.3617, "step": 46840 }, { "epoch": 0.18110899785065948, "grad_norm": 0.09292565286159515, "learning_rate": 0.002, "loss": 2.36, "step": 46850 }, { "epoch": 0.18114765505404276, "grad_norm": 0.09652915596961975, "learning_rate": 0.002, "loss": 2.3553, "step": 46860 }, { "epoch": 0.18118631225742604, "grad_norm": 0.1162387803196907, "learning_rate": 0.002, "loss": 2.3582, "step": 46870 }, { "epoch": 0.18122496946080932, "grad_norm": 0.1014401912689209, "learning_rate": 0.002, "loss": 2.3666, "step": 46880 }, { "epoch": 0.1812636266641926, "grad_norm": 0.11465539783239365, "learning_rate": 0.002, "loss": 2.3615, "step": 46890 }, { "epoch": 0.18130228386757588, "grad_norm": 0.11276334524154663, "learning_rate": 0.002, "loss": 2.3509, "step": 46900 }, { "epoch": 0.18134094107095916, "grad_norm": 0.12095353752374649, "learning_rate": 0.002, "loss": 2.3616, "step": 46910 }, { "epoch": 0.18137959827434244, "grad_norm": 0.10484836995601654, "learning_rate": 0.002, "loss": 2.3588, "step": 46920 }, { "epoch": 0.18141825547772572, "grad_norm": 0.13355353474617004, "learning_rate": 0.002, "loss": 2.3507, "step": 46930 }, { "epoch": 0.181456912681109, "grad_norm": 0.11671741306781769, "learning_rate": 0.002, "loss": 2.3599, "step": 46940 }, { "epoch": 0.18149556988449228, "grad_norm": 0.11582811921834946, "learning_rate": 0.002, "loss": 2.3594, "step": 46950 }, { "epoch": 0.18153422708787556, "grad_norm": 0.10815490782260895, "learning_rate": 0.002, "loss": 2.3437, "step": 46960 }, { "epoch": 0.18157288429125884, "grad_norm": 0.11243810504674911, "learning_rate": 0.002, "loss": 2.3581, "step": 46970 }, { "epoch": 0.18161154149464212, "grad_norm": 0.1018824502825737, "learning_rate": 0.002, "loss": 2.3739, "step": 46980 }, { "epoch": 0.1816501986980254, "grad_norm": 0.13891954720020294, "learning_rate": 0.002, "loss": 2.3733, "step": 46990 }, { "epoch": 0.18168885590140868, "grad_norm": 0.11140652000904083, "learning_rate": 0.002, "loss": 2.36, "step": 47000 }, { "epoch": 0.18172751310479196, "grad_norm": 0.1114838719367981, "learning_rate": 0.002, "loss": 2.3651, "step": 47010 }, { "epoch": 0.1817661703081752, "grad_norm": 0.10132309049367905, "learning_rate": 0.002, "loss": 2.366, "step": 47020 }, { "epoch": 0.1818048275115585, "grad_norm": 0.10207788646221161, "learning_rate": 0.002, "loss": 2.3504, "step": 47030 }, { "epoch": 0.18184348471494177, "grad_norm": 0.10041003674268723, "learning_rate": 0.002, "loss": 2.3678, "step": 47040 }, { "epoch": 0.18188214191832505, "grad_norm": 0.10246816277503967, "learning_rate": 0.002, "loss": 2.3745, "step": 47050 }, { "epoch": 0.18192079912170833, "grad_norm": 0.11263757944107056, "learning_rate": 0.002, "loss": 2.3683, "step": 47060 }, { "epoch": 0.1819594563250916, "grad_norm": 0.10960984230041504, "learning_rate": 0.002, "loss": 2.37, "step": 47070 }, { "epoch": 0.1819981135284749, "grad_norm": 0.12285451591014862, "learning_rate": 0.002, "loss": 2.361, "step": 47080 }, { "epoch": 0.18203677073185817, "grad_norm": 0.09926436841487885, "learning_rate": 0.002, "loss": 2.3518, "step": 47090 }, { "epoch": 0.18207542793524145, "grad_norm": 0.10922511667013168, "learning_rate": 0.002, "loss": 2.3626, "step": 47100 }, { "epoch": 0.18211408513862473, "grad_norm": 0.10222557187080383, "learning_rate": 0.002, "loss": 2.3562, "step": 47110 }, { "epoch": 0.182152742342008, "grad_norm": 0.11343535780906677, "learning_rate": 0.002, "loss": 2.3641, "step": 47120 }, { "epoch": 0.1821913995453913, "grad_norm": 0.10885771363973618, "learning_rate": 0.002, "loss": 2.3562, "step": 47130 }, { "epoch": 0.18223005674877457, "grad_norm": 0.1149262934923172, "learning_rate": 0.002, "loss": 2.3733, "step": 47140 }, { "epoch": 0.18226871395215785, "grad_norm": 0.12736138701438904, "learning_rate": 0.002, "loss": 2.3503, "step": 47150 }, { "epoch": 0.18230737115554113, "grad_norm": 0.12280838936567307, "learning_rate": 0.002, "loss": 2.3439, "step": 47160 }, { "epoch": 0.1823460283589244, "grad_norm": 0.10826502740383148, "learning_rate": 0.002, "loss": 2.3597, "step": 47170 }, { "epoch": 0.1823846855623077, "grad_norm": 0.10554526746273041, "learning_rate": 0.002, "loss": 2.363, "step": 47180 }, { "epoch": 0.18242334276569097, "grad_norm": 0.11571838706731796, "learning_rate": 0.002, "loss": 2.3433, "step": 47190 }, { "epoch": 0.18246199996907425, "grad_norm": 0.12278785556554794, "learning_rate": 0.002, "loss": 2.3524, "step": 47200 }, { "epoch": 0.1825006571724575, "grad_norm": 0.11990324407815933, "learning_rate": 0.002, "loss": 2.3657, "step": 47210 }, { "epoch": 0.18253931437584078, "grad_norm": 0.12232258170843124, "learning_rate": 0.002, "loss": 2.3548, "step": 47220 }, { "epoch": 0.18257797157922406, "grad_norm": 0.11240170150995255, "learning_rate": 0.002, "loss": 2.3586, "step": 47230 }, { "epoch": 0.18261662878260734, "grad_norm": 0.11030350625514984, "learning_rate": 0.002, "loss": 2.3689, "step": 47240 }, { "epoch": 0.18265528598599062, "grad_norm": 0.11100868135690689, "learning_rate": 0.002, "loss": 2.3597, "step": 47250 }, { "epoch": 0.1826939431893739, "grad_norm": 0.11257486790418625, "learning_rate": 0.002, "loss": 2.3606, "step": 47260 }, { "epoch": 0.18273260039275718, "grad_norm": 0.11170840263366699, "learning_rate": 0.002, "loss": 2.3673, "step": 47270 }, { "epoch": 0.18277125759614046, "grad_norm": 0.11553363502025604, "learning_rate": 0.002, "loss": 2.3631, "step": 47280 }, { "epoch": 0.18280991479952374, "grad_norm": 0.09762480109930038, "learning_rate": 0.002, "loss": 2.3577, "step": 47290 }, { "epoch": 0.18284857200290702, "grad_norm": 0.10298377275466919, "learning_rate": 0.002, "loss": 2.364, "step": 47300 }, { "epoch": 0.1828872292062903, "grad_norm": 0.10539838671684265, "learning_rate": 0.002, "loss": 2.3619, "step": 47310 }, { "epoch": 0.18292588640967358, "grad_norm": 0.10760633647441864, "learning_rate": 0.002, "loss": 2.3677, "step": 47320 }, { "epoch": 0.18296454361305686, "grad_norm": 0.09907570481300354, "learning_rate": 0.002, "loss": 2.3668, "step": 47330 }, { "epoch": 0.18300320081644014, "grad_norm": 0.10394413769245148, "learning_rate": 0.002, "loss": 2.359, "step": 47340 }, { "epoch": 0.18304185801982342, "grad_norm": 0.08843515068292618, "learning_rate": 0.002, "loss": 2.36, "step": 47350 }, { "epoch": 0.1830805152232067, "grad_norm": 0.11288506537675858, "learning_rate": 0.002, "loss": 2.3582, "step": 47360 }, { "epoch": 0.18311917242658998, "grad_norm": 0.12361892312765121, "learning_rate": 0.002, "loss": 2.3672, "step": 47370 }, { "epoch": 0.18315782962997326, "grad_norm": 0.11280398070812225, "learning_rate": 0.002, "loss": 2.3631, "step": 47380 }, { "epoch": 0.18319648683335651, "grad_norm": 0.09853893518447876, "learning_rate": 0.002, "loss": 2.3574, "step": 47390 }, { "epoch": 0.1832351440367398, "grad_norm": 0.11054504662752151, "learning_rate": 0.002, "loss": 2.3539, "step": 47400 }, { "epoch": 0.18327380124012307, "grad_norm": 0.10531944036483765, "learning_rate": 0.002, "loss": 2.3494, "step": 47410 }, { "epoch": 0.18331245844350635, "grad_norm": 0.10795912146568298, "learning_rate": 0.002, "loss": 2.3528, "step": 47420 }, { "epoch": 0.18335111564688963, "grad_norm": 0.09988661110401154, "learning_rate": 0.002, "loss": 2.3544, "step": 47430 }, { "epoch": 0.18338977285027291, "grad_norm": 0.10467620193958282, "learning_rate": 0.002, "loss": 2.355, "step": 47440 }, { "epoch": 0.1834284300536562, "grad_norm": 0.10298123955726624, "learning_rate": 0.002, "loss": 2.3515, "step": 47450 }, { "epoch": 0.18346708725703947, "grad_norm": 0.0956730917096138, "learning_rate": 0.002, "loss": 2.3619, "step": 47460 }, { "epoch": 0.18350574446042275, "grad_norm": 0.10356737673282623, "learning_rate": 0.002, "loss": 2.3707, "step": 47470 }, { "epoch": 0.18354440166380603, "grad_norm": 0.11420086771249771, "learning_rate": 0.002, "loss": 2.359, "step": 47480 }, { "epoch": 0.1835830588671893, "grad_norm": 0.1024063304066658, "learning_rate": 0.002, "loss": 2.3587, "step": 47490 }, { "epoch": 0.1836217160705726, "grad_norm": 0.11216024309396744, "learning_rate": 0.002, "loss": 2.3535, "step": 47500 }, { "epoch": 0.18366037327395587, "grad_norm": 0.10744771361351013, "learning_rate": 0.002, "loss": 2.3495, "step": 47510 }, { "epoch": 0.18369903047733915, "grad_norm": 0.09520062059164047, "learning_rate": 0.002, "loss": 2.3502, "step": 47520 }, { "epoch": 0.18373768768072243, "grad_norm": 0.10963470488786697, "learning_rate": 0.002, "loss": 2.3631, "step": 47530 }, { "epoch": 0.1837763448841057, "grad_norm": 0.11262558400630951, "learning_rate": 0.002, "loss": 2.352, "step": 47540 }, { "epoch": 0.183815002087489, "grad_norm": 0.10754802823066711, "learning_rate": 0.002, "loss": 2.3478, "step": 47550 }, { "epoch": 0.18385365929087227, "grad_norm": 0.11214997619390488, "learning_rate": 0.002, "loss": 2.3599, "step": 47560 }, { "epoch": 0.18389231649425555, "grad_norm": 0.12302026897668839, "learning_rate": 0.002, "loss": 2.3734, "step": 47570 }, { "epoch": 0.1839309736976388, "grad_norm": 0.10110511630773544, "learning_rate": 0.002, "loss": 2.3713, "step": 47580 }, { "epoch": 0.18396963090102209, "grad_norm": 0.09465829282999039, "learning_rate": 0.002, "loss": 2.3845, "step": 47590 }, { "epoch": 0.18400828810440537, "grad_norm": 0.09735490381717682, "learning_rate": 0.002, "loss": 2.3619, "step": 47600 }, { "epoch": 0.18404694530778865, "grad_norm": 0.1238107830286026, "learning_rate": 0.002, "loss": 2.3487, "step": 47610 }, { "epoch": 0.18408560251117193, "grad_norm": 0.10707355290651321, "learning_rate": 0.002, "loss": 2.3602, "step": 47620 }, { "epoch": 0.1841242597145552, "grad_norm": 0.10860617458820343, "learning_rate": 0.002, "loss": 2.3613, "step": 47630 }, { "epoch": 0.18416291691793849, "grad_norm": 0.10216601192951202, "learning_rate": 0.002, "loss": 2.3789, "step": 47640 }, { "epoch": 0.18420157412132176, "grad_norm": 0.1095500960946083, "learning_rate": 0.002, "loss": 2.3631, "step": 47650 }, { "epoch": 0.18424023132470504, "grad_norm": 0.09551633149385452, "learning_rate": 0.002, "loss": 2.3538, "step": 47660 }, { "epoch": 0.18427888852808832, "grad_norm": 0.11457042396068573, "learning_rate": 0.002, "loss": 2.3623, "step": 47670 }, { "epoch": 0.1843175457314716, "grad_norm": 0.10601530224084854, "learning_rate": 0.002, "loss": 2.3588, "step": 47680 }, { "epoch": 0.18435620293485488, "grad_norm": 0.10594504326581955, "learning_rate": 0.002, "loss": 2.3778, "step": 47690 }, { "epoch": 0.18439486013823816, "grad_norm": 0.11967021971940994, "learning_rate": 0.002, "loss": 2.3905, "step": 47700 }, { "epoch": 0.18443351734162144, "grad_norm": 0.11337132006883621, "learning_rate": 0.002, "loss": 2.3569, "step": 47710 }, { "epoch": 0.18447217454500472, "grad_norm": 0.11595602333545685, "learning_rate": 0.002, "loss": 2.3707, "step": 47720 }, { "epoch": 0.184510831748388, "grad_norm": 0.10115175694227219, "learning_rate": 0.002, "loss": 2.3535, "step": 47730 }, { "epoch": 0.18454948895177128, "grad_norm": 0.11340272426605225, "learning_rate": 0.002, "loss": 2.3655, "step": 47740 }, { "epoch": 0.18458814615515456, "grad_norm": 0.1012321338057518, "learning_rate": 0.002, "loss": 2.3511, "step": 47750 }, { "epoch": 0.18462680335853782, "grad_norm": 0.1182548850774765, "learning_rate": 0.002, "loss": 2.3602, "step": 47760 }, { "epoch": 0.1846654605619211, "grad_norm": 0.1108071506023407, "learning_rate": 0.002, "loss": 2.3705, "step": 47770 }, { "epoch": 0.18470411776530438, "grad_norm": 0.11040622740983963, "learning_rate": 0.002, "loss": 2.3554, "step": 47780 }, { "epoch": 0.18474277496868766, "grad_norm": 0.1175733357667923, "learning_rate": 0.002, "loss": 2.3642, "step": 47790 }, { "epoch": 0.18478143217207094, "grad_norm": 0.1209789365530014, "learning_rate": 0.002, "loss": 2.3547, "step": 47800 }, { "epoch": 0.18482008937545422, "grad_norm": 0.1346805989742279, "learning_rate": 0.002, "loss": 2.3785, "step": 47810 }, { "epoch": 0.1848587465788375, "grad_norm": 0.10459670424461365, "learning_rate": 0.002, "loss": 2.368, "step": 47820 }, { "epoch": 0.18489740378222078, "grad_norm": 0.14861343801021576, "learning_rate": 0.002, "loss": 2.3406, "step": 47830 }, { "epoch": 0.18493606098560406, "grad_norm": 0.10830673575401306, "learning_rate": 0.002, "loss": 2.3554, "step": 47840 }, { "epoch": 0.18497471818898734, "grad_norm": 0.12613213062286377, "learning_rate": 0.002, "loss": 2.3576, "step": 47850 }, { "epoch": 0.18501337539237062, "grad_norm": 0.12823881208896637, "learning_rate": 0.002, "loss": 2.3748, "step": 47860 }, { "epoch": 0.1850520325957539, "grad_norm": 0.10592518746852875, "learning_rate": 0.002, "loss": 2.3715, "step": 47870 }, { "epoch": 0.18509068979913718, "grad_norm": 0.10592817515134811, "learning_rate": 0.002, "loss": 2.3527, "step": 47880 }, { "epoch": 0.18512934700252046, "grad_norm": 0.1027887687087059, "learning_rate": 0.002, "loss": 2.3719, "step": 47890 }, { "epoch": 0.18516800420590374, "grad_norm": 0.10653171688318253, "learning_rate": 0.002, "loss": 2.3544, "step": 47900 }, { "epoch": 0.18520666140928702, "grad_norm": 0.11139184981584549, "learning_rate": 0.002, "loss": 2.368, "step": 47910 }, { "epoch": 0.1852453186126703, "grad_norm": 0.11169011890888214, "learning_rate": 0.002, "loss": 2.3525, "step": 47920 }, { "epoch": 0.18528397581605358, "grad_norm": 0.08974819630384445, "learning_rate": 0.002, "loss": 2.353, "step": 47930 }, { "epoch": 0.18532263301943686, "grad_norm": 0.10861147195100784, "learning_rate": 0.002, "loss": 2.3598, "step": 47940 }, { "epoch": 0.1853612902228201, "grad_norm": 0.12688162922859192, "learning_rate": 0.002, "loss": 2.3659, "step": 47950 }, { "epoch": 0.1853999474262034, "grad_norm": 0.09429048746824265, "learning_rate": 0.002, "loss": 2.3556, "step": 47960 }, { "epoch": 0.18543860462958667, "grad_norm": 0.10829395055770874, "learning_rate": 0.002, "loss": 2.3599, "step": 47970 }, { "epoch": 0.18547726183296995, "grad_norm": 0.11658895760774612, "learning_rate": 0.002, "loss": 2.3629, "step": 47980 }, { "epoch": 0.18551591903635323, "grad_norm": 0.11807240545749664, "learning_rate": 0.002, "loss": 2.3675, "step": 47990 }, { "epoch": 0.1855545762397365, "grad_norm": 0.10759463906288147, "learning_rate": 0.002, "loss": 2.3562, "step": 48000 }, { "epoch": 0.1855932334431198, "grad_norm": 0.0997847467660904, "learning_rate": 0.002, "loss": 2.356, "step": 48010 }, { "epoch": 0.18563189064650307, "grad_norm": 0.12707951664924622, "learning_rate": 0.002, "loss": 2.362, "step": 48020 }, { "epoch": 0.18567054784988635, "grad_norm": 0.10862939059734344, "learning_rate": 0.002, "loss": 2.3507, "step": 48030 }, { "epoch": 0.18570920505326963, "grad_norm": 0.11815818399190903, "learning_rate": 0.002, "loss": 2.3525, "step": 48040 }, { "epoch": 0.1857478622566529, "grad_norm": 0.10583402961492538, "learning_rate": 0.002, "loss": 2.3628, "step": 48050 }, { "epoch": 0.1857865194600362, "grad_norm": 0.11162041127681732, "learning_rate": 0.002, "loss": 2.3533, "step": 48060 }, { "epoch": 0.18582517666341947, "grad_norm": 0.1119517907500267, "learning_rate": 0.002, "loss": 2.3496, "step": 48070 }, { "epoch": 0.18586383386680275, "grad_norm": 0.11928005516529083, "learning_rate": 0.002, "loss": 2.3551, "step": 48080 }, { "epoch": 0.18590249107018603, "grad_norm": 0.11648055911064148, "learning_rate": 0.002, "loss": 2.3646, "step": 48090 }, { "epoch": 0.1859411482735693, "grad_norm": 0.111610546708107, "learning_rate": 0.002, "loss": 2.3657, "step": 48100 }, { "epoch": 0.1859798054769526, "grad_norm": 0.10913601517677307, "learning_rate": 0.002, "loss": 2.3734, "step": 48110 }, { "epoch": 0.18601846268033587, "grad_norm": 0.1146617978811264, "learning_rate": 0.002, "loss": 2.37, "step": 48120 }, { "epoch": 0.18605711988371912, "grad_norm": 0.11511615663766861, "learning_rate": 0.002, "loss": 2.3707, "step": 48130 }, { "epoch": 0.1860957770871024, "grad_norm": 0.13018366694450378, "learning_rate": 0.002, "loss": 2.3568, "step": 48140 }, { "epoch": 0.18613443429048568, "grad_norm": 0.10985668003559113, "learning_rate": 0.002, "loss": 2.3574, "step": 48150 }, { "epoch": 0.18617309149386896, "grad_norm": 0.10770490020513535, "learning_rate": 0.002, "loss": 2.371, "step": 48160 }, { "epoch": 0.18621174869725224, "grad_norm": 0.10217759013175964, "learning_rate": 0.002, "loss": 2.3725, "step": 48170 }, { "epoch": 0.18625040590063552, "grad_norm": 0.1058979406952858, "learning_rate": 0.002, "loss": 2.3522, "step": 48180 }, { "epoch": 0.1862890631040188, "grad_norm": 0.11669570952653885, "learning_rate": 0.002, "loss": 2.3464, "step": 48190 }, { "epoch": 0.18632772030740208, "grad_norm": 0.15146414935588837, "learning_rate": 0.002, "loss": 2.3633, "step": 48200 }, { "epoch": 0.18636637751078536, "grad_norm": 0.11123238503932953, "learning_rate": 0.002, "loss": 2.3694, "step": 48210 }, { "epoch": 0.18640503471416864, "grad_norm": 0.11966782063245773, "learning_rate": 0.002, "loss": 2.353, "step": 48220 }, { "epoch": 0.18644369191755192, "grad_norm": 0.08881065249443054, "learning_rate": 0.002, "loss": 2.3499, "step": 48230 }, { "epoch": 0.1864823491209352, "grad_norm": 0.11233676224946976, "learning_rate": 0.002, "loss": 2.37, "step": 48240 }, { "epoch": 0.18652100632431848, "grad_norm": 0.1251545548439026, "learning_rate": 0.002, "loss": 2.368, "step": 48250 }, { "epoch": 0.18655966352770176, "grad_norm": 0.10777512192726135, "learning_rate": 0.002, "loss": 2.3692, "step": 48260 }, { "epoch": 0.18659832073108504, "grad_norm": 0.10856172442436218, "learning_rate": 0.002, "loss": 2.3545, "step": 48270 }, { "epoch": 0.18663697793446832, "grad_norm": 0.11710671335458755, "learning_rate": 0.002, "loss": 2.3562, "step": 48280 }, { "epoch": 0.1866756351378516, "grad_norm": 0.13152992725372314, "learning_rate": 0.002, "loss": 2.3805, "step": 48290 }, { "epoch": 0.18671429234123488, "grad_norm": 0.10631529986858368, "learning_rate": 0.002, "loss": 2.3583, "step": 48300 }, { "epoch": 0.18675294954461816, "grad_norm": 0.09661126136779785, "learning_rate": 0.002, "loss": 2.3515, "step": 48310 }, { "epoch": 0.1867916067480014, "grad_norm": 0.10520081222057343, "learning_rate": 0.002, "loss": 2.3491, "step": 48320 }, { "epoch": 0.1868302639513847, "grad_norm": 0.11317374557256699, "learning_rate": 0.002, "loss": 2.3661, "step": 48330 }, { "epoch": 0.18686892115476797, "grad_norm": 0.09754928201436996, "learning_rate": 0.002, "loss": 2.3566, "step": 48340 }, { "epoch": 0.18690757835815125, "grad_norm": 0.10474085062742233, "learning_rate": 0.002, "loss": 2.3765, "step": 48350 }, { "epoch": 0.18694623556153453, "grad_norm": 0.10286795347929001, "learning_rate": 0.002, "loss": 2.3493, "step": 48360 }, { "epoch": 0.1869848927649178, "grad_norm": 0.10378462821245193, "learning_rate": 0.002, "loss": 2.3814, "step": 48370 }, { "epoch": 0.1870235499683011, "grad_norm": 0.11171542853116989, "learning_rate": 0.002, "loss": 2.3725, "step": 48380 }, { "epoch": 0.18706220717168437, "grad_norm": 0.11779604852199554, "learning_rate": 0.002, "loss": 2.3587, "step": 48390 }, { "epoch": 0.18710086437506765, "grad_norm": 0.0927729532122612, "learning_rate": 0.002, "loss": 2.3781, "step": 48400 }, { "epoch": 0.18713952157845093, "grad_norm": 0.13855071365833282, "learning_rate": 0.002, "loss": 2.359, "step": 48410 }, { "epoch": 0.1871781787818342, "grad_norm": 0.10825547575950623, "learning_rate": 0.002, "loss": 2.3726, "step": 48420 }, { "epoch": 0.1872168359852175, "grad_norm": 0.10999954491853714, "learning_rate": 0.002, "loss": 2.3713, "step": 48430 }, { "epoch": 0.18725549318860077, "grad_norm": 0.10651865601539612, "learning_rate": 0.002, "loss": 2.3572, "step": 48440 }, { "epoch": 0.18729415039198405, "grad_norm": 0.09782829135656357, "learning_rate": 0.002, "loss": 2.3577, "step": 48450 }, { "epoch": 0.18733280759536733, "grad_norm": 0.15147368609905243, "learning_rate": 0.002, "loss": 2.3542, "step": 48460 }, { "epoch": 0.1873714647987506, "grad_norm": 0.21194928884506226, "learning_rate": 0.002, "loss": 2.3567, "step": 48470 }, { "epoch": 0.1874101220021339, "grad_norm": 0.1071695014834404, "learning_rate": 0.002, "loss": 2.3627, "step": 48480 }, { "epoch": 0.18744877920551717, "grad_norm": 0.1013324111700058, "learning_rate": 0.002, "loss": 2.3687, "step": 48490 }, { "epoch": 0.18748743640890042, "grad_norm": 0.10426542907953262, "learning_rate": 0.002, "loss": 2.353, "step": 48500 }, { "epoch": 0.1875260936122837, "grad_norm": 0.10406072437763214, "learning_rate": 0.002, "loss": 2.3471, "step": 48510 }, { "epoch": 0.18756475081566698, "grad_norm": 0.10694553703069687, "learning_rate": 0.002, "loss": 2.3775, "step": 48520 }, { "epoch": 0.18760340801905026, "grad_norm": 0.10278517007827759, "learning_rate": 0.002, "loss": 2.3641, "step": 48530 }, { "epoch": 0.18764206522243354, "grad_norm": 0.12315454334020615, "learning_rate": 0.002, "loss": 2.3586, "step": 48540 }, { "epoch": 0.18768072242581682, "grad_norm": 0.10810661315917969, "learning_rate": 0.002, "loss": 2.3746, "step": 48550 }, { "epoch": 0.1877193796292001, "grad_norm": 0.10936526954174042, "learning_rate": 0.002, "loss": 2.358, "step": 48560 }, { "epoch": 0.18775803683258338, "grad_norm": 0.10920289158821106, "learning_rate": 0.002, "loss": 2.3724, "step": 48570 }, { "epoch": 0.18779669403596666, "grad_norm": 0.10547734797000885, "learning_rate": 0.002, "loss": 2.351, "step": 48580 }, { "epoch": 0.18783535123934994, "grad_norm": 0.09385696798563004, "learning_rate": 0.002, "loss": 2.3556, "step": 48590 }, { "epoch": 0.18787400844273322, "grad_norm": 0.1407233476638794, "learning_rate": 0.002, "loss": 2.3688, "step": 48600 }, { "epoch": 0.1879126656461165, "grad_norm": 0.11766829341650009, "learning_rate": 0.002, "loss": 2.3549, "step": 48610 }, { "epoch": 0.18795132284949978, "grad_norm": 0.13642434775829315, "learning_rate": 0.002, "loss": 2.3576, "step": 48620 }, { "epoch": 0.18798998005288306, "grad_norm": 0.11041362583637238, "learning_rate": 0.002, "loss": 2.3554, "step": 48630 }, { "epoch": 0.18802863725626634, "grad_norm": 0.112607941031456, "learning_rate": 0.002, "loss": 2.3562, "step": 48640 }, { "epoch": 0.18806729445964962, "grad_norm": 0.10543616116046906, "learning_rate": 0.002, "loss": 2.3494, "step": 48650 }, { "epoch": 0.1881059516630329, "grad_norm": 0.11784285306930542, "learning_rate": 0.002, "loss": 2.3693, "step": 48660 }, { "epoch": 0.18814460886641618, "grad_norm": 0.09574959427118301, "learning_rate": 0.002, "loss": 2.367, "step": 48670 }, { "epoch": 0.18818326606979946, "grad_norm": 0.11014584451913834, "learning_rate": 0.002, "loss": 2.3654, "step": 48680 }, { "epoch": 0.1882219232731827, "grad_norm": 0.1165049821138382, "learning_rate": 0.002, "loss": 2.362, "step": 48690 }, { "epoch": 0.188260580476566, "grad_norm": 0.10616659373044968, "learning_rate": 0.002, "loss": 2.3567, "step": 48700 }, { "epoch": 0.18829923767994927, "grad_norm": 0.11664260178804398, "learning_rate": 0.002, "loss": 2.3635, "step": 48710 }, { "epoch": 0.18833789488333255, "grad_norm": 0.1024443507194519, "learning_rate": 0.002, "loss": 2.3716, "step": 48720 }, { "epoch": 0.18837655208671583, "grad_norm": 0.1058788001537323, "learning_rate": 0.002, "loss": 2.3469, "step": 48730 }, { "epoch": 0.1884152092900991, "grad_norm": 0.10449366271495819, "learning_rate": 0.002, "loss": 2.3593, "step": 48740 }, { "epoch": 0.1884538664934824, "grad_norm": 0.11875820904970169, "learning_rate": 0.002, "loss": 2.3658, "step": 48750 }, { "epoch": 0.18849252369686567, "grad_norm": 0.11891734600067139, "learning_rate": 0.002, "loss": 2.3515, "step": 48760 }, { "epoch": 0.18853118090024895, "grad_norm": 0.10855202376842499, "learning_rate": 0.002, "loss": 2.3586, "step": 48770 }, { "epoch": 0.18856983810363223, "grad_norm": 0.1013849750161171, "learning_rate": 0.002, "loss": 2.3575, "step": 48780 }, { "epoch": 0.1886084953070155, "grad_norm": 0.13188014924526215, "learning_rate": 0.002, "loss": 2.364, "step": 48790 }, { "epoch": 0.1886471525103988, "grad_norm": 0.09956081956624985, "learning_rate": 0.002, "loss": 2.3663, "step": 48800 }, { "epoch": 0.18868580971378207, "grad_norm": 0.10106679052114487, "learning_rate": 0.002, "loss": 2.3655, "step": 48810 }, { "epoch": 0.18872446691716535, "grad_norm": 0.12552915513515472, "learning_rate": 0.002, "loss": 2.368, "step": 48820 }, { "epoch": 0.18876312412054863, "grad_norm": 0.1399880200624466, "learning_rate": 0.002, "loss": 2.3779, "step": 48830 }, { "epoch": 0.1888017813239319, "grad_norm": 0.10296420007944107, "learning_rate": 0.002, "loss": 2.3738, "step": 48840 }, { "epoch": 0.1888404385273152, "grad_norm": 0.12583224475383759, "learning_rate": 0.002, "loss": 2.3638, "step": 48850 }, { "epoch": 0.18887909573069847, "grad_norm": 0.11771658062934875, "learning_rate": 0.002, "loss": 2.3714, "step": 48860 }, { "epoch": 0.18891775293408172, "grad_norm": 0.095871701836586, "learning_rate": 0.002, "loss": 2.355, "step": 48870 }, { "epoch": 0.188956410137465, "grad_norm": 0.10693106800317764, "learning_rate": 0.002, "loss": 2.3548, "step": 48880 }, { "epoch": 0.18899506734084828, "grad_norm": 0.12797410786151886, "learning_rate": 0.002, "loss": 2.3659, "step": 48890 }, { "epoch": 0.18903372454423156, "grad_norm": 0.09604047238826752, "learning_rate": 0.002, "loss": 2.3542, "step": 48900 }, { "epoch": 0.18907238174761484, "grad_norm": 0.12026006728410721, "learning_rate": 0.002, "loss": 2.3619, "step": 48910 }, { "epoch": 0.18911103895099812, "grad_norm": 0.11032012104988098, "learning_rate": 0.002, "loss": 2.3616, "step": 48920 }, { "epoch": 0.1891496961543814, "grad_norm": 0.1259058266878128, "learning_rate": 0.002, "loss": 2.3561, "step": 48930 }, { "epoch": 0.18918835335776468, "grad_norm": 0.10738653689622879, "learning_rate": 0.002, "loss": 2.3742, "step": 48940 }, { "epoch": 0.18922701056114796, "grad_norm": 0.10654452443122864, "learning_rate": 0.002, "loss": 2.3497, "step": 48950 }, { "epoch": 0.18926566776453124, "grad_norm": 0.10034621506929398, "learning_rate": 0.002, "loss": 2.354, "step": 48960 }, { "epoch": 0.18930432496791452, "grad_norm": 0.11938867717981339, "learning_rate": 0.002, "loss": 2.3764, "step": 48970 }, { "epoch": 0.1893429821712978, "grad_norm": 0.10956014692783356, "learning_rate": 0.002, "loss": 2.3719, "step": 48980 }, { "epoch": 0.18938163937468108, "grad_norm": 0.093159019947052, "learning_rate": 0.002, "loss": 2.362, "step": 48990 }, { "epoch": 0.18942029657806436, "grad_norm": 0.11266133189201355, "learning_rate": 0.002, "loss": 2.3455, "step": 49000 }, { "epoch": 0.18945895378144764, "grad_norm": 0.10753951221704483, "learning_rate": 0.002, "loss": 2.3672, "step": 49010 }, { "epoch": 0.18949761098483092, "grad_norm": 0.10640676319599152, "learning_rate": 0.002, "loss": 2.3723, "step": 49020 }, { "epoch": 0.1895362681882142, "grad_norm": 0.10722913593053818, "learning_rate": 0.002, "loss": 2.3804, "step": 49030 }, { "epoch": 0.18957492539159748, "grad_norm": 0.11566959321498871, "learning_rate": 0.002, "loss": 2.3499, "step": 49040 }, { "epoch": 0.18961358259498076, "grad_norm": 0.097598135471344, "learning_rate": 0.002, "loss": 2.3516, "step": 49050 }, { "epoch": 0.18965223979836401, "grad_norm": 0.1264805644750595, "learning_rate": 0.002, "loss": 2.3569, "step": 49060 }, { "epoch": 0.1896908970017473, "grad_norm": 0.11760244518518448, "learning_rate": 0.002, "loss": 2.3602, "step": 49070 }, { "epoch": 0.18972955420513057, "grad_norm": 0.11690422892570496, "learning_rate": 0.002, "loss": 2.3679, "step": 49080 }, { "epoch": 0.18976821140851385, "grad_norm": 0.10791273415088654, "learning_rate": 0.002, "loss": 2.3661, "step": 49090 }, { "epoch": 0.18980686861189713, "grad_norm": 0.09621349722146988, "learning_rate": 0.002, "loss": 2.3634, "step": 49100 }, { "epoch": 0.1898455258152804, "grad_norm": 0.11259328573942184, "learning_rate": 0.002, "loss": 2.3571, "step": 49110 }, { "epoch": 0.1898841830186637, "grad_norm": 0.11389777064323425, "learning_rate": 0.002, "loss": 2.338, "step": 49120 }, { "epoch": 0.18992284022204697, "grad_norm": 0.10725866258144379, "learning_rate": 0.002, "loss": 2.349, "step": 49130 }, { "epoch": 0.18996149742543025, "grad_norm": 0.08963416516780853, "learning_rate": 0.002, "loss": 2.3662, "step": 49140 }, { "epoch": 0.19000015462881353, "grad_norm": 0.13235047459602356, "learning_rate": 0.002, "loss": 2.3623, "step": 49150 }, { "epoch": 0.1900388118321968, "grad_norm": 0.11058180034160614, "learning_rate": 0.002, "loss": 2.3568, "step": 49160 }, { "epoch": 0.1900774690355801, "grad_norm": 0.11632433533668518, "learning_rate": 0.002, "loss": 2.3645, "step": 49170 }, { "epoch": 0.19011612623896337, "grad_norm": 0.0975283533334732, "learning_rate": 0.002, "loss": 2.3543, "step": 49180 }, { "epoch": 0.19015478344234665, "grad_norm": 0.09888440370559692, "learning_rate": 0.002, "loss": 2.3689, "step": 49190 }, { "epoch": 0.19019344064572993, "grad_norm": 0.10686645656824112, "learning_rate": 0.002, "loss": 2.3707, "step": 49200 }, { "epoch": 0.1902320978491132, "grad_norm": 0.11044856160879135, "learning_rate": 0.002, "loss": 2.3669, "step": 49210 }, { "epoch": 0.1902707550524965, "grad_norm": 0.1261729896068573, "learning_rate": 0.002, "loss": 2.3642, "step": 49220 }, { "epoch": 0.19030941225587977, "grad_norm": 0.10963032394647598, "learning_rate": 0.002, "loss": 2.3688, "step": 49230 }, { "epoch": 0.19034806945926305, "grad_norm": 0.10861487686634064, "learning_rate": 0.002, "loss": 2.3709, "step": 49240 }, { "epoch": 0.1903867266626463, "grad_norm": 0.12178757786750793, "learning_rate": 0.002, "loss": 2.3607, "step": 49250 }, { "epoch": 0.19042538386602959, "grad_norm": 0.09885502606630325, "learning_rate": 0.002, "loss": 2.3529, "step": 49260 }, { "epoch": 0.19046404106941286, "grad_norm": 0.10452932864427567, "learning_rate": 0.002, "loss": 2.3677, "step": 49270 }, { "epoch": 0.19050269827279614, "grad_norm": 0.11776251345872879, "learning_rate": 0.002, "loss": 2.3626, "step": 49280 }, { "epoch": 0.19054135547617942, "grad_norm": 0.12242273986339569, "learning_rate": 0.002, "loss": 2.3618, "step": 49290 }, { "epoch": 0.1905800126795627, "grad_norm": 0.11257563531398773, "learning_rate": 0.002, "loss": 2.3582, "step": 49300 }, { "epoch": 0.19061866988294598, "grad_norm": 0.10450883209705353, "learning_rate": 0.002, "loss": 2.3718, "step": 49310 }, { "epoch": 0.19065732708632926, "grad_norm": 0.11983456462621689, "learning_rate": 0.002, "loss": 2.3534, "step": 49320 }, { "epoch": 0.19069598428971254, "grad_norm": 0.10491986572742462, "learning_rate": 0.002, "loss": 2.3799, "step": 49330 }, { "epoch": 0.19073464149309582, "grad_norm": 0.11008800566196442, "learning_rate": 0.002, "loss": 2.3613, "step": 49340 }, { "epoch": 0.1907732986964791, "grad_norm": 0.10659252852201462, "learning_rate": 0.002, "loss": 2.3506, "step": 49350 }, { "epoch": 0.19081195589986238, "grad_norm": 0.10718666017055511, "learning_rate": 0.002, "loss": 2.3576, "step": 49360 }, { "epoch": 0.19085061310324566, "grad_norm": 0.10284899175167084, "learning_rate": 0.002, "loss": 2.3746, "step": 49370 }, { "epoch": 0.19088927030662894, "grad_norm": 0.10044750571250916, "learning_rate": 0.002, "loss": 2.3508, "step": 49380 }, { "epoch": 0.19092792751001222, "grad_norm": 0.11064857989549637, "learning_rate": 0.002, "loss": 2.363, "step": 49390 }, { "epoch": 0.1909665847133955, "grad_norm": 0.12401686608791351, "learning_rate": 0.002, "loss": 2.3552, "step": 49400 }, { "epoch": 0.19100524191677878, "grad_norm": 0.18540844321250916, "learning_rate": 0.002, "loss": 2.3607, "step": 49410 }, { "epoch": 0.19104389912016206, "grad_norm": 0.10839496552944183, "learning_rate": 0.002, "loss": 2.3592, "step": 49420 }, { "epoch": 0.19108255632354532, "grad_norm": 0.09727524220943451, "learning_rate": 0.002, "loss": 2.3588, "step": 49430 }, { "epoch": 0.1911212135269286, "grad_norm": 0.09541568905115128, "learning_rate": 0.002, "loss": 2.3533, "step": 49440 }, { "epoch": 0.19115987073031188, "grad_norm": 0.09279711544513702, "learning_rate": 0.002, "loss": 2.3668, "step": 49450 }, { "epoch": 0.19119852793369516, "grad_norm": 0.1435684859752655, "learning_rate": 0.002, "loss": 2.3678, "step": 49460 }, { "epoch": 0.19123718513707844, "grad_norm": 0.09051964432001114, "learning_rate": 0.002, "loss": 2.3642, "step": 49470 }, { "epoch": 0.19127584234046172, "grad_norm": 0.10041384398937225, "learning_rate": 0.002, "loss": 2.3515, "step": 49480 }, { "epoch": 0.191314499543845, "grad_norm": 0.12758305668830872, "learning_rate": 0.002, "loss": 2.3706, "step": 49490 }, { "epoch": 0.19135315674722828, "grad_norm": 0.1029890701174736, "learning_rate": 0.002, "loss": 2.3618, "step": 49500 }, { "epoch": 0.19139181395061156, "grad_norm": 0.11915244907140732, "learning_rate": 0.002, "loss": 2.3631, "step": 49510 }, { "epoch": 0.19143047115399484, "grad_norm": 0.11430905759334564, "learning_rate": 0.002, "loss": 2.3502, "step": 49520 }, { "epoch": 0.19146912835737812, "grad_norm": 0.11046679317951202, "learning_rate": 0.002, "loss": 2.3427, "step": 49530 }, { "epoch": 0.1915077855607614, "grad_norm": 0.0970064029097557, "learning_rate": 0.002, "loss": 2.3556, "step": 49540 }, { "epoch": 0.19154644276414468, "grad_norm": 0.11496380716562271, "learning_rate": 0.002, "loss": 2.3634, "step": 49550 }, { "epoch": 0.19158509996752796, "grad_norm": 0.10296601802110672, "learning_rate": 0.002, "loss": 2.3652, "step": 49560 }, { "epoch": 0.19162375717091124, "grad_norm": 0.10830742120742798, "learning_rate": 0.002, "loss": 2.359, "step": 49570 }, { "epoch": 0.19166241437429452, "grad_norm": 0.1091323122382164, "learning_rate": 0.002, "loss": 2.3663, "step": 49580 }, { "epoch": 0.1917010715776778, "grad_norm": 0.118216872215271, "learning_rate": 0.002, "loss": 2.3642, "step": 49590 }, { "epoch": 0.19173972878106108, "grad_norm": 0.12398912012577057, "learning_rate": 0.002, "loss": 2.3532, "step": 49600 }, { "epoch": 0.19177838598444436, "grad_norm": 0.10063641518354416, "learning_rate": 0.002, "loss": 2.3646, "step": 49610 }, { "epoch": 0.1918170431878276, "grad_norm": 0.10972592234611511, "learning_rate": 0.002, "loss": 2.3663, "step": 49620 }, { "epoch": 0.1918557003912109, "grad_norm": 0.1127837672829628, "learning_rate": 0.002, "loss": 2.3582, "step": 49630 }, { "epoch": 0.19189435759459417, "grad_norm": 0.10776273161172867, "learning_rate": 0.002, "loss": 2.3306, "step": 49640 }, { "epoch": 0.19193301479797745, "grad_norm": 0.09368200600147247, "learning_rate": 0.002, "loss": 2.3691, "step": 49650 }, { "epoch": 0.19197167200136073, "grad_norm": 0.11541017889976501, "learning_rate": 0.002, "loss": 2.3706, "step": 49660 }, { "epoch": 0.192010329204744, "grad_norm": 0.11094188690185547, "learning_rate": 0.002, "loss": 2.3673, "step": 49670 }, { "epoch": 0.1920489864081273, "grad_norm": 0.09609813988208771, "learning_rate": 0.002, "loss": 2.3517, "step": 49680 }, { "epoch": 0.19208764361151057, "grad_norm": 0.14049966633319855, "learning_rate": 0.002, "loss": 2.3451, "step": 49690 }, { "epoch": 0.19212630081489385, "grad_norm": 0.10160891711711884, "learning_rate": 0.002, "loss": 2.3731, "step": 49700 }, { "epoch": 0.19216495801827713, "grad_norm": 0.10994237661361694, "learning_rate": 0.002, "loss": 2.3684, "step": 49710 }, { "epoch": 0.1922036152216604, "grad_norm": 0.10198924690485, "learning_rate": 0.002, "loss": 2.3533, "step": 49720 }, { "epoch": 0.1922422724250437, "grad_norm": 0.09823419898748398, "learning_rate": 0.002, "loss": 2.3659, "step": 49730 }, { "epoch": 0.19228092962842697, "grad_norm": 0.1106940433382988, "learning_rate": 0.002, "loss": 2.3743, "step": 49740 }, { "epoch": 0.19231958683181025, "grad_norm": 0.11430583149194717, "learning_rate": 0.002, "loss": 2.3656, "step": 49750 }, { "epoch": 0.19235824403519353, "grad_norm": 0.10822034627199173, "learning_rate": 0.002, "loss": 2.3636, "step": 49760 }, { "epoch": 0.1923969012385768, "grad_norm": 0.12003743648529053, "learning_rate": 0.002, "loss": 2.3624, "step": 49770 }, { "epoch": 0.1924355584419601, "grad_norm": 0.09463027864694595, "learning_rate": 0.002, "loss": 2.365, "step": 49780 }, { "epoch": 0.19247421564534337, "grad_norm": 0.11937935650348663, "learning_rate": 0.002, "loss": 2.3537, "step": 49790 }, { "epoch": 0.19251287284872662, "grad_norm": 0.1150641217827797, "learning_rate": 0.002, "loss": 2.3699, "step": 49800 }, { "epoch": 0.1925515300521099, "grad_norm": 0.10229261964559555, "learning_rate": 0.002, "loss": 2.3678, "step": 49810 }, { "epoch": 0.19259018725549318, "grad_norm": 0.1260485053062439, "learning_rate": 0.002, "loss": 2.361, "step": 49820 }, { "epoch": 0.19262884445887646, "grad_norm": 0.10355617851018906, "learning_rate": 0.002, "loss": 2.3561, "step": 49830 }, { "epoch": 0.19266750166225974, "grad_norm": 0.10395023226737976, "learning_rate": 0.002, "loss": 2.3604, "step": 49840 }, { "epoch": 0.19270615886564302, "grad_norm": 0.11862505972385406, "learning_rate": 0.002, "loss": 2.3621, "step": 49850 }, { "epoch": 0.1927448160690263, "grad_norm": 0.12586042284965515, "learning_rate": 0.002, "loss": 2.3702, "step": 49860 }, { "epoch": 0.19278347327240958, "grad_norm": 0.10471781343221664, "learning_rate": 0.002, "loss": 2.3624, "step": 49870 }, { "epoch": 0.19282213047579286, "grad_norm": 0.12276338040828705, "learning_rate": 0.002, "loss": 2.3561, "step": 49880 }, { "epoch": 0.19286078767917614, "grad_norm": 0.11906957626342773, "learning_rate": 0.002, "loss": 2.3641, "step": 49890 }, { "epoch": 0.19289944488255942, "grad_norm": 0.126028373837471, "learning_rate": 0.002, "loss": 2.3612, "step": 49900 }, { "epoch": 0.1929381020859427, "grad_norm": 0.10770002752542496, "learning_rate": 0.002, "loss": 2.3647, "step": 49910 }, { "epoch": 0.19297675928932598, "grad_norm": 0.1063154861330986, "learning_rate": 0.002, "loss": 2.343, "step": 49920 }, { "epoch": 0.19301541649270926, "grad_norm": 0.10494157671928406, "learning_rate": 0.002, "loss": 2.3486, "step": 49930 }, { "epoch": 0.19305407369609254, "grad_norm": 0.10478539019823074, "learning_rate": 0.002, "loss": 2.3489, "step": 49940 }, { "epoch": 0.19309273089947582, "grad_norm": 0.0903603732585907, "learning_rate": 0.002, "loss": 2.3395, "step": 49950 }, { "epoch": 0.1931313881028591, "grad_norm": 0.10410972684621811, "learning_rate": 0.002, "loss": 2.3533, "step": 49960 }, { "epoch": 0.19317004530624238, "grad_norm": 0.1011076495051384, "learning_rate": 0.002, "loss": 2.3719, "step": 49970 }, { "epoch": 0.19320870250962566, "grad_norm": 0.10331527888774872, "learning_rate": 0.002, "loss": 2.3538, "step": 49980 }, { "epoch": 0.1932473597130089, "grad_norm": 0.12761783599853516, "learning_rate": 0.002, "loss": 2.3436, "step": 49990 }, { "epoch": 0.1932860169163922, "grad_norm": 0.1133008822798729, "learning_rate": 0.002, "loss": 2.3613, "step": 50000 }, { "epoch": 0.19332467411977547, "grad_norm": 0.10571694374084473, "learning_rate": 0.002, "loss": 2.3816, "step": 50010 }, { "epoch": 0.19336333132315875, "grad_norm": 0.10567709803581238, "learning_rate": 0.002, "loss": 2.3574, "step": 50020 }, { "epoch": 0.19340198852654203, "grad_norm": 0.10329192876815796, "learning_rate": 0.002, "loss": 2.3486, "step": 50030 }, { "epoch": 0.1934406457299253, "grad_norm": 0.12079528719186783, "learning_rate": 0.002, "loss": 2.3594, "step": 50040 }, { "epoch": 0.1934793029333086, "grad_norm": 0.10283046960830688, "learning_rate": 0.002, "loss": 2.364, "step": 50050 }, { "epoch": 0.19351796013669187, "grad_norm": 0.10554220527410507, "learning_rate": 0.002, "loss": 2.3622, "step": 50060 }, { "epoch": 0.19355661734007515, "grad_norm": 0.10391606390476227, "learning_rate": 0.002, "loss": 2.3541, "step": 50070 }, { "epoch": 0.19359527454345843, "grad_norm": 0.10973990708589554, "learning_rate": 0.002, "loss": 2.3325, "step": 50080 }, { "epoch": 0.1936339317468417, "grad_norm": 0.10813383758068085, "learning_rate": 0.002, "loss": 2.3513, "step": 50090 }, { "epoch": 0.193672588950225, "grad_norm": 0.10249023884534836, "learning_rate": 0.002, "loss": 2.3702, "step": 50100 }, { "epoch": 0.19371124615360827, "grad_norm": 0.11180222779512405, "learning_rate": 0.002, "loss": 2.3631, "step": 50110 }, { "epoch": 0.19374990335699155, "grad_norm": 0.09287769347429276, "learning_rate": 0.002, "loss": 2.3579, "step": 50120 }, { "epoch": 0.19378856056037483, "grad_norm": 0.11256791651248932, "learning_rate": 0.002, "loss": 2.3635, "step": 50130 }, { "epoch": 0.1938272177637581, "grad_norm": 0.10587641596794128, "learning_rate": 0.002, "loss": 2.3675, "step": 50140 }, { "epoch": 0.1938658749671414, "grad_norm": 0.10730777680873871, "learning_rate": 0.002, "loss": 2.3666, "step": 50150 }, { "epoch": 0.19390453217052467, "grad_norm": 0.10186601430177689, "learning_rate": 0.002, "loss": 2.3627, "step": 50160 }, { "epoch": 0.19394318937390792, "grad_norm": 0.11971145868301392, "learning_rate": 0.002, "loss": 2.3436, "step": 50170 }, { "epoch": 0.1939818465772912, "grad_norm": 0.09542839974164963, "learning_rate": 0.002, "loss": 2.3693, "step": 50180 }, { "epoch": 0.19402050378067448, "grad_norm": 0.10131485760211945, "learning_rate": 0.002, "loss": 2.3464, "step": 50190 }, { "epoch": 0.19405916098405776, "grad_norm": 0.10544635355472565, "learning_rate": 0.002, "loss": 2.3683, "step": 50200 }, { "epoch": 0.19409781818744104, "grad_norm": 0.12036942690610886, "learning_rate": 0.002, "loss": 2.3418, "step": 50210 }, { "epoch": 0.19413647539082432, "grad_norm": 0.12579873204231262, "learning_rate": 0.002, "loss": 2.3596, "step": 50220 }, { "epoch": 0.1941751325942076, "grad_norm": 0.10959774255752563, "learning_rate": 0.002, "loss": 2.3681, "step": 50230 }, { "epoch": 0.19421378979759088, "grad_norm": 0.10837439447641373, "learning_rate": 0.002, "loss": 2.3615, "step": 50240 }, { "epoch": 0.19425244700097416, "grad_norm": 0.10460014641284943, "learning_rate": 0.002, "loss": 2.3551, "step": 50250 }, { "epoch": 0.19429110420435744, "grad_norm": 0.11192625761032104, "learning_rate": 0.002, "loss": 2.3547, "step": 50260 }, { "epoch": 0.19432976140774072, "grad_norm": 0.09709108620882034, "learning_rate": 0.002, "loss": 2.3581, "step": 50270 }, { "epoch": 0.194368418611124, "grad_norm": 0.12948331236839294, "learning_rate": 0.002, "loss": 2.3519, "step": 50280 }, { "epoch": 0.19440707581450728, "grad_norm": 0.12882012128829956, "learning_rate": 0.002, "loss": 2.3548, "step": 50290 }, { "epoch": 0.19444573301789056, "grad_norm": 0.09529069811105728, "learning_rate": 0.002, "loss": 2.3399, "step": 50300 }, { "epoch": 0.19448439022127384, "grad_norm": 0.09715364873409271, "learning_rate": 0.002, "loss": 2.3655, "step": 50310 }, { "epoch": 0.19452304742465712, "grad_norm": 0.09917979687452316, "learning_rate": 0.002, "loss": 2.3561, "step": 50320 }, { "epoch": 0.1945617046280404, "grad_norm": 0.32154932618141174, "learning_rate": 0.002, "loss": 2.3632, "step": 50330 }, { "epoch": 0.19460036183142368, "grad_norm": 0.12019972503185272, "learning_rate": 0.002, "loss": 2.3783, "step": 50340 }, { "epoch": 0.19463901903480696, "grad_norm": 0.12406770139932632, "learning_rate": 0.002, "loss": 2.3467, "step": 50350 }, { "epoch": 0.1946776762381902, "grad_norm": 0.10514701157808304, "learning_rate": 0.002, "loss": 2.3747, "step": 50360 }, { "epoch": 0.1947163334415735, "grad_norm": 0.11640505492687225, "learning_rate": 0.002, "loss": 2.3545, "step": 50370 }, { "epoch": 0.19475499064495677, "grad_norm": 0.1056564599275589, "learning_rate": 0.002, "loss": 2.3554, "step": 50380 }, { "epoch": 0.19479364784834005, "grad_norm": 0.127691850066185, "learning_rate": 0.002, "loss": 2.3586, "step": 50390 }, { "epoch": 0.19483230505172333, "grad_norm": 0.09586314857006073, "learning_rate": 0.002, "loss": 2.356, "step": 50400 }, { "epoch": 0.1948709622551066, "grad_norm": 0.08516710251569748, "learning_rate": 0.002, "loss": 2.3484, "step": 50410 }, { "epoch": 0.1949096194584899, "grad_norm": 0.10823085159063339, "learning_rate": 0.002, "loss": 2.3656, "step": 50420 }, { "epoch": 0.19494827666187317, "grad_norm": 0.11516916751861572, "learning_rate": 0.002, "loss": 2.3713, "step": 50430 }, { "epoch": 0.19498693386525645, "grad_norm": 0.11594846844673157, "learning_rate": 0.002, "loss": 2.3619, "step": 50440 }, { "epoch": 0.19502559106863973, "grad_norm": 0.1126951277256012, "learning_rate": 0.002, "loss": 2.3741, "step": 50450 }, { "epoch": 0.195064248272023, "grad_norm": 0.10487323999404907, "learning_rate": 0.002, "loss": 2.3534, "step": 50460 }, { "epoch": 0.1951029054754063, "grad_norm": 0.11344010382890701, "learning_rate": 0.002, "loss": 2.367, "step": 50470 }, { "epoch": 0.19514156267878957, "grad_norm": 0.1146879568696022, "learning_rate": 0.002, "loss": 2.3625, "step": 50480 }, { "epoch": 0.19518021988217285, "grad_norm": 0.10187899321317673, "learning_rate": 0.002, "loss": 2.3498, "step": 50490 }, { "epoch": 0.19521887708555613, "grad_norm": 0.11327874660491943, "learning_rate": 0.002, "loss": 2.361, "step": 50500 }, { "epoch": 0.1952575342889394, "grad_norm": 0.11856690049171448, "learning_rate": 0.002, "loss": 2.3716, "step": 50510 }, { "epoch": 0.1952961914923227, "grad_norm": 0.11800426989793777, "learning_rate": 0.002, "loss": 2.3458, "step": 50520 }, { "epoch": 0.19533484869570597, "grad_norm": 0.10900002717971802, "learning_rate": 0.002, "loss": 2.3786, "step": 50530 }, { "epoch": 0.19537350589908922, "grad_norm": 0.12437902390956879, "learning_rate": 0.002, "loss": 2.3631, "step": 50540 }, { "epoch": 0.1954121631024725, "grad_norm": 0.0991293415427208, "learning_rate": 0.002, "loss": 2.3463, "step": 50550 }, { "epoch": 0.19545082030585578, "grad_norm": 0.12089473009109497, "learning_rate": 0.002, "loss": 2.3628, "step": 50560 }, { "epoch": 0.19548947750923906, "grad_norm": 0.11330953240394592, "learning_rate": 0.002, "loss": 2.3572, "step": 50570 }, { "epoch": 0.19552813471262234, "grad_norm": 0.10841525346040726, "learning_rate": 0.002, "loss": 2.3458, "step": 50580 }, { "epoch": 0.19556679191600562, "grad_norm": 0.10499054193496704, "learning_rate": 0.002, "loss": 2.3571, "step": 50590 }, { "epoch": 0.1956054491193889, "grad_norm": 0.11165262013673782, "learning_rate": 0.002, "loss": 2.3585, "step": 50600 }, { "epoch": 0.19564410632277218, "grad_norm": 0.11451289802789688, "learning_rate": 0.002, "loss": 2.3544, "step": 50610 }, { "epoch": 0.19568276352615546, "grad_norm": 0.09710144251585007, "learning_rate": 0.002, "loss": 2.355, "step": 50620 }, { "epoch": 0.19572142072953874, "grad_norm": 0.09652550518512726, "learning_rate": 0.002, "loss": 2.3653, "step": 50630 }, { "epoch": 0.19576007793292202, "grad_norm": 0.13575007021427155, "learning_rate": 0.002, "loss": 2.3638, "step": 50640 }, { "epoch": 0.1957987351363053, "grad_norm": 0.09911160171031952, "learning_rate": 0.002, "loss": 2.3701, "step": 50650 }, { "epoch": 0.19583739233968858, "grad_norm": 0.10242325067520142, "learning_rate": 0.002, "loss": 2.3636, "step": 50660 }, { "epoch": 0.19587604954307186, "grad_norm": 0.1086401492357254, "learning_rate": 0.002, "loss": 2.3546, "step": 50670 }, { "epoch": 0.19591470674645514, "grad_norm": 0.1018538549542427, "learning_rate": 0.002, "loss": 2.354, "step": 50680 }, { "epoch": 0.19595336394983842, "grad_norm": 0.10752106457948685, "learning_rate": 0.002, "loss": 2.3628, "step": 50690 }, { "epoch": 0.1959920211532217, "grad_norm": 0.11646592617034912, "learning_rate": 0.002, "loss": 2.3581, "step": 50700 }, { "epoch": 0.19603067835660498, "grad_norm": 0.10107237845659256, "learning_rate": 0.002, "loss": 2.3496, "step": 50710 }, { "epoch": 0.19606933555998826, "grad_norm": 0.11378759145736694, "learning_rate": 0.002, "loss": 2.3633, "step": 50720 }, { "epoch": 0.1961079927633715, "grad_norm": 0.11688712984323502, "learning_rate": 0.002, "loss": 2.3496, "step": 50730 }, { "epoch": 0.1961466499667548, "grad_norm": 0.13264037668704987, "learning_rate": 0.002, "loss": 2.353, "step": 50740 }, { "epoch": 0.19618530717013807, "grad_norm": 0.09421103447675705, "learning_rate": 0.002, "loss": 2.3583, "step": 50750 }, { "epoch": 0.19622396437352135, "grad_norm": 0.1111598014831543, "learning_rate": 0.002, "loss": 2.3699, "step": 50760 }, { "epoch": 0.19626262157690463, "grad_norm": 0.10464281588792801, "learning_rate": 0.002, "loss": 2.3382, "step": 50770 }, { "epoch": 0.1963012787802879, "grad_norm": 0.12343593686819077, "learning_rate": 0.002, "loss": 2.3732, "step": 50780 }, { "epoch": 0.1963399359836712, "grad_norm": 0.11526413261890411, "learning_rate": 0.002, "loss": 2.3777, "step": 50790 }, { "epoch": 0.19637859318705447, "grad_norm": 0.10543012619018555, "learning_rate": 0.002, "loss": 2.3534, "step": 50800 }, { "epoch": 0.19641725039043775, "grad_norm": 0.09727109223604202, "learning_rate": 0.002, "loss": 2.3671, "step": 50810 }, { "epoch": 0.19645590759382103, "grad_norm": 0.12500298023223877, "learning_rate": 0.002, "loss": 2.357, "step": 50820 }, { "epoch": 0.1964945647972043, "grad_norm": 0.10165640711784363, "learning_rate": 0.002, "loss": 2.3613, "step": 50830 }, { "epoch": 0.1965332220005876, "grad_norm": 0.11737733334302902, "learning_rate": 0.002, "loss": 2.3612, "step": 50840 }, { "epoch": 0.19657187920397087, "grad_norm": 0.12259113043546677, "learning_rate": 0.002, "loss": 2.3484, "step": 50850 }, { "epoch": 0.19661053640735415, "grad_norm": 0.10291915386915207, "learning_rate": 0.002, "loss": 2.3689, "step": 50860 }, { "epoch": 0.19664919361073743, "grad_norm": 0.0999462828040123, "learning_rate": 0.002, "loss": 2.3662, "step": 50870 }, { "epoch": 0.1966878508141207, "grad_norm": 0.10957477986812592, "learning_rate": 0.002, "loss": 2.3544, "step": 50880 }, { "epoch": 0.196726508017504, "grad_norm": 0.10866300761699677, "learning_rate": 0.002, "loss": 2.3739, "step": 50890 }, { "epoch": 0.19676516522088727, "grad_norm": 0.4267626106739044, "learning_rate": 0.002, "loss": 2.3591, "step": 50900 }, { "epoch": 0.19680382242427052, "grad_norm": 0.13349512219429016, "learning_rate": 0.002, "loss": 2.3663, "step": 50910 }, { "epoch": 0.1968424796276538, "grad_norm": 0.10224291682243347, "learning_rate": 0.002, "loss": 2.3715, "step": 50920 }, { "epoch": 0.19688113683103708, "grad_norm": 0.10515494644641876, "learning_rate": 0.002, "loss": 2.3479, "step": 50930 }, { "epoch": 0.19691979403442036, "grad_norm": 0.12216347455978394, "learning_rate": 0.002, "loss": 2.3658, "step": 50940 }, { "epoch": 0.19695845123780364, "grad_norm": 0.10043194890022278, "learning_rate": 0.002, "loss": 2.3523, "step": 50950 }, { "epoch": 0.19699710844118692, "grad_norm": 0.10455900430679321, "learning_rate": 0.002, "loss": 2.3582, "step": 50960 }, { "epoch": 0.1970357656445702, "grad_norm": 0.17741772532463074, "learning_rate": 0.002, "loss": 2.3583, "step": 50970 }, { "epoch": 0.19707442284795348, "grad_norm": 0.11157890409231186, "learning_rate": 0.002, "loss": 2.35, "step": 50980 }, { "epoch": 0.19711308005133676, "grad_norm": 0.09297435730695724, "learning_rate": 0.002, "loss": 2.3395, "step": 50990 }, { "epoch": 0.19715173725472004, "grad_norm": 0.1312807947397232, "learning_rate": 0.002, "loss": 2.3637, "step": 51000 }, { "epoch": 0.19719039445810332, "grad_norm": 0.11647715419530869, "learning_rate": 0.002, "loss": 2.3602, "step": 51010 }, { "epoch": 0.1972290516614866, "grad_norm": 0.12451427429914474, "learning_rate": 0.002, "loss": 2.3559, "step": 51020 }, { "epoch": 0.19726770886486988, "grad_norm": 0.10510771721601486, "learning_rate": 0.002, "loss": 2.3572, "step": 51030 }, { "epoch": 0.19730636606825316, "grad_norm": 0.11461485922336578, "learning_rate": 0.002, "loss": 2.3554, "step": 51040 }, { "epoch": 0.19734502327163644, "grad_norm": 0.12008356302976608, "learning_rate": 0.002, "loss": 2.3652, "step": 51050 }, { "epoch": 0.19738368047501972, "grad_norm": 0.08818504959344864, "learning_rate": 0.002, "loss": 2.3647, "step": 51060 }, { "epoch": 0.197422337678403, "grad_norm": 0.1345818042755127, "learning_rate": 0.002, "loss": 2.3616, "step": 51070 }, { "epoch": 0.19746099488178628, "grad_norm": 0.10152529925107956, "learning_rate": 0.002, "loss": 2.3716, "step": 51080 }, { "epoch": 0.19749965208516956, "grad_norm": 0.11092906445264816, "learning_rate": 0.002, "loss": 2.3623, "step": 51090 }, { "epoch": 0.19753830928855282, "grad_norm": 0.09306729584932327, "learning_rate": 0.002, "loss": 2.3649, "step": 51100 }, { "epoch": 0.1975769664919361, "grad_norm": 0.15187200903892517, "learning_rate": 0.002, "loss": 2.348, "step": 51110 }, { "epoch": 0.19761562369531938, "grad_norm": 0.1150587648153305, "learning_rate": 0.002, "loss": 2.365, "step": 51120 }, { "epoch": 0.19765428089870266, "grad_norm": 0.114975206553936, "learning_rate": 0.002, "loss": 2.365, "step": 51130 }, { "epoch": 0.19769293810208594, "grad_norm": 0.11752933263778687, "learning_rate": 0.002, "loss": 2.3594, "step": 51140 }, { "epoch": 0.19773159530546922, "grad_norm": 0.11128650605678558, "learning_rate": 0.002, "loss": 2.3751, "step": 51150 }, { "epoch": 0.1977702525088525, "grad_norm": 0.12327593564987183, "learning_rate": 0.002, "loss": 2.333, "step": 51160 }, { "epoch": 0.19780890971223578, "grad_norm": 0.09861862659454346, "learning_rate": 0.002, "loss": 2.3662, "step": 51170 }, { "epoch": 0.19784756691561906, "grad_norm": 0.10292232781648636, "learning_rate": 0.002, "loss": 2.3684, "step": 51180 }, { "epoch": 0.19788622411900234, "grad_norm": 0.10943485051393509, "learning_rate": 0.002, "loss": 2.3599, "step": 51190 }, { "epoch": 0.19792488132238562, "grad_norm": 0.10931216925382614, "learning_rate": 0.002, "loss": 2.3815, "step": 51200 }, { "epoch": 0.1979635385257689, "grad_norm": 0.11949874460697174, "learning_rate": 0.002, "loss": 2.3676, "step": 51210 }, { "epoch": 0.19800219572915218, "grad_norm": 0.10317299515008926, "learning_rate": 0.002, "loss": 2.3672, "step": 51220 }, { "epoch": 0.19804085293253545, "grad_norm": 0.11775912344455719, "learning_rate": 0.002, "loss": 2.3689, "step": 51230 }, { "epoch": 0.19807951013591873, "grad_norm": 0.1264687180519104, "learning_rate": 0.002, "loss": 2.3782, "step": 51240 }, { "epoch": 0.19811816733930201, "grad_norm": 0.10325953364372253, "learning_rate": 0.002, "loss": 2.3687, "step": 51250 }, { "epoch": 0.1981568245426853, "grad_norm": 0.129286527633667, "learning_rate": 0.002, "loss": 2.3595, "step": 51260 }, { "epoch": 0.19819548174606857, "grad_norm": 0.10436193645000458, "learning_rate": 0.002, "loss": 2.3763, "step": 51270 }, { "epoch": 0.19823413894945185, "grad_norm": 0.10270849615335464, "learning_rate": 0.002, "loss": 2.3628, "step": 51280 }, { "epoch": 0.1982727961528351, "grad_norm": 0.09914952516555786, "learning_rate": 0.002, "loss": 2.3537, "step": 51290 }, { "epoch": 0.1983114533562184, "grad_norm": 0.10150845348834991, "learning_rate": 0.002, "loss": 2.3558, "step": 51300 }, { "epoch": 0.19835011055960167, "grad_norm": 0.11425089091062546, "learning_rate": 0.002, "loss": 2.3389, "step": 51310 }, { "epoch": 0.19838876776298495, "grad_norm": 0.10831782966852188, "learning_rate": 0.002, "loss": 2.3559, "step": 51320 }, { "epoch": 0.19842742496636823, "grad_norm": 0.09986311942338943, "learning_rate": 0.002, "loss": 2.3563, "step": 51330 }, { "epoch": 0.1984660821697515, "grad_norm": 0.10581366717815399, "learning_rate": 0.002, "loss": 2.3706, "step": 51340 }, { "epoch": 0.1985047393731348, "grad_norm": 0.09615137428045273, "learning_rate": 0.002, "loss": 2.3526, "step": 51350 }, { "epoch": 0.19854339657651807, "grad_norm": 0.12368248403072357, "learning_rate": 0.002, "loss": 2.3596, "step": 51360 }, { "epoch": 0.19858205377990135, "grad_norm": 0.11965450644493103, "learning_rate": 0.002, "loss": 2.3648, "step": 51370 }, { "epoch": 0.19862071098328463, "grad_norm": 0.1004866436123848, "learning_rate": 0.002, "loss": 2.3529, "step": 51380 }, { "epoch": 0.1986593681866679, "grad_norm": 0.112697534263134, "learning_rate": 0.002, "loss": 2.3691, "step": 51390 }, { "epoch": 0.1986980253900512, "grad_norm": 0.09046253561973572, "learning_rate": 0.002, "loss": 2.3559, "step": 51400 }, { "epoch": 0.19873668259343447, "grad_norm": 0.09781434386968613, "learning_rate": 0.002, "loss": 2.3669, "step": 51410 }, { "epoch": 0.19877533979681775, "grad_norm": 0.11699929088354111, "learning_rate": 0.002, "loss": 2.3514, "step": 51420 }, { "epoch": 0.19881399700020103, "grad_norm": 0.10510469973087311, "learning_rate": 0.002, "loss": 2.361, "step": 51430 }, { "epoch": 0.1988526542035843, "grad_norm": 0.10240361839532852, "learning_rate": 0.002, "loss": 2.3564, "step": 51440 }, { "epoch": 0.19889131140696759, "grad_norm": 0.09002921730279922, "learning_rate": 0.002, "loss": 2.3592, "step": 51450 }, { "epoch": 0.19892996861035087, "grad_norm": 0.12872596085071564, "learning_rate": 0.002, "loss": 2.3605, "step": 51460 }, { "epoch": 0.19896862581373412, "grad_norm": 0.10436994582414627, "learning_rate": 0.002, "loss": 2.3674, "step": 51470 }, { "epoch": 0.1990072830171174, "grad_norm": 0.10890268534421921, "learning_rate": 0.002, "loss": 2.3517, "step": 51480 }, { "epoch": 0.19904594022050068, "grad_norm": 0.1062799021601677, "learning_rate": 0.002, "loss": 2.3496, "step": 51490 }, { "epoch": 0.19908459742388396, "grad_norm": 0.11125991493463516, "learning_rate": 0.002, "loss": 2.3733, "step": 51500 }, { "epoch": 0.19912325462726724, "grad_norm": 0.11942413449287415, "learning_rate": 0.002, "loss": 2.3825, "step": 51510 }, { "epoch": 0.19916191183065052, "grad_norm": 0.11848786473274231, "learning_rate": 0.002, "loss": 2.3494, "step": 51520 }, { "epoch": 0.1992005690340338, "grad_norm": 0.10823323577642441, "learning_rate": 0.002, "loss": 2.3627, "step": 51530 }, { "epoch": 0.19923922623741708, "grad_norm": 0.1108582392334938, "learning_rate": 0.002, "loss": 2.3593, "step": 51540 }, { "epoch": 0.19927788344080036, "grad_norm": 0.1172277182340622, "learning_rate": 0.002, "loss": 2.3626, "step": 51550 }, { "epoch": 0.19931654064418364, "grad_norm": 0.19322575628757477, "learning_rate": 0.002, "loss": 2.3619, "step": 51560 }, { "epoch": 0.19935519784756692, "grad_norm": 0.11942016333341599, "learning_rate": 0.002, "loss": 2.3592, "step": 51570 }, { "epoch": 0.1993938550509502, "grad_norm": 0.1157030388712883, "learning_rate": 0.002, "loss": 2.3723, "step": 51580 }, { "epoch": 0.19943251225433348, "grad_norm": 0.09611659497022629, "learning_rate": 0.002, "loss": 2.3808, "step": 51590 }, { "epoch": 0.19947116945771676, "grad_norm": 0.12318339943885803, "learning_rate": 0.002, "loss": 2.3717, "step": 51600 }, { "epoch": 0.19950982666110004, "grad_norm": 0.1084495484828949, "learning_rate": 0.002, "loss": 2.3701, "step": 51610 }, { "epoch": 0.19954848386448332, "grad_norm": 0.10069076716899872, "learning_rate": 0.002, "loss": 2.3632, "step": 51620 }, { "epoch": 0.1995871410678666, "grad_norm": 0.1215478777885437, "learning_rate": 0.002, "loss": 2.3556, "step": 51630 }, { "epoch": 0.19962579827124988, "grad_norm": 0.11098402738571167, "learning_rate": 0.002, "loss": 2.3534, "step": 51640 }, { "epoch": 0.19966445547463316, "grad_norm": 0.10591264814138412, "learning_rate": 0.002, "loss": 2.3593, "step": 51650 }, { "epoch": 0.1997031126780164, "grad_norm": 0.0950435921549797, "learning_rate": 0.002, "loss": 2.3566, "step": 51660 }, { "epoch": 0.1997417698813997, "grad_norm": 0.11709853261709213, "learning_rate": 0.002, "loss": 2.3616, "step": 51670 }, { "epoch": 0.19978042708478297, "grad_norm": 0.09581815451383591, "learning_rate": 0.002, "loss": 2.3625, "step": 51680 }, { "epoch": 0.19981908428816625, "grad_norm": 0.1009148582816124, "learning_rate": 0.002, "loss": 2.3575, "step": 51690 }, { "epoch": 0.19985774149154953, "grad_norm": 0.1481183022260666, "learning_rate": 0.002, "loss": 2.3675, "step": 51700 }, { "epoch": 0.1998963986949328, "grad_norm": 0.10128211230039597, "learning_rate": 0.002, "loss": 2.3502, "step": 51710 }, { "epoch": 0.1999350558983161, "grad_norm": 0.10426509380340576, "learning_rate": 0.002, "loss": 2.3709, "step": 51720 }, { "epoch": 0.19997371310169937, "grad_norm": 0.11983291804790497, "learning_rate": 0.002, "loss": 2.3816, "step": 51730 }, { "epoch": 0.20001237030508265, "grad_norm": 0.102561816573143, "learning_rate": 0.002, "loss": 2.3647, "step": 51740 }, { "epoch": 0.20005102750846593, "grad_norm": 0.10453730076551437, "learning_rate": 0.002, "loss": 2.357, "step": 51750 }, { "epoch": 0.2000896847118492, "grad_norm": 0.12531380355358124, "learning_rate": 0.002, "loss": 2.3607, "step": 51760 }, { "epoch": 0.2001283419152325, "grad_norm": 0.10815022140741348, "learning_rate": 0.002, "loss": 2.3683, "step": 51770 }, { "epoch": 0.20016699911861577, "grad_norm": 0.11682631075382233, "learning_rate": 0.002, "loss": 2.3516, "step": 51780 }, { "epoch": 0.20020565632199905, "grad_norm": 0.10224246978759766, "learning_rate": 0.002, "loss": 2.3534, "step": 51790 }, { "epoch": 0.20024431352538233, "grad_norm": 0.1255955845117569, "learning_rate": 0.002, "loss": 2.3656, "step": 51800 }, { "epoch": 0.2002829707287656, "grad_norm": 0.11466054618358612, "learning_rate": 0.002, "loss": 2.3581, "step": 51810 }, { "epoch": 0.2003216279321489, "grad_norm": 0.10104741901159286, "learning_rate": 0.002, "loss": 2.3776, "step": 51820 }, { "epoch": 0.20036028513553217, "grad_norm": 0.10511884838342667, "learning_rate": 0.002, "loss": 2.3579, "step": 51830 }, { "epoch": 0.20039894233891542, "grad_norm": 0.11252196878194809, "learning_rate": 0.002, "loss": 2.3513, "step": 51840 }, { "epoch": 0.2004375995422987, "grad_norm": 0.10455108433961868, "learning_rate": 0.002, "loss": 2.3836, "step": 51850 }, { "epoch": 0.20047625674568198, "grad_norm": 0.10854092985391617, "learning_rate": 0.002, "loss": 2.362, "step": 51860 }, { "epoch": 0.20051491394906526, "grad_norm": 0.12762337923049927, "learning_rate": 0.002, "loss": 2.3594, "step": 51870 }, { "epoch": 0.20055357115244854, "grad_norm": 0.12892848253250122, "learning_rate": 0.002, "loss": 2.3561, "step": 51880 }, { "epoch": 0.20059222835583182, "grad_norm": 0.09376315027475357, "learning_rate": 0.002, "loss": 2.3561, "step": 51890 }, { "epoch": 0.2006308855592151, "grad_norm": 0.126753568649292, "learning_rate": 0.002, "loss": 2.3665, "step": 51900 }, { "epoch": 0.20066954276259838, "grad_norm": 0.11163308471441269, "learning_rate": 0.002, "loss": 2.3704, "step": 51910 }, { "epoch": 0.20070819996598166, "grad_norm": 0.09663382172584534, "learning_rate": 0.002, "loss": 2.3423, "step": 51920 }, { "epoch": 0.20074685716936494, "grad_norm": 0.10127527266740799, "learning_rate": 0.002, "loss": 2.3719, "step": 51930 }, { "epoch": 0.20078551437274822, "grad_norm": 0.11793727427721024, "learning_rate": 0.002, "loss": 2.3658, "step": 51940 }, { "epoch": 0.2008241715761315, "grad_norm": 0.10714305192232132, "learning_rate": 0.002, "loss": 2.3415, "step": 51950 }, { "epoch": 0.20086282877951478, "grad_norm": 0.09726440906524658, "learning_rate": 0.002, "loss": 2.3553, "step": 51960 }, { "epoch": 0.20090148598289806, "grad_norm": 0.14774122834205627, "learning_rate": 0.002, "loss": 2.3625, "step": 51970 }, { "epoch": 0.20094014318628134, "grad_norm": 0.10895711183547974, "learning_rate": 0.002, "loss": 2.36, "step": 51980 }, { "epoch": 0.20097880038966462, "grad_norm": 0.12761323153972626, "learning_rate": 0.002, "loss": 2.3564, "step": 51990 }, { "epoch": 0.2010174575930479, "grad_norm": 0.10693914443254471, "learning_rate": 0.002, "loss": 2.3674, "step": 52000 }, { "epoch": 0.20105611479643118, "grad_norm": 0.10339793562889099, "learning_rate": 0.002, "loss": 2.3557, "step": 52010 }, { "epoch": 0.20109477199981446, "grad_norm": 0.11272173374891281, "learning_rate": 0.002, "loss": 2.373, "step": 52020 }, { "epoch": 0.2011334292031977, "grad_norm": 0.19333140552043915, "learning_rate": 0.002, "loss": 2.3635, "step": 52030 }, { "epoch": 0.201172086406581, "grad_norm": 0.12649370729923248, "learning_rate": 0.002, "loss": 2.3502, "step": 52040 }, { "epoch": 0.20121074360996427, "grad_norm": 0.10977057367563248, "learning_rate": 0.002, "loss": 2.3577, "step": 52050 }, { "epoch": 0.20124940081334755, "grad_norm": 0.12432746589183807, "learning_rate": 0.002, "loss": 2.3509, "step": 52060 }, { "epoch": 0.20128805801673083, "grad_norm": 0.11161592602729797, "learning_rate": 0.002, "loss": 2.3665, "step": 52070 }, { "epoch": 0.2013267152201141, "grad_norm": 0.10561925172805786, "learning_rate": 0.002, "loss": 2.3594, "step": 52080 }, { "epoch": 0.2013653724234974, "grad_norm": 0.10858765244483948, "learning_rate": 0.002, "loss": 2.3577, "step": 52090 }, { "epoch": 0.20140402962688067, "grad_norm": 0.09681866317987442, "learning_rate": 0.002, "loss": 2.3617, "step": 52100 }, { "epoch": 0.20144268683026395, "grad_norm": 0.09604979306459427, "learning_rate": 0.002, "loss": 2.3593, "step": 52110 }, { "epoch": 0.20148134403364723, "grad_norm": 0.10785799473524094, "learning_rate": 0.002, "loss": 2.3602, "step": 52120 }, { "epoch": 0.2015200012370305, "grad_norm": 0.11797741055488586, "learning_rate": 0.002, "loss": 2.3469, "step": 52130 }, { "epoch": 0.2015586584404138, "grad_norm": 0.1102878525853157, "learning_rate": 0.002, "loss": 2.3668, "step": 52140 }, { "epoch": 0.20159731564379707, "grad_norm": 0.11702249944210052, "learning_rate": 0.002, "loss": 2.3539, "step": 52150 }, { "epoch": 0.20163597284718035, "grad_norm": 0.13625964522361755, "learning_rate": 0.002, "loss": 2.3703, "step": 52160 }, { "epoch": 0.20167463005056363, "grad_norm": 0.10420363396406174, "learning_rate": 0.002, "loss": 2.3542, "step": 52170 }, { "epoch": 0.2017132872539469, "grad_norm": 0.09645235538482666, "learning_rate": 0.002, "loss": 2.366, "step": 52180 }, { "epoch": 0.2017519444573302, "grad_norm": 0.09708897024393082, "learning_rate": 0.002, "loss": 2.3534, "step": 52190 }, { "epoch": 0.20179060166071347, "grad_norm": 0.1467909961938858, "learning_rate": 0.002, "loss": 2.3562, "step": 52200 }, { "epoch": 0.20182925886409672, "grad_norm": 0.10608675330877304, "learning_rate": 0.002, "loss": 2.347, "step": 52210 }, { "epoch": 0.20186791606748, "grad_norm": 0.11547255516052246, "learning_rate": 0.002, "loss": 2.3682, "step": 52220 }, { "epoch": 0.20190657327086328, "grad_norm": 0.10597088187932968, "learning_rate": 0.002, "loss": 2.3535, "step": 52230 }, { "epoch": 0.20194523047424656, "grad_norm": 0.11478706449270248, "learning_rate": 0.002, "loss": 2.3649, "step": 52240 }, { "epoch": 0.20198388767762984, "grad_norm": 0.09801722317934036, "learning_rate": 0.002, "loss": 2.3568, "step": 52250 }, { "epoch": 0.20202254488101312, "grad_norm": 0.10778038203716278, "learning_rate": 0.002, "loss": 2.363, "step": 52260 }, { "epoch": 0.2020612020843964, "grad_norm": 0.10066181421279907, "learning_rate": 0.002, "loss": 2.362, "step": 52270 }, { "epoch": 0.20209985928777968, "grad_norm": 0.11359509825706482, "learning_rate": 0.002, "loss": 2.3515, "step": 52280 }, { "epoch": 0.20213851649116296, "grad_norm": 0.10373353958129883, "learning_rate": 0.002, "loss": 2.3464, "step": 52290 }, { "epoch": 0.20217717369454624, "grad_norm": 0.12140042334794998, "learning_rate": 0.002, "loss": 2.3598, "step": 52300 }, { "epoch": 0.20221583089792952, "grad_norm": 0.1029222160577774, "learning_rate": 0.002, "loss": 2.3651, "step": 52310 }, { "epoch": 0.2022544881013128, "grad_norm": 0.10438024997711182, "learning_rate": 0.002, "loss": 2.3651, "step": 52320 }, { "epoch": 0.20229314530469608, "grad_norm": 0.1253383904695511, "learning_rate": 0.002, "loss": 2.3697, "step": 52330 }, { "epoch": 0.20233180250807936, "grad_norm": 0.0986919105052948, "learning_rate": 0.002, "loss": 2.3634, "step": 52340 }, { "epoch": 0.20237045971146264, "grad_norm": 0.15070602297782898, "learning_rate": 0.002, "loss": 2.3712, "step": 52350 }, { "epoch": 0.20240911691484592, "grad_norm": 0.1333739310503006, "learning_rate": 0.002, "loss": 2.3559, "step": 52360 }, { "epoch": 0.2024477741182292, "grad_norm": 0.10192544013261795, "learning_rate": 0.002, "loss": 2.371, "step": 52370 }, { "epoch": 0.20248643132161248, "grad_norm": 0.11291380226612091, "learning_rate": 0.002, "loss": 2.3556, "step": 52380 }, { "epoch": 0.20252508852499576, "grad_norm": 0.11298597604036331, "learning_rate": 0.002, "loss": 2.3681, "step": 52390 }, { "epoch": 0.202563745728379, "grad_norm": 0.11294633895158768, "learning_rate": 0.002, "loss": 2.3622, "step": 52400 }, { "epoch": 0.2026024029317623, "grad_norm": 0.09718062728643417, "learning_rate": 0.002, "loss": 2.3702, "step": 52410 }, { "epoch": 0.20264106013514557, "grad_norm": 0.09357758611440659, "learning_rate": 0.002, "loss": 2.3668, "step": 52420 }, { "epoch": 0.20267971733852885, "grad_norm": 0.10638459771871567, "learning_rate": 0.002, "loss": 2.3755, "step": 52430 }, { "epoch": 0.20271837454191213, "grad_norm": 0.10469187796115875, "learning_rate": 0.002, "loss": 2.3581, "step": 52440 }, { "epoch": 0.2027570317452954, "grad_norm": 0.11636140942573547, "learning_rate": 0.002, "loss": 2.3608, "step": 52450 }, { "epoch": 0.2027956889486787, "grad_norm": 0.10756337642669678, "learning_rate": 0.002, "loss": 2.3641, "step": 52460 }, { "epoch": 0.20283434615206197, "grad_norm": 0.10654626041650772, "learning_rate": 0.002, "loss": 2.3638, "step": 52470 }, { "epoch": 0.20287300335544525, "grad_norm": 0.08959631621837616, "learning_rate": 0.002, "loss": 2.3595, "step": 52480 }, { "epoch": 0.20291166055882853, "grad_norm": 0.10842543840408325, "learning_rate": 0.002, "loss": 2.3575, "step": 52490 }, { "epoch": 0.2029503177622118, "grad_norm": 0.12206081300973892, "learning_rate": 0.002, "loss": 2.3624, "step": 52500 }, { "epoch": 0.2029889749655951, "grad_norm": 0.1023700013756752, "learning_rate": 0.002, "loss": 2.3604, "step": 52510 }, { "epoch": 0.20302763216897837, "grad_norm": 0.11099164187908173, "learning_rate": 0.002, "loss": 2.3699, "step": 52520 }, { "epoch": 0.20306628937236165, "grad_norm": 0.10270822048187256, "learning_rate": 0.002, "loss": 2.3804, "step": 52530 }, { "epoch": 0.20310494657574493, "grad_norm": 0.11300281435251236, "learning_rate": 0.002, "loss": 2.3653, "step": 52540 }, { "epoch": 0.2031436037791282, "grad_norm": 0.12924693524837494, "learning_rate": 0.002, "loss": 2.3642, "step": 52550 }, { "epoch": 0.2031822609825115, "grad_norm": 0.11208979785442352, "learning_rate": 0.002, "loss": 2.3491, "step": 52560 }, { "epoch": 0.20322091818589477, "grad_norm": 0.1158527284860611, "learning_rate": 0.002, "loss": 2.3682, "step": 52570 }, { "epoch": 0.20325957538927802, "grad_norm": 0.11639354377985, "learning_rate": 0.002, "loss": 2.3624, "step": 52580 }, { "epoch": 0.2032982325926613, "grad_norm": 0.09775479882955551, "learning_rate": 0.002, "loss": 2.3393, "step": 52590 }, { "epoch": 0.20333688979604458, "grad_norm": 0.0977337658405304, "learning_rate": 0.002, "loss": 2.3558, "step": 52600 }, { "epoch": 0.20337554699942786, "grad_norm": 0.1110336184501648, "learning_rate": 0.002, "loss": 2.3641, "step": 52610 }, { "epoch": 0.20341420420281114, "grad_norm": 0.10928376764059067, "learning_rate": 0.002, "loss": 2.348, "step": 52620 }, { "epoch": 0.20345286140619442, "grad_norm": 0.11134651303291321, "learning_rate": 0.002, "loss": 2.3739, "step": 52630 }, { "epoch": 0.2034915186095777, "grad_norm": 0.10814300179481506, "learning_rate": 0.002, "loss": 2.3483, "step": 52640 }, { "epoch": 0.20353017581296098, "grad_norm": 0.10304401814937592, "learning_rate": 0.002, "loss": 2.3597, "step": 52650 }, { "epoch": 0.20356883301634426, "grad_norm": 0.10869714617729187, "learning_rate": 0.002, "loss": 2.3628, "step": 52660 }, { "epoch": 0.20360749021972754, "grad_norm": 0.10580986738204956, "learning_rate": 0.002, "loss": 2.3597, "step": 52670 }, { "epoch": 0.20364614742311082, "grad_norm": 0.09266664832830429, "learning_rate": 0.002, "loss": 2.345, "step": 52680 }, { "epoch": 0.2036848046264941, "grad_norm": 0.10514344274997711, "learning_rate": 0.002, "loss": 2.3538, "step": 52690 }, { "epoch": 0.20372346182987738, "grad_norm": 0.10906578600406647, "learning_rate": 0.002, "loss": 2.3625, "step": 52700 }, { "epoch": 0.20376211903326066, "grad_norm": 0.11037307977676392, "learning_rate": 0.002, "loss": 2.3543, "step": 52710 }, { "epoch": 0.20380077623664394, "grad_norm": 0.09810831397771835, "learning_rate": 0.002, "loss": 2.3405, "step": 52720 }, { "epoch": 0.20383943344002722, "grad_norm": 0.1301860213279724, "learning_rate": 0.002, "loss": 2.3459, "step": 52730 }, { "epoch": 0.2038780906434105, "grad_norm": 0.10767398029565811, "learning_rate": 0.002, "loss": 2.3616, "step": 52740 }, { "epoch": 0.20391674784679378, "grad_norm": 0.09629841148853302, "learning_rate": 0.002, "loss": 2.3554, "step": 52750 }, { "epoch": 0.20395540505017706, "grad_norm": 0.12146098166704178, "learning_rate": 0.002, "loss": 2.3588, "step": 52760 }, { "epoch": 0.20399406225356032, "grad_norm": 0.1003902480006218, "learning_rate": 0.002, "loss": 2.3524, "step": 52770 }, { "epoch": 0.2040327194569436, "grad_norm": 0.11783084273338318, "learning_rate": 0.002, "loss": 2.3593, "step": 52780 }, { "epoch": 0.20407137666032688, "grad_norm": 0.10537533462047577, "learning_rate": 0.002, "loss": 2.3801, "step": 52790 }, { "epoch": 0.20411003386371016, "grad_norm": 0.10663650184869766, "learning_rate": 0.002, "loss": 2.3726, "step": 52800 }, { "epoch": 0.20414869106709344, "grad_norm": 0.11123210191726685, "learning_rate": 0.002, "loss": 2.3582, "step": 52810 }, { "epoch": 0.20418734827047672, "grad_norm": 0.09847772866487503, "learning_rate": 0.002, "loss": 2.3548, "step": 52820 }, { "epoch": 0.20422600547386, "grad_norm": 0.10249926149845123, "learning_rate": 0.002, "loss": 2.3673, "step": 52830 }, { "epoch": 0.20426466267724327, "grad_norm": 0.16679657995700836, "learning_rate": 0.002, "loss": 2.3678, "step": 52840 }, { "epoch": 0.20430331988062655, "grad_norm": 0.11221053451299667, "learning_rate": 0.002, "loss": 2.3833, "step": 52850 }, { "epoch": 0.20434197708400983, "grad_norm": 0.11050441116094589, "learning_rate": 0.002, "loss": 2.3645, "step": 52860 }, { "epoch": 0.20438063428739311, "grad_norm": 0.10989818722009659, "learning_rate": 0.002, "loss": 2.377, "step": 52870 }, { "epoch": 0.2044192914907764, "grad_norm": 0.1037473976612091, "learning_rate": 0.002, "loss": 2.3599, "step": 52880 }, { "epoch": 0.20445794869415967, "grad_norm": 0.11152027547359467, "learning_rate": 0.002, "loss": 2.3573, "step": 52890 }, { "epoch": 0.20449660589754295, "grad_norm": 0.11552037298679352, "learning_rate": 0.002, "loss": 2.364, "step": 52900 }, { "epoch": 0.20453526310092623, "grad_norm": 0.10706567764282227, "learning_rate": 0.002, "loss": 2.3759, "step": 52910 }, { "epoch": 0.20457392030430951, "grad_norm": 0.10491839796304703, "learning_rate": 0.002, "loss": 2.3587, "step": 52920 }, { "epoch": 0.2046125775076928, "grad_norm": 0.11457972228527069, "learning_rate": 0.002, "loss": 2.3663, "step": 52930 }, { "epoch": 0.20465123471107607, "grad_norm": 0.11204688251018524, "learning_rate": 0.002, "loss": 2.3418, "step": 52940 }, { "epoch": 0.20468989191445933, "grad_norm": 0.1085963174700737, "learning_rate": 0.002, "loss": 2.3637, "step": 52950 }, { "epoch": 0.2047285491178426, "grad_norm": 0.11827849596738815, "learning_rate": 0.002, "loss": 2.3568, "step": 52960 }, { "epoch": 0.2047672063212259, "grad_norm": 0.10177775472402573, "learning_rate": 0.002, "loss": 2.3545, "step": 52970 }, { "epoch": 0.20480586352460917, "grad_norm": 0.09992183744907379, "learning_rate": 0.002, "loss": 2.366, "step": 52980 }, { "epoch": 0.20484452072799245, "grad_norm": 0.10621283203363419, "learning_rate": 0.002, "loss": 2.356, "step": 52990 }, { "epoch": 0.20488317793137573, "grad_norm": 0.10804471373558044, "learning_rate": 0.002, "loss": 2.3576, "step": 53000 }, { "epoch": 0.204921835134759, "grad_norm": 0.10280714184045792, "learning_rate": 0.002, "loss": 2.3802, "step": 53010 }, { "epoch": 0.20496049233814229, "grad_norm": 0.10437464714050293, "learning_rate": 0.002, "loss": 2.3555, "step": 53020 }, { "epoch": 0.20499914954152557, "grad_norm": 0.11850042641162872, "learning_rate": 0.002, "loss": 2.3582, "step": 53030 }, { "epoch": 0.20503780674490885, "grad_norm": 0.11176618188619614, "learning_rate": 0.002, "loss": 2.3745, "step": 53040 }, { "epoch": 0.20507646394829213, "grad_norm": 0.095208078622818, "learning_rate": 0.002, "loss": 2.3592, "step": 53050 }, { "epoch": 0.2051151211516754, "grad_norm": 0.09806636720895767, "learning_rate": 0.002, "loss": 2.3474, "step": 53060 }, { "epoch": 0.20515377835505869, "grad_norm": 0.10049755126237869, "learning_rate": 0.002, "loss": 2.3606, "step": 53070 }, { "epoch": 0.20519243555844197, "grad_norm": 0.11506608873605728, "learning_rate": 0.002, "loss": 2.3554, "step": 53080 }, { "epoch": 0.20523109276182525, "grad_norm": 0.1036606952548027, "learning_rate": 0.002, "loss": 2.3564, "step": 53090 }, { "epoch": 0.20526974996520853, "grad_norm": 0.10761820524930954, "learning_rate": 0.002, "loss": 2.355, "step": 53100 }, { "epoch": 0.2053084071685918, "grad_norm": 0.10791739821434021, "learning_rate": 0.002, "loss": 2.3489, "step": 53110 }, { "epoch": 0.20534706437197509, "grad_norm": 0.09568759053945541, "learning_rate": 0.002, "loss": 2.3552, "step": 53120 }, { "epoch": 0.20538572157535837, "grad_norm": 0.1140003353357315, "learning_rate": 0.002, "loss": 2.3577, "step": 53130 }, { "epoch": 0.20542437877874162, "grad_norm": 0.1259390115737915, "learning_rate": 0.002, "loss": 2.3602, "step": 53140 }, { "epoch": 0.2054630359821249, "grad_norm": 0.1129179373383522, "learning_rate": 0.002, "loss": 2.3598, "step": 53150 }, { "epoch": 0.20550169318550818, "grad_norm": 0.09243609011173248, "learning_rate": 0.002, "loss": 2.3555, "step": 53160 }, { "epoch": 0.20554035038889146, "grad_norm": 0.10274989157915115, "learning_rate": 0.002, "loss": 2.3631, "step": 53170 }, { "epoch": 0.20557900759227474, "grad_norm": 0.10646963864564896, "learning_rate": 0.002, "loss": 2.3534, "step": 53180 }, { "epoch": 0.20561766479565802, "grad_norm": 0.11503088474273682, "learning_rate": 0.002, "loss": 2.3532, "step": 53190 }, { "epoch": 0.2056563219990413, "grad_norm": 0.10646315664052963, "learning_rate": 0.002, "loss": 2.3611, "step": 53200 }, { "epoch": 0.20569497920242458, "grad_norm": 0.0975303053855896, "learning_rate": 0.002, "loss": 2.3591, "step": 53210 }, { "epoch": 0.20573363640580786, "grad_norm": 0.11746834218502045, "learning_rate": 0.002, "loss": 2.3477, "step": 53220 }, { "epoch": 0.20577229360919114, "grad_norm": 0.10905087739229202, "learning_rate": 0.002, "loss": 2.3728, "step": 53230 }, { "epoch": 0.20581095081257442, "grad_norm": 0.11742258071899414, "learning_rate": 0.002, "loss": 2.382, "step": 53240 }, { "epoch": 0.2058496080159577, "grad_norm": 0.09772148728370667, "learning_rate": 0.002, "loss": 2.3568, "step": 53250 }, { "epoch": 0.20588826521934098, "grad_norm": 0.12421903759241104, "learning_rate": 0.002, "loss": 2.3552, "step": 53260 }, { "epoch": 0.20592692242272426, "grad_norm": 0.10841967165470123, "learning_rate": 0.002, "loss": 2.367, "step": 53270 }, { "epoch": 0.20596557962610754, "grad_norm": 0.1038578525185585, "learning_rate": 0.002, "loss": 2.3772, "step": 53280 }, { "epoch": 0.20600423682949082, "grad_norm": 0.10237884521484375, "learning_rate": 0.002, "loss": 2.3668, "step": 53290 }, { "epoch": 0.2060428940328741, "grad_norm": 0.10999718308448792, "learning_rate": 0.002, "loss": 2.3513, "step": 53300 }, { "epoch": 0.20608155123625738, "grad_norm": 0.18075010180473328, "learning_rate": 0.002, "loss": 2.3676, "step": 53310 }, { "epoch": 0.20612020843964066, "grad_norm": 0.1075098067522049, "learning_rate": 0.002, "loss": 2.3585, "step": 53320 }, { "epoch": 0.2061588656430239, "grad_norm": 0.11189239472150803, "learning_rate": 0.002, "loss": 2.3543, "step": 53330 }, { "epoch": 0.2061975228464072, "grad_norm": 0.10663151741027832, "learning_rate": 0.002, "loss": 2.3762, "step": 53340 }, { "epoch": 0.20623618004979047, "grad_norm": 0.11522796005010605, "learning_rate": 0.002, "loss": 2.3515, "step": 53350 }, { "epoch": 0.20627483725317375, "grad_norm": 0.0978827029466629, "learning_rate": 0.002, "loss": 2.3612, "step": 53360 }, { "epoch": 0.20631349445655703, "grad_norm": 0.11668947339057922, "learning_rate": 0.002, "loss": 2.3529, "step": 53370 }, { "epoch": 0.2063521516599403, "grad_norm": 0.1104608029127121, "learning_rate": 0.002, "loss": 2.3713, "step": 53380 }, { "epoch": 0.2063908088633236, "grad_norm": 0.11412586271762848, "learning_rate": 0.002, "loss": 2.352, "step": 53390 }, { "epoch": 0.20642946606670687, "grad_norm": 0.1089489609003067, "learning_rate": 0.002, "loss": 2.3642, "step": 53400 }, { "epoch": 0.20646812327009015, "grad_norm": 0.18193396925926208, "learning_rate": 0.002, "loss": 2.3481, "step": 53410 }, { "epoch": 0.20650678047347343, "grad_norm": 0.1322345733642578, "learning_rate": 0.002, "loss": 2.3773, "step": 53420 }, { "epoch": 0.2065454376768567, "grad_norm": 0.10003045946359634, "learning_rate": 0.002, "loss": 2.3594, "step": 53430 }, { "epoch": 0.20658409488024, "grad_norm": 0.08871643245220184, "learning_rate": 0.002, "loss": 2.3511, "step": 53440 }, { "epoch": 0.20662275208362327, "grad_norm": 0.10830266773700714, "learning_rate": 0.002, "loss": 2.358, "step": 53450 }, { "epoch": 0.20666140928700655, "grad_norm": 0.09646753966808319, "learning_rate": 0.002, "loss": 2.3533, "step": 53460 }, { "epoch": 0.20670006649038983, "grad_norm": 0.09747888892889023, "learning_rate": 0.002, "loss": 2.3636, "step": 53470 }, { "epoch": 0.2067387236937731, "grad_norm": 0.11742375046014786, "learning_rate": 0.002, "loss": 2.362, "step": 53480 }, { "epoch": 0.2067773808971564, "grad_norm": 0.10999744385480881, "learning_rate": 0.002, "loss": 2.3669, "step": 53490 }, { "epoch": 0.20681603810053967, "grad_norm": 0.11388671398162842, "learning_rate": 0.002, "loss": 2.3441, "step": 53500 }, { "epoch": 0.20685469530392292, "grad_norm": 0.10879701375961304, "learning_rate": 0.002, "loss": 2.3672, "step": 53510 }, { "epoch": 0.2068933525073062, "grad_norm": 0.12783634662628174, "learning_rate": 0.002, "loss": 2.3596, "step": 53520 }, { "epoch": 0.20693200971068948, "grad_norm": 0.10533930361270905, "learning_rate": 0.002, "loss": 2.3531, "step": 53530 }, { "epoch": 0.20697066691407276, "grad_norm": 0.12564559280872345, "learning_rate": 0.002, "loss": 2.3615, "step": 53540 }, { "epoch": 0.20700932411745604, "grad_norm": 0.1137334555387497, "learning_rate": 0.002, "loss": 2.3636, "step": 53550 }, { "epoch": 0.20704798132083932, "grad_norm": 0.1020798459649086, "learning_rate": 0.002, "loss": 2.3675, "step": 53560 }, { "epoch": 0.2070866385242226, "grad_norm": 0.11555466800928116, "learning_rate": 0.002, "loss": 2.3672, "step": 53570 }, { "epoch": 0.20712529572760588, "grad_norm": 0.1030665710568428, "learning_rate": 0.002, "loss": 2.3531, "step": 53580 }, { "epoch": 0.20716395293098916, "grad_norm": 0.13051795959472656, "learning_rate": 0.002, "loss": 2.3639, "step": 53590 }, { "epoch": 0.20720261013437244, "grad_norm": 0.10592159628868103, "learning_rate": 0.002, "loss": 2.3706, "step": 53600 }, { "epoch": 0.20724126733775572, "grad_norm": 0.6249107122421265, "learning_rate": 0.002, "loss": 2.3579, "step": 53610 }, { "epoch": 0.207279924541139, "grad_norm": 0.11390656977891922, "learning_rate": 0.002, "loss": 2.3675, "step": 53620 }, { "epoch": 0.20731858174452228, "grad_norm": 0.1781582087278366, "learning_rate": 0.002, "loss": 2.384, "step": 53630 }, { "epoch": 0.20735723894790556, "grad_norm": 0.11048318445682526, "learning_rate": 0.002, "loss": 2.3589, "step": 53640 }, { "epoch": 0.20739589615128884, "grad_norm": 0.11458776891231537, "learning_rate": 0.002, "loss": 2.354, "step": 53650 }, { "epoch": 0.20743455335467212, "grad_norm": 0.11732518672943115, "learning_rate": 0.002, "loss": 2.3464, "step": 53660 }, { "epoch": 0.2074732105580554, "grad_norm": 0.0880560651421547, "learning_rate": 0.002, "loss": 2.362, "step": 53670 }, { "epoch": 0.20751186776143868, "grad_norm": 0.08990538120269775, "learning_rate": 0.002, "loss": 2.3455, "step": 53680 }, { "epoch": 0.20755052496482196, "grad_norm": 0.10926368832588196, "learning_rate": 0.002, "loss": 2.3655, "step": 53690 }, { "epoch": 0.2075891821682052, "grad_norm": 0.11573584377765656, "learning_rate": 0.002, "loss": 2.3697, "step": 53700 }, { "epoch": 0.2076278393715885, "grad_norm": 0.1370205134153366, "learning_rate": 0.002, "loss": 2.3743, "step": 53710 }, { "epoch": 0.20766649657497177, "grad_norm": 0.11262237280607224, "learning_rate": 0.002, "loss": 2.3692, "step": 53720 }, { "epoch": 0.20770515377835505, "grad_norm": 0.1154111921787262, "learning_rate": 0.002, "loss": 2.3684, "step": 53730 }, { "epoch": 0.20774381098173833, "grad_norm": 0.09423351287841797, "learning_rate": 0.002, "loss": 2.3656, "step": 53740 }, { "epoch": 0.2077824681851216, "grad_norm": 0.10815108567476273, "learning_rate": 0.002, "loss": 2.358, "step": 53750 }, { "epoch": 0.2078211253885049, "grad_norm": 0.10672447830438614, "learning_rate": 0.002, "loss": 2.3605, "step": 53760 }, { "epoch": 0.20785978259188817, "grad_norm": 0.09784535318613052, "learning_rate": 0.002, "loss": 2.3637, "step": 53770 }, { "epoch": 0.20789843979527145, "grad_norm": 0.12865093350410461, "learning_rate": 0.002, "loss": 2.377, "step": 53780 }, { "epoch": 0.20793709699865473, "grad_norm": 0.10319879651069641, "learning_rate": 0.002, "loss": 2.3575, "step": 53790 }, { "epoch": 0.207975754202038, "grad_norm": 0.11751110851764679, "learning_rate": 0.002, "loss": 2.3569, "step": 53800 }, { "epoch": 0.2080144114054213, "grad_norm": 0.10024145245552063, "learning_rate": 0.002, "loss": 2.3639, "step": 53810 }, { "epoch": 0.20805306860880457, "grad_norm": 0.11266933381557465, "learning_rate": 0.002, "loss": 2.3613, "step": 53820 }, { "epoch": 0.20809172581218785, "grad_norm": 0.09626465290784836, "learning_rate": 0.002, "loss": 2.37, "step": 53830 }, { "epoch": 0.20813038301557113, "grad_norm": 0.10873950272798538, "learning_rate": 0.002, "loss": 2.3624, "step": 53840 }, { "epoch": 0.2081690402189544, "grad_norm": 0.09650988131761551, "learning_rate": 0.002, "loss": 2.3535, "step": 53850 }, { "epoch": 0.2082076974223377, "grad_norm": 0.09024965018033981, "learning_rate": 0.002, "loss": 2.3411, "step": 53860 }, { "epoch": 0.20824635462572097, "grad_norm": 0.13784830272197723, "learning_rate": 0.002, "loss": 2.3719, "step": 53870 }, { "epoch": 0.20828501182910422, "grad_norm": 0.0888957530260086, "learning_rate": 0.002, "loss": 2.3481, "step": 53880 }, { "epoch": 0.2083236690324875, "grad_norm": 0.11602488905191422, "learning_rate": 0.002, "loss": 2.351, "step": 53890 }, { "epoch": 0.20836232623587078, "grad_norm": 0.09725574404001236, "learning_rate": 0.002, "loss": 2.3468, "step": 53900 }, { "epoch": 0.20840098343925406, "grad_norm": 0.10566900670528412, "learning_rate": 0.002, "loss": 2.3558, "step": 53910 }, { "epoch": 0.20843964064263734, "grad_norm": 0.11023826897144318, "learning_rate": 0.002, "loss": 2.3528, "step": 53920 }, { "epoch": 0.20847829784602062, "grad_norm": 0.1042385846376419, "learning_rate": 0.002, "loss": 2.3558, "step": 53930 }, { "epoch": 0.2085169550494039, "grad_norm": 0.11207899451255798, "learning_rate": 0.002, "loss": 2.3486, "step": 53940 }, { "epoch": 0.20855561225278718, "grad_norm": 0.10884620994329453, "learning_rate": 0.002, "loss": 2.3658, "step": 53950 }, { "epoch": 0.20859426945617046, "grad_norm": 0.10588477551937103, "learning_rate": 0.002, "loss": 2.364, "step": 53960 }, { "epoch": 0.20863292665955374, "grad_norm": 0.10225141793489456, "learning_rate": 0.002, "loss": 2.3532, "step": 53970 }, { "epoch": 0.20867158386293702, "grad_norm": 0.11052332818508148, "learning_rate": 0.002, "loss": 2.3785, "step": 53980 }, { "epoch": 0.2087102410663203, "grad_norm": 0.12436167895793915, "learning_rate": 0.002, "loss": 2.37, "step": 53990 }, { "epoch": 0.20874889826970358, "grad_norm": 0.10896704345941544, "learning_rate": 0.002, "loss": 2.3656, "step": 54000 }, { "epoch": 0.20878755547308686, "grad_norm": 0.11404981464147568, "learning_rate": 0.002, "loss": 2.3563, "step": 54010 }, { "epoch": 0.20882621267647014, "grad_norm": 0.12331146001815796, "learning_rate": 0.002, "loss": 2.3704, "step": 54020 }, { "epoch": 0.20886486987985342, "grad_norm": 0.09879045188426971, "learning_rate": 0.002, "loss": 2.3608, "step": 54030 }, { "epoch": 0.2089035270832367, "grad_norm": 0.11368494480848312, "learning_rate": 0.002, "loss": 2.3571, "step": 54040 }, { "epoch": 0.20894218428661998, "grad_norm": 0.13123731315135956, "learning_rate": 0.002, "loss": 2.3564, "step": 54050 }, { "epoch": 0.20898084149000326, "grad_norm": 0.10231734067201614, "learning_rate": 0.002, "loss": 2.3614, "step": 54060 }, { "epoch": 0.2090194986933865, "grad_norm": 0.10188768804073334, "learning_rate": 0.002, "loss": 2.3616, "step": 54070 }, { "epoch": 0.2090581558967698, "grad_norm": 0.10839677602052689, "learning_rate": 0.002, "loss": 2.3649, "step": 54080 }, { "epoch": 0.20909681310015307, "grad_norm": 0.09975928068161011, "learning_rate": 0.002, "loss": 2.3539, "step": 54090 }, { "epoch": 0.20913547030353635, "grad_norm": 0.11016577482223511, "learning_rate": 0.002, "loss": 2.3639, "step": 54100 }, { "epoch": 0.20917412750691963, "grad_norm": 0.11556573957204819, "learning_rate": 0.002, "loss": 2.3595, "step": 54110 }, { "epoch": 0.2092127847103029, "grad_norm": 0.10059158504009247, "learning_rate": 0.002, "loss": 2.3564, "step": 54120 }, { "epoch": 0.2092514419136862, "grad_norm": 0.11386764794588089, "learning_rate": 0.002, "loss": 2.3638, "step": 54130 }, { "epoch": 0.20929009911706947, "grad_norm": 0.13921064138412476, "learning_rate": 0.002, "loss": 2.3629, "step": 54140 }, { "epoch": 0.20932875632045275, "grad_norm": 0.1022169217467308, "learning_rate": 0.002, "loss": 2.3592, "step": 54150 }, { "epoch": 0.20936741352383603, "grad_norm": 0.10318666696548462, "learning_rate": 0.002, "loss": 2.3669, "step": 54160 }, { "epoch": 0.2094060707272193, "grad_norm": 0.1251523792743683, "learning_rate": 0.002, "loss": 2.3576, "step": 54170 }, { "epoch": 0.2094447279306026, "grad_norm": 0.11519023776054382, "learning_rate": 0.002, "loss": 2.3688, "step": 54180 }, { "epoch": 0.20948338513398587, "grad_norm": 0.09141872823238373, "learning_rate": 0.002, "loss": 2.3674, "step": 54190 }, { "epoch": 0.20952204233736915, "grad_norm": 0.11835416406393051, "learning_rate": 0.002, "loss": 2.3543, "step": 54200 }, { "epoch": 0.20956069954075243, "grad_norm": 0.1005050390958786, "learning_rate": 0.002, "loss": 2.3565, "step": 54210 }, { "epoch": 0.2095993567441357, "grad_norm": 0.1052476167678833, "learning_rate": 0.002, "loss": 2.3711, "step": 54220 }, { "epoch": 0.209638013947519, "grad_norm": 0.11354995518922806, "learning_rate": 0.002, "loss": 2.3772, "step": 54230 }, { "epoch": 0.20967667115090227, "grad_norm": 0.11486457288265228, "learning_rate": 0.002, "loss": 2.3536, "step": 54240 }, { "epoch": 0.20971532835428552, "grad_norm": 0.10793580114841461, "learning_rate": 0.002, "loss": 2.3575, "step": 54250 }, { "epoch": 0.2097539855576688, "grad_norm": 0.11628241837024689, "learning_rate": 0.002, "loss": 2.3549, "step": 54260 }, { "epoch": 0.20979264276105208, "grad_norm": 0.10537659376859665, "learning_rate": 0.002, "loss": 2.3515, "step": 54270 }, { "epoch": 0.20983129996443536, "grad_norm": 0.09963370114564896, "learning_rate": 0.002, "loss": 2.3505, "step": 54280 }, { "epoch": 0.20986995716781864, "grad_norm": 0.10262928158044815, "learning_rate": 0.002, "loss": 2.3486, "step": 54290 }, { "epoch": 0.20990861437120192, "grad_norm": 0.13343960046768188, "learning_rate": 0.002, "loss": 2.3701, "step": 54300 }, { "epoch": 0.2099472715745852, "grad_norm": 0.1169983446598053, "learning_rate": 0.002, "loss": 2.367, "step": 54310 }, { "epoch": 0.20998592877796848, "grad_norm": 0.12719640135765076, "learning_rate": 0.002, "loss": 2.3555, "step": 54320 }, { "epoch": 0.21002458598135176, "grad_norm": 0.0954410657286644, "learning_rate": 0.002, "loss": 2.354, "step": 54330 }, { "epoch": 0.21006324318473504, "grad_norm": 0.10446008294820786, "learning_rate": 0.002, "loss": 2.3573, "step": 54340 }, { "epoch": 0.21010190038811832, "grad_norm": 0.14668498933315277, "learning_rate": 0.002, "loss": 2.3675, "step": 54350 }, { "epoch": 0.2101405575915016, "grad_norm": 0.11192698031663895, "learning_rate": 0.002, "loss": 2.3416, "step": 54360 }, { "epoch": 0.21017921479488488, "grad_norm": 0.10454915463924408, "learning_rate": 0.002, "loss": 2.3585, "step": 54370 }, { "epoch": 0.21021787199826816, "grad_norm": 0.10452423244714737, "learning_rate": 0.002, "loss": 2.3481, "step": 54380 }, { "epoch": 0.21025652920165144, "grad_norm": 0.10957881063222885, "learning_rate": 0.002, "loss": 2.3657, "step": 54390 }, { "epoch": 0.21029518640503472, "grad_norm": 0.10211426019668579, "learning_rate": 0.002, "loss": 2.3491, "step": 54400 }, { "epoch": 0.210333843608418, "grad_norm": 0.09771328419446945, "learning_rate": 0.002, "loss": 2.3542, "step": 54410 }, { "epoch": 0.21037250081180128, "grad_norm": 0.10654677450656891, "learning_rate": 0.002, "loss": 2.3581, "step": 54420 }, { "epoch": 0.21041115801518456, "grad_norm": 0.10297355055809021, "learning_rate": 0.002, "loss": 2.3556, "step": 54430 }, { "epoch": 0.21044981521856782, "grad_norm": 0.094737708568573, "learning_rate": 0.002, "loss": 2.3382, "step": 54440 }, { "epoch": 0.2104884724219511, "grad_norm": 0.10669811069965363, "learning_rate": 0.002, "loss": 2.3566, "step": 54450 }, { "epoch": 0.21052712962533437, "grad_norm": 0.09515495598316193, "learning_rate": 0.002, "loss": 2.3603, "step": 54460 }, { "epoch": 0.21056578682871765, "grad_norm": 0.11773104220628738, "learning_rate": 0.002, "loss": 2.3596, "step": 54470 }, { "epoch": 0.21060444403210093, "grad_norm": 0.09968582540750504, "learning_rate": 0.002, "loss": 2.352, "step": 54480 }, { "epoch": 0.21064310123548421, "grad_norm": 0.10157714784145355, "learning_rate": 0.002, "loss": 2.3503, "step": 54490 }, { "epoch": 0.2106817584388675, "grad_norm": 0.09814414381980896, "learning_rate": 0.002, "loss": 2.3598, "step": 54500 }, { "epoch": 0.21072041564225077, "grad_norm": 0.12160098552703857, "learning_rate": 0.002, "loss": 2.3552, "step": 54510 }, { "epoch": 0.21075907284563405, "grad_norm": 0.1207745298743248, "learning_rate": 0.002, "loss": 2.3478, "step": 54520 }, { "epoch": 0.21079773004901733, "grad_norm": 0.11184488236904144, "learning_rate": 0.002, "loss": 2.3752, "step": 54530 }, { "epoch": 0.21083638725240061, "grad_norm": 0.11148563027381897, "learning_rate": 0.002, "loss": 2.3423, "step": 54540 }, { "epoch": 0.2108750444557839, "grad_norm": 0.092194102704525, "learning_rate": 0.002, "loss": 2.3621, "step": 54550 }, { "epoch": 0.21091370165916717, "grad_norm": 0.11377835273742676, "learning_rate": 0.002, "loss": 2.3635, "step": 54560 }, { "epoch": 0.21095235886255045, "grad_norm": 0.10908497869968414, "learning_rate": 0.002, "loss": 2.3561, "step": 54570 }, { "epoch": 0.21099101606593373, "grad_norm": 0.12056691944599152, "learning_rate": 0.002, "loss": 2.3671, "step": 54580 }, { "epoch": 0.21102967326931701, "grad_norm": 0.09635305404663086, "learning_rate": 0.002, "loss": 2.3531, "step": 54590 }, { "epoch": 0.2110683304727003, "grad_norm": 0.09015733003616333, "learning_rate": 0.002, "loss": 2.3547, "step": 54600 }, { "epoch": 0.21110698767608357, "grad_norm": 0.1049027368426323, "learning_rate": 0.002, "loss": 2.3372, "step": 54610 }, { "epoch": 0.21114564487946683, "grad_norm": 0.11679041385650635, "learning_rate": 0.002, "loss": 2.3531, "step": 54620 }, { "epoch": 0.2111843020828501, "grad_norm": 0.11454702913761139, "learning_rate": 0.002, "loss": 2.3622, "step": 54630 }, { "epoch": 0.21122295928623339, "grad_norm": 0.11232933402061462, "learning_rate": 0.002, "loss": 2.3544, "step": 54640 }, { "epoch": 0.21126161648961667, "grad_norm": 0.10257542878389359, "learning_rate": 0.002, "loss": 2.3616, "step": 54650 }, { "epoch": 0.21130027369299995, "grad_norm": 0.09737318754196167, "learning_rate": 0.002, "loss": 2.375, "step": 54660 }, { "epoch": 0.21133893089638323, "grad_norm": 0.1093873605132103, "learning_rate": 0.002, "loss": 2.3552, "step": 54670 }, { "epoch": 0.2113775880997665, "grad_norm": 0.10621381551027298, "learning_rate": 0.002, "loss": 2.3648, "step": 54680 }, { "epoch": 0.21141624530314979, "grad_norm": 0.1178363785147667, "learning_rate": 0.002, "loss": 2.3675, "step": 54690 }, { "epoch": 0.21145490250653307, "grad_norm": 0.10773173719644547, "learning_rate": 0.002, "loss": 2.3544, "step": 54700 }, { "epoch": 0.21149355970991635, "grad_norm": 0.12191834300756454, "learning_rate": 0.002, "loss": 2.3476, "step": 54710 }, { "epoch": 0.21153221691329963, "grad_norm": 0.10326214879751205, "learning_rate": 0.002, "loss": 2.343, "step": 54720 }, { "epoch": 0.2115708741166829, "grad_norm": 0.12878748774528503, "learning_rate": 0.002, "loss": 2.3691, "step": 54730 }, { "epoch": 0.21160953132006619, "grad_norm": 0.10724682360887527, "learning_rate": 0.002, "loss": 2.3677, "step": 54740 }, { "epoch": 0.21164818852344947, "grad_norm": 0.09913008660078049, "learning_rate": 0.002, "loss": 2.3457, "step": 54750 }, { "epoch": 0.21168684572683275, "grad_norm": 0.1128566637635231, "learning_rate": 0.002, "loss": 2.3715, "step": 54760 }, { "epoch": 0.21172550293021603, "grad_norm": 0.10462232679128647, "learning_rate": 0.002, "loss": 2.3477, "step": 54770 }, { "epoch": 0.2117641601335993, "grad_norm": 0.12480421364307404, "learning_rate": 0.002, "loss": 2.3578, "step": 54780 }, { "epoch": 0.21180281733698259, "grad_norm": 0.10443200170993805, "learning_rate": 0.002, "loss": 2.3611, "step": 54790 }, { "epoch": 0.21184147454036586, "grad_norm": 0.09765107929706573, "learning_rate": 0.002, "loss": 2.3657, "step": 54800 }, { "epoch": 0.21188013174374912, "grad_norm": 0.10396154969930649, "learning_rate": 0.002, "loss": 2.3659, "step": 54810 }, { "epoch": 0.2119187889471324, "grad_norm": 0.11567749083042145, "learning_rate": 0.002, "loss": 2.3463, "step": 54820 }, { "epoch": 0.21195744615051568, "grad_norm": 0.09926524013280869, "learning_rate": 0.002, "loss": 2.3474, "step": 54830 }, { "epoch": 0.21199610335389896, "grad_norm": 0.1023450493812561, "learning_rate": 0.002, "loss": 2.3529, "step": 54840 }, { "epoch": 0.21203476055728224, "grad_norm": 0.13101065158843994, "learning_rate": 0.002, "loss": 2.3733, "step": 54850 }, { "epoch": 0.21207341776066552, "grad_norm": 0.1092870682477951, "learning_rate": 0.002, "loss": 2.3642, "step": 54860 }, { "epoch": 0.2121120749640488, "grad_norm": 0.10586284846067429, "learning_rate": 0.002, "loss": 2.371, "step": 54870 }, { "epoch": 0.21215073216743208, "grad_norm": 0.1010589674115181, "learning_rate": 0.002, "loss": 2.3608, "step": 54880 }, { "epoch": 0.21218938937081536, "grad_norm": 0.22518308460712433, "learning_rate": 0.002, "loss": 2.3674, "step": 54890 }, { "epoch": 0.21222804657419864, "grad_norm": 0.0966985747218132, "learning_rate": 0.002, "loss": 2.3655, "step": 54900 }, { "epoch": 0.21226670377758192, "grad_norm": 0.0973641648888588, "learning_rate": 0.002, "loss": 2.3758, "step": 54910 }, { "epoch": 0.2123053609809652, "grad_norm": 0.10347139835357666, "learning_rate": 0.002, "loss": 2.3719, "step": 54920 }, { "epoch": 0.21234401818434848, "grad_norm": 0.10601606220006943, "learning_rate": 0.002, "loss": 2.3659, "step": 54930 }, { "epoch": 0.21238267538773176, "grad_norm": 0.12179460376501083, "learning_rate": 0.002, "loss": 2.344, "step": 54940 }, { "epoch": 0.21242133259111504, "grad_norm": 0.11832549422979355, "learning_rate": 0.002, "loss": 2.3431, "step": 54950 }, { "epoch": 0.21245998979449832, "grad_norm": 0.11510959267616272, "learning_rate": 0.002, "loss": 2.3664, "step": 54960 }, { "epoch": 0.2124986469978816, "grad_norm": 0.10815947502851486, "learning_rate": 0.002, "loss": 2.3613, "step": 54970 }, { "epoch": 0.21253730420126488, "grad_norm": 0.09001940488815308, "learning_rate": 0.002, "loss": 2.3668, "step": 54980 }, { "epoch": 0.21257596140464813, "grad_norm": 0.12041858583688736, "learning_rate": 0.002, "loss": 2.3513, "step": 54990 }, { "epoch": 0.2126146186080314, "grad_norm": 0.1027616411447525, "learning_rate": 0.002, "loss": 2.3769, "step": 55000 }, { "epoch": 0.2126532758114147, "grad_norm": 0.10954099148511887, "learning_rate": 0.002, "loss": 2.3603, "step": 55010 }, { "epoch": 0.21269193301479797, "grad_norm": 0.1290101706981659, "learning_rate": 0.002, "loss": 2.345, "step": 55020 }, { "epoch": 0.21273059021818125, "grad_norm": 0.09908808767795563, "learning_rate": 0.002, "loss": 2.3588, "step": 55030 }, { "epoch": 0.21276924742156453, "grad_norm": 0.1017303317785263, "learning_rate": 0.002, "loss": 2.3446, "step": 55040 }, { "epoch": 0.2128079046249478, "grad_norm": 0.11294441670179367, "learning_rate": 0.002, "loss": 2.3588, "step": 55050 }, { "epoch": 0.2128465618283311, "grad_norm": 0.10736481845378876, "learning_rate": 0.002, "loss": 2.3589, "step": 55060 }, { "epoch": 0.21288521903171437, "grad_norm": 0.09974376112222672, "learning_rate": 0.002, "loss": 2.3661, "step": 55070 }, { "epoch": 0.21292387623509765, "grad_norm": 0.12165064364671707, "learning_rate": 0.002, "loss": 2.3658, "step": 55080 }, { "epoch": 0.21296253343848093, "grad_norm": 0.10961667448282242, "learning_rate": 0.002, "loss": 2.3526, "step": 55090 }, { "epoch": 0.2130011906418642, "grad_norm": 0.11452201008796692, "learning_rate": 0.002, "loss": 2.3575, "step": 55100 }, { "epoch": 0.2130398478452475, "grad_norm": 0.10695372521877289, "learning_rate": 0.002, "loss": 2.3628, "step": 55110 }, { "epoch": 0.21307850504863077, "grad_norm": 0.09367763251066208, "learning_rate": 0.002, "loss": 2.3774, "step": 55120 }, { "epoch": 0.21311716225201405, "grad_norm": 0.09776943176984787, "learning_rate": 0.002, "loss": 2.3694, "step": 55130 }, { "epoch": 0.21315581945539733, "grad_norm": 0.124178446829319, "learning_rate": 0.002, "loss": 2.3504, "step": 55140 }, { "epoch": 0.2131944766587806, "grad_norm": 0.10170625895261765, "learning_rate": 0.002, "loss": 2.3652, "step": 55150 }, { "epoch": 0.2132331338621639, "grad_norm": 0.10083147138357162, "learning_rate": 0.002, "loss": 2.368, "step": 55160 }, { "epoch": 0.21327179106554717, "grad_norm": 0.11676553636789322, "learning_rate": 0.002, "loss": 2.3398, "step": 55170 }, { "epoch": 0.21331044826893042, "grad_norm": 0.10173624008893967, "learning_rate": 0.002, "loss": 2.3556, "step": 55180 }, { "epoch": 0.2133491054723137, "grad_norm": 0.1213875263929367, "learning_rate": 0.002, "loss": 2.3639, "step": 55190 }, { "epoch": 0.21338776267569698, "grad_norm": 0.11385396867990494, "learning_rate": 0.002, "loss": 2.3644, "step": 55200 }, { "epoch": 0.21342641987908026, "grad_norm": 0.11575232446193695, "learning_rate": 0.002, "loss": 2.3538, "step": 55210 }, { "epoch": 0.21346507708246354, "grad_norm": 0.10983169823884964, "learning_rate": 0.002, "loss": 2.3629, "step": 55220 }, { "epoch": 0.21350373428584682, "grad_norm": 0.10628439486026764, "learning_rate": 0.002, "loss": 2.3667, "step": 55230 }, { "epoch": 0.2135423914892301, "grad_norm": 0.11618947982788086, "learning_rate": 0.002, "loss": 2.3487, "step": 55240 }, { "epoch": 0.21358104869261338, "grad_norm": 0.09852719306945801, "learning_rate": 0.002, "loss": 2.3568, "step": 55250 }, { "epoch": 0.21361970589599666, "grad_norm": 0.12594835460186005, "learning_rate": 0.002, "loss": 2.3632, "step": 55260 }, { "epoch": 0.21365836309937994, "grad_norm": 0.10377329587936401, "learning_rate": 0.002, "loss": 2.359, "step": 55270 }, { "epoch": 0.21369702030276322, "grad_norm": 0.12225122004747391, "learning_rate": 0.002, "loss": 2.3581, "step": 55280 }, { "epoch": 0.2137356775061465, "grad_norm": 0.09943044930696487, "learning_rate": 0.002, "loss": 2.3571, "step": 55290 }, { "epoch": 0.21377433470952978, "grad_norm": 0.1107863187789917, "learning_rate": 0.002, "loss": 2.3592, "step": 55300 }, { "epoch": 0.21381299191291306, "grad_norm": 0.11024272441864014, "learning_rate": 0.002, "loss": 2.3468, "step": 55310 }, { "epoch": 0.21385164911629634, "grad_norm": 0.15556177496910095, "learning_rate": 0.002, "loss": 2.3777, "step": 55320 }, { "epoch": 0.21389030631967962, "grad_norm": 0.11116694658994675, "learning_rate": 0.002, "loss": 2.3666, "step": 55330 }, { "epoch": 0.2139289635230629, "grad_norm": 0.09645438939332962, "learning_rate": 0.002, "loss": 2.3556, "step": 55340 }, { "epoch": 0.21396762072644618, "grad_norm": 0.12381944805383682, "learning_rate": 0.002, "loss": 2.366, "step": 55350 }, { "epoch": 0.21400627792982946, "grad_norm": 0.11018754541873932, "learning_rate": 0.002, "loss": 2.3639, "step": 55360 }, { "epoch": 0.2140449351332127, "grad_norm": 0.1027529314160347, "learning_rate": 0.002, "loss": 2.3733, "step": 55370 }, { "epoch": 0.214083592336596, "grad_norm": 0.09542679786682129, "learning_rate": 0.002, "loss": 2.3442, "step": 55380 }, { "epoch": 0.21412224953997927, "grad_norm": 0.12536196410655975, "learning_rate": 0.002, "loss": 2.3675, "step": 55390 }, { "epoch": 0.21416090674336255, "grad_norm": 0.11031936854124069, "learning_rate": 0.002, "loss": 2.3655, "step": 55400 }, { "epoch": 0.21419956394674583, "grad_norm": 0.12041087448596954, "learning_rate": 0.002, "loss": 2.3576, "step": 55410 }, { "epoch": 0.2142382211501291, "grad_norm": 0.11016488820314407, "learning_rate": 0.002, "loss": 2.357, "step": 55420 }, { "epoch": 0.2142768783535124, "grad_norm": 0.13809560239315033, "learning_rate": 0.002, "loss": 2.3418, "step": 55430 }, { "epoch": 0.21431553555689567, "grad_norm": 0.09298276156187057, "learning_rate": 0.002, "loss": 2.351, "step": 55440 }, { "epoch": 0.21435419276027895, "grad_norm": 0.11793974041938782, "learning_rate": 0.002, "loss": 2.3637, "step": 55450 }, { "epoch": 0.21439284996366223, "grad_norm": 0.10310234129428864, "learning_rate": 0.002, "loss": 2.3506, "step": 55460 }, { "epoch": 0.2144315071670455, "grad_norm": 0.11061696708202362, "learning_rate": 0.002, "loss": 2.3573, "step": 55470 }, { "epoch": 0.2144701643704288, "grad_norm": 0.11232610791921616, "learning_rate": 0.002, "loss": 2.3624, "step": 55480 }, { "epoch": 0.21450882157381207, "grad_norm": 0.10409107804298401, "learning_rate": 0.002, "loss": 2.3677, "step": 55490 }, { "epoch": 0.21454747877719535, "grad_norm": 0.1074514091014862, "learning_rate": 0.002, "loss": 2.3577, "step": 55500 }, { "epoch": 0.21458613598057863, "grad_norm": 0.12128176540136337, "learning_rate": 0.002, "loss": 2.3694, "step": 55510 }, { "epoch": 0.2146247931839619, "grad_norm": 0.09980496019124985, "learning_rate": 0.002, "loss": 2.362, "step": 55520 }, { "epoch": 0.2146634503873452, "grad_norm": 0.10034727305173874, "learning_rate": 0.002, "loss": 2.3547, "step": 55530 }, { "epoch": 0.21470210759072847, "grad_norm": 0.10929981619119644, "learning_rate": 0.002, "loss": 2.3616, "step": 55540 }, { "epoch": 0.21474076479411172, "grad_norm": 0.10616452991962433, "learning_rate": 0.002, "loss": 2.3424, "step": 55550 }, { "epoch": 0.214779421997495, "grad_norm": 0.10621832311153412, "learning_rate": 0.002, "loss": 2.3503, "step": 55560 }, { "epoch": 0.21481807920087828, "grad_norm": 0.09339945763349533, "learning_rate": 0.002, "loss": 2.3759, "step": 55570 }, { "epoch": 0.21485673640426156, "grad_norm": 0.12555859982967377, "learning_rate": 0.002, "loss": 2.3584, "step": 55580 }, { "epoch": 0.21489539360764484, "grad_norm": 0.09851313382387161, "learning_rate": 0.002, "loss": 2.3731, "step": 55590 }, { "epoch": 0.21493405081102812, "grad_norm": 0.10451821982860565, "learning_rate": 0.002, "loss": 2.3671, "step": 55600 }, { "epoch": 0.2149727080144114, "grad_norm": 0.09720192104578018, "learning_rate": 0.002, "loss": 2.3613, "step": 55610 }, { "epoch": 0.21501136521779468, "grad_norm": 0.09352467209100723, "learning_rate": 0.002, "loss": 2.3489, "step": 55620 }, { "epoch": 0.21505002242117796, "grad_norm": 0.10066934674978256, "learning_rate": 0.002, "loss": 2.3651, "step": 55630 }, { "epoch": 0.21508867962456124, "grad_norm": 0.13200034201145172, "learning_rate": 0.002, "loss": 2.3507, "step": 55640 }, { "epoch": 0.21512733682794452, "grad_norm": 0.10262254625558853, "learning_rate": 0.002, "loss": 2.3513, "step": 55650 }, { "epoch": 0.2151659940313278, "grad_norm": 0.11579084396362305, "learning_rate": 0.002, "loss": 2.3555, "step": 55660 }, { "epoch": 0.21520465123471108, "grad_norm": 0.10719157755374908, "learning_rate": 0.002, "loss": 2.3449, "step": 55670 }, { "epoch": 0.21524330843809436, "grad_norm": 0.12055464088916779, "learning_rate": 0.002, "loss": 2.3557, "step": 55680 }, { "epoch": 0.21528196564147764, "grad_norm": 0.1007857471704483, "learning_rate": 0.002, "loss": 2.3445, "step": 55690 }, { "epoch": 0.21532062284486092, "grad_norm": 0.10116320103406906, "learning_rate": 0.002, "loss": 2.3655, "step": 55700 }, { "epoch": 0.2153592800482442, "grad_norm": 0.10648627579212189, "learning_rate": 0.002, "loss": 2.3536, "step": 55710 }, { "epoch": 0.21539793725162748, "grad_norm": 0.09784390777349472, "learning_rate": 0.002, "loss": 2.3498, "step": 55720 }, { "epoch": 0.21543659445501076, "grad_norm": 0.1030447855591774, "learning_rate": 0.002, "loss": 2.3485, "step": 55730 }, { "epoch": 0.215475251658394, "grad_norm": 0.09437708556652069, "learning_rate": 0.002, "loss": 2.3589, "step": 55740 }, { "epoch": 0.2155139088617773, "grad_norm": 0.12527985870838165, "learning_rate": 0.002, "loss": 2.3558, "step": 55750 }, { "epoch": 0.21555256606516057, "grad_norm": 0.0960555300116539, "learning_rate": 0.002, "loss": 2.3426, "step": 55760 }, { "epoch": 0.21559122326854385, "grad_norm": 0.12125767767429352, "learning_rate": 0.002, "loss": 2.3616, "step": 55770 }, { "epoch": 0.21562988047192713, "grad_norm": 0.10940881818532944, "learning_rate": 0.002, "loss": 2.339, "step": 55780 }, { "epoch": 0.2156685376753104, "grad_norm": 0.11511075496673584, "learning_rate": 0.002, "loss": 2.3571, "step": 55790 }, { "epoch": 0.2157071948786937, "grad_norm": 0.1234438568353653, "learning_rate": 0.002, "loss": 2.3601, "step": 55800 }, { "epoch": 0.21574585208207697, "grad_norm": 0.0952814370393753, "learning_rate": 0.002, "loss": 2.3628, "step": 55810 }, { "epoch": 0.21578450928546025, "grad_norm": 0.10598743706941605, "learning_rate": 0.002, "loss": 2.3521, "step": 55820 }, { "epoch": 0.21582316648884353, "grad_norm": 0.11886392533779144, "learning_rate": 0.002, "loss": 2.3523, "step": 55830 }, { "epoch": 0.2158618236922268, "grad_norm": 0.09600363671779633, "learning_rate": 0.002, "loss": 2.3569, "step": 55840 }, { "epoch": 0.2159004808956101, "grad_norm": 0.12411284446716309, "learning_rate": 0.002, "loss": 2.3645, "step": 55850 }, { "epoch": 0.21593913809899337, "grad_norm": 0.11406931281089783, "learning_rate": 0.002, "loss": 2.3531, "step": 55860 }, { "epoch": 0.21597779530237665, "grad_norm": 0.09808206558227539, "learning_rate": 0.002, "loss": 2.3601, "step": 55870 }, { "epoch": 0.21601645250575993, "grad_norm": 0.09981367737054825, "learning_rate": 0.002, "loss": 2.3707, "step": 55880 }, { "epoch": 0.2160551097091432, "grad_norm": 0.11539043486118317, "learning_rate": 0.002, "loss": 2.3622, "step": 55890 }, { "epoch": 0.2160937669125265, "grad_norm": 0.09959662705659866, "learning_rate": 0.002, "loss": 2.3736, "step": 55900 }, { "epoch": 0.21613242411590977, "grad_norm": 0.10524085909128189, "learning_rate": 0.002, "loss": 2.3513, "step": 55910 }, { "epoch": 0.21617108131929302, "grad_norm": 0.10049695521593094, "learning_rate": 0.002, "loss": 2.3549, "step": 55920 }, { "epoch": 0.2162097385226763, "grad_norm": 0.11792772263288498, "learning_rate": 0.002, "loss": 2.354, "step": 55930 }, { "epoch": 0.21624839572605958, "grad_norm": 0.1288314014673233, "learning_rate": 0.002, "loss": 2.3652, "step": 55940 }, { "epoch": 0.21628705292944286, "grad_norm": 0.11100339889526367, "learning_rate": 0.002, "loss": 2.3597, "step": 55950 }, { "epoch": 0.21632571013282614, "grad_norm": 0.10390551388263702, "learning_rate": 0.002, "loss": 2.3694, "step": 55960 }, { "epoch": 0.21636436733620942, "grad_norm": 0.10269264876842499, "learning_rate": 0.002, "loss": 2.3482, "step": 55970 }, { "epoch": 0.2164030245395927, "grad_norm": 0.1092856377363205, "learning_rate": 0.002, "loss": 2.3651, "step": 55980 }, { "epoch": 0.21644168174297598, "grad_norm": 0.1061163917183876, "learning_rate": 0.002, "loss": 2.3474, "step": 55990 }, { "epoch": 0.21648033894635926, "grad_norm": 0.09067103266716003, "learning_rate": 0.002, "loss": 2.3649, "step": 56000 }, { "epoch": 0.21651899614974254, "grad_norm": 0.1044374480843544, "learning_rate": 0.002, "loss": 2.372, "step": 56010 }, { "epoch": 0.21655765335312582, "grad_norm": 0.1103706806898117, "learning_rate": 0.002, "loss": 2.3548, "step": 56020 }, { "epoch": 0.2165963105565091, "grad_norm": 0.09729396551847458, "learning_rate": 0.002, "loss": 2.3561, "step": 56030 }, { "epoch": 0.21663496775989238, "grad_norm": 0.11313861608505249, "learning_rate": 0.002, "loss": 2.3576, "step": 56040 }, { "epoch": 0.21667362496327566, "grad_norm": 0.09405819326639175, "learning_rate": 0.002, "loss": 2.3633, "step": 56050 }, { "epoch": 0.21671228216665894, "grad_norm": 0.11277754604816437, "learning_rate": 0.002, "loss": 2.3629, "step": 56060 }, { "epoch": 0.21675093937004222, "grad_norm": 0.10887305438518524, "learning_rate": 0.002, "loss": 2.3478, "step": 56070 }, { "epoch": 0.2167895965734255, "grad_norm": 0.08707216382026672, "learning_rate": 0.002, "loss": 2.3595, "step": 56080 }, { "epoch": 0.21682825377680878, "grad_norm": 0.10491017252206802, "learning_rate": 0.002, "loss": 2.3571, "step": 56090 }, { "epoch": 0.21686691098019206, "grad_norm": 0.11531982570886612, "learning_rate": 0.002, "loss": 2.3718, "step": 56100 }, { "epoch": 0.21690556818357531, "grad_norm": 0.09856010228395462, "learning_rate": 0.002, "loss": 2.3535, "step": 56110 }, { "epoch": 0.2169442253869586, "grad_norm": 0.11213725060224533, "learning_rate": 0.002, "loss": 2.3505, "step": 56120 }, { "epoch": 0.21698288259034187, "grad_norm": 0.11464966833591461, "learning_rate": 0.002, "loss": 2.3526, "step": 56130 }, { "epoch": 0.21702153979372515, "grad_norm": 0.12422860413789749, "learning_rate": 0.002, "loss": 2.3559, "step": 56140 }, { "epoch": 0.21706019699710843, "grad_norm": 0.10502666980028152, "learning_rate": 0.002, "loss": 2.3643, "step": 56150 }, { "epoch": 0.21709885420049171, "grad_norm": 0.0971233919262886, "learning_rate": 0.002, "loss": 2.3671, "step": 56160 }, { "epoch": 0.217137511403875, "grad_norm": 0.1192779615521431, "learning_rate": 0.002, "loss": 2.3452, "step": 56170 }, { "epoch": 0.21717616860725827, "grad_norm": 0.10713006556034088, "learning_rate": 0.002, "loss": 2.3516, "step": 56180 }, { "epoch": 0.21721482581064155, "grad_norm": 0.10372406989336014, "learning_rate": 0.002, "loss": 2.3694, "step": 56190 }, { "epoch": 0.21725348301402483, "grad_norm": 0.11853165179491043, "learning_rate": 0.002, "loss": 2.3602, "step": 56200 }, { "epoch": 0.21729214021740811, "grad_norm": 0.09557045996189117, "learning_rate": 0.002, "loss": 2.3571, "step": 56210 }, { "epoch": 0.2173307974207914, "grad_norm": 0.12145307660102844, "learning_rate": 0.002, "loss": 2.3657, "step": 56220 }, { "epoch": 0.21736945462417467, "grad_norm": 0.10478398203849792, "learning_rate": 0.002, "loss": 2.3606, "step": 56230 }, { "epoch": 0.21740811182755795, "grad_norm": 0.11328970640897751, "learning_rate": 0.002, "loss": 2.3448, "step": 56240 }, { "epoch": 0.21744676903094123, "grad_norm": 0.10635059326887131, "learning_rate": 0.002, "loss": 2.3618, "step": 56250 }, { "epoch": 0.2174854262343245, "grad_norm": 0.1067148745059967, "learning_rate": 0.002, "loss": 2.367, "step": 56260 }, { "epoch": 0.2175240834377078, "grad_norm": 0.11332537978887558, "learning_rate": 0.002, "loss": 2.3372, "step": 56270 }, { "epoch": 0.21756274064109107, "grad_norm": 0.11176859587430954, "learning_rate": 0.002, "loss": 2.3673, "step": 56280 }, { "epoch": 0.21760139784447433, "grad_norm": 0.13134358823299408, "learning_rate": 0.002, "loss": 2.3615, "step": 56290 }, { "epoch": 0.2176400550478576, "grad_norm": 0.1118798553943634, "learning_rate": 0.002, "loss": 2.3451, "step": 56300 }, { "epoch": 0.21767871225124089, "grad_norm": 0.1096770390868187, "learning_rate": 0.002, "loss": 2.3609, "step": 56310 }, { "epoch": 0.21771736945462417, "grad_norm": 0.10314035415649414, "learning_rate": 0.002, "loss": 2.3502, "step": 56320 }, { "epoch": 0.21775602665800745, "grad_norm": 0.11302123218774796, "learning_rate": 0.002, "loss": 2.3637, "step": 56330 }, { "epoch": 0.21779468386139073, "grad_norm": 0.10143207758665085, "learning_rate": 0.002, "loss": 2.3522, "step": 56340 }, { "epoch": 0.217833341064774, "grad_norm": 0.13196396827697754, "learning_rate": 0.002, "loss": 2.3555, "step": 56350 }, { "epoch": 0.21787199826815729, "grad_norm": 0.12415259331464767, "learning_rate": 0.002, "loss": 2.3579, "step": 56360 }, { "epoch": 0.21791065547154057, "grad_norm": 0.10905799269676208, "learning_rate": 0.002, "loss": 2.3565, "step": 56370 }, { "epoch": 0.21794931267492385, "grad_norm": 0.0945185050368309, "learning_rate": 0.002, "loss": 2.3435, "step": 56380 }, { "epoch": 0.21798796987830713, "grad_norm": 0.12273037433624268, "learning_rate": 0.002, "loss": 2.3681, "step": 56390 }, { "epoch": 0.2180266270816904, "grad_norm": 0.1234779804944992, "learning_rate": 0.002, "loss": 2.3697, "step": 56400 }, { "epoch": 0.21806528428507369, "grad_norm": 0.10489704459905624, "learning_rate": 0.002, "loss": 2.3735, "step": 56410 }, { "epoch": 0.21810394148845696, "grad_norm": 0.1268286556005478, "learning_rate": 0.002, "loss": 2.3619, "step": 56420 }, { "epoch": 0.21814259869184024, "grad_norm": 0.09708143025636673, "learning_rate": 0.002, "loss": 2.3607, "step": 56430 }, { "epoch": 0.21818125589522352, "grad_norm": 0.10344573110342026, "learning_rate": 0.002, "loss": 2.3541, "step": 56440 }, { "epoch": 0.2182199130986068, "grad_norm": 0.11367353051900864, "learning_rate": 0.002, "loss": 2.3775, "step": 56450 }, { "epoch": 0.21825857030199008, "grad_norm": 0.11198901385068893, "learning_rate": 0.002, "loss": 2.3506, "step": 56460 }, { "epoch": 0.21829722750537336, "grad_norm": 0.11139329522848129, "learning_rate": 0.002, "loss": 2.3613, "step": 56470 }, { "epoch": 0.21833588470875662, "grad_norm": 0.2934470772743225, "learning_rate": 0.002, "loss": 2.3728, "step": 56480 }, { "epoch": 0.2183745419121399, "grad_norm": 0.09208117425441742, "learning_rate": 0.002, "loss": 2.3549, "step": 56490 }, { "epoch": 0.21841319911552318, "grad_norm": 0.09002061933279037, "learning_rate": 0.002, "loss": 2.3548, "step": 56500 }, { "epoch": 0.21845185631890646, "grad_norm": 0.10619490593671799, "learning_rate": 0.002, "loss": 2.3593, "step": 56510 }, { "epoch": 0.21849051352228974, "grad_norm": 0.1191694512963295, "learning_rate": 0.002, "loss": 2.3713, "step": 56520 }, { "epoch": 0.21852917072567302, "grad_norm": 0.09826357662677765, "learning_rate": 0.002, "loss": 2.372, "step": 56530 }, { "epoch": 0.2185678279290563, "grad_norm": 0.10082120448350906, "learning_rate": 0.002, "loss": 2.3517, "step": 56540 }, { "epoch": 0.21860648513243958, "grad_norm": 0.1363697350025177, "learning_rate": 0.002, "loss": 2.3675, "step": 56550 }, { "epoch": 0.21864514233582286, "grad_norm": 0.11739884316921234, "learning_rate": 0.002, "loss": 2.375, "step": 56560 }, { "epoch": 0.21868379953920614, "grad_norm": 0.09619259089231491, "learning_rate": 0.002, "loss": 2.3591, "step": 56570 }, { "epoch": 0.21872245674258942, "grad_norm": 0.10125040262937546, "learning_rate": 0.002, "loss": 2.3536, "step": 56580 }, { "epoch": 0.2187611139459727, "grad_norm": 0.6269662976264954, "learning_rate": 0.002, "loss": 2.3637, "step": 56590 }, { "epoch": 0.21879977114935598, "grad_norm": 0.14983363449573517, "learning_rate": 0.002, "loss": 2.3736, "step": 56600 }, { "epoch": 0.21883842835273926, "grad_norm": 0.1266162395477295, "learning_rate": 0.002, "loss": 2.35, "step": 56610 }, { "epoch": 0.21887708555612254, "grad_norm": 0.10967439413070679, "learning_rate": 0.002, "loss": 2.3618, "step": 56620 }, { "epoch": 0.21891574275950582, "grad_norm": 0.09150734543800354, "learning_rate": 0.002, "loss": 2.3579, "step": 56630 }, { "epoch": 0.2189543999628891, "grad_norm": 0.2657480239868164, "learning_rate": 0.002, "loss": 2.3672, "step": 56640 }, { "epoch": 0.21899305716627238, "grad_norm": 0.0987754538655281, "learning_rate": 0.002, "loss": 2.3602, "step": 56650 }, { "epoch": 0.21903171436965563, "grad_norm": 0.09909951686859131, "learning_rate": 0.002, "loss": 2.3586, "step": 56660 }, { "epoch": 0.2190703715730389, "grad_norm": 0.09344511479139328, "learning_rate": 0.002, "loss": 2.3472, "step": 56670 }, { "epoch": 0.2191090287764222, "grad_norm": 0.0909435972571373, "learning_rate": 0.002, "loss": 2.3506, "step": 56680 }, { "epoch": 0.21914768597980547, "grad_norm": 0.11159311980009079, "learning_rate": 0.002, "loss": 2.3591, "step": 56690 }, { "epoch": 0.21918634318318875, "grad_norm": 0.09787567704916, "learning_rate": 0.002, "loss": 2.3657, "step": 56700 }, { "epoch": 0.21922500038657203, "grad_norm": 0.09632629156112671, "learning_rate": 0.002, "loss": 2.3623, "step": 56710 }, { "epoch": 0.2192636575899553, "grad_norm": 0.10457716882228851, "learning_rate": 0.002, "loss": 2.3632, "step": 56720 }, { "epoch": 0.2193023147933386, "grad_norm": 0.12746752798557281, "learning_rate": 0.002, "loss": 2.3713, "step": 56730 }, { "epoch": 0.21934097199672187, "grad_norm": 0.10214285552501678, "learning_rate": 0.002, "loss": 2.3636, "step": 56740 }, { "epoch": 0.21937962920010515, "grad_norm": 0.12154365330934525, "learning_rate": 0.002, "loss": 2.3616, "step": 56750 }, { "epoch": 0.21941828640348843, "grad_norm": 0.11975551396608353, "learning_rate": 0.002, "loss": 2.3602, "step": 56760 }, { "epoch": 0.2194569436068717, "grad_norm": 0.10890713334083557, "learning_rate": 0.002, "loss": 2.3647, "step": 56770 }, { "epoch": 0.219495600810255, "grad_norm": 0.11588835716247559, "learning_rate": 0.002, "loss": 2.3662, "step": 56780 }, { "epoch": 0.21953425801363827, "grad_norm": 0.09341085702180862, "learning_rate": 0.002, "loss": 2.3527, "step": 56790 }, { "epoch": 0.21957291521702155, "grad_norm": 0.10048364102840424, "learning_rate": 0.002, "loss": 2.3602, "step": 56800 }, { "epoch": 0.21961157242040483, "grad_norm": 0.11687114834785461, "learning_rate": 0.002, "loss": 2.3579, "step": 56810 }, { "epoch": 0.2196502296237881, "grad_norm": 0.10780845582485199, "learning_rate": 0.002, "loss": 2.3729, "step": 56820 }, { "epoch": 0.2196888868271714, "grad_norm": 0.10212460160255432, "learning_rate": 0.002, "loss": 2.3666, "step": 56830 }, { "epoch": 0.21972754403055467, "grad_norm": 0.1121063157916069, "learning_rate": 0.002, "loss": 2.3743, "step": 56840 }, { "epoch": 0.21976620123393792, "grad_norm": 0.10919679701328278, "learning_rate": 0.002, "loss": 2.3583, "step": 56850 }, { "epoch": 0.2198048584373212, "grad_norm": 0.10835408419370651, "learning_rate": 0.002, "loss": 2.3611, "step": 56860 }, { "epoch": 0.21984351564070448, "grad_norm": 0.10051613301038742, "learning_rate": 0.002, "loss": 2.3581, "step": 56870 }, { "epoch": 0.21988217284408776, "grad_norm": 0.119876429438591, "learning_rate": 0.002, "loss": 2.3544, "step": 56880 }, { "epoch": 0.21992083004747104, "grad_norm": 0.1138211339712143, "learning_rate": 0.002, "loss": 2.3541, "step": 56890 }, { "epoch": 0.21995948725085432, "grad_norm": 0.10223833471536636, "learning_rate": 0.002, "loss": 2.3623, "step": 56900 }, { "epoch": 0.2199981444542376, "grad_norm": 0.10718075186014175, "learning_rate": 0.002, "loss": 2.3613, "step": 56910 }, { "epoch": 0.22003680165762088, "grad_norm": 0.11498738080263138, "learning_rate": 0.002, "loss": 2.3623, "step": 56920 }, { "epoch": 0.22007545886100416, "grad_norm": 0.11930633336305618, "learning_rate": 0.002, "loss": 2.3704, "step": 56930 }, { "epoch": 0.22011411606438744, "grad_norm": 0.11556489765644073, "learning_rate": 0.002, "loss": 2.3572, "step": 56940 }, { "epoch": 0.22015277326777072, "grad_norm": 0.10762212425470352, "learning_rate": 0.002, "loss": 2.3638, "step": 56950 }, { "epoch": 0.220191430471154, "grad_norm": 0.09085428714752197, "learning_rate": 0.002, "loss": 2.37, "step": 56960 }, { "epoch": 0.22023008767453728, "grad_norm": 0.10463374853134155, "learning_rate": 0.002, "loss": 2.3776, "step": 56970 }, { "epoch": 0.22026874487792056, "grad_norm": 0.10341199487447739, "learning_rate": 0.002, "loss": 2.3639, "step": 56980 }, { "epoch": 0.22030740208130384, "grad_norm": 0.10309240221977234, "learning_rate": 0.002, "loss": 2.3343, "step": 56990 }, { "epoch": 0.22034605928468712, "grad_norm": 0.13077403604984283, "learning_rate": 0.002, "loss": 2.3729, "step": 57000 }, { "epoch": 0.2203847164880704, "grad_norm": 0.09969010204076767, "learning_rate": 0.002, "loss": 2.3536, "step": 57010 }, { "epoch": 0.22042337369145368, "grad_norm": 0.18511107563972473, "learning_rate": 0.002, "loss": 2.3536, "step": 57020 }, { "epoch": 0.22046203089483693, "grad_norm": 0.111596018075943, "learning_rate": 0.002, "loss": 2.3747, "step": 57030 }, { "epoch": 0.2205006880982202, "grad_norm": 0.10208901017904282, "learning_rate": 0.002, "loss": 2.3734, "step": 57040 }, { "epoch": 0.2205393453016035, "grad_norm": 0.09552479535341263, "learning_rate": 0.002, "loss": 2.3571, "step": 57050 }, { "epoch": 0.22057800250498677, "grad_norm": 0.10220164060592651, "learning_rate": 0.002, "loss": 2.3664, "step": 57060 }, { "epoch": 0.22061665970837005, "grad_norm": 0.09908819943666458, "learning_rate": 0.002, "loss": 2.3536, "step": 57070 }, { "epoch": 0.22065531691175333, "grad_norm": 0.10516798496246338, "learning_rate": 0.002, "loss": 2.3655, "step": 57080 }, { "epoch": 0.2206939741151366, "grad_norm": 0.11086596548557281, "learning_rate": 0.002, "loss": 2.3626, "step": 57090 }, { "epoch": 0.2207326313185199, "grad_norm": 0.09934603422880173, "learning_rate": 0.002, "loss": 2.3597, "step": 57100 }, { "epoch": 0.22077128852190317, "grad_norm": 0.10181740671396255, "learning_rate": 0.002, "loss": 2.3558, "step": 57110 }, { "epoch": 0.22080994572528645, "grad_norm": 0.11644434183835983, "learning_rate": 0.002, "loss": 2.3572, "step": 57120 }, { "epoch": 0.22084860292866973, "grad_norm": 0.12942452728748322, "learning_rate": 0.002, "loss": 2.3686, "step": 57130 }, { "epoch": 0.220887260132053, "grad_norm": 0.11943166702985764, "learning_rate": 0.002, "loss": 2.3692, "step": 57140 }, { "epoch": 0.2209259173354363, "grad_norm": 0.13018329441547394, "learning_rate": 0.002, "loss": 2.371, "step": 57150 }, { "epoch": 0.22096457453881957, "grad_norm": 0.1093996912240982, "learning_rate": 0.002, "loss": 2.3632, "step": 57160 }, { "epoch": 0.22100323174220285, "grad_norm": 0.10882691293954849, "learning_rate": 0.002, "loss": 2.3429, "step": 57170 }, { "epoch": 0.22104188894558613, "grad_norm": 0.11444193124771118, "learning_rate": 0.002, "loss": 2.3639, "step": 57180 }, { "epoch": 0.2210805461489694, "grad_norm": 0.12251651287078857, "learning_rate": 0.002, "loss": 2.3781, "step": 57190 }, { "epoch": 0.2211192033523527, "grad_norm": 0.11783339083194733, "learning_rate": 0.002, "loss": 2.3512, "step": 57200 }, { "epoch": 0.22115786055573597, "grad_norm": 0.09975133091211319, "learning_rate": 0.002, "loss": 2.3418, "step": 57210 }, { "epoch": 0.22119651775911922, "grad_norm": 0.12574127316474915, "learning_rate": 0.002, "loss": 2.3777, "step": 57220 }, { "epoch": 0.2212351749625025, "grad_norm": 0.10755207389593124, "learning_rate": 0.002, "loss": 2.3472, "step": 57230 }, { "epoch": 0.22127383216588578, "grad_norm": 0.13826222717761993, "learning_rate": 0.002, "loss": 2.3681, "step": 57240 }, { "epoch": 0.22131248936926906, "grad_norm": 0.11009145528078079, "learning_rate": 0.002, "loss": 2.3523, "step": 57250 }, { "epoch": 0.22135114657265234, "grad_norm": 0.10710586607456207, "learning_rate": 0.002, "loss": 2.3551, "step": 57260 }, { "epoch": 0.22138980377603562, "grad_norm": 0.11547312140464783, "learning_rate": 0.002, "loss": 2.3574, "step": 57270 }, { "epoch": 0.2214284609794189, "grad_norm": 0.10780449956655502, "learning_rate": 0.002, "loss": 2.356, "step": 57280 }, { "epoch": 0.22146711818280218, "grad_norm": 0.11152885854244232, "learning_rate": 0.002, "loss": 2.3679, "step": 57290 }, { "epoch": 0.22150577538618546, "grad_norm": 0.10193509608507156, "learning_rate": 0.002, "loss": 2.3464, "step": 57300 }, { "epoch": 0.22154443258956874, "grad_norm": 0.12414531409740448, "learning_rate": 0.002, "loss": 2.3689, "step": 57310 }, { "epoch": 0.22158308979295202, "grad_norm": 0.1089351624250412, "learning_rate": 0.002, "loss": 2.3744, "step": 57320 }, { "epoch": 0.2216217469963353, "grad_norm": 0.11467059701681137, "learning_rate": 0.002, "loss": 2.3505, "step": 57330 }, { "epoch": 0.22166040419971858, "grad_norm": 0.12027007341384888, "learning_rate": 0.002, "loss": 2.3582, "step": 57340 }, { "epoch": 0.22169906140310186, "grad_norm": 0.11830438673496246, "learning_rate": 0.002, "loss": 2.3617, "step": 57350 }, { "epoch": 0.22173771860648514, "grad_norm": 0.11438164114952087, "learning_rate": 0.002, "loss": 2.3681, "step": 57360 }, { "epoch": 0.22177637580986842, "grad_norm": 0.09567909687757492, "learning_rate": 0.002, "loss": 2.3739, "step": 57370 }, { "epoch": 0.2218150330132517, "grad_norm": 0.13299918174743652, "learning_rate": 0.002, "loss": 2.3455, "step": 57380 }, { "epoch": 0.22185369021663498, "grad_norm": 0.10379625111818314, "learning_rate": 0.002, "loss": 2.351, "step": 57390 }, { "epoch": 0.22189234742001826, "grad_norm": 0.11183536052703857, "learning_rate": 0.002, "loss": 2.3524, "step": 57400 }, { "epoch": 0.2219310046234015, "grad_norm": 0.1073332354426384, "learning_rate": 0.002, "loss": 2.3611, "step": 57410 }, { "epoch": 0.2219696618267848, "grad_norm": 0.13551676273345947, "learning_rate": 0.002, "loss": 2.3646, "step": 57420 }, { "epoch": 0.22200831903016807, "grad_norm": 0.10104433447122574, "learning_rate": 0.002, "loss": 2.3537, "step": 57430 }, { "epoch": 0.22204697623355135, "grad_norm": 0.09403500705957413, "learning_rate": 0.002, "loss": 2.3706, "step": 57440 }, { "epoch": 0.22208563343693463, "grad_norm": 0.11605286598205566, "learning_rate": 0.002, "loss": 2.3778, "step": 57450 }, { "epoch": 0.2221242906403179, "grad_norm": 0.11487416177988052, "learning_rate": 0.002, "loss": 2.3731, "step": 57460 }, { "epoch": 0.2221629478437012, "grad_norm": 0.0884837657213211, "learning_rate": 0.002, "loss": 2.3639, "step": 57470 }, { "epoch": 0.22220160504708447, "grad_norm": 0.11717015504837036, "learning_rate": 0.002, "loss": 2.3761, "step": 57480 }, { "epoch": 0.22224026225046775, "grad_norm": 0.10914857685565948, "learning_rate": 0.002, "loss": 2.3599, "step": 57490 }, { "epoch": 0.22227891945385103, "grad_norm": 0.10230378806591034, "learning_rate": 0.002, "loss": 2.3583, "step": 57500 }, { "epoch": 0.2223175766572343, "grad_norm": 0.13150063157081604, "learning_rate": 0.002, "loss": 2.3621, "step": 57510 }, { "epoch": 0.2223562338606176, "grad_norm": 0.1172526627779007, "learning_rate": 0.002, "loss": 2.343, "step": 57520 }, { "epoch": 0.22239489106400087, "grad_norm": 0.09546870738267899, "learning_rate": 0.002, "loss": 2.348, "step": 57530 }, { "epoch": 0.22243354826738415, "grad_norm": 0.10355856269598007, "learning_rate": 0.002, "loss": 2.352, "step": 57540 }, { "epoch": 0.22247220547076743, "grad_norm": 0.09346092492341995, "learning_rate": 0.002, "loss": 2.3573, "step": 57550 }, { "epoch": 0.2225108626741507, "grad_norm": 0.1043282151222229, "learning_rate": 0.002, "loss": 2.3691, "step": 57560 }, { "epoch": 0.222549519877534, "grad_norm": 0.10177697986364365, "learning_rate": 0.002, "loss": 2.3561, "step": 57570 }, { "epoch": 0.22258817708091727, "grad_norm": 0.1164051815867424, "learning_rate": 0.002, "loss": 2.3664, "step": 57580 }, { "epoch": 0.22262683428430052, "grad_norm": 0.0973474457859993, "learning_rate": 0.002, "loss": 2.3682, "step": 57590 }, { "epoch": 0.2226654914876838, "grad_norm": 0.0994463786482811, "learning_rate": 0.002, "loss": 2.358, "step": 57600 }, { "epoch": 0.22270414869106708, "grad_norm": 0.11317646503448486, "learning_rate": 0.002, "loss": 2.3737, "step": 57610 }, { "epoch": 0.22274280589445036, "grad_norm": 0.10045770555734634, "learning_rate": 0.002, "loss": 2.3579, "step": 57620 }, { "epoch": 0.22278146309783364, "grad_norm": 0.11160579323768616, "learning_rate": 0.002, "loss": 2.3593, "step": 57630 }, { "epoch": 0.22282012030121692, "grad_norm": 0.11086764931678772, "learning_rate": 0.002, "loss": 2.3601, "step": 57640 }, { "epoch": 0.2228587775046002, "grad_norm": 0.1002127155661583, "learning_rate": 0.002, "loss": 2.3598, "step": 57650 }, { "epoch": 0.22289743470798348, "grad_norm": 0.11504166573286057, "learning_rate": 0.002, "loss": 2.3622, "step": 57660 }, { "epoch": 0.22293609191136676, "grad_norm": 0.10598036646842957, "learning_rate": 0.002, "loss": 2.3719, "step": 57670 }, { "epoch": 0.22297474911475004, "grad_norm": 0.11816691607236862, "learning_rate": 0.002, "loss": 2.3631, "step": 57680 }, { "epoch": 0.22301340631813332, "grad_norm": 0.10995496809482574, "learning_rate": 0.002, "loss": 2.3675, "step": 57690 }, { "epoch": 0.2230520635215166, "grad_norm": 0.10727677494287491, "learning_rate": 0.002, "loss": 2.346, "step": 57700 }, { "epoch": 0.22309072072489988, "grad_norm": 0.12346167117357254, "learning_rate": 0.002, "loss": 2.3595, "step": 57710 }, { "epoch": 0.22312937792828316, "grad_norm": 0.11119364947080612, "learning_rate": 0.002, "loss": 2.3489, "step": 57720 }, { "epoch": 0.22316803513166644, "grad_norm": 0.10107563436031342, "learning_rate": 0.002, "loss": 2.359, "step": 57730 }, { "epoch": 0.22320669233504972, "grad_norm": 0.11088485270738602, "learning_rate": 0.002, "loss": 2.3598, "step": 57740 }, { "epoch": 0.223245349538433, "grad_norm": 0.09493140131235123, "learning_rate": 0.002, "loss": 2.3569, "step": 57750 }, { "epoch": 0.22328400674181628, "grad_norm": 0.09776858985424042, "learning_rate": 0.002, "loss": 2.3429, "step": 57760 }, { "epoch": 0.22332266394519956, "grad_norm": 0.1105109453201294, "learning_rate": 0.002, "loss": 2.3593, "step": 57770 }, { "epoch": 0.22336132114858281, "grad_norm": 0.09360536187887192, "learning_rate": 0.002, "loss": 2.3697, "step": 57780 }, { "epoch": 0.2233999783519661, "grad_norm": 0.15051110088825226, "learning_rate": 0.002, "loss": 2.3551, "step": 57790 }, { "epoch": 0.22343863555534937, "grad_norm": 0.10816194117069244, "learning_rate": 0.002, "loss": 2.3824, "step": 57800 }, { "epoch": 0.22347729275873265, "grad_norm": 0.11494158208370209, "learning_rate": 0.002, "loss": 2.3561, "step": 57810 }, { "epoch": 0.22351594996211593, "grad_norm": 0.11450178176164627, "learning_rate": 0.002, "loss": 2.3692, "step": 57820 }, { "epoch": 0.22355460716549921, "grad_norm": 0.10050127655267715, "learning_rate": 0.002, "loss": 2.3687, "step": 57830 }, { "epoch": 0.2235932643688825, "grad_norm": 0.09223105013370514, "learning_rate": 0.002, "loss": 2.3704, "step": 57840 }, { "epoch": 0.22363192157226577, "grad_norm": 0.09952362626791, "learning_rate": 0.002, "loss": 2.3516, "step": 57850 }, { "epoch": 0.22367057877564905, "grad_norm": 0.11158590018749237, "learning_rate": 0.002, "loss": 2.3514, "step": 57860 }, { "epoch": 0.22370923597903233, "grad_norm": 0.24610917270183563, "learning_rate": 0.002, "loss": 2.3673, "step": 57870 }, { "epoch": 0.2237478931824156, "grad_norm": 0.10274453461170197, "learning_rate": 0.002, "loss": 2.3545, "step": 57880 }, { "epoch": 0.2237865503857989, "grad_norm": 0.1181669682264328, "learning_rate": 0.002, "loss": 2.3475, "step": 57890 }, { "epoch": 0.22382520758918217, "grad_norm": 0.11647023260593414, "learning_rate": 0.002, "loss": 2.3621, "step": 57900 }, { "epoch": 0.22386386479256545, "grad_norm": 0.10563940554857254, "learning_rate": 0.002, "loss": 2.3542, "step": 57910 }, { "epoch": 0.22390252199594873, "grad_norm": 0.13255992531776428, "learning_rate": 0.002, "loss": 2.3695, "step": 57920 }, { "epoch": 0.223941179199332, "grad_norm": 0.11975933611392975, "learning_rate": 0.002, "loss": 2.351, "step": 57930 }, { "epoch": 0.2239798364027153, "grad_norm": 0.10210419446229935, "learning_rate": 0.002, "loss": 2.3593, "step": 57940 }, { "epoch": 0.22401849360609857, "grad_norm": 0.10627969354391098, "learning_rate": 0.002, "loss": 2.3588, "step": 57950 }, { "epoch": 0.22405715080948183, "grad_norm": 0.14683599770069122, "learning_rate": 0.002, "loss": 2.3539, "step": 57960 }, { "epoch": 0.2240958080128651, "grad_norm": 0.1016928106546402, "learning_rate": 0.002, "loss": 2.3545, "step": 57970 }, { "epoch": 0.22413446521624839, "grad_norm": 0.11856039613485336, "learning_rate": 0.002, "loss": 2.3635, "step": 57980 }, { "epoch": 0.22417312241963167, "grad_norm": 0.10540778189897537, "learning_rate": 0.002, "loss": 2.3673, "step": 57990 }, { "epoch": 0.22421177962301495, "grad_norm": 0.10758113116025925, "learning_rate": 0.002, "loss": 2.3771, "step": 58000 }, { "epoch": 0.22425043682639823, "grad_norm": 0.11392021924257278, "learning_rate": 0.002, "loss": 2.3531, "step": 58010 }, { "epoch": 0.2242890940297815, "grad_norm": 0.10420487821102142, "learning_rate": 0.002, "loss": 2.3463, "step": 58020 }, { "epoch": 0.22432775123316478, "grad_norm": 0.12214615195989609, "learning_rate": 0.002, "loss": 2.3676, "step": 58030 }, { "epoch": 0.22436640843654806, "grad_norm": 0.12638214230537415, "learning_rate": 0.002, "loss": 2.3664, "step": 58040 }, { "epoch": 0.22440506563993134, "grad_norm": 0.10415442287921906, "learning_rate": 0.002, "loss": 2.3721, "step": 58050 }, { "epoch": 0.22444372284331462, "grad_norm": 0.10841263085603714, "learning_rate": 0.002, "loss": 2.3492, "step": 58060 }, { "epoch": 0.2244823800466979, "grad_norm": 0.10981647670269012, "learning_rate": 0.002, "loss": 2.3625, "step": 58070 }, { "epoch": 0.22452103725008118, "grad_norm": 0.1304844319820404, "learning_rate": 0.002, "loss": 2.3347, "step": 58080 }, { "epoch": 0.22455969445346446, "grad_norm": 0.10160935670137405, "learning_rate": 0.002, "loss": 2.3623, "step": 58090 }, { "epoch": 0.22459835165684774, "grad_norm": 0.11425960063934326, "learning_rate": 0.002, "loss": 2.3695, "step": 58100 }, { "epoch": 0.22463700886023102, "grad_norm": 0.1108141615986824, "learning_rate": 0.002, "loss": 2.3523, "step": 58110 }, { "epoch": 0.2246756660636143, "grad_norm": 0.0970022901892662, "learning_rate": 0.002, "loss": 2.3515, "step": 58120 }, { "epoch": 0.22471432326699758, "grad_norm": 0.11046090722084045, "learning_rate": 0.002, "loss": 2.3662, "step": 58130 }, { "epoch": 0.22475298047038086, "grad_norm": 0.11191964149475098, "learning_rate": 0.002, "loss": 2.3607, "step": 58140 }, { "epoch": 0.22479163767376412, "grad_norm": 0.11110240966081619, "learning_rate": 0.002, "loss": 2.3627, "step": 58150 }, { "epoch": 0.2248302948771474, "grad_norm": 0.12536807358264923, "learning_rate": 0.002, "loss": 2.3653, "step": 58160 }, { "epoch": 0.22486895208053068, "grad_norm": 0.1163516640663147, "learning_rate": 0.002, "loss": 2.3593, "step": 58170 }, { "epoch": 0.22490760928391396, "grad_norm": 0.11710215359926224, "learning_rate": 0.002, "loss": 2.371, "step": 58180 }, { "epoch": 0.22494626648729724, "grad_norm": 0.0901746153831482, "learning_rate": 0.002, "loss": 2.3321, "step": 58190 }, { "epoch": 0.22498492369068052, "grad_norm": 0.12406687438488007, "learning_rate": 0.002, "loss": 2.3623, "step": 58200 }, { "epoch": 0.2250235808940638, "grad_norm": 0.1148986741900444, "learning_rate": 0.002, "loss": 2.3474, "step": 58210 }, { "epoch": 0.22506223809744708, "grad_norm": 0.11530084162950516, "learning_rate": 0.002, "loss": 2.3523, "step": 58220 }, { "epoch": 0.22510089530083036, "grad_norm": 0.10330082476139069, "learning_rate": 0.002, "loss": 2.3543, "step": 58230 }, { "epoch": 0.22513955250421364, "grad_norm": 0.10616800934076309, "learning_rate": 0.002, "loss": 2.3818, "step": 58240 }, { "epoch": 0.22517820970759692, "grad_norm": 0.11210612207651138, "learning_rate": 0.002, "loss": 2.3627, "step": 58250 }, { "epoch": 0.2252168669109802, "grad_norm": 0.10534332692623138, "learning_rate": 0.002, "loss": 2.3579, "step": 58260 }, { "epoch": 0.22525552411436348, "grad_norm": 0.11809881031513214, "learning_rate": 0.002, "loss": 2.357, "step": 58270 }, { "epoch": 0.22529418131774676, "grad_norm": 0.11005978286266327, "learning_rate": 0.002, "loss": 2.354, "step": 58280 }, { "epoch": 0.22533283852113004, "grad_norm": 0.10193295776844025, "learning_rate": 0.002, "loss": 2.3596, "step": 58290 }, { "epoch": 0.22537149572451332, "grad_norm": 0.11820928752422333, "learning_rate": 0.002, "loss": 2.3476, "step": 58300 }, { "epoch": 0.2254101529278966, "grad_norm": 0.11508919298648834, "learning_rate": 0.002, "loss": 2.3585, "step": 58310 }, { "epoch": 0.22544881013127988, "grad_norm": 0.10235237330198288, "learning_rate": 0.002, "loss": 2.3567, "step": 58320 }, { "epoch": 0.22548746733466313, "grad_norm": 0.0997655987739563, "learning_rate": 0.002, "loss": 2.3651, "step": 58330 }, { "epoch": 0.2255261245380464, "grad_norm": 0.10590886324644089, "learning_rate": 0.002, "loss": 2.3688, "step": 58340 }, { "epoch": 0.2255647817414297, "grad_norm": 0.11804696917533875, "learning_rate": 0.002, "loss": 2.3759, "step": 58350 }, { "epoch": 0.22560343894481297, "grad_norm": 0.1085897833108902, "learning_rate": 0.002, "loss": 2.3573, "step": 58360 }, { "epoch": 0.22564209614819625, "grad_norm": 0.10946321487426758, "learning_rate": 0.002, "loss": 2.3507, "step": 58370 }, { "epoch": 0.22568075335157953, "grad_norm": 0.11689482629299164, "learning_rate": 0.002, "loss": 2.3637, "step": 58380 }, { "epoch": 0.2257194105549628, "grad_norm": 0.1097857654094696, "learning_rate": 0.002, "loss": 2.362, "step": 58390 }, { "epoch": 0.2257580677583461, "grad_norm": 0.09730439633131027, "learning_rate": 0.002, "loss": 2.345, "step": 58400 }, { "epoch": 0.22579672496172937, "grad_norm": 0.09880250692367554, "learning_rate": 0.002, "loss": 2.3602, "step": 58410 }, { "epoch": 0.22583538216511265, "grad_norm": 0.1136285811662674, "learning_rate": 0.002, "loss": 2.3632, "step": 58420 }, { "epoch": 0.22587403936849593, "grad_norm": 0.1159689649939537, "learning_rate": 0.002, "loss": 2.3725, "step": 58430 }, { "epoch": 0.2259126965718792, "grad_norm": 0.10962986201047897, "learning_rate": 0.002, "loss": 2.3477, "step": 58440 }, { "epoch": 0.2259513537752625, "grad_norm": 0.12517063319683075, "learning_rate": 0.002, "loss": 2.3518, "step": 58450 }, { "epoch": 0.22599001097864577, "grad_norm": 0.10460768640041351, "learning_rate": 0.002, "loss": 2.361, "step": 58460 }, { "epoch": 0.22602866818202905, "grad_norm": 0.1050831526517868, "learning_rate": 0.002, "loss": 2.367, "step": 58470 }, { "epoch": 0.22606732538541233, "grad_norm": 0.10631754994392395, "learning_rate": 0.002, "loss": 2.3569, "step": 58480 }, { "epoch": 0.2261059825887956, "grad_norm": 0.1022988110780716, "learning_rate": 0.002, "loss": 2.3633, "step": 58490 }, { "epoch": 0.2261446397921789, "grad_norm": 0.0885656401515007, "learning_rate": 0.002, "loss": 2.3705, "step": 58500 }, { "epoch": 0.22618329699556217, "grad_norm": 0.10334338992834091, "learning_rate": 0.002, "loss": 2.3441, "step": 58510 }, { "epoch": 0.22622195419894542, "grad_norm": 0.12360124289989471, "learning_rate": 0.002, "loss": 2.3593, "step": 58520 }, { "epoch": 0.2262606114023287, "grad_norm": 0.10091336816549301, "learning_rate": 0.002, "loss": 2.3561, "step": 58530 }, { "epoch": 0.22629926860571198, "grad_norm": 0.09060948342084885, "learning_rate": 0.002, "loss": 2.3627, "step": 58540 }, { "epoch": 0.22633792580909526, "grad_norm": 0.11909083276987076, "learning_rate": 0.002, "loss": 2.3572, "step": 58550 }, { "epoch": 0.22637658301247854, "grad_norm": 0.11162544786930084, "learning_rate": 0.002, "loss": 2.3817, "step": 58560 }, { "epoch": 0.22641524021586182, "grad_norm": 0.11788932979106903, "learning_rate": 0.002, "loss": 2.3522, "step": 58570 }, { "epoch": 0.2264538974192451, "grad_norm": 0.1028372049331665, "learning_rate": 0.002, "loss": 2.3649, "step": 58580 }, { "epoch": 0.22649255462262838, "grad_norm": 0.09843498468399048, "learning_rate": 0.002, "loss": 2.3464, "step": 58590 }, { "epoch": 0.22653121182601166, "grad_norm": 0.1286923885345459, "learning_rate": 0.002, "loss": 2.3517, "step": 58600 }, { "epoch": 0.22656986902939494, "grad_norm": 0.09629490971565247, "learning_rate": 0.002, "loss": 2.3522, "step": 58610 }, { "epoch": 0.22660852623277822, "grad_norm": 0.11018511652946472, "learning_rate": 0.002, "loss": 2.3762, "step": 58620 }, { "epoch": 0.2266471834361615, "grad_norm": 0.09881836175918579, "learning_rate": 0.002, "loss": 2.3647, "step": 58630 }, { "epoch": 0.22668584063954478, "grad_norm": 0.1260494887828827, "learning_rate": 0.002, "loss": 2.3598, "step": 58640 }, { "epoch": 0.22672449784292806, "grad_norm": 0.11758597940206528, "learning_rate": 0.002, "loss": 2.3695, "step": 58650 }, { "epoch": 0.22676315504631134, "grad_norm": 0.11921512335538864, "learning_rate": 0.002, "loss": 2.385, "step": 58660 }, { "epoch": 0.22680181224969462, "grad_norm": 0.1015913262963295, "learning_rate": 0.002, "loss": 2.3765, "step": 58670 }, { "epoch": 0.2268404694530779, "grad_norm": 0.14632849395275116, "learning_rate": 0.002, "loss": 2.3376, "step": 58680 }, { "epoch": 0.22687912665646118, "grad_norm": 0.1042994037270546, "learning_rate": 0.002, "loss": 2.3545, "step": 58690 }, { "epoch": 0.22691778385984443, "grad_norm": 0.10337947309017181, "learning_rate": 0.002, "loss": 2.3517, "step": 58700 }, { "epoch": 0.2269564410632277, "grad_norm": 0.12961353361606598, "learning_rate": 0.002, "loss": 2.3561, "step": 58710 }, { "epoch": 0.226995098266611, "grad_norm": 0.09461600333452225, "learning_rate": 0.002, "loss": 2.3746, "step": 58720 }, { "epoch": 0.22703375546999427, "grad_norm": 0.1150931864976883, "learning_rate": 0.002, "loss": 2.3603, "step": 58730 }, { "epoch": 0.22707241267337755, "grad_norm": 0.13251717388629913, "learning_rate": 0.002, "loss": 2.3503, "step": 58740 }, { "epoch": 0.22711106987676083, "grad_norm": 0.09482169896364212, "learning_rate": 0.002, "loss": 2.3637, "step": 58750 }, { "epoch": 0.2271497270801441, "grad_norm": 0.10086926817893982, "learning_rate": 0.002, "loss": 2.3603, "step": 58760 }, { "epoch": 0.2271883842835274, "grad_norm": 0.10259281098842621, "learning_rate": 0.002, "loss": 2.3653, "step": 58770 }, { "epoch": 0.22722704148691067, "grad_norm": 0.11963513493537903, "learning_rate": 0.002, "loss": 2.3511, "step": 58780 }, { "epoch": 0.22726569869029395, "grad_norm": 0.09868388622999191, "learning_rate": 0.002, "loss": 2.3528, "step": 58790 }, { "epoch": 0.22730435589367723, "grad_norm": 0.10000711679458618, "learning_rate": 0.002, "loss": 2.3543, "step": 58800 }, { "epoch": 0.2273430130970605, "grad_norm": 0.0994882881641388, "learning_rate": 0.002, "loss": 2.3506, "step": 58810 }, { "epoch": 0.2273816703004438, "grad_norm": 0.10000110417604446, "learning_rate": 0.002, "loss": 2.3529, "step": 58820 }, { "epoch": 0.22742032750382707, "grad_norm": 0.10472284257411957, "learning_rate": 0.002, "loss": 2.3635, "step": 58830 }, { "epoch": 0.22745898470721035, "grad_norm": 0.11229816824197769, "learning_rate": 0.002, "loss": 2.3706, "step": 58840 }, { "epoch": 0.22749764191059363, "grad_norm": 0.11290360987186432, "learning_rate": 0.002, "loss": 2.3477, "step": 58850 }, { "epoch": 0.2275362991139769, "grad_norm": 0.12206948548555374, "learning_rate": 0.002, "loss": 2.3631, "step": 58860 }, { "epoch": 0.2275749563173602, "grad_norm": 0.1078030988574028, "learning_rate": 0.002, "loss": 2.3487, "step": 58870 }, { "epoch": 0.22761361352074347, "grad_norm": 0.11390410363674164, "learning_rate": 0.002, "loss": 2.367, "step": 58880 }, { "epoch": 0.22765227072412672, "grad_norm": 0.11171270161867142, "learning_rate": 0.002, "loss": 2.3594, "step": 58890 }, { "epoch": 0.22769092792751, "grad_norm": 0.12419595569372177, "learning_rate": 0.002, "loss": 2.3664, "step": 58900 }, { "epoch": 0.22772958513089328, "grad_norm": 0.08958700299263, "learning_rate": 0.002, "loss": 2.3744, "step": 58910 }, { "epoch": 0.22776824233427656, "grad_norm": 0.10190758109092712, "learning_rate": 0.002, "loss": 2.3524, "step": 58920 }, { "epoch": 0.22780689953765984, "grad_norm": 0.1246049553155899, "learning_rate": 0.002, "loss": 2.3613, "step": 58930 }, { "epoch": 0.22784555674104312, "grad_norm": 0.10058507323265076, "learning_rate": 0.002, "loss": 2.3591, "step": 58940 }, { "epoch": 0.2278842139444264, "grad_norm": 0.10306686908006668, "learning_rate": 0.002, "loss": 2.3679, "step": 58950 }, { "epoch": 0.22792287114780968, "grad_norm": 0.0994425043463707, "learning_rate": 0.002, "loss": 2.3589, "step": 58960 }, { "epoch": 0.22796152835119296, "grad_norm": 0.11332330852746964, "learning_rate": 0.002, "loss": 2.353, "step": 58970 }, { "epoch": 0.22800018555457624, "grad_norm": 0.11806744337081909, "learning_rate": 0.002, "loss": 2.3699, "step": 58980 }, { "epoch": 0.22803884275795952, "grad_norm": 0.09620604664087296, "learning_rate": 0.002, "loss": 2.36, "step": 58990 }, { "epoch": 0.2280774999613428, "grad_norm": 0.10248254984617233, "learning_rate": 0.002, "loss": 2.3518, "step": 59000 }, { "epoch": 0.22811615716472608, "grad_norm": 0.1298442929983139, "learning_rate": 0.002, "loss": 2.3584, "step": 59010 }, { "epoch": 0.22815481436810936, "grad_norm": 0.10102105140686035, "learning_rate": 0.002, "loss": 2.3785, "step": 59020 }, { "epoch": 0.22819347157149264, "grad_norm": 0.13595005869865417, "learning_rate": 0.002, "loss": 2.3692, "step": 59030 }, { "epoch": 0.22823212877487592, "grad_norm": 0.10934408009052277, "learning_rate": 0.002, "loss": 2.356, "step": 59040 }, { "epoch": 0.2282707859782592, "grad_norm": 0.09380467981100082, "learning_rate": 0.002, "loss": 2.3558, "step": 59050 }, { "epoch": 0.22830944318164248, "grad_norm": 0.10101353377103806, "learning_rate": 0.002, "loss": 2.3551, "step": 59060 }, { "epoch": 0.22834810038502576, "grad_norm": 0.10406546294689178, "learning_rate": 0.002, "loss": 2.3639, "step": 59070 }, { "epoch": 0.228386757588409, "grad_norm": 0.1326468586921692, "learning_rate": 0.002, "loss": 2.369, "step": 59080 }, { "epoch": 0.2284254147917923, "grad_norm": 0.108613021671772, "learning_rate": 0.002, "loss": 2.3761, "step": 59090 }, { "epoch": 0.22846407199517557, "grad_norm": 0.10440162569284439, "learning_rate": 0.002, "loss": 2.3685, "step": 59100 }, { "epoch": 0.22850272919855885, "grad_norm": 0.11736375093460083, "learning_rate": 0.002, "loss": 2.3566, "step": 59110 }, { "epoch": 0.22854138640194213, "grad_norm": 0.10554061084985733, "learning_rate": 0.002, "loss": 2.3607, "step": 59120 }, { "epoch": 0.2285800436053254, "grad_norm": 0.0910368263721466, "learning_rate": 0.002, "loss": 2.3663, "step": 59130 }, { "epoch": 0.2286187008087087, "grad_norm": 0.09408886730670929, "learning_rate": 0.002, "loss": 2.3773, "step": 59140 }, { "epoch": 0.22865735801209197, "grad_norm": 0.11463332176208496, "learning_rate": 0.002, "loss": 2.3466, "step": 59150 }, { "epoch": 0.22869601521547525, "grad_norm": 0.11015474796295166, "learning_rate": 0.002, "loss": 2.3621, "step": 59160 }, { "epoch": 0.22873467241885853, "grad_norm": 0.11497203260660172, "learning_rate": 0.002, "loss": 2.3545, "step": 59170 }, { "epoch": 0.2287733296222418, "grad_norm": 0.10321945697069168, "learning_rate": 0.002, "loss": 2.3648, "step": 59180 }, { "epoch": 0.2288119868256251, "grad_norm": 0.11147177219390869, "learning_rate": 0.002, "loss": 2.3617, "step": 59190 }, { "epoch": 0.22885064402900837, "grad_norm": 0.11212391406297684, "learning_rate": 0.002, "loss": 2.3469, "step": 59200 }, { "epoch": 0.22888930123239165, "grad_norm": 0.10757148265838623, "learning_rate": 0.002, "loss": 2.3561, "step": 59210 }, { "epoch": 0.22892795843577493, "grad_norm": 0.11025886237621307, "learning_rate": 0.002, "loss": 2.3658, "step": 59220 }, { "epoch": 0.2289666156391582, "grad_norm": 0.10767524689435959, "learning_rate": 0.002, "loss": 2.3381, "step": 59230 }, { "epoch": 0.2290052728425415, "grad_norm": 0.1106267124414444, "learning_rate": 0.002, "loss": 2.3623, "step": 59240 }, { "epoch": 0.22904393004592477, "grad_norm": 0.11064332723617554, "learning_rate": 0.002, "loss": 2.35, "step": 59250 }, { "epoch": 0.22908258724930802, "grad_norm": 0.10452796518802643, "learning_rate": 0.002, "loss": 2.379, "step": 59260 }, { "epoch": 0.2291212444526913, "grad_norm": 0.15363597869873047, "learning_rate": 0.002, "loss": 2.374, "step": 59270 }, { "epoch": 0.22915990165607458, "grad_norm": 0.1117829754948616, "learning_rate": 0.002, "loss": 2.3523, "step": 59280 }, { "epoch": 0.22919855885945786, "grad_norm": 0.10392409563064575, "learning_rate": 0.002, "loss": 2.3683, "step": 59290 }, { "epoch": 0.22923721606284114, "grad_norm": 0.1403128057718277, "learning_rate": 0.002, "loss": 2.3563, "step": 59300 }, { "epoch": 0.22927587326622442, "grad_norm": 0.09336331486701965, "learning_rate": 0.002, "loss": 2.3723, "step": 59310 }, { "epoch": 0.2293145304696077, "grad_norm": 0.10608049482107162, "learning_rate": 0.002, "loss": 2.3563, "step": 59320 }, { "epoch": 0.22935318767299098, "grad_norm": 0.13066236674785614, "learning_rate": 0.002, "loss": 2.3635, "step": 59330 }, { "epoch": 0.22939184487637426, "grad_norm": 0.1086943969130516, "learning_rate": 0.002, "loss": 2.3553, "step": 59340 }, { "epoch": 0.22943050207975754, "grad_norm": 0.11340256035327911, "learning_rate": 0.002, "loss": 2.3744, "step": 59350 }, { "epoch": 0.22946915928314082, "grad_norm": 0.11242882907390594, "learning_rate": 0.002, "loss": 2.3644, "step": 59360 }, { "epoch": 0.2295078164865241, "grad_norm": 0.10541485995054245, "learning_rate": 0.002, "loss": 2.3571, "step": 59370 }, { "epoch": 0.22954647368990738, "grad_norm": 0.10490956902503967, "learning_rate": 0.002, "loss": 2.3611, "step": 59380 }, { "epoch": 0.22958513089329066, "grad_norm": 0.11540848761796951, "learning_rate": 0.002, "loss": 2.3681, "step": 59390 }, { "epoch": 0.22962378809667394, "grad_norm": 0.10599116235971451, "learning_rate": 0.002, "loss": 2.3715, "step": 59400 }, { "epoch": 0.22966244530005722, "grad_norm": 0.10107158869504929, "learning_rate": 0.002, "loss": 2.3511, "step": 59410 }, { "epoch": 0.2297011025034405, "grad_norm": 0.16774043440818787, "learning_rate": 0.002, "loss": 2.3619, "step": 59420 }, { "epoch": 0.22973975970682378, "grad_norm": 0.10241368412971497, "learning_rate": 0.002, "loss": 2.3468, "step": 59430 }, { "epoch": 0.22977841691020706, "grad_norm": 0.11344046890735626, "learning_rate": 0.002, "loss": 2.3666, "step": 59440 }, { "epoch": 0.22981707411359031, "grad_norm": 0.134840726852417, "learning_rate": 0.002, "loss": 2.3685, "step": 59450 }, { "epoch": 0.2298557313169736, "grad_norm": 0.11046073585748672, "learning_rate": 0.002, "loss": 2.3711, "step": 59460 }, { "epoch": 0.22989438852035687, "grad_norm": 0.09477958083152771, "learning_rate": 0.002, "loss": 2.3623, "step": 59470 }, { "epoch": 0.22993304572374015, "grad_norm": 0.1265595704317093, "learning_rate": 0.002, "loss": 2.3667, "step": 59480 }, { "epoch": 0.22997170292712343, "grad_norm": 0.10804461687803268, "learning_rate": 0.002, "loss": 2.3525, "step": 59490 }, { "epoch": 0.2300103601305067, "grad_norm": 0.11189703643321991, "learning_rate": 0.002, "loss": 2.3502, "step": 59500 }, { "epoch": 0.23004901733389, "grad_norm": 0.09515593200922012, "learning_rate": 0.002, "loss": 2.363, "step": 59510 }, { "epoch": 0.23008767453727327, "grad_norm": 0.12038534879684448, "learning_rate": 0.002, "loss": 2.3414, "step": 59520 }, { "epoch": 0.23012633174065655, "grad_norm": 0.11073701083660126, "learning_rate": 0.002, "loss": 2.3593, "step": 59530 }, { "epoch": 0.23016498894403983, "grad_norm": 0.11130673438310623, "learning_rate": 0.002, "loss": 2.3681, "step": 59540 }, { "epoch": 0.2302036461474231, "grad_norm": 0.11496109515428543, "learning_rate": 0.002, "loss": 2.3526, "step": 59550 }, { "epoch": 0.2302423033508064, "grad_norm": 0.10915815830230713, "learning_rate": 0.002, "loss": 2.3651, "step": 59560 }, { "epoch": 0.23028096055418967, "grad_norm": 0.11543036997318268, "learning_rate": 0.002, "loss": 2.3674, "step": 59570 }, { "epoch": 0.23031961775757295, "grad_norm": 0.11247259378433228, "learning_rate": 0.002, "loss": 2.3588, "step": 59580 }, { "epoch": 0.23035827496095623, "grad_norm": 0.10884750634431839, "learning_rate": 0.002, "loss": 2.3649, "step": 59590 }, { "epoch": 0.2303969321643395, "grad_norm": 0.09827539324760437, "learning_rate": 0.002, "loss": 2.3518, "step": 59600 }, { "epoch": 0.2304355893677228, "grad_norm": 0.093353271484375, "learning_rate": 0.002, "loss": 2.351, "step": 59610 }, { "epoch": 0.23047424657110607, "grad_norm": 0.11901076883077621, "learning_rate": 0.002, "loss": 2.3723, "step": 59620 }, { "epoch": 0.23051290377448933, "grad_norm": 0.09644563496112823, "learning_rate": 0.002, "loss": 2.3663, "step": 59630 }, { "epoch": 0.2305515609778726, "grad_norm": 0.09767736494541168, "learning_rate": 0.002, "loss": 2.3586, "step": 59640 }, { "epoch": 0.23059021818125588, "grad_norm": 0.13363195955753326, "learning_rate": 0.002, "loss": 2.3508, "step": 59650 }, { "epoch": 0.23062887538463916, "grad_norm": 0.1053873673081398, "learning_rate": 0.002, "loss": 2.3639, "step": 59660 }, { "epoch": 0.23066753258802244, "grad_norm": 0.10517704486846924, "learning_rate": 0.002, "loss": 2.358, "step": 59670 }, { "epoch": 0.23070618979140572, "grad_norm": 0.1103927344083786, "learning_rate": 0.002, "loss": 2.3594, "step": 59680 }, { "epoch": 0.230744846994789, "grad_norm": 0.11776106804609299, "learning_rate": 0.002, "loss": 2.3543, "step": 59690 }, { "epoch": 0.23078350419817228, "grad_norm": 0.0981423407793045, "learning_rate": 0.002, "loss": 2.3374, "step": 59700 }, { "epoch": 0.23082216140155556, "grad_norm": 0.11345241963863373, "learning_rate": 0.002, "loss": 2.3614, "step": 59710 }, { "epoch": 0.23086081860493884, "grad_norm": 0.10204170644283295, "learning_rate": 0.002, "loss": 2.343, "step": 59720 }, { "epoch": 0.23089947580832212, "grad_norm": 0.10411133617162704, "learning_rate": 0.002, "loss": 2.3695, "step": 59730 }, { "epoch": 0.2309381330117054, "grad_norm": 0.09219200909137726, "learning_rate": 0.002, "loss": 2.3576, "step": 59740 }, { "epoch": 0.23097679021508868, "grad_norm": 0.12752839922904968, "learning_rate": 0.002, "loss": 2.3573, "step": 59750 }, { "epoch": 0.23101544741847196, "grad_norm": 0.12858721613883972, "learning_rate": 0.002, "loss": 2.3635, "step": 59760 }, { "epoch": 0.23105410462185524, "grad_norm": 0.10879185795783997, "learning_rate": 0.002, "loss": 2.3633, "step": 59770 }, { "epoch": 0.23109276182523852, "grad_norm": 0.09733107686042786, "learning_rate": 0.002, "loss": 2.3513, "step": 59780 }, { "epoch": 0.2311314190286218, "grad_norm": 0.09520354866981506, "learning_rate": 0.002, "loss": 2.361, "step": 59790 }, { "epoch": 0.23117007623200508, "grad_norm": 0.11922292411327362, "learning_rate": 0.002, "loss": 2.3593, "step": 59800 }, { "epoch": 0.23120873343538836, "grad_norm": 0.10134036839008331, "learning_rate": 0.002, "loss": 2.3545, "step": 59810 }, { "epoch": 0.23124739063877162, "grad_norm": 0.12658289074897766, "learning_rate": 0.002, "loss": 2.3565, "step": 59820 }, { "epoch": 0.2312860478421549, "grad_norm": 0.10828740149736404, "learning_rate": 0.002, "loss": 2.3585, "step": 59830 }, { "epoch": 0.23132470504553818, "grad_norm": 0.10405316203832626, "learning_rate": 0.002, "loss": 2.3553, "step": 59840 }, { "epoch": 0.23136336224892146, "grad_norm": 0.12723368406295776, "learning_rate": 0.002, "loss": 2.3575, "step": 59850 }, { "epoch": 0.23140201945230474, "grad_norm": 0.11608844250440598, "learning_rate": 0.002, "loss": 2.3591, "step": 59860 }, { "epoch": 0.23144067665568802, "grad_norm": 0.09689639508724213, "learning_rate": 0.002, "loss": 2.3416, "step": 59870 }, { "epoch": 0.2314793338590713, "grad_norm": 0.10837455838918686, "learning_rate": 0.002, "loss": 2.3709, "step": 59880 }, { "epoch": 0.23151799106245458, "grad_norm": 0.1091194823384285, "learning_rate": 0.002, "loss": 2.3669, "step": 59890 }, { "epoch": 0.23155664826583786, "grad_norm": 0.10450857132673264, "learning_rate": 0.002, "loss": 2.3566, "step": 59900 }, { "epoch": 0.23159530546922114, "grad_norm": 0.11571555584669113, "learning_rate": 0.002, "loss": 2.36, "step": 59910 }, { "epoch": 0.23163396267260442, "grad_norm": 0.10868965834379196, "learning_rate": 0.002, "loss": 2.3479, "step": 59920 }, { "epoch": 0.2316726198759877, "grad_norm": 0.10212372988462448, "learning_rate": 0.002, "loss": 2.3699, "step": 59930 }, { "epoch": 0.23171127707937098, "grad_norm": 0.10903532803058624, "learning_rate": 0.002, "loss": 2.356, "step": 59940 }, { "epoch": 0.23174993428275426, "grad_norm": 0.09267522394657135, "learning_rate": 0.002, "loss": 2.3614, "step": 59950 }, { "epoch": 0.23178859148613754, "grad_norm": 0.1206112876534462, "learning_rate": 0.002, "loss": 2.3537, "step": 59960 }, { "epoch": 0.23182724868952082, "grad_norm": 0.10821081697940826, "learning_rate": 0.002, "loss": 2.3579, "step": 59970 }, { "epoch": 0.2318659058929041, "grad_norm": 0.09382860362529755, "learning_rate": 0.002, "loss": 2.3702, "step": 59980 }, { "epoch": 0.23190456309628737, "grad_norm": 0.10157258808612823, "learning_rate": 0.002, "loss": 2.3491, "step": 59990 }, { "epoch": 0.23194322029967063, "grad_norm": 0.09857197850942612, "learning_rate": 0.002, "loss": 2.3522, "step": 60000 }, { "epoch": 0.2319818775030539, "grad_norm": 0.11237531155347824, "learning_rate": 0.002, "loss": 2.3771, "step": 60010 }, { "epoch": 0.2320205347064372, "grad_norm": 0.11936473846435547, "learning_rate": 0.002, "loss": 2.3565, "step": 60020 }, { "epoch": 0.23205919190982047, "grad_norm": 0.10506843775510788, "learning_rate": 0.002, "loss": 2.3594, "step": 60030 }, { "epoch": 0.23209784911320375, "grad_norm": 0.1060006245970726, "learning_rate": 0.002, "loss": 2.3693, "step": 60040 }, { "epoch": 0.23213650631658703, "grad_norm": 0.10951527208089828, "learning_rate": 0.002, "loss": 2.3626, "step": 60050 }, { "epoch": 0.2321751635199703, "grad_norm": 0.09036508202552795, "learning_rate": 0.002, "loss": 2.3525, "step": 60060 }, { "epoch": 0.2322138207233536, "grad_norm": 0.09523717314004898, "learning_rate": 0.002, "loss": 2.3744, "step": 60070 }, { "epoch": 0.23225247792673687, "grad_norm": 0.11686524003744125, "learning_rate": 0.002, "loss": 2.3468, "step": 60080 }, { "epoch": 0.23229113513012015, "grad_norm": 0.10026539862155914, "learning_rate": 0.002, "loss": 2.3589, "step": 60090 }, { "epoch": 0.23232979233350343, "grad_norm": 0.1290268748998642, "learning_rate": 0.002, "loss": 2.3551, "step": 60100 }, { "epoch": 0.2323684495368867, "grad_norm": 0.10675327479839325, "learning_rate": 0.002, "loss": 2.3494, "step": 60110 }, { "epoch": 0.23240710674027, "grad_norm": 0.1130603775382042, "learning_rate": 0.002, "loss": 2.3701, "step": 60120 }, { "epoch": 0.23244576394365327, "grad_norm": 0.0931505411863327, "learning_rate": 0.002, "loss": 2.3537, "step": 60130 }, { "epoch": 0.23248442114703655, "grad_norm": 0.11370448768138885, "learning_rate": 0.002, "loss": 2.343, "step": 60140 }, { "epoch": 0.23252307835041983, "grad_norm": 0.13120517134666443, "learning_rate": 0.002, "loss": 2.3588, "step": 60150 }, { "epoch": 0.2325617355538031, "grad_norm": 0.1023140400648117, "learning_rate": 0.002, "loss": 2.3649, "step": 60160 }, { "epoch": 0.23260039275718639, "grad_norm": 0.09642255306243896, "learning_rate": 0.002, "loss": 2.3675, "step": 60170 }, { "epoch": 0.23263904996056967, "grad_norm": 0.11504430323839188, "learning_rate": 0.002, "loss": 2.3571, "step": 60180 }, { "epoch": 0.23267770716395292, "grad_norm": 0.10445452481508255, "learning_rate": 0.002, "loss": 2.3561, "step": 60190 }, { "epoch": 0.2327163643673362, "grad_norm": 0.10418809205293655, "learning_rate": 0.002, "loss": 2.3637, "step": 60200 }, { "epoch": 0.23275502157071948, "grad_norm": 0.11320661008358002, "learning_rate": 0.002, "loss": 2.3586, "step": 60210 }, { "epoch": 0.23279367877410276, "grad_norm": 0.10116437077522278, "learning_rate": 0.002, "loss": 2.3611, "step": 60220 }, { "epoch": 0.23283233597748604, "grad_norm": 0.11768436431884766, "learning_rate": 0.002, "loss": 2.3589, "step": 60230 }, { "epoch": 0.23287099318086932, "grad_norm": 0.10723967850208282, "learning_rate": 0.002, "loss": 2.3545, "step": 60240 }, { "epoch": 0.2329096503842526, "grad_norm": 0.1054241880774498, "learning_rate": 0.002, "loss": 2.3649, "step": 60250 }, { "epoch": 0.23294830758763588, "grad_norm": 0.10430363565683365, "learning_rate": 0.002, "loss": 2.351, "step": 60260 }, { "epoch": 0.23298696479101916, "grad_norm": 0.12296608835458755, "learning_rate": 0.002, "loss": 2.3636, "step": 60270 }, { "epoch": 0.23302562199440244, "grad_norm": 0.12378449738025665, "learning_rate": 0.002, "loss": 2.3606, "step": 60280 }, { "epoch": 0.23306427919778572, "grad_norm": 0.10923901200294495, "learning_rate": 0.002, "loss": 2.3583, "step": 60290 }, { "epoch": 0.233102936401169, "grad_norm": 0.10164798051118851, "learning_rate": 0.002, "loss": 2.3615, "step": 60300 }, { "epoch": 0.23314159360455228, "grad_norm": 0.1040244922041893, "learning_rate": 0.002, "loss": 2.3578, "step": 60310 }, { "epoch": 0.23318025080793556, "grad_norm": 0.11190349608659744, "learning_rate": 0.002, "loss": 2.3701, "step": 60320 }, { "epoch": 0.23321890801131884, "grad_norm": 0.09140828996896744, "learning_rate": 0.002, "loss": 2.3681, "step": 60330 }, { "epoch": 0.23325756521470212, "grad_norm": 0.10454193502664566, "learning_rate": 0.002, "loss": 2.3578, "step": 60340 }, { "epoch": 0.2332962224180854, "grad_norm": 0.11909783631563187, "learning_rate": 0.002, "loss": 2.3633, "step": 60350 }, { "epoch": 0.23333487962146868, "grad_norm": 0.09467803686857224, "learning_rate": 0.002, "loss": 2.3407, "step": 60360 }, { "epoch": 0.23337353682485193, "grad_norm": 0.10158351808786392, "learning_rate": 0.002, "loss": 2.3529, "step": 60370 }, { "epoch": 0.2334121940282352, "grad_norm": 0.09694387018680573, "learning_rate": 0.002, "loss": 2.3501, "step": 60380 }, { "epoch": 0.2334508512316185, "grad_norm": 0.09731967002153397, "learning_rate": 0.002, "loss": 2.3517, "step": 60390 }, { "epoch": 0.23348950843500177, "grad_norm": 0.11340931057929993, "learning_rate": 0.002, "loss": 2.3556, "step": 60400 }, { "epoch": 0.23352816563838505, "grad_norm": 0.10410448163747787, "learning_rate": 0.002, "loss": 2.3616, "step": 60410 }, { "epoch": 0.23356682284176833, "grad_norm": 0.10245625674724579, "learning_rate": 0.002, "loss": 2.343, "step": 60420 }, { "epoch": 0.2336054800451516, "grad_norm": 0.10402455925941467, "learning_rate": 0.002, "loss": 2.3522, "step": 60430 }, { "epoch": 0.2336441372485349, "grad_norm": 0.10691343247890472, "learning_rate": 0.002, "loss": 2.3474, "step": 60440 }, { "epoch": 0.23368279445191817, "grad_norm": 0.12020547688007355, "learning_rate": 0.002, "loss": 2.3564, "step": 60450 }, { "epoch": 0.23372145165530145, "grad_norm": 0.12145112454891205, "learning_rate": 0.002, "loss": 2.3544, "step": 60460 }, { "epoch": 0.23376010885868473, "grad_norm": 0.10276582092046738, "learning_rate": 0.002, "loss": 2.3521, "step": 60470 }, { "epoch": 0.233798766062068, "grad_norm": 0.09935696423053741, "learning_rate": 0.002, "loss": 2.3387, "step": 60480 }, { "epoch": 0.2338374232654513, "grad_norm": 0.10785102844238281, "learning_rate": 0.002, "loss": 2.3585, "step": 60490 }, { "epoch": 0.23387608046883457, "grad_norm": 0.1112961694598198, "learning_rate": 0.002, "loss": 2.3533, "step": 60500 }, { "epoch": 0.23391473767221785, "grad_norm": 0.09906793385744095, "learning_rate": 0.002, "loss": 2.3584, "step": 60510 }, { "epoch": 0.23395339487560113, "grad_norm": 0.12156188488006592, "learning_rate": 0.002, "loss": 2.3783, "step": 60520 }, { "epoch": 0.2339920520789844, "grad_norm": 0.12288404256105423, "learning_rate": 0.002, "loss": 2.3631, "step": 60530 }, { "epoch": 0.2340307092823677, "grad_norm": 0.1141674593091011, "learning_rate": 0.002, "loss": 2.3623, "step": 60540 }, { "epoch": 0.23406936648575097, "grad_norm": 0.11037001758813858, "learning_rate": 0.002, "loss": 2.3719, "step": 60550 }, { "epoch": 0.23410802368913422, "grad_norm": 0.12164504826068878, "learning_rate": 0.002, "loss": 2.3598, "step": 60560 }, { "epoch": 0.2341466808925175, "grad_norm": 0.11643952131271362, "learning_rate": 0.002, "loss": 2.3533, "step": 60570 }, { "epoch": 0.23418533809590078, "grad_norm": 0.09684377908706665, "learning_rate": 0.002, "loss": 2.3634, "step": 60580 }, { "epoch": 0.23422399529928406, "grad_norm": 0.09952197968959808, "learning_rate": 0.002, "loss": 2.3577, "step": 60590 }, { "epoch": 0.23426265250266734, "grad_norm": 0.10011433064937592, "learning_rate": 0.002, "loss": 2.3545, "step": 60600 }, { "epoch": 0.23430130970605062, "grad_norm": 0.11262384802103043, "learning_rate": 0.002, "loss": 2.3478, "step": 60610 }, { "epoch": 0.2343399669094339, "grad_norm": 0.10437977313995361, "learning_rate": 0.002, "loss": 2.364, "step": 60620 }, { "epoch": 0.23437862411281718, "grad_norm": 0.08822324872016907, "learning_rate": 0.002, "loss": 2.3505, "step": 60630 }, { "epoch": 0.23441728131620046, "grad_norm": 0.10701391100883484, "learning_rate": 0.002, "loss": 2.3411, "step": 60640 }, { "epoch": 0.23445593851958374, "grad_norm": 0.10735175013542175, "learning_rate": 0.002, "loss": 2.3705, "step": 60650 }, { "epoch": 0.23449459572296702, "grad_norm": 0.11642715334892273, "learning_rate": 0.002, "loss": 2.3649, "step": 60660 }, { "epoch": 0.2345332529263503, "grad_norm": 0.13378360867500305, "learning_rate": 0.002, "loss": 2.3519, "step": 60670 }, { "epoch": 0.23457191012973358, "grad_norm": 0.10131211578845978, "learning_rate": 0.002, "loss": 2.3591, "step": 60680 }, { "epoch": 0.23461056733311686, "grad_norm": 0.10409022122621536, "learning_rate": 0.002, "loss": 2.3638, "step": 60690 }, { "epoch": 0.23464922453650014, "grad_norm": 0.10755995661020279, "learning_rate": 0.002, "loss": 2.3539, "step": 60700 }, { "epoch": 0.23468788173988342, "grad_norm": 0.08830675482749939, "learning_rate": 0.002, "loss": 2.3685, "step": 60710 }, { "epoch": 0.2347265389432667, "grad_norm": 0.10361380130052567, "learning_rate": 0.002, "loss": 2.3612, "step": 60720 }, { "epoch": 0.23476519614664998, "grad_norm": 0.09997949749231339, "learning_rate": 0.002, "loss": 2.3426, "step": 60730 }, { "epoch": 0.23480385335003323, "grad_norm": 0.1282014101743698, "learning_rate": 0.002, "loss": 2.377, "step": 60740 }, { "epoch": 0.2348425105534165, "grad_norm": 0.10668148845434189, "learning_rate": 0.002, "loss": 2.3567, "step": 60750 }, { "epoch": 0.2348811677567998, "grad_norm": 0.09716004133224487, "learning_rate": 0.002, "loss": 2.3658, "step": 60760 }, { "epoch": 0.23491982496018307, "grad_norm": 0.11875343322753906, "learning_rate": 0.002, "loss": 2.369, "step": 60770 }, { "epoch": 0.23495848216356635, "grad_norm": 0.10365365445613861, "learning_rate": 0.002, "loss": 2.3591, "step": 60780 }, { "epoch": 0.23499713936694963, "grad_norm": 0.09198535978794098, "learning_rate": 0.002, "loss": 2.3596, "step": 60790 }, { "epoch": 0.2350357965703329, "grad_norm": 0.10645847767591476, "learning_rate": 0.002, "loss": 2.3622, "step": 60800 }, { "epoch": 0.2350744537737162, "grad_norm": 0.10718676447868347, "learning_rate": 0.002, "loss": 2.3601, "step": 60810 }, { "epoch": 0.23511311097709947, "grad_norm": 0.116599440574646, "learning_rate": 0.002, "loss": 2.3561, "step": 60820 }, { "epoch": 0.23515176818048275, "grad_norm": 0.10643000900745392, "learning_rate": 0.002, "loss": 2.3443, "step": 60830 }, { "epoch": 0.23519042538386603, "grad_norm": 0.11804504692554474, "learning_rate": 0.002, "loss": 2.3438, "step": 60840 }, { "epoch": 0.2352290825872493, "grad_norm": 0.11182983964681625, "learning_rate": 0.002, "loss": 2.3567, "step": 60850 }, { "epoch": 0.2352677397906326, "grad_norm": 0.1037188172340393, "learning_rate": 0.002, "loss": 2.3603, "step": 60860 }, { "epoch": 0.23530639699401587, "grad_norm": 0.12416130304336548, "learning_rate": 0.002, "loss": 2.3582, "step": 60870 }, { "epoch": 0.23534505419739915, "grad_norm": 0.11619950830936432, "learning_rate": 0.002, "loss": 2.3507, "step": 60880 }, { "epoch": 0.23538371140078243, "grad_norm": 0.13519085943698883, "learning_rate": 0.002, "loss": 2.3523, "step": 60890 }, { "epoch": 0.2354223686041657, "grad_norm": 0.10871560871601105, "learning_rate": 0.002, "loss": 2.3672, "step": 60900 }, { "epoch": 0.235461025807549, "grad_norm": 0.18231026828289032, "learning_rate": 0.002, "loss": 2.3496, "step": 60910 }, { "epoch": 0.23549968301093227, "grad_norm": 0.15642951428890228, "learning_rate": 0.002, "loss": 2.3534, "step": 60920 }, { "epoch": 0.23553834021431552, "grad_norm": 0.10259267687797546, "learning_rate": 0.002, "loss": 2.3633, "step": 60930 }, { "epoch": 0.2355769974176988, "grad_norm": 0.10545379668474197, "learning_rate": 0.002, "loss": 2.357, "step": 60940 }, { "epoch": 0.23561565462108208, "grad_norm": 0.08988619595766068, "learning_rate": 0.002, "loss": 2.3514, "step": 60950 }, { "epoch": 0.23565431182446536, "grad_norm": 0.11802471429109573, "learning_rate": 0.002, "loss": 2.3558, "step": 60960 }, { "epoch": 0.23569296902784864, "grad_norm": 0.27038830518722534, "learning_rate": 0.002, "loss": 2.3602, "step": 60970 }, { "epoch": 0.23573162623123192, "grad_norm": 0.10791066288948059, "learning_rate": 0.002, "loss": 2.3612, "step": 60980 }, { "epoch": 0.2357702834346152, "grad_norm": 0.13653592765331268, "learning_rate": 0.002, "loss": 2.3686, "step": 60990 }, { "epoch": 0.23580894063799848, "grad_norm": 0.10227449238300323, "learning_rate": 0.002, "loss": 2.3653, "step": 61000 }, { "epoch": 0.23584759784138176, "grad_norm": 0.10759121924638748, "learning_rate": 0.002, "loss": 2.3579, "step": 61010 }, { "epoch": 0.23588625504476504, "grad_norm": 0.09532109647989273, "learning_rate": 0.002, "loss": 2.3484, "step": 61020 }, { "epoch": 0.23592491224814832, "grad_norm": 0.09882092475891113, "learning_rate": 0.002, "loss": 2.3634, "step": 61030 }, { "epoch": 0.2359635694515316, "grad_norm": 0.1165454238653183, "learning_rate": 0.002, "loss": 2.3457, "step": 61040 }, { "epoch": 0.23600222665491488, "grad_norm": 0.09766757488250732, "learning_rate": 0.002, "loss": 2.3573, "step": 61050 }, { "epoch": 0.23604088385829816, "grad_norm": 0.09498842805624008, "learning_rate": 0.002, "loss": 2.3502, "step": 61060 }, { "epoch": 0.23607954106168144, "grad_norm": 0.1085570901632309, "learning_rate": 0.002, "loss": 2.3579, "step": 61070 }, { "epoch": 0.23611819826506472, "grad_norm": 0.09749253839254379, "learning_rate": 0.002, "loss": 2.3593, "step": 61080 }, { "epoch": 0.236156855468448, "grad_norm": 0.09708741307258606, "learning_rate": 0.002, "loss": 2.363, "step": 61090 }, { "epoch": 0.23619551267183128, "grad_norm": 0.10069181025028229, "learning_rate": 0.002, "loss": 2.3839, "step": 61100 }, { "epoch": 0.23623416987521456, "grad_norm": 0.15146248042583466, "learning_rate": 0.002, "loss": 2.348, "step": 61110 }, { "epoch": 0.2362728270785978, "grad_norm": 0.11124181747436523, "learning_rate": 0.002, "loss": 2.3564, "step": 61120 }, { "epoch": 0.2363114842819811, "grad_norm": 0.09074581414461136, "learning_rate": 0.002, "loss": 2.3594, "step": 61130 }, { "epoch": 0.23635014148536437, "grad_norm": 0.09841548651456833, "learning_rate": 0.002, "loss": 2.3411, "step": 61140 }, { "epoch": 0.23638879868874765, "grad_norm": 0.11487787216901779, "learning_rate": 0.002, "loss": 2.3546, "step": 61150 }, { "epoch": 0.23642745589213093, "grad_norm": 0.10396473854780197, "learning_rate": 0.002, "loss": 2.3484, "step": 61160 }, { "epoch": 0.2364661130955142, "grad_norm": 0.11128471791744232, "learning_rate": 0.002, "loss": 2.3537, "step": 61170 }, { "epoch": 0.2365047702988975, "grad_norm": 0.12065692991018295, "learning_rate": 0.002, "loss": 2.3643, "step": 61180 }, { "epoch": 0.23654342750228077, "grad_norm": 0.10189115256071091, "learning_rate": 0.002, "loss": 2.3528, "step": 61190 }, { "epoch": 0.23658208470566405, "grad_norm": 0.1232014149427414, "learning_rate": 0.002, "loss": 2.3641, "step": 61200 }, { "epoch": 0.23662074190904733, "grad_norm": 0.11311760544776917, "learning_rate": 0.002, "loss": 2.3376, "step": 61210 }, { "epoch": 0.2366593991124306, "grad_norm": 0.10827051103115082, "learning_rate": 0.002, "loss": 2.3612, "step": 61220 }, { "epoch": 0.2366980563158139, "grad_norm": 0.12067557126283646, "learning_rate": 0.002, "loss": 2.3571, "step": 61230 }, { "epoch": 0.23673671351919717, "grad_norm": 0.11931245774030685, "learning_rate": 0.002, "loss": 2.3515, "step": 61240 }, { "epoch": 0.23677537072258045, "grad_norm": 0.10347548872232437, "learning_rate": 0.002, "loss": 2.3758, "step": 61250 }, { "epoch": 0.23681402792596373, "grad_norm": 0.12837807834148407, "learning_rate": 0.002, "loss": 2.3514, "step": 61260 }, { "epoch": 0.236852685129347, "grad_norm": 0.10006406903266907, "learning_rate": 0.002, "loss": 2.3616, "step": 61270 }, { "epoch": 0.2368913423327303, "grad_norm": 0.10347016155719757, "learning_rate": 0.002, "loss": 2.3587, "step": 61280 }, { "epoch": 0.23692999953611357, "grad_norm": 0.14867345988750458, "learning_rate": 0.002, "loss": 2.3501, "step": 61290 }, { "epoch": 0.23696865673949682, "grad_norm": 0.1082024946808815, "learning_rate": 0.002, "loss": 2.3669, "step": 61300 }, { "epoch": 0.2370073139428801, "grad_norm": 0.10785643011331558, "learning_rate": 0.002, "loss": 2.3561, "step": 61310 }, { "epoch": 0.23704597114626338, "grad_norm": 0.10544189065694809, "learning_rate": 0.002, "loss": 2.366, "step": 61320 }, { "epoch": 0.23708462834964666, "grad_norm": 0.11011648178100586, "learning_rate": 0.002, "loss": 2.3539, "step": 61330 }, { "epoch": 0.23712328555302994, "grad_norm": 0.09242957830429077, "learning_rate": 0.002, "loss": 2.363, "step": 61340 }, { "epoch": 0.23716194275641322, "grad_norm": 0.10205409675836563, "learning_rate": 0.002, "loss": 2.3541, "step": 61350 }, { "epoch": 0.2372005999597965, "grad_norm": 0.11633102595806122, "learning_rate": 0.002, "loss": 2.3531, "step": 61360 }, { "epoch": 0.23723925716317978, "grad_norm": 0.11982908844947815, "learning_rate": 0.002, "loss": 2.3619, "step": 61370 }, { "epoch": 0.23727791436656306, "grad_norm": 0.09505453705787659, "learning_rate": 0.002, "loss": 2.3596, "step": 61380 }, { "epoch": 0.23731657156994634, "grad_norm": 0.1351066529750824, "learning_rate": 0.002, "loss": 2.3547, "step": 61390 }, { "epoch": 0.23735522877332962, "grad_norm": 0.10480768233537674, "learning_rate": 0.002, "loss": 2.3665, "step": 61400 }, { "epoch": 0.2373938859767129, "grad_norm": 0.10703454911708832, "learning_rate": 0.002, "loss": 2.3551, "step": 61410 }, { "epoch": 0.23743254318009618, "grad_norm": 0.11127861589193344, "learning_rate": 0.002, "loss": 2.3608, "step": 61420 }, { "epoch": 0.23747120038347946, "grad_norm": 0.1108800619840622, "learning_rate": 0.002, "loss": 2.3623, "step": 61430 }, { "epoch": 0.23750985758686274, "grad_norm": 0.12055522203445435, "learning_rate": 0.002, "loss": 2.3434, "step": 61440 }, { "epoch": 0.23754851479024602, "grad_norm": 0.11530635505914688, "learning_rate": 0.002, "loss": 2.3591, "step": 61450 }, { "epoch": 0.2375871719936293, "grad_norm": 0.13346411287784576, "learning_rate": 0.002, "loss": 2.3579, "step": 61460 }, { "epoch": 0.23762582919701258, "grad_norm": 0.11563769727945328, "learning_rate": 0.002, "loss": 2.3716, "step": 61470 }, { "epoch": 0.23766448640039586, "grad_norm": 0.12334870547056198, "learning_rate": 0.002, "loss": 2.3576, "step": 61480 }, { "epoch": 0.23770314360377912, "grad_norm": 0.4291572868824005, "learning_rate": 0.002, "loss": 2.3704, "step": 61490 }, { "epoch": 0.2377418008071624, "grad_norm": 0.129678413271904, "learning_rate": 0.002, "loss": 2.3524, "step": 61500 }, { "epoch": 0.23778045801054568, "grad_norm": 0.10555509477853775, "learning_rate": 0.002, "loss": 2.3611, "step": 61510 }, { "epoch": 0.23781911521392896, "grad_norm": 0.11403724551200867, "learning_rate": 0.002, "loss": 2.3556, "step": 61520 }, { "epoch": 0.23785777241731224, "grad_norm": 0.08737763017416, "learning_rate": 0.002, "loss": 2.3557, "step": 61530 }, { "epoch": 0.23789642962069552, "grad_norm": 0.10799986124038696, "learning_rate": 0.002, "loss": 2.3499, "step": 61540 }, { "epoch": 0.2379350868240788, "grad_norm": 0.11616066843271255, "learning_rate": 0.002, "loss": 2.3533, "step": 61550 }, { "epoch": 0.23797374402746208, "grad_norm": 0.095784492790699, "learning_rate": 0.002, "loss": 2.345, "step": 61560 }, { "epoch": 0.23801240123084536, "grad_norm": 0.09789443761110306, "learning_rate": 0.002, "loss": 2.368, "step": 61570 }, { "epoch": 0.23805105843422864, "grad_norm": 0.10828810930252075, "learning_rate": 0.002, "loss": 2.3575, "step": 61580 }, { "epoch": 0.23808971563761192, "grad_norm": 0.10432812571525574, "learning_rate": 0.002, "loss": 2.3466, "step": 61590 }, { "epoch": 0.2381283728409952, "grad_norm": 0.10405416041612625, "learning_rate": 0.002, "loss": 2.366, "step": 61600 }, { "epoch": 0.23816703004437847, "grad_norm": 0.0994420126080513, "learning_rate": 0.002, "loss": 2.3498, "step": 61610 }, { "epoch": 0.23820568724776175, "grad_norm": 0.11040692776441574, "learning_rate": 0.002, "loss": 2.3564, "step": 61620 }, { "epoch": 0.23824434445114503, "grad_norm": 0.11844494193792343, "learning_rate": 0.002, "loss": 2.3419, "step": 61630 }, { "epoch": 0.23828300165452831, "grad_norm": 0.10033336281776428, "learning_rate": 0.002, "loss": 2.368, "step": 61640 }, { "epoch": 0.2383216588579116, "grad_norm": 0.10760051757097244, "learning_rate": 0.002, "loss": 2.364, "step": 61650 }, { "epoch": 0.23836031606129487, "grad_norm": 0.10519957542419434, "learning_rate": 0.002, "loss": 2.3665, "step": 61660 }, { "epoch": 0.23839897326467813, "grad_norm": 0.0907941609621048, "learning_rate": 0.002, "loss": 2.3598, "step": 61670 }, { "epoch": 0.2384376304680614, "grad_norm": 0.11044026911258698, "learning_rate": 0.002, "loss": 2.3571, "step": 61680 }, { "epoch": 0.2384762876714447, "grad_norm": 0.12676018476486206, "learning_rate": 0.002, "loss": 2.3706, "step": 61690 }, { "epoch": 0.23851494487482797, "grad_norm": 0.11517304182052612, "learning_rate": 0.002, "loss": 2.3436, "step": 61700 }, { "epoch": 0.23855360207821125, "grad_norm": 0.1039179265499115, "learning_rate": 0.002, "loss": 2.3744, "step": 61710 }, { "epoch": 0.23859225928159453, "grad_norm": 0.1346934586763382, "learning_rate": 0.002, "loss": 2.3493, "step": 61720 }, { "epoch": 0.2386309164849778, "grad_norm": 0.10842429846525192, "learning_rate": 0.002, "loss": 2.3638, "step": 61730 }, { "epoch": 0.2386695736883611, "grad_norm": 0.11558246612548828, "learning_rate": 0.002, "loss": 2.3528, "step": 61740 }, { "epoch": 0.23870823089174437, "grad_norm": 0.10950513184070587, "learning_rate": 0.002, "loss": 2.342, "step": 61750 }, { "epoch": 0.23874688809512765, "grad_norm": 0.11494038254022598, "learning_rate": 0.002, "loss": 2.3512, "step": 61760 }, { "epoch": 0.23878554529851093, "grad_norm": 0.1149371787905693, "learning_rate": 0.002, "loss": 2.3664, "step": 61770 }, { "epoch": 0.2388242025018942, "grad_norm": 0.10944673418998718, "learning_rate": 0.002, "loss": 2.3637, "step": 61780 }, { "epoch": 0.23886285970527749, "grad_norm": 0.09396765381097794, "learning_rate": 0.002, "loss": 2.3468, "step": 61790 }, { "epoch": 0.23890151690866077, "grad_norm": 0.09576135128736496, "learning_rate": 0.002, "loss": 2.368, "step": 61800 }, { "epoch": 0.23894017411204405, "grad_norm": 0.10625293105840683, "learning_rate": 0.002, "loss": 2.3506, "step": 61810 }, { "epoch": 0.23897883131542733, "grad_norm": 0.09761402755975723, "learning_rate": 0.002, "loss": 2.3806, "step": 61820 }, { "epoch": 0.2390174885188106, "grad_norm": 0.1327749788761139, "learning_rate": 0.002, "loss": 2.3585, "step": 61830 }, { "epoch": 0.23905614572219389, "grad_norm": 0.10481736809015274, "learning_rate": 0.002, "loss": 2.3744, "step": 61840 }, { "epoch": 0.23909480292557717, "grad_norm": 0.11769228428602219, "learning_rate": 0.002, "loss": 2.3595, "step": 61850 }, { "epoch": 0.23913346012896042, "grad_norm": 0.1069621592760086, "learning_rate": 0.002, "loss": 2.3541, "step": 61860 }, { "epoch": 0.2391721173323437, "grad_norm": 0.11873393505811691, "learning_rate": 0.002, "loss": 2.3533, "step": 61870 }, { "epoch": 0.23921077453572698, "grad_norm": 0.10264294594526291, "learning_rate": 0.002, "loss": 2.3684, "step": 61880 }, { "epoch": 0.23924943173911026, "grad_norm": 0.11171815544366837, "learning_rate": 0.002, "loss": 2.35, "step": 61890 }, { "epoch": 0.23928808894249354, "grad_norm": 0.09807415306568146, "learning_rate": 0.002, "loss": 2.357, "step": 61900 }, { "epoch": 0.23932674614587682, "grad_norm": 0.10718560963869095, "learning_rate": 0.002, "loss": 2.3678, "step": 61910 }, { "epoch": 0.2393654033492601, "grad_norm": 0.09984877705574036, "learning_rate": 0.002, "loss": 2.3527, "step": 61920 }, { "epoch": 0.23940406055264338, "grad_norm": 0.11579443514347076, "learning_rate": 0.002, "loss": 2.3461, "step": 61930 }, { "epoch": 0.23944271775602666, "grad_norm": 0.09079534560441971, "learning_rate": 0.002, "loss": 2.3685, "step": 61940 }, { "epoch": 0.23948137495940994, "grad_norm": 0.13144853711128235, "learning_rate": 0.002, "loss": 2.3569, "step": 61950 }, { "epoch": 0.23952003216279322, "grad_norm": 0.11411819607019424, "learning_rate": 0.002, "loss": 2.3494, "step": 61960 }, { "epoch": 0.2395586893661765, "grad_norm": 0.10079851001501083, "learning_rate": 0.002, "loss": 2.3434, "step": 61970 }, { "epoch": 0.23959734656955978, "grad_norm": 0.10619625449180603, "learning_rate": 0.002, "loss": 2.3709, "step": 61980 }, { "epoch": 0.23963600377294306, "grad_norm": 0.10123034566640854, "learning_rate": 0.002, "loss": 2.3608, "step": 61990 }, { "epoch": 0.23967466097632634, "grad_norm": 0.10666295140981674, "learning_rate": 0.002, "loss": 2.3637, "step": 62000 }, { "epoch": 0.23971331817970962, "grad_norm": 0.10037509351968765, "learning_rate": 0.002, "loss": 2.355, "step": 62010 }, { "epoch": 0.2397519753830929, "grad_norm": 0.11558493226766586, "learning_rate": 0.002, "loss": 2.3447, "step": 62020 }, { "epoch": 0.23979063258647618, "grad_norm": 0.23745326697826385, "learning_rate": 0.002, "loss": 2.3805, "step": 62030 }, { "epoch": 0.23982928978985943, "grad_norm": 0.10613018274307251, "learning_rate": 0.002, "loss": 2.3781, "step": 62040 }, { "epoch": 0.2398679469932427, "grad_norm": 0.09879384934902191, "learning_rate": 0.002, "loss": 2.3567, "step": 62050 }, { "epoch": 0.239906604196626, "grad_norm": 0.09674771875143051, "learning_rate": 0.002, "loss": 2.3511, "step": 62060 }, { "epoch": 0.23994526140000927, "grad_norm": 0.12776194512844086, "learning_rate": 0.002, "loss": 2.3498, "step": 62070 }, { "epoch": 0.23998391860339255, "grad_norm": 0.105409637093544, "learning_rate": 0.002, "loss": 2.3606, "step": 62080 }, { "epoch": 0.24002257580677583, "grad_norm": 0.11625168472528458, "learning_rate": 0.002, "loss": 2.3556, "step": 62090 }, { "epoch": 0.2400612330101591, "grad_norm": 0.10988224297761917, "learning_rate": 0.002, "loss": 2.3476, "step": 62100 }, { "epoch": 0.2400998902135424, "grad_norm": 0.10473316162824631, "learning_rate": 0.002, "loss": 2.3571, "step": 62110 }, { "epoch": 0.24013854741692567, "grad_norm": 0.1141149252653122, "learning_rate": 0.002, "loss": 2.3536, "step": 62120 }, { "epoch": 0.24017720462030895, "grad_norm": 0.1093212142586708, "learning_rate": 0.002, "loss": 2.3509, "step": 62130 }, { "epoch": 0.24021586182369223, "grad_norm": 0.10351050645112991, "learning_rate": 0.002, "loss": 2.3695, "step": 62140 }, { "epoch": 0.2402545190270755, "grad_norm": 0.11109581589698792, "learning_rate": 0.002, "loss": 2.3495, "step": 62150 }, { "epoch": 0.2402931762304588, "grad_norm": 0.11361625045537949, "learning_rate": 0.002, "loss": 2.3538, "step": 62160 }, { "epoch": 0.24033183343384207, "grad_norm": 0.1043824851512909, "learning_rate": 0.002, "loss": 2.3678, "step": 62170 }, { "epoch": 0.24037049063722535, "grad_norm": 0.11515588313341141, "learning_rate": 0.002, "loss": 2.3564, "step": 62180 }, { "epoch": 0.24040914784060863, "grad_norm": 0.13307060301303864, "learning_rate": 0.002, "loss": 2.357, "step": 62190 }, { "epoch": 0.2404478050439919, "grad_norm": 0.10510449856519699, "learning_rate": 0.002, "loss": 2.344, "step": 62200 }, { "epoch": 0.2404864622473752, "grad_norm": 0.11014141887426376, "learning_rate": 0.002, "loss": 2.3651, "step": 62210 }, { "epoch": 0.24052511945075847, "grad_norm": 0.10206637531518936, "learning_rate": 0.002, "loss": 2.3637, "step": 62220 }, { "epoch": 0.24056377665414172, "grad_norm": 0.10467716306447983, "learning_rate": 0.002, "loss": 2.3492, "step": 62230 }, { "epoch": 0.240602433857525, "grad_norm": 0.13094080984592438, "learning_rate": 0.002, "loss": 2.3506, "step": 62240 }, { "epoch": 0.24064109106090828, "grad_norm": 0.12012360990047455, "learning_rate": 0.002, "loss": 2.3642, "step": 62250 }, { "epoch": 0.24067974826429156, "grad_norm": 0.1046094223856926, "learning_rate": 0.002, "loss": 2.3613, "step": 62260 }, { "epoch": 0.24071840546767484, "grad_norm": 0.11625311523675919, "learning_rate": 0.002, "loss": 2.3577, "step": 62270 }, { "epoch": 0.24075706267105812, "grad_norm": 0.11781848222017288, "learning_rate": 0.002, "loss": 2.3682, "step": 62280 }, { "epoch": 0.2407957198744414, "grad_norm": 0.10195542126893997, "learning_rate": 0.002, "loss": 2.3531, "step": 62290 }, { "epoch": 0.24083437707782468, "grad_norm": 0.13691802322864532, "learning_rate": 0.002, "loss": 2.3682, "step": 62300 }, { "epoch": 0.24087303428120796, "grad_norm": 0.10711605101823807, "learning_rate": 0.002, "loss": 2.3675, "step": 62310 }, { "epoch": 0.24091169148459124, "grad_norm": 0.09732840210199356, "learning_rate": 0.002, "loss": 2.3481, "step": 62320 }, { "epoch": 0.24095034868797452, "grad_norm": 0.10126104950904846, "learning_rate": 0.002, "loss": 2.3484, "step": 62330 }, { "epoch": 0.2409890058913578, "grad_norm": 0.11862193793058395, "learning_rate": 0.002, "loss": 2.3498, "step": 62340 }, { "epoch": 0.24102766309474108, "grad_norm": 0.10822226852178574, "learning_rate": 0.002, "loss": 2.3516, "step": 62350 }, { "epoch": 0.24106632029812436, "grad_norm": 0.12469718605279922, "learning_rate": 0.002, "loss": 2.3553, "step": 62360 }, { "epoch": 0.24110497750150764, "grad_norm": 0.11491573601961136, "learning_rate": 0.002, "loss": 2.3726, "step": 62370 }, { "epoch": 0.24114363470489092, "grad_norm": 0.44333580136299133, "learning_rate": 0.002, "loss": 2.3539, "step": 62380 }, { "epoch": 0.2411822919082742, "grad_norm": 0.11015953868627548, "learning_rate": 0.002, "loss": 2.3851, "step": 62390 }, { "epoch": 0.24122094911165748, "grad_norm": 0.09991714358329773, "learning_rate": 0.002, "loss": 2.3489, "step": 62400 }, { "epoch": 0.24125960631504073, "grad_norm": 0.09338116645812988, "learning_rate": 0.002, "loss": 2.3667, "step": 62410 }, { "epoch": 0.241298263518424, "grad_norm": 0.1053207665681839, "learning_rate": 0.002, "loss": 2.3586, "step": 62420 }, { "epoch": 0.2413369207218073, "grad_norm": 0.1153879463672638, "learning_rate": 0.002, "loss": 2.3483, "step": 62430 }, { "epoch": 0.24137557792519057, "grad_norm": 0.0946960523724556, "learning_rate": 0.002, "loss": 2.3656, "step": 62440 }, { "epoch": 0.24141423512857385, "grad_norm": 0.10729081183671951, "learning_rate": 0.002, "loss": 2.3504, "step": 62450 }, { "epoch": 0.24145289233195713, "grad_norm": 0.1094650998711586, "learning_rate": 0.002, "loss": 2.3685, "step": 62460 }, { "epoch": 0.2414915495353404, "grad_norm": 0.1119939312338829, "learning_rate": 0.002, "loss": 2.3552, "step": 62470 }, { "epoch": 0.2415302067387237, "grad_norm": 0.10452145338058472, "learning_rate": 0.002, "loss": 2.3531, "step": 62480 }, { "epoch": 0.24156886394210697, "grad_norm": 0.11885945498943329, "learning_rate": 0.002, "loss": 2.3598, "step": 62490 }, { "epoch": 0.24160752114549025, "grad_norm": 0.11475305259227753, "learning_rate": 0.002, "loss": 2.3633, "step": 62500 }, { "epoch": 0.24164617834887353, "grad_norm": 0.11734220385551453, "learning_rate": 0.002, "loss": 2.3626, "step": 62510 }, { "epoch": 0.2416848355522568, "grad_norm": 0.10288964956998825, "learning_rate": 0.002, "loss": 2.3547, "step": 62520 }, { "epoch": 0.2417234927556401, "grad_norm": 0.12288731336593628, "learning_rate": 0.002, "loss": 2.3591, "step": 62530 }, { "epoch": 0.24176214995902337, "grad_norm": 0.10755060613155365, "learning_rate": 0.002, "loss": 2.3693, "step": 62540 }, { "epoch": 0.24180080716240665, "grad_norm": 0.10444962233304977, "learning_rate": 0.002, "loss": 2.352, "step": 62550 }, { "epoch": 0.24183946436578993, "grad_norm": 0.1046118289232254, "learning_rate": 0.002, "loss": 2.3581, "step": 62560 }, { "epoch": 0.2418781215691732, "grad_norm": 0.10155737400054932, "learning_rate": 0.002, "loss": 2.3701, "step": 62570 }, { "epoch": 0.2419167787725565, "grad_norm": 0.11808685958385468, "learning_rate": 0.002, "loss": 2.356, "step": 62580 }, { "epoch": 0.24195543597593977, "grad_norm": 0.09973792731761932, "learning_rate": 0.002, "loss": 2.3471, "step": 62590 }, { "epoch": 0.24199409317932302, "grad_norm": 0.10925379395484924, "learning_rate": 0.002, "loss": 2.3736, "step": 62600 }, { "epoch": 0.2420327503827063, "grad_norm": 0.1127404049038887, "learning_rate": 0.002, "loss": 2.3699, "step": 62610 }, { "epoch": 0.24207140758608958, "grad_norm": 0.10948999971151352, "learning_rate": 0.002, "loss": 2.3518, "step": 62620 }, { "epoch": 0.24211006478947286, "grad_norm": 0.13799403607845306, "learning_rate": 0.002, "loss": 2.3558, "step": 62630 }, { "epoch": 0.24214872199285614, "grad_norm": 0.11451857537031174, "learning_rate": 0.002, "loss": 2.3538, "step": 62640 }, { "epoch": 0.24218737919623942, "grad_norm": 0.11420796811580658, "learning_rate": 0.002, "loss": 2.3718, "step": 62650 }, { "epoch": 0.2422260363996227, "grad_norm": 0.11695457994937897, "learning_rate": 0.002, "loss": 2.357, "step": 62660 }, { "epoch": 0.24226469360300598, "grad_norm": 0.10169561952352524, "learning_rate": 0.002, "loss": 2.352, "step": 62670 }, { "epoch": 0.24230335080638926, "grad_norm": 0.11712782829999924, "learning_rate": 0.002, "loss": 2.3601, "step": 62680 }, { "epoch": 0.24234200800977254, "grad_norm": 0.11228419095277786, "learning_rate": 0.002, "loss": 2.3512, "step": 62690 }, { "epoch": 0.24238066521315582, "grad_norm": 0.12641753256320953, "learning_rate": 0.002, "loss": 2.3729, "step": 62700 }, { "epoch": 0.2424193224165391, "grad_norm": 0.11136075109243393, "learning_rate": 0.002, "loss": 2.3589, "step": 62710 }, { "epoch": 0.24245797961992238, "grad_norm": 0.1106911227107048, "learning_rate": 0.002, "loss": 2.3671, "step": 62720 }, { "epoch": 0.24249663682330566, "grad_norm": 0.09890349209308624, "learning_rate": 0.002, "loss": 2.3596, "step": 62730 }, { "epoch": 0.24253529402668894, "grad_norm": 0.13540855050086975, "learning_rate": 0.002, "loss": 2.3408, "step": 62740 }, { "epoch": 0.24257395123007222, "grad_norm": 0.13993631303310394, "learning_rate": 0.002, "loss": 2.3658, "step": 62750 }, { "epoch": 0.2426126084334555, "grad_norm": 0.11235027760267258, "learning_rate": 0.002, "loss": 2.3367, "step": 62760 }, { "epoch": 0.24265126563683878, "grad_norm": 0.10542025417089462, "learning_rate": 0.002, "loss": 2.3463, "step": 62770 }, { "epoch": 0.24268992284022203, "grad_norm": 0.12307173758745193, "learning_rate": 0.002, "loss": 2.3577, "step": 62780 }, { "epoch": 0.2427285800436053, "grad_norm": 0.12093117088079453, "learning_rate": 0.002, "loss": 2.3442, "step": 62790 }, { "epoch": 0.2427672372469886, "grad_norm": 0.09366226196289062, "learning_rate": 0.002, "loss": 2.3633, "step": 62800 }, { "epoch": 0.24280589445037187, "grad_norm": 0.11713926494121552, "learning_rate": 0.002, "loss": 2.3598, "step": 62810 }, { "epoch": 0.24284455165375515, "grad_norm": 0.10119593888521194, "learning_rate": 0.002, "loss": 2.3672, "step": 62820 }, { "epoch": 0.24288320885713843, "grad_norm": 0.13729046285152435, "learning_rate": 0.002, "loss": 2.3596, "step": 62830 }, { "epoch": 0.2429218660605217, "grad_norm": 0.1037496030330658, "learning_rate": 0.002, "loss": 2.3485, "step": 62840 }, { "epoch": 0.242960523263905, "grad_norm": 0.10583599656820297, "learning_rate": 0.002, "loss": 2.3329, "step": 62850 }, { "epoch": 0.24299918046728827, "grad_norm": 0.09716931730508804, "learning_rate": 0.002, "loss": 2.3561, "step": 62860 }, { "epoch": 0.24303783767067155, "grad_norm": 0.11291979253292084, "learning_rate": 0.002, "loss": 2.3578, "step": 62870 }, { "epoch": 0.24307649487405483, "grad_norm": 0.1146511435508728, "learning_rate": 0.002, "loss": 2.3583, "step": 62880 }, { "epoch": 0.2431151520774381, "grad_norm": 0.11932969093322754, "learning_rate": 0.002, "loss": 2.3704, "step": 62890 }, { "epoch": 0.2431538092808214, "grad_norm": 0.12777186930179596, "learning_rate": 0.002, "loss": 2.3595, "step": 62900 }, { "epoch": 0.24319246648420467, "grad_norm": 0.11459875106811523, "learning_rate": 0.002, "loss": 2.3552, "step": 62910 }, { "epoch": 0.24323112368758795, "grad_norm": 0.11585894227027893, "learning_rate": 0.002, "loss": 2.3596, "step": 62920 }, { "epoch": 0.24326978089097123, "grad_norm": 0.1289733350276947, "learning_rate": 0.002, "loss": 2.3675, "step": 62930 }, { "epoch": 0.2433084380943545, "grad_norm": 0.10749118030071259, "learning_rate": 0.002, "loss": 2.347, "step": 62940 }, { "epoch": 0.2433470952977378, "grad_norm": 0.10447361320257187, "learning_rate": 0.002, "loss": 2.3531, "step": 62950 }, { "epoch": 0.24338575250112107, "grad_norm": 0.11179212480783463, "learning_rate": 0.002, "loss": 2.3672, "step": 62960 }, { "epoch": 0.24342440970450432, "grad_norm": 0.1024908795952797, "learning_rate": 0.002, "loss": 2.3555, "step": 62970 }, { "epoch": 0.2434630669078876, "grad_norm": 0.10360907018184662, "learning_rate": 0.002, "loss": 2.3509, "step": 62980 }, { "epoch": 0.24350172411127088, "grad_norm": 0.11925695836544037, "learning_rate": 0.002, "loss": 2.348, "step": 62990 }, { "epoch": 0.24354038131465416, "grad_norm": 0.11314789205789566, "learning_rate": 0.002, "loss": 2.3584, "step": 63000 }, { "epoch": 0.24357903851803744, "grad_norm": 0.09414984285831451, "learning_rate": 0.002, "loss": 2.349, "step": 63010 }, { "epoch": 0.24361769572142072, "grad_norm": 0.1293078511953354, "learning_rate": 0.002, "loss": 2.3494, "step": 63020 }, { "epoch": 0.243656352924804, "grad_norm": 0.1282479166984558, "learning_rate": 0.002, "loss": 2.3643, "step": 63030 }, { "epoch": 0.24369501012818728, "grad_norm": 0.11113385856151581, "learning_rate": 0.002, "loss": 2.349, "step": 63040 }, { "epoch": 0.24373366733157056, "grad_norm": 0.10495486855506897, "learning_rate": 0.002, "loss": 2.3492, "step": 63050 }, { "epoch": 0.24377232453495384, "grad_norm": 0.1034003272652626, "learning_rate": 0.002, "loss": 2.3483, "step": 63060 }, { "epoch": 0.24381098173833712, "grad_norm": 0.103383369743824, "learning_rate": 0.002, "loss": 2.3619, "step": 63070 }, { "epoch": 0.2438496389417204, "grad_norm": 0.13411355018615723, "learning_rate": 0.002, "loss": 2.3648, "step": 63080 }, { "epoch": 0.24388829614510368, "grad_norm": 0.09459537267684937, "learning_rate": 0.002, "loss": 2.3757, "step": 63090 }, { "epoch": 0.24392695334848696, "grad_norm": 0.11230204254388809, "learning_rate": 0.002, "loss": 2.3503, "step": 63100 }, { "epoch": 0.24396561055187024, "grad_norm": 0.10179710388183594, "learning_rate": 0.002, "loss": 2.3333, "step": 63110 }, { "epoch": 0.24400426775525352, "grad_norm": 0.10394256561994553, "learning_rate": 0.002, "loss": 2.367, "step": 63120 }, { "epoch": 0.2440429249586368, "grad_norm": 0.12284719198942184, "learning_rate": 0.002, "loss": 2.3705, "step": 63130 }, { "epoch": 0.24408158216202008, "grad_norm": 0.10525992512702942, "learning_rate": 0.002, "loss": 2.3565, "step": 63140 }, { "epoch": 0.24412023936540336, "grad_norm": 0.11592471599578857, "learning_rate": 0.002, "loss": 2.3661, "step": 63150 }, { "epoch": 0.24415889656878662, "grad_norm": 0.10198235511779785, "learning_rate": 0.002, "loss": 2.3615, "step": 63160 }, { "epoch": 0.2441975537721699, "grad_norm": 0.10999837517738342, "learning_rate": 0.002, "loss": 2.3673, "step": 63170 }, { "epoch": 0.24423621097555318, "grad_norm": 0.13410034775733948, "learning_rate": 0.002, "loss": 2.3499, "step": 63180 }, { "epoch": 0.24427486817893646, "grad_norm": 0.12082164734601974, "learning_rate": 0.002, "loss": 2.349, "step": 63190 }, { "epoch": 0.24431352538231974, "grad_norm": 0.09982411563396454, "learning_rate": 0.002, "loss": 2.3634, "step": 63200 }, { "epoch": 0.24435218258570301, "grad_norm": 0.1246073842048645, "learning_rate": 0.002, "loss": 2.3488, "step": 63210 }, { "epoch": 0.2443908397890863, "grad_norm": 0.10156312584877014, "learning_rate": 0.002, "loss": 2.3542, "step": 63220 }, { "epoch": 0.24442949699246957, "grad_norm": 0.16856735944747925, "learning_rate": 0.002, "loss": 2.3545, "step": 63230 }, { "epoch": 0.24446815419585285, "grad_norm": 0.12928234040737152, "learning_rate": 0.002, "loss": 2.3511, "step": 63240 }, { "epoch": 0.24450681139923613, "grad_norm": 0.11333297938108444, "learning_rate": 0.002, "loss": 2.3604, "step": 63250 }, { "epoch": 0.24454546860261941, "grad_norm": 0.10946240276098251, "learning_rate": 0.002, "loss": 2.3607, "step": 63260 }, { "epoch": 0.2445841258060027, "grad_norm": 0.09647426754236221, "learning_rate": 0.002, "loss": 2.3364, "step": 63270 }, { "epoch": 0.24462278300938597, "grad_norm": 0.10767856240272522, "learning_rate": 0.002, "loss": 2.3652, "step": 63280 }, { "epoch": 0.24466144021276925, "grad_norm": 0.11887312680482864, "learning_rate": 0.002, "loss": 2.361, "step": 63290 }, { "epoch": 0.24470009741615253, "grad_norm": 0.12779520452022552, "learning_rate": 0.002, "loss": 2.3661, "step": 63300 }, { "epoch": 0.24473875461953581, "grad_norm": 0.11230748146772385, "learning_rate": 0.002, "loss": 2.3597, "step": 63310 }, { "epoch": 0.2447774118229191, "grad_norm": 0.11133432388305664, "learning_rate": 0.002, "loss": 2.3675, "step": 63320 }, { "epoch": 0.24481606902630237, "grad_norm": 0.10257939249277115, "learning_rate": 0.002, "loss": 2.3716, "step": 63330 }, { "epoch": 0.24485472622968563, "grad_norm": 0.11476735770702362, "learning_rate": 0.002, "loss": 2.3556, "step": 63340 }, { "epoch": 0.2448933834330689, "grad_norm": 0.11152663081884384, "learning_rate": 0.002, "loss": 2.3433, "step": 63350 }, { "epoch": 0.2449320406364522, "grad_norm": 0.1259431391954422, "learning_rate": 0.002, "loss": 2.3468, "step": 63360 }, { "epoch": 0.24497069783983547, "grad_norm": 0.09820462018251419, "learning_rate": 0.002, "loss": 2.3532, "step": 63370 }, { "epoch": 0.24500935504321875, "grad_norm": 0.13023404777050018, "learning_rate": 0.002, "loss": 2.365, "step": 63380 }, { "epoch": 0.24504801224660203, "grad_norm": 0.11050461232662201, "learning_rate": 0.002, "loss": 2.3637, "step": 63390 }, { "epoch": 0.2450866694499853, "grad_norm": 0.0972348302602768, "learning_rate": 0.002, "loss": 2.3592, "step": 63400 }, { "epoch": 0.24512532665336859, "grad_norm": 0.12149663269519806, "learning_rate": 0.002, "loss": 2.3516, "step": 63410 }, { "epoch": 0.24516398385675187, "grad_norm": 0.1159098818898201, "learning_rate": 0.002, "loss": 2.3542, "step": 63420 }, { "epoch": 0.24520264106013515, "grad_norm": 0.10914923250675201, "learning_rate": 0.002, "loss": 2.3437, "step": 63430 }, { "epoch": 0.24524129826351843, "grad_norm": 0.12055275589227676, "learning_rate": 0.002, "loss": 2.3516, "step": 63440 }, { "epoch": 0.2452799554669017, "grad_norm": 0.1446305811405182, "learning_rate": 0.002, "loss": 2.3554, "step": 63450 }, { "epoch": 0.24531861267028499, "grad_norm": 0.11570040136575699, "learning_rate": 0.002, "loss": 2.3661, "step": 63460 }, { "epoch": 0.24535726987366827, "grad_norm": 0.10370668768882751, "learning_rate": 0.002, "loss": 2.3677, "step": 63470 }, { "epoch": 0.24539592707705155, "grad_norm": 0.11650431901216507, "learning_rate": 0.002, "loss": 2.3588, "step": 63480 }, { "epoch": 0.24543458428043483, "grad_norm": 0.13468730449676514, "learning_rate": 0.002, "loss": 2.3619, "step": 63490 }, { "epoch": 0.2454732414838181, "grad_norm": 0.11168798804283142, "learning_rate": 0.002, "loss": 2.3575, "step": 63500 }, { "epoch": 0.24551189868720139, "grad_norm": 0.1285007745027542, "learning_rate": 0.002, "loss": 2.3414, "step": 63510 }, { "epoch": 0.24555055589058467, "grad_norm": 0.10346856713294983, "learning_rate": 0.002, "loss": 2.3578, "step": 63520 }, { "epoch": 0.24558921309396792, "grad_norm": 0.11400487273931503, "learning_rate": 0.002, "loss": 2.3483, "step": 63530 }, { "epoch": 0.2456278702973512, "grad_norm": 0.10188063234090805, "learning_rate": 0.002, "loss": 2.3674, "step": 63540 }, { "epoch": 0.24566652750073448, "grad_norm": 0.10284477472305298, "learning_rate": 0.002, "loss": 2.3486, "step": 63550 }, { "epoch": 0.24570518470411776, "grad_norm": 0.10538157820701599, "learning_rate": 0.002, "loss": 2.3572, "step": 63560 }, { "epoch": 0.24574384190750104, "grad_norm": 0.10660867393016815, "learning_rate": 0.002, "loss": 2.3642, "step": 63570 }, { "epoch": 0.24578249911088432, "grad_norm": 0.13760226964950562, "learning_rate": 0.002, "loss": 2.359, "step": 63580 }, { "epoch": 0.2458211563142676, "grad_norm": 0.10196994245052338, "learning_rate": 0.002, "loss": 2.3628, "step": 63590 }, { "epoch": 0.24585981351765088, "grad_norm": 0.10736634582281113, "learning_rate": 0.002, "loss": 2.3598, "step": 63600 }, { "epoch": 0.24589847072103416, "grad_norm": 0.11615607142448425, "learning_rate": 0.002, "loss": 2.3491, "step": 63610 }, { "epoch": 0.24593712792441744, "grad_norm": 0.11788640916347504, "learning_rate": 0.002, "loss": 2.368, "step": 63620 }, { "epoch": 0.24597578512780072, "grad_norm": 0.10808571428060532, "learning_rate": 0.002, "loss": 2.3612, "step": 63630 }, { "epoch": 0.246014442331184, "grad_norm": 0.14803314208984375, "learning_rate": 0.002, "loss": 2.3707, "step": 63640 }, { "epoch": 0.24605309953456728, "grad_norm": 0.11327030509710312, "learning_rate": 0.002, "loss": 2.3534, "step": 63650 }, { "epoch": 0.24609175673795056, "grad_norm": 0.11561069637537003, "learning_rate": 0.002, "loss": 2.345, "step": 63660 }, { "epoch": 0.24613041394133384, "grad_norm": 0.10180526971817017, "learning_rate": 0.002, "loss": 2.3537, "step": 63670 }, { "epoch": 0.24616907114471712, "grad_norm": 0.10653835535049438, "learning_rate": 0.002, "loss": 2.356, "step": 63680 }, { "epoch": 0.2462077283481004, "grad_norm": 0.11258967220783234, "learning_rate": 0.002, "loss": 2.3546, "step": 63690 }, { "epoch": 0.24624638555148368, "grad_norm": 0.11039800941944122, "learning_rate": 0.002, "loss": 2.3432, "step": 63700 }, { "epoch": 0.24628504275486693, "grad_norm": 0.13323748111724854, "learning_rate": 0.002, "loss": 2.364, "step": 63710 }, { "epoch": 0.2463236999582502, "grad_norm": 0.1323200762271881, "learning_rate": 0.002, "loss": 2.3652, "step": 63720 }, { "epoch": 0.2463623571616335, "grad_norm": 0.10117621719837189, "learning_rate": 0.002, "loss": 2.3859, "step": 63730 }, { "epoch": 0.24640101436501677, "grad_norm": 0.12777069211006165, "learning_rate": 0.002, "loss": 2.3533, "step": 63740 }, { "epoch": 0.24643967156840005, "grad_norm": 0.11329413205385208, "learning_rate": 0.002, "loss": 2.3504, "step": 63750 }, { "epoch": 0.24647832877178333, "grad_norm": 0.16249066591262817, "learning_rate": 0.002, "loss": 2.3608, "step": 63760 }, { "epoch": 0.2465169859751666, "grad_norm": 0.09796962887048721, "learning_rate": 0.002, "loss": 2.3635, "step": 63770 }, { "epoch": 0.2465556431785499, "grad_norm": 0.15831460058689117, "learning_rate": 0.002, "loss": 2.3828, "step": 63780 }, { "epoch": 0.24659430038193317, "grad_norm": 0.12122119963169098, "learning_rate": 0.002, "loss": 2.3651, "step": 63790 }, { "epoch": 0.24663295758531645, "grad_norm": 0.11653528362512589, "learning_rate": 0.002, "loss": 2.3664, "step": 63800 }, { "epoch": 0.24667161478869973, "grad_norm": 0.13036595284938812, "learning_rate": 0.002, "loss": 2.3551, "step": 63810 }, { "epoch": 0.246710271992083, "grad_norm": 0.10035198926925659, "learning_rate": 0.002, "loss": 2.3648, "step": 63820 }, { "epoch": 0.2467489291954663, "grad_norm": 0.10289250314235687, "learning_rate": 0.002, "loss": 2.337, "step": 63830 }, { "epoch": 0.24678758639884957, "grad_norm": 0.12327629327774048, "learning_rate": 0.002, "loss": 2.3605, "step": 63840 }, { "epoch": 0.24682624360223285, "grad_norm": 0.094744473695755, "learning_rate": 0.002, "loss": 2.3554, "step": 63850 }, { "epoch": 0.24686490080561613, "grad_norm": 0.11189937591552734, "learning_rate": 0.002, "loss": 2.3616, "step": 63860 }, { "epoch": 0.2469035580089994, "grad_norm": 0.11155198514461517, "learning_rate": 0.002, "loss": 2.3511, "step": 63870 }, { "epoch": 0.2469422152123827, "grad_norm": 0.11133726686239243, "learning_rate": 0.002, "loss": 2.3606, "step": 63880 }, { "epoch": 0.24698087241576597, "grad_norm": 0.11880885809659958, "learning_rate": 0.002, "loss": 2.3701, "step": 63890 }, { "epoch": 0.24701952961914922, "grad_norm": 0.1178770512342453, "learning_rate": 0.002, "loss": 2.3538, "step": 63900 }, { "epoch": 0.2470581868225325, "grad_norm": 0.10876142978668213, "learning_rate": 0.002, "loss": 2.356, "step": 63910 }, { "epoch": 0.24709684402591578, "grad_norm": 0.10888998955488205, "learning_rate": 0.002, "loss": 2.3495, "step": 63920 }, { "epoch": 0.24713550122929906, "grad_norm": 0.09465057402849197, "learning_rate": 0.002, "loss": 2.3446, "step": 63930 }, { "epoch": 0.24717415843268234, "grad_norm": 0.12009290605783463, "learning_rate": 0.002, "loss": 2.3637, "step": 63940 }, { "epoch": 0.24721281563606562, "grad_norm": 0.13506600260734558, "learning_rate": 0.002, "loss": 2.35, "step": 63950 }, { "epoch": 0.2472514728394489, "grad_norm": 0.12046731263399124, "learning_rate": 0.002, "loss": 2.3474, "step": 63960 }, { "epoch": 0.24729013004283218, "grad_norm": 0.11283211410045624, "learning_rate": 0.002, "loss": 2.3747, "step": 63970 }, { "epoch": 0.24732878724621546, "grad_norm": 0.10055335611104965, "learning_rate": 0.002, "loss": 2.3672, "step": 63980 }, { "epoch": 0.24736744444959874, "grad_norm": 0.11106719076633453, "learning_rate": 0.002, "loss": 2.3622, "step": 63990 }, { "epoch": 0.24740610165298202, "grad_norm": 0.14089414477348328, "learning_rate": 0.002, "loss": 2.3726, "step": 64000 }, { "epoch": 0.2474447588563653, "grad_norm": 0.10141649097204208, "learning_rate": 0.002, "loss": 2.3571, "step": 64010 }, { "epoch": 0.24748341605974858, "grad_norm": 0.11181320995092392, "learning_rate": 0.002, "loss": 2.3669, "step": 64020 }, { "epoch": 0.24752207326313186, "grad_norm": 0.09916166961193085, "learning_rate": 0.002, "loss": 2.3571, "step": 64030 }, { "epoch": 0.24756073046651514, "grad_norm": 0.14971262216567993, "learning_rate": 0.002, "loss": 2.3491, "step": 64040 }, { "epoch": 0.24759938766989842, "grad_norm": 0.10490719974040985, "learning_rate": 0.002, "loss": 2.3521, "step": 64050 }, { "epoch": 0.2476380448732817, "grad_norm": 0.1055225357413292, "learning_rate": 0.002, "loss": 2.3488, "step": 64060 }, { "epoch": 0.24767670207666498, "grad_norm": 0.10268551856279373, "learning_rate": 0.002, "loss": 2.3552, "step": 64070 }, { "epoch": 0.24771535928004823, "grad_norm": 0.12704938650131226, "learning_rate": 0.002, "loss": 2.3542, "step": 64080 }, { "epoch": 0.2477540164834315, "grad_norm": 0.11193748563528061, "learning_rate": 0.002, "loss": 2.3636, "step": 64090 }, { "epoch": 0.2477926736868148, "grad_norm": 0.110689178109169, "learning_rate": 0.002, "loss": 2.3621, "step": 64100 }, { "epoch": 0.24783133089019807, "grad_norm": 0.12341715395450592, "learning_rate": 0.002, "loss": 2.3663, "step": 64110 }, { "epoch": 0.24786998809358135, "grad_norm": 0.11892364919185638, "learning_rate": 0.002, "loss": 2.3594, "step": 64120 }, { "epoch": 0.24790864529696463, "grad_norm": 0.10833492130041122, "learning_rate": 0.002, "loss": 2.3523, "step": 64130 }, { "epoch": 0.2479473025003479, "grad_norm": 0.12388889491558075, "learning_rate": 0.002, "loss": 2.3568, "step": 64140 }, { "epoch": 0.2479859597037312, "grad_norm": 0.10363738983869553, "learning_rate": 0.002, "loss": 2.3522, "step": 64150 }, { "epoch": 0.24802461690711447, "grad_norm": 0.1311815083026886, "learning_rate": 0.002, "loss": 2.3525, "step": 64160 }, { "epoch": 0.24806327411049775, "grad_norm": 0.09794235974550247, "learning_rate": 0.002, "loss": 2.3566, "step": 64170 }, { "epoch": 0.24810193131388103, "grad_norm": 0.15324456989765167, "learning_rate": 0.002, "loss": 2.3483, "step": 64180 }, { "epoch": 0.2481405885172643, "grad_norm": 0.11453873664140701, "learning_rate": 0.002, "loss": 2.3644, "step": 64190 }, { "epoch": 0.2481792457206476, "grad_norm": 0.0997665673494339, "learning_rate": 0.002, "loss": 2.35, "step": 64200 }, { "epoch": 0.24821790292403087, "grad_norm": 0.10649903118610382, "learning_rate": 0.002, "loss": 2.3399, "step": 64210 }, { "epoch": 0.24825656012741415, "grad_norm": 0.10414480417966843, "learning_rate": 0.002, "loss": 2.3559, "step": 64220 }, { "epoch": 0.24829521733079743, "grad_norm": 0.13123467564582825, "learning_rate": 0.002, "loss": 2.3698, "step": 64230 }, { "epoch": 0.2483338745341807, "grad_norm": 0.11330459266901016, "learning_rate": 0.002, "loss": 2.342, "step": 64240 }, { "epoch": 0.248372531737564, "grad_norm": 0.09887401759624481, "learning_rate": 0.002, "loss": 2.3551, "step": 64250 }, { "epoch": 0.24841118894094727, "grad_norm": 0.09318116307258606, "learning_rate": 0.002, "loss": 2.3665, "step": 64260 }, { "epoch": 0.24844984614433052, "grad_norm": 0.13932709395885468, "learning_rate": 0.002, "loss": 2.3585, "step": 64270 }, { "epoch": 0.2484885033477138, "grad_norm": 0.12776115536689758, "learning_rate": 0.002, "loss": 2.342, "step": 64280 }, { "epoch": 0.24852716055109708, "grad_norm": 0.10516197234392166, "learning_rate": 0.002, "loss": 2.3697, "step": 64290 }, { "epoch": 0.24856581775448036, "grad_norm": 0.10422108322381973, "learning_rate": 0.002, "loss": 2.3516, "step": 64300 }, { "epoch": 0.24860447495786364, "grad_norm": 0.1057262048125267, "learning_rate": 0.002, "loss": 2.3585, "step": 64310 }, { "epoch": 0.24864313216124692, "grad_norm": 0.12558288872241974, "learning_rate": 0.002, "loss": 2.3711, "step": 64320 }, { "epoch": 0.2486817893646302, "grad_norm": 0.10270664840936661, "learning_rate": 0.002, "loss": 2.3619, "step": 64330 }, { "epoch": 0.24872044656801348, "grad_norm": 0.12882837653160095, "learning_rate": 0.002, "loss": 2.3668, "step": 64340 }, { "epoch": 0.24875910377139676, "grad_norm": 0.11752016097307205, "learning_rate": 0.002, "loss": 2.3555, "step": 64350 }, { "epoch": 0.24879776097478004, "grad_norm": 0.10369332134723663, "learning_rate": 0.002, "loss": 2.3579, "step": 64360 }, { "epoch": 0.24883641817816332, "grad_norm": 0.09841576963663101, "learning_rate": 0.002, "loss": 2.3573, "step": 64370 }, { "epoch": 0.2488750753815466, "grad_norm": 0.11956728249788284, "learning_rate": 0.002, "loss": 2.3502, "step": 64380 }, { "epoch": 0.24891373258492988, "grad_norm": 0.11087989062070847, "learning_rate": 0.002, "loss": 2.3518, "step": 64390 }, { "epoch": 0.24895238978831316, "grad_norm": 0.1197686493396759, "learning_rate": 0.002, "loss": 2.3451, "step": 64400 }, { "epoch": 0.24899104699169644, "grad_norm": 0.10942673683166504, "learning_rate": 0.002, "loss": 2.3631, "step": 64410 }, { "epoch": 0.24902970419507972, "grad_norm": 0.1125696524977684, "learning_rate": 0.002, "loss": 2.3645, "step": 64420 }, { "epoch": 0.249068361398463, "grad_norm": 0.12106586247682571, "learning_rate": 0.002, "loss": 2.3479, "step": 64430 }, { "epoch": 0.24910701860184628, "grad_norm": 0.09818416088819504, "learning_rate": 0.002, "loss": 2.3644, "step": 64440 }, { "epoch": 0.24914567580522953, "grad_norm": 0.10371017456054688, "learning_rate": 0.002, "loss": 2.3692, "step": 64450 }, { "epoch": 0.2491843330086128, "grad_norm": 0.10142534971237183, "learning_rate": 0.002, "loss": 2.371, "step": 64460 }, { "epoch": 0.2492229902119961, "grad_norm": 0.15823061764240265, "learning_rate": 0.002, "loss": 2.3506, "step": 64470 }, { "epoch": 0.24926164741537937, "grad_norm": 0.10784637182950974, "learning_rate": 0.002, "loss": 2.3568, "step": 64480 }, { "epoch": 0.24930030461876265, "grad_norm": 0.10801376402378082, "learning_rate": 0.002, "loss": 2.354, "step": 64490 }, { "epoch": 0.24933896182214593, "grad_norm": 0.09740674495697021, "learning_rate": 0.002, "loss": 2.3611, "step": 64500 }, { "epoch": 0.2493776190255292, "grad_norm": 0.11784724146127701, "learning_rate": 0.002, "loss": 2.3439, "step": 64510 }, { "epoch": 0.2494162762289125, "grad_norm": 0.11282724142074585, "learning_rate": 0.002, "loss": 2.3732, "step": 64520 }, { "epoch": 0.24945493343229577, "grad_norm": 0.11594454199075699, "learning_rate": 0.002, "loss": 2.3502, "step": 64530 }, { "epoch": 0.24949359063567905, "grad_norm": 0.09549959003925323, "learning_rate": 0.002, "loss": 2.3669, "step": 64540 }, { "epoch": 0.24953224783906233, "grad_norm": 0.11274974793195724, "learning_rate": 0.002, "loss": 2.3488, "step": 64550 }, { "epoch": 0.2495709050424456, "grad_norm": 0.10660584270954132, "learning_rate": 0.002, "loss": 2.3596, "step": 64560 }, { "epoch": 0.2496095622458289, "grad_norm": 0.09843463450670242, "learning_rate": 0.002, "loss": 2.3724, "step": 64570 }, { "epoch": 0.24964821944921217, "grad_norm": 0.11696872115135193, "learning_rate": 0.002, "loss": 2.3557, "step": 64580 }, { "epoch": 0.24968687665259545, "grad_norm": 0.12023330479860306, "learning_rate": 0.002, "loss": 2.3419, "step": 64590 }, { "epoch": 0.24972553385597873, "grad_norm": 0.10271915048360825, "learning_rate": 0.002, "loss": 2.3689, "step": 64600 }, { "epoch": 0.249764191059362, "grad_norm": 0.11523495614528656, "learning_rate": 0.002, "loss": 2.3568, "step": 64610 }, { "epoch": 0.2498028482627453, "grad_norm": 0.1043362021446228, "learning_rate": 0.002, "loss": 2.3588, "step": 64620 }, { "epoch": 0.24984150546612857, "grad_norm": 0.1054345890879631, "learning_rate": 0.002, "loss": 2.3522, "step": 64630 }, { "epoch": 0.24988016266951182, "grad_norm": 0.13760827481746674, "learning_rate": 0.002, "loss": 2.3588, "step": 64640 }, { "epoch": 0.2499188198728951, "grad_norm": 0.11963876336812973, "learning_rate": 0.002, "loss": 2.3654, "step": 64650 }, { "epoch": 0.24995747707627838, "grad_norm": 0.09870851784944534, "learning_rate": 0.002, "loss": 2.3503, "step": 64660 }, { "epoch": 0.24999613427966166, "grad_norm": 0.13970845937728882, "learning_rate": 0.002, "loss": 2.3503, "step": 64670 }, { "epoch": 0.25003479148304497, "grad_norm": 0.15180478990077972, "learning_rate": 0.002, "loss": 2.3608, "step": 64680 }, { "epoch": 0.2500734486864282, "grad_norm": 0.1098647192120552, "learning_rate": 0.002, "loss": 2.3489, "step": 64690 }, { "epoch": 0.25011210588981153, "grad_norm": 0.11667878180742264, "learning_rate": 0.002, "loss": 2.3695, "step": 64700 }, { "epoch": 0.2501507630931948, "grad_norm": 0.15805795788764954, "learning_rate": 0.002, "loss": 2.3581, "step": 64710 }, { "epoch": 0.2501894202965781, "grad_norm": 0.14936715364456177, "learning_rate": 0.002, "loss": 2.3456, "step": 64720 }, { "epoch": 0.25022807749996134, "grad_norm": 0.09799555689096451, "learning_rate": 0.002, "loss": 2.3572, "step": 64730 }, { "epoch": 0.2502667347033446, "grad_norm": 0.09978866577148438, "learning_rate": 0.002, "loss": 2.3469, "step": 64740 }, { "epoch": 0.2503053919067279, "grad_norm": 0.10472051799297333, "learning_rate": 0.002, "loss": 2.3394, "step": 64750 }, { "epoch": 0.25034404911011116, "grad_norm": 0.10219122469425201, "learning_rate": 0.002, "loss": 2.3491, "step": 64760 }, { "epoch": 0.25038270631349446, "grad_norm": 0.0982801541686058, "learning_rate": 0.002, "loss": 2.3644, "step": 64770 }, { "epoch": 0.2504213635168777, "grad_norm": 0.09922726452350616, "learning_rate": 0.002, "loss": 2.3532, "step": 64780 }, { "epoch": 0.250460020720261, "grad_norm": 0.11779270321130753, "learning_rate": 0.002, "loss": 2.3547, "step": 64790 }, { "epoch": 0.2504986779236443, "grad_norm": 0.12202689051628113, "learning_rate": 0.002, "loss": 2.3615, "step": 64800 }, { "epoch": 0.2505373351270276, "grad_norm": 0.1136423796415329, "learning_rate": 0.002, "loss": 2.3654, "step": 64810 }, { "epoch": 0.25057599233041083, "grad_norm": 0.10997912287712097, "learning_rate": 0.002, "loss": 2.3516, "step": 64820 }, { "epoch": 0.25061464953379414, "grad_norm": 0.10405836254358292, "learning_rate": 0.002, "loss": 2.339, "step": 64830 }, { "epoch": 0.2506533067371774, "grad_norm": 0.10198356956243515, "learning_rate": 0.002, "loss": 2.3481, "step": 64840 }, { "epoch": 0.2506919639405607, "grad_norm": 0.1193748265504837, "learning_rate": 0.002, "loss": 2.3708, "step": 64850 }, { "epoch": 0.25073062114394395, "grad_norm": 0.0984581783413887, "learning_rate": 0.002, "loss": 2.3377, "step": 64860 }, { "epoch": 0.25076927834732726, "grad_norm": 0.12913978099822998, "learning_rate": 0.002, "loss": 2.3621, "step": 64870 }, { "epoch": 0.2508079355507105, "grad_norm": 0.10153471678495407, "learning_rate": 0.002, "loss": 2.3487, "step": 64880 }, { "epoch": 0.2508465927540938, "grad_norm": 0.09593507647514343, "learning_rate": 0.002, "loss": 2.3866, "step": 64890 }, { "epoch": 0.2508852499574771, "grad_norm": 0.1064973697066307, "learning_rate": 0.002, "loss": 2.3485, "step": 64900 }, { "epoch": 0.2509239071608604, "grad_norm": 0.10728035122156143, "learning_rate": 0.002, "loss": 2.3548, "step": 64910 }, { "epoch": 0.25096256436424363, "grad_norm": 0.13115960359573364, "learning_rate": 0.002, "loss": 2.3703, "step": 64920 }, { "epoch": 0.2510012215676269, "grad_norm": 0.11741824448108673, "learning_rate": 0.002, "loss": 2.3557, "step": 64930 }, { "epoch": 0.2510398787710102, "grad_norm": 0.10732880979776382, "learning_rate": 0.002, "loss": 2.3586, "step": 64940 }, { "epoch": 0.25107853597439345, "grad_norm": 0.11535921692848206, "learning_rate": 0.002, "loss": 2.3668, "step": 64950 }, { "epoch": 0.25111719317777675, "grad_norm": 0.11269236356019974, "learning_rate": 0.002, "loss": 2.3551, "step": 64960 }, { "epoch": 0.25115585038116, "grad_norm": 0.10160496830940247, "learning_rate": 0.002, "loss": 2.3625, "step": 64970 }, { "epoch": 0.2511945075845433, "grad_norm": 0.11177372187376022, "learning_rate": 0.002, "loss": 2.3584, "step": 64980 }, { "epoch": 0.25123316478792657, "grad_norm": 0.12382085621356964, "learning_rate": 0.002, "loss": 2.3607, "step": 64990 }, { "epoch": 0.2512718219913099, "grad_norm": 0.12273989617824554, "learning_rate": 0.002, "loss": 2.3487, "step": 65000 }, { "epoch": 0.2513104791946931, "grad_norm": 0.10529609769582748, "learning_rate": 0.002, "loss": 2.3706, "step": 65010 }, { "epoch": 0.25134913639807643, "grad_norm": 0.12380048632621765, "learning_rate": 0.002, "loss": 2.3548, "step": 65020 }, { "epoch": 0.2513877936014597, "grad_norm": 0.11554655432701111, "learning_rate": 0.002, "loss": 2.3633, "step": 65030 }, { "epoch": 0.251426450804843, "grad_norm": 0.12273314595222473, "learning_rate": 0.002, "loss": 2.3573, "step": 65040 }, { "epoch": 0.25146510800822625, "grad_norm": 0.12644176185131073, "learning_rate": 0.002, "loss": 2.3681, "step": 65050 }, { "epoch": 0.25150376521160955, "grad_norm": 0.10715832561254501, "learning_rate": 0.002, "loss": 2.353, "step": 65060 }, { "epoch": 0.2515424224149928, "grad_norm": 0.11817505210638046, "learning_rate": 0.002, "loss": 2.3559, "step": 65070 }, { "epoch": 0.2515810796183761, "grad_norm": 0.1156371533870697, "learning_rate": 0.002, "loss": 2.3623, "step": 65080 }, { "epoch": 0.25161973682175937, "grad_norm": 0.10240025073289871, "learning_rate": 0.002, "loss": 2.3615, "step": 65090 }, { "epoch": 0.2516583940251426, "grad_norm": 0.1039934903383255, "learning_rate": 0.002, "loss": 2.3513, "step": 65100 }, { "epoch": 0.2516970512285259, "grad_norm": 0.12380948662757874, "learning_rate": 0.002, "loss": 2.3546, "step": 65110 }, { "epoch": 0.2517357084319092, "grad_norm": 0.11189432442188263, "learning_rate": 0.002, "loss": 2.3561, "step": 65120 }, { "epoch": 0.2517743656352925, "grad_norm": 0.0978197380900383, "learning_rate": 0.002, "loss": 2.3594, "step": 65130 }, { "epoch": 0.25181302283867574, "grad_norm": 0.11754926294088364, "learning_rate": 0.002, "loss": 2.3561, "step": 65140 }, { "epoch": 0.25185168004205905, "grad_norm": 0.10339406132698059, "learning_rate": 0.002, "loss": 2.3717, "step": 65150 }, { "epoch": 0.2518903372454423, "grad_norm": 0.11164677143096924, "learning_rate": 0.002, "loss": 2.3622, "step": 65160 }, { "epoch": 0.2519289944488256, "grad_norm": 0.09547177702188492, "learning_rate": 0.002, "loss": 2.3805, "step": 65170 }, { "epoch": 0.25196765165220886, "grad_norm": 0.11362284421920776, "learning_rate": 0.002, "loss": 2.3753, "step": 65180 }, { "epoch": 0.25200630885559216, "grad_norm": 0.09665997326374054, "learning_rate": 0.002, "loss": 2.3595, "step": 65190 }, { "epoch": 0.2520449660589754, "grad_norm": 0.12740235030651093, "learning_rate": 0.002, "loss": 2.339, "step": 65200 }, { "epoch": 0.2520836232623587, "grad_norm": 0.10088548064231873, "learning_rate": 0.002, "loss": 2.3532, "step": 65210 }, { "epoch": 0.252122280465742, "grad_norm": 0.08670290559530258, "learning_rate": 0.002, "loss": 2.3578, "step": 65220 }, { "epoch": 0.2521609376691253, "grad_norm": 0.11755174398422241, "learning_rate": 0.002, "loss": 2.3527, "step": 65230 }, { "epoch": 0.25219959487250854, "grad_norm": 0.11353892087936401, "learning_rate": 0.002, "loss": 2.3622, "step": 65240 }, { "epoch": 0.25223825207589184, "grad_norm": 0.11044956743717194, "learning_rate": 0.002, "loss": 2.3471, "step": 65250 }, { "epoch": 0.2522769092792751, "grad_norm": 0.10184010118246078, "learning_rate": 0.002, "loss": 2.3533, "step": 65260 }, { "epoch": 0.2523155664826584, "grad_norm": 0.11825776845216751, "learning_rate": 0.002, "loss": 2.3493, "step": 65270 }, { "epoch": 0.25235422368604166, "grad_norm": 0.11049504578113556, "learning_rate": 0.002, "loss": 2.3763, "step": 65280 }, { "epoch": 0.2523928808894249, "grad_norm": 0.11343789845705032, "learning_rate": 0.002, "loss": 2.374, "step": 65290 }, { "epoch": 0.2524315380928082, "grad_norm": 0.10674849152565002, "learning_rate": 0.002, "loss": 2.3559, "step": 65300 }, { "epoch": 0.25247019529619147, "grad_norm": 0.11158914119005203, "learning_rate": 0.002, "loss": 2.3564, "step": 65310 }, { "epoch": 0.2525088524995748, "grad_norm": 0.13002395629882812, "learning_rate": 0.002, "loss": 2.3418, "step": 65320 }, { "epoch": 0.25254750970295803, "grad_norm": 0.10284659266471863, "learning_rate": 0.002, "loss": 2.3638, "step": 65330 }, { "epoch": 0.25258616690634134, "grad_norm": 0.09975076466798782, "learning_rate": 0.002, "loss": 2.3719, "step": 65340 }, { "epoch": 0.2526248241097246, "grad_norm": 0.0994315892457962, "learning_rate": 0.002, "loss": 2.3705, "step": 65350 }, { "epoch": 0.2526634813131079, "grad_norm": 0.13250277936458588, "learning_rate": 0.002, "loss": 2.359, "step": 65360 }, { "epoch": 0.25270213851649115, "grad_norm": 0.10943359136581421, "learning_rate": 0.002, "loss": 2.3576, "step": 65370 }, { "epoch": 0.25274079571987446, "grad_norm": 0.09684669971466064, "learning_rate": 0.002, "loss": 2.3567, "step": 65380 }, { "epoch": 0.2527794529232577, "grad_norm": 0.10850505530834198, "learning_rate": 0.002, "loss": 2.3606, "step": 65390 }, { "epoch": 0.252818110126641, "grad_norm": 0.2953296899795532, "learning_rate": 0.002, "loss": 2.3732, "step": 65400 }, { "epoch": 0.25285676733002427, "grad_norm": 0.12961512804031372, "learning_rate": 0.002, "loss": 2.3708, "step": 65410 }, { "epoch": 0.2528954245334076, "grad_norm": 0.12566010653972626, "learning_rate": 0.002, "loss": 2.3606, "step": 65420 }, { "epoch": 0.25293408173679083, "grad_norm": 0.13647234439849854, "learning_rate": 0.002, "loss": 2.3656, "step": 65430 }, { "epoch": 0.25297273894017414, "grad_norm": 0.12771177291870117, "learning_rate": 0.002, "loss": 2.3691, "step": 65440 }, { "epoch": 0.2530113961435574, "grad_norm": 0.10627250373363495, "learning_rate": 0.002, "loss": 2.3773, "step": 65450 }, { "epoch": 0.2530500533469407, "grad_norm": 0.10476811975240707, "learning_rate": 0.002, "loss": 2.3561, "step": 65460 }, { "epoch": 0.25308871055032395, "grad_norm": 0.09948313981294632, "learning_rate": 0.002, "loss": 2.3472, "step": 65470 }, { "epoch": 0.2531273677537072, "grad_norm": 0.12087182700634003, "learning_rate": 0.002, "loss": 2.3424, "step": 65480 }, { "epoch": 0.2531660249570905, "grad_norm": 0.10390983521938324, "learning_rate": 0.002, "loss": 2.3571, "step": 65490 }, { "epoch": 0.25320468216047376, "grad_norm": 0.10002344846725464, "learning_rate": 0.002, "loss": 2.3445, "step": 65500 }, { "epoch": 0.25324333936385707, "grad_norm": 0.10570525377988815, "learning_rate": 0.002, "loss": 2.3783, "step": 65510 }, { "epoch": 0.2532819965672403, "grad_norm": 0.1476055532693863, "learning_rate": 0.002, "loss": 2.3691, "step": 65520 }, { "epoch": 0.2533206537706236, "grad_norm": 0.10201095044612885, "learning_rate": 0.002, "loss": 2.3431, "step": 65530 }, { "epoch": 0.2533593109740069, "grad_norm": 0.13619953393936157, "learning_rate": 0.002, "loss": 2.3691, "step": 65540 }, { "epoch": 0.2533979681773902, "grad_norm": 0.1017579585313797, "learning_rate": 0.002, "loss": 2.3511, "step": 65550 }, { "epoch": 0.25343662538077344, "grad_norm": 0.1178319901227951, "learning_rate": 0.002, "loss": 2.356, "step": 65560 }, { "epoch": 0.25347528258415675, "grad_norm": 0.11252082884311676, "learning_rate": 0.002, "loss": 2.3558, "step": 65570 }, { "epoch": 0.25351393978754, "grad_norm": 0.1279241144657135, "learning_rate": 0.002, "loss": 2.3622, "step": 65580 }, { "epoch": 0.2535525969909233, "grad_norm": 0.11474581062793732, "learning_rate": 0.002, "loss": 2.3632, "step": 65590 }, { "epoch": 0.25359125419430656, "grad_norm": 0.16519343852996826, "learning_rate": 0.002, "loss": 2.3567, "step": 65600 }, { "epoch": 0.25362991139768987, "grad_norm": 0.11091284453868866, "learning_rate": 0.002, "loss": 2.3778, "step": 65610 }, { "epoch": 0.2536685686010731, "grad_norm": 0.11500389873981476, "learning_rate": 0.002, "loss": 2.3552, "step": 65620 }, { "epoch": 0.2537072258044564, "grad_norm": 0.10793469846248627, "learning_rate": 0.002, "loss": 2.3676, "step": 65630 }, { "epoch": 0.2537458830078397, "grad_norm": 0.1048169657588005, "learning_rate": 0.002, "loss": 2.352, "step": 65640 }, { "epoch": 0.253784540211223, "grad_norm": 0.1076226532459259, "learning_rate": 0.002, "loss": 2.3553, "step": 65650 }, { "epoch": 0.25382319741460624, "grad_norm": 0.11597827076911926, "learning_rate": 0.002, "loss": 2.3554, "step": 65660 }, { "epoch": 0.2538618546179895, "grad_norm": 0.09700324386358261, "learning_rate": 0.002, "loss": 2.3541, "step": 65670 }, { "epoch": 0.2539005118213728, "grad_norm": 0.11959628015756607, "learning_rate": 0.002, "loss": 2.3666, "step": 65680 }, { "epoch": 0.25393916902475605, "grad_norm": 0.1079908162355423, "learning_rate": 0.002, "loss": 2.353, "step": 65690 }, { "epoch": 0.25397782622813936, "grad_norm": 0.1234707236289978, "learning_rate": 0.002, "loss": 2.3553, "step": 65700 }, { "epoch": 0.2540164834315226, "grad_norm": 0.10080688446760178, "learning_rate": 0.002, "loss": 2.3662, "step": 65710 }, { "epoch": 0.2540551406349059, "grad_norm": 0.11054587364196777, "learning_rate": 0.002, "loss": 2.3376, "step": 65720 }, { "epoch": 0.25409379783828917, "grad_norm": 0.12429597973823547, "learning_rate": 0.002, "loss": 2.3608, "step": 65730 }, { "epoch": 0.2541324550416725, "grad_norm": 0.09979277104139328, "learning_rate": 0.002, "loss": 2.3505, "step": 65740 }, { "epoch": 0.25417111224505573, "grad_norm": 0.14310751855373383, "learning_rate": 0.002, "loss": 2.3508, "step": 65750 }, { "epoch": 0.25420976944843904, "grad_norm": 0.11847370117902756, "learning_rate": 0.002, "loss": 2.3491, "step": 65760 }, { "epoch": 0.2542484266518223, "grad_norm": 0.11517151445150375, "learning_rate": 0.002, "loss": 2.3659, "step": 65770 }, { "epoch": 0.2542870838552056, "grad_norm": 0.11966444551944733, "learning_rate": 0.002, "loss": 2.3604, "step": 65780 }, { "epoch": 0.25432574105858885, "grad_norm": 0.12495843321084976, "learning_rate": 0.002, "loss": 2.3553, "step": 65790 }, { "epoch": 0.25436439826197216, "grad_norm": 0.11208898574113846, "learning_rate": 0.002, "loss": 2.3599, "step": 65800 }, { "epoch": 0.2544030554653554, "grad_norm": 0.1017102375626564, "learning_rate": 0.002, "loss": 2.3556, "step": 65810 }, { "epoch": 0.2544417126687387, "grad_norm": 0.10564885288476944, "learning_rate": 0.002, "loss": 2.3389, "step": 65820 }, { "epoch": 0.25448036987212197, "grad_norm": 0.11047804355621338, "learning_rate": 0.002, "loss": 2.3715, "step": 65830 }, { "epoch": 0.2545190270755052, "grad_norm": 0.11530250310897827, "learning_rate": 0.002, "loss": 2.3645, "step": 65840 }, { "epoch": 0.25455768427888853, "grad_norm": 0.1151326596736908, "learning_rate": 0.002, "loss": 2.3639, "step": 65850 }, { "epoch": 0.2545963414822718, "grad_norm": 0.14109362661838531, "learning_rate": 0.002, "loss": 2.3648, "step": 65860 }, { "epoch": 0.2546349986856551, "grad_norm": 0.10470999777317047, "learning_rate": 0.002, "loss": 2.353, "step": 65870 }, { "epoch": 0.25467365588903834, "grad_norm": 0.12141033262014389, "learning_rate": 0.002, "loss": 2.3639, "step": 65880 }, { "epoch": 0.25471231309242165, "grad_norm": 0.1011107936501503, "learning_rate": 0.002, "loss": 2.3792, "step": 65890 }, { "epoch": 0.2547509702958049, "grad_norm": 0.11235532909631729, "learning_rate": 0.002, "loss": 2.3601, "step": 65900 }, { "epoch": 0.2547896274991882, "grad_norm": 0.10267923027276993, "learning_rate": 0.002, "loss": 2.3749, "step": 65910 }, { "epoch": 0.25482828470257146, "grad_norm": 0.10743974894285202, "learning_rate": 0.002, "loss": 2.3605, "step": 65920 }, { "epoch": 0.25486694190595477, "grad_norm": 0.11289151012897491, "learning_rate": 0.002, "loss": 2.3567, "step": 65930 }, { "epoch": 0.254905599109338, "grad_norm": 0.11451549828052521, "learning_rate": 0.002, "loss": 2.363, "step": 65940 }, { "epoch": 0.25494425631272133, "grad_norm": 0.10237868130207062, "learning_rate": 0.002, "loss": 2.3459, "step": 65950 }, { "epoch": 0.2549829135161046, "grad_norm": 0.11435071378946304, "learning_rate": 0.002, "loss": 2.3579, "step": 65960 }, { "epoch": 0.2550215707194879, "grad_norm": 0.12266162037849426, "learning_rate": 0.002, "loss": 2.352, "step": 65970 }, { "epoch": 0.25506022792287114, "grad_norm": 0.10688599199056625, "learning_rate": 0.002, "loss": 2.3541, "step": 65980 }, { "epoch": 0.25509888512625445, "grad_norm": 0.10259024798870087, "learning_rate": 0.002, "loss": 2.3671, "step": 65990 }, { "epoch": 0.2551375423296377, "grad_norm": 0.11779743432998657, "learning_rate": 0.002, "loss": 2.3572, "step": 66000 }, { "epoch": 0.255176199533021, "grad_norm": 0.12171625345945358, "learning_rate": 0.002, "loss": 2.3522, "step": 66010 }, { "epoch": 0.25521485673640426, "grad_norm": 0.11175300180912018, "learning_rate": 0.002, "loss": 2.3629, "step": 66020 }, { "epoch": 0.2552535139397875, "grad_norm": 0.12124433368444443, "learning_rate": 0.002, "loss": 2.3434, "step": 66030 }, { "epoch": 0.2552921711431708, "grad_norm": 0.11927662044763565, "learning_rate": 0.002, "loss": 2.3518, "step": 66040 }, { "epoch": 0.2553308283465541, "grad_norm": 0.11759792268276215, "learning_rate": 0.002, "loss": 2.3528, "step": 66050 }, { "epoch": 0.2553694855499374, "grad_norm": 0.09895379096269608, "learning_rate": 0.002, "loss": 2.3473, "step": 66060 }, { "epoch": 0.25540814275332063, "grad_norm": 0.1130814403295517, "learning_rate": 0.002, "loss": 2.3596, "step": 66070 }, { "epoch": 0.25544679995670394, "grad_norm": 0.10689357668161392, "learning_rate": 0.002, "loss": 2.345, "step": 66080 }, { "epoch": 0.2554854571600872, "grad_norm": 0.11816614866256714, "learning_rate": 0.002, "loss": 2.3531, "step": 66090 }, { "epoch": 0.2555241143634705, "grad_norm": 0.11300661414861679, "learning_rate": 0.002, "loss": 2.3462, "step": 66100 }, { "epoch": 0.25556277156685375, "grad_norm": 0.11420150101184845, "learning_rate": 0.002, "loss": 2.3581, "step": 66110 }, { "epoch": 0.25560142877023706, "grad_norm": 0.10286738723516464, "learning_rate": 0.002, "loss": 2.3471, "step": 66120 }, { "epoch": 0.2556400859736203, "grad_norm": 0.11612996459007263, "learning_rate": 0.002, "loss": 2.3439, "step": 66130 }, { "epoch": 0.2556787431770036, "grad_norm": 0.13170316815376282, "learning_rate": 0.002, "loss": 2.3451, "step": 66140 }, { "epoch": 0.2557174003803869, "grad_norm": 0.10963205248117447, "learning_rate": 0.002, "loss": 2.3461, "step": 66150 }, { "epoch": 0.2557560575837702, "grad_norm": 0.1187463030219078, "learning_rate": 0.002, "loss": 2.3471, "step": 66160 }, { "epoch": 0.25579471478715343, "grad_norm": 0.114934541285038, "learning_rate": 0.002, "loss": 2.3546, "step": 66170 }, { "epoch": 0.25583337199053674, "grad_norm": 0.10894732922315598, "learning_rate": 0.002, "loss": 2.3632, "step": 66180 }, { "epoch": 0.25587202919392, "grad_norm": 0.11884298920631409, "learning_rate": 0.002, "loss": 2.3536, "step": 66190 }, { "epoch": 0.2559106863973033, "grad_norm": 0.1013733372092247, "learning_rate": 0.002, "loss": 2.3454, "step": 66200 }, { "epoch": 0.25594934360068655, "grad_norm": 0.11026433855295181, "learning_rate": 0.002, "loss": 2.3577, "step": 66210 }, { "epoch": 0.2559880008040698, "grad_norm": 0.09665088355541229, "learning_rate": 0.002, "loss": 2.3622, "step": 66220 }, { "epoch": 0.2560266580074531, "grad_norm": 0.13314181566238403, "learning_rate": 0.002, "loss": 2.3464, "step": 66230 }, { "epoch": 0.25606531521083636, "grad_norm": 0.10690948367118835, "learning_rate": 0.002, "loss": 2.3489, "step": 66240 }, { "epoch": 0.25610397241421967, "grad_norm": 0.11940506845712662, "learning_rate": 0.002, "loss": 2.3632, "step": 66250 }, { "epoch": 0.2561426296176029, "grad_norm": 0.13842318952083588, "learning_rate": 0.002, "loss": 2.3714, "step": 66260 }, { "epoch": 0.25618128682098623, "grad_norm": 0.11469965428113937, "learning_rate": 0.002, "loss": 2.3336, "step": 66270 }, { "epoch": 0.2562199440243695, "grad_norm": 0.09851006418466568, "learning_rate": 0.002, "loss": 2.3636, "step": 66280 }, { "epoch": 0.2562586012277528, "grad_norm": 0.11406917124986649, "learning_rate": 0.002, "loss": 2.3511, "step": 66290 }, { "epoch": 0.25629725843113604, "grad_norm": 0.11309903860092163, "learning_rate": 0.002, "loss": 2.3631, "step": 66300 }, { "epoch": 0.25633591563451935, "grad_norm": 0.11170071363449097, "learning_rate": 0.002, "loss": 2.3813, "step": 66310 }, { "epoch": 0.2563745728379026, "grad_norm": 0.10675996541976929, "learning_rate": 0.002, "loss": 2.3623, "step": 66320 }, { "epoch": 0.2564132300412859, "grad_norm": 0.1084975078701973, "learning_rate": 0.002, "loss": 2.3499, "step": 66330 }, { "epoch": 0.25645188724466916, "grad_norm": 0.12220773845911026, "learning_rate": 0.002, "loss": 2.3781, "step": 66340 }, { "epoch": 0.25649054444805247, "grad_norm": 0.12375793606042862, "learning_rate": 0.002, "loss": 2.3465, "step": 66350 }, { "epoch": 0.2565292016514357, "grad_norm": 0.12166187912225723, "learning_rate": 0.002, "loss": 2.3485, "step": 66360 }, { "epoch": 0.25656785885481903, "grad_norm": 0.10858607292175293, "learning_rate": 0.002, "loss": 2.3506, "step": 66370 }, { "epoch": 0.2566065160582023, "grad_norm": 0.10314149409532547, "learning_rate": 0.002, "loss": 2.3552, "step": 66380 }, { "epoch": 0.2566451732615856, "grad_norm": 0.13672682642936707, "learning_rate": 0.002, "loss": 2.3514, "step": 66390 }, { "epoch": 0.25668383046496884, "grad_norm": 0.11390886455774307, "learning_rate": 0.002, "loss": 2.3482, "step": 66400 }, { "epoch": 0.2567224876683521, "grad_norm": 0.1029527485370636, "learning_rate": 0.002, "loss": 2.362, "step": 66410 }, { "epoch": 0.2567611448717354, "grad_norm": 0.1255478709936142, "learning_rate": 0.002, "loss": 2.3635, "step": 66420 }, { "epoch": 0.25679980207511865, "grad_norm": 0.10678042471408844, "learning_rate": 0.002, "loss": 2.3663, "step": 66430 }, { "epoch": 0.25683845927850196, "grad_norm": 0.11441392451524734, "learning_rate": 0.002, "loss": 2.3594, "step": 66440 }, { "epoch": 0.2568771164818852, "grad_norm": 0.10975232720375061, "learning_rate": 0.002, "loss": 2.3706, "step": 66450 }, { "epoch": 0.2569157736852685, "grad_norm": 0.12330281734466553, "learning_rate": 0.002, "loss": 2.3572, "step": 66460 }, { "epoch": 0.2569544308886518, "grad_norm": 0.1308504343032837, "learning_rate": 0.002, "loss": 2.3534, "step": 66470 }, { "epoch": 0.2569930880920351, "grad_norm": 0.11605305969715118, "learning_rate": 0.002, "loss": 2.3442, "step": 66480 }, { "epoch": 0.25703174529541833, "grad_norm": 0.10903593897819519, "learning_rate": 0.002, "loss": 2.3433, "step": 66490 }, { "epoch": 0.25707040249880164, "grad_norm": 0.12700045108795166, "learning_rate": 0.002, "loss": 2.3597, "step": 66500 }, { "epoch": 0.2571090597021849, "grad_norm": 0.09870582073926926, "learning_rate": 0.002, "loss": 2.3531, "step": 66510 }, { "epoch": 0.2571477169055682, "grad_norm": 0.09971556067466736, "learning_rate": 0.002, "loss": 2.3657, "step": 66520 }, { "epoch": 0.25718637410895145, "grad_norm": 0.10839555412530899, "learning_rate": 0.002, "loss": 2.3611, "step": 66530 }, { "epoch": 0.25722503131233476, "grad_norm": 0.11747601628303528, "learning_rate": 0.002, "loss": 2.3704, "step": 66540 }, { "epoch": 0.257263688515718, "grad_norm": 0.11553992331027985, "learning_rate": 0.002, "loss": 2.3618, "step": 66550 }, { "epoch": 0.2573023457191013, "grad_norm": 0.12473028898239136, "learning_rate": 0.002, "loss": 2.367, "step": 66560 }, { "epoch": 0.2573410029224846, "grad_norm": 0.10672403872013092, "learning_rate": 0.002, "loss": 2.364, "step": 66570 }, { "epoch": 0.2573796601258679, "grad_norm": 0.1254693865776062, "learning_rate": 0.002, "loss": 2.3577, "step": 66580 }, { "epoch": 0.25741831732925113, "grad_norm": 0.12008104473352432, "learning_rate": 0.002, "loss": 2.3596, "step": 66590 }, { "epoch": 0.2574569745326344, "grad_norm": 0.11767129600048065, "learning_rate": 0.002, "loss": 2.3547, "step": 66600 }, { "epoch": 0.2574956317360177, "grad_norm": 0.11705000698566437, "learning_rate": 0.002, "loss": 2.3489, "step": 66610 }, { "epoch": 0.25753428893940095, "grad_norm": 0.11520517617464066, "learning_rate": 0.002, "loss": 2.3509, "step": 66620 }, { "epoch": 0.25757294614278425, "grad_norm": 0.10603370517492294, "learning_rate": 0.002, "loss": 2.3572, "step": 66630 }, { "epoch": 0.2576116033461675, "grad_norm": 0.14179281890392303, "learning_rate": 0.002, "loss": 2.3703, "step": 66640 }, { "epoch": 0.2576502605495508, "grad_norm": 0.10854744166135788, "learning_rate": 0.002, "loss": 2.3547, "step": 66650 }, { "epoch": 0.25768891775293407, "grad_norm": 0.10364288836717606, "learning_rate": 0.002, "loss": 2.3662, "step": 66660 }, { "epoch": 0.2577275749563174, "grad_norm": 0.11076736450195312, "learning_rate": 0.002, "loss": 2.3433, "step": 66670 }, { "epoch": 0.2577662321597006, "grad_norm": 0.12479634582996368, "learning_rate": 0.002, "loss": 2.3433, "step": 66680 }, { "epoch": 0.25780488936308393, "grad_norm": 0.11340264976024628, "learning_rate": 0.002, "loss": 2.3591, "step": 66690 }, { "epoch": 0.2578435465664672, "grad_norm": 0.13055060803890228, "learning_rate": 0.002, "loss": 2.3518, "step": 66700 }, { "epoch": 0.2578822037698505, "grad_norm": 0.09846454113721848, "learning_rate": 0.002, "loss": 2.3482, "step": 66710 }, { "epoch": 0.25792086097323375, "grad_norm": 0.1017606109380722, "learning_rate": 0.002, "loss": 2.3529, "step": 66720 }, { "epoch": 0.25795951817661705, "grad_norm": 0.11539702862501144, "learning_rate": 0.002, "loss": 2.3615, "step": 66730 }, { "epoch": 0.2579981753800003, "grad_norm": 0.1274324655532837, "learning_rate": 0.002, "loss": 2.3561, "step": 66740 }, { "epoch": 0.2580368325833836, "grad_norm": 0.11746063828468323, "learning_rate": 0.002, "loss": 2.3302, "step": 66750 }, { "epoch": 0.25807548978676687, "grad_norm": 0.11583682149648666, "learning_rate": 0.002, "loss": 2.3486, "step": 66760 }, { "epoch": 0.2581141469901501, "grad_norm": 0.11930841952562332, "learning_rate": 0.002, "loss": 2.3447, "step": 66770 }, { "epoch": 0.2581528041935334, "grad_norm": 0.10679621249437332, "learning_rate": 0.002, "loss": 2.3702, "step": 66780 }, { "epoch": 0.2581914613969167, "grad_norm": 0.11766018718481064, "learning_rate": 0.002, "loss": 2.3521, "step": 66790 }, { "epoch": 0.2582301186003, "grad_norm": 0.1263052523136139, "learning_rate": 0.002, "loss": 2.3481, "step": 66800 }, { "epoch": 0.25826877580368324, "grad_norm": 0.10427229851484299, "learning_rate": 0.002, "loss": 2.3478, "step": 66810 }, { "epoch": 0.25830743300706654, "grad_norm": 0.11562903225421906, "learning_rate": 0.002, "loss": 2.3492, "step": 66820 }, { "epoch": 0.2583460902104498, "grad_norm": 0.11342374235391617, "learning_rate": 0.002, "loss": 2.3659, "step": 66830 }, { "epoch": 0.2583847474138331, "grad_norm": 0.11070325970649719, "learning_rate": 0.002, "loss": 2.3573, "step": 66840 }, { "epoch": 0.25842340461721636, "grad_norm": 0.1250879019498825, "learning_rate": 0.002, "loss": 2.3507, "step": 66850 }, { "epoch": 0.25846206182059966, "grad_norm": 0.10222569853067398, "learning_rate": 0.002, "loss": 2.3553, "step": 66860 }, { "epoch": 0.2585007190239829, "grad_norm": 0.11579307913780212, "learning_rate": 0.002, "loss": 2.3366, "step": 66870 }, { "epoch": 0.2585393762273662, "grad_norm": 0.12290211766958237, "learning_rate": 0.002, "loss": 2.3525, "step": 66880 }, { "epoch": 0.2585780334307495, "grad_norm": 0.10187076777219772, "learning_rate": 0.002, "loss": 2.3559, "step": 66890 }, { "epoch": 0.2586166906341328, "grad_norm": 0.10334745049476624, "learning_rate": 0.002, "loss": 2.3551, "step": 66900 }, { "epoch": 0.25865534783751604, "grad_norm": 0.1190209835767746, "learning_rate": 0.002, "loss": 2.3546, "step": 66910 }, { "epoch": 0.25869400504089934, "grad_norm": 0.36490097641944885, "learning_rate": 0.002, "loss": 2.3556, "step": 66920 }, { "epoch": 0.2587326622442826, "grad_norm": 0.13372598588466644, "learning_rate": 0.002, "loss": 2.347, "step": 66930 }, { "epoch": 0.2587713194476659, "grad_norm": 0.10478980094194412, "learning_rate": 0.002, "loss": 2.371, "step": 66940 }, { "epoch": 0.25880997665104916, "grad_norm": 0.10454052686691284, "learning_rate": 0.002, "loss": 2.3611, "step": 66950 }, { "epoch": 0.2588486338544324, "grad_norm": 0.12227415293455124, "learning_rate": 0.002, "loss": 2.3624, "step": 66960 }, { "epoch": 0.2588872910578157, "grad_norm": 0.1253330558538437, "learning_rate": 0.002, "loss": 2.3559, "step": 66970 }, { "epoch": 0.25892594826119897, "grad_norm": 0.10769782960414886, "learning_rate": 0.002, "loss": 2.35, "step": 66980 }, { "epoch": 0.2589646054645823, "grad_norm": 0.11193307489156723, "learning_rate": 0.002, "loss": 2.3437, "step": 66990 }, { "epoch": 0.25900326266796553, "grad_norm": 0.12134253233671188, "learning_rate": 0.002, "loss": 2.3575, "step": 67000 }, { "epoch": 0.25904191987134884, "grad_norm": 0.11123788356781006, "learning_rate": 0.002, "loss": 2.3521, "step": 67010 }, { "epoch": 0.2590805770747321, "grad_norm": 0.11672520637512207, "learning_rate": 0.002, "loss": 2.3487, "step": 67020 }, { "epoch": 0.2591192342781154, "grad_norm": 0.12144597619771957, "learning_rate": 0.002, "loss": 2.3618, "step": 67030 }, { "epoch": 0.25915789148149865, "grad_norm": 0.12573914229869843, "learning_rate": 0.002, "loss": 2.3681, "step": 67040 }, { "epoch": 0.25919654868488196, "grad_norm": 0.1065409928560257, "learning_rate": 0.002, "loss": 2.3424, "step": 67050 }, { "epoch": 0.2592352058882652, "grad_norm": 0.23819462954998016, "learning_rate": 0.002, "loss": 2.3374, "step": 67060 }, { "epoch": 0.2592738630916485, "grad_norm": 0.11490265280008316, "learning_rate": 0.002, "loss": 2.3501, "step": 67070 }, { "epoch": 0.25931252029503177, "grad_norm": 0.11505532264709473, "learning_rate": 0.002, "loss": 2.3621, "step": 67080 }, { "epoch": 0.2593511774984151, "grad_norm": 0.11441343277692795, "learning_rate": 0.002, "loss": 2.3543, "step": 67090 }, { "epoch": 0.2593898347017983, "grad_norm": 0.09996909648180008, "learning_rate": 0.002, "loss": 2.346, "step": 67100 }, { "epoch": 0.25942849190518164, "grad_norm": 0.1097947359085083, "learning_rate": 0.002, "loss": 2.3676, "step": 67110 }, { "epoch": 0.2594671491085649, "grad_norm": 0.12589269876480103, "learning_rate": 0.002, "loss": 2.3542, "step": 67120 }, { "epoch": 0.2595058063119482, "grad_norm": 0.09830565750598907, "learning_rate": 0.002, "loss": 2.3458, "step": 67130 }, { "epoch": 0.25954446351533145, "grad_norm": 0.10417470335960388, "learning_rate": 0.002, "loss": 2.3498, "step": 67140 }, { "epoch": 0.2595831207187147, "grad_norm": 0.11758775264024734, "learning_rate": 0.002, "loss": 2.3445, "step": 67150 }, { "epoch": 0.259621777922098, "grad_norm": 0.13093090057373047, "learning_rate": 0.002, "loss": 2.3498, "step": 67160 }, { "epoch": 0.25966043512548126, "grad_norm": 0.18829749524593353, "learning_rate": 0.002, "loss": 2.3675, "step": 67170 }, { "epoch": 0.25969909232886457, "grad_norm": 0.12005820125341415, "learning_rate": 0.002, "loss": 2.3572, "step": 67180 }, { "epoch": 0.2597377495322478, "grad_norm": 0.10506993532180786, "learning_rate": 0.002, "loss": 2.368, "step": 67190 }, { "epoch": 0.2597764067356311, "grad_norm": 0.09339757263660431, "learning_rate": 0.002, "loss": 2.3525, "step": 67200 }, { "epoch": 0.2598150639390144, "grad_norm": 0.11311523616313934, "learning_rate": 0.002, "loss": 2.3597, "step": 67210 }, { "epoch": 0.2598537211423977, "grad_norm": 0.13281090557575226, "learning_rate": 0.002, "loss": 2.349, "step": 67220 }, { "epoch": 0.25989237834578094, "grad_norm": 0.11049260199069977, "learning_rate": 0.002, "loss": 2.3398, "step": 67230 }, { "epoch": 0.25993103554916425, "grad_norm": 0.10846215486526489, "learning_rate": 0.002, "loss": 2.3431, "step": 67240 }, { "epoch": 0.2599696927525475, "grad_norm": 0.11961953341960907, "learning_rate": 0.002, "loss": 2.356, "step": 67250 }, { "epoch": 0.2600083499559308, "grad_norm": 0.14370691776275635, "learning_rate": 0.002, "loss": 2.3709, "step": 67260 }, { "epoch": 0.26004700715931406, "grad_norm": 0.11629011482000351, "learning_rate": 0.002, "loss": 2.3495, "step": 67270 }, { "epoch": 0.26008566436269737, "grad_norm": 0.09897922724485397, "learning_rate": 0.002, "loss": 2.3466, "step": 67280 }, { "epoch": 0.2601243215660806, "grad_norm": 0.13134950399398804, "learning_rate": 0.002, "loss": 2.362, "step": 67290 }, { "epoch": 0.2601629787694639, "grad_norm": 0.155740886926651, "learning_rate": 0.002, "loss": 2.3531, "step": 67300 }, { "epoch": 0.2602016359728472, "grad_norm": 0.11856327950954437, "learning_rate": 0.002, "loss": 2.3562, "step": 67310 }, { "epoch": 0.2602402931762305, "grad_norm": 0.10641349107027054, "learning_rate": 0.002, "loss": 2.356, "step": 67320 }, { "epoch": 0.26027895037961374, "grad_norm": 0.11414900422096252, "learning_rate": 0.002, "loss": 2.3588, "step": 67330 }, { "epoch": 0.260317607582997, "grad_norm": 0.1279212087392807, "learning_rate": 0.002, "loss": 2.3705, "step": 67340 }, { "epoch": 0.2603562647863803, "grad_norm": 0.09392930567264557, "learning_rate": 0.002, "loss": 2.3703, "step": 67350 }, { "epoch": 0.26039492198976355, "grad_norm": 0.1068810224533081, "learning_rate": 0.002, "loss": 2.3616, "step": 67360 }, { "epoch": 0.26043357919314686, "grad_norm": 0.10254529863595963, "learning_rate": 0.002, "loss": 2.3574, "step": 67370 }, { "epoch": 0.2604722363965301, "grad_norm": 0.10807019472122192, "learning_rate": 0.002, "loss": 2.351, "step": 67380 }, { "epoch": 0.2605108935999134, "grad_norm": 0.11821964383125305, "learning_rate": 0.002, "loss": 2.357, "step": 67390 }, { "epoch": 0.26054955080329667, "grad_norm": 0.11181916296482086, "learning_rate": 0.002, "loss": 2.3528, "step": 67400 }, { "epoch": 0.26058820800668, "grad_norm": 0.11617095023393631, "learning_rate": 0.002, "loss": 2.3582, "step": 67410 }, { "epoch": 0.26062686521006323, "grad_norm": 0.10184059292078018, "learning_rate": 0.002, "loss": 2.3537, "step": 67420 }, { "epoch": 0.26066552241344654, "grad_norm": 0.11712180823087692, "learning_rate": 0.002, "loss": 2.357, "step": 67430 }, { "epoch": 0.2607041796168298, "grad_norm": 0.10022317618131638, "learning_rate": 0.002, "loss": 2.3511, "step": 67440 }, { "epoch": 0.2607428368202131, "grad_norm": 0.10569388419389725, "learning_rate": 0.002, "loss": 2.3507, "step": 67450 }, { "epoch": 0.26078149402359635, "grad_norm": 0.10248992592096329, "learning_rate": 0.002, "loss": 2.3501, "step": 67460 }, { "epoch": 0.26082015122697966, "grad_norm": 0.1069984957575798, "learning_rate": 0.002, "loss": 2.3707, "step": 67470 }, { "epoch": 0.2608588084303629, "grad_norm": 0.1299649029970169, "learning_rate": 0.002, "loss": 2.3574, "step": 67480 }, { "epoch": 0.2608974656337462, "grad_norm": 0.37685492634773254, "learning_rate": 0.002, "loss": 2.386, "step": 67490 }, { "epoch": 0.26093612283712947, "grad_norm": 0.3009447157382965, "learning_rate": 0.002, "loss": 2.3708, "step": 67500 }, { "epoch": 0.2609747800405127, "grad_norm": 0.12166126817464828, "learning_rate": 0.002, "loss": 2.3536, "step": 67510 }, { "epoch": 0.26101343724389603, "grad_norm": 0.1006769984960556, "learning_rate": 0.002, "loss": 2.3713, "step": 67520 }, { "epoch": 0.2610520944472793, "grad_norm": 0.11278746277093887, "learning_rate": 0.002, "loss": 2.3659, "step": 67530 }, { "epoch": 0.2610907516506626, "grad_norm": 0.11016260087490082, "learning_rate": 0.002, "loss": 2.355, "step": 67540 }, { "epoch": 0.26112940885404584, "grad_norm": 0.1064244732260704, "learning_rate": 0.002, "loss": 2.3585, "step": 67550 }, { "epoch": 0.26116806605742915, "grad_norm": 0.11934314668178558, "learning_rate": 0.002, "loss": 2.3688, "step": 67560 }, { "epoch": 0.2612067232608124, "grad_norm": 0.11332755535840988, "learning_rate": 0.002, "loss": 2.3459, "step": 67570 }, { "epoch": 0.2612453804641957, "grad_norm": 0.12420445680618286, "learning_rate": 0.002, "loss": 2.3374, "step": 67580 }, { "epoch": 0.26128403766757896, "grad_norm": 0.11511637270450592, "learning_rate": 0.002, "loss": 2.3458, "step": 67590 }, { "epoch": 0.26132269487096227, "grad_norm": 0.11553613096475601, "learning_rate": 0.002, "loss": 2.3639, "step": 67600 }, { "epoch": 0.2613613520743455, "grad_norm": 0.1271434724330902, "learning_rate": 0.002, "loss": 2.3514, "step": 67610 }, { "epoch": 0.26140000927772883, "grad_norm": 0.11405181139707565, "learning_rate": 0.002, "loss": 2.3537, "step": 67620 }, { "epoch": 0.2614386664811121, "grad_norm": 0.11136946082115173, "learning_rate": 0.002, "loss": 2.3643, "step": 67630 }, { "epoch": 0.2614773236844954, "grad_norm": 0.1222151666879654, "learning_rate": 0.002, "loss": 2.3514, "step": 67640 }, { "epoch": 0.26151598088787864, "grad_norm": 0.11218507587909698, "learning_rate": 0.002, "loss": 2.3579, "step": 67650 }, { "epoch": 0.26155463809126195, "grad_norm": 0.10514702647924423, "learning_rate": 0.002, "loss": 2.3681, "step": 67660 }, { "epoch": 0.2615932952946452, "grad_norm": 0.11863667517900467, "learning_rate": 0.002, "loss": 2.3531, "step": 67670 }, { "epoch": 0.2616319524980285, "grad_norm": 0.10940532386302948, "learning_rate": 0.002, "loss": 2.3596, "step": 67680 }, { "epoch": 0.26167060970141176, "grad_norm": 0.11782362312078476, "learning_rate": 0.002, "loss": 2.35, "step": 67690 }, { "epoch": 0.261709266904795, "grad_norm": 0.11867765337228775, "learning_rate": 0.002, "loss": 2.3768, "step": 67700 }, { "epoch": 0.2617479241081783, "grad_norm": 0.10951172560453415, "learning_rate": 0.002, "loss": 2.3568, "step": 67710 }, { "epoch": 0.2617865813115616, "grad_norm": 0.11456424742937088, "learning_rate": 0.002, "loss": 2.3613, "step": 67720 }, { "epoch": 0.2618252385149449, "grad_norm": 0.11477063596248627, "learning_rate": 0.002, "loss": 2.3629, "step": 67730 }, { "epoch": 0.26186389571832813, "grad_norm": 0.11264248192310333, "learning_rate": 0.002, "loss": 2.3576, "step": 67740 }, { "epoch": 0.26190255292171144, "grad_norm": 0.10030915588140488, "learning_rate": 0.002, "loss": 2.3456, "step": 67750 }, { "epoch": 0.2619412101250947, "grad_norm": 0.10117532312870026, "learning_rate": 0.002, "loss": 2.3609, "step": 67760 }, { "epoch": 0.261979867328478, "grad_norm": 0.11972854286432266, "learning_rate": 0.002, "loss": 2.348, "step": 67770 }, { "epoch": 0.26201852453186125, "grad_norm": 0.11656410247087479, "learning_rate": 0.002, "loss": 2.3502, "step": 67780 }, { "epoch": 0.26205718173524456, "grad_norm": 0.12312465161085129, "learning_rate": 0.002, "loss": 2.3531, "step": 67790 }, { "epoch": 0.2620958389386278, "grad_norm": 0.11731352657079697, "learning_rate": 0.002, "loss": 2.3558, "step": 67800 }, { "epoch": 0.2621344961420111, "grad_norm": 0.1069423109292984, "learning_rate": 0.002, "loss": 2.3591, "step": 67810 }, { "epoch": 0.26217315334539437, "grad_norm": 0.09852375090122223, "learning_rate": 0.002, "loss": 2.3695, "step": 67820 }, { "epoch": 0.2622118105487777, "grad_norm": 0.1118987649679184, "learning_rate": 0.002, "loss": 2.3491, "step": 67830 }, { "epoch": 0.26225046775216093, "grad_norm": 0.12264939397573471, "learning_rate": 0.002, "loss": 2.361, "step": 67840 }, { "epoch": 0.26228912495554424, "grad_norm": 0.10814981162548065, "learning_rate": 0.002, "loss": 2.345, "step": 67850 }, { "epoch": 0.2623277821589275, "grad_norm": 0.10167591273784637, "learning_rate": 0.002, "loss": 2.3565, "step": 67860 }, { "epoch": 0.2623664393623108, "grad_norm": 0.10804091393947601, "learning_rate": 0.002, "loss": 2.3613, "step": 67870 }, { "epoch": 0.26240509656569405, "grad_norm": 0.14333422482013702, "learning_rate": 0.002, "loss": 2.3727, "step": 67880 }, { "epoch": 0.2624437537690773, "grad_norm": 0.13869328796863556, "learning_rate": 0.002, "loss": 2.3564, "step": 67890 }, { "epoch": 0.2624824109724606, "grad_norm": 0.10745614022016525, "learning_rate": 0.002, "loss": 2.3594, "step": 67900 }, { "epoch": 0.26252106817584386, "grad_norm": 0.10607896000146866, "learning_rate": 0.002, "loss": 2.3672, "step": 67910 }, { "epoch": 0.26255972537922717, "grad_norm": 0.10167323052883148, "learning_rate": 0.002, "loss": 2.3662, "step": 67920 }, { "epoch": 0.2625983825826104, "grad_norm": 0.09899301081895828, "learning_rate": 0.002, "loss": 2.3704, "step": 67930 }, { "epoch": 0.26263703978599373, "grad_norm": 0.11459238082170486, "learning_rate": 0.002, "loss": 2.3586, "step": 67940 }, { "epoch": 0.262675696989377, "grad_norm": 0.11651584506034851, "learning_rate": 0.002, "loss": 2.3587, "step": 67950 }, { "epoch": 0.2627143541927603, "grad_norm": 0.10737515985965729, "learning_rate": 0.002, "loss": 2.3571, "step": 67960 }, { "epoch": 0.26275301139614354, "grad_norm": 0.1321590393781662, "learning_rate": 0.002, "loss": 2.3513, "step": 67970 }, { "epoch": 0.26279166859952685, "grad_norm": 0.1289292573928833, "learning_rate": 0.002, "loss": 2.3537, "step": 67980 }, { "epoch": 0.2628303258029101, "grad_norm": 0.11426378786563873, "learning_rate": 0.002, "loss": 2.3566, "step": 67990 }, { "epoch": 0.2628689830062934, "grad_norm": 0.120346799492836, "learning_rate": 0.002, "loss": 2.3688, "step": 68000 }, { "epoch": 0.26290764020967666, "grad_norm": 0.12574881315231323, "learning_rate": 0.002, "loss": 2.3489, "step": 68010 }, { "epoch": 0.26294629741305997, "grad_norm": 0.09758058190345764, "learning_rate": 0.002, "loss": 2.3672, "step": 68020 }, { "epoch": 0.2629849546164432, "grad_norm": 0.10797390341758728, "learning_rate": 0.002, "loss": 2.3667, "step": 68030 }, { "epoch": 0.26302361181982653, "grad_norm": 0.09947437047958374, "learning_rate": 0.002, "loss": 2.3586, "step": 68040 }, { "epoch": 0.2630622690232098, "grad_norm": 0.10770823061466217, "learning_rate": 0.002, "loss": 2.3493, "step": 68050 }, { "epoch": 0.2631009262265931, "grad_norm": 0.10565075278282166, "learning_rate": 0.002, "loss": 2.3483, "step": 68060 }, { "epoch": 0.26313958342997634, "grad_norm": 0.11738776415586472, "learning_rate": 0.002, "loss": 2.3553, "step": 68070 }, { "epoch": 0.2631782406333596, "grad_norm": 0.13340911269187927, "learning_rate": 0.002, "loss": 2.3502, "step": 68080 }, { "epoch": 0.2632168978367429, "grad_norm": 0.10474201291799545, "learning_rate": 0.002, "loss": 2.3605, "step": 68090 }, { "epoch": 0.26325555504012615, "grad_norm": 0.14180724322795868, "learning_rate": 0.002, "loss": 2.352, "step": 68100 }, { "epoch": 0.26329421224350946, "grad_norm": 0.12632973492145538, "learning_rate": 0.002, "loss": 2.3603, "step": 68110 }, { "epoch": 0.2633328694468927, "grad_norm": 0.10560580343008041, "learning_rate": 0.002, "loss": 2.3565, "step": 68120 }, { "epoch": 0.263371526650276, "grad_norm": 0.09889001399278641, "learning_rate": 0.002, "loss": 2.3592, "step": 68130 }, { "epoch": 0.2634101838536593, "grad_norm": 0.15841642022132874, "learning_rate": 0.002, "loss": 2.3621, "step": 68140 }, { "epoch": 0.2634488410570426, "grad_norm": 0.09572744369506836, "learning_rate": 0.002, "loss": 2.3355, "step": 68150 }, { "epoch": 0.26348749826042583, "grad_norm": 0.10834426432847977, "learning_rate": 0.002, "loss": 2.3574, "step": 68160 }, { "epoch": 0.26352615546380914, "grad_norm": 0.11525410413742065, "learning_rate": 0.002, "loss": 2.345, "step": 68170 }, { "epoch": 0.2635648126671924, "grad_norm": 0.11463785916566849, "learning_rate": 0.002, "loss": 2.3694, "step": 68180 }, { "epoch": 0.2636034698705757, "grad_norm": 0.11806154251098633, "learning_rate": 0.002, "loss": 2.3637, "step": 68190 }, { "epoch": 0.26364212707395895, "grad_norm": 0.11061472445726395, "learning_rate": 0.002, "loss": 2.3573, "step": 68200 }, { "epoch": 0.26368078427734226, "grad_norm": 0.10829704999923706, "learning_rate": 0.002, "loss": 2.3406, "step": 68210 }, { "epoch": 0.2637194414807255, "grad_norm": 0.11386598646640778, "learning_rate": 0.002, "loss": 2.3628, "step": 68220 }, { "epoch": 0.2637580986841088, "grad_norm": 0.10931659489870071, "learning_rate": 0.002, "loss": 2.3472, "step": 68230 }, { "epoch": 0.2637967558874921, "grad_norm": 0.1091981828212738, "learning_rate": 0.002, "loss": 2.3594, "step": 68240 }, { "epoch": 0.2638354130908754, "grad_norm": 0.13077843189239502, "learning_rate": 0.002, "loss": 2.3493, "step": 68250 }, { "epoch": 0.26387407029425863, "grad_norm": 0.13880740106105804, "learning_rate": 0.002, "loss": 2.3417, "step": 68260 }, { "epoch": 0.2639127274976419, "grad_norm": 0.11458608508110046, "learning_rate": 0.002, "loss": 2.361, "step": 68270 }, { "epoch": 0.2639513847010252, "grad_norm": 0.14922989904880524, "learning_rate": 0.002, "loss": 2.3575, "step": 68280 }, { "epoch": 0.26399004190440845, "grad_norm": 0.11000477522611618, "learning_rate": 0.002, "loss": 2.3596, "step": 68290 }, { "epoch": 0.26402869910779175, "grad_norm": 0.09621061384677887, "learning_rate": 0.002, "loss": 2.3497, "step": 68300 }, { "epoch": 0.264067356311175, "grad_norm": 0.1066533625125885, "learning_rate": 0.002, "loss": 2.3644, "step": 68310 }, { "epoch": 0.2641060135145583, "grad_norm": 0.1137138158082962, "learning_rate": 0.002, "loss": 2.3519, "step": 68320 }, { "epoch": 0.26414467071794157, "grad_norm": 0.10997021198272705, "learning_rate": 0.002, "loss": 2.368, "step": 68330 }, { "epoch": 0.2641833279213249, "grad_norm": 0.10986141115427017, "learning_rate": 0.002, "loss": 2.3601, "step": 68340 }, { "epoch": 0.2642219851247081, "grad_norm": 0.12483604997396469, "learning_rate": 0.002, "loss": 2.3606, "step": 68350 }, { "epoch": 0.26426064232809143, "grad_norm": 0.10350396484136581, "learning_rate": 0.002, "loss": 2.3674, "step": 68360 }, { "epoch": 0.2642992995314747, "grad_norm": 0.09706956893205643, "learning_rate": 0.002, "loss": 2.3488, "step": 68370 }, { "epoch": 0.264337956734858, "grad_norm": 0.10872679203748703, "learning_rate": 0.002, "loss": 2.3548, "step": 68380 }, { "epoch": 0.26437661393824125, "grad_norm": 0.12254256755113602, "learning_rate": 0.002, "loss": 2.3505, "step": 68390 }, { "epoch": 0.26441527114162455, "grad_norm": 0.11372974514961243, "learning_rate": 0.002, "loss": 2.3525, "step": 68400 }, { "epoch": 0.2644539283450078, "grad_norm": 0.1338338702917099, "learning_rate": 0.002, "loss": 2.3663, "step": 68410 }, { "epoch": 0.2644925855483911, "grad_norm": 0.10851424187421799, "learning_rate": 0.002, "loss": 2.3555, "step": 68420 }, { "epoch": 0.26453124275177436, "grad_norm": 0.1281265914440155, "learning_rate": 0.002, "loss": 2.3456, "step": 68430 }, { "epoch": 0.2645698999551576, "grad_norm": 0.18946754932403564, "learning_rate": 0.002, "loss": 2.351, "step": 68440 }, { "epoch": 0.2646085571585409, "grad_norm": 0.11525660008192062, "learning_rate": 0.002, "loss": 2.3584, "step": 68450 }, { "epoch": 0.2646472143619242, "grad_norm": 0.10857021808624268, "learning_rate": 0.002, "loss": 2.3533, "step": 68460 }, { "epoch": 0.2646858715653075, "grad_norm": 0.11583767086267471, "learning_rate": 0.002, "loss": 2.3543, "step": 68470 }, { "epoch": 0.26472452876869074, "grad_norm": 0.11408522725105286, "learning_rate": 0.002, "loss": 2.3685, "step": 68480 }, { "epoch": 0.26476318597207404, "grad_norm": 0.10725697875022888, "learning_rate": 0.002, "loss": 2.3513, "step": 68490 }, { "epoch": 0.2648018431754573, "grad_norm": 0.12834322452545166, "learning_rate": 0.002, "loss": 2.3612, "step": 68500 }, { "epoch": 0.2648405003788406, "grad_norm": 0.11649401485919952, "learning_rate": 0.002, "loss": 2.3511, "step": 68510 }, { "epoch": 0.26487915758222386, "grad_norm": 0.12402457743883133, "learning_rate": 0.002, "loss": 2.3585, "step": 68520 }, { "epoch": 0.26491781478560716, "grad_norm": 0.11356259882450104, "learning_rate": 0.002, "loss": 2.3546, "step": 68530 }, { "epoch": 0.2649564719889904, "grad_norm": 0.09514134377241135, "learning_rate": 0.002, "loss": 2.3516, "step": 68540 }, { "epoch": 0.2649951291923737, "grad_norm": 0.12344872951507568, "learning_rate": 0.002, "loss": 2.3586, "step": 68550 }, { "epoch": 0.265033786395757, "grad_norm": 0.11852506548166275, "learning_rate": 0.002, "loss": 2.3607, "step": 68560 }, { "epoch": 0.2650724435991403, "grad_norm": 0.11700794845819473, "learning_rate": 0.002, "loss": 2.3684, "step": 68570 }, { "epoch": 0.26511110080252354, "grad_norm": 0.11226040124893188, "learning_rate": 0.002, "loss": 2.3623, "step": 68580 }, { "epoch": 0.26514975800590684, "grad_norm": 0.11499864608049393, "learning_rate": 0.002, "loss": 2.3597, "step": 68590 }, { "epoch": 0.2651884152092901, "grad_norm": 0.10765230655670166, "learning_rate": 0.002, "loss": 2.352, "step": 68600 }, { "epoch": 0.2652270724126734, "grad_norm": 0.10035645961761475, "learning_rate": 0.002, "loss": 2.3534, "step": 68610 }, { "epoch": 0.26526572961605666, "grad_norm": 0.1291760504245758, "learning_rate": 0.002, "loss": 2.3413, "step": 68620 }, { "epoch": 0.2653043868194399, "grad_norm": 0.13323894143104553, "learning_rate": 0.002, "loss": 2.3489, "step": 68630 }, { "epoch": 0.2653430440228232, "grad_norm": 0.10504741221666336, "learning_rate": 0.002, "loss": 2.3492, "step": 68640 }, { "epoch": 0.26538170122620647, "grad_norm": 0.12466032058000565, "learning_rate": 0.002, "loss": 2.3496, "step": 68650 }, { "epoch": 0.2654203584295898, "grad_norm": 0.10273690521717072, "learning_rate": 0.002, "loss": 2.3427, "step": 68660 }, { "epoch": 0.265459015632973, "grad_norm": 0.1336238533258438, "learning_rate": 0.002, "loss": 2.3579, "step": 68670 }, { "epoch": 0.26549767283635634, "grad_norm": 0.1103762686252594, "learning_rate": 0.002, "loss": 2.3454, "step": 68680 }, { "epoch": 0.2655363300397396, "grad_norm": 0.09904215484857559, "learning_rate": 0.002, "loss": 2.3759, "step": 68690 }, { "epoch": 0.2655749872431229, "grad_norm": 0.10359543561935425, "learning_rate": 0.002, "loss": 2.3737, "step": 68700 }, { "epoch": 0.26561364444650615, "grad_norm": 0.11706099659204483, "learning_rate": 0.002, "loss": 2.3525, "step": 68710 }, { "epoch": 0.26565230164988946, "grad_norm": 0.10935186594724655, "learning_rate": 0.002, "loss": 2.3448, "step": 68720 }, { "epoch": 0.2656909588532727, "grad_norm": 0.1055302545428276, "learning_rate": 0.002, "loss": 2.3428, "step": 68730 }, { "epoch": 0.265729616056656, "grad_norm": 0.10079481452703476, "learning_rate": 0.002, "loss": 2.3648, "step": 68740 }, { "epoch": 0.26576827326003927, "grad_norm": 0.12839989364147186, "learning_rate": 0.002, "loss": 2.3651, "step": 68750 }, { "epoch": 0.2658069304634226, "grad_norm": 0.10385528951883316, "learning_rate": 0.002, "loss": 2.355, "step": 68760 }, { "epoch": 0.2658455876668058, "grad_norm": 0.1055331826210022, "learning_rate": 0.002, "loss": 2.3462, "step": 68770 }, { "epoch": 0.26588424487018913, "grad_norm": 0.10499408841133118, "learning_rate": 0.002, "loss": 2.3392, "step": 68780 }, { "epoch": 0.2659229020735724, "grad_norm": 0.1188826709985733, "learning_rate": 0.002, "loss": 2.3577, "step": 68790 }, { "epoch": 0.2659615592769557, "grad_norm": 0.1307165026664734, "learning_rate": 0.002, "loss": 2.365, "step": 68800 }, { "epoch": 0.26600021648033895, "grad_norm": 0.10107318311929703, "learning_rate": 0.002, "loss": 2.3538, "step": 68810 }, { "epoch": 0.2660388736837222, "grad_norm": 0.1242791935801506, "learning_rate": 0.002, "loss": 2.3427, "step": 68820 }, { "epoch": 0.2660775308871055, "grad_norm": 0.11397421360015869, "learning_rate": 0.002, "loss": 2.3668, "step": 68830 }, { "epoch": 0.26611618809048876, "grad_norm": 0.1081373542547226, "learning_rate": 0.002, "loss": 2.3615, "step": 68840 }, { "epoch": 0.26615484529387207, "grad_norm": 0.11046163737773895, "learning_rate": 0.002, "loss": 2.3529, "step": 68850 }, { "epoch": 0.2661935024972553, "grad_norm": 0.11250414699316025, "learning_rate": 0.002, "loss": 2.3499, "step": 68860 }, { "epoch": 0.2662321597006386, "grad_norm": 0.11318423599004745, "learning_rate": 0.002, "loss": 2.3601, "step": 68870 }, { "epoch": 0.2662708169040219, "grad_norm": 0.11168055981397629, "learning_rate": 0.002, "loss": 2.353, "step": 68880 }, { "epoch": 0.2663094741074052, "grad_norm": 0.10483946651220322, "learning_rate": 0.002, "loss": 2.365, "step": 68890 }, { "epoch": 0.26634813131078844, "grad_norm": 0.11276476085186005, "learning_rate": 0.002, "loss": 2.3566, "step": 68900 }, { "epoch": 0.26638678851417175, "grad_norm": 0.12012232840061188, "learning_rate": 0.002, "loss": 2.3492, "step": 68910 }, { "epoch": 0.266425445717555, "grad_norm": 0.15014702081680298, "learning_rate": 0.002, "loss": 2.3539, "step": 68920 }, { "epoch": 0.2664641029209383, "grad_norm": 0.09493235498666763, "learning_rate": 0.002, "loss": 2.3457, "step": 68930 }, { "epoch": 0.26650276012432156, "grad_norm": 0.141010582447052, "learning_rate": 0.002, "loss": 2.359, "step": 68940 }, { "epoch": 0.26654141732770487, "grad_norm": 0.10899730026721954, "learning_rate": 0.002, "loss": 2.3659, "step": 68950 }, { "epoch": 0.2665800745310881, "grad_norm": 0.115720734000206, "learning_rate": 0.002, "loss": 2.3477, "step": 68960 }, { "epoch": 0.2666187317344714, "grad_norm": 0.10011117160320282, "learning_rate": 0.002, "loss": 2.3676, "step": 68970 }, { "epoch": 0.2666573889378547, "grad_norm": 0.0918012335896492, "learning_rate": 0.002, "loss": 2.3521, "step": 68980 }, { "epoch": 0.266696046141238, "grad_norm": 0.16054445505142212, "learning_rate": 0.002, "loss": 2.3478, "step": 68990 }, { "epoch": 0.26673470334462124, "grad_norm": 0.09328989684581757, "learning_rate": 0.002, "loss": 2.3655, "step": 69000 }, { "epoch": 0.2667733605480045, "grad_norm": 0.11078935861587524, "learning_rate": 0.002, "loss": 2.3574, "step": 69010 }, { "epoch": 0.2668120177513878, "grad_norm": 0.12276352941989899, "learning_rate": 0.002, "loss": 2.3558, "step": 69020 }, { "epoch": 0.26685067495477105, "grad_norm": 0.1035182997584343, "learning_rate": 0.002, "loss": 2.3531, "step": 69030 }, { "epoch": 0.26688933215815436, "grad_norm": 0.10880587249994278, "learning_rate": 0.002, "loss": 2.346, "step": 69040 }, { "epoch": 0.2669279893615376, "grad_norm": 0.1148485541343689, "learning_rate": 0.002, "loss": 2.3593, "step": 69050 }, { "epoch": 0.2669666465649209, "grad_norm": 0.11542642116546631, "learning_rate": 0.002, "loss": 2.3537, "step": 69060 }, { "epoch": 0.26700530376830417, "grad_norm": 0.09781506657600403, "learning_rate": 0.002, "loss": 2.3483, "step": 69070 }, { "epoch": 0.2670439609716875, "grad_norm": 0.11610165238380432, "learning_rate": 0.002, "loss": 2.3684, "step": 69080 }, { "epoch": 0.26708261817507073, "grad_norm": 0.11108443886041641, "learning_rate": 0.002, "loss": 2.3602, "step": 69090 }, { "epoch": 0.26712127537845404, "grad_norm": 0.09576795995235443, "learning_rate": 0.002, "loss": 2.3539, "step": 69100 }, { "epoch": 0.2671599325818373, "grad_norm": 0.12218323349952698, "learning_rate": 0.002, "loss": 2.3617, "step": 69110 }, { "epoch": 0.2671985897852206, "grad_norm": 0.12643787264823914, "learning_rate": 0.002, "loss": 2.3587, "step": 69120 }, { "epoch": 0.26723724698860385, "grad_norm": 0.11990267783403397, "learning_rate": 0.002, "loss": 2.3519, "step": 69130 }, { "epoch": 0.26727590419198716, "grad_norm": 0.10244489461183548, "learning_rate": 0.002, "loss": 2.3703, "step": 69140 }, { "epoch": 0.2673145613953704, "grad_norm": 0.10932030528783798, "learning_rate": 0.002, "loss": 2.3749, "step": 69150 }, { "epoch": 0.2673532185987537, "grad_norm": 0.1066504642367363, "learning_rate": 0.002, "loss": 2.3649, "step": 69160 }, { "epoch": 0.26739187580213697, "grad_norm": 0.10350240767002106, "learning_rate": 0.002, "loss": 2.3484, "step": 69170 }, { "epoch": 0.2674305330055202, "grad_norm": 0.10027860105037689, "learning_rate": 0.002, "loss": 2.3462, "step": 69180 }, { "epoch": 0.26746919020890353, "grad_norm": 0.10422100126743317, "learning_rate": 0.002, "loss": 2.3425, "step": 69190 }, { "epoch": 0.2675078474122868, "grad_norm": 0.11838383227586746, "learning_rate": 0.002, "loss": 2.344, "step": 69200 }, { "epoch": 0.2675465046156701, "grad_norm": 0.11060896515846252, "learning_rate": 0.002, "loss": 2.3661, "step": 69210 }, { "epoch": 0.26758516181905334, "grad_norm": 0.1625586599111557, "learning_rate": 0.002, "loss": 2.3386, "step": 69220 }, { "epoch": 0.26762381902243665, "grad_norm": 0.11933229863643646, "learning_rate": 0.002, "loss": 2.3449, "step": 69230 }, { "epoch": 0.2676624762258199, "grad_norm": 0.12870526313781738, "learning_rate": 0.002, "loss": 2.3562, "step": 69240 }, { "epoch": 0.2677011334292032, "grad_norm": 0.10569017380475998, "learning_rate": 0.002, "loss": 2.3469, "step": 69250 }, { "epoch": 0.26773979063258646, "grad_norm": 0.11131662875413895, "learning_rate": 0.002, "loss": 2.3641, "step": 69260 }, { "epoch": 0.26777844783596977, "grad_norm": 0.15190604329109192, "learning_rate": 0.002, "loss": 2.3733, "step": 69270 }, { "epoch": 0.267817105039353, "grad_norm": 0.10054759681224823, "learning_rate": 0.002, "loss": 2.3676, "step": 69280 }, { "epoch": 0.26785576224273633, "grad_norm": 0.10481575131416321, "learning_rate": 0.002, "loss": 2.3645, "step": 69290 }, { "epoch": 0.2678944194461196, "grad_norm": 0.11136908084154129, "learning_rate": 0.002, "loss": 2.364, "step": 69300 }, { "epoch": 0.2679330766495029, "grad_norm": 0.10886896401643753, "learning_rate": 0.002, "loss": 2.3585, "step": 69310 }, { "epoch": 0.26797173385288614, "grad_norm": 0.10115396231412888, "learning_rate": 0.002, "loss": 2.3558, "step": 69320 }, { "epoch": 0.26801039105626945, "grad_norm": 0.11328067630529404, "learning_rate": 0.002, "loss": 2.3485, "step": 69330 }, { "epoch": 0.2680490482596527, "grad_norm": 0.09565749019384384, "learning_rate": 0.002, "loss": 2.3647, "step": 69340 }, { "epoch": 0.268087705463036, "grad_norm": 0.15927979350090027, "learning_rate": 0.002, "loss": 2.3527, "step": 69350 }, { "epoch": 0.26812636266641926, "grad_norm": 0.11798939853906631, "learning_rate": 0.002, "loss": 2.3362, "step": 69360 }, { "epoch": 0.2681650198698025, "grad_norm": 0.1248922348022461, "learning_rate": 0.002, "loss": 2.347, "step": 69370 }, { "epoch": 0.2682036770731858, "grad_norm": 0.11001662909984589, "learning_rate": 0.002, "loss": 2.354, "step": 69380 }, { "epoch": 0.26824233427656907, "grad_norm": 0.10940846800804138, "learning_rate": 0.002, "loss": 2.3754, "step": 69390 }, { "epoch": 0.2682809914799524, "grad_norm": 0.10409963130950928, "learning_rate": 0.002, "loss": 2.3425, "step": 69400 }, { "epoch": 0.26831964868333563, "grad_norm": 0.10977187752723694, "learning_rate": 0.002, "loss": 2.3476, "step": 69410 }, { "epoch": 0.26835830588671894, "grad_norm": 0.10507378727197647, "learning_rate": 0.002, "loss": 2.3578, "step": 69420 }, { "epoch": 0.2683969630901022, "grad_norm": 0.12260796874761581, "learning_rate": 0.002, "loss": 2.3539, "step": 69430 }, { "epoch": 0.2684356202934855, "grad_norm": 0.13973881304264069, "learning_rate": 0.002, "loss": 2.3623, "step": 69440 }, { "epoch": 0.26847427749686875, "grad_norm": 0.12037132680416107, "learning_rate": 0.002, "loss": 2.367, "step": 69450 }, { "epoch": 0.26851293470025206, "grad_norm": 0.11014335602521896, "learning_rate": 0.002, "loss": 2.364, "step": 69460 }, { "epoch": 0.2685515919036353, "grad_norm": 0.10039269924163818, "learning_rate": 0.002, "loss": 2.3457, "step": 69470 }, { "epoch": 0.2685902491070186, "grad_norm": 0.11346255242824554, "learning_rate": 0.002, "loss": 2.357, "step": 69480 }, { "epoch": 0.26862890631040187, "grad_norm": 0.12792587280273438, "learning_rate": 0.002, "loss": 2.3501, "step": 69490 }, { "epoch": 0.2686675635137852, "grad_norm": 0.09618958830833435, "learning_rate": 0.002, "loss": 2.3663, "step": 69500 }, { "epoch": 0.26870622071716843, "grad_norm": 0.10815145820379257, "learning_rate": 0.002, "loss": 2.3658, "step": 69510 }, { "epoch": 0.26874487792055174, "grad_norm": 0.12378823012113571, "learning_rate": 0.002, "loss": 2.3527, "step": 69520 }, { "epoch": 0.268783535123935, "grad_norm": 0.11091689020395279, "learning_rate": 0.002, "loss": 2.3449, "step": 69530 }, { "epoch": 0.2688221923273183, "grad_norm": 0.1288810521364212, "learning_rate": 0.002, "loss": 2.346, "step": 69540 }, { "epoch": 0.26886084953070155, "grad_norm": 0.11948135495185852, "learning_rate": 0.002, "loss": 2.3418, "step": 69550 }, { "epoch": 0.2688995067340848, "grad_norm": 0.11650735884904861, "learning_rate": 0.002, "loss": 2.3544, "step": 69560 }, { "epoch": 0.2689381639374681, "grad_norm": 0.12489194422960281, "learning_rate": 0.002, "loss": 2.3503, "step": 69570 }, { "epoch": 0.26897682114085136, "grad_norm": 0.11284519731998444, "learning_rate": 0.002, "loss": 2.3584, "step": 69580 }, { "epoch": 0.26901547834423467, "grad_norm": 0.10499247908592224, "learning_rate": 0.002, "loss": 2.3326, "step": 69590 }, { "epoch": 0.2690541355476179, "grad_norm": 0.11322970688343048, "learning_rate": 0.002, "loss": 2.36, "step": 69600 }, { "epoch": 0.26909279275100123, "grad_norm": 0.10059107840061188, "learning_rate": 0.002, "loss": 2.3582, "step": 69610 }, { "epoch": 0.2691314499543845, "grad_norm": 0.11188896745443344, "learning_rate": 0.002, "loss": 2.3565, "step": 69620 }, { "epoch": 0.2691701071577678, "grad_norm": 0.10746940225362778, "learning_rate": 0.002, "loss": 2.3667, "step": 69630 }, { "epoch": 0.26920876436115104, "grad_norm": 0.10799665004014969, "learning_rate": 0.002, "loss": 2.3553, "step": 69640 }, { "epoch": 0.26924742156453435, "grad_norm": 0.10684413462877274, "learning_rate": 0.002, "loss": 2.3504, "step": 69650 }, { "epoch": 0.2692860787679176, "grad_norm": 0.11300649493932724, "learning_rate": 0.002, "loss": 2.3627, "step": 69660 }, { "epoch": 0.2693247359713009, "grad_norm": 0.11004617065191269, "learning_rate": 0.002, "loss": 2.3447, "step": 69670 }, { "epoch": 0.26936339317468416, "grad_norm": 0.09407416731119156, "learning_rate": 0.002, "loss": 2.3516, "step": 69680 }, { "epoch": 0.26940205037806747, "grad_norm": 0.14031341671943665, "learning_rate": 0.002, "loss": 2.3264, "step": 69690 }, { "epoch": 0.2694407075814507, "grad_norm": 0.11828654259443283, "learning_rate": 0.002, "loss": 2.3353, "step": 69700 }, { "epoch": 0.26947936478483403, "grad_norm": 0.11903201043605804, "learning_rate": 0.002, "loss": 2.3523, "step": 69710 }, { "epoch": 0.2695180219882173, "grad_norm": 0.1179431676864624, "learning_rate": 0.002, "loss": 2.3694, "step": 69720 }, { "epoch": 0.2695566791916006, "grad_norm": 0.1048092171549797, "learning_rate": 0.002, "loss": 2.3687, "step": 69730 }, { "epoch": 0.26959533639498384, "grad_norm": 0.1114017441868782, "learning_rate": 0.002, "loss": 2.3479, "step": 69740 }, { "epoch": 0.2696339935983671, "grad_norm": 0.10939405858516693, "learning_rate": 0.002, "loss": 2.3435, "step": 69750 }, { "epoch": 0.2696726508017504, "grad_norm": 0.11268754303455353, "learning_rate": 0.002, "loss": 2.33, "step": 69760 }, { "epoch": 0.26971130800513365, "grad_norm": 0.10703985393047333, "learning_rate": 0.002, "loss": 2.3401, "step": 69770 }, { "epoch": 0.26974996520851696, "grad_norm": 0.10865245014429092, "learning_rate": 0.002, "loss": 2.3522, "step": 69780 }, { "epoch": 0.2697886224119002, "grad_norm": 0.11256400495767593, "learning_rate": 0.002, "loss": 2.3508, "step": 69790 }, { "epoch": 0.2698272796152835, "grad_norm": 0.10484255105257034, "learning_rate": 0.002, "loss": 2.3473, "step": 69800 }, { "epoch": 0.2698659368186668, "grad_norm": 0.12422183156013489, "learning_rate": 0.002, "loss": 2.357, "step": 69810 }, { "epoch": 0.2699045940220501, "grad_norm": 0.1264592707157135, "learning_rate": 0.002, "loss": 2.3432, "step": 69820 }, { "epoch": 0.26994325122543333, "grad_norm": 0.1143057644367218, "learning_rate": 0.002, "loss": 2.3558, "step": 69830 }, { "epoch": 0.26998190842881664, "grad_norm": 0.10738317668437958, "learning_rate": 0.002, "loss": 2.3538, "step": 69840 }, { "epoch": 0.2700205656321999, "grad_norm": 0.11261006444692612, "learning_rate": 0.002, "loss": 2.3467, "step": 69850 }, { "epoch": 0.2700592228355832, "grad_norm": 0.1316865086555481, "learning_rate": 0.002, "loss": 2.3483, "step": 69860 }, { "epoch": 0.27009788003896645, "grad_norm": 0.10657945275306702, "learning_rate": 0.002, "loss": 2.3436, "step": 69870 }, { "epoch": 0.27013653724234976, "grad_norm": 0.10246672481298447, "learning_rate": 0.002, "loss": 2.3545, "step": 69880 }, { "epoch": 0.270175194445733, "grad_norm": 0.12110228091478348, "learning_rate": 0.002, "loss": 2.3584, "step": 69890 }, { "epoch": 0.2702138516491163, "grad_norm": 0.1109195128083229, "learning_rate": 0.002, "loss": 2.3612, "step": 69900 }, { "epoch": 0.2702525088524996, "grad_norm": 0.11075662076473236, "learning_rate": 0.002, "loss": 2.3474, "step": 69910 }, { "epoch": 0.2702911660558828, "grad_norm": 0.0981040745973587, "learning_rate": 0.002, "loss": 2.3564, "step": 69920 }, { "epoch": 0.27032982325926613, "grad_norm": 0.13120904564857483, "learning_rate": 0.002, "loss": 2.3564, "step": 69930 }, { "epoch": 0.2703684804626494, "grad_norm": 0.10848618298768997, "learning_rate": 0.002, "loss": 2.3635, "step": 69940 }, { "epoch": 0.2704071376660327, "grad_norm": 0.31828004121780396, "learning_rate": 0.002, "loss": 2.3622, "step": 69950 }, { "epoch": 0.27044579486941595, "grad_norm": 0.10648057609796524, "learning_rate": 0.002, "loss": 2.3495, "step": 69960 }, { "epoch": 0.27048445207279925, "grad_norm": 0.11374471336603165, "learning_rate": 0.002, "loss": 2.3652, "step": 69970 }, { "epoch": 0.2705231092761825, "grad_norm": 0.11823917180299759, "learning_rate": 0.002, "loss": 2.3482, "step": 69980 }, { "epoch": 0.2705617664795658, "grad_norm": 0.09965338557958603, "learning_rate": 0.002, "loss": 2.3446, "step": 69990 }, { "epoch": 0.27060042368294907, "grad_norm": 0.10684505105018616, "learning_rate": 0.002, "loss": 2.3448, "step": 70000 }, { "epoch": 0.2706390808863324, "grad_norm": 0.11145705729722977, "learning_rate": 0.002, "loss": 2.3452, "step": 70010 }, { "epoch": 0.2706777380897156, "grad_norm": 0.11647245287895203, "learning_rate": 0.002, "loss": 2.3293, "step": 70020 }, { "epoch": 0.27071639529309893, "grad_norm": 0.1162240207195282, "learning_rate": 0.002, "loss": 2.3514, "step": 70030 }, { "epoch": 0.2707550524964822, "grad_norm": 0.10605619847774506, "learning_rate": 0.002, "loss": 2.3688, "step": 70040 }, { "epoch": 0.2707937096998655, "grad_norm": 0.11280248314142227, "learning_rate": 0.002, "loss": 2.359, "step": 70050 }, { "epoch": 0.27083236690324874, "grad_norm": 0.10601935535669327, "learning_rate": 0.002, "loss": 2.3521, "step": 70060 }, { "epoch": 0.27087102410663205, "grad_norm": 0.11940313875675201, "learning_rate": 0.002, "loss": 2.3471, "step": 70070 }, { "epoch": 0.2709096813100153, "grad_norm": 0.10181540250778198, "learning_rate": 0.002, "loss": 2.3363, "step": 70080 }, { "epoch": 0.2709483385133986, "grad_norm": 0.10978764295578003, "learning_rate": 0.002, "loss": 2.3335, "step": 70090 }, { "epoch": 0.27098699571678186, "grad_norm": 0.11411808431148529, "learning_rate": 0.002, "loss": 2.3597, "step": 70100 }, { "epoch": 0.2710256529201651, "grad_norm": 0.09403868019580841, "learning_rate": 0.002, "loss": 2.3706, "step": 70110 }, { "epoch": 0.2710643101235484, "grad_norm": 0.11685250699520111, "learning_rate": 0.002, "loss": 2.3497, "step": 70120 }, { "epoch": 0.2711029673269317, "grad_norm": 0.10953215509653091, "learning_rate": 0.002, "loss": 2.339, "step": 70130 }, { "epoch": 0.271141624530315, "grad_norm": 0.1205730140209198, "learning_rate": 0.002, "loss": 2.3465, "step": 70140 }, { "epoch": 0.27118028173369824, "grad_norm": 0.12347997725009918, "learning_rate": 0.002, "loss": 2.3582, "step": 70150 }, { "epoch": 0.27121893893708154, "grad_norm": 0.1174677386879921, "learning_rate": 0.002, "loss": 2.3715, "step": 70160 }, { "epoch": 0.2712575961404648, "grad_norm": 0.11836795508861542, "learning_rate": 0.002, "loss": 2.3617, "step": 70170 }, { "epoch": 0.2712962533438481, "grad_norm": 0.10880803316831589, "learning_rate": 0.002, "loss": 2.3493, "step": 70180 }, { "epoch": 0.27133491054723136, "grad_norm": 0.11703497916460037, "learning_rate": 0.002, "loss": 2.3451, "step": 70190 }, { "epoch": 0.27137356775061466, "grad_norm": 0.1004214659333229, "learning_rate": 0.002, "loss": 2.3516, "step": 70200 }, { "epoch": 0.2714122249539979, "grad_norm": 0.1273142695426941, "learning_rate": 0.002, "loss": 2.3565, "step": 70210 }, { "epoch": 0.2714508821573812, "grad_norm": 0.11645844578742981, "learning_rate": 0.002, "loss": 2.344, "step": 70220 }, { "epoch": 0.2714895393607645, "grad_norm": 0.09346353262662888, "learning_rate": 0.002, "loss": 2.3403, "step": 70230 }, { "epoch": 0.2715281965641478, "grad_norm": 0.10376016795635223, "learning_rate": 0.002, "loss": 2.3574, "step": 70240 }, { "epoch": 0.27156685376753104, "grad_norm": 0.1030738353729248, "learning_rate": 0.002, "loss": 2.3642, "step": 70250 }, { "epoch": 0.27160551097091434, "grad_norm": 0.14229042828083038, "learning_rate": 0.002, "loss": 2.3479, "step": 70260 }, { "epoch": 0.2716441681742976, "grad_norm": 0.10577567666769028, "learning_rate": 0.002, "loss": 2.3506, "step": 70270 }, { "epoch": 0.2716828253776809, "grad_norm": 0.10997182875871658, "learning_rate": 0.002, "loss": 2.3472, "step": 70280 }, { "epoch": 0.27172148258106416, "grad_norm": 0.11268234252929688, "learning_rate": 0.002, "loss": 2.3616, "step": 70290 }, { "epoch": 0.2717601397844474, "grad_norm": 0.09050731360912323, "learning_rate": 0.002, "loss": 2.3512, "step": 70300 }, { "epoch": 0.2717987969878307, "grad_norm": 0.09953863173723221, "learning_rate": 0.002, "loss": 2.35, "step": 70310 }, { "epoch": 0.27183745419121397, "grad_norm": 0.10677150636911392, "learning_rate": 0.002, "loss": 2.3695, "step": 70320 }, { "epoch": 0.2718761113945973, "grad_norm": 0.0925314724445343, "learning_rate": 0.002, "loss": 2.3624, "step": 70330 }, { "epoch": 0.2719147685979805, "grad_norm": 0.11543618142604828, "learning_rate": 0.002, "loss": 2.3736, "step": 70340 }, { "epoch": 0.27195342580136384, "grad_norm": 0.10473021119832993, "learning_rate": 0.002, "loss": 2.3402, "step": 70350 }, { "epoch": 0.2719920830047471, "grad_norm": 0.11223046481609344, "learning_rate": 0.002, "loss": 2.3569, "step": 70360 }, { "epoch": 0.2720307402081304, "grad_norm": 0.10345500707626343, "learning_rate": 0.002, "loss": 2.3657, "step": 70370 }, { "epoch": 0.27206939741151365, "grad_norm": 0.13544058799743652, "learning_rate": 0.002, "loss": 2.3626, "step": 70380 }, { "epoch": 0.27210805461489695, "grad_norm": 0.11092148721218109, "learning_rate": 0.002, "loss": 2.3443, "step": 70390 }, { "epoch": 0.2721467118182802, "grad_norm": 0.10791967064142227, "learning_rate": 0.002, "loss": 2.3436, "step": 70400 }, { "epoch": 0.2721853690216635, "grad_norm": 0.10910028964281082, "learning_rate": 0.002, "loss": 2.3483, "step": 70410 }, { "epoch": 0.27222402622504677, "grad_norm": 0.11330778151750565, "learning_rate": 0.002, "loss": 2.3672, "step": 70420 }, { "epoch": 0.2722626834284301, "grad_norm": 0.10065264999866486, "learning_rate": 0.002, "loss": 2.3644, "step": 70430 }, { "epoch": 0.2723013406318133, "grad_norm": 0.10658468306064606, "learning_rate": 0.002, "loss": 2.3529, "step": 70440 }, { "epoch": 0.27233999783519663, "grad_norm": 0.10998087376356125, "learning_rate": 0.002, "loss": 2.3513, "step": 70450 }, { "epoch": 0.2723786550385799, "grad_norm": 0.1141766682267189, "learning_rate": 0.002, "loss": 2.3675, "step": 70460 }, { "epoch": 0.2724173122419632, "grad_norm": 0.11142230778932571, "learning_rate": 0.002, "loss": 2.3573, "step": 70470 }, { "epoch": 0.27245596944534645, "grad_norm": 0.12735013663768768, "learning_rate": 0.002, "loss": 2.3614, "step": 70480 }, { "epoch": 0.2724946266487297, "grad_norm": 0.1139666959643364, "learning_rate": 0.002, "loss": 2.3656, "step": 70490 }, { "epoch": 0.272533283852113, "grad_norm": 0.11391907185316086, "learning_rate": 0.002, "loss": 2.3551, "step": 70500 }, { "epoch": 0.27257194105549626, "grad_norm": 0.10967147350311279, "learning_rate": 0.002, "loss": 2.3631, "step": 70510 }, { "epoch": 0.27261059825887957, "grad_norm": 0.12889058887958527, "learning_rate": 0.002, "loss": 2.354, "step": 70520 }, { "epoch": 0.2726492554622628, "grad_norm": 0.11671818792819977, "learning_rate": 0.002, "loss": 2.3558, "step": 70530 }, { "epoch": 0.2726879126656461, "grad_norm": 0.13470597565174103, "learning_rate": 0.002, "loss": 2.371, "step": 70540 }, { "epoch": 0.2727265698690294, "grad_norm": 0.1302533745765686, "learning_rate": 0.002, "loss": 2.3493, "step": 70550 }, { "epoch": 0.2727652270724127, "grad_norm": 0.10315962880849838, "learning_rate": 0.002, "loss": 2.3492, "step": 70560 }, { "epoch": 0.27280388427579594, "grad_norm": 0.113266222178936, "learning_rate": 0.002, "loss": 2.3549, "step": 70570 }, { "epoch": 0.27284254147917925, "grad_norm": 0.11444780230522156, "learning_rate": 0.002, "loss": 2.3478, "step": 70580 }, { "epoch": 0.2728811986825625, "grad_norm": 0.12794393301010132, "learning_rate": 0.002, "loss": 2.3539, "step": 70590 }, { "epoch": 0.2729198558859458, "grad_norm": 0.08904918283224106, "learning_rate": 0.002, "loss": 2.3491, "step": 70600 }, { "epoch": 0.27295851308932906, "grad_norm": 0.10772236436605453, "learning_rate": 0.002, "loss": 2.3525, "step": 70610 }, { "epoch": 0.27299717029271237, "grad_norm": 0.0939546599984169, "learning_rate": 0.002, "loss": 2.355, "step": 70620 }, { "epoch": 0.2730358274960956, "grad_norm": 0.10614132881164551, "learning_rate": 0.002, "loss": 2.3644, "step": 70630 }, { "epoch": 0.2730744846994789, "grad_norm": 0.1227312833070755, "learning_rate": 0.002, "loss": 2.3745, "step": 70640 }, { "epoch": 0.2731131419028622, "grad_norm": 0.10652358829975128, "learning_rate": 0.002, "loss": 2.3644, "step": 70650 }, { "epoch": 0.2731517991062455, "grad_norm": 0.10573123395442963, "learning_rate": 0.002, "loss": 2.3402, "step": 70660 }, { "epoch": 0.27319045630962874, "grad_norm": 0.14353644847869873, "learning_rate": 0.002, "loss": 2.3415, "step": 70670 }, { "epoch": 0.273229113513012, "grad_norm": 0.10298144817352295, "learning_rate": 0.002, "loss": 2.3435, "step": 70680 }, { "epoch": 0.2732677707163953, "grad_norm": 0.10104386508464813, "learning_rate": 0.002, "loss": 2.3511, "step": 70690 }, { "epoch": 0.27330642791977855, "grad_norm": 0.12633801996707916, "learning_rate": 0.002, "loss": 2.3529, "step": 70700 }, { "epoch": 0.27334508512316186, "grad_norm": 0.1258019357919693, "learning_rate": 0.002, "loss": 2.3638, "step": 70710 }, { "epoch": 0.2733837423265451, "grad_norm": 0.11459273844957352, "learning_rate": 0.002, "loss": 2.3524, "step": 70720 }, { "epoch": 0.2734223995299284, "grad_norm": 0.12588898837566376, "learning_rate": 0.002, "loss": 2.3538, "step": 70730 }, { "epoch": 0.27346105673331167, "grad_norm": 0.10593008249998093, "learning_rate": 0.002, "loss": 2.3458, "step": 70740 }, { "epoch": 0.273499713936695, "grad_norm": 0.11217566579580307, "learning_rate": 0.002, "loss": 2.3529, "step": 70750 }, { "epoch": 0.27353837114007823, "grad_norm": 0.11181171983480453, "learning_rate": 0.002, "loss": 2.3611, "step": 70760 }, { "epoch": 0.27357702834346154, "grad_norm": 0.1203831285238266, "learning_rate": 0.002, "loss": 2.377, "step": 70770 }, { "epoch": 0.2736156855468448, "grad_norm": 0.10951201617717743, "learning_rate": 0.002, "loss": 2.3657, "step": 70780 }, { "epoch": 0.2736543427502281, "grad_norm": 0.12592849135398865, "learning_rate": 0.002, "loss": 2.3725, "step": 70790 }, { "epoch": 0.27369299995361135, "grad_norm": 0.10407140851020813, "learning_rate": 0.002, "loss": 2.3659, "step": 70800 }, { "epoch": 0.27373165715699466, "grad_norm": 0.1266588568687439, "learning_rate": 0.002, "loss": 2.3601, "step": 70810 }, { "epoch": 0.2737703143603779, "grad_norm": 0.110927052795887, "learning_rate": 0.002, "loss": 2.3626, "step": 70820 }, { "epoch": 0.2738089715637612, "grad_norm": 0.12094797939062119, "learning_rate": 0.002, "loss": 2.3502, "step": 70830 }, { "epoch": 0.27384762876714447, "grad_norm": 0.11010003089904785, "learning_rate": 0.002, "loss": 2.3451, "step": 70840 }, { "epoch": 0.2738862859705277, "grad_norm": 0.10637608915567398, "learning_rate": 0.002, "loss": 2.3517, "step": 70850 }, { "epoch": 0.27392494317391103, "grad_norm": 0.10273260623216629, "learning_rate": 0.002, "loss": 2.3601, "step": 70860 }, { "epoch": 0.2739636003772943, "grad_norm": 0.10922182351350784, "learning_rate": 0.002, "loss": 2.3477, "step": 70870 }, { "epoch": 0.2740022575806776, "grad_norm": 0.12877719104290009, "learning_rate": 0.002, "loss": 2.3537, "step": 70880 }, { "epoch": 0.27404091478406084, "grad_norm": 0.11414679139852524, "learning_rate": 0.002, "loss": 2.3559, "step": 70890 }, { "epoch": 0.27407957198744415, "grad_norm": 0.12255305796861649, "learning_rate": 0.002, "loss": 2.3543, "step": 70900 }, { "epoch": 0.2741182291908274, "grad_norm": 0.10669559985399246, "learning_rate": 0.002, "loss": 2.355, "step": 70910 }, { "epoch": 0.2741568863942107, "grad_norm": 0.09727916121482849, "learning_rate": 0.002, "loss": 2.356, "step": 70920 }, { "epoch": 0.27419554359759396, "grad_norm": 0.11017953604459763, "learning_rate": 0.002, "loss": 2.3654, "step": 70930 }, { "epoch": 0.27423420080097727, "grad_norm": 0.1090618297457695, "learning_rate": 0.002, "loss": 2.3603, "step": 70940 }, { "epoch": 0.2742728580043605, "grad_norm": 0.11688750237226486, "learning_rate": 0.002, "loss": 2.358, "step": 70950 }, { "epoch": 0.27431151520774383, "grad_norm": 0.11693534255027771, "learning_rate": 0.002, "loss": 2.3665, "step": 70960 }, { "epoch": 0.2743501724111271, "grad_norm": 0.10778669267892838, "learning_rate": 0.002, "loss": 2.3491, "step": 70970 }, { "epoch": 0.2743888296145104, "grad_norm": 0.11466331034898758, "learning_rate": 0.002, "loss": 2.3501, "step": 70980 }, { "epoch": 0.27442748681789364, "grad_norm": 0.12696002423763275, "learning_rate": 0.002, "loss": 2.3566, "step": 70990 }, { "epoch": 0.27446614402127695, "grad_norm": 0.09766501933336258, "learning_rate": 0.002, "loss": 2.3653, "step": 71000 }, { "epoch": 0.2745048012246602, "grad_norm": 0.11292238533496857, "learning_rate": 0.002, "loss": 2.3491, "step": 71010 }, { "epoch": 0.2745434584280435, "grad_norm": 0.11505763232707977, "learning_rate": 0.002, "loss": 2.3623, "step": 71020 }, { "epoch": 0.27458211563142676, "grad_norm": 0.13340947031974792, "learning_rate": 0.002, "loss": 2.3536, "step": 71030 }, { "epoch": 0.27462077283481, "grad_norm": 0.11638954281806946, "learning_rate": 0.002, "loss": 2.361, "step": 71040 }, { "epoch": 0.2746594300381933, "grad_norm": 0.09738632291555405, "learning_rate": 0.002, "loss": 2.3516, "step": 71050 }, { "epoch": 0.27469808724157657, "grad_norm": 0.1314162015914917, "learning_rate": 0.002, "loss": 2.3535, "step": 71060 }, { "epoch": 0.2747367444449599, "grad_norm": 0.10656953603029251, "learning_rate": 0.002, "loss": 2.3552, "step": 71070 }, { "epoch": 0.27477540164834313, "grad_norm": 0.22209644317626953, "learning_rate": 0.002, "loss": 2.3441, "step": 71080 }, { "epoch": 0.27481405885172644, "grad_norm": 0.11620117723941803, "learning_rate": 0.002, "loss": 2.3671, "step": 71090 }, { "epoch": 0.2748527160551097, "grad_norm": 0.0935266986489296, "learning_rate": 0.002, "loss": 2.357, "step": 71100 }, { "epoch": 0.274891373258493, "grad_norm": 0.10850471258163452, "learning_rate": 0.002, "loss": 2.3467, "step": 71110 }, { "epoch": 0.27493003046187625, "grad_norm": 0.10303536802530289, "learning_rate": 0.002, "loss": 2.357, "step": 71120 }, { "epoch": 0.27496868766525956, "grad_norm": 0.12467154115438461, "learning_rate": 0.002, "loss": 2.3499, "step": 71130 }, { "epoch": 0.2750073448686428, "grad_norm": 0.10738974064588547, "learning_rate": 0.002, "loss": 2.346, "step": 71140 }, { "epoch": 0.2750460020720261, "grad_norm": 0.3521445691585541, "learning_rate": 0.002, "loss": 2.3452, "step": 71150 }, { "epoch": 0.27508465927540937, "grad_norm": 0.09956452250480652, "learning_rate": 0.002, "loss": 2.3707, "step": 71160 }, { "epoch": 0.2751233164787927, "grad_norm": 0.12146948277950287, "learning_rate": 0.002, "loss": 2.3672, "step": 71170 }, { "epoch": 0.27516197368217593, "grad_norm": 0.10550107061862946, "learning_rate": 0.002, "loss": 2.3684, "step": 71180 }, { "epoch": 0.27520063088555924, "grad_norm": 0.09655023366212845, "learning_rate": 0.002, "loss": 2.3526, "step": 71190 }, { "epoch": 0.2752392880889425, "grad_norm": 0.11164681613445282, "learning_rate": 0.002, "loss": 2.3476, "step": 71200 }, { "epoch": 0.2752779452923258, "grad_norm": 0.10607494413852692, "learning_rate": 0.002, "loss": 2.3368, "step": 71210 }, { "epoch": 0.27531660249570905, "grad_norm": 0.1139015182852745, "learning_rate": 0.002, "loss": 2.3395, "step": 71220 }, { "epoch": 0.2753552596990923, "grad_norm": 0.10601265728473663, "learning_rate": 0.002, "loss": 2.3487, "step": 71230 }, { "epoch": 0.2753939169024756, "grad_norm": 0.1114029809832573, "learning_rate": 0.002, "loss": 2.3356, "step": 71240 }, { "epoch": 0.27543257410585886, "grad_norm": 0.12024658173322678, "learning_rate": 0.002, "loss": 2.3453, "step": 71250 }, { "epoch": 0.27547123130924217, "grad_norm": 0.11128765344619751, "learning_rate": 0.002, "loss": 2.3507, "step": 71260 }, { "epoch": 0.2755098885126254, "grad_norm": 0.10613062232732773, "learning_rate": 0.002, "loss": 2.3513, "step": 71270 }, { "epoch": 0.27554854571600873, "grad_norm": 0.09886675328016281, "learning_rate": 0.002, "loss": 2.3473, "step": 71280 }, { "epoch": 0.275587202919392, "grad_norm": 0.10275650769472122, "learning_rate": 0.002, "loss": 2.3536, "step": 71290 }, { "epoch": 0.2756258601227753, "grad_norm": 0.12820738554000854, "learning_rate": 0.002, "loss": 2.3596, "step": 71300 }, { "epoch": 0.27566451732615854, "grad_norm": 0.10675428062677383, "learning_rate": 0.002, "loss": 2.357, "step": 71310 }, { "epoch": 0.27570317452954185, "grad_norm": 0.10081598907709122, "learning_rate": 0.002, "loss": 2.3439, "step": 71320 }, { "epoch": 0.2757418317329251, "grad_norm": 0.1125272884964943, "learning_rate": 0.002, "loss": 2.3711, "step": 71330 }, { "epoch": 0.2757804889363084, "grad_norm": 0.11027172952890396, "learning_rate": 0.002, "loss": 2.359, "step": 71340 }, { "epoch": 0.27581914613969166, "grad_norm": 0.10339958220720291, "learning_rate": 0.002, "loss": 2.3569, "step": 71350 }, { "epoch": 0.27585780334307497, "grad_norm": 0.08919887244701385, "learning_rate": 0.002, "loss": 2.3497, "step": 71360 }, { "epoch": 0.2758964605464582, "grad_norm": 0.12407740205526352, "learning_rate": 0.002, "loss": 2.3583, "step": 71370 }, { "epoch": 0.27593511774984153, "grad_norm": 0.11857614666223526, "learning_rate": 0.002, "loss": 2.3584, "step": 71380 }, { "epoch": 0.2759737749532248, "grad_norm": 0.1211429014801979, "learning_rate": 0.002, "loss": 2.3664, "step": 71390 }, { "epoch": 0.2760124321566081, "grad_norm": 0.11378125846385956, "learning_rate": 0.002, "loss": 2.3611, "step": 71400 }, { "epoch": 0.27605108935999134, "grad_norm": 0.11991725862026215, "learning_rate": 0.002, "loss": 2.3445, "step": 71410 }, { "epoch": 0.2760897465633746, "grad_norm": 0.12218762189149857, "learning_rate": 0.002, "loss": 2.3669, "step": 71420 }, { "epoch": 0.2761284037667579, "grad_norm": 0.10152004659175873, "learning_rate": 0.002, "loss": 2.3766, "step": 71430 }, { "epoch": 0.27616706097014115, "grad_norm": 0.10593225806951523, "learning_rate": 0.002, "loss": 2.3638, "step": 71440 }, { "epoch": 0.27620571817352446, "grad_norm": 0.10519793629646301, "learning_rate": 0.002, "loss": 2.3759, "step": 71450 }, { "epoch": 0.2762443753769077, "grad_norm": 0.0969279333949089, "learning_rate": 0.002, "loss": 2.3553, "step": 71460 }, { "epoch": 0.276283032580291, "grad_norm": 0.09336300939321518, "learning_rate": 0.002, "loss": 2.3703, "step": 71470 }, { "epoch": 0.2763216897836743, "grad_norm": 0.10432116687297821, "learning_rate": 0.002, "loss": 2.358, "step": 71480 }, { "epoch": 0.2763603469870576, "grad_norm": 0.11239740252494812, "learning_rate": 0.002, "loss": 2.3554, "step": 71490 }, { "epoch": 0.27639900419044083, "grad_norm": 0.10245117545127869, "learning_rate": 0.002, "loss": 2.3467, "step": 71500 }, { "epoch": 0.27643766139382414, "grad_norm": 0.11861246824264526, "learning_rate": 0.002, "loss": 2.349, "step": 71510 }, { "epoch": 0.2764763185972074, "grad_norm": 0.11511997878551483, "learning_rate": 0.002, "loss": 2.3462, "step": 71520 }, { "epoch": 0.2765149758005907, "grad_norm": 0.10100384801626205, "learning_rate": 0.002, "loss": 2.3571, "step": 71530 }, { "epoch": 0.27655363300397395, "grad_norm": 0.126571923494339, "learning_rate": 0.002, "loss": 2.3529, "step": 71540 }, { "epoch": 0.27659229020735726, "grad_norm": 0.1063634380698204, "learning_rate": 0.002, "loss": 2.3489, "step": 71550 }, { "epoch": 0.2766309474107405, "grad_norm": 0.11067322641611099, "learning_rate": 0.002, "loss": 2.3611, "step": 71560 }, { "epoch": 0.2766696046141238, "grad_norm": 0.12833568453788757, "learning_rate": 0.002, "loss": 2.3605, "step": 71570 }, { "epoch": 0.2767082618175071, "grad_norm": 0.10962745547294617, "learning_rate": 0.002, "loss": 2.3601, "step": 71580 }, { "epoch": 0.2767469190208903, "grad_norm": 0.11526691168546677, "learning_rate": 0.002, "loss": 2.3576, "step": 71590 }, { "epoch": 0.27678557622427363, "grad_norm": 0.10324928164482117, "learning_rate": 0.002, "loss": 2.3606, "step": 71600 }, { "epoch": 0.2768242334276569, "grad_norm": 0.11164212971925735, "learning_rate": 0.002, "loss": 2.3564, "step": 71610 }, { "epoch": 0.2768628906310402, "grad_norm": 0.12060465663671494, "learning_rate": 0.002, "loss": 2.3404, "step": 71620 }, { "epoch": 0.27690154783442344, "grad_norm": 0.10645802319049835, "learning_rate": 0.002, "loss": 2.3538, "step": 71630 }, { "epoch": 0.27694020503780675, "grad_norm": 0.11743126064538956, "learning_rate": 0.002, "loss": 2.3612, "step": 71640 }, { "epoch": 0.27697886224119, "grad_norm": 0.1196308434009552, "learning_rate": 0.002, "loss": 2.3533, "step": 71650 }, { "epoch": 0.2770175194445733, "grad_norm": 0.12106142938137054, "learning_rate": 0.002, "loss": 2.3653, "step": 71660 }, { "epoch": 0.27705617664795656, "grad_norm": 0.10936318337917328, "learning_rate": 0.002, "loss": 2.3551, "step": 71670 }, { "epoch": 0.2770948338513399, "grad_norm": 0.11110043525695801, "learning_rate": 0.002, "loss": 2.355, "step": 71680 }, { "epoch": 0.2771334910547231, "grad_norm": 0.10624603182077408, "learning_rate": 0.002, "loss": 2.3523, "step": 71690 }, { "epoch": 0.27717214825810643, "grad_norm": 0.09473574906587601, "learning_rate": 0.002, "loss": 2.3475, "step": 71700 }, { "epoch": 0.2772108054614897, "grad_norm": 0.14341023564338684, "learning_rate": 0.002, "loss": 2.3438, "step": 71710 }, { "epoch": 0.277249462664873, "grad_norm": 0.11800472438335419, "learning_rate": 0.002, "loss": 2.3514, "step": 71720 }, { "epoch": 0.27728811986825624, "grad_norm": 0.11726002395153046, "learning_rate": 0.002, "loss": 2.3617, "step": 71730 }, { "epoch": 0.27732677707163955, "grad_norm": 0.09608127921819687, "learning_rate": 0.002, "loss": 2.3593, "step": 71740 }, { "epoch": 0.2773654342750228, "grad_norm": 0.11779412627220154, "learning_rate": 0.002, "loss": 2.357, "step": 71750 }, { "epoch": 0.2774040914784061, "grad_norm": 0.12998349964618683, "learning_rate": 0.002, "loss": 2.3565, "step": 71760 }, { "epoch": 0.27744274868178936, "grad_norm": 0.1138630211353302, "learning_rate": 0.002, "loss": 2.3588, "step": 71770 }, { "epoch": 0.2774814058851726, "grad_norm": 0.11318184435367584, "learning_rate": 0.002, "loss": 2.3474, "step": 71780 }, { "epoch": 0.2775200630885559, "grad_norm": 0.11765322089195251, "learning_rate": 0.002, "loss": 2.3459, "step": 71790 }, { "epoch": 0.2775587202919392, "grad_norm": 0.09930826723575592, "learning_rate": 0.002, "loss": 2.3698, "step": 71800 }, { "epoch": 0.2775973774953225, "grad_norm": 0.10807865113019943, "learning_rate": 0.002, "loss": 2.347, "step": 71810 }, { "epoch": 0.27763603469870574, "grad_norm": 0.09903464466333389, "learning_rate": 0.002, "loss": 2.3672, "step": 71820 }, { "epoch": 0.27767469190208904, "grad_norm": 0.11516023427248001, "learning_rate": 0.002, "loss": 2.3412, "step": 71830 }, { "epoch": 0.2777133491054723, "grad_norm": 0.11118941754102707, "learning_rate": 0.002, "loss": 2.3514, "step": 71840 }, { "epoch": 0.2777520063088556, "grad_norm": 0.10780233889818192, "learning_rate": 0.002, "loss": 2.3592, "step": 71850 }, { "epoch": 0.27779066351223886, "grad_norm": 0.11606789380311966, "learning_rate": 0.002, "loss": 2.3589, "step": 71860 }, { "epoch": 0.27782932071562216, "grad_norm": 0.14477357268333435, "learning_rate": 0.002, "loss": 2.3592, "step": 71870 }, { "epoch": 0.2778679779190054, "grad_norm": 0.10391691327095032, "learning_rate": 0.002, "loss": 2.3496, "step": 71880 }, { "epoch": 0.2779066351223887, "grad_norm": 0.09747461974620819, "learning_rate": 0.002, "loss": 2.3486, "step": 71890 }, { "epoch": 0.277945292325772, "grad_norm": 0.10803426802158356, "learning_rate": 0.002, "loss": 2.3624, "step": 71900 }, { "epoch": 0.2779839495291553, "grad_norm": 0.3230709731578827, "learning_rate": 0.002, "loss": 2.3693, "step": 71910 }, { "epoch": 0.27802260673253854, "grad_norm": 0.1216980516910553, "learning_rate": 0.002, "loss": 2.3467, "step": 71920 }, { "epoch": 0.27806126393592184, "grad_norm": 0.09765301644802094, "learning_rate": 0.002, "loss": 2.3459, "step": 71930 }, { "epoch": 0.2780999211393051, "grad_norm": 0.11342030018568039, "learning_rate": 0.002, "loss": 2.3512, "step": 71940 }, { "epoch": 0.2781385783426884, "grad_norm": 0.115015409886837, "learning_rate": 0.002, "loss": 2.3544, "step": 71950 }, { "epoch": 0.27817723554607166, "grad_norm": 0.1250956952571869, "learning_rate": 0.002, "loss": 2.3675, "step": 71960 }, { "epoch": 0.2782158927494549, "grad_norm": 0.10484731942415237, "learning_rate": 0.002, "loss": 2.3537, "step": 71970 }, { "epoch": 0.2782545499528382, "grad_norm": 0.12462014704942703, "learning_rate": 0.002, "loss": 2.3441, "step": 71980 }, { "epoch": 0.27829320715622147, "grad_norm": 0.12200839817523956, "learning_rate": 0.002, "loss": 2.358, "step": 71990 }, { "epoch": 0.2783318643596048, "grad_norm": 0.11157329380512238, "learning_rate": 0.002, "loss": 2.3507, "step": 72000 }, { "epoch": 0.278370521562988, "grad_norm": 0.11760713160037994, "learning_rate": 0.002, "loss": 2.3522, "step": 72010 }, { "epoch": 0.27840917876637133, "grad_norm": 0.12260770052671432, "learning_rate": 0.002, "loss": 2.3707, "step": 72020 }, { "epoch": 0.2784478359697546, "grad_norm": 0.11049578338861465, "learning_rate": 0.002, "loss": 2.3402, "step": 72030 }, { "epoch": 0.2784864931731379, "grad_norm": 0.10234503448009491, "learning_rate": 0.002, "loss": 2.3431, "step": 72040 }, { "epoch": 0.27852515037652115, "grad_norm": 0.11964405328035355, "learning_rate": 0.002, "loss": 2.3594, "step": 72050 }, { "epoch": 0.27856380757990445, "grad_norm": 0.10206586122512817, "learning_rate": 0.002, "loss": 2.351, "step": 72060 }, { "epoch": 0.2786024647832877, "grad_norm": 0.10977603495121002, "learning_rate": 0.002, "loss": 2.3537, "step": 72070 }, { "epoch": 0.278641121986671, "grad_norm": 0.10649268329143524, "learning_rate": 0.002, "loss": 2.3368, "step": 72080 }, { "epoch": 0.27867977919005427, "grad_norm": 0.11123888194561005, "learning_rate": 0.002, "loss": 2.3569, "step": 72090 }, { "epoch": 0.2787184363934376, "grad_norm": 0.10736609250307083, "learning_rate": 0.002, "loss": 2.3505, "step": 72100 }, { "epoch": 0.2787570935968208, "grad_norm": 0.1288762390613556, "learning_rate": 0.002, "loss": 2.3543, "step": 72110 }, { "epoch": 0.27879575080020413, "grad_norm": 0.10738539695739746, "learning_rate": 0.002, "loss": 2.3618, "step": 72120 }, { "epoch": 0.2788344080035874, "grad_norm": 0.10828015953302383, "learning_rate": 0.002, "loss": 2.3523, "step": 72130 }, { "epoch": 0.2788730652069707, "grad_norm": 0.11473634839057922, "learning_rate": 0.002, "loss": 2.3615, "step": 72140 }, { "epoch": 0.27891172241035395, "grad_norm": 0.12921828031539917, "learning_rate": 0.002, "loss": 2.3613, "step": 72150 }, { "epoch": 0.2789503796137372, "grad_norm": 0.11381390690803528, "learning_rate": 0.002, "loss": 2.3725, "step": 72160 }, { "epoch": 0.2789890368171205, "grad_norm": 0.1263791173696518, "learning_rate": 0.002, "loss": 2.3354, "step": 72170 }, { "epoch": 0.27902769402050376, "grad_norm": 0.16024067997932434, "learning_rate": 0.002, "loss": 2.3389, "step": 72180 }, { "epoch": 0.27906635122388707, "grad_norm": 0.12159726768732071, "learning_rate": 0.002, "loss": 2.346, "step": 72190 }, { "epoch": 0.2791050084272703, "grad_norm": 0.11728315055370331, "learning_rate": 0.002, "loss": 2.3477, "step": 72200 }, { "epoch": 0.2791436656306536, "grad_norm": 0.11379316449165344, "learning_rate": 0.002, "loss": 2.3574, "step": 72210 }, { "epoch": 0.2791823228340369, "grad_norm": 0.10281714797019958, "learning_rate": 0.002, "loss": 2.3638, "step": 72220 }, { "epoch": 0.2792209800374202, "grad_norm": 0.11599381268024445, "learning_rate": 0.002, "loss": 2.3642, "step": 72230 }, { "epoch": 0.27925963724080344, "grad_norm": 0.1538357138633728, "learning_rate": 0.002, "loss": 2.3537, "step": 72240 }, { "epoch": 0.27929829444418675, "grad_norm": 0.10871226340532303, "learning_rate": 0.002, "loss": 2.3513, "step": 72250 }, { "epoch": 0.27933695164757, "grad_norm": 0.1313319057226181, "learning_rate": 0.002, "loss": 2.3543, "step": 72260 }, { "epoch": 0.2793756088509533, "grad_norm": 0.10848644375801086, "learning_rate": 0.002, "loss": 2.347, "step": 72270 }, { "epoch": 0.27941426605433656, "grad_norm": 0.11999375373125076, "learning_rate": 0.002, "loss": 2.3575, "step": 72280 }, { "epoch": 0.27945292325771987, "grad_norm": 0.102352075278759, "learning_rate": 0.002, "loss": 2.3638, "step": 72290 }, { "epoch": 0.2794915804611031, "grad_norm": 0.09912648797035217, "learning_rate": 0.002, "loss": 2.3545, "step": 72300 }, { "epoch": 0.2795302376644864, "grad_norm": 0.12556324899196625, "learning_rate": 0.002, "loss": 2.3467, "step": 72310 }, { "epoch": 0.2795688948678697, "grad_norm": 0.09553080052137375, "learning_rate": 0.002, "loss": 2.3553, "step": 72320 }, { "epoch": 0.279607552071253, "grad_norm": 0.10858116298913956, "learning_rate": 0.002, "loss": 2.36, "step": 72330 }, { "epoch": 0.27964620927463624, "grad_norm": 0.14677628874778748, "learning_rate": 0.002, "loss": 2.3608, "step": 72340 }, { "epoch": 0.2796848664780195, "grad_norm": 0.1237170547246933, "learning_rate": 0.002, "loss": 2.3521, "step": 72350 }, { "epoch": 0.2797235236814028, "grad_norm": 0.11486019194126129, "learning_rate": 0.002, "loss": 2.3449, "step": 72360 }, { "epoch": 0.27976218088478605, "grad_norm": 0.0969633013010025, "learning_rate": 0.002, "loss": 2.3511, "step": 72370 }, { "epoch": 0.27980083808816936, "grad_norm": 0.13080482184886932, "learning_rate": 0.002, "loss": 2.3588, "step": 72380 }, { "epoch": 0.2798394952915526, "grad_norm": 0.09762602299451828, "learning_rate": 0.002, "loss": 2.3541, "step": 72390 }, { "epoch": 0.2798781524949359, "grad_norm": 0.10149842500686646, "learning_rate": 0.002, "loss": 2.3706, "step": 72400 }, { "epoch": 0.27991680969831917, "grad_norm": 0.11325753480195999, "learning_rate": 0.002, "loss": 2.3446, "step": 72410 }, { "epoch": 0.2799554669017025, "grad_norm": 0.11837144941091537, "learning_rate": 0.002, "loss": 2.3724, "step": 72420 }, { "epoch": 0.27999412410508573, "grad_norm": 0.10435636341571808, "learning_rate": 0.002, "loss": 2.3519, "step": 72430 }, { "epoch": 0.28003278130846904, "grad_norm": 0.129971444606781, "learning_rate": 0.002, "loss": 2.363, "step": 72440 }, { "epoch": 0.2800714385118523, "grad_norm": 0.10650801658630371, "learning_rate": 0.002, "loss": 2.3617, "step": 72450 }, { "epoch": 0.2801100957152356, "grad_norm": 0.11678191274404526, "learning_rate": 0.002, "loss": 2.3445, "step": 72460 }, { "epoch": 0.28014875291861885, "grad_norm": 0.11875230073928833, "learning_rate": 0.002, "loss": 2.3509, "step": 72470 }, { "epoch": 0.28018741012200216, "grad_norm": 0.10601239651441574, "learning_rate": 0.002, "loss": 2.3464, "step": 72480 }, { "epoch": 0.2802260673253854, "grad_norm": 0.12163223326206207, "learning_rate": 0.002, "loss": 2.3622, "step": 72490 }, { "epoch": 0.2802647245287687, "grad_norm": 0.11279140412807465, "learning_rate": 0.002, "loss": 2.3612, "step": 72500 }, { "epoch": 0.28030338173215197, "grad_norm": 0.12758687138557434, "learning_rate": 0.002, "loss": 2.3725, "step": 72510 }, { "epoch": 0.2803420389355352, "grad_norm": 0.10631423443555832, "learning_rate": 0.002, "loss": 2.3471, "step": 72520 }, { "epoch": 0.28038069613891853, "grad_norm": 0.12531808018684387, "learning_rate": 0.002, "loss": 2.3489, "step": 72530 }, { "epoch": 0.2804193533423018, "grad_norm": 0.1248190626502037, "learning_rate": 0.002, "loss": 2.351, "step": 72540 }, { "epoch": 0.2804580105456851, "grad_norm": 0.12048540264368057, "learning_rate": 0.002, "loss": 2.3503, "step": 72550 }, { "epoch": 0.28049666774906834, "grad_norm": 0.11301138252019882, "learning_rate": 0.002, "loss": 2.3587, "step": 72560 }, { "epoch": 0.28053532495245165, "grad_norm": 0.10371150821447372, "learning_rate": 0.002, "loss": 2.3634, "step": 72570 }, { "epoch": 0.2805739821558349, "grad_norm": 0.10555297136306763, "learning_rate": 0.002, "loss": 2.3654, "step": 72580 }, { "epoch": 0.2806126393592182, "grad_norm": 0.12766166031360626, "learning_rate": 0.002, "loss": 2.3455, "step": 72590 }, { "epoch": 0.28065129656260146, "grad_norm": 0.0994403064250946, "learning_rate": 0.002, "loss": 2.3612, "step": 72600 }, { "epoch": 0.28068995376598477, "grad_norm": 0.1184573695063591, "learning_rate": 0.002, "loss": 2.3492, "step": 72610 }, { "epoch": 0.280728610969368, "grad_norm": 0.1224546879529953, "learning_rate": 0.002, "loss": 2.3489, "step": 72620 }, { "epoch": 0.2807672681727513, "grad_norm": 0.11548443138599396, "learning_rate": 0.002, "loss": 2.3381, "step": 72630 }, { "epoch": 0.2808059253761346, "grad_norm": 0.095535509288311, "learning_rate": 0.002, "loss": 2.3504, "step": 72640 }, { "epoch": 0.2808445825795179, "grad_norm": 0.11959680914878845, "learning_rate": 0.002, "loss": 2.3654, "step": 72650 }, { "epoch": 0.28088323978290114, "grad_norm": 0.11694733053445816, "learning_rate": 0.002, "loss": 2.3557, "step": 72660 }, { "epoch": 0.28092189698628445, "grad_norm": 0.09618599712848663, "learning_rate": 0.002, "loss": 2.3532, "step": 72670 }, { "epoch": 0.2809605541896677, "grad_norm": 0.11327013373374939, "learning_rate": 0.002, "loss": 2.347, "step": 72680 }, { "epoch": 0.280999211393051, "grad_norm": 0.10539939999580383, "learning_rate": 0.002, "loss": 2.3577, "step": 72690 }, { "epoch": 0.28103786859643426, "grad_norm": 0.11680128425359726, "learning_rate": 0.002, "loss": 2.3552, "step": 72700 }, { "epoch": 0.2810765257998175, "grad_norm": 0.1366237848997116, "learning_rate": 0.002, "loss": 2.3597, "step": 72710 }, { "epoch": 0.2811151830032008, "grad_norm": 0.12079530209302902, "learning_rate": 0.002, "loss": 2.3585, "step": 72720 }, { "epoch": 0.28115384020658407, "grad_norm": 0.1021251305937767, "learning_rate": 0.002, "loss": 2.3483, "step": 72730 }, { "epoch": 0.2811924974099674, "grad_norm": 0.1378641277551651, "learning_rate": 0.002, "loss": 2.3501, "step": 72740 }, { "epoch": 0.28123115461335063, "grad_norm": 0.11904937773942947, "learning_rate": 0.002, "loss": 2.3502, "step": 72750 }, { "epoch": 0.28126981181673394, "grad_norm": 0.11487920582294464, "learning_rate": 0.002, "loss": 2.349, "step": 72760 }, { "epoch": 0.2813084690201172, "grad_norm": 0.11200712621212006, "learning_rate": 0.002, "loss": 2.3501, "step": 72770 }, { "epoch": 0.2813471262235005, "grad_norm": 0.1166585236787796, "learning_rate": 0.002, "loss": 2.3625, "step": 72780 }, { "epoch": 0.28138578342688375, "grad_norm": 0.11063521355390549, "learning_rate": 0.002, "loss": 2.3495, "step": 72790 }, { "epoch": 0.28142444063026706, "grad_norm": 0.11898882687091827, "learning_rate": 0.002, "loss": 2.3597, "step": 72800 }, { "epoch": 0.2814630978336503, "grad_norm": 0.13146457076072693, "learning_rate": 0.002, "loss": 2.3526, "step": 72810 }, { "epoch": 0.2815017550370336, "grad_norm": 0.11028890311717987, "learning_rate": 0.002, "loss": 2.357, "step": 72820 }, { "epoch": 0.28154041224041687, "grad_norm": 0.10138233006000519, "learning_rate": 0.002, "loss": 2.3464, "step": 72830 }, { "epoch": 0.2815790694438002, "grad_norm": 0.11174673587083817, "learning_rate": 0.002, "loss": 2.3624, "step": 72840 }, { "epoch": 0.28161772664718343, "grad_norm": 0.11663304269313812, "learning_rate": 0.002, "loss": 2.3569, "step": 72850 }, { "epoch": 0.28165638385056674, "grad_norm": 0.11266373097896576, "learning_rate": 0.002, "loss": 2.346, "step": 72860 }, { "epoch": 0.28169504105395, "grad_norm": 0.1342191845178604, "learning_rate": 0.002, "loss": 2.349, "step": 72870 }, { "epoch": 0.2817336982573333, "grad_norm": 0.1129799410700798, "learning_rate": 0.002, "loss": 2.3626, "step": 72880 }, { "epoch": 0.28177235546071655, "grad_norm": 0.11189255118370056, "learning_rate": 0.002, "loss": 2.3448, "step": 72890 }, { "epoch": 0.2818110126640998, "grad_norm": 0.13493746519088745, "learning_rate": 0.002, "loss": 2.3562, "step": 72900 }, { "epoch": 0.2818496698674831, "grad_norm": 0.10434828698635101, "learning_rate": 0.002, "loss": 2.3636, "step": 72910 }, { "epoch": 0.28188832707086636, "grad_norm": 0.10769256204366684, "learning_rate": 0.002, "loss": 2.3554, "step": 72920 }, { "epoch": 0.28192698427424967, "grad_norm": 0.1049484834074974, "learning_rate": 0.002, "loss": 2.3638, "step": 72930 }, { "epoch": 0.2819656414776329, "grad_norm": 0.12519758939743042, "learning_rate": 0.002, "loss": 2.3656, "step": 72940 }, { "epoch": 0.28200429868101623, "grad_norm": 0.10872354358434677, "learning_rate": 0.002, "loss": 2.3724, "step": 72950 }, { "epoch": 0.2820429558843995, "grad_norm": 0.11857183277606964, "learning_rate": 0.002, "loss": 2.3606, "step": 72960 }, { "epoch": 0.2820816130877828, "grad_norm": 0.12134796380996704, "learning_rate": 0.002, "loss": 2.3711, "step": 72970 }, { "epoch": 0.28212027029116604, "grad_norm": 0.10698603838682175, "learning_rate": 0.002, "loss": 2.346, "step": 72980 }, { "epoch": 0.28215892749454935, "grad_norm": 0.10357688367366791, "learning_rate": 0.002, "loss": 2.3646, "step": 72990 }, { "epoch": 0.2821975846979326, "grad_norm": 0.13212862610816956, "learning_rate": 0.002, "loss": 2.3556, "step": 73000 }, { "epoch": 0.2822362419013159, "grad_norm": 0.08346768468618393, "learning_rate": 0.002, "loss": 2.3415, "step": 73010 }, { "epoch": 0.28227489910469916, "grad_norm": 0.10529458522796631, "learning_rate": 0.002, "loss": 2.355, "step": 73020 }, { "epoch": 0.28231355630808247, "grad_norm": 0.10463078320026398, "learning_rate": 0.002, "loss": 2.3471, "step": 73030 }, { "epoch": 0.2823522135114657, "grad_norm": 0.1002931147813797, "learning_rate": 0.002, "loss": 2.3336, "step": 73040 }, { "epoch": 0.28239087071484903, "grad_norm": 0.10691235959529877, "learning_rate": 0.002, "loss": 2.3659, "step": 73050 }, { "epoch": 0.2824295279182323, "grad_norm": 0.1013958677649498, "learning_rate": 0.002, "loss": 2.3561, "step": 73060 }, { "epoch": 0.2824681851216156, "grad_norm": 0.12213055044412613, "learning_rate": 0.002, "loss": 2.3441, "step": 73070 }, { "epoch": 0.28250684232499884, "grad_norm": 0.11261018365621567, "learning_rate": 0.002, "loss": 2.374, "step": 73080 }, { "epoch": 0.2825454995283821, "grad_norm": 0.10393129289150238, "learning_rate": 0.002, "loss": 2.3481, "step": 73090 }, { "epoch": 0.2825841567317654, "grad_norm": 0.10489436984062195, "learning_rate": 0.002, "loss": 2.3582, "step": 73100 }, { "epoch": 0.28262281393514865, "grad_norm": 0.12036898732185364, "learning_rate": 0.002, "loss": 2.3515, "step": 73110 }, { "epoch": 0.28266147113853196, "grad_norm": 0.10465455055236816, "learning_rate": 0.002, "loss": 2.3509, "step": 73120 }, { "epoch": 0.2827001283419152, "grad_norm": 0.10107477009296417, "learning_rate": 0.002, "loss": 2.353, "step": 73130 }, { "epoch": 0.2827387855452985, "grad_norm": 0.10472358018159866, "learning_rate": 0.002, "loss": 2.3417, "step": 73140 }, { "epoch": 0.2827774427486818, "grad_norm": 0.13089285790920258, "learning_rate": 0.002, "loss": 2.3565, "step": 73150 }, { "epoch": 0.2828160999520651, "grad_norm": 0.11773528903722763, "learning_rate": 0.002, "loss": 2.3606, "step": 73160 }, { "epoch": 0.28285475715544833, "grad_norm": 0.10916437208652496, "learning_rate": 0.002, "loss": 2.3621, "step": 73170 }, { "epoch": 0.28289341435883164, "grad_norm": 0.10333863645792007, "learning_rate": 0.002, "loss": 2.3739, "step": 73180 }, { "epoch": 0.2829320715622149, "grad_norm": 0.11203617602586746, "learning_rate": 0.002, "loss": 2.3489, "step": 73190 }, { "epoch": 0.2829707287655982, "grad_norm": 0.12673601508140564, "learning_rate": 0.002, "loss": 2.3378, "step": 73200 }, { "epoch": 0.28300938596898145, "grad_norm": 0.1323099583387375, "learning_rate": 0.002, "loss": 2.3527, "step": 73210 }, { "epoch": 0.28304804317236476, "grad_norm": 0.1025305837392807, "learning_rate": 0.002, "loss": 2.3503, "step": 73220 }, { "epoch": 0.283086700375748, "grad_norm": 0.10774783790111542, "learning_rate": 0.002, "loss": 2.3675, "step": 73230 }, { "epoch": 0.2831253575791313, "grad_norm": 0.12477708607912064, "learning_rate": 0.002, "loss": 2.3666, "step": 73240 }, { "epoch": 0.2831640147825146, "grad_norm": 0.09175287932157516, "learning_rate": 0.002, "loss": 2.3684, "step": 73250 }, { "epoch": 0.2832026719858978, "grad_norm": 0.18007692694664001, "learning_rate": 0.002, "loss": 2.3423, "step": 73260 }, { "epoch": 0.28324132918928113, "grad_norm": 0.10512850433588028, "learning_rate": 0.002, "loss": 2.3529, "step": 73270 }, { "epoch": 0.2832799863926644, "grad_norm": 0.11133573949337006, "learning_rate": 0.002, "loss": 2.352, "step": 73280 }, { "epoch": 0.2833186435960477, "grad_norm": 0.09325826168060303, "learning_rate": 0.002, "loss": 2.3573, "step": 73290 }, { "epoch": 0.28335730079943094, "grad_norm": 0.14559954404830933, "learning_rate": 0.002, "loss": 2.3501, "step": 73300 }, { "epoch": 0.28339595800281425, "grad_norm": 0.11711502075195312, "learning_rate": 0.002, "loss": 2.3462, "step": 73310 }, { "epoch": 0.2834346152061975, "grad_norm": 0.10935034602880478, "learning_rate": 0.002, "loss": 2.3345, "step": 73320 }, { "epoch": 0.2834732724095808, "grad_norm": 0.11247728019952774, "learning_rate": 0.002, "loss": 2.3584, "step": 73330 }, { "epoch": 0.28351192961296406, "grad_norm": 0.10688342154026031, "learning_rate": 0.002, "loss": 2.3573, "step": 73340 }, { "epoch": 0.28355058681634737, "grad_norm": 0.11608456075191498, "learning_rate": 0.002, "loss": 2.3516, "step": 73350 }, { "epoch": 0.2835892440197306, "grad_norm": 0.09863826632499695, "learning_rate": 0.002, "loss": 2.3505, "step": 73360 }, { "epoch": 0.28362790122311393, "grad_norm": 0.11395720392465591, "learning_rate": 0.002, "loss": 2.3512, "step": 73370 }, { "epoch": 0.2836665584264972, "grad_norm": 0.11138497292995453, "learning_rate": 0.002, "loss": 2.3644, "step": 73380 }, { "epoch": 0.2837052156298805, "grad_norm": 0.10729915648698807, "learning_rate": 0.002, "loss": 2.3489, "step": 73390 }, { "epoch": 0.28374387283326374, "grad_norm": 0.11885261535644531, "learning_rate": 0.002, "loss": 2.3541, "step": 73400 }, { "epoch": 0.28378253003664705, "grad_norm": 0.11109798401594162, "learning_rate": 0.002, "loss": 2.3661, "step": 73410 }, { "epoch": 0.2838211872400303, "grad_norm": 0.11580563336610794, "learning_rate": 0.002, "loss": 2.3573, "step": 73420 }, { "epoch": 0.2838598444434136, "grad_norm": 0.12981678545475006, "learning_rate": 0.002, "loss": 2.3804, "step": 73430 }, { "epoch": 0.28389850164679686, "grad_norm": 0.10222245007753372, "learning_rate": 0.002, "loss": 2.3591, "step": 73440 }, { "epoch": 0.2839371588501801, "grad_norm": 0.12582792341709137, "learning_rate": 0.002, "loss": 2.3707, "step": 73450 }, { "epoch": 0.2839758160535634, "grad_norm": 0.10031749308109283, "learning_rate": 0.002, "loss": 2.3596, "step": 73460 }, { "epoch": 0.2840144732569467, "grad_norm": 0.12029723078012466, "learning_rate": 0.002, "loss": 2.3672, "step": 73470 }, { "epoch": 0.28405313046033, "grad_norm": 0.1084158644080162, "learning_rate": 0.002, "loss": 2.3553, "step": 73480 }, { "epoch": 0.28409178766371324, "grad_norm": 0.1025131493806839, "learning_rate": 0.002, "loss": 2.3512, "step": 73490 }, { "epoch": 0.28413044486709654, "grad_norm": 0.09864922612905502, "learning_rate": 0.002, "loss": 2.3399, "step": 73500 }, { "epoch": 0.2841691020704798, "grad_norm": 0.10022611916065216, "learning_rate": 0.002, "loss": 2.3741, "step": 73510 }, { "epoch": 0.2842077592738631, "grad_norm": 0.27158042788505554, "learning_rate": 0.002, "loss": 2.3419, "step": 73520 }, { "epoch": 0.28424641647724636, "grad_norm": 0.10503194481134415, "learning_rate": 0.002, "loss": 2.3653, "step": 73530 }, { "epoch": 0.28428507368062966, "grad_norm": 0.10179532319307327, "learning_rate": 0.002, "loss": 2.3563, "step": 73540 }, { "epoch": 0.2843237308840129, "grad_norm": 0.12431447207927704, "learning_rate": 0.002, "loss": 2.3348, "step": 73550 }, { "epoch": 0.2843623880873962, "grad_norm": 0.12010517716407776, "learning_rate": 0.002, "loss": 2.3533, "step": 73560 }, { "epoch": 0.2844010452907795, "grad_norm": 0.15335272252559662, "learning_rate": 0.002, "loss": 2.3403, "step": 73570 }, { "epoch": 0.2844397024941628, "grad_norm": 0.10576717555522919, "learning_rate": 0.002, "loss": 2.3461, "step": 73580 }, { "epoch": 0.28447835969754603, "grad_norm": 0.10361874103546143, "learning_rate": 0.002, "loss": 2.3651, "step": 73590 }, { "epoch": 0.28451701690092934, "grad_norm": 0.10449161380529404, "learning_rate": 0.002, "loss": 2.3565, "step": 73600 }, { "epoch": 0.2845556741043126, "grad_norm": 0.09918791055679321, "learning_rate": 0.002, "loss": 2.3458, "step": 73610 }, { "epoch": 0.2845943313076959, "grad_norm": 0.11333408206701279, "learning_rate": 0.002, "loss": 2.345, "step": 73620 }, { "epoch": 0.28463298851107915, "grad_norm": 0.11512020975351334, "learning_rate": 0.002, "loss": 2.3691, "step": 73630 }, { "epoch": 0.2846716457144624, "grad_norm": 0.10907591879367828, "learning_rate": 0.002, "loss": 2.3632, "step": 73640 }, { "epoch": 0.2847103029178457, "grad_norm": 0.10631921887397766, "learning_rate": 0.002, "loss": 2.3498, "step": 73650 }, { "epoch": 0.28474896012122897, "grad_norm": 0.10795870423316956, "learning_rate": 0.002, "loss": 2.355, "step": 73660 }, { "epoch": 0.2847876173246123, "grad_norm": 0.12246907502412796, "learning_rate": 0.002, "loss": 2.3554, "step": 73670 }, { "epoch": 0.2848262745279955, "grad_norm": 0.11917459219694138, "learning_rate": 0.002, "loss": 2.3657, "step": 73680 }, { "epoch": 0.28486493173137883, "grad_norm": 0.11256936937570572, "learning_rate": 0.002, "loss": 2.3593, "step": 73690 }, { "epoch": 0.2849035889347621, "grad_norm": 0.1294250190258026, "learning_rate": 0.002, "loss": 2.334, "step": 73700 }, { "epoch": 0.2849422461381454, "grad_norm": 0.10935357213020325, "learning_rate": 0.002, "loss": 2.3733, "step": 73710 }, { "epoch": 0.28498090334152865, "grad_norm": 0.10090257972478867, "learning_rate": 0.002, "loss": 2.3461, "step": 73720 }, { "epoch": 0.28501956054491195, "grad_norm": 0.13061776757240295, "learning_rate": 0.002, "loss": 2.3541, "step": 73730 }, { "epoch": 0.2850582177482952, "grad_norm": 0.11142712831497192, "learning_rate": 0.002, "loss": 2.348, "step": 73740 }, { "epoch": 0.2850968749516785, "grad_norm": 0.09850049018859863, "learning_rate": 0.002, "loss": 2.3802, "step": 73750 }, { "epoch": 0.28513553215506177, "grad_norm": 0.12158714979887009, "learning_rate": 0.002, "loss": 2.3456, "step": 73760 }, { "epoch": 0.2851741893584451, "grad_norm": 0.09792877733707428, "learning_rate": 0.002, "loss": 2.369, "step": 73770 }, { "epoch": 0.2852128465618283, "grad_norm": 0.1119491308927536, "learning_rate": 0.002, "loss": 2.3455, "step": 73780 }, { "epoch": 0.28525150376521163, "grad_norm": 0.11141660809516907, "learning_rate": 0.002, "loss": 2.3535, "step": 73790 }, { "epoch": 0.2852901609685949, "grad_norm": 0.10876152664422989, "learning_rate": 0.002, "loss": 2.3536, "step": 73800 }, { "epoch": 0.2853288181719782, "grad_norm": 0.10910064727067947, "learning_rate": 0.002, "loss": 2.3631, "step": 73810 }, { "epoch": 0.28536747537536145, "grad_norm": 0.12243952602148056, "learning_rate": 0.002, "loss": 2.3505, "step": 73820 }, { "epoch": 0.2854061325787447, "grad_norm": 0.09542898833751678, "learning_rate": 0.002, "loss": 2.3626, "step": 73830 }, { "epoch": 0.285444789782128, "grad_norm": 0.09667696058750153, "learning_rate": 0.002, "loss": 2.3545, "step": 73840 }, { "epoch": 0.28548344698551126, "grad_norm": 0.09974554181098938, "learning_rate": 0.002, "loss": 2.3613, "step": 73850 }, { "epoch": 0.28552210418889457, "grad_norm": 0.11236509680747986, "learning_rate": 0.002, "loss": 2.3558, "step": 73860 }, { "epoch": 0.2855607613922778, "grad_norm": 0.11773736029863358, "learning_rate": 0.002, "loss": 2.355, "step": 73870 }, { "epoch": 0.2855994185956611, "grad_norm": 0.1734328418970108, "learning_rate": 0.002, "loss": 2.3692, "step": 73880 }, { "epoch": 0.2856380757990444, "grad_norm": 0.11204519867897034, "learning_rate": 0.002, "loss": 2.3545, "step": 73890 }, { "epoch": 0.2856767330024277, "grad_norm": 0.11577246338129044, "learning_rate": 0.002, "loss": 2.3621, "step": 73900 }, { "epoch": 0.28571539020581094, "grad_norm": 0.10974831134080887, "learning_rate": 0.002, "loss": 2.3384, "step": 73910 }, { "epoch": 0.28575404740919425, "grad_norm": 0.11790665239095688, "learning_rate": 0.002, "loss": 2.3695, "step": 73920 }, { "epoch": 0.2857927046125775, "grad_norm": 0.12227218598127365, "learning_rate": 0.002, "loss": 2.3593, "step": 73930 }, { "epoch": 0.2858313618159608, "grad_norm": 0.11108889430761337, "learning_rate": 0.002, "loss": 2.3486, "step": 73940 }, { "epoch": 0.28587001901934406, "grad_norm": 0.14347556233406067, "learning_rate": 0.002, "loss": 2.3416, "step": 73950 }, { "epoch": 0.28590867622272736, "grad_norm": 0.09942697733640671, "learning_rate": 0.002, "loss": 2.3635, "step": 73960 }, { "epoch": 0.2859473334261106, "grad_norm": 0.10189233720302582, "learning_rate": 0.002, "loss": 2.3507, "step": 73970 }, { "epoch": 0.2859859906294939, "grad_norm": 0.10416730493307114, "learning_rate": 0.002, "loss": 2.353, "step": 73980 }, { "epoch": 0.2860246478328772, "grad_norm": 0.11276213079690933, "learning_rate": 0.002, "loss": 2.3627, "step": 73990 }, { "epoch": 0.28606330503626043, "grad_norm": 0.11061752587556839, "learning_rate": 0.002, "loss": 2.3513, "step": 74000 }, { "epoch": 0.28610196223964374, "grad_norm": 0.10207962244749069, "learning_rate": 0.002, "loss": 2.3694, "step": 74010 }, { "epoch": 0.286140619443027, "grad_norm": 0.12822595238685608, "learning_rate": 0.002, "loss": 2.3627, "step": 74020 }, { "epoch": 0.2861792766464103, "grad_norm": 0.11346330493688583, "learning_rate": 0.002, "loss": 2.3779, "step": 74030 }, { "epoch": 0.28621793384979355, "grad_norm": 0.1152123212814331, "learning_rate": 0.002, "loss": 2.3502, "step": 74040 }, { "epoch": 0.28625659105317686, "grad_norm": 0.10732946544885635, "learning_rate": 0.002, "loss": 2.3601, "step": 74050 }, { "epoch": 0.2862952482565601, "grad_norm": 0.11564039438962936, "learning_rate": 0.002, "loss": 2.3324, "step": 74060 }, { "epoch": 0.2863339054599434, "grad_norm": 0.10278456658124924, "learning_rate": 0.002, "loss": 2.3407, "step": 74070 }, { "epoch": 0.28637256266332667, "grad_norm": 0.1271258443593979, "learning_rate": 0.002, "loss": 2.3603, "step": 74080 }, { "epoch": 0.28641121986671, "grad_norm": 0.12053419649600983, "learning_rate": 0.002, "loss": 2.3535, "step": 74090 }, { "epoch": 0.28644987707009323, "grad_norm": 0.1091466099023819, "learning_rate": 0.002, "loss": 2.3361, "step": 74100 }, { "epoch": 0.28648853427347654, "grad_norm": 0.12219161540269852, "learning_rate": 0.002, "loss": 2.345, "step": 74110 }, { "epoch": 0.2865271914768598, "grad_norm": 0.11780962347984314, "learning_rate": 0.002, "loss": 2.3629, "step": 74120 }, { "epoch": 0.2865658486802431, "grad_norm": 0.11488896608352661, "learning_rate": 0.002, "loss": 2.3394, "step": 74130 }, { "epoch": 0.28660450588362635, "grad_norm": 0.11384780704975128, "learning_rate": 0.002, "loss": 2.3626, "step": 74140 }, { "epoch": 0.28664316308700966, "grad_norm": 0.11270700395107269, "learning_rate": 0.002, "loss": 2.3541, "step": 74150 }, { "epoch": 0.2866818202903929, "grad_norm": 0.10677814483642578, "learning_rate": 0.002, "loss": 2.3494, "step": 74160 }, { "epoch": 0.2867204774937762, "grad_norm": 0.1030648723244667, "learning_rate": 0.002, "loss": 2.3554, "step": 74170 }, { "epoch": 0.28675913469715947, "grad_norm": 0.11373554915189743, "learning_rate": 0.002, "loss": 2.3639, "step": 74180 }, { "epoch": 0.2867977919005427, "grad_norm": 0.10422752797603607, "learning_rate": 0.002, "loss": 2.3515, "step": 74190 }, { "epoch": 0.28683644910392603, "grad_norm": 0.13142213225364685, "learning_rate": 0.002, "loss": 2.3703, "step": 74200 }, { "epoch": 0.2868751063073093, "grad_norm": 0.11340751498937607, "learning_rate": 0.002, "loss": 2.3434, "step": 74210 }, { "epoch": 0.2869137635106926, "grad_norm": 0.13458040356636047, "learning_rate": 0.002, "loss": 2.3652, "step": 74220 }, { "epoch": 0.28695242071407584, "grad_norm": 0.10151758790016174, "learning_rate": 0.002, "loss": 2.35, "step": 74230 }, { "epoch": 0.28699107791745915, "grad_norm": 0.09405501186847687, "learning_rate": 0.002, "loss": 2.3606, "step": 74240 }, { "epoch": 0.2870297351208424, "grad_norm": 0.10942229628562927, "learning_rate": 0.002, "loss": 2.3556, "step": 74250 }, { "epoch": 0.2870683923242257, "grad_norm": 0.10689356923103333, "learning_rate": 0.002, "loss": 2.3503, "step": 74260 }, { "epoch": 0.28710704952760896, "grad_norm": 0.11839873343706131, "learning_rate": 0.002, "loss": 2.3413, "step": 74270 }, { "epoch": 0.28714570673099227, "grad_norm": 0.10678387433290482, "learning_rate": 0.002, "loss": 2.3496, "step": 74280 }, { "epoch": 0.2871843639343755, "grad_norm": 0.11635982990264893, "learning_rate": 0.002, "loss": 2.3543, "step": 74290 }, { "epoch": 0.2872230211377588, "grad_norm": 0.11184539645910263, "learning_rate": 0.002, "loss": 2.3631, "step": 74300 }, { "epoch": 0.2872616783411421, "grad_norm": 0.10967237502336502, "learning_rate": 0.002, "loss": 2.3656, "step": 74310 }, { "epoch": 0.2873003355445254, "grad_norm": 0.12310586124658585, "learning_rate": 0.002, "loss": 2.3485, "step": 74320 }, { "epoch": 0.28733899274790864, "grad_norm": 0.11185023933649063, "learning_rate": 0.002, "loss": 2.3557, "step": 74330 }, { "epoch": 0.28737764995129195, "grad_norm": 0.09555628895759583, "learning_rate": 0.002, "loss": 2.3626, "step": 74340 }, { "epoch": 0.2874163071546752, "grad_norm": 0.1086990013718605, "learning_rate": 0.002, "loss": 2.3487, "step": 74350 }, { "epoch": 0.2874549643580585, "grad_norm": 0.12187491357326508, "learning_rate": 0.002, "loss": 2.3564, "step": 74360 }, { "epoch": 0.28749362156144176, "grad_norm": 0.10133063793182373, "learning_rate": 0.002, "loss": 2.3532, "step": 74370 }, { "epoch": 0.287532278764825, "grad_norm": 0.11328078806400299, "learning_rate": 0.002, "loss": 2.3727, "step": 74380 }, { "epoch": 0.2875709359682083, "grad_norm": 0.10969198495149612, "learning_rate": 0.002, "loss": 2.3495, "step": 74390 }, { "epoch": 0.28760959317159157, "grad_norm": 0.09657199680805206, "learning_rate": 0.002, "loss": 2.3533, "step": 74400 }, { "epoch": 0.2876482503749749, "grad_norm": 0.1048688217997551, "learning_rate": 0.002, "loss": 2.3501, "step": 74410 }, { "epoch": 0.28768690757835813, "grad_norm": 0.10406801849603653, "learning_rate": 0.002, "loss": 2.3526, "step": 74420 }, { "epoch": 0.28772556478174144, "grad_norm": 0.10882709175348282, "learning_rate": 0.002, "loss": 2.3603, "step": 74430 }, { "epoch": 0.2877642219851247, "grad_norm": 0.11413436383008957, "learning_rate": 0.002, "loss": 2.3543, "step": 74440 }, { "epoch": 0.287802879188508, "grad_norm": 0.10573131591081619, "learning_rate": 0.002, "loss": 2.3548, "step": 74450 }, { "epoch": 0.28784153639189125, "grad_norm": 0.12862974405288696, "learning_rate": 0.002, "loss": 2.369, "step": 74460 }, { "epoch": 0.28788019359527456, "grad_norm": 0.11314857006072998, "learning_rate": 0.002, "loss": 2.347, "step": 74470 }, { "epoch": 0.2879188507986578, "grad_norm": 0.11113854497671127, "learning_rate": 0.002, "loss": 2.3619, "step": 74480 }, { "epoch": 0.2879575080020411, "grad_norm": 0.11759510636329651, "learning_rate": 0.002, "loss": 2.3559, "step": 74490 }, { "epoch": 0.28799616520542437, "grad_norm": 0.12655659019947052, "learning_rate": 0.002, "loss": 2.3595, "step": 74500 }, { "epoch": 0.2880348224088077, "grad_norm": 0.10255160182714462, "learning_rate": 0.002, "loss": 2.3571, "step": 74510 }, { "epoch": 0.28807347961219093, "grad_norm": 0.10705683380365372, "learning_rate": 0.002, "loss": 2.3581, "step": 74520 }, { "epoch": 0.28811213681557424, "grad_norm": 0.12066076695919037, "learning_rate": 0.002, "loss": 2.3581, "step": 74530 }, { "epoch": 0.2881507940189575, "grad_norm": 0.10226041078567505, "learning_rate": 0.002, "loss": 2.3519, "step": 74540 }, { "epoch": 0.2881894512223408, "grad_norm": 0.1151876375079155, "learning_rate": 0.002, "loss": 2.3647, "step": 74550 }, { "epoch": 0.28822810842572405, "grad_norm": 0.10210239142179489, "learning_rate": 0.002, "loss": 2.3482, "step": 74560 }, { "epoch": 0.2882667656291073, "grad_norm": 0.1542600393295288, "learning_rate": 0.002, "loss": 2.351, "step": 74570 }, { "epoch": 0.2883054228324906, "grad_norm": 0.10761580616235733, "learning_rate": 0.002, "loss": 2.3613, "step": 74580 }, { "epoch": 0.28834408003587386, "grad_norm": 0.12081687897443771, "learning_rate": 0.002, "loss": 2.3367, "step": 74590 }, { "epoch": 0.28838273723925717, "grad_norm": 0.4860503673553467, "learning_rate": 0.002, "loss": 2.3559, "step": 74600 }, { "epoch": 0.2884213944426404, "grad_norm": 0.11622172594070435, "learning_rate": 0.002, "loss": 2.348, "step": 74610 }, { "epoch": 0.28846005164602373, "grad_norm": 0.10896652936935425, "learning_rate": 0.002, "loss": 2.3563, "step": 74620 }, { "epoch": 0.288498708849407, "grad_norm": 0.11146285384893417, "learning_rate": 0.002, "loss": 2.3525, "step": 74630 }, { "epoch": 0.2885373660527903, "grad_norm": 0.1089526042342186, "learning_rate": 0.002, "loss": 2.3434, "step": 74640 }, { "epoch": 0.28857602325617354, "grad_norm": 0.09288636595010757, "learning_rate": 0.002, "loss": 2.3465, "step": 74650 }, { "epoch": 0.28861468045955685, "grad_norm": 0.1133343055844307, "learning_rate": 0.002, "loss": 2.3481, "step": 74660 }, { "epoch": 0.2886533376629401, "grad_norm": 0.1155557855963707, "learning_rate": 0.002, "loss": 2.3553, "step": 74670 }, { "epoch": 0.2886919948663234, "grad_norm": 0.10346411168575287, "learning_rate": 0.002, "loss": 2.3581, "step": 74680 }, { "epoch": 0.28873065206970666, "grad_norm": 0.1326557844877243, "learning_rate": 0.002, "loss": 2.3358, "step": 74690 }, { "epoch": 0.28876930927308997, "grad_norm": 0.10779032111167908, "learning_rate": 0.002, "loss": 2.3426, "step": 74700 }, { "epoch": 0.2888079664764732, "grad_norm": 0.10755519568920135, "learning_rate": 0.002, "loss": 2.3536, "step": 74710 }, { "epoch": 0.28884662367985653, "grad_norm": 0.11523166298866272, "learning_rate": 0.002, "loss": 2.3483, "step": 74720 }, { "epoch": 0.2888852808832398, "grad_norm": 0.12562212347984314, "learning_rate": 0.002, "loss": 2.3721, "step": 74730 }, { "epoch": 0.2889239380866231, "grad_norm": 0.12147145718336105, "learning_rate": 0.002, "loss": 2.3738, "step": 74740 }, { "epoch": 0.28896259529000634, "grad_norm": 0.10682717710733414, "learning_rate": 0.002, "loss": 2.3491, "step": 74750 }, { "epoch": 0.2890012524933896, "grad_norm": 0.11858783662319183, "learning_rate": 0.002, "loss": 2.3489, "step": 74760 }, { "epoch": 0.2890399096967729, "grad_norm": 0.10563915967941284, "learning_rate": 0.002, "loss": 2.3794, "step": 74770 }, { "epoch": 0.28907856690015615, "grad_norm": 0.13456431031227112, "learning_rate": 0.002, "loss": 2.3543, "step": 74780 }, { "epoch": 0.28911722410353946, "grad_norm": 0.11352846771478653, "learning_rate": 0.002, "loss": 2.3472, "step": 74790 }, { "epoch": 0.2891558813069227, "grad_norm": 0.10515779256820679, "learning_rate": 0.002, "loss": 2.3579, "step": 74800 }, { "epoch": 0.289194538510306, "grad_norm": 0.11092042177915573, "learning_rate": 0.002, "loss": 2.3693, "step": 74810 }, { "epoch": 0.2892331957136893, "grad_norm": 0.1329876035451889, "learning_rate": 0.002, "loss": 2.3678, "step": 74820 }, { "epoch": 0.2892718529170726, "grad_norm": 0.09356291592121124, "learning_rate": 0.002, "loss": 2.3653, "step": 74830 }, { "epoch": 0.28931051012045583, "grad_norm": 0.09958864748477936, "learning_rate": 0.002, "loss": 2.3598, "step": 74840 }, { "epoch": 0.28934916732383914, "grad_norm": 0.09781919419765472, "learning_rate": 0.002, "loss": 2.3674, "step": 74850 }, { "epoch": 0.2893878245272224, "grad_norm": 0.10494308173656464, "learning_rate": 0.002, "loss": 2.3451, "step": 74860 }, { "epoch": 0.2894264817306057, "grad_norm": 0.09888094663619995, "learning_rate": 0.002, "loss": 2.3534, "step": 74870 }, { "epoch": 0.28946513893398895, "grad_norm": 0.13520100712776184, "learning_rate": 0.002, "loss": 2.3516, "step": 74880 }, { "epoch": 0.28950379613737226, "grad_norm": 0.11058992147445679, "learning_rate": 0.002, "loss": 2.3508, "step": 74890 }, { "epoch": 0.2895424533407555, "grad_norm": 0.11469922959804535, "learning_rate": 0.002, "loss": 2.3512, "step": 74900 }, { "epoch": 0.2895811105441388, "grad_norm": 0.12745223939418793, "learning_rate": 0.002, "loss": 2.3613, "step": 74910 }, { "epoch": 0.28961976774752207, "grad_norm": 0.13199283182621002, "learning_rate": 0.002, "loss": 2.367, "step": 74920 }, { "epoch": 0.2896584249509053, "grad_norm": 0.11187552660703659, "learning_rate": 0.002, "loss": 2.3567, "step": 74930 }, { "epoch": 0.28969708215428863, "grad_norm": 0.1066499799489975, "learning_rate": 0.002, "loss": 2.353, "step": 74940 }, { "epoch": 0.2897357393576719, "grad_norm": 0.10952769219875336, "learning_rate": 0.002, "loss": 2.3472, "step": 74950 }, { "epoch": 0.2897743965610552, "grad_norm": 0.11855336278676987, "learning_rate": 0.002, "loss": 2.3505, "step": 74960 }, { "epoch": 0.28981305376443844, "grad_norm": 0.11813953518867493, "learning_rate": 0.002, "loss": 2.356, "step": 74970 }, { "epoch": 0.28985171096782175, "grad_norm": 0.12099150568246841, "learning_rate": 0.002, "loss": 2.3668, "step": 74980 }, { "epoch": 0.289890368171205, "grad_norm": 0.09434831887483597, "learning_rate": 0.002, "loss": 2.3447, "step": 74990 }, { "epoch": 0.2899290253745883, "grad_norm": 0.11710929870605469, "learning_rate": 0.002, "loss": 2.3523, "step": 75000 }, { "epoch": 0.28996768257797156, "grad_norm": 0.11767060309648514, "learning_rate": 0.002, "loss": 2.3413, "step": 75010 }, { "epoch": 0.29000633978135487, "grad_norm": 0.11021557450294495, "learning_rate": 0.002, "loss": 2.3474, "step": 75020 }, { "epoch": 0.2900449969847381, "grad_norm": 0.11392875760793686, "learning_rate": 0.002, "loss": 2.365, "step": 75030 }, { "epoch": 0.29008365418812143, "grad_norm": 0.1143617182970047, "learning_rate": 0.002, "loss": 2.3514, "step": 75040 }, { "epoch": 0.2901223113915047, "grad_norm": 0.10159295052289963, "learning_rate": 0.002, "loss": 2.3619, "step": 75050 }, { "epoch": 0.290160968594888, "grad_norm": 0.10352852940559387, "learning_rate": 0.002, "loss": 2.3289, "step": 75060 }, { "epoch": 0.29019962579827124, "grad_norm": 0.11853168159723282, "learning_rate": 0.002, "loss": 2.3508, "step": 75070 }, { "epoch": 0.29023828300165455, "grad_norm": 0.11491332948207855, "learning_rate": 0.002, "loss": 2.3548, "step": 75080 }, { "epoch": 0.2902769402050378, "grad_norm": 0.10985317081212997, "learning_rate": 0.002, "loss": 2.362, "step": 75090 }, { "epoch": 0.2903155974084211, "grad_norm": 0.11014048010110855, "learning_rate": 0.002, "loss": 2.3677, "step": 75100 }, { "epoch": 0.29035425461180436, "grad_norm": 0.1153755933046341, "learning_rate": 0.002, "loss": 2.3547, "step": 75110 }, { "epoch": 0.2903929118151876, "grad_norm": 0.12989766895771027, "learning_rate": 0.002, "loss": 2.3593, "step": 75120 }, { "epoch": 0.2904315690185709, "grad_norm": 0.10513586550951004, "learning_rate": 0.002, "loss": 2.3657, "step": 75130 }, { "epoch": 0.2904702262219542, "grad_norm": 0.10572315007448196, "learning_rate": 0.002, "loss": 2.3574, "step": 75140 }, { "epoch": 0.2905088834253375, "grad_norm": 0.1259058564901352, "learning_rate": 0.002, "loss": 2.345, "step": 75150 }, { "epoch": 0.29054754062872074, "grad_norm": 0.10508497804403305, "learning_rate": 0.002, "loss": 2.3475, "step": 75160 }, { "epoch": 0.29058619783210404, "grad_norm": 0.11335578560829163, "learning_rate": 0.002, "loss": 2.3494, "step": 75170 }, { "epoch": 0.2906248550354873, "grad_norm": 0.12427894026041031, "learning_rate": 0.002, "loss": 2.36, "step": 75180 }, { "epoch": 0.2906635122388706, "grad_norm": 0.10209350287914276, "learning_rate": 0.002, "loss": 2.3573, "step": 75190 }, { "epoch": 0.29070216944225385, "grad_norm": 0.11700747907161713, "learning_rate": 0.002, "loss": 2.3403, "step": 75200 }, { "epoch": 0.29074082664563716, "grad_norm": 0.10781227797269821, "learning_rate": 0.002, "loss": 2.3413, "step": 75210 }, { "epoch": 0.2907794838490204, "grad_norm": 0.13687002658843994, "learning_rate": 0.002, "loss": 2.3369, "step": 75220 }, { "epoch": 0.2908181410524037, "grad_norm": 0.11583052575588226, "learning_rate": 0.002, "loss": 2.3448, "step": 75230 }, { "epoch": 0.290856798255787, "grad_norm": 0.10628215968608856, "learning_rate": 0.002, "loss": 2.3641, "step": 75240 }, { "epoch": 0.2908954554591703, "grad_norm": 0.11679268628358841, "learning_rate": 0.002, "loss": 2.3569, "step": 75250 }, { "epoch": 0.29093411266255353, "grad_norm": 0.10749790072441101, "learning_rate": 0.002, "loss": 2.3436, "step": 75260 }, { "epoch": 0.29097276986593684, "grad_norm": 0.10714562982320786, "learning_rate": 0.002, "loss": 2.3609, "step": 75270 }, { "epoch": 0.2910114270693201, "grad_norm": 0.12559176981449127, "learning_rate": 0.002, "loss": 2.3678, "step": 75280 }, { "epoch": 0.2910500842727034, "grad_norm": 0.10603532195091248, "learning_rate": 0.002, "loss": 2.3698, "step": 75290 }, { "epoch": 0.29108874147608665, "grad_norm": 0.1143733486533165, "learning_rate": 0.002, "loss": 2.3575, "step": 75300 }, { "epoch": 0.2911273986794699, "grad_norm": 0.10944660753011703, "learning_rate": 0.002, "loss": 2.3465, "step": 75310 }, { "epoch": 0.2911660558828532, "grad_norm": 0.10007858276367188, "learning_rate": 0.002, "loss": 2.3585, "step": 75320 }, { "epoch": 0.29120471308623647, "grad_norm": 0.12037455290555954, "learning_rate": 0.002, "loss": 2.3658, "step": 75330 }, { "epoch": 0.2912433702896198, "grad_norm": 0.44239410758018494, "learning_rate": 0.002, "loss": 2.3738, "step": 75340 }, { "epoch": 0.291282027493003, "grad_norm": 0.13063888251781464, "learning_rate": 0.002, "loss": 2.3893, "step": 75350 }, { "epoch": 0.29132068469638633, "grad_norm": 0.1133558601140976, "learning_rate": 0.002, "loss": 2.3547, "step": 75360 }, { "epoch": 0.2913593418997696, "grad_norm": 0.094174325466156, "learning_rate": 0.002, "loss": 2.355, "step": 75370 }, { "epoch": 0.2913979991031529, "grad_norm": 0.13268128037452698, "learning_rate": 0.002, "loss": 2.3574, "step": 75380 }, { "epoch": 0.29143665630653615, "grad_norm": 0.11000560224056244, "learning_rate": 0.002, "loss": 2.35, "step": 75390 }, { "epoch": 0.29147531350991945, "grad_norm": 0.10913138091564178, "learning_rate": 0.002, "loss": 2.3461, "step": 75400 }, { "epoch": 0.2915139707133027, "grad_norm": 0.10128472745418549, "learning_rate": 0.002, "loss": 2.3604, "step": 75410 }, { "epoch": 0.291552627916686, "grad_norm": 0.11382602900266647, "learning_rate": 0.002, "loss": 2.3405, "step": 75420 }, { "epoch": 0.29159128512006927, "grad_norm": 0.11891323328018188, "learning_rate": 0.002, "loss": 2.3327, "step": 75430 }, { "epoch": 0.2916299423234526, "grad_norm": 0.10780160874128342, "learning_rate": 0.002, "loss": 2.3495, "step": 75440 }, { "epoch": 0.2916685995268358, "grad_norm": 0.10481828451156616, "learning_rate": 0.002, "loss": 2.3461, "step": 75450 }, { "epoch": 0.29170725673021913, "grad_norm": 0.10003086179494858, "learning_rate": 0.002, "loss": 2.3526, "step": 75460 }, { "epoch": 0.2917459139336024, "grad_norm": 0.12280956655740738, "learning_rate": 0.002, "loss": 2.3504, "step": 75470 }, { "epoch": 0.2917845711369857, "grad_norm": 0.10013121366500854, "learning_rate": 0.002, "loss": 2.3526, "step": 75480 }, { "epoch": 0.29182322834036895, "grad_norm": 0.10595941543579102, "learning_rate": 0.002, "loss": 2.3335, "step": 75490 }, { "epoch": 0.2918618855437522, "grad_norm": 0.14050036668777466, "learning_rate": 0.002, "loss": 2.3714, "step": 75500 }, { "epoch": 0.2919005427471355, "grad_norm": 0.11663522571325302, "learning_rate": 0.002, "loss": 2.3497, "step": 75510 }, { "epoch": 0.29193919995051876, "grad_norm": 0.10302326083183289, "learning_rate": 0.002, "loss": 2.3504, "step": 75520 }, { "epoch": 0.29197785715390207, "grad_norm": 0.10631691664457321, "learning_rate": 0.002, "loss": 2.356, "step": 75530 }, { "epoch": 0.2920165143572853, "grad_norm": 0.09877592325210571, "learning_rate": 0.002, "loss": 2.3536, "step": 75540 }, { "epoch": 0.2920551715606686, "grad_norm": 0.13082407414913177, "learning_rate": 0.002, "loss": 2.3424, "step": 75550 }, { "epoch": 0.2920938287640519, "grad_norm": 0.1210227981209755, "learning_rate": 0.002, "loss": 2.3494, "step": 75560 }, { "epoch": 0.2921324859674352, "grad_norm": 0.10815666615962982, "learning_rate": 0.002, "loss": 2.3619, "step": 75570 }, { "epoch": 0.29217114317081844, "grad_norm": 0.10120754688978195, "learning_rate": 0.002, "loss": 2.354, "step": 75580 }, { "epoch": 0.29220980037420174, "grad_norm": 0.1269613355398178, "learning_rate": 0.002, "loss": 2.3739, "step": 75590 }, { "epoch": 0.292248457577585, "grad_norm": 0.14352715015411377, "learning_rate": 0.002, "loss": 2.3657, "step": 75600 }, { "epoch": 0.2922871147809683, "grad_norm": 0.10628818720579147, "learning_rate": 0.002, "loss": 2.3565, "step": 75610 }, { "epoch": 0.29232577198435156, "grad_norm": 0.12074145674705505, "learning_rate": 0.002, "loss": 2.3534, "step": 75620 }, { "epoch": 0.29236442918773486, "grad_norm": 0.11499188095331192, "learning_rate": 0.002, "loss": 2.3461, "step": 75630 }, { "epoch": 0.2924030863911181, "grad_norm": 0.10540103167295456, "learning_rate": 0.002, "loss": 2.3338, "step": 75640 }, { "epoch": 0.2924417435945014, "grad_norm": 0.10639649629592896, "learning_rate": 0.002, "loss": 2.3432, "step": 75650 }, { "epoch": 0.2924804007978847, "grad_norm": 0.12016444653272629, "learning_rate": 0.002, "loss": 2.3528, "step": 75660 }, { "epoch": 0.29251905800126793, "grad_norm": 0.11414739489555359, "learning_rate": 0.002, "loss": 2.3562, "step": 75670 }, { "epoch": 0.29255771520465124, "grad_norm": 0.1184229627251625, "learning_rate": 0.002, "loss": 2.3482, "step": 75680 }, { "epoch": 0.2925963724080345, "grad_norm": 0.12431208789348602, "learning_rate": 0.002, "loss": 2.3578, "step": 75690 }, { "epoch": 0.2926350296114178, "grad_norm": 0.1032339408993721, "learning_rate": 0.002, "loss": 2.367, "step": 75700 }, { "epoch": 0.29267368681480105, "grad_norm": 0.11731734126806259, "learning_rate": 0.002, "loss": 2.3595, "step": 75710 }, { "epoch": 0.29271234401818436, "grad_norm": 0.12077368795871735, "learning_rate": 0.002, "loss": 2.3418, "step": 75720 }, { "epoch": 0.2927510012215676, "grad_norm": 0.1214771568775177, "learning_rate": 0.002, "loss": 2.3526, "step": 75730 }, { "epoch": 0.2927896584249509, "grad_norm": 0.09876122325658798, "learning_rate": 0.002, "loss": 2.3535, "step": 75740 }, { "epoch": 0.29282831562833417, "grad_norm": 0.12899163365364075, "learning_rate": 0.002, "loss": 2.3474, "step": 75750 }, { "epoch": 0.2928669728317175, "grad_norm": 0.13751502335071564, "learning_rate": 0.002, "loss": 2.3621, "step": 75760 }, { "epoch": 0.29290563003510073, "grad_norm": 0.1476529836654663, "learning_rate": 0.002, "loss": 2.351, "step": 75770 }, { "epoch": 0.29294428723848404, "grad_norm": 0.10541699826717377, "learning_rate": 0.002, "loss": 2.353, "step": 75780 }, { "epoch": 0.2929829444418673, "grad_norm": 0.13030430674552917, "learning_rate": 0.002, "loss": 2.3602, "step": 75790 }, { "epoch": 0.2930216016452506, "grad_norm": 0.1042986512184143, "learning_rate": 0.002, "loss": 2.3424, "step": 75800 }, { "epoch": 0.29306025884863385, "grad_norm": 0.12977388501167297, "learning_rate": 0.002, "loss": 2.3636, "step": 75810 }, { "epoch": 0.29309891605201716, "grad_norm": 0.10096988081932068, "learning_rate": 0.002, "loss": 2.3615, "step": 75820 }, { "epoch": 0.2931375732554004, "grad_norm": 0.1118810772895813, "learning_rate": 0.002, "loss": 2.3522, "step": 75830 }, { "epoch": 0.2931762304587837, "grad_norm": 0.10725447535514832, "learning_rate": 0.002, "loss": 2.3549, "step": 75840 }, { "epoch": 0.29321488766216697, "grad_norm": 0.11546574532985687, "learning_rate": 0.002, "loss": 2.3648, "step": 75850 }, { "epoch": 0.2932535448655502, "grad_norm": 0.09685637801885605, "learning_rate": 0.002, "loss": 2.3473, "step": 75860 }, { "epoch": 0.2932922020689335, "grad_norm": 0.11934518814086914, "learning_rate": 0.002, "loss": 2.3615, "step": 75870 }, { "epoch": 0.2933308592723168, "grad_norm": 0.10132249444723129, "learning_rate": 0.002, "loss": 2.3574, "step": 75880 }, { "epoch": 0.2933695164757001, "grad_norm": 0.12421329319477081, "learning_rate": 0.002, "loss": 2.3704, "step": 75890 }, { "epoch": 0.29340817367908334, "grad_norm": 0.09948378801345825, "learning_rate": 0.002, "loss": 2.3501, "step": 75900 }, { "epoch": 0.29344683088246665, "grad_norm": 0.1021517962217331, "learning_rate": 0.002, "loss": 2.3466, "step": 75910 }, { "epoch": 0.2934854880858499, "grad_norm": 0.12443681061267853, "learning_rate": 0.002, "loss": 2.3602, "step": 75920 }, { "epoch": 0.2935241452892332, "grad_norm": 0.1347322016954422, "learning_rate": 0.002, "loss": 2.3551, "step": 75930 }, { "epoch": 0.29356280249261646, "grad_norm": 0.12235191464424133, "learning_rate": 0.002, "loss": 2.3522, "step": 75940 }, { "epoch": 0.29360145969599977, "grad_norm": 0.1328326165676117, "learning_rate": 0.002, "loss": 2.3527, "step": 75950 }, { "epoch": 0.293640116899383, "grad_norm": 0.103482685983181, "learning_rate": 0.002, "loss": 2.3561, "step": 75960 }, { "epoch": 0.2936787741027663, "grad_norm": 0.12685489654541016, "learning_rate": 0.002, "loss": 2.3454, "step": 75970 }, { "epoch": 0.2937174313061496, "grad_norm": 0.10713542252779007, "learning_rate": 0.002, "loss": 2.3453, "step": 75980 }, { "epoch": 0.2937560885095329, "grad_norm": 0.11853060871362686, "learning_rate": 0.002, "loss": 2.3574, "step": 75990 }, { "epoch": 0.29379474571291614, "grad_norm": 0.13181206583976746, "learning_rate": 0.002, "loss": 2.3573, "step": 76000 }, { "epoch": 0.29383340291629945, "grad_norm": 0.09293057769536972, "learning_rate": 0.002, "loss": 2.3635, "step": 76010 }, { "epoch": 0.2938720601196827, "grad_norm": 0.09713239222764969, "learning_rate": 0.002, "loss": 2.3477, "step": 76020 }, { "epoch": 0.293910717323066, "grad_norm": 0.11791348457336426, "learning_rate": 0.002, "loss": 2.3424, "step": 76030 }, { "epoch": 0.29394937452644926, "grad_norm": 0.10660435259342194, "learning_rate": 0.002, "loss": 2.3534, "step": 76040 }, { "epoch": 0.2939880317298325, "grad_norm": 0.09991808235645294, "learning_rate": 0.002, "loss": 2.3614, "step": 76050 }, { "epoch": 0.2940266889332158, "grad_norm": 0.10632813721895218, "learning_rate": 0.002, "loss": 2.3574, "step": 76060 }, { "epoch": 0.29406534613659907, "grad_norm": 0.16934417188167572, "learning_rate": 0.002, "loss": 2.3384, "step": 76070 }, { "epoch": 0.2941040033399824, "grad_norm": 0.09975051879882812, "learning_rate": 0.002, "loss": 2.3526, "step": 76080 }, { "epoch": 0.29414266054336563, "grad_norm": 0.08869673311710358, "learning_rate": 0.002, "loss": 2.3672, "step": 76090 }, { "epoch": 0.29418131774674894, "grad_norm": 0.11789096891880035, "learning_rate": 0.002, "loss": 2.3532, "step": 76100 }, { "epoch": 0.2942199749501322, "grad_norm": 0.09168128669261932, "learning_rate": 0.002, "loss": 2.3415, "step": 76110 }, { "epoch": 0.2942586321535155, "grad_norm": 0.10032834857702255, "learning_rate": 0.002, "loss": 2.3442, "step": 76120 }, { "epoch": 0.29429728935689875, "grad_norm": 0.12767310440540314, "learning_rate": 0.002, "loss": 2.3685, "step": 76130 }, { "epoch": 0.29433594656028206, "grad_norm": 0.10768149048089981, "learning_rate": 0.002, "loss": 2.3648, "step": 76140 }, { "epoch": 0.2943746037636653, "grad_norm": 0.11018497496843338, "learning_rate": 0.002, "loss": 2.3655, "step": 76150 }, { "epoch": 0.2944132609670486, "grad_norm": 0.10596516728401184, "learning_rate": 0.002, "loss": 2.3554, "step": 76160 }, { "epoch": 0.29445191817043187, "grad_norm": 0.12382093816995621, "learning_rate": 0.002, "loss": 2.3647, "step": 76170 }, { "epoch": 0.2944905753738152, "grad_norm": 0.0991792306303978, "learning_rate": 0.002, "loss": 2.3326, "step": 76180 }, { "epoch": 0.29452923257719843, "grad_norm": 0.11784415692090988, "learning_rate": 0.002, "loss": 2.3483, "step": 76190 }, { "epoch": 0.29456788978058174, "grad_norm": 0.11032547801733017, "learning_rate": 0.002, "loss": 2.3315, "step": 76200 }, { "epoch": 0.294606546983965, "grad_norm": 0.11199129372835159, "learning_rate": 0.002, "loss": 2.3465, "step": 76210 }, { "epoch": 0.2946452041873483, "grad_norm": 0.0953453853726387, "learning_rate": 0.002, "loss": 2.3472, "step": 76220 }, { "epoch": 0.29468386139073155, "grad_norm": 0.10188492387533188, "learning_rate": 0.002, "loss": 2.3338, "step": 76230 }, { "epoch": 0.2947225185941148, "grad_norm": 0.1126917153596878, "learning_rate": 0.002, "loss": 2.3535, "step": 76240 }, { "epoch": 0.2947611757974981, "grad_norm": 0.10451949387788773, "learning_rate": 0.002, "loss": 2.3617, "step": 76250 }, { "epoch": 0.29479983300088136, "grad_norm": 0.11269045621156693, "learning_rate": 0.002, "loss": 2.3613, "step": 76260 }, { "epoch": 0.29483849020426467, "grad_norm": 0.10896584391593933, "learning_rate": 0.002, "loss": 2.3509, "step": 76270 }, { "epoch": 0.2948771474076479, "grad_norm": 0.10747255384922028, "learning_rate": 0.002, "loss": 2.3438, "step": 76280 }, { "epoch": 0.29491580461103123, "grad_norm": 0.12370602786540985, "learning_rate": 0.002, "loss": 2.3439, "step": 76290 }, { "epoch": 0.2949544618144145, "grad_norm": 0.12151787430047989, "learning_rate": 0.002, "loss": 2.3531, "step": 76300 }, { "epoch": 0.2949931190177978, "grad_norm": 0.1202084943652153, "learning_rate": 0.002, "loss": 2.3616, "step": 76310 }, { "epoch": 0.29503177622118104, "grad_norm": 0.22490330040454865, "learning_rate": 0.002, "loss": 2.362, "step": 76320 }, { "epoch": 0.29507043342456435, "grad_norm": 0.12307056784629822, "learning_rate": 0.002, "loss": 2.3588, "step": 76330 }, { "epoch": 0.2951090906279476, "grad_norm": 0.11710674315690994, "learning_rate": 0.002, "loss": 2.3656, "step": 76340 }, { "epoch": 0.2951477478313309, "grad_norm": 0.12242462486028671, "learning_rate": 0.002, "loss": 2.3384, "step": 76350 }, { "epoch": 0.29518640503471416, "grad_norm": 0.10505883395671844, "learning_rate": 0.002, "loss": 2.3651, "step": 76360 }, { "epoch": 0.29522506223809747, "grad_norm": 0.13543276488780975, "learning_rate": 0.002, "loss": 2.3714, "step": 76370 }, { "epoch": 0.2952637194414807, "grad_norm": 0.11254122108221054, "learning_rate": 0.002, "loss": 2.3382, "step": 76380 }, { "epoch": 0.29530237664486403, "grad_norm": 0.10214526206254959, "learning_rate": 0.002, "loss": 2.3624, "step": 76390 }, { "epoch": 0.2953410338482473, "grad_norm": 0.1244417205452919, "learning_rate": 0.002, "loss": 2.3654, "step": 76400 }, { "epoch": 0.2953796910516306, "grad_norm": 0.1231013685464859, "learning_rate": 0.002, "loss": 2.349, "step": 76410 }, { "epoch": 0.29541834825501384, "grad_norm": 0.11223754286766052, "learning_rate": 0.002, "loss": 2.3452, "step": 76420 }, { "epoch": 0.2954570054583971, "grad_norm": 0.09990517050027847, "learning_rate": 0.002, "loss": 2.3508, "step": 76430 }, { "epoch": 0.2954956626617804, "grad_norm": 0.0930066928267479, "learning_rate": 0.002, "loss": 2.3653, "step": 76440 }, { "epoch": 0.29553431986516365, "grad_norm": 0.10342702269554138, "learning_rate": 0.002, "loss": 2.3565, "step": 76450 }, { "epoch": 0.29557297706854696, "grad_norm": 0.12185720354318619, "learning_rate": 0.002, "loss": 2.3672, "step": 76460 }, { "epoch": 0.2956116342719302, "grad_norm": 0.11930191516876221, "learning_rate": 0.002, "loss": 2.3487, "step": 76470 }, { "epoch": 0.2956502914753135, "grad_norm": 0.09556443989276886, "learning_rate": 0.002, "loss": 2.3488, "step": 76480 }, { "epoch": 0.2956889486786968, "grad_norm": 0.12179828435182571, "learning_rate": 0.002, "loss": 2.3577, "step": 76490 }, { "epoch": 0.2957276058820801, "grad_norm": 0.11521507054567337, "learning_rate": 0.002, "loss": 2.3573, "step": 76500 }, { "epoch": 0.29576626308546333, "grad_norm": 0.10643960535526276, "learning_rate": 0.002, "loss": 2.3564, "step": 76510 }, { "epoch": 0.29580492028884664, "grad_norm": 0.11946967989206314, "learning_rate": 0.002, "loss": 2.3589, "step": 76520 }, { "epoch": 0.2958435774922299, "grad_norm": 0.12031736969947815, "learning_rate": 0.002, "loss": 2.3642, "step": 76530 }, { "epoch": 0.2958822346956132, "grad_norm": 0.11797958612442017, "learning_rate": 0.002, "loss": 2.359, "step": 76540 }, { "epoch": 0.29592089189899645, "grad_norm": 0.09968064725399017, "learning_rate": 0.002, "loss": 2.3654, "step": 76550 }, { "epoch": 0.29595954910237976, "grad_norm": 0.09612449258565903, "learning_rate": 0.002, "loss": 2.3657, "step": 76560 }, { "epoch": 0.295998206305763, "grad_norm": 0.14262156188488007, "learning_rate": 0.002, "loss": 2.3512, "step": 76570 }, { "epoch": 0.2960368635091463, "grad_norm": 0.1107003390789032, "learning_rate": 0.002, "loss": 2.3578, "step": 76580 }, { "epoch": 0.29607552071252957, "grad_norm": 0.11529079079627991, "learning_rate": 0.002, "loss": 2.3535, "step": 76590 }, { "epoch": 0.2961141779159128, "grad_norm": 0.1252748817205429, "learning_rate": 0.002, "loss": 2.3573, "step": 76600 }, { "epoch": 0.29615283511929613, "grad_norm": 0.09772691130638123, "learning_rate": 0.002, "loss": 2.3449, "step": 76610 }, { "epoch": 0.2961914923226794, "grad_norm": 0.10354173183441162, "learning_rate": 0.002, "loss": 2.3657, "step": 76620 }, { "epoch": 0.2962301495260627, "grad_norm": 0.12318793684244156, "learning_rate": 0.002, "loss": 2.356, "step": 76630 }, { "epoch": 0.29626880672944594, "grad_norm": 0.11388937383890152, "learning_rate": 0.002, "loss": 2.3646, "step": 76640 }, { "epoch": 0.29630746393282925, "grad_norm": 0.14349432289600372, "learning_rate": 0.002, "loss": 2.3541, "step": 76650 }, { "epoch": 0.2963461211362125, "grad_norm": 0.09921904653310776, "learning_rate": 0.002, "loss": 2.3501, "step": 76660 }, { "epoch": 0.2963847783395958, "grad_norm": 0.1507887989282608, "learning_rate": 0.002, "loss": 2.3537, "step": 76670 }, { "epoch": 0.29642343554297906, "grad_norm": 0.10376685857772827, "learning_rate": 0.002, "loss": 2.3421, "step": 76680 }, { "epoch": 0.29646209274636237, "grad_norm": 0.12126835435628891, "learning_rate": 0.002, "loss": 2.3607, "step": 76690 }, { "epoch": 0.2965007499497456, "grad_norm": 0.10878845304250717, "learning_rate": 0.002, "loss": 2.3578, "step": 76700 }, { "epoch": 0.29653940715312893, "grad_norm": 0.12074249237775803, "learning_rate": 0.002, "loss": 2.3614, "step": 76710 }, { "epoch": 0.2965780643565122, "grad_norm": 0.11226803064346313, "learning_rate": 0.002, "loss": 2.349, "step": 76720 }, { "epoch": 0.2966167215598955, "grad_norm": 0.09781412780284882, "learning_rate": 0.002, "loss": 2.3474, "step": 76730 }, { "epoch": 0.29665537876327874, "grad_norm": 0.11269398033618927, "learning_rate": 0.002, "loss": 2.3571, "step": 76740 }, { "epoch": 0.29669403596666205, "grad_norm": 0.11323588341474533, "learning_rate": 0.002, "loss": 2.3567, "step": 76750 }, { "epoch": 0.2967326931700453, "grad_norm": 0.09751637279987335, "learning_rate": 0.002, "loss": 2.347, "step": 76760 }, { "epoch": 0.2967713503734286, "grad_norm": 0.10050305724143982, "learning_rate": 0.002, "loss": 2.3496, "step": 76770 }, { "epoch": 0.29681000757681186, "grad_norm": 0.10856390744447708, "learning_rate": 0.002, "loss": 2.3609, "step": 76780 }, { "epoch": 0.2968486647801951, "grad_norm": 0.12790358066558838, "learning_rate": 0.002, "loss": 2.3531, "step": 76790 }, { "epoch": 0.2968873219835784, "grad_norm": 0.1087295264005661, "learning_rate": 0.002, "loss": 2.3504, "step": 76800 }, { "epoch": 0.2969259791869617, "grad_norm": 0.11008793860673904, "learning_rate": 0.002, "loss": 2.3402, "step": 76810 }, { "epoch": 0.296964636390345, "grad_norm": 0.11568048596382141, "learning_rate": 0.002, "loss": 2.3667, "step": 76820 }, { "epoch": 0.29700329359372823, "grad_norm": 0.2546520531177521, "learning_rate": 0.002, "loss": 2.348, "step": 76830 }, { "epoch": 0.29704195079711154, "grad_norm": 0.10498040169477463, "learning_rate": 0.002, "loss": 2.3557, "step": 76840 }, { "epoch": 0.2970806080004948, "grad_norm": 0.10412923991680145, "learning_rate": 0.002, "loss": 2.3577, "step": 76850 }, { "epoch": 0.2971192652038781, "grad_norm": 0.10490905493497849, "learning_rate": 0.002, "loss": 2.3684, "step": 76860 }, { "epoch": 0.29715792240726135, "grad_norm": 0.11890088021755219, "learning_rate": 0.002, "loss": 2.3575, "step": 76870 }, { "epoch": 0.29719657961064466, "grad_norm": 0.1103493794798851, "learning_rate": 0.002, "loss": 2.3612, "step": 76880 }, { "epoch": 0.2972352368140279, "grad_norm": 0.1093481257557869, "learning_rate": 0.002, "loss": 2.3436, "step": 76890 }, { "epoch": 0.2972738940174112, "grad_norm": 0.11652281880378723, "learning_rate": 0.002, "loss": 2.3539, "step": 76900 }, { "epoch": 0.2973125512207945, "grad_norm": 0.10535074770450592, "learning_rate": 0.002, "loss": 2.3512, "step": 76910 }, { "epoch": 0.2973512084241778, "grad_norm": 0.11236576735973358, "learning_rate": 0.002, "loss": 2.3583, "step": 76920 }, { "epoch": 0.29738986562756103, "grad_norm": 0.11530499160289764, "learning_rate": 0.002, "loss": 2.3571, "step": 76930 }, { "epoch": 0.29742852283094434, "grad_norm": 0.15051789581775665, "learning_rate": 0.002, "loss": 2.376, "step": 76940 }, { "epoch": 0.2974671800343276, "grad_norm": 0.10237208753824234, "learning_rate": 0.002, "loss": 2.3657, "step": 76950 }, { "epoch": 0.2975058372377109, "grad_norm": 0.11303669959306717, "learning_rate": 0.002, "loss": 2.3584, "step": 76960 }, { "epoch": 0.29754449444109415, "grad_norm": 0.12454624474048615, "learning_rate": 0.002, "loss": 2.3616, "step": 76970 }, { "epoch": 0.2975831516444774, "grad_norm": 0.12306322157382965, "learning_rate": 0.002, "loss": 2.3402, "step": 76980 }, { "epoch": 0.2976218088478607, "grad_norm": 0.10339666157960892, "learning_rate": 0.002, "loss": 2.3573, "step": 76990 }, { "epoch": 0.29766046605124397, "grad_norm": 0.09617898613214493, "learning_rate": 0.002, "loss": 2.3529, "step": 77000 }, { "epoch": 0.2976991232546273, "grad_norm": 0.11983972787857056, "learning_rate": 0.002, "loss": 2.3505, "step": 77010 }, { "epoch": 0.2977377804580105, "grad_norm": 0.13938377797603607, "learning_rate": 0.002, "loss": 2.3476, "step": 77020 }, { "epoch": 0.29777643766139383, "grad_norm": 0.10169366747140884, "learning_rate": 0.002, "loss": 2.348, "step": 77030 }, { "epoch": 0.2978150948647771, "grad_norm": 0.1186128631234169, "learning_rate": 0.002, "loss": 2.346, "step": 77040 }, { "epoch": 0.2978537520681604, "grad_norm": 0.0991736575961113, "learning_rate": 0.002, "loss": 2.3482, "step": 77050 }, { "epoch": 0.29789240927154365, "grad_norm": 0.11100050061941147, "learning_rate": 0.002, "loss": 2.3613, "step": 77060 }, { "epoch": 0.29793106647492695, "grad_norm": 0.1000758484005928, "learning_rate": 0.002, "loss": 2.3486, "step": 77070 }, { "epoch": 0.2979697236783102, "grad_norm": 0.11656024307012558, "learning_rate": 0.002, "loss": 2.3509, "step": 77080 }, { "epoch": 0.2980083808816935, "grad_norm": 0.11905937641859055, "learning_rate": 0.002, "loss": 2.3544, "step": 77090 }, { "epoch": 0.29804703808507677, "grad_norm": 0.12053173780441284, "learning_rate": 0.002, "loss": 2.3492, "step": 77100 }, { "epoch": 0.2980856952884601, "grad_norm": 0.11425994336605072, "learning_rate": 0.002, "loss": 2.354, "step": 77110 }, { "epoch": 0.2981243524918433, "grad_norm": 0.1111239418387413, "learning_rate": 0.002, "loss": 2.3532, "step": 77120 }, { "epoch": 0.29816300969522663, "grad_norm": 0.11100706458091736, "learning_rate": 0.002, "loss": 2.3467, "step": 77130 }, { "epoch": 0.2982016668986099, "grad_norm": 0.10714425891637802, "learning_rate": 0.002, "loss": 2.3579, "step": 77140 }, { "epoch": 0.2982403241019932, "grad_norm": 0.10456814616918564, "learning_rate": 0.002, "loss": 2.3559, "step": 77150 }, { "epoch": 0.29827898130537644, "grad_norm": 0.10369396954774857, "learning_rate": 0.002, "loss": 2.3369, "step": 77160 }, { "epoch": 0.2983176385087597, "grad_norm": 0.12451314181089401, "learning_rate": 0.002, "loss": 2.3443, "step": 77170 }, { "epoch": 0.298356295712143, "grad_norm": 0.10822725296020508, "learning_rate": 0.002, "loss": 2.3613, "step": 77180 }, { "epoch": 0.29839495291552626, "grad_norm": 0.13343235850334167, "learning_rate": 0.002, "loss": 2.3474, "step": 77190 }, { "epoch": 0.29843361011890956, "grad_norm": 0.10224916785955429, "learning_rate": 0.002, "loss": 2.3566, "step": 77200 }, { "epoch": 0.2984722673222928, "grad_norm": 0.11366762965917587, "learning_rate": 0.002, "loss": 2.3607, "step": 77210 }, { "epoch": 0.2985109245256761, "grad_norm": 0.11645630747079849, "learning_rate": 0.002, "loss": 2.3471, "step": 77220 }, { "epoch": 0.2985495817290594, "grad_norm": 0.09775960445404053, "learning_rate": 0.002, "loss": 2.364, "step": 77230 }, { "epoch": 0.2985882389324427, "grad_norm": 0.11187317967414856, "learning_rate": 0.002, "loss": 2.349, "step": 77240 }, { "epoch": 0.29862689613582594, "grad_norm": 0.11537643522024155, "learning_rate": 0.002, "loss": 2.3545, "step": 77250 }, { "epoch": 0.29866555333920924, "grad_norm": 0.10838068276643753, "learning_rate": 0.002, "loss": 2.3429, "step": 77260 }, { "epoch": 0.2987042105425925, "grad_norm": 0.12092362344264984, "learning_rate": 0.002, "loss": 2.3758, "step": 77270 }, { "epoch": 0.2987428677459758, "grad_norm": 0.11384254693984985, "learning_rate": 0.002, "loss": 2.3421, "step": 77280 }, { "epoch": 0.29878152494935906, "grad_norm": 0.11899744719266891, "learning_rate": 0.002, "loss": 2.35, "step": 77290 }, { "epoch": 0.29882018215274236, "grad_norm": 0.12392907589673996, "learning_rate": 0.002, "loss": 2.3679, "step": 77300 }, { "epoch": 0.2988588393561256, "grad_norm": 0.1090821698307991, "learning_rate": 0.002, "loss": 2.3453, "step": 77310 }, { "epoch": 0.2988974965595089, "grad_norm": 0.13105599582195282, "learning_rate": 0.002, "loss": 2.3594, "step": 77320 }, { "epoch": 0.2989361537628922, "grad_norm": 0.11010351777076721, "learning_rate": 0.002, "loss": 2.3581, "step": 77330 }, { "epoch": 0.29897481096627543, "grad_norm": 0.10212308168411255, "learning_rate": 0.002, "loss": 2.34, "step": 77340 }, { "epoch": 0.29901346816965874, "grad_norm": 0.12034210562705994, "learning_rate": 0.002, "loss": 2.3666, "step": 77350 }, { "epoch": 0.299052125373042, "grad_norm": 0.10495106130838394, "learning_rate": 0.002, "loss": 2.3609, "step": 77360 }, { "epoch": 0.2990907825764253, "grad_norm": 0.10167660564184189, "learning_rate": 0.002, "loss": 2.347, "step": 77370 }, { "epoch": 0.29912943977980855, "grad_norm": 0.11317375302314758, "learning_rate": 0.002, "loss": 2.3486, "step": 77380 }, { "epoch": 0.29916809698319186, "grad_norm": 0.10915170609951019, "learning_rate": 0.002, "loss": 2.3379, "step": 77390 }, { "epoch": 0.2992067541865751, "grad_norm": 0.10342042148113251, "learning_rate": 0.002, "loss": 2.3434, "step": 77400 }, { "epoch": 0.2992454113899584, "grad_norm": 0.11812546849250793, "learning_rate": 0.002, "loss": 2.3507, "step": 77410 }, { "epoch": 0.29928406859334167, "grad_norm": 0.10676705092191696, "learning_rate": 0.002, "loss": 2.3616, "step": 77420 }, { "epoch": 0.299322725796725, "grad_norm": 0.11633370071649551, "learning_rate": 0.002, "loss": 2.3553, "step": 77430 }, { "epoch": 0.2993613830001082, "grad_norm": 0.10170099139213562, "learning_rate": 0.002, "loss": 2.3632, "step": 77440 }, { "epoch": 0.29940004020349154, "grad_norm": 0.13479268550872803, "learning_rate": 0.002, "loss": 2.3604, "step": 77450 }, { "epoch": 0.2994386974068748, "grad_norm": 0.11963546276092529, "learning_rate": 0.002, "loss": 2.3661, "step": 77460 }, { "epoch": 0.2994773546102581, "grad_norm": 0.12442842125892639, "learning_rate": 0.002, "loss": 2.3378, "step": 77470 }, { "epoch": 0.29951601181364135, "grad_norm": 0.1305210441350937, "learning_rate": 0.002, "loss": 2.3553, "step": 77480 }, { "epoch": 0.29955466901702466, "grad_norm": 0.10134751349687576, "learning_rate": 0.002, "loss": 2.3502, "step": 77490 }, { "epoch": 0.2995933262204079, "grad_norm": 0.11258494108915329, "learning_rate": 0.002, "loss": 2.3539, "step": 77500 }, { "epoch": 0.2996319834237912, "grad_norm": 0.11224709451198578, "learning_rate": 0.002, "loss": 2.346, "step": 77510 }, { "epoch": 0.29967064062717447, "grad_norm": 0.09768752753734589, "learning_rate": 0.002, "loss": 2.3523, "step": 77520 }, { "epoch": 0.2997092978305577, "grad_norm": 0.1073933094739914, "learning_rate": 0.002, "loss": 2.3612, "step": 77530 }, { "epoch": 0.299747955033941, "grad_norm": 0.10424616187810898, "learning_rate": 0.002, "loss": 2.3516, "step": 77540 }, { "epoch": 0.2997866122373243, "grad_norm": 0.12393969297409058, "learning_rate": 0.002, "loss": 2.3695, "step": 77550 }, { "epoch": 0.2998252694407076, "grad_norm": 0.09931936860084534, "learning_rate": 0.002, "loss": 2.3496, "step": 77560 }, { "epoch": 0.29986392664409084, "grad_norm": 0.09614407271146774, "learning_rate": 0.002, "loss": 2.3544, "step": 77570 }, { "epoch": 0.29990258384747415, "grad_norm": 0.12007300555706024, "learning_rate": 0.002, "loss": 2.3488, "step": 77580 }, { "epoch": 0.2999412410508574, "grad_norm": 0.14052484929561615, "learning_rate": 0.002, "loss": 2.3548, "step": 77590 }, { "epoch": 0.2999798982542407, "grad_norm": 0.12478945404291153, "learning_rate": 0.002, "loss": 2.3555, "step": 77600 }, { "epoch": 0.30001855545762396, "grad_norm": 0.09192482382059097, "learning_rate": 0.002, "loss": 2.3702, "step": 77610 }, { "epoch": 0.30005721266100727, "grad_norm": 0.13805346190929413, "learning_rate": 0.002, "loss": 2.3503, "step": 77620 }, { "epoch": 0.3000958698643905, "grad_norm": 0.10263418406248093, "learning_rate": 0.002, "loss": 2.3527, "step": 77630 }, { "epoch": 0.3001345270677738, "grad_norm": 0.11766941100358963, "learning_rate": 0.002, "loss": 2.3354, "step": 77640 }, { "epoch": 0.3001731842711571, "grad_norm": 0.1011272743344307, "learning_rate": 0.002, "loss": 2.3752, "step": 77650 }, { "epoch": 0.3002118414745404, "grad_norm": 0.11512536555528641, "learning_rate": 0.002, "loss": 2.3602, "step": 77660 }, { "epoch": 0.30025049867792364, "grad_norm": 0.11931298673152924, "learning_rate": 0.002, "loss": 2.3583, "step": 77670 }, { "epoch": 0.30028915588130695, "grad_norm": 0.11683052778244019, "learning_rate": 0.002, "loss": 2.3501, "step": 77680 }, { "epoch": 0.3003278130846902, "grad_norm": 0.10903342068195343, "learning_rate": 0.002, "loss": 2.3664, "step": 77690 }, { "epoch": 0.3003664702880735, "grad_norm": 0.11838795244693756, "learning_rate": 0.002, "loss": 2.357, "step": 77700 }, { "epoch": 0.30040512749145676, "grad_norm": 0.1060962900519371, "learning_rate": 0.002, "loss": 2.3576, "step": 77710 }, { "epoch": 0.30044378469484, "grad_norm": 0.09655400365591049, "learning_rate": 0.002, "loss": 2.3387, "step": 77720 }, { "epoch": 0.3004824418982233, "grad_norm": 0.12824280560016632, "learning_rate": 0.002, "loss": 2.3549, "step": 77730 }, { "epoch": 0.30052109910160657, "grad_norm": 0.10142549872398376, "learning_rate": 0.002, "loss": 2.3538, "step": 77740 }, { "epoch": 0.3005597563049899, "grad_norm": 0.10544409602880478, "learning_rate": 0.002, "loss": 2.3763, "step": 77750 }, { "epoch": 0.30059841350837313, "grad_norm": 0.10959829390048981, "learning_rate": 0.002, "loss": 2.3427, "step": 77760 }, { "epoch": 0.30063707071175644, "grad_norm": 0.11233651638031006, "learning_rate": 0.002, "loss": 2.3622, "step": 77770 }, { "epoch": 0.3006757279151397, "grad_norm": 0.14760273694992065, "learning_rate": 0.002, "loss": 2.3552, "step": 77780 }, { "epoch": 0.300714385118523, "grad_norm": 0.12075531482696533, "learning_rate": 0.002, "loss": 2.3641, "step": 77790 }, { "epoch": 0.30075304232190625, "grad_norm": 0.11051961779594421, "learning_rate": 0.002, "loss": 2.3522, "step": 77800 }, { "epoch": 0.30079169952528956, "grad_norm": 0.10270072519779205, "learning_rate": 0.002, "loss": 2.3451, "step": 77810 }, { "epoch": 0.3008303567286728, "grad_norm": 0.13468053936958313, "learning_rate": 0.002, "loss": 2.3499, "step": 77820 }, { "epoch": 0.3008690139320561, "grad_norm": 0.11314025521278381, "learning_rate": 0.002, "loss": 2.3544, "step": 77830 }, { "epoch": 0.30090767113543937, "grad_norm": 0.09416459500789642, "learning_rate": 0.002, "loss": 2.3564, "step": 77840 }, { "epoch": 0.3009463283388227, "grad_norm": 0.09706717729568481, "learning_rate": 0.002, "loss": 2.3471, "step": 77850 }, { "epoch": 0.30098498554220593, "grad_norm": 0.1195613220334053, "learning_rate": 0.002, "loss": 2.3451, "step": 77860 }, { "epoch": 0.30102364274558924, "grad_norm": 0.10404033958911896, "learning_rate": 0.002, "loss": 2.374, "step": 77870 }, { "epoch": 0.3010622999489725, "grad_norm": 0.10307558625936508, "learning_rate": 0.002, "loss": 2.3526, "step": 77880 }, { "epoch": 0.3011009571523558, "grad_norm": 0.10437768697738647, "learning_rate": 0.002, "loss": 2.3499, "step": 77890 }, { "epoch": 0.30113961435573905, "grad_norm": 0.11560779064893723, "learning_rate": 0.002, "loss": 2.344, "step": 77900 }, { "epoch": 0.3011782715591223, "grad_norm": 0.10063166171312332, "learning_rate": 0.002, "loss": 2.3702, "step": 77910 }, { "epoch": 0.3012169287625056, "grad_norm": 0.1272033303976059, "learning_rate": 0.002, "loss": 2.3419, "step": 77920 }, { "epoch": 0.30125558596588886, "grad_norm": 0.11612261831760406, "learning_rate": 0.002, "loss": 2.3484, "step": 77930 }, { "epoch": 0.30129424316927217, "grad_norm": 0.12208713591098785, "learning_rate": 0.002, "loss": 2.358, "step": 77940 }, { "epoch": 0.3013329003726554, "grad_norm": 0.10828813910484314, "learning_rate": 0.002, "loss": 2.3541, "step": 77950 }, { "epoch": 0.30137155757603873, "grad_norm": 0.11465118825435638, "learning_rate": 0.002, "loss": 2.3613, "step": 77960 }, { "epoch": 0.301410214779422, "grad_norm": 0.11285607516765594, "learning_rate": 0.002, "loss": 2.3686, "step": 77970 }, { "epoch": 0.3014488719828053, "grad_norm": 0.09235849976539612, "learning_rate": 0.002, "loss": 2.3499, "step": 77980 }, { "epoch": 0.30148752918618854, "grad_norm": 0.1082046627998352, "learning_rate": 0.002, "loss": 2.3513, "step": 77990 }, { "epoch": 0.30152618638957185, "grad_norm": 0.09929527342319489, "learning_rate": 0.002, "loss": 2.3567, "step": 78000 }, { "epoch": 0.3015648435929551, "grad_norm": 0.12402932345867157, "learning_rate": 0.002, "loss": 2.3443, "step": 78010 }, { "epoch": 0.3016035007963384, "grad_norm": 0.11547496914863586, "learning_rate": 0.002, "loss": 2.355, "step": 78020 }, { "epoch": 0.30164215799972166, "grad_norm": 0.10738544911146164, "learning_rate": 0.002, "loss": 2.346, "step": 78030 }, { "epoch": 0.30168081520310497, "grad_norm": 0.09993022680282593, "learning_rate": 0.002, "loss": 2.3426, "step": 78040 }, { "epoch": 0.3017194724064882, "grad_norm": 0.10725497454404831, "learning_rate": 0.002, "loss": 2.3513, "step": 78050 }, { "epoch": 0.30175812960987153, "grad_norm": 0.12580275535583496, "learning_rate": 0.002, "loss": 2.3686, "step": 78060 }, { "epoch": 0.3017967868132548, "grad_norm": 0.10700584203004837, "learning_rate": 0.002, "loss": 2.3654, "step": 78070 }, { "epoch": 0.30183544401663803, "grad_norm": 0.11347413808107376, "learning_rate": 0.002, "loss": 2.3537, "step": 78080 }, { "epoch": 0.30187410122002134, "grad_norm": 0.11956752836704254, "learning_rate": 0.002, "loss": 2.3466, "step": 78090 }, { "epoch": 0.3019127584234046, "grad_norm": 0.10778584331274033, "learning_rate": 0.002, "loss": 2.362, "step": 78100 }, { "epoch": 0.3019514156267879, "grad_norm": 0.11137451976537704, "learning_rate": 0.002, "loss": 2.3691, "step": 78110 }, { "epoch": 0.30199007283017115, "grad_norm": 0.12869513034820557, "learning_rate": 0.002, "loss": 2.3466, "step": 78120 }, { "epoch": 0.30202873003355446, "grad_norm": 0.10465627908706665, "learning_rate": 0.002, "loss": 2.3496, "step": 78130 }, { "epoch": 0.3020673872369377, "grad_norm": 0.11821458488702774, "learning_rate": 0.002, "loss": 2.3508, "step": 78140 }, { "epoch": 0.302106044440321, "grad_norm": 0.11543087661266327, "learning_rate": 0.002, "loss": 2.3627, "step": 78150 }, { "epoch": 0.30214470164370427, "grad_norm": 0.12336330860853195, "learning_rate": 0.002, "loss": 2.3426, "step": 78160 }, { "epoch": 0.3021833588470876, "grad_norm": 0.10734038800001144, "learning_rate": 0.002, "loss": 2.362, "step": 78170 }, { "epoch": 0.30222201605047083, "grad_norm": 0.10224812477827072, "learning_rate": 0.002, "loss": 2.3363, "step": 78180 }, { "epoch": 0.30226067325385414, "grad_norm": 0.11591426283121109, "learning_rate": 0.002, "loss": 2.3584, "step": 78190 }, { "epoch": 0.3022993304572374, "grad_norm": 0.09956828504800797, "learning_rate": 0.002, "loss": 2.3534, "step": 78200 }, { "epoch": 0.3023379876606207, "grad_norm": 0.1258390098810196, "learning_rate": 0.002, "loss": 2.3485, "step": 78210 }, { "epoch": 0.30237664486400395, "grad_norm": 0.1432776004076004, "learning_rate": 0.002, "loss": 2.3428, "step": 78220 }, { "epoch": 0.30241530206738726, "grad_norm": 0.1217392161488533, "learning_rate": 0.002, "loss": 2.363, "step": 78230 }, { "epoch": 0.3024539592707705, "grad_norm": 0.1125762015581131, "learning_rate": 0.002, "loss": 2.3493, "step": 78240 }, { "epoch": 0.3024926164741538, "grad_norm": 0.11079467087984085, "learning_rate": 0.002, "loss": 2.3582, "step": 78250 }, { "epoch": 0.30253127367753707, "grad_norm": 0.14208978414535522, "learning_rate": 0.002, "loss": 2.3525, "step": 78260 }, { "epoch": 0.3025699308809203, "grad_norm": 0.10825013369321823, "learning_rate": 0.002, "loss": 2.3468, "step": 78270 }, { "epoch": 0.30260858808430363, "grad_norm": 0.12011104822158813, "learning_rate": 0.002, "loss": 2.3577, "step": 78280 }, { "epoch": 0.3026472452876869, "grad_norm": 0.1183653324842453, "learning_rate": 0.002, "loss": 2.357, "step": 78290 }, { "epoch": 0.3026859024910702, "grad_norm": 0.1488329917192459, "learning_rate": 0.002, "loss": 2.34, "step": 78300 }, { "epoch": 0.30272455969445344, "grad_norm": 0.10774101316928864, "learning_rate": 0.002, "loss": 2.3469, "step": 78310 }, { "epoch": 0.30276321689783675, "grad_norm": 0.11320872604846954, "learning_rate": 0.002, "loss": 2.3528, "step": 78320 }, { "epoch": 0.30280187410122, "grad_norm": 0.12253223359584808, "learning_rate": 0.002, "loss": 2.3439, "step": 78330 }, { "epoch": 0.3028405313046033, "grad_norm": 0.1480216532945633, "learning_rate": 0.002, "loss": 2.368, "step": 78340 }, { "epoch": 0.30287918850798656, "grad_norm": 0.11408364027738571, "learning_rate": 0.002, "loss": 2.3563, "step": 78350 }, { "epoch": 0.30291784571136987, "grad_norm": 0.09980117529630661, "learning_rate": 0.002, "loss": 2.3367, "step": 78360 }, { "epoch": 0.3029565029147531, "grad_norm": 0.1159496158361435, "learning_rate": 0.002, "loss": 2.3644, "step": 78370 }, { "epoch": 0.30299516011813643, "grad_norm": 0.10608360916376114, "learning_rate": 0.002, "loss": 2.3466, "step": 78380 }, { "epoch": 0.3030338173215197, "grad_norm": 0.09666457027196884, "learning_rate": 0.002, "loss": 2.3456, "step": 78390 }, { "epoch": 0.303072474524903, "grad_norm": 0.1286255568265915, "learning_rate": 0.002, "loss": 2.3594, "step": 78400 }, { "epoch": 0.30311113172828624, "grad_norm": 0.14180733263492584, "learning_rate": 0.002, "loss": 2.3551, "step": 78410 }, { "epoch": 0.30314978893166955, "grad_norm": 0.12679146230220795, "learning_rate": 0.002, "loss": 2.3406, "step": 78420 }, { "epoch": 0.3031884461350528, "grad_norm": 0.09461666643619537, "learning_rate": 0.002, "loss": 2.3513, "step": 78430 }, { "epoch": 0.3032271033384361, "grad_norm": 0.17039163410663605, "learning_rate": 0.002, "loss": 2.3561, "step": 78440 }, { "epoch": 0.30326576054181936, "grad_norm": 0.11002831906080246, "learning_rate": 0.002, "loss": 2.3644, "step": 78450 }, { "epoch": 0.3033044177452026, "grad_norm": 0.10490234941244125, "learning_rate": 0.002, "loss": 2.3363, "step": 78460 }, { "epoch": 0.3033430749485859, "grad_norm": 0.10305456072092056, "learning_rate": 0.002, "loss": 2.3609, "step": 78470 }, { "epoch": 0.3033817321519692, "grad_norm": 0.13403716683387756, "learning_rate": 0.002, "loss": 2.3462, "step": 78480 }, { "epoch": 0.3034203893553525, "grad_norm": 0.09878389537334442, "learning_rate": 0.002, "loss": 2.3465, "step": 78490 }, { "epoch": 0.30345904655873573, "grad_norm": 0.10970155149698257, "learning_rate": 0.002, "loss": 2.3579, "step": 78500 }, { "epoch": 0.30349770376211904, "grad_norm": 0.10358286648988724, "learning_rate": 0.002, "loss": 2.3439, "step": 78510 }, { "epoch": 0.3035363609655023, "grad_norm": 0.1220836266875267, "learning_rate": 0.002, "loss": 2.3554, "step": 78520 }, { "epoch": 0.3035750181688856, "grad_norm": 0.10189653187990189, "learning_rate": 0.002, "loss": 2.3631, "step": 78530 }, { "epoch": 0.30361367537226885, "grad_norm": 0.1015162542462349, "learning_rate": 0.002, "loss": 2.3542, "step": 78540 }, { "epoch": 0.30365233257565216, "grad_norm": 0.11576369404792786, "learning_rate": 0.002, "loss": 2.3635, "step": 78550 }, { "epoch": 0.3036909897790354, "grad_norm": 0.18061217665672302, "learning_rate": 0.002, "loss": 2.3564, "step": 78560 }, { "epoch": 0.3037296469824187, "grad_norm": 0.1377808004617691, "learning_rate": 0.002, "loss": 2.3499, "step": 78570 }, { "epoch": 0.303768304185802, "grad_norm": 0.10999388247728348, "learning_rate": 0.002, "loss": 2.3553, "step": 78580 }, { "epoch": 0.3038069613891853, "grad_norm": 0.11197017133235931, "learning_rate": 0.002, "loss": 2.3616, "step": 78590 }, { "epoch": 0.30384561859256853, "grad_norm": 0.10789023339748383, "learning_rate": 0.002, "loss": 2.3523, "step": 78600 }, { "epoch": 0.30388427579595184, "grad_norm": 0.1047627180814743, "learning_rate": 0.002, "loss": 2.3343, "step": 78610 }, { "epoch": 0.3039229329993351, "grad_norm": 0.12693625688552856, "learning_rate": 0.002, "loss": 2.3454, "step": 78620 }, { "epoch": 0.3039615902027184, "grad_norm": 0.10265807062387466, "learning_rate": 0.002, "loss": 2.3457, "step": 78630 }, { "epoch": 0.30400024740610165, "grad_norm": 0.12135670334100723, "learning_rate": 0.002, "loss": 2.3582, "step": 78640 }, { "epoch": 0.3040389046094849, "grad_norm": 0.11370059847831726, "learning_rate": 0.002, "loss": 2.3503, "step": 78650 }, { "epoch": 0.3040775618128682, "grad_norm": 0.10926605761051178, "learning_rate": 0.002, "loss": 2.3557, "step": 78660 }, { "epoch": 0.30411621901625147, "grad_norm": 0.11264129728078842, "learning_rate": 0.002, "loss": 2.3521, "step": 78670 }, { "epoch": 0.3041548762196348, "grad_norm": 0.11051646620035172, "learning_rate": 0.002, "loss": 2.3559, "step": 78680 }, { "epoch": 0.304193533423018, "grad_norm": 0.11824481189250946, "learning_rate": 0.002, "loss": 2.3611, "step": 78690 }, { "epoch": 0.30423219062640133, "grad_norm": 0.11840855330228806, "learning_rate": 0.002, "loss": 2.3554, "step": 78700 }, { "epoch": 0.3042708478297846, "grad_norm": 0.12107060849666595, "learning_rate": 0.002, "loss": 2.3555, "step": 78710 }, { "epoch": 0.3043095050331679, "grad_norm": 0.11350583285093307, "learning_rate": 0.002, "loss": 2.3586, "step": 78720 }, { "epoch": 0.30434816223655115, "grad_norm": 0.11291664838790894, "learning_rate": 0.002, "loss": 2.3558, "step": 78730 }, { "epoch": 0.30438681943993445, "grad_norm": 0.1264103800058365, "learning_rate": 0.002, "loss": 2.3559, "step": 78740 }, { "epoch": 0.3044254766433177, "grad_norm": 0.10281990468502045, "learning_rate": 0.002, "loss": 2.3433, "step": 78750 }, { "epoch": 0.304464133846701, "grad_norm": 0.13915292918682098, "learning_rate": 0.002, "loss": 2.3678, "step": 78760 }, { "epoch": 0.30450279105008426, "grad_norm": 0.1522568017244339, "learning_rate": 0.002, "loss": 2.3713, "step": 78770 }, { "epoch": 0.3045414482534676, "grad_norm": 0.09680493921041489, "learning_rate": 0.002, "loss": 2.3594, "step": 78780 }, { "epoch": 0.3045801054568508, "grad_norm": 0.0962231457233429, "learning_rate": 0.002, "loss": 2.3576, "step": 78790 }, { "epoch": 0.30461876266023413, "grad_norm": 0.12869805097579956, "learning_rate": 0.002, "loss": 2.3435, "step": 78800 }, { "epoch": 0.3046574198636174, "grad_norm": 0.0894971638917923, "learning_rate": 0.002, "loss": 2.3597, "step": 78810 }, { "epoch": 0.3046960770670007, "grad_norm": 0.11182959377765656, "learning_rate": 0.002, "loss": 2.3635, "step": 78820 }, { "epoch": 0.30473473427038394, "grad_norm": 0.1098380908370018, "learning_rate": 0.002, "loss": 2.3474, "step": 78830 }, { "epoch": 0.3047733914737672, "grad_norm": 0.13078713417053223, "learning_rate": 0.002, "loss": 2.3575, "step": 78840 }, { "epoch": 0.3048120486771505, "grad_norm": 0.21705102920532227, "learning_rate": 0.002, "loss": 2.3538, "step": 78850 }, { "epoch": 0.30485070588053376, "grad_norm": 0.1491706222295761, "learning_rate": 0.002, "loss": 2.3536, "step": 78860 }, { "epoch": 0.30488936308391706, "grad_norm": 0.12133818864822388, "learning_rate": 0.002, "loss": 2.3507, "step": 78870 }, { "epoch": 0.3049280202873003, "grad_norm": 0.11912494152784348, "learning_rate": 0.002, "loss": 2.344, "step": 78880 }, { "epoch": 0.3049666774906836, "grad_norm": 0.11220871657133102, "learning_rate": 0.002, "loss": 2.3636, "step": 78890 }, { "epoch": 0.3050053346940669, "grad_norm": 0.13244259357452393, "learning_rate": 0.002, "loss": 2.3666, "step": 78900 }, { "epoch": 0.3050439918974502, "grad_norm": 0.11833749711513519, "learning_rate": 0.002, "loss": 2.3521, "step": 78910 }, { "epoch": 0.30508264910083344, "grad_norm": 0.1003163605928421, "learning_rate": 0.002, "loss": 2.3561, "step": 78920 }, { "epoch": 0.30512130630421674, "grad_norm": 0.11988737434148788, "learning_rate": 0.002, "loss": 2.3634, "step": 78930 }, { "epoch": 0.3051599635076, "grad_norm": 0.11213191598653793, "learning_rate": 0.002, "loss": 2.3571, "step": 78940 }, { "epoch": 0.3051986207109833, "grad_norm": 0.11382569372653961, "learning_rate": 0.002, "loss": 2.3568, "step": 78950 }, { "epoch": 0.30523727791436656, "grad_norm": 0.1199234127998352, "learning_rate": 0.002, "loss": 2.3565, "step": 78960 }, { "epoch": 0.30527593511774986, "grad_norm": 0.11544682085514069, "learning_rate": 0.002, "loss": 2.3443, "step": 78970 }, { "epoch": 0.3053145923211331, "grad_norm": 0.09563816338777542, "learning_rate": 0.002, "loss": 2.3668, "step": 78980 }, { "epoch": 0.3053532495245164, "grad_norm": 0.1085505336523056, "learning_rate": 0.002, "loss": 2.3546, "step": 78990 }, { "epoch": 0.3053919067278997, "grad_norm": 0.11609254777431488, "learning_rate": 0.002, "loss": 2.3486, "step": 79000 }, { "epoch": 0.30543056393128293, "grad_norm": 0.10157139599323273, "learning_rate": 0.002, "loss": 2.3536, "step": 79010 }, { "epoch": 0.30546922113466624, "grad_norm": 0.11894809454679489, "learning_rate": 0.002, "loss": 2.3742, "step": 79020 }, { "epoch": 0.3055078783380495, "grad_norm": 0.10765823721885681, "learning_rate": 0.002, "loss": 2.3674, "step": 79030 }, { "epoch": 0.3055465355414328, "grad_norm": 0.10459846258163452, "learning_rate": 0.002, "loss": 2.3758, "step": 79040 }, { "epoch": 0.30558519274481605, "grad_norm": 0.09812411665916443, "learning_rate": 0.002, "loss": 2.3542, "step": 79050 }, { "epoch": 0.30562384994819936, "grad_norm": 0.1044439822435379, "learning_rate": 0.002, "loss": 2.3586, "step": 79060 }, { "epoch": 0.3056625071515826, "grad_norm": 0.11629011482000351, "learning_rate": 0.002, "loss": 2.3605, "step": 79070 }, { "epoch": 0.3057011643549659, "grad_norm": 0.09759361296892166, "learning_rate": 0.002, "loss": 2.3508, "step": 79080 }, { "epoch": 0.30573982155834917, "grad_norm": 0.12417322397232056, "learning_rate": 0.002, "loss": 2.3677, "step": 79090 }, { "epoch": 0.3057784787617325, "grad_norm": 0.12513114511966705, "learning_rate": 0.002, "loss": 2.3594, "step": 79100 }, { "epoch": 0.3058171359651157, "grad_norm": 0.10083790868520737, "learning_rate": 0.002, "loss": 2.3584, "step": 79110 }, { "epoch": 0.30585579316849903, "grad_norm": 0.13127751648426056, "learning_rate": 0.002, "loss": 2.3516, "step": 79120 }, { "epoch": 0.3058944503718823, "grad_norm": 0.1046181470155716, "learning_rate": 0.002, "loss": 2.3559, "step": 79130 }, { "epoch": 0.3059331075752656, "grad_norm": 0.11963056027889252, "learning_rate": 0.002, "loss": 2.3551, "step": 79140 }, { "epoch": 0.30597176477864885, "grad_norm": 0.10972888022661209, "learning_rate": 0.002, "loss": 2.3767, "step": 79150 }, { "epoch": 0.30601042198203215, "grad_norm": 0.103045254945755, "learning_rate": 0.002, "loss": 2.3729, "step": 79160 }, { "epoch": 0.3060490791854154, "grad_norm": 0.12305113673210144, "learning_rate": 0.002, "loss": 2.3506, "step": 79170 }, { "epoch": 0.3060877363887987, "grad_norm": 0.11825665831565857, "learning_rate": 0.002, "loss": 2.3588, "step": 79180 }, { "epoch": 0.30612639359218197, "grad_norm": 0.10759798437356949, "learning_rate": 0.002, "loss": 2.3509, "step": 79190 }, { "epoch": 0.3061650507955652, "grad_norm": 0.14260214567184448, "learning_rate": 0.002, "loss": 2.3664, "step": 79200 }, { "epoch": 0.3062037079989485, "grad_norm": 0.10587218403816223, "learning_rate": 0.002, "loss": 2.3597, "step": 79210 }, { "epoch": 0.3062423652023318, "grad_norm": 0.11592812836170197, "learning_rate": 0.002, "loss": 2.3547, "step": 79220 }, { "epoch": 0.3062810224057151, "grad_norm": 0.11225926131010056, "learning_rate": 0.002, "loss": 2.3547, "step": 79230 }, { "epoch": 0.30631967960909834, "grad_norm": 0.10934487730264664, "learning_rate": 0.002, "loss": 2.3507, "step": 79240 }, { "epoch": 0.30635833681248165, "grad_norm": 0.10247712582349777, "learning_rate": 0.002, "loss": 2.3478, "step": 79250 }, { "epoch": 0.3063969940158649, "grad_norm": 0.10369041562080383, "learning_rate": 0.002, "loss": 2.3627, "step": 79260 }, { "epoch": 0.3064356512192482, "grad_norm": 0.10814987868070602, "learning_rate": 0.002, "loss": 2.3479, "step": 79270 }, { "epoch": 0.30647430842263146, "grad_norm": 0.11166869848966599, "learning_rate": 0.002, "loss": 2.3722, "step": 79280 }, { "epoch": 0.30651296562601477, "grad_norm": 0.11336232721805573, "learning_rate": 0.002, "loss": 2.3592, "step": 79290 }, { "epoch": 0.306551622829398, "grad_norm": 0.0989774540066719, "learning_rate": 0.002, "loss": 2.3466, "step": 79300 }, { "epoch": 0.3065902800327813, "grad_norm": 0.14258378744125366, "learning_rate": 0.002, "loss": 2.3743, "step": 79310 }, { "epoch": 0.3066289372361646, "grad_norm": 0.10717329382896423, "learning_rate": 0.002, "loss": 2.3554, "step": 79320 }, { "epoch": 0.3066675944395479, "grad_norm": 0.10588625818490982, "learning_rate": 0.002, "loss": 2.3562, "step": 79330 }, { "epoch": 0.30670625164293114, "grad_norm": 0.1122480183839798, "learning_rate": 0.002, "loss": 2.3717, "step": 79340 }, { "epoch": 0.30674490884631445, "grad_norm": 0.11568798869848251, "learning_rate": 0.002, "loss": 2.3366, "step": 79350 }, { "epoch": 0.3067835660496977, "grad_norm": 0.11585716903209686, "learning_rate": 0.002, "loss": 2.3578, "step": 79360 }, { "epoch": 0.306822223253081, "grad_norm": 0.10288020968437195, "learning_rate": 0.002, "loss": 2.3483, "step": 79370 }, { "epoch": 0.30686088045646426, "grad_norm": 0.09953874349594116, "learning_rate": 0.002, "loss": 2.3692, "step": 79380 }, { "epoch": 0.3068995376598475, "grad_norm": 0.10273531079292297, "learning_rate": 0.002, "loss": 2.3487, "step": 79390 }, { "epoch": 0.3069381948632308, "grad_norm": 0.12214646488428116, "learning_rate": 0.002, "loss": 2.363, "step": 79400 }, { "epoch": 0.30697685206661407, "grad_norm": 0.11876621097326279, "learning_rate": 0.002, "loss": 2.3613, "step": 79410 }, { "epoch": 0.3070155092699974, "grad_norm": 0.11528085172176361, "learning_rate": 0.002, "loss": 2.3693, "step": 79420 }, { "epoch": 0.30705416647338063, "grad_norm": 0.10709972679615021, "learning_rate": 0.002, "loss": 2.3538, "step": 79430 }, { "epoch": 0.30709282367676394, "grad_norm": 0.11976133286952972, "learning_rate": 0.002, "loss": 2.3558, "step": 79440 }, { "epoch": 0.3071314808801472, "grad_norm": 0.1332187056541443, "learning_rate": 0.002, "loss": 2.3575, "step": 79450 }, { "epoch": 0.3071701380835305, "grad_norm": 0.1383569836616516, "learning_rate": 0.002, "loss": 2.3414, "step": 79460 }, { "epoch": 0.30720879528691375, "grad_norm": 0.1024542897939682, "learning_rate": 0.002, "loss": 2.3491, "step": 79470 }, { "epoch": 0.30724745249029706, "grad_norm": 0.10686483234167099, "learning_rate": 0.002, "loss": 2.3628, "step": 79480 }, { "epoch": 0.3072861096936803, "grad_norm": 0.11085661500692368, "learning_rate": 0.002, "loss": 2.3535, "step": 79490 }, { "epoch": 0.3073247668970636, "grad_norm": 0.11389243602752686, "learning_rate": 0.002, "loss": 2.3537, "step": 79500 }, { "epoch": 0.30736342410044687, "grad_norm": 0.10158166289329529, "learning_rate": 0.002, "loss": 2.3635, "step": 79510 }, { "epoch": 0.3074020813038302, "grad_norm": 0.12603077292442322, "learning_rate": 0.002, "loss": 2.3473, "step": 79520 }, { "epoch": 0.30744073850721343, "grad_norm": 0.1034865453839302, "learning_rate": 0.002, "loss": 2.3502, "step": 79530 }, { "epoch": 0.30747939571059674, "grad_norm": 0.12243582308292389, "learning_rate": 0.002, "loss": 2.3579, "step": 79540 }, { "epoch": 0.30751805291398, "grad_norm": 0.09311690926551819, "learning_rate": 0.002, "loss": 2.3532, "step": 79550 }, { "epoch": 0.3075567101173633, "grad_norm": 0.12692442536354065, "learning_rate": 0.002, "loss": 2.3478, "step": 79560 }, { "epoch": 0.30759536732074655, "grad_norm": 0.12448505312204361, "learning_rate": 0.002, "loss": 2.3652, "step": 79570 }, { "epoch": 0.3076340245241298, "grad_norm": 0.12252768129110336, "learning_rate": 0.002, "loss": 2.3458, "step": 79580 }, { "epoch": 0.3076726817275131, "grad_norm": 0.09933305531740189, "learning_rate": 0.002, "loss": 2.3624, "step": 79590 }, { "epoch": 0.30771133893089636, "grad_norm": 0.12263075262308121, "learning_rate": 0.002, "loss": 2.3569, "step": 79600 }, { "epoch": 0.30774999613427967, "grad_norm": 0.10927719622850418, "learning_rate": 0.002, "loss": 2.3378, "step": 79610 }, { "epoch": 0.3077886533376629, "grad_norm": 0.10547421872615814, "learning_rate": 0.002, "loss": 2.3579, "step": 79620 }, { "epoch": 0.30782731054104623, "grad_norm": 0.10980977863073349, "learning_rate": 0.002, "loss": 2.3545, "step": 79630 }, { "epoch": 0.3078659677444295, "grad_norm": 0.10171563178300858, "learning_rate": 0.002, "loss": 2.3635, "step": 79640 }, { "epoch": 0.3079046249478128, "grad_norm": 0.11577558517456055, "learning_rate": 0.002, "loss": 2.3572, "step": 79650 }, { "epoch": 0.30794328215119604, "grad_norm": 0.10839863121509552, "learning_rate": 0.002, "loss": 2.3425, "step": 79660 }, { "epoch": 0.30798193935457935, "grad_norm": 0.11198613047599792, "learning_rate": 0.002, "loss": 2.359, "step": 79670 }, { "epoch": 0.3080205965579626, "grad_norm": 0.10426493734121323, "learning_rate": 0.002, "loss": 2.3361, "step": 79680 }, { "epoch": 0.3080592537613459, "grad_norm": 0.11114238947629929, "learning_rate": 0.002, "loss": 2.3483, "step": 79690 }, { "epoch": 0.30809791096472916, "grad_norm": 0.11620768904685974, "learning_rate": 0.002, "loss": 2.3488, "step": 79700 }, { "epoch": 0.30813656816811247, "grad_norm": 0.10813305526971817, "learning_rate": 0.002, "loss": 2.3534, "step": 79710 }, { "epoch": 0.3081752253714957, "grad_norm": 0.11177055537700653, "learning_rate": 0.002, "loss": 2.3701, "step": 79720 }, { "epoch": 0.30821388257487903, "grad_norm": 0.12193883955478668, "learning_rate": 0.002, "loss": 2.3586, "step": 79730 }, { "epoch": 0.3082525397782623, "grad_norm": 0.10940881073474884, "learning_rate": 0.002, "loss": 2.3544, "step": 79740 }, { "epoch": 0.30829119698164553, "grad_norm": 0.09680374711751938, "learning_rate": 0.002, "loss": 2.3561, "step": 79750 }, { "epoch": 0.30832985418502884, "grad_norm": 0.14382275938987732, "learning_rate": 0.002, "loss": 2.3482, "step": 79760 }, { "epoch": 0.3083685113884121, "grad_norm": 0.10033523291349411, "learning_rate": 0.002, "loss": 2.3443, "step": 79770 }, { "epoch": 0.3084071685917954, "grad_norm": 0.13805530965328217, "learning_rate": 0.002, "loss": 2.3582, "step": 79780 }, { "epoch": 0.30844582579517865, "grad_norm": 0.11651264876127243, "learning_rate": 0.002, "loss": 2.3502, "step": 79790 }, { "epoch": 0.30848448299856196, "grad_norm": 0.10696911066770554, "learning_rate": 0.002, "loss": 2.3587, "step": 79800 }, { "epoch": 0.3085231402019452, "grad_norm": 0.13329538702964783, "learning_rate": 0.002, "loss": 2.3512, "step": 79810 }, { "epoch": 0.3085617974053285, "grad_norm": 0.09899991005659103, "learning_rate": 0.002, "loss": 2.355, "step": 79820 }, { "epoch": 0.30860045460871177, "grad_norm": 0.11121714860200882, "learning_rate": 0.002, "loss": 2.3512, "step": 79830 }, { "epoch": 0.3086391118120951, "grad_norm": 0.10053054988384247, "learning_rate": 0.002, "loss": 2.3441, "step": 79840 }, { "epoch": 0.30867776901547833, "grad_norm": 0.13535206019878387, "learning_rate": 0.002, "loss": 2.3645, "step": 79850 }, { "epoch": 0.30871642621886164, "grad_norm": 0.10976517200469971, "learning_rate": 0.002, "loss": 2.3674, "step": 79860 }, { "epoch": 0.3087550834222449, "grad_norm": 0.12436148524284363, "learning_rate": 0.002, "loss": 2.3459, "step": 79870 }, { "epoch": 0.3087937406256282, "grad_norm": 0.10850168764591217, "learning_rate": 0.002, "loss": 2.3519, "step": 79880 }, { "epoch": 0.30883239782901145, "grad_norm": 0.1158076673746109, "learning_rate": 0.002, "loss": 2.3573, "step": 79890 }, { "epoch": 0.30887105503239476, "grad_norm": 0.10798767954111099, "learning_rate": 0.002, "loss": 2.3578, "step": 79900 }, { "epoch": 0.308909712235778, "grad_norm": 0.28459790349006653, "learning_rate": 0.002, "loss": 2.3639, "step": 79910 }, { "epoch": 0.3089483694391613, "grad_norm": 0.12084505707025528, "learning_rate": 0.002, "loss": 2.3508, "step": 79920 }, { "epoch": 0.30898702664254457, "grad_norm": 0.0929480493068695, "learning_rate": 0.002, "loss": 2.3521, "step": 79930 }, { "epoch": 0.3090256838459278, "grad_norm": 0.11760727316141129, "learning_rate": 0.002, "loss": 2.3517, "step": 79940 }, { "epoch": 0.30906434104931113, "grad_norm": 0.11596400290727615, "learning_rate": 0.002, "loss": 2.3417, "step": 79950 }, { "epoch": 0.3091029982526944, "grad_norm": 0.12344510108232498, "learning_rate": 0.002, "loss": 2.3567, "step": 79960 }, { "epoch": 0.3091416554560777, "grad_norm": 0.11655568331480026, "learning_rate": 0.002, "loss": 2.3584, "step": 79970 }, { "epoch": 0.30918031265946094, "grad_norm": 0.11264428496360779, "learning_rate": 0.002, "loss": 2.3416, "step": 79980 }, { "epoch": 0.30921896986284425, "grad_norm": 0.11132732033729553, "learning_rate": 0.002, "loss": 2.3621, "step": 79990 }, { "epoch": 0.3092576270662275, "grad_norm": 0.11722130328416824, "learning_rate": 0.002, "loss": 2.3517, "step": 80000 }, { "epoch": 0.3092962842696108, "grad_norm": 0.12687674164772034, "learning_rate": 0.002, "loss": 2.3521, "step": 80010 }, { "epoch": 0.30933494147299406, "grad_norm": 0.0991467535495758, "learning_rate": 0.002, "loss": 2.3453, "step": 80020 }, { "epoch": 0.30937359867637737, "grad_norm": 0.11307033151388168, "learning_rate": 0.002, "loss": 2.3592, "step": 80030 }, { "epoch": 0.3094122558797606, "grad_norm": 0.10980504751205444, "learning_rate": 0.002, "loss": 2.3406, "step": 80040 }, { "epoch": 0.30945091308314393, "grad_norm": 0.15489093959331512, "learning_rate": 0.002, "loss": 2.374, "step": 80050 }, { "epoch": 0.3094895702865272, "grad_norm": 0.10452447086572647, "learning_rate": 0.002, "loss": 2.3578, "step": 80060 }, { "epoch": 0.3095282274899105, "grad_norm": 0.12048021703958511, "learning_rate": 0.002, "loss": 2.3629, "step": 80070 }, { "epoch": 0.30956688469329374, "grad_norm": 0.10281501710414886, "learning_rate": 0.002, "loss": 2.3573, "step": 80080 }, { "epoch": 0.30960554189667705, "grad_norm": 0.10218989104032516, "learning_rate": 0.002, "loss": 2.3573, "step": 80090 }, { "epoch": 0.3096441991000603, "grad_norm": 0.10425220429897308, "learning_rate": 0.002, "loss": 2.3541, "step": 80100 }, { "epoch": 0.3096828563034436, "grad_norm": 0.11669370532035828, "learning_rate": 0.002, "loss": 2.3551, "step": 80110 }, { "epoch": 0.30972151350682686, "grad_norm": 0.10010499507188797, "learning_rate": 0.002, "loss": 2.3455, "step": 80120 }, { "epoch": 0.3097601707102101, "grad_norm": 0.12721529603004456, "learning_rate": 0.002, "loss": 2.3415, "step": 80130 }, { "epoch": 0.3097988279135934, "grad_norm": 0.10680372267961502, "learning_rate": 0.002, "loss": 2.3646, "step": 80140 }, { "epoch": 0.3098374851169767, "grad_norm": 0.12047622352838516, "learning_rate": 0.002, "loss": 2.371, "step": 80150 }, { "epoch": 0.30987614232036, "grad_norm": 0.11787277460098267, "learning_rate": 0.002, "loss": 2.3468, "step": 80160 }, { "epoch": 0.30991479952374323, "grad_norm": 0.1253037452697754, "learning_rate": 0.002, "loss": 2.3568, "step": 80170 }, { "epoch": 0.30995345672712654, "grad_norm": 0.11033058166503906, "learning_rate": 0.002, "loss": 2.3594, "step": 80180 }, { "epoch": 0.3099921139305098, "grad_norm": 0.10820924490690231, "learning_rate": 0.002, "loss": 2.3668, "step": 80190 }, { "epoch": 0.3100307711338931, "grad_norm": 0.10344351083040237, "learning_rate": 0.002, "loss": 2.3463, "step": 80200 }, { "epoch": 0.31006942833727635, "grad_norm": 0.13518910109996796, "learning_rate": 0.002, "loss": 2.3505, "step": 80210 }, { "epoch": 0.31010808554065966, "grad_norm": 0.11561016738414764, "learning_rate": 0.002, "loss": 2.3553, "step": 80220 }, { "epoch": 0.3101467427440429, "grad_norm": 0.11577355861663818, "learning_rate": 0.002, "loss": 2.3389, "step": 80230 }, { "epoch": 0.3101853999474262, "grad_norm": 0.1060025542974472, "learning_rate": 0.002, "loss": 2.3496, "step": 80240 }, { "epoch": 0.3102240571508095, "grad_norm": 0.11184864491224289, "learning_rate": 0.002, "loss": 2.3449, "step": 80250 }, { "epoch": 0.3102627143541928, "grad_norm": 0.11953794211149216, "learning_rate": 0.002, "loss": 2.3443, "step": 80260 }, { "epoch": 0.31030137155757603, "grad_norm": 0.11260048300027847, "learning_rate": 0.002, "loss": 2.3437, "step": 80270 }, { "epoch": 0.31034002876095934, "grad_norm": 0.11399795114994049, "learning_rate": 0.002, "loss": 2.3485, "step": 80280 }, { "epoch": 0.3103786859643426, "grad_norm": 0.10122570395469666, "learning_rate": 0.002, "loss": 2.3562, "step": 80290 }, { "epoch": 0.3104173431677259, "grad_norm": 0.10667862743139267, "learning_rate": 0.002, "loss": 2.3503, "step": 80300 }, { "epoch": 0.31045600037110915, "grad_norm": 0.10589800029993057, "learning_rate": 0.002, "loss": 2.3604, "step": 80310 }, { "epoch": 0.3104946575744924, "grad_norm": 0.10616010427474976, "learning_rate": 0.002, "loss": 2.358, "step": 80320 }, { "epoch": 0.3105333147778757, "grad_norm": 0.13653241097927094, "learning_rate": 0.002, "loss": 2.3369, "step": 80330 }, { "epoch": 0.31057197198125897, "grad_norm": 0.11566481739282608, "learning_rate": 0.002, "loss": 2.3371, "step": 80340 }, { "epoch": 0.3106106291846423, "grad_norm": 0.12117500603199005, "learning_rate": 0.002, "loss": 2.3474, "step": 80350 }, { "epoch": 0.3106492863880255, "grad_norm": 0.11408274620771408, "learning_rate": 0.002, "loss": 2.3481, "step": 80360 }, { "epoch": 0.31068794359140883, "grad_norm": 0.09207921475172043, "learning_rate": 0.002, "loss": 2.3638, "step": 80370 }, { "epoch": 0.3107266007947921, "grad_norm": 0.11657524853944778, "learning_rate": 0.002, "loss": 2.3498, "step": 80380 }, { "epoch": 0.3107652579981754, "grad_norm": 0.10842855274677277, "learning_rate": 0.002, "loss": 2.3657, "step": 80390 }, { "epoch": 0.31080391520155864, "grad_norm": 0.10754778236150742, "learning_rate": 0.002, "loss": 2.3536, "step": 80400 }, { "epoch": 0.31084257240494195, "grad_norm": 0.12526315450668335, "learning_rate": 0.002, "loss": 2.352, "step": 80410 }, { "epoch": 0.3108812296083252, "grad_norm": 0.10728145390748978, "learning_rate": 0.002, "loss": 2.366, "step": 80420 }, { "epoch": 0.3109198868117085, "grad_norm": 0.11626534909009933, "learning_rate": 0.002, "loss": 2.3329, "step": 80430 }, { "epoch": 0.31095854401509176, "grad_norm": 0.10921600461006165, "learning_rate": 0.002, "loss": 2.3598, "step": 80440 }, { "epoch": 0.3109972012184751, "grad_norm": 0.10860076546669006, "learning_rate": 0.002, "loss": 2.3307, "step": 80450 }, { "epoch": 0.3110358584218583, "grad_norm": 0.10265084356069565, "learning_rate": 0.002, "loss": 2.3414, "step": 80460 }, { "epoch": 0.31107451562524163, "grad_norm": 0.11555089056491852, "learning_rate": 0.002, "loss": 2.3523, "step": 80470 }, { "epoch": 0.3111131728286249, "grad_norm": 0.11266104876995087, "learning_rate": 0.002, "loss": 2.3456, "step": 80480 }, { "epoch": 0.3111518300320082, "grad_norm": 0.11351647228002548, "learning_rate": 0.002, "loss": 2.3537, "step": 80490 }, { "epoch": 0.31119048723539144, "grad_norm": 0.10889441519975662, "learning_rate": 0.002, "loss": 2.364, "step": 80500 }, { "epoch": 0.3112291444387747, "grad_norm": 0.11522519588470459, "learning_rate": 0.002, "loss": 2.3512, "step": 80510 }, { "epoch": 0.311267801642158, "grad_norm": 0.11582613736391068, "learning_rate": 0.002, "loss": 2.3543, "step": 80520 }, { "epoch": 0.31130645884554126, "grad_norm": 0.10472138971090317, "learning_rate": 0.002, "loss": 2.352, "step": 80530 }, { "epoch": 0.31134511604892456, "grad_norm": 0.0929357036948204, "learning_rate": 0.002, "loss": 2.3534, "step": 80540 }, { "epoch": 0.3113837732523078, "grad_norm": 0.12064792960882187, "learning_rate": 0.002, "loss": 2.3544, "step": 80550 }, { "epoch": 0.3114224304556911, "grad_norm": 0.09501705318689346, "learning_rate": 0.002, "loss": 2.3575, "step": 80560 }, { "epoch": 0.3114610876590744, "grad_norm": 0.10537790507078171, "learning_rate": 0.002, "loss": 2.3494, "step": 80570 }, { "epoch": 0.3114997448624577, "grad_norm": 0.1048625260591507, "learning_rate": 0.002, "loss": 2.3492, "step": 80580 }, { "epoch": 0.31153840206584094, "grad_norm": 0.114802785217762, "learning_rate": 0.002, "loss": 2.3458, "step": 80590 }, { "epoch": 0.31157705926922424, "grad_norm": 0.10885383188724518, "learning_rate": 0.002, "loss": 2.3605, "step": 80600 }, { "epoch": 0.3116157164726075, "grad_norm": 0.10919132828712463, "learning_rate": 0.002, "loss": 2.3606, "step": 80610 }, { "epoch": 0.3116543736759908, "grad_norm": 0.12491604685783386, "learning_rate": 0.002, "loss": 2.3405, "step": 80620 }, { "epoch": 0.31169303087937406, "grad_norm": 0.1278710514307022, "learning_rate": 0.002, "loss": 2.3772, "step": 80630 }, { "epoch": 0.31173168808275736, "grad_norm": 0.1066141352057457, "learning_rate": 0.002, "loss": 2.3443, "step": 80640 }, { "epoch": 0.3117703452861406, "grad_norm": 0.10080710053443909, "learning_rate": 0.002, "loss": 2.3496, "step": 80650 }, { "epoch": 0.3118090024895239, "grad_norm": 0.12186428159475327, "learning_rate": 0.002, "loss": 2.3474, "step": 80660 }, { "epoch": 0.3118476596929072, "grad_norm": 0.11064880341291428, "learning_rate": 0.002, "loss": 2.3532, "step": 80670 }, { "epoch": 0.3118863168962904, "grad_norm": 0.09742892533540726, "learning_rate": 0.002, "loss": 2.3321, "step": 80680 }, { "epoch": 0.31192497409967374, "grad_norm": 0.11213352531194687, "learning_rate": 0.002, "loss": 2.3467, "step": 80690 }, { "epoch": 0.311963631303057, "grad_norm": 0.10177134722471237, "learning_rate": 0.002, "loss": 2.3427, "step": 80700 }, { "epoch": 0.3120022885064403, "grad_norm": 0.11354486644268036, "learning_rate": 0.002, "loss": 2.3589, "step": 80710 }, { "epoch": 0.31204094570982355, "grad_norm": 0.12874093651771545, "learning_rate": 0.002, "loss": 2.3468, "step": 80720 }, { "epoch": 0.31207960291320685, "grad_norm": 0.12072081863880157, "learning_rate": 0.002, "loss": 2.3639, "step": 80730 }, { "epoch": 0.3121182601165901, "grad_norm": 0.10747833549976349, "learning_rate": 0.002, "loss": 2.3614, "step": 80740 }, { "epoch": 0.3121569173199734, "grad_norm": 0.1078917533159256, "learning_rate": 0.002, "loss": 2.3395, "step": 80750 }, { "epoch": 0.31219557452335667, "grad_norm": 0.11337679624557495, "learning_rate": 0.002, "loss": 2.3603, "step": 80760 }, { "epoch": 0.31223423172674, "grad_norm": 0.11525086313486099, "learning_rate": 0.002, "loss": 2.3572, "step": 80770 }, { "epoch": 0.3122728889301232, "grad_norm": 0.11412528157234192, "learning_rate": 0.002, "loss": 2.3488, "step": 80780 }, { "epoch": 0.31231154613350653, "grad_norm": 0.12456825375556946, "learning_rate": 0.002, "loss": 2.3736, "step": 80790 }, { "epoch": 0.3123502033368898, "grad_norm": 0.10382735729217529, "learning_rate": 0.002, "loss": 2.357, "step": 80800 }, { "epoch": 0.3123888605402731, "grad_norm": 0.12227677553892136, "learning_rate": 0.002, "loss": 2.349, "step": 80810 }, { "epoch": 0.31242751774365635, "grad_norm": 0.112001933157444, "learning_rate": 0.002, "loss": 2.3541, "step": 80820 }, { "epoch": 0.31246617494703965, "grad_norm": 0.13644681870937347, "learning_rate": 0.002, "loss": 2.3614, "step": 80830 }, { "epoch": 0.3125048321504229, "grad_norm": 0.10916776955127716, "learning_rate": 0.002, "loss": 2.3666, "step": 80840 }, { "epoch": 0.3125434893538062, "grad_norm": 0.11666324734687805, "learning_rate": 0.002, "loss": 2.3628, "step": 80850 }, { "epoch": 0.31258214655718947, "grad_norm": 0.12494746595621109, "learning_rate": 0.002, "loss": 2.3438, "step": 80860 }, { "epoch": 0.3126208037605727, "grad_norm": 0.10945124179124832, "learning_rate": 0.002, "loss": 2.3676, "step": 80870 }, { "epoch": 0.312659460963956, "grad_norm": 0.09687016904354095, "learning_rate": 0.002, "loss": 2.3625, "step": 80880 }, { "epoch": 0.3126981181673393, "grad_norm": 0.11021928489208221, "learning_rate": 0.002, "loss": 2.3565, "step": 80890 }, { "epoch": 0.3127367753707226, "grad_norm": 0.13564597070217133, "learning_rate": 0.002, "loss": 2.349, "step": 80900 }, { "epoch": 0.31277543257410584, "grad_norm": 0.10439195483922958, "learning_rate": 0.002, "loss": 2.3501, "step": 80910 }, { "epoch": 0.31281408977748915, "grad_norm": 0.10311180353164673, "learning_rate": 0.002, "loss": 2.3516, "step": 80920 }, { "epoch": 0.3128527469808724, "grad_norm": 0.12455517053604126, "learning_rate": 0.002, "loss": 2.3649, "step": 80930 }, { "epoch": 0.3128914041842557, "grad_norm": 0.11676601320505142, "learning_rate": 0.002, "loss": 2.3558, "step": 80940 }, { "epoch": 0.31293006138763896, "grad_norm": 0.14396095275878906, "learning_rate": 0.002, "loss": 2.3707, "step": 80950 }, { "epoch": 0.31296871859102227, "grad_norm": 0.11289811134338379, "learning_rate": 0.002, "loss": 2.3579, "step": 80960 }, { "epoch": 0.3130073757944055, "grad_norm": 0.1121041402220726, "learning_rate": 0.002, "loss": 2.359, "step": 80970 }, { "epoch": 0.3130460329977888, "grad_norm": 0.10463961213827133, "learning_rate": 0.002, "loss": 2.3609, "step": 80980 }, { "epoch": 0.3130846902011721, "grad_norm": 0.12142655998468399, "learning_rate": 0.002, "loss": 2.3554, "step": 80990 }, { "epoch": 0.3131233474045554, "grad_norm": 0.09865577518939972, "learning_rate": 0.002, "loss": 2.3422, "step": 81000 }, { "epoch": 0.31316200460793864, "grad_norm": 0.10284759849309921, "learning_rate": 0.002, "loss": 2.3498, "step": 81010 }, { "epoch": 0.31320066181132195, "grad_norm": 0.1375407725572586, "learning_rate": 0.002, "loss": 2.3649, "step": 81020 }, { "epoch": 0.3132393190147052, "grad_norm": 0.10573754459619522, "learning_rate": 0.002, "loss": 2.3561, "step": 81030 }, { "epoch": 0.3132779762180885, "grad_norm": 0.12139849364757538, "learning_rate": 0.002, "loss": 2.3687, "step": 81040 }, { "epoch": 0.31331663342147176, "grad_norm": 0.10832665115594864, "learning_rate": 0.002, "loss": 2.3667, "step": 81050 }, { "epoch": 0.313355290624855, "grad_norm": 0.11262708157300949, "learning_rate": 0.002, "loss": 2.3603, "step": 81060 }, { "epoch": 0.3133939478282383, "grad_norm": 0.1364460289478302, "learning_rate": 0.002, "loss": 2.3566, "step": 81070 }, { "epoch": 0.31343260503162157, "grad_norm": 0.10935191810131073, "learning_rate": 0.002, "loss": 2.3711, "step": 81080 }, { "epoch": 0.3134712622350049, "grad_norm": 0.10730627179145813, "learning_rate": 0.002, "loss": 2.3653, "step": 81090 }, { "epoch": 0.31350991943838813, "grad_norm": 0.13448749482631683, "learning_rate": 0.002, "loss": 2.3581, "step": 81100 }, { "epoch": 0.31354857664177144, "grad_norm": 0.11057820171117783, "learning_rate": 0.002, "loss": 2.3496, "step": 81110 }, { "epoch": 0.3135872338451547, "grad_norm": 0.12507446110248566, "learning_rate": 0.002, "loss": 2.3349, "step": 81120 }, { "epoch": 0.313625891048538, "grad_norm": 0.13825474679470062, "learning_rate": 0.002, "loss": 2.3547, "step": 81130 }, { "epoch": 0.31366454825192125, "grad_norm": 0.11821473389863968, "learning_rate": 0.002, "loss": 2.3519, "step": 81140 }, { "epoch": 0.31370320545530456, "grad_norm": 0.12365194410085678, "learning_rate": 0.002, "loss": 2.3613, "step": 81150 }, { "epoch": 0.3137418626586878, "grad_norm": 0.10776308923959732, "learning_rate": 0.002, "loss": 2.3501, "step": 81160 }, { "epoch": 0.3137805198620711, "grad_norm": 0.1093917042016983, "learning_rate": 0.002, "loss": 2.3487, "step": 81170 }, { "epoch": 0.31381917706545437, "grad_norm": 0.11749949306249619, "learning_rate": 0.002, "loss": 2.355, "step": 81180 }, { "epoch": 0.3138578342688377, "grad_norm": 0.10888249427080154, "learning_rate": 0.002, "loss": 2.3462, "step": 81190 }, { "epoch": 0.31389649147222093, "grad_norm": 0.12317828834056854, "learning_rate": 0.002, "loss": 2.3601, "step": 81200 }, { "epoch": 0.31393514867560424, "grad_norm": 0.10111136734485626, "learning_rate": 0.002, "loss": 2.3542, "step": 81210 }, { "epoch": 0.3139738058789875, "grad_norm": 0.11437378078699112, "learning_rate": 0.002, "loss": 2.3598, "step": 81220 }, { "epoch": 0.3140124630823708, "grad_norm": 0.11509440839290619, "learning_rate": 0.002, "loss": 2.361, "step": 81230 }, { "epoch": 0.31405112028575405, "grad_norm": 0.11991222202777863, "learning_rate": 0.002, "loss": 2.3498, "step": 81240 }, { "epoch": 0.3140897774891373, "grad_norm": 0.16248053312301636, "learning_rate": 0.002, "loss": 2.3636, "step": 81250 }, { "epoch": 0.3141284346925206, "grad_norm": 0.1027098223567009, "learning_rate": 0.002, "loss": 2.3566, "step": 81260 }, { "epoch": 0.31416709189590386, "grad_norm": 0.12584351003170013, "learning_rate": 0.002, "loss": 2.357, "step": 81270 }, { "epoch": 0.31420574909928717, "grad_norm": 0.10472863912582397, "learning_rate": 0.002, "loss": 2.3548, "step": 81280 }, { "epoch": 0.3142444063026704, "grad_norm": 0.112847700715065, "learning_rate": 0.002, "loss": 2.3469, "step": 81290 }, { "epoch": 0.31428306350605373, "grad_norm": 0.15817201137542725, "learning_rate": 0.002, "loss": 2.3605, "step": 81300 }, { "epoch": 0.314321720709437, "grad_norm": 0.09936563670635223, "learning_rate": 0.002, "loss": 2.3484, "step": 81310 }, { "epoch": 0.3143603779128203, "grad_norm": 0.10678784549236298, "learning_rate": 0.002, "loss": 2.3457, "step": 81320 }, { "epoch": 0.31439903511620354, "grad_norm": 0.10541539639234543, "learning_rate": 0.002, "loss": 2.3635, "step": 81330 }, { "epoch": 0.31443769231958685, "grad_norm": 0.1101844385266304, "learning_rate": 0.002, "loss": 2.3526, "step": 81340 }, { "epoch": 0.3144763495229701, "grad_norm": 0.1240110918879509, "learning_rate": 0.002, "loss": 2.3746, "step": 81350 }, { "epoch": 0.3145150067263534, "grad_norm": 0.09983433783054352, "learning_rate": 0.002, "loss": 2.3525, "step": 81360 }, { "epoch": 0.31455366392973666, "grad_norm": 0.1422368884086609, "learning_rate": 0.002, "loss": 2.3488, "step": 81370 }, { "epoch": 0.31459232113311997, "grad_norm": 0.10908190160989761, "learning_rate": 0.002, "loss": 2.3536, "step": 81380 }, { "epoch": 0.3146309783365032, "grad_norm": 0.09860409051179886, "learning_rate": 0.002, "loss": 2.3468, "step": 81390 }, { "epoch": 0.3146696355398865, "grad_norm": 0.111362025141716, "learning_rate": 0.002, "loss": 2.3563, "step": 81400 }, { "epoch": 0.3147082927432698, "grad_norm": 0.10219592601060867, "learning_rate": 0.002, "loss": 2.344, "step": 81410 }, { "epoch": 0.31474694994665303, "grad_norm": 0.12257792055606842, "learning_rate": 0.002, "loss": 2.3452, "step": 81420 }, { "epoch": 0.31478560715003634, "grad_norm": 0.216417133808136, "learning_rate": 0.002, "loss": 2.3658, "step": 81430 }, { "epoch": 0.3148242643534196, "grad_norm": 0.1161540076136589, "learning_rate": 0.002, "loss": 2.351, "step": 81440 }, { "epoch": 0.3148629215568029, "grad_norm": 0.10609345138072968, "learning_rate": 0.002, "loss": 2.3575, "step": 81450 }, { "epoch": 0.31490157876018615, "grad_norm": 0.10580547153949738, "learning_rate": 0.002, "loss": 2.3662, "step": 81460 }, { "epoch": 0.31494023596356946, "grad_norm": 0.11898103356361389, "learning_rate": 0.002, "loss": 2.3452, "step": 81470 }, { "epoch": 0.3149788931669527, "grad_norm": 0.10301675647497177, "learning_rate": 0.002, "loss": 2.3638, "step": 81480 }, { "epoch": 0.315017550370336, "grad_norm": 0.09303360432386398, "learning_rate": 0.002, "loss": 2.3522, "step": 81490 }, { "epoch": 0.31505620757371927, "grad_norm": 0.10423095524311066, "learning_rate": 0.002, "loss": 2.3585, "step": 81500 }, { "epoch": 0.3150948647771026, "grad_norm": 0.09868019819259644, "learning_rate": 0.002, "loss": 2.3634, "step": 81510 }, { "epoch": 0.31513352198048583, "grad_norm": 0.10548140853643417, "learning_rate": 0.002, "loss": 2.335, "step": 81520 }, { "epoch": 0.31517217918386914, "grad_norm": 0.16394978761672974, "learning_rate": 0.002, "loss": 2.3521, "step": 81530 }, { "epoch": 0.3152108363872524, "grad_norm": 0.12324749678373337, "learning_rate": 0.002, "loss": 2.3655, "step": 81540 }, { "epoch": 0.3152494935906357, "grad_norm": 0.10595546662807465, "learning_rate": 0.002, "loss": 2.3512, "step": 81550 }, { "epoch": 0.31528815079401895, "grad_norm": 0.12034602463245392, "learning_rate": 0.002, "loss": 2.3558, "step": 81560 }, { "epoch": 0.31532680799740226, "grad_norm": 0.11614970117807388, "learning_rate": 0.002, "loss": 2.3483, "step": 81570 }, { "epoch": 0.3153654652007855, "grad_norm": 0.1015036553144455, "learning_rate": 0.002, "loss": 2.3553, "step": 81580 }, { "epoch": 0.3154041224041688, "grad_norm": 0.10688374936580658, "learning_rate": 0.002, "loss": 2.3638, "step": 81590 }, { "epoch": 0.31544277960755207, "grad_norm": 0.10944467782974243, "learning_rate": 0.002, "loss": 2.3409, "step": 81600 }, { "epoch": 0.3154814368109353, "grad_norm": 0.10430245846509933, "learning_rate": 0.002, "loss": 2.3493, "step": 81610 }, { "epoch": 0.31552009401431863, "grad_norm": 0.09625860303640366, "learning_rate": 0.002, "loss": 2.348, "step": 81620 }, { "epoch": 0.3155587512177019, "grad_norm": 0.12270866334438324, "learning_rate": 0.002, "loss": 2.3486, "step": 81630 }, { "epoch": 0.3155974084210852, "grad_norm": 0.11860179901123047, "learning_rate": 0.002, "loss": 2.3503, "step": 81640 }, { "epoch": 0.31563606562446844, "grad_norm": 0.10587330907583237, "learning_rate": 0.002, "loss": 2.3707, "step": 81650 }, { "epoch": 0.31567472282785175, "grad_norm": 0.0979042649269104, "learning_rate": 0.002, "loss": 2.3574, "step": 81660 }, { "epoch": 0.315713380031235, "grad_norm": 0.11213138699531555, "learning_rate": 0.002, "loss": 2.3696, "step": 81670 }, { "epoch": 0.3157520372346183, "grad_norm": 0.11067654192447662, "learning_rate": 0.002, "loss": 2.3421, "step": 81680 }, { "epoch": 0.31579069443800156, "grad_norm": 0.1135677695274353, "learning_rate": 0.002, "loss": 2.3607, "step": 81690 }, { "epoch": 0.31582935164138487, "grad_norm": 0.10148513317108154, "learning_rate": 0.002, "loss": 2.3371, "step": 81700 }, { "epoch": 0.3158680088447681, "grad_norm": 0.09532175213098526, "learning_rate": 0.002, "loss": 2.3574, "step": 81710 }, { "epoch": 0.31590666604815143, "grad_norm": 0.10355016589164734, "learning_rate": 0.002, "loss": 2.3618, "step": 81720 }, { "epoch": 0.3159453232515347, "grad_norm": 0.11893083155155182, "learning_rate": 0.002, "loss": 2.3604, "step": 81730 }, { "epoch": 0.315983980454918, "grad_norm": 0.1218695119023323, "learning_rate": 0.002, "loss": 2.3561, "step": 81740 }, { "epoch": 0.31602263765830124, "grad_norm": 0.13430169224739075, "learning_rate": 0.002, "loss": 2.3598, "step": 81750 }, { "epoch": 0.31606129486168455, "grad_norm": 0.10158354789018631, "learning_rate": 0.002, "loss": 2.3584, "step": 81760 }, { "epoch": 0.3160999520650678, "grad_norm": 0.10698272287845612, "learning_rate": 0.002, "loss": 2.3458, "step": 81770 }, { "epoch": 0.3161386092684511, "grad_norm": 0.11060472577810287, "learning_rate": 0.002, "loss": 2.3641, "step": 81780 }, { "epoch": 0.31617726647183436, "grad_norm": 0.1195729449391365, "learning_rate": 0.002, "loss": 2.3572, "step": 81790 }, { "epoch": 0.3162159236752176, "grad_norm": 0.10561022907495499, "learning_rate": 0.002, "loss": 2.3736, "step": 81800 }, { "epoch": 0.3162545808786009, "grad_norm": 0.10653810948133469, "learning_rate": 0.002, "loss": 2.3597, "step": 81810 }, { "epoch": 0.3162932380819842, "grad_norm": 0.14653615653514862, "learning_rate": 0.002, "loss": 2.3544, "step": 81820 }, { "epoch": 0.3163318952853675, "grad_norm": 0.12520267069339752, "learning_rate": 0.002, "loss": 2.3553, "step": 81830 }, { "epoch": 0.31637055248875073, "grad_norm": 0.10568630695343018, "learning_rate": 0.002, "loss": 2.3645, "step": 81840 }, { "epoch": 0.31640920969213404, "grad_norm": 0.1051153689622879, "learning_rate": 0.002, "loss": 2.3634, "step": 81850 }, { "epoch": 0.3164478668955173, "grad_norm": 0.13551384210586548, "learning_rate": 0.002, "loss": 2.3617, "step": 81860 }, { "epoch": 0.3164865240989006, "grad_norm": 0.10480249673128128, "learning_rate": 0.002, "loss": 2.3552, "step": 81870 }, { "epoch": 0.31652518130228385, "grad_norm": 0.09846390038728714, "learning_rate": 0.002, "loss": 2.3597, "step": 81880 }, { "epoch": 0.31656383850566716, "grad_norm": 0.12012314796447754, "learning_rate": 0.002, "loss": 2.3741, "step": 81890 }, { "epoch": 0.3166024957090504, "grad_norm": 0.10799364000558853, "learning_rate": 0.002, "loss": 2.3486, "step": 81900 }, { "epoch": 0.3166411529124337, "grad_norm": 0.1147104874253273, "learning_rate": 0.002, "loss": 2.3648, "step": 81910 }, { "epoch": 0.316679810115817, "grad_norm": 0.09747300297021866, "learning_rate": 0.002, "loss": 2.3545, "step": 81920 }, { "epoch": 0.3167184673192003, "grad_norm": 0.11662698537111282, "learning_rate": 0.002, "loss": 2.3626, "step": 81930 }, { "epoch": 0.31675712452258353, "grad_norm": 0.10373350977897644, "learning_rate": 0.002, "loss": 2.3576, "step": 81940 }, { "epoch": 0.31679578172596684, "grad_norm": 0.10500016063451767, "learning_rate": 0.002, "loss": 2.3579, "step": 81950 }, { "epoch": 0.3168344389293501, "grad_norm": 0.10469872504472733, "learning_rate": 0.002, "loss": 2.3626, "step": 81960 }, { "epoch": 0.3168730961327334, "grad_norm": 0.11039718240499496, "learning_rate": 0.002, "loss": 2.347, "step": 81970 }, { "epoch": 0.31691175333611665, "grad_norm": 0.13921773433685303, "learning_rate": 0.002, "loss": 2.3456, "step": 81980 }, { "epoch": 0.3169504105394999, "grad_norm": 0.12752720713615417, "learning_rate": 0.002, "loss": 2.372, "step": 81990 }, { "epoch": 0.3169890677428832, "grad_norm": 0.10381372272968292, "learning_rate": 0.002, "loss": 2.349, "step": 82000 }, { "epoch": 0.31702772494626646, "grad_norm": 0.09850414842367172, "learning_rate": 0.002, "loss": 2.3699, "step": 82010 }, { "epoch": 0.3170663821496498, "grad_norm": 0.12372690439224243, "learning_rate": 0.002, "loss": 2.3663, "step": 82020 }, { "epoch": 0.317105039353033, "grad_norm": 0.10919710248708725, "learning_rate": 0.002, "loss": 2.3498, "step": 82030 }, { "epoch": 0.31714369655641633, "grad_norm": 0.10725739598274231, "learning_rate": 0.002, "loss": 2.3559, "step": 82040 }, { "epoch": 0.3171823537597996, "grad_norm": 0.11192844808101654, "learning_rate": 0.002, "loss": 2.3612, "step": 82050 }, { "epoch": 0.3172210109631829, "grad_norm": 0.10848018527030945, "learning_rate": 0.002, "loss": 2.345, "step": 82060 }, { "epoch": 0.31725966816656614, "grad_norm": 0.11807937920093536, "learning_rate": 0.002, "loss": 2.3639, "step": 82070 }, { "epoch": 0.31729832536994945, "grad_norm": 0.09818805009126663, "learning_rate": 0.002, "loss": 2.344, "step": 82080 }, { "epoch": 0.3173369825733327, "grad_norm": 0.1123327910900116, "learning_rate": 0.002, "loss": 2.3552, "step": 82090 }, { "epoch": 0.317375639776716, "grad_norm": 0.114064522087574, "learning_rate": 0.002, "loss": 2.3654, "step": 82100 }, { "epoch": 0.31741429698009926, "grad_norm": 0.1075097844004631, "learning_rate": 0.002, "loss": 2.3626, "step": 82110 }, { "epoch": 0.31745295418348257, "grad_norm": 0.1298528015613556, "learning_rate": 0.002, "loss": 2.3514, "step": 82120 }, { "epoch": 0.3174916113868658, "grad_norm": 0.10509567707777023, "learning_rate": 0.002, "loss": 2.3704, "step": 82130 }, { "epoch": 0.31753026859024913, "grad_norm": 0.09571907669305801, "learning_rate": 0.002, "loss": 2.3568, "step": 82140 }, { "epoch": 0.3175689257936324, "grad_norm": 0.11480700224637985, "learning_rate": 0.002, "loss": 2.3283, "step": 82150 }, { "epoch": 0.3176075829970157, "grad_norm": 0.10877734422683716, "learning_rate": 0.002, "loss": 2.343, "step": 82160 }, { "epoch": 0.31764624020039894, "grad_norm": 0.10204993933439255, "learning_rate": 0.002, "loss": 2.3637, "step": 82170 }, { "epoch": 0.3176848974037822, "grad_norm": 0.128915473818779, "learning_rate": 0.002, "loss": 2.3521, "step": 82180 }, { "epoch": 0.3177235546071655, "grad_norm": 0.11168088018894196, "learning_rate": 0.002, "loss": 2.3586, "step": 82190 }, { "epoch": 0.31776221181054876, "grad_norm": 0.11000935733318329, "learning_rate": 0.002, "loss": 2.3659, "step": 82200 }, { "epoch": 0.31780086901393206, "grad_norm": 0.10184872150421143, "learning_rate": 0.002, "loss": 2.3548, "step": 82210 }, { "epoch": 0.3178395262173153, "grad_norm": 0.11816468834877014, "learning_rate": 0.002, "loss": 2.3565, "step": 82220 }, { "epoch": 0.3178781834206986, "grad_norm": 0.11335434019565582, "learning_rate": 0.002, "loss": 2.3402, "step": 82230 }, { "epoch": 0.3179168406240819, "grad_norm": 0.10691909492015839, "learning_rate": 0.002, "loss": 2.3514, "step": 82240 }, { "epoch": 0.3179554978274652, "grad_norm": 0.1318056583404541, "learning_rate": 0.002, "loss": 2.3527, "step": 82250 }, { "epoch": 0.31799415503084844, "grad_norm": 0.11883347481489182, "learning_rate": 0.002, "loss": 2.3569, "step": 82260 }, { "epoch": 0.31803281223423174, "grad_norm": 0.10550377517938614, "learning_rate": 0.002, "loss": 2.3623, "step": 82270 }, { "epoch": 0.318071469437615, "grad_norm": 0.11388055980205536, "learning_rate": 0.002, "loss": 2.3527, "step": 82280 }, { "epoch": 0.3181101266409983, "grad_norm": 0.10221295803785324, "learning_rate": 0.002, "loss": 2.344, "step": 82290 }, { "epoch": 0.31814878384438156, "grad_norm": 0.09508053958415985, "learning_rate": 0.002, "loss": 2.355, "step": 82300 }, { "epoch": 0.31818744104776486, "grad_norm": 0.14823344349861145, "learning_rate": 0.002, "loss": 2.3476, "step": 82310 }, { "epoch": 0.3182260982511481, "grad_norm": 0.11601465940475464, "learning_rate": 0.002, "loss": 2.3544, "step": 82320 }, { "epoch": 0.3182647554545314, "grad_norm": 0.10855911672115326, "learning_rate": 0.002, "loss": 2.3508, "step": 82330 }, { "epoch": 0.3183034126579147, "grad_norm": 0.11325045675039291, "learning_rate": 0.002, "loss": 2.3589, "step": 82340 }, { "epoch": 0.3183420698612979, "grad_norm": 0.12187658250331879, "learning_rate": 0.002, "loss": 2.3661, "step": 82350 }, { "epoch": 0.31838072706468123, "grad_norm": 0.10567446798086166, "learning_rate": 0.002, "loss": 2.3584, "step": 82360 }, { "epoch": 0.3184193842680645, "grad_norm": 0.119540274143219, "learning_rate": 0.002, "loss": 2.3566, "step": 82370 }, { "epoch": 0.3184580414714478, "grad_norm": 0.10615716874599457, "learning_rate": 0.002, "loss": 2.3289, "step": 82380 }, { "epoch": 0.31849669867483105, "grad_norm": 0.1109447181224823, "learning_rate": 0.002, "loss": 2.3525, "step": 82390 }, { "epoch": 0.31853535587821435, "grad_norm": 0.12240978330373764, "learning_rate": 0.002, "loss": 2.3598, "step": 82400 }, { "epoch": 0.3185740130815976, "grad_norm": 0.1345326006412506, "learning_rate": 0.002, "loss": 2.3575, "step": 82410 }, { "epoch": 0.3186126702849809, "grad_norm": 0.12959595024585724, "learning_rate": 0.002, "loss": 2.3553, "step": 82420 }, { "epoch": 0.31865132748836417, "grad_norm": 0.12424161285161972, "learning_rate": 0.002, "loss": 2.3523, "step": 82430 }, { "epoch": 0.3186899846917475, "grad_norm": 0.11147624999284744, "learning_rate": 0.002, "loss": 2.3529, "step": 82440 }, { "epoch": 0.3187286418951307, "grad_norm": 0.11418869346380234, "learning_rate": 0.002, "loss": 2.3629, "step": 82450 }, { "epoch": 0.31876729909851403, "grad_norm": 0.112371526658535, "learning_rate": 0.002, "loss": 2.3585, "step": 82460 }, { "epoch": 0.3188059563018973, "grad_norm": 0.13134798407554626, "learning_rate": 0.002, "loss": 2.3577, "step": 82470 }, { "epoch": 0.3188446135052806, "grad_norm": 0.11740058660507202, "learning_rate": 0.002, "loss": 2.3594, "step": 82480 }, { "epoch": 0.31888327070866385, "grad_norm": 0.11432835459709167, "learning_rate": 0.002, "loss": 2.3418, "step": 82490 }, { "epoch": 0.31892192791204715, "grad_norm": 0.10555962473154068, "learning_rate": 0.002, "loss": 2.356, "step": 82500 }, { "epoch": 0.3189605851154304, "grad_norm": 0.10396882146596909, "learning_rate": 0.002, "loss": 2.3676, "step": 82510 }, { "epoch": 0.3189992423188137, "grad_norm": 0.10428617149591446, "learning_rate": 0.002, "loss": 2.3592, "step": 82520 }, { "epoch": 0.31903789952219697, "grad_norm": 0.12038405984640121, "learning_rate": 0.002, "loss": 2.3596, "step": 82530 }, { "epoch": 0.3190765567255802, "grad_norm": 0.10019529610872269, "learning_rate": 0.002, "loss": 2.3494, "step": 82540 }, { "epoch": 0.3191152139289635, "grad_norm": 0.1231880858540535, "learning_rate": 0.002, "loss": 2.3526, "step": 82550 }, { "epoch": 0.3191538711323468, "grad_norm": 0.09879335016012192, "learning_rate": 0.002, "loss": 2.348, "step": 82560 }, { "epoch": 0.3191925283357301, "grad_norm": 0.1070961058139801, "learning_rate": 0.002, "loss": 2.345, "step": 82570 }, { "epoch": 0.31923118553911334, "grad_norm": 0.14890055358409882, "learning_rate": 0.002, "loss": 2.3498, "step": 82580 }, { "epoch": 0.31926984274249665, "grad_norm": 0.11926935613155365, "learning_rate": 0.002, "loss": 2.3529, "step": 82590 }, { "epoch": 0.3193084999458799, "grad_norm": 0.10481767356395721, "learning_rate": 0.002, "loss": 2.3553, "step": 82600 }, { "epoch": 0.3193471571492632, "grad_norm": 0.09248179197311401, "learning_rate": 0.002, "loss": 2.3566, "step": 82610 }, { "epoch": 0.31938581435264646, "grad_norm": 0.11034086346626282, "learning_rate": 0.002, "loss": 2.3481, "step": 82620 }, { "epoch": 0.31942447155602977, "grad_norm": 0.15140187740325928, "learning_rate": 0.002, "loss": 2.3438, "step": 82630 }, { "epoch": 0.319463128759413, "grad_norm": 0.12119888514280319, "learning_rate": 0.002, "loss": 2.344, "step": 82640 }, { "epoch": 0.3195017859627963, "grad_norm": 0.12400045245885849, "learning_rate": 0.002, "loss": 2.349, "step": 82650 }, { "epoch": 0.3195404431661796, "grad_norm": 0.10955554991960526, "learning_rate": 0.002, "loss": 2.3488, "step": 82660 }, { "epoch": 0.3195791003695629, "grad_norm": 0.1160394623875618, "learning_rate": 0.002, "loss": 2.3525, "step": 82670 }, { "epoch": 0.31961775757294614, "grad_norm": 0.1217651292681694, "learning_rate": 0.002, "loss": 2.3447, "step": 82680 }, { "epoch": 0.31965641477632945, "grad_norm": 0.11645574867725372, "learning_rate": 0.002, "loss": 2.3643, "step": 82690 }, { "epoch": 0.3196950719797127, "grad_norm": 0.10777121782302856, "learning_rate": 0.002, "loss": 2.3443, "step": 82700 }, { "epoch": 0.319733729183096, "grad_norm": 0.10144933313131332, "learning_rate": 0.002, "loss": 2.3537, "step": 82710 }, { "epoch": 0.31977238638647926, "grad_norm": 0.13218508660793304, "learning_rate": 0.002, "loss": 2.3622, "step": 82720 }, { "epoch": 0.3198110435898625, "grad_norm": 0.125457301735878, "learning_rate": 0.002, "loss": 2.3451, "step": 82730 }, { "epoch": 0.3198497007932458, "grad_norm": 0.11357942223548889, "learning_rate": 0.002, "loss": 2.3637, "step": 82740 }, { "epoch": 0.31988835799662907, "grad_norm": 0.11013224720954895, "learning_rate": 0.002, "loss": 2.3454, "step": 82750 }, { "epoch": 0.3199270152000124, "grad_norm": 0.11626985669136047, "learning_rate": 0.002, "loss": 2.3555, "step": 82760 }, { "epoch": 0.31996567240339563, "grad_norm": 0.11156153678894043, "learning_rate": 0.002, "loss": 2.3564, "step": 82770 }, { "epoch": 0.32000432960677894, "grad_norm": 0.14560703933238983, "learning_rate": 0.002, "loss": 2.3635, "step": 82780 }, { "epoch": 0.3200429868101622, "grad_norm": 0.10113231837749481, "learning_rate": 0.002, "loss": 2.3526, "step": 82790 }, { "epoch": 0.3200816440135455, "grad_norm": 0.10319309681653976, "learning_rate": 0.002, "loss": 2.3598, "step": 82800 }, { "epoch": 0.32012030121692875, "grad_norm": 0.12566417455673218, "learning_rate": 0.002, "loss": 2.3419, "step": 82810 }, { "epoch": 0.32015895842031206, "grad_norm": 0.0948578342795372, "learning_rate": 0.002, "loss": 2.3416, "step": 82820 }, { "epoch": 0.3201976156236953, "grad_norm": 0.11071591079235077, "learning_rate": 0.002, "loss": 2.358, "step": 82830 }, { "epoch": 0.3202362728270786, "grad_norm": 0.09550026059150696, "learning_rate": 0.002, "loss": 2.3451, "step": 82840 }, { "epoch": 0.32027493003046187, "grad_norm": 0.10873312503099442, "learning_rate": 0.002, "loss": 2.3538, "step": 82850 }, { "epoch": 0.3203135872338452, "grad_norm": 0.09585629403591156, "learning_rate": 0.002, "loss": 2.3319, "step": 82860 }, { "epoch": 0.32035224443722843, "grad_norm": 0.1324733942747116, "learning_rate": 0.002, "loss": 2.3571, "step": 82870 }, { "epoch": 0.32039090164061174, "grad_norm": 0.10929979383945465, "learning_rate": 0.002, "loss": 2.3579, "step": 82880 }, { "epoch": 0.320429558843995, "grad_norm": 0.09126242995262146, "learning_rate": 0.002, "loss": 2.3578, "step": 82890 }, { "epoch": 0.3204682160473783, "grad_norm": 0.12022841721773148, "learning_rate": 0.002, "loss": 2.3588, "step": 82900 }, { "epoch": 0.32050687325076155, "grad_norm": 0.12190208584070206, "learning_rate": 0.002, "loss": 2.3541, "step": 82910 }, { "epoch": 0.3205455304541448, "grad_norm": 0.11144687980413437, "learning_rate": 0.002, "loss": 2.3507, "step": 82920 }, { "epoch": 0.3205841876575281, "grad_norm": 0.12261205166578293, "learning_rate": 0.002, "loss": 2.3654, "step": 82930 }, { "epoch": 0.32062284486091136, "grad_norm": 0.10348153114318848, "learning_rate": 0.002, "loss": 2.3583, "step": 82940 }, { "epoch": 0.32066150206429467, "grad_norm": 0.10360660403966904, "learning_rate": 0.002, "loss": 2.3686, "step": 82950 }, { "epoch": 0.3207001592676779, "grad_norm": 0.1277213990688324, "learning_rate": 0.002, "loss": 2.3626, "step": 82960 }, { "epoch": 0.3207388164710612, "grad_norm": 0.11334872245788574, "learning_rate": 0.002, "loss": 2.3488, "step": 82970 }, { "epoch": 0.3207774736744445, "grad_norm": 0.22948279976844788, "learning_rate": 0.002, "loss": 2.3499, "step": 82980 }, { "epoch": 0.3208161308778278, "grad_norm": 0.09425721317529678, "learning_rate": 0.002, "loss": 2.3466, "step": 82990 }, { "epoch": 0.32085478808121104, "grad_norm": 0.13531698286533356, "learning_rate": 0.002, "loss": 2.3623, "step": 83000 }, { "epoch": 0.32089344528459435, "grad_norm": 0.0998660996556282, "learning_rate": 0.002, "loss": 2.3505, "step": 83010 }, { "epoch": 0.3209321024879776, "grad_norm": 0.1346549391746521, "learning_rate": 0.002, "loss": 2.3384, "step": 83020 }, { "epoch": 0.3209707596913609, "grad_norm": 0.11035139858722687, "learning_rate": 0.002, "loss": 2.3541, "step": 83030 }, { "epoch": 0.32100941689474416, "grad_norm": 0.11539337784051895, "learning_rate": 0.002, "loss": 2.3523, "step": 83040 }, { "epoch": 0.32104807409812747, "grad_norm": 0.11156875640153885, "learning_rate": 0.002, "loss": 2.3653, "step": 83050 }, { "epoch": 0.3210867313015107, "grad_norm": 0.11881548166275024, "learning_rate": 0.002, "loss": 2.3656, "step": 83060 }, { "epoch": 0.321125388504894, "grad_norm": 0.12837591767311096, "learning_rate": 0.002, "loss": 2.3517, "step": 83070 }, { "epoch": 0.3211640457082773, "grad_norm": 0.10233955830335617, "learning_rate": 0.002, "loss": 2.359, "step": 83080 }, { "epoch": 0.32120270291166053, "grad_norm": 0.11211925745010376, "learning_rate": 0.002, "loss": 2.354, "step": 83090 }, { "epoch": 0.32124136011504384, "grad_norm": 0.10675840824842453, "learning_rate": 0.002, "loss": 2.3502, "step": 83100 }, { "epoch": 0.3212800173184271, "grad_norm": 0.12227018922567368, "learning_rate": 0.002, "loss": 2.349, "step": 83110 }, { "epoch": 0.3213186745218104, "grad_norm": 0.10975679010152817, "learning_rate": 0.002, "loss": 2.3547, "step": 83120 }, { "epoch": 0.32135733172519365, "grad_norm": 0.11084916442632675, "learning_rate": 0.002, "loss": 2.3567, "step": 83130 }, { "epoch": 0.32139598892857696, "grad_norm": 0.12091030925512314, "learning_rate": 0.002, "loss": 2.3552, "step": 83140 }, { "epoch": 0.3214346461319602, "grad_norm": 0.1246369257569313, "learning_rate": 0.002, "loss": 2.3505, "step": 83150 }, { "epoch": 0.3214733033353435, "grad_norm": 0.10367751866579056, "learning_rate": 0.002, "loss": 2.343, "step": 83160 }, { "epoch": 0.32151196053872677, "grad_norm": 0.11552299559116364, "learning_rate": 0.002, "loss": 2.3652, "step": 83170 }, { "epoch": 0.3215506177421101, "grad_norm": 0.12590712308883667, "learning_rate": 0.002, "loss": 2.3702, "step": 83180 }, { "epoch": 0.32158927494549333, "grad_norm": 0.1412360519170761, "learning_rate": 0.002, "loss": 2.349, "step": 83190 }, { "epoch": 0.32162793214887664, "grad_norm": 0.11818281561136246, "learning_rate": 0.002, "loss": 2.3605, "step": 83200 }, { "epoch": 0.3216665893522599, "grad_norm": 0.11225637048482895, "learning_rate": 0.002, "loss": 2.3477, "step": 83210 }, { "epoch": 0.3217052465556432, "grad_norm": 0.10759352892637253, "learning_rate": 0.002, "loss": 2.3571, "step": 83220 }, { "epoch": 0.32174390375902645, "grad_norm": 0.11591193825006485, "learning_rate": 0.002, "loss": 2.3707, "step": 83230 }, { "epoch": 0.32178256096240976, "grad_norm": 0.10086407512426376, "learning_rate": 0.002, "loss": 2.3398, "step": 83240 }, { "epoch": 0.321821218165793, "grad_norm": 0.12272131443023682, "learning_rate": 0.002, "loss": 2.3511, "step": 83250 }, { "epoch": 0.3218598753691763, "grad_norm": 0.12350817024707794, "learning_rate": 0.002, "loss": 2.3512, "step": 83260 }, { "epoch": 0.32189853257255957, "grad_norm": 0.10002472251653671, "learning_rate": 0.002, "loss": 2.3662, "step": 83270 }, { "epoch": 0.3219371897759428, "grad_norm": 0.10490340739488602, "learning_rate": 0.002, "loss": 2.3584, "step": 83280 }, { "epoch": 0.32197584697932613, "grad_norm": 0.1162208840250969, "learning_rate": 0.002, "loss": 2.3349, "step": 83290 }, { "epoch": 0.3220145041827094, "grad_norm": 0.12758538126945496, "learning_rate": 0.002, "loss": 2.3483, "step": 83300 }, { "epoch": 0.3220531613860927, "grad_norm": 0.10897503793239594, "learning_rate": 0.002, "loss": 2.357, "step": 83310 }, { "epoch": 0.32209181858947594, "grad_norm": 0.10656184703111649, "learning_rate": 0.002, "loss": 2.3638, "step": 83320 }, { "epoch": 0.32213047579285925, "grad_norm": 0.10997114330530167, "learning_rate": 0.002, "loss": 2.3418, "step": 83330 }, { "epoch": 0.3221691329962425, "grad_norm": 0.1076001301407814, "learning_rate": 0.002, "loss": 2.3415, "step": 83340 }, { "epoch": 0.3222077901996258, "grad_norm": 0.11565978080034256, "learning_rate": 0.002, "loss": 2.3546, "step": 83350 }, { "epoch": 0.32224644740300906, "grad_norm": 0.09923960268497467, "learning_rate": 0.002, "loss": 2.345, "step": 83360 }, { "epoch": 0.32228510460639237, "grad_norm": 0.10747136175632477, "learning_rate": 0.002, "loss": 2.3477, "step": 83370 }, { "epoch": 0.3223237618097756, "grad_norm": 0.119499072432518, "learning_rate": 0.002, "loss": 2.3498, "step": 83380 }, { "epoch": 0.32236241901315893, "grad_norm": 0.11400345712900162, "learning_rate": 0.002, "loss": 2.3541, "step": 83390 }, { "epoch": 0.3224010762165422, "grad_norm": 0.10465895384550095, "learning_rate": 0.002, "loss": 2.3498, "step": 83400 }, { "epoch": 0.3224397334199255, "grad_norm": 0.10290923714637756, "learning_rate": 0.002, "loss": 2.327, "step": 83410 }, { "epoch": 0.32247839062330874, "grad_norm": 0.13437430560588837, "learning_rate": 0.002, "loss": 2.3707, "step": 83420 }, { "epoch": 0.32251704782669205, "grad_norm": 0.10731037706136703, "learning_rate": 0.002, "loss": 2.3344, "step": 83430 }, { "epoch": 0.3225557050300753, "grad_norm": 0.12197359651327133, "learning_rate": 0.002, "loss": 2.3495, "step": 83440 }, { "epoch": 0.3225943622334586, "grad_norm": 0.11455868929624557, "learning_rate": 0.002, "loss": 2.3665, "step": 83450 }, { "epoch": 0.32263301943684186, "grad_norm": 0.11100361496210098, "learning_rate": 0.002, "loss": 2.3652, "step": 83460 }, { "epoch": 0.3226716766402251, "grad_norm": 0.10418584197759628, "learning_rate": 0.002, "loss": 2.3556, "step": 83470 }, { "epoch": 0.3227103338436084, "grad_norm": 0.1303698569536209, "learning_rate": 0.002, "loss": 2.3578, "step": 83480 }, { "epoch": 0.3227489910469917, "grad_norm": 0.1358424574136734, "learning_rate": 0.002, "loss": 2.3504, "step": 83490 }, { "epoch": 0.322787648250375, "grad_norm": 0.10467436909675598, "learning_rate": 0.002, "loss": 2.3351, "step": 83500 }, { "epoch": 0.32282630545375823, "grad_norm": 0.12298433482646942, "learning_rate": 0.002, "loss": 2.3499, "step": 83510 }, { "epoch": 0.32286496265714154, "grad_norm": 0.12430752068758011, "learning_rate": 0.002, "loss": 2.3512, "step": 83520 }, { "epoch": 0.3229036198605248, "grad_norm": 0.11504051834344864, "learning_rate": 0.002, "loss": 2.3393, "step": 83530 }, { "epoch": 0.3229422770639081, "grad_norm": 0.11451207846403122, "learning_rate": 0.002, "loss": 2.3422, "step": 83540 }, { "epoch": 0.32298093426729135, "grad_norm": 0.11278517544269562, "learning_rate": 0.002, "loss": 2.365, "step": 83550 }, { "epoch": 0.32301959147067466, "grad_norm": 0.11106080561876297, "learning_rate": 0.002, "loss": 2.3416, "step": 83560 }, { "epoch": 0.3230582486740579, "grad_norm": 0.09784231334924698, "learning_rate": 0.002, "loss": 2.3558, "step": 83570 }, { "epoch": 0.3230969058774412, "grad_norm": 0.12368303537368774, "learning_rate": 0.002, "loss": 2.3502, "step": 83580 }, { "epoch": 0.3231355630808245, "grad_norm": 0.11726155877113342, "learning_rate": 0.002, "loss": 2.3676, "step": 83590 }, { "epoch": 0.3231742202842078, "grad_norm": 0.11166765540838242, "learning_rate": 0.002, "loss": 2.3616, "step": 83600 }, { "epoch": 0.32321287748759103, "grad_norm": 0.11560188978910446, "learning_rate": 0.002, "loss": 2.3434, "step": 83610 }, { "epoch": 0.32325153469097434, "grad_norm": 0.12689295411109924, "learning_rate": 0.002, "loss": 2.3451, "step": 83620 }, { "epoch": 0.3232901918943576, "grad_norm": 0.12828336656093597, "learning_rate": 0.002, "loss": 2.3608, "step": 83630 }, { "epoch": 0.3233288490977409, "grad_norm": 0.1015634834766388, "learning_rate": 0.002, "loss": 2.3505, "step": 83640 }, { "epoch": 0.32336750630112415, "grad_norm": 0.10310135781764984, "learning_rate": 0.002, "loss": 2.3366, "step": 83650 }, { "epoch": 0.3234061635045074, "grad_norm": 0.11620552092790604, "learning_rate": 0.002, "loss": 2.3469, "step": 83660 }, { "epoch": 0.3234448207078907, "grad_norm": 0.11301601678133011, "learning_rate": 0.002, "loss": 2.3488, "step": 83670 }, { "epoch": 0.32348347791127396, "grad_norm": 0.12127354741096497, "learning_rate": 0.002, "loss": 2.373, "step": 83680 }, { "epoch": 0.32352213511465727, "grad_norm": 0.10062091797590256, "learning_rate": 0.002, "loss": 2.3676, "step": 83690 }, { "epoch": 0.3235607923180405, "grad_norm": 0.11745966225862503, "learning_rate": 0.002, "loss": 2.3475, "step": 83700 }, { "epoch": 0.32359944952142383, "grad_norm": 0.10977525264024734, "learning_rate": 0.002, "loss": 2.3543, "step": 83710 }, { "epoch": 0.3236381067248071, "grad_norm": 0.10556995123624802, "learning_rate": 0.002, "loss": 2.3453, "step": 83720 }, { "epoch": 0.3236767639281904, "grad_norm": 0.10026714950799942, "learning_rate": 0.002, "loss": 2.3443, "step": 83730 }, { "epoch": 0.32371542113157364, "grad_norm": 0.1397588849067688, "learning_rate": 0.002, "loss": 2.3586, "step": 83740 }, { "epoch": 0.32375407833495695, "grad_norm": 0.10103439539670944, "learning_rate": 0.002, "loss": 2.3555, "step": 83750 }, { "epoch": 0.3237927355383402, "grad_norm": 0.10274723917245865, "learning_rate": 0.002, "loss": 2.3453, "step": 83760 }, { "epoch": 0.3238313927417235, "grad_norm": 0.10949409008026123, "learning_rate": 0.002, "loss": 2.3696, "step": 83770 }, { "epoch": 0.32387004994510676, "grad_norm": 0.10061365365982056, "learning_rate": 0.002, "loss": 2.3425, "step": 83780 }, { "epoch": 0.32390870714849007, "grad_norm": 0.11748245358467102, "learning_rate": 0.002, "loss": 2.3557, "step": 83790 }, { "epoch": 0.3239473643518733, "grad_norm": 0.11854736506938934, "learning_rate": 0.002, "loss": 2.3505, "step": 83800 }, { "epoch": 0.32398602155525663, "grad_norm": 0.10991797596216202, "learning_rate": 0.002, "loss": 2.3527, "step": 83810 }, { "epoch": 0.3240246787586399, "grad_norm": 0.12242380529642105, "learning_rate": 0.002, "loss": 2.3373, "step": 83820 }, { "epoch": 0.32406333596202314, "grad_norm": 0.10684510320425034, "learning_rate": 0.002, "loss": 2.3516, "step": 83830 }, { "epoch": 0.32410199316540644, "grad_norm": 0.10553453117609024, "learning_rate": 0.002, "loss": 2.3691, "step": 83840 }, { "epoch": 0.3241406503687897, "grad_norm": 0.13125893473625183, "learning_rate": 0.002, "loss": 2.3663, "step": 83850 }, { "epoch": 0.324179307572173, "grad_norm": 0.10782486200332642, "learning_rate": 0.002, "loss": 2.3294, "step": 83860 }, { "epoch": 0.32421796477555626, "grad_norm": 0.12534086406230927, "learning_rate": 0.002, "loss": 2.3555, "step": 83870 }, { "epoch": 0.32425662197893956, "grad_norm": 0.13595376908779144, "learning_rate": 0.002, "loss": 2.3548, "step": 83880 }, { "epoch": 0.3242952791823228, "grad_norm": 0.09643189609050751, "learning_rate": 0.002, "loss": 2.365, "step": 83890 }, { "epoch": 0.3243339363857061, "grad_norm": 0.10394199937582016, "learning_rate": 0.002, "loss": 2.3592, "step": 83900 }, { "epoch": 0.3243725935890894, "grad_norm": 0.12716563045978546, "learning_rate": 0.002, "loss": 2.3409, "step": 83910 }, { "epoch": 0.3244112507924727, "grad_norm": 0.09696212410926819, "learning_rate": 0.002, "loss": 2.3627, "step": 83920 }, { "epoch": 0.32444990799585594, "grad_norm": 0.11893827468156815, "learning_rate": 0.002, "loss": 2.3606, "step": 83930 }, { "epoch": 0.32448856519923924, "grad_norm": 0.1292070746421814, "learning_rate": 0.002, "loss": 2.3345, "step": 83940 }, { "epoch": 0.3245272224026225, "grad_norm": 0.09975294023752213, "learning_rate": 0.002, "loss": 2.3439, "step": 83950 }, { "epoch": 0.3245658796060058, "grad_norm": 0.09658270329236984, "learning_rate": 0.002, "loss": 2.3581, "step": 83960 }, { "epoch": 0.32460453680938905, "grad_norm": 0.1587447077035904, "learning_rate": 0.002, "loss": 2.358, "step": 83970 }, { "epoch": 0.32464319401277236, "grad_norm": 0.14374689757823944, "learning_rate": 0.002, "loss": 2.3589, "step": 83980 }, { "epoch": 0.3246818512161556, "grad_norm": 0.1280277520418167, "learning_rate": 0.002, "loss": 2.3532, "step": 83990 }, { "epoch": 0.3247205084195389, "grad_norm": 0.10204432159662247, "learning_rate": 0.002, "loss": 2.3652, "step": 84000 }, { "epoch": 0.3247591656229222, "grad_norm": 0.11131812632083893, "learning_rate": 0.002, "loss": 2.3539, "step": 84010 }, { "epoch": 0.3247978228263054, "grad_norm": 0.1131935566663742, "learning_rate": 0.002, "loss": 2.36, "step": 84020 }, { "epoch": 0.32483648002968873, "grad_norm": 0.12737193703651428, "learning_rate": 0.002, "loss": 2.356, "step": 84030 }, { "epoch": 0.324875137233072, "grad_norm": 0.10414043068885803, "learning_rate": 0.002, "loss": 2.3503, "step": 84040 }, { "epoch": 0.3249137944364553, "grad_norm": 0.12210876494646072, "learning_rate": 0.002, "loss": 2.3569, "step": 84050 }, { "epoch": 0.32495245163983855, "grad_norm": 0.13212950527668, "learning_rate": 0.002, "loss": 2.3668, "step": 84060 }, { "epoch": 0.32499110884322185, "grad_norm": 0.1294020563364029, "learning_rate": 0.002, "loss": 2.3538, "step": 84070 }, { "epoch": 0.3250297660466051, "grad_norm": 0.11428891867399216, "learning_rate": 0.002, "loss": 2.3568, "step": 84080 }, { "epoch": 0.3250684232499884, "grad_norm": 0.10889285057783127, "learning_rate": 0.002, "loss": 2.3526, "step": 84090 }, { "epoch": 0.32510708045337167, "grad_norm": 0.11247077584266663, "learning_rate": 0.002, "loss": 2.362, "step": 84100 }, { "epoch": 0.325145737656755, "grad_norm": 0.10409799963235855, "learning_rate": 0.002, "loss": 2.3578, "step": 84110 }, { "epoch": 0.3251843948601382, "grad_norm": 0.1056474819779396, "learning_rate": 0.002, "loss": 2.3651, "step": 84120 }, { "epoch": 0.32522305206352153, "grad_norm": 0.1274235099554062, "learning_rate": 0.002, "loss": 2.3641, "step": 84130 }, { "epoch": 0.3252617092669048, "grad_norm": 0.13003771007061005, "learning_rate": 0.002, "loss": 2.3579, "step": 84140 }, { "epoch": 0.3253003664702881, "grad_norm": 0.11398117244243622, "learning_rate": 0.002, "loss": 2.3361, "step": 84150 }, { "epoch": 0.32533902367367135, "grad_norm": 0.10530584305524826, "learning_rate": 0.002, "loss": 2.3554, "step": 84160 }, { "epoch": 0.32537768087705465, "grad_norm": 0.11318954825401306, "learning_rate": 0.002, "loss": 2.3646, "step": 84170 }, { "epoch": 0.3254163380804379, "grad_norm": 0.11111290007829666, "learning_rate": 0.002, "loss": 2.3476, "step": 84180 }, { "epoch": 0.3254549952838212, "grad_norm": 0.10164378583431244, "learning_rate": 0.002, "loss": 2.3526, "step": 84190 }, { "epoch": 0.32549365248720447, "grad_norm": 0.13917332887649536, "learning_rate": 0.002, "loss": 2.332, "step": 84200 }, { "epoch": 0.3255323096905877, "grad_norm": 0.10684075951576233, "learning_rate": 0.002, "loss": 2.3637, "step": 84210 }, { "epoch": 0.325570966893971, "grad_norm": 0.10433298349380493, "learning_rate": 0.002, "loss": 2.3564, "step": 84220 }, { "epoch": 0.3256096240973543, "grad_norm": 0.09909933060407639, "learning_rate": 0.002, "loss": 2.3607, "step": 84230 }, { "epoch": 0.3256482813007376, "grad_norm": 0.10950423032045364, "learning_rate": 0.002, "loss": 2.351, "step": 84240 }, { "epoch": 0.32568693850412084, "grad_norm": 0.09400998055934906, "learning_rate": 0.002, "loss": 2.3635, "step": 84250 }, { "epoch": 0.32572559570750415, "grad_norm": 0.09943398833274841, "learning_rate": 0.002, "loss": 2.36, "step": 84260 }, { "epoch": 0.3257642529108874, "grad_norm": 0.13087767362594604, "learning_rate": 0.002, "loss": 2.3479, "step": 84270 }, { "epoch": 0.3258029101142707, "grad_norm": 0.11305786669254303, "learning_rate": 0.002, "loss": 2.3679, "step": 84280 }, { "epoch": 0.32584156731765396, "grad_norm": 0.11657200753688812, "learning_rate": 0.002, "loss": 2.3685, "step": 84290 }, { "epoch": 0.32588022452103727, "grad_norm": 0.09545467048883438, "learning_rate": 0.002, "loss": 2.3503, "step": 84300 }, { "epoch": 0.3259188817244205, "grad_norm": 0.12025730311870575, "learning_rate": 0.002, "loss": 2.3495, "step": 84310 }, { "epoch": 0.3259575389278038, "grad_norm": 0.10664772987365723, "learning_rate": 0.002, "loss": 2.3621, "step": 84320 }, { "epoch": 0.3259961961311871, "grad_norm": 0.11544671654701233, "learning_rate": 0.002, "loss": 2.3444, "step": 84330 }, { "epoch": 0.3260348533345704, "grad_norm": 0.1060868427157402, "learning_rate": 0.002, "loss": 2.3541, "step": 84340 }, { "epoch": 0.32607351053795364, "grad_norm": 0.11616748571395874, "learning_rate": 0.002, "loss": 2.3614, "step": 84350 }, { "epoch": 0.32611216774133694, "grad_norm": 0.10780932754278183, "learning_rate": 0.002, "loss": 2.3489, "step": 84360 }, { "epoch": 0.3261508249447202, "grad_norm": 0.10857664793729782, "learning_rate": 0.002, "loss": 2.3749, "step": 84370 }, { "epoch": 0.3261894821481035, "grad_norm": 0.12012740224599838, "learning_rate": 0.002, "loss": 2.3639, "step": 84380 }, { "epoch": 0.32622813935148676, "grad_norm": 0.10782299190759659, "learning_rate": 0.002, "loss": 2.3737, "step": 84390 }, { "epoch": 0.32626679655487, "grad_norm": 0.13160741329193115, "learning_rate": 0.002, "loss": 2.3526, "step": 84400 }, { "epoch": 0.3263054537582533, "grad_norm": 0.1270124465227127, "learning_rate": 0.002, "loss": 2.3541, "step": 84410 }, { "epoch": 0.32634411096163657, "grad_norm": 0.1109585240483284, "learning_rate": 0.002, "loss": 2.3543, "step": 84420 }, { "epoch": 0.3263827681650199, "grad_norm": 0.11671182513237, "learning_rate": 0.002, "loss": 2.364, "step": 84430 }, { "epoch": 0.32642142536840313, "grad_norm": 0.11640793085098267, "learning_rate": 0.002, "loss": 2.352, "step": 84440 }, { "epoch": 0.32646008257178644, "grad_norm": 0.1214011162519455, "learning_rate": 0.002, "loss": 2.359, "step": 84450 }, { "epoch": 0.3264987397751697, "grad_norm": 0.10922209918498993, "learning_rate": 0.002, "loss": 2.353, "step": 84460 }, { "epoch": 0.326537396978553, "grad_norm": 0.10862427949905396, "learning_rate": 0.002, "loss": 2.3553, "step": 84470 }, { "epoch": 0.32657605418193625, "grad_norm": 0.11239778250455856, "learning_rate": 0.002, "loss": 2.3513, "step": 84480 }, { "epoch": 0.32661471138531956, "grad_norm": 0.11458240449428558, "learning_rate": 0.002, "loss": 2.3588, "step": 84490 }, { "epoch": 0.3266533685887028, "grad_norm": 0.11729695647954941, "learning_rate": 0.002, "loss": 2.3556, "step": 84500 }, { "epoch": 0.3266920257920861, "grad_norm": 0.09423644095659256, "learning_rate": 0.002, "loss": 2.3547, "step": 84510 }, { "epoch": 0.32673068299546937, "grad_norm": 0.09924380481243134, "learning_rate": 0.002, "loss": 2.3525, "step": 84520 }, { "epoch": 0.3267693401988527, "grad_norm": 0.126951664686203, "learning_rate": 0.002, "loss": 2.3552, "step": 84530 }, { "epoch": 0.32680799740223593, "grad_norm": 0.10472346097230911, "learning_rate": 0.002, "loss": 2.3502, "step": 84540 }, { "epoch": 0.32684665460561924, "grad_norm": 0.1225130558013916, "learning_rate": 0.002, "loss": 2.3338, "step": 84550 }, { "epoch": 0.3268853118090025, "grad_norm": 0.12059375643730164, "learning_rate": 0.002, "loss": 2.3593, "step": 84560 }, { "epoch": 0.3269239690123858, "grad_norm": 0.11787009984254837, "learning_rate": 0.002, "loss": 2.3552, "step": 84570 }, { "epoch": 0.32696262621576905, "grad_norm": 0.10444694757461548, "learning_rate": 0.002, "loss": 2.338, "step": 84580 }, { "epoch": 0.3270012834191523, "grad_norm": 0.12187785655260086, "learning_rate": 0.002, "loss": 2.3549, "step": 84590 }, { "epoch": 0.3270399406225356, "grad_norm": 0.10000617057085037, "learning_rate": 0.002, "loss": 2.3592, "step": 84600 }, { "epoch": 0.32707859782591886, "grad_norm": 0.10169254243373871, "learning_rate": 0.002, "loss": 2.3532, "step": 84610 }, { "epoch": 0.32711725502930217, "grad_norm": 0.11734003573656082, "learning_rate": 0.002, "loss": 2.3511, "step": 84620 }, { "epoch": 0.3271559122326854, "grad_norm": 0.11790499836206436, "learning_rate": 0.002, "loss": 2.3555, "step": 84630 }, { "epoch": 0.3271945694360687, "grad_norm": 0.11842221766710281, "learning_rate": 0.002, "loss": 2.3563, "step": 84640 }, { "epoch": 0.327233226639452, "grad_norm": 0.10803110897541046, "learning_rate": 0.002, "loss": 2.3639, "step": 84650 }, { "epoch": 0.3272718838428353, "grad_norm": 0.11215823143720627, "learning_rate": 0.002, "loss": 2.3389, "step": 84660 }, { "epoch": 0.32731054104621854, "grad_norm": 0.0982016995549202, "learning_rate": 0.002, "loss": 2.3446, "step": 84670 }, { "epoch": 0.32734919824960185, "grad_norm": 0.10206621140241623, "learning_rate": 0.002, "loss": 2.3636, "step": 84680 }, { "epoch": 0.3273878554529851, "grad_norm": 0.11638012528419495, "learning_rate": 0.002, "loss": 2.3493, "step": 84690 }, { "epoch": 0.3274265126563684, "grad_norm": 0.10438395291566849, "learning_rate": 0.002, "loss": 2.365, "step": 84700 }, { "epoch": 0.32746516985975166, "grad_norm": 0.15618744492530823, "learning_rate": 0.002, "loss": 2.3397, "step": 84710 }, { "epoch": 0.32750382706313497, "grad_norm": 0.10686315596103668, "learning_rate": 0.002, "loss": 2.3467, "step": 84720 }, { "epoch": 0.3275424842665182, "grad_norm": 0.10776001960039139, "learning_rate": 0.002, "loss": 2.3575, "step": 84730 }, { "epoch": 0.3275811414699015, "grad_norm": 0.1202462762594223, "learning_rate": 0.002, "loss": 2.3541, "step": 84740 }, { "epoch": 0.3276197986732848, "grad_norm": 0.11879925429821014, "learning_rate": 0.002, "loss": 2.35, "step": 84750 }, { "epoch": 0.32765845587666803, "grad_norm": 0.10598697513341904, "learning_rate": 0.002, "loss": 2.3635, "step": 84760 }, { "epoch": 0.32769711308005134, "grad_norm": 0.09640336781740189, "learning_rate": 0.002, "loss": 2.3449, "step": 84770 }, { "epoch": 0.3277357702834346, "grad_norm": 0.13301733136177063, "learning_rate": 0.002, "loss": 2.3485, "step": 84780 }, { "epoch": 0.3277744274868179, "grad_norm": 0.10526037961244583, "learning_rate": 0.002, "loss": 2.3748, "step": 84790 }, { "epoch": 0.32781308469020115, "grad_norm": 0.1060631051659584, "learning_rate": 0.002, "loss": 2.3517, "step": 84800 }, { "epoch": 0.32785174189358446, "grad_norm": 0.09776698797941208, "learning_rate": 0.002, "loss": 2.361, "step": 84810 }, { "epoch": 0.3278903990969677, "grad_norm": 0.11571425944566727, "learning_rate": 0.002, "loss": 2.3699, "step": 84820 }, { "epoch": 0.327929056300351, "grad_norm": 0.12359791249036789, "learning_rate": 0.002, "loss": 2.3655, "step": 84830 }, { "epoch": 0.32796771350373427, "grad_norm": 0.11640583723783493, "learning_rate": 0.002, "loss": 2.3482, "step": 84840 }, { "epoch": 0.3280063707071176, "grad_norm": 0.12452420592308044, "learning_rate": 0.002, "loss": 2.3494, "step": 84850 }, { "epoch": 0.32804502791050083, "grad_norm": 0.11438850313425064, "learning_rate": 0.002, "loss": 2.3502, "step": 84860 }, { "epoch": 0.32808368511388414, "grad_norm": 0.1265454739332199, "learning_rate": 0.002, "loss": 2.3537, "step": 84870 }, { "epoch": 0.3281223423172674, "grad_norm": 0.1110704094171524, "learning_rate": 0.002, "loss": 2.3599, "step": 84880 }, { "epoch": 0.3281609995206507, "grad_norm": 0.11462464183568954, "learning_rate": 0.002, "loss": 2.3507, "step": 84890 }, { "epoch": 0.32819965672403395, "grad_norm": 0.10768561065196991, "learning_rate": 0.002, "loss": 2.3702, "step": 84900 }, { "epoch": 0.32823831392741726, "grad_norm": 0.11579468846321106, "learning_rate": 0.002, "loss": 2.333, "step": 84910 }, { "epoch": 0.3282769711308005, "grad_norm": 0.11966416984796524, "learning_rate": 0.002, "loss": 2.3585, "step": 84920 }, { "epoch": 0.3283156283341838, "grad_norm": 0.11327365785837173, "learning_rate": 0.002, "loss": 2.3618, "step": 84930 }, { "epoch": 0.32835428553756707, "grad_norm": 0.1026577427983284, "learning_rate": 0.002, "loss": 2.3462, "step": 84940 }, { "epoch": 0.3283929427409503, "grad_norm": 0.10104644298553467, "learning_rate": 0.002, "loss": 2.3605, "step": 84950 }, { "epoch": 0.32843159994433363, "grad_norm": 0.10899697989225388, "learning_rate": 0.002, "loss": 2.3611, "step": 84960 }, { "epoch": 0.3284702571477169, "grad_norm": 0.1111191138625145, "learning_rate": 0.002, "loss": 2.3336, "step": 84970 }, { "epoch": 0.3285089143511002, "grad_norm": 0.11514212936162949, "learning_rate": 0.002, "loss": 2.3452, "step": 84980 }, { "epoch": 0.32854757155448344, "grad_norm": 0.09942366182804108, "learning_rate": 0.002, "loss": 2.3562, "step": 84990 }, { "epoch": 0.32858622875786675, "grad_norm": 0.10499731451272964, "learning_rate": 0.002, "loss": 2.3451, "step": 85000 }, { "epoch": 0.32862488596125, "grad_norm": 0.1169121041893959, "learning_rate": 0.002, "loss": 2.3371, "step": 85010 }, { "epoch": 0.3286635431646333, "grad_norm": 0.10293654352426529, "learning_rate": 0.002, "loss": 2.3575, "step": 85020 }, { "epoch": 0.32870220036801656, "grad_norm": 0.13729114830493927, "learning_rate": 0.002, "loss": 2.345, "step": 85030 }, { "epoch": 0.32874085757139987, "grad_norm": 0.12647894024848938, "learning_rate": 0.002, "loss": 2.3539, "step": 85040 }, { "epoch": 0.3287795147747831, "grad_norm": 0.11063408851623535, "learning_rate": 0.002, "loss": 2.3501, "step": 85050 }, { "epoch": 0.32881817197816643, "grad_norm": 0.12294568866491318, "learning_rate": 0.002, "loss": 2.3483, "step": 85060 }, { "epoch": 0.3288568291815497, "grad_norm": 0.117158442735672, "learning_rate": 0.002, "loss": 2.342, "step": 85070 }, { "epoch": 0.328895486384933, "grad_norm": 0.10392977297306061, "learning_rate": 0.002, "loss": 2.3467, "step": 85080 }, { "epoch": 0.32893414358831624, "grad_norm": 0.11240453273057938, "learning_rate": 0.002, "loss": 2.3464, "step": 85090 }, { "epoch": 0.32897280079169955, "grad_norm": 0.11931589990854263, "learning_rate": 0.002, "loss": 2.3432, "step": 85100 }, { "epoch": 0.3290114579950828, "grad_norm": 0.12070564180612564, "learning_rate": 0.002, "loss": 2.3609, "step": 85110 }, { "epoch": 0.3290501151984661, "grad_norm": 0.09947573393583298, "learning_rate": 0.002, "loss": 2.3601, "step": 85120 }, { "epoch": 0.32908877240184936, "grad_norm": 0.13053150475025177, "learning_rate": 0.002, "loss": 2.368, "step": 85130 }, { "epoch": 0.3291274296052326, "grad_norm": 0.1006920337677002, "learning_rate": 0.002, "loss": 2.3515, "step": 85140 }, { "epoch": 0.3291660868086159, "grad_norm": 0.1335725635290146, "learning_rate": 0.002, "loss": 2.3662, "step": 85150 }, { "epoch": 0.3292047440119992, "grad_norm": 0.10223272442817688, "learning_rate": 0.002, "loss": 2.359, "step": 85160 }, { "epoch": 0.3292434012153825, "grad_norm": 0.10168513655662537, "learning_rate": 0.002, "loss": 2.3572, "step": 85170 }, { "epoch": 0.32928205841876573, "grad_norm": 0.11560312658548355, "learning_rate": 0.002, "loss": 2.351, "step": 85180 }, { "epoch": 0.32932071562214904, "grad_norm": 0.10953016579151154, "learning_rate": 0.002, "loss": 2.3625, "step": 85190 }, { "epoch": 0.3293593728255323, "grad_norm": 0.10393237322568893, "learning_rate": 0.002, "loss": 2.3467, "step": 85200 }, { "epoch": 0.3293980300289156, "grad_norm": 0.11780615150928497, "learning_rate": 0.002, "loss": 2.3525, "step": 85210 }, { "epoch": 0.32943668723229885, "grad_norm": 0.1172422245144844, "learning_rate": 0.002, "loss": 2.3583, "step": 85220 }, { "epoch": 0.32947534443568216, "grad_norm": 0.10669662058353424, "learning_rate": 0.002, "loss": 2.3406, "step": 85230 }, { "epoch": 0.3295140016390654, "grad_norm": 0.1318225860595703, "learning_rate": 0.002, "loss": 2.373, "step": 85240 }, { "epoch": 0.3295526588424487, "grad_norm": 0.09088876098394394, "learning_rate": 0.002, "loss": 2.3476, "step": 85250 }, { "epoch": 0.329591316045832, "grad_norm": 0.11682692170143127, "learning_rate": 0.002, "loss": 2.3599, "step": 85260 }, { "epoch": 0.3296299732492153, "grad_norm": 0.10686168074607849, "learning_rate": 0.002, "loss": 2.3492, "step": 85270 }, { "epoch": 0.32966863045259853, "grad_norm": 0.0991005152463913, "learning_rate": 0.002, "loss": 2.3454, "step": 85280 }, { "epoch": 0.32970728765598184, "grad_norm": 0.11931562423706055, "learning_rate": 0.002, "loss": 2.3462, "step": 85290 }, { "epoch": 0.3297459448593651, "grad_norm": 0.11068251729011536, "learning_rate": 0.002, "loss": 2.3426, "step": 85300 }, { "epoch": 0.3297846020627484, "grad_norm": 0.12620867788791656, "learning_rate": 0.002, "loss": 2.3698, "step": 85310 }, { "epoch": 0.32982325926613165, "grad_norm": 0.12862162292003632, "learning_rate": 0.002, "loss": 2.351, "step": 85320 }, { "epoch": 0.3298619164695149, "grad_norm": 0.11188545823097229, "learning_rate": 0.002, "loss": 2.3549, "step": 85330 }, { "epoch": 0.3299005736728982, "grad_norm": 0.10457547008991241, "learning_rate": 0.002, "loss": 2.3653, "step": 85340 }, { "epoch": 0.32993923087628146, "grad_norm": 0.11040732264518738, "learning_rate": 0.002, "loss": 2.357, "step": 85350 }, { "epoch": 0.32997788807966477, "grad_norm": 0.11926726251840591, "learning_rate": 0.002, "loss": 2.3343, "step": 85360 }, { "epoch": 0.330016545283048, "grad_norm": 0.10826878994703293, "learning_rate": 0.002, "loss": 2.3607, "step": 85370 }, { "epoch": 0.33005520248643133, "grad_norm": 0.11095569282770157, "learning_rate": 0.002, "loss": 2.3513, "step": 85380 }, { "epoch": 0.3300938596898146, "grad_norm": 0.12638893723487854, "learning_rate": 0.002, "loss": 2.3676, "step": 85390 }, { "epoch": 0.3301325168931979, "grad_norm": 0.11446123570203781, "learning_rate": 0.002, "loss": 2.3468, "step": 85400 }, { "epoch": 0.33017117409658114, "grad_norm": 0.12310647964477539, "learning_rate": 0.002, "loss": 2.3516, "step": 85410 }, { "epoch": 0.33020983129996445, "grad_norm": 0.10424927622079849, "learning_rate": 0.002, "loss": 2.3526, "step": 85420 }, { "epoch": 0.3302484885033477, "grad_norm": 0.1351475864648819, "learning_rate": 0.002, "loss": 2.3609, "step": 85430 }, { "epoch": 0.330287145706731, "grad_norm": 0.12132125347852707, "learning_rate": 0.002, "loss": 2.3483, "step": 85440 }, { "epoch": 0.33032580291011426, "grad_norm": 0.10493456572294235, "learning_rate": 0.002, "loss": 2.3535, "step": 85450 }, { "epoch": 0.33036446011349757, "grad_norm": 0.10808337479829788, "learning_rate": 0.002, "loss": 2.3598, "step": 85460 }, { "epoch": 0.3304031173168808, "grad_norm": 0.15387500822544098, "learning_rate": 0.002, "loss": 2.3643, "step": 85470 }, { "epoch": 0.33044177452026413, "grad_norm": 0.1107737347483635, "learning_rate": 0.002, "loss": 2.3527, "step": 85480 }, { "epoch": 0.3304804317236474, "grad_norm": 0.1131087988615036, "learning_rate": 0.002, "loss": 2.361, "step": 85490 }, { "epoch": 0.33051908892703064, "grad_norm": 0.11487334221601486, "learning_rate": 0.002, "loss": 2.3538, "step": 85500 }, { "epoch": 0.33055774613041394, "grad_norm": 0.11559855937957764, "learning_rate": 0.002, "loss": 2.339, "step": 85510 }, { "epoch": 0.3305964033337972, "grad_norm": 0.1240239292383194, "learning_rate": 0.002, "loss": 2.3522, "step": 85520 }, { "epoch": 0.3306350605371805, "grad_norm": 0.13296452164649963, "learning_rate": 0.002, "loss": 2.3603, "step": 85530 }, { "epoch": 0.33067371774056376, "grad_norm": 0.11823895573616028, "learning_rate": 0.002, "loss": 2.3562, "step": 85540 }, { "epoch": 0.33071237494394706, "grad_norm": 0.10527436435222626, "learning_rate": 0.002, "loss": 2.3481, "step": 85550 }, { "epoch": 0.3307510321473303, "grad_norm": 0.10656841099262238, "learning_rate": 0.002, "loss": 2.3263, "step": 85560 }, { "epoch": 0.3307896893507136, "grad_norm": 0.11199898272752762, "learning_rate": 0.002, "loss": 2.3498, "step": 85570 }, { "epoch": 0.3308283465540969, "grad_norm": 0.1321028769016266, "learning_rate": 0.002, "loss": 2.3353, "step": 85580 }, { "epoch": 0.3308670037574802, "grad_norm": 0.1020321249961853, "learning_rate": 0.002, "loss": 2.3391, "step": 85590 }, { "epoch": 0.33090566096086343, "grad_norm": 0.11091432720422745, "learning_rate": 0.002, "loss": 2.3452, "step": 85600 }, { "epoch": 0.33094431816424674, "grad_norm": 0.12364307790994644, "learning_rate": 0.002, "loss": 2.3481, "step": 85610 }, { "epoch": 0.33098297536763, "grad_norm": 0.10222646594047546, "learning_rate": 0.002, "loss": 2.3552, "step": 85620 }, { "epoch": 0.3310216325710133, "grad_norm": 0.10115206241607666, "learning_rate": 0.002, "loss": 2.3525, "step": 85630 }, { "epoch": 0.33106028977439655, "grad_norm": 0.10824427008628845, "learning_rate": 0.002, "loss": 2.3602, "step": 85640 }, { "epoch": 0.33109894697777986, "grad_norm": 0.13098442554473877, "learning_rate": 0.002, "loss": 2.3579, "step": 85650 }, { "epoch": 0.3311376041811631, "grad_norm": 0.10785630345344543, "learning_rate": 0.002, "loss": 2.3525, "step": 85660 }, { "epoch": 0.3311762613845464, "grad_norm": 0.11627553403377533, "learning_rate": 0.002, "loss": 2.3602, "step": 85670 }, { "epoch": 0.3312149185879297, "grad_norm": 0.13079549372196198, "learning_rate": 0.002, "loss": 2.3619, "step": 85680 }, { "epoch": 0.3312535757913129, "grad_norm": 0.1064203754067421, "learning_rate": 0.002, "loss": 2.3605, "step": 85690 }, { "epoch": 0.33129223299469623, "grad_norm": 0.11663869023323059, "learning_rate": 0.002, "loss": 2.3524, "step": 85700 }, { "epoch": 0.3313308901980795, "grad_norm": 0.10477962344884872, "learning_rate": 0.002, "loss": 2.3417, "step": 85710 }, { "epoch": 0.3313695474014628, "grad_norm": 0.12362811714410782, "learning_rate": 0.002, "loss": 2.3645, "step": 85720 }, { "epoch": 0.33140820460484605, "grad_norm": 0.1224687248468399, "learning_rate": 0.002, "loss": 2.3311, "step": 85730 }, { "epoch": 0.33144686180822935, "grad_norm": 0.11334341019392014, "learning_rate": 0.002, "loss": 2.3527, "step": 85740 }, { "epoch": 0.3314855190116126, "grad_norm": 0.10793153941631317, "learning_rate": 0.002, "loss": 2.3409, "step": 85750 }, { "epoch": 0.3315241762149959, "grad_norm": 0.13817518949508667, "learning_rate": 0.002, "loss": 2.3268, "step": 85760 }, { "epoch": 0.33156283341837917, "grad_norm": 0.10457106679677963, "learning_rate": 0.002, "loss": 2.3411, "step": 85770 }, { "epoch": 0.3316014906217625, "grad_norm": 0.11196237057447433, "learning_rate": 0.002, "loss": 2.3344, "step": 85780 }, { "epoch": 0.3316401478251457, "grad_norm": 0.11701985448598862, "learning_rate": 0.002, "loss": 2.3411, "step": 85790 }, { "epoch": 0.33167880502852903, "grad_norm": 0.0993732213973999, "learning_rate": 0.002, "loss": 2.352, "step": 85800 }, { "epoch": 0.3317174622319123, "grad_norm": 0.1316983848810196, "learning_rate": 0.002, "loss": 2.3534, "step": 85810 }, { "epoch": 0.3317561194352956, "grad_norm": 0.12413479387760162, "learning_rate": 0.002, "loss": 2.3639, "step": 85820 }, { "epoch": 0.33179477663867885, "grad_norm": 0.12227200716733932, "learning_rate": 0.002, "loss": 2.3588, "step": 85830 }, { "epoch": 0.33183343384206215, "grad_norm": 0.09567388147115707, "learning_rate": 0.002, "loss": 2.3533, "step": 85840 }, { "epoch": 0.3318720910454454, "grad_norm": 0.10324221104383469, "learning_rate": 0.002, "loss": 2.3679, "step": 85850 }, { "epoch": 0.3319107482488287, "grad_norm": 0.11741742491722107, "learning_rate": 0.002, "loss": 2.3608, "step": 85860 }, { "epoch": 0.33194940545221197, "grad_norm": 0.11068105697631836, "learning_rate": 0.002, "loss": 2.3465, "step": 85870 }, { "epoch": 0.3319880626555952, "grad_norm": 0.10848050564527512, "learning_rate": 0.002, "loss": 2.3422, "step": 85880 }, { "epoch": 0.3320267198589785, "grad_norm": 0.10607170313596725, "learning_rate": 0.002, "loss": 2.3417, "step": 85890 }, { "epoch": 0.3320653770623618, "grad_norm": 0.11788028478622437, "learning_rate": 0.002, "loss": 2.3598, "step": 85900 }, { "epoch": 0.3321040342657451, "grad_norm": 0.10743343830108643, "learning_rate": 0.002, "loss": 2.3627, "step": 85910 }, { "epoch": 0.33214269146912834, "grad_norm": 0.11620127409696579, "learning_rate": 0.002, "loss": 2.3368, "step": 85920 }, { "epoch": 0.33218134867251164, "grad_norm": 0.0988125130534172, "learning_rate": 0.002, "loss": 2.3566, "step": 85930 }, { "epoch": 0.3322200058758949, "grad_norm": 0.11408448219299316, "learning_rate": 0.002, "loss": 2.3535, "step": 85940 }, { "epoch": 0.3322586630792782, "grad_norm": 0.1145530492067337, "learning_rate": 0.002, "loss": 2.3694, "step": 85950 }, { "epoch": 0.33229732028266146, "grad_norm": 0.11560901999473572, "learning_rate": 0.002, "loss": 2.3361, "step": 85960 }, { "epoch": 0.33233597748604476, "grad_norm": 0.11214718222618103, "learning_rate": 0.002, "loss": 2.3546, "step": 85970 }, { "epoch": 0.332374634689428, "grad_norm": 0.09654606878757477, "learning_rate": 0.002, "loss": 2.3607, "step": 85980 }, { "epoch": 0.3324132918928113, "grad_norm": 0.098363496363163, "learning_rate": 0.002, "loss": 2.3505, "step": 85990 }, { "epoch": 0.3324519490961946, "grad_norm": 0.10288723558187485, "learning_rate": 0.002, "loss": 2.354, "step": 86000 }, { "epoch": 0.3324906062995779, "grad_norm": 0.10426101833581924, "learning_rate": 0.002, "loss": 2.3461, "step": 86010 }, { "epoch": 0.33252926350296114, "grad_norm": 0.11998550593852997, "learning_rate": 0.002, "loss": 2.3669, "step": 86020 }, { "epoch": 0.33256792070634444, "grad_norm": 0.12091478705406189, "learning_rate": 0.002, "loss": 2.3578, "step": 86030 }, { "epoch": 0.3326065779097277, "grad_norm": 0.09535927325487137, "learning_rate": 0.002, "loss": 2.3551, "step": 86040 }, { "epoch": 0.332645235113111, "grad_norm": 0.11487176269292831, "learning_rate": 0.002, "loss": 2.3607, "step": 86050 }, { "epoch": 0.33268389231649426, "grad_norm": 0.11748003959655762, "learning_rate": 0.002, "loss": 2.3633, "step": 86060 }, { "epoch": 0.3327225495198775, "grad_norm": 0.108509361743927, "learning_rate": 0.002, "loss": 2.3501, "step": 86070 }, { "epoch": 0.3327612067232608, "grad_norm": 0.10883913934230804, "learning_rate": 0.002, "loss": 2.354, "step": 86080 }, { "epoch": 0.33279986392664407, "grad_norm": 0.11875820904970169, "learning_rate": 0.002, "loss": 2.353, "step": 86090 }, { "epoch": 0.3328385211300274, "grad_norm": 0.10039224475622177, "learning_rate": 0.002, "loss": 2.3495, "step": 86100 }, { "epoch": 0.33287717833341063, "grad_norm": 0.11563944071531296, "learning_rate": 0.002, "loss": 2.3542, "step": 86110 }, { "epoch": 0.33291583553679394, "grad_norm": 0.12889650464057922, "learning_rate": 0.002, "loss": 2.361, "step": 86120 }, { "epoch": 0.3329544927401772, "grad_norm": 0.1014760285615921, "learning_rate": 0.002, "loss": 2.3451, "step": 86130 }, { "epoch": 0.3329931499435605, "grad_norm": 0.11509194225072861, "learning_rate": 0.002, "loss": 2.3575, "step": 86140 }, { "epoch": 0.33303180714694375, "grad_norm": 0.1407669335603714, "learning_rate": 0.002, "loss": 2.3313, "step": 86150 }, { "epoch": 0.33307046435032706, "grad_norm": 0.10478154569864273, "learning_rate": 0.002, "loss": 2.3587, "step": 86160 }, { "epoch": 0.3331091215537103, "grad_norm": 0.12240909785032272, "learning_rate": 0.002, "loss": 2.3592, "step": 86170 }, { "epoch": 0.3331477787570936, "grad_norm": 0.09189334511756897, "learning_rate": 0.002, "loss": 2.3496, "step": 86180 }, { "epoch": 0.33318643596047687, "grad_norm": 0.12059196829795837, "learning_rate": 0.002, "loss": 2.3695, "step": 86190 }, { "epoch": 0.3332250931638602, "grad_norm": 0.13283218443393707, "learning_rate": 0.002, "loss": 2.3569, "step": 86200 }, { "epoch": 0.3332637503672434, "grad_norm": 0.11821234226226807, "learning_rate": 0.002, "loss": 2.3545, "step": 86210 }, { "epoch": 0.33330240757062674, "grad_norm": 0.106609046459198, "learning_rate": 0.002, "loss": 2.353, "step": 86220 }, { "epoch": 0.33334106477401, "grad_norm": 0.10692576318979263, "learning_rate": 0.002, "loss": 2.3388, "step": 86230 }, { "epoch": 0.3333797219773933, "grad_norm": 0.10893997550010681, "learning_rate": 0.002, "loss": 2.3512, "step": 86240 }, { "epoch": 0.33341837918077655, "grad_norm": 0.12112563848495483, "learning_rate": 0.002, "loss": 2.3562, "step": 86250 }, { "epoch": 0.3334570363841598, "grad_norm": 0.09665486216545105, "learning_rate": 0.002, "loss": 2.3681, "step": 86260 }, { "epoch": 0.3334956935875431, "grad_norm": 0.12508168816566467, "learning_rate": 0.002, "loss": 2.3459, "step": 86270 }, { "epoch": 0.33353435079092636, "grad_norm": 0.12956631183624268, "learning_rate": 0.002, "loss": 2.3476, "step": 86280 }, { "epoch": 0.33357300799430967, "grad_norm": 0.11325182765722275, "learning_rate": 0.002, "loss": 2.3326, "step": 86290 }, { "epoch": 0.3336116651976929, "grad_norm": 0.11303581297397614, "learning_rate": 0.002, "loss": 2.352, "step": 86300 }, { "epoch": 0.3336503224010762, "grad_norm": 0.12054084241390228, "learning_rate": 0.002, "loss": 2.3592, "step": 86310 }, { "epoch": 0.3336889796044595, "grad_norm": 0.10581088066101074, "learning_rate": 0.002, "loss": 2.3455, "step": 86320 }, { "epoch": 0.3337276368078428, "grad_norm": 0.1152268648147583, "learning_rate": 0.002, "loss": 2.3337, "step": 86330 }, { "epoch": 0.33376629401122604, "grad_norm": 0.1067994236946106, "learning_rate": 0.002, "loss": 2.3421, "step": 86340 }, { "epoch": 0.33380495121460935, "grad_norm": 0.10510335117578506, "learning_rate": 0.002, "loss": 2.363, "step": 86350 }, { "epoch": 0.3338436084179926, "grad_norm": 0.09770243614912033, "learning_rate": 0.002, "loss": 2.3533, "step": 86360 }, { "epoch": 0.3338822656213759, "grad_norm": 0.11518805474042892, "learning_rate": 0.002, "loss": 2.3362, "step": 86370 }, { "epoch": 0.33392092282475916, "grad_norm": 0.10489653050899506, "learning_rate": 0.002, "loss": 2.3666, "step": 86380 }, { "epoch": 0.33395958002814247, "grad_norm": 0.1306338906288147, "learning_rate": 0.002, "loss": 2.3545, "step": 86390 }, { "epoch": 0.3339982372315257, "grad_norm": 0.10293161869049072, "learning_rate": 0.002, "loss": 2.3551, "step": 86400 }, { "epoch": 0.334036894434909, "grad_norm": 0.10411642491817474, "learning_rate": 0.002, "loss": 2.3604, "step": 86410 }, { "epoch": 0.3340755516382923, "grad_norm": 0.1078294888138771, "learning_rate": 0.002, "loss": 2.3579, "step": 86420 }, { "epoch": 0.33411420884167553, "grad_norm": 0.10516948252916336, "learning_rate": 0.002, "loss": 2.3653, "step": 86430 }, { "epoch": 0.33415286604505884, "grad_norm": 0.12160937488079071, "learning_rate": 0.002, "loss": 2.3458, "step": 86440 }, { "epoch": 0.3341915232484421, "grad_norm": 0.10858595371246338, "learning_rate": 0.002, "loss": 2.343, "step": 86450 }, { "epoch": 0.3342301804518254, "grad_norm": 0.12583279609680176, "learning_rate": 0.002, "loss": 2.3479, "step": 86460 }, { "epoch": 0.33426883765520865, "grad_norm": 0.10968731343746185, "learning_rate": 0.002, "loss": 2.3654, "step": 86470 }, { "epoch": 0.33430749485859196, "grad_norm": 0.10739333182573318, "learning_rate": 0.002, "loss": 2.3482, "step": 86480 }, { "epoch": 0.3343461520619752, "grad_norm": 0.09372472763061523, "learning_rate": 0.002, "loss": 2.3406, "step": 86490 }, { "epoch": 0.3343848092653585, "grad_norm": 0.10811075568199158, "learning_rate": 0.002, "loss": 2.3395, "step": 86500 }, { "epoch": 0.33442346646874177, "grad_norm": 0.10535812377929688, "learning_rate": 0.002, "loss": 2.3461, "step": 86510 }, { "epoch": 0.3344621236721251, "grad_norm": 0.0991438776254654, "learning_rate": 0.002, "loss": 2.341, "step": 86520 }, { "epoch": 0.33450078087550833, "grad_norm": 0.09942366927862167, "learning_rate": 0.002, "loss": 2.3528, "step": 86530 }, { "epoch": 0.33453943807889164, "grad_norm": 0.10737288743257523, "learning_rate": 0.002, "loss": 2.3588, "step": 86540 }, { "epoch": 0.3345780952822749, "grad_norm": 0.10016623139381409, "learning_rate": 0.002, "loss": 2.3393, "step": 86550 }, { "epoch": 0.3346167524856582, "grad_norm": 0.1013760194182396, "learning_rate": 0.002, "loss": 2.3638, "step": 86560 }, { "epoch": 0.33465540968904145, "grad_norm": 0.09310334175825119, "learning_rate": 0.002, "loss": 2.3386, "step": 86570 }, { "epoch": 0.33469406689242476, "grad_norm": 0.13864712417125702, "learning_rate": 0.002, "loss": 2.3649, "step": 86580 }, { "epoch": 0.334732724095808, "grad_norm": 0.10814253240823746, "learning_rate": 0.002, "loss": 2.3382, "step": 86590 }, { "epoch": 0.3347713812991913, "grad_norm": 0.11107059568166733, "learning_rate": 0.002, "loss": 2.3382, "step": 86600 }, { "epoch": 0.33481003850257457, "grad_norm": 0.10960592329502106, "learning_rate": 0.002, "loss": 2.3522, "step": 86610 }, { "epoch": 0.3348486957059578, "grad_norm": 0.12890039384365082, "learning_rate": 0.002, "loss": 2.3517, "step": 86620 }, { "epoch": 0.33488735290934113, "grad_norm": 0.11162910610437393, "learning_rate": 0.002, "loss": 2.3536, "step": 86630 }, { "epoch": 0.3349260101127244, "grad_norm": 0.11151379346847534, "learning_rate": 0.002, "loss": 2.3477, "step": 86640 }, { "epoch": 0.3349646673161077, "grad_norm": 0.0964745432138443, "learning_rate": 0.002, "loss": 2.3495, "step": 86650 }, { "epoch": 0.33500332451949094, "grad_norm": 0.11261545866727829, "learning_rate": 0.002, "loss": 2.3582, "step": 86660 }, { "epoch": 0.33504198172287425, "grad_norm": 0.10220064222812653, "learning_rate": 0.002, "loss": 2.3611, "step": 86670 }, { "epoch": 0.3350806389262575, "grad_norm": 0.12906724214553833, "learning_rate": 0.002, "loss": 2.3376, "step": 86680 }, { "epoch": 0.3351192961296408, "grad_norm": 0.10271503031253815, "learning_rate": 0.002, "loss": 2.3564, "step": 86690 }, { "epoch": 0.33515795333302406, "grad_norm": 0.11177756637334824, "learning_rate": 0.002, "loss": 2.3522, "step": 86700 }, { "epoch": 0.33519661053640737, "grad_norm": 0.10489747673273087, "learning_rate": 0.002, "loss": 2.3356, "step": 86710 }, { "epoch": 0.3352352677397906, "grad_norm": 0.10679716616868973, "learning_rate": 0.002, "loss": 2.3582, "step": 86720 }, { "epoch": 0.33527392494317393, "grad_norm": 0.09327898919582367, "learning_rate": 0.002, "loss": 2.3547, "step": 86730 }, { "epoch": 0.3353125821465572, "grad_norm": 0.10325710475444794, "learning_rate": 0.002, "loss": 2.354, "step": 86740 }, { "epoch": 0.3353512393499405, "grad_norm": 0.11265038698911667, "learning_rate": 0.002, "loss": 2.3521, "step": 86750 }, { "epoch": 0.33538989655332374, "grad_norm": 0.12953822314739227, "learning_rate": 0.002, "loss": 2.3607, "step": 86760 }, { "epoch": 0.33542855375670705, "grad_norm": 0.1095634251832962, "learning_rate": 0.002, "loss": 2.3522, "step": 86770 }, { "epoch": 0.3354672109600903, "grad_norm": 0.10133285075426102, "learning_rate": 0.002, "loss": 2.3522, "step": 86780 }, { "epoch": 0.3355058681634736, "grad_norm": 0.1050833910703659, "learning_rate": 0.002, "loss": 2.356, "step": 86790 }, { "epoch": 0.33554452536685686, "grad_norm": 0.11059989780187607, "learning_rate": 0.002, "loss": 2.3527, "step": 86800 }, { "epoch": 0.3355831825702401, "grad_norm": 0.10115838050842285, "learning_rate": 0.002, "loss": 2.3438, "step": 86810 }, { "epoch": 0.3356218397736234, "grad_norm": 0.1260637491941452, "learning_rate": 0.002, "loss": 2.3652, "step": 86820 }, { "epoch": 0.3356604969770067, "grad_norm": 0.10474376380443573, "learning_rate": 0.002, "loss": 2.3467, "step": 86830 }, { "epoch": 0.33569915418039, "grad_norm": 0.10274680703878403, "learning_rate": 0.002, "loss": 2.3611, "step": 86840 }, { "epoch": 0.33573781138377323, "grad_norm": 0.10135255008935928, "learning_rate": 0.002, "loss": 2.365, "step": 86850 }, { "epoch": 0.33577646858715654, "grad_norm": 0.12209215760231018, "learning_rate": 0.002, "loss": 2.3377, "step": 86860 }, { "epoch": 0.3358151257905398, "grad_norm": 0.0951184555888176, "learning_rate": 0.002, "loss": 2.3293, "step": 86870 }, { "epoch": 0.3358537829939231, "grad_norm": 0.10640900582075119, "learning_rate": 0.002, "loss": 2.3461, "step": 86880 }, { "epoch": 0.33589244019730635, "grad_norm": 0.08872721344232559, "learning_rate": 0.002, "loss": 2.3334, "step": 86890 }, { "epoch": 0.33593109740068966, "grad_norm": 0.12645526230335236, "learning_rate": 0.002, "loss": 2.3575, "step": 86900 }, { "epoch": 0.3359697546040729, "grad_norm": 0.127633735537529, "learning_rate": 0.002, "loss": 2.3607, "step": 86910 }, { "epoch": 0.3360084118074562, "grad_norm": 0.10818655043840408, "learning_rate": 0.002, "loss": 2.352, "step": 86920 }, { "epoch": 0.33604706901083947, "grad_norm": 0.13198406994342804, "learning_rate": 0.002, "loss": 2.3526, "step": 86930 }, { "epoch": 0.3360857262142228, "grad_norm": 0.1011178269982338, "learning_rate": 0.002, "loss": 2.3538, "step": 86940 }, { "epoch": 0.33612438341760603, "grad_norm": 0.1177314817905426, "learning_rate": 0.002, "loss": 2.3533, "step": 86950 }, { "epoch": 0.33616304062098934, "grad_norm": 0.11771221458911896, "learning_rate": 0.002, "loss": 2.3525, "step": 86960 }, { "epoch": 0.3362016978243726, "grad_norm": 0.11730633676052094, "learning_rate": 0.002, "loss": 2.3478, "step": 86970 }, { "epoch": 0.3362403550277559, "grad_norm": 0.10943958163261414, "learning_rate": 0.002, "loss": 2.3486, "step": 86980 }, { "epoch": 0.33627901223113915, "grad_norm": 0.10826060175895691, "learning_rate": 0.002, "loss": 2.3487, "step": 86990 }, { "epoch": 0.3363176694345224, "grad_norm": 0.10730686783790588, "learning_rate": 0.002, "loss": 2.3642, "step": 87000 }, { "epoch": 0.3363563266379057, "grad_norm": 0.1155748963356018, "learning_rate": 0.002, "loss": 2.3498, "step": 87010 }, { "epoch": 0.33639498384128896, "grad_norm": 0.10520046949386597, "learning_rate": 0.002, "loss": 2.3443, "step": 87020 }, { "epoch": 0.33643364104467227, "grad_norm": 0.11502497643232346, "learning_rate": 0.002, "loss": 2.3794, "step": 87030 }, { "epoch": 0.3364722982480555, "grad_norm": 0.11109548062086105, "learning_rate": 0.002, "loss": 2.3596, "step": 87040 }, { "epoch": 0.33651095545143883, "grad_norm": 0.10664211958646774, "learning_rate": 0.002, "loss": 2.3585, "step": 87050 }, { "epoch": 0.3365496126548221, "grad_norm": 0.10760653764009476, "learning_rate": 0.002, "loss": 2.3466, "step": 87060 }, { "epoch": 0.3365882698582054, "grad_norm": 0.12719734013080597, "learning_rate": 0.002, "loss": 2.3565, "step": 87070 }, { "epoch": 0.33662692706158864, "grad_norm": 0.12140123546123505, "learning_rate": 0.002, "loss": 2.363, "step": 87080 }, { "epoch": 0.33666558426497195, "grad_norm": 0.1130446195602417, "learning_rate": 0.002, "loss": 2.3624, "step": 87090 }, { "epoch": 0.3367042414683552, "grad_norm": 0.10486172884702682, "learning_rate": 0.002, "loss": 2.3504, "step": 87100 }, { "epoch": 0.3367428986717385, "grad_norm": 0.11093088239431381, "learning_rate": 0.002, "loss": 2.347, "step": 87110 }, { "epoch": 0.33678155587512176, "grad_norm": 0.10533785820007324, "learning_rate": 0.002, "loss": 2.356, "step": 87120 }, { "epoch": 0.33682021307850507, "grad_norm": 0.10832514613866806, "learning_rate": 0.002, "loss": 2.35, "step": 87130 }, { "epoch": 0.3368588702818883, "grad_norm": 0.11467445641756058, "learning_rate": 0.002, "loss": 2.3647, "step": 87140 }, { "epoch": 0.33689752748527163, "grad_norm": 0.10939925909042358, "learning_rate": 0.002, "loss": 2.3445, "step": 87150 }, { "epoch": 0.3369361846886549, "grad_norm": 0.12103261053562164, "learning_rate": 0.002, "loss": 2.3394, "step": 87160 }, { "epoch": 0.33697484189203813, "grad_norm": 0.0894695296883583, "learning_rate": 0.002, "loss": 2.3603, "step": 87170 }, { "epoch": 0.33701349909542144, "grad_norm": 0.10069043189287186, "learning_rate": 0.002, "loss": 2.3509, "step": 87180 }, { "epoch": 0.3370521562988047, "grad_norm": 0.10153687745332718, "learning_rate": 0.002, "loss": 2.3412, "step": 87190 }, { "epoch": 0.337090813502188, "grad_norm": 0.1355798840522766, "learning_rate": 0.002, "loss": 2.3385, "step": 87200 }, { "epoch": 0.33712947070557125, "grad_norm": 0.11290738731622696, "learning_rate": 0.002, "loss": 2.3472, "step": 87210 }, { "epoch": 0.33716812790895456, "grad_norm": 0.12131005525588989, "learning_rate": 0.002, "loss": 2.355, "step": 87220 }, { "epoch": 0.3372067851123378, "grad_norm": 0.10573593527078629, "learning_rate": 0.002, "loss": 2.35, "step": 87230 }, { "epoch": 0.3372454423157211, "grad_norm": 0.1039854884147644, "learning_rate": 0.002, "loss": 2.3546, "step": 87240 }, { "epoch": 0.3372840995191044, "grad_norm": 0.1108429804444313, "learning_rate": 0.002, "loss": 2.349, "step": 87250 }, { "epoch": 0.3373227567224877, "grad_norm": 0.10919930040836334, "learning_rate": 0.002, "loss": 2.358, "step": 87260 }, { "epoch": 0.33736141392587093, "grad_norm": 0.12618575990200043, "learning_rate": 0.002, "loss": 2.3658, "step": 87270 }, { "epoch": 0.33740007112925424, "grad_norm": 0.10285606980323792, "learning_rate": 0.002, "loss": 2.3575, "step": 87280 }, { "epoch": 0.3374387283326375, "grad_norm": 0.10287448018789291, "learning_rate": 0.002, "loss": 2.3568, "step": 87290 }, { "epoch": 0.3374773855360208, "grad_norm": 0.11148592084646225, "learning_rate": 0.002, "loss": 2.3562, "step": 87300 }, { "epoch": 0.33751604273940405, "grad_norm": 0.10517746210098267, "learning_rate": 0.002, "loss": 2.3416, "step": 87310 }, { "epoch": 0.33755469994278736, "grad_norm": 0.11742986738681793, "learning_rate": 0.002, "loss": 2.3447, "step": 87320 }, { "epoch": 0.3375933571461706, "grad_norm": 0.10818766802549362, "learning_rate": 0.002, "loss": 2.3365, "step": 87330 }, { "epoch": 0.3376320143495539, "grad_norm": 0.1280340552330017, "learning_rate": 0.002, "loss": 2.3638, "step": 87340 }, { "epoch": 0.3376706715529372, "grad_norm": 0.131245419383049, "learning_rate": 0.002, "loss": 2.3391, "step": 87350 }, { "epoch": 0.3377093287563204, "grad_norm": 0.10057683289051056, "learning_rate": 0.002, "loss": 2.3442, "step": 87360 }, { "epoch": 0.33774798595970373, "grad_norm": 0.11801260709762573, "learning_rate": 0.002, "loss": 2.3616, "step": 87370 }, { "epoch": 0.337786643163087, "grad_norm": 0.10263387113809586, "learning_rate": 0.002, "loss": 2.3646, "step": 87380 }, { "epoch": 0.3378253003664703, "grad_norm": 0.10963422060012817, "learning_rate": 0.002, "loss": 2.3692, "step": 87390 }, { "epoch": 0.33786395756985355, "grad_norm": 0.10239773243665695, "learning_rate": 0.002, "loss": 2.354, "step": 87400 }, { "epoch": 0.33790261477323685, "grad_norm": 0.14015544950962067, "learning_rate": 0.002, "loss": 2.3596, "step": 87410 }, { "epoch": 0.3379412719766201, "grad_norm": 0.11772413551807404, "learning_rate": 0.002, "loss": 2.3511, "step": 87420 }, { "epoch": 0.3379799291800034, "grad_norm": 0.09805656224489212, "learning_rate": 0.002, "loss": 2.3539, "step": 87430 }, { "epoch": 0.33801858638338667, "grad_norm": 0.12166161090135574, "learning_rate": 0.002, "loss": 2.3561, "step": 87440 }, { "epoch": 0.33805724358677, "grad_norm": 0.09780203551054001, "learning_rate": 0.002, "loss": 2.3463, "step": 87450 }, { "epoch": 0.3380959007901532, "grad_norm": 0.1183888167142868, "learning_rate": 0.002, "loss": 2.3466, "step": 87460 }, { "epoch": 0.33813455799353653, "grad_norm": 0.1062530130147934, "learning_rate": 0.002, "loss": 2.3396, "step": 87470 }, { "epoch": 0.3381732151969198, "grad_norm": 0.09859632700681686, "learning_rate": 0.002, "loss": 2.3501, "step": 87480 }, { "epoch": 0.3382118724003031, "grad_norm": 0.10625289380550385, "learning_rate": 0.002, "loss": 2.3517, "step": 87490 }, { "epoch": 0.33825052960368635, "grad_norm": 0.1421256959438324, "learning_rate": 0.002, "loss": 2.3461, "step": 87500 }, { "epoch": 0.33828918680706965, "grad_norm": 0.11129707843065262, "learning_rate": 0.002, "loss": 2.3587, "step": 87510 }, { "epoch": 0.3383278440104529, "grad_norm": 0.1156439557671547, "learning_rate": 0.002, "loss": 2.3708, "step": 87520 }, { "epoch": 0.3383665012138362, "grad_norm": 0.10404882580041885, "learning_rate": 0.002, "loss": 2.335, "step": 87530 }, { "epoch": 0.33840515841721946, "grad_norm": 0.10040571540594101, "learning_rate": 0.002, "loss": 2.3718, "step": 87540 }, { "epoch": 0.3384438156206027, "grad_norm": 0.10529720783233643, "learning_rate": 0.002, "loss": 2.3479, "step": 87550 }, { "epoch": 0.338482472823986, "grad_norm": 0.1062098890542984, "learning_rate": 0.002, "loss": 2.364, "step": 87560 }, { "epoch": 0.3385211300273693, "grad_norm": 0.15115439891815186, "learning_rate": 0.002, "loss": 2.3456, "step": 87570 }, { "epoch": 0.3385597872307526, "grad_norm": 0.10261274874210358, "learning_rate": 0.002, "loss": 2.3581, "step": 87580 }, { "epoch": 0.33859844443413584, "grad_norm": 0.10737352818250656, "learning_rate": 0.002, "loss": 2.3659, "step": 87590 }, { "epoch": 0.33863710163751914, "grad_norm": 0.12016598880290985, "learning_rate": 0.002, "loss": 2.3518, "step": 87600 }, { "epoch": 0.3386757588409024, "grad_norm": 0.11309704929590225, "learning_rate": 0.002, "loss": 2.3585, "step": 87610 }, { "epoch": 0.3387144160442857, "grad_norm": 0.12417756021022797, "learning_rate": 0.002, "loss": 2.3545, "step": 87620 }, { "epoch": 0.33875307324766896, "grad_norm": 0.10050065070390701, "learning_rate": 0.002, "loss": 2.3388, "step": 87630 }, { "epoch": 0.33879173045105226, "grad_norm": 0.11637856811285019, "learning_rate": 0.002, "loss": 2.3483, "step": 87640 }, { "epoch": 0.3388303876544355, "grad_norm": 0.12719853222370148, "learning_rate": 0.002, "loss": 2.3575, "step": 87650 }, { "epoch": 0.3388690448578188, "grad_norm": 0.34996548295021057, "learning_rate": 0.002, "loss": 2.338, "step": 87660 }, { "epoch": 0.3389077020612021, "grad_norm": 0.100518599152565, "learning_rate": 0.002, "loss": 2.3524, "step": 87670 }, { "epoch": 0.3389463592645854, "grad_norm": 0.11817089468240738, "learning_rate": 0.002, "loss": 2.3562, "step": 87680 }, { "epoch": 0.33898501646796864, "grad_norm": 0.1029106006026268, "learning_rate": 0.002, "loss": 2.3516, "step": 87690 }, { "epoch": 0.33902367367135194, "grad_norm": 0.11708049476146698, "learning_rate": 0.002, "loss": 2.3613, "step": 87700 }, { "epoch": 0.3390623308747352, "grad_norm": 0.1058112159371376, "learning_rate": 0.002, "loss": 2.3381, "step": 87710 }, { "epoch": 0.3391009880781185, "grad_norm": 0.11289533972740173, "learning_rate": 0.002, "loss": 2.3665, "step": 87720 }, { "epoch": 0.33913964528150176, "grad_norm": 0.12030244618654251, "learning_rate": 0.002, "loss": 2.3666, "step": 87730 }, { "epoch": 0.339178302484885, "grad_norm": 0.10465981066226959, "learning_rate": 0.002, "loss": 2.3484, "step": 87740 }, { "epoch": 0.3392169596882683, "grad_norm": 0.11341659724712372, "learning_rate": 0.002, "loss": 2.3485, "step": 87750 }, { "epoch": 0.33925561689165157, "grad_norm": 0.10221397876739502, "learning_rate": 0.002, "loss": 2.3466, "step": 87760 }, { "epoch": 0.3392942740950349, "grad_norm": 0.11447005718946457, "learning_rate": 0.002, "loss": 2.3467, "step": 87770 }, { "epoch": 0.33933293129841813, "grad_norm": 0.10479959100484848, "learning_rate": 0.002, "loss": 2.3421, "step": 87780 }, { "epoch": 0.33937158850180144, "grad_norm": 0.14646686613559723, "learning_rate": 0.002, "loss": 2.3576, "step": 87790 }, { "epoch": 0.3394102457051847, "grad_norm": 0.11034227907657623, "learning_rate": 0.002, "loss": 2.3482, "step": 87800 }, { "epoch": 0.339448902908568, "grad_norm": 0.116272933781147, "learning_rate": 0.002, "loss": 2.3559, "step": 87810 }, { "epoch": 0.33948756011195125, "grad_norm": 0.12207002937793732, "learning_rate": 0.002, "loss": 2.3511, "step": 87820 }, { "epoch": 0.33952621731533456, "grad_norm": 0.1189301460981369, "learning_rate": 0.002, "loss": 2.3731, "step": 87830 }, { "epoch": 0.3395648745187178, "grad_norm": 0.115324467420578, "learning_rate": 0.002, "loss": 2.3473, "step": 87840 }, { "epoch": 0.3396035317221011, "grad_norm": 0.11082760244607925, "learning_rate": 0.002, "loss": 2.3565, "step": 87850 }, { "epoch": 0.33964218892548437, "grad_norm": 0.13625425100326538, "learning_rate": 0.002, "loss": 2.3569, "step": 87860 }, { "epoch": 0.3396808461288677, "grad_norm": 0.10276339948177338, "learning_rate": 0.002, "loss": 2.3565, "step": 87870 }, { "epoch": 0.3397195033322509, "grad_norm": 0.10634419322013855, "learning_rate": 0.002, "loss": 2.3484, "step": 87880 }, { "epoch": 0.33975816053563423, "grad_norm": 0.11744745820760727, "learning_rate": 0.002, "loss": 2.3708, "step": 87890 }, { "epoch": 0.3397968177390175, "grad_norm": 0.10343390703201294, "learning_rate": 0.002, "loss": 2.3434, "step": 87900 }, { "epoch": 0.33983547494240074, "grad_norm": 0.10963012278079987, "learning_rate": 0.002, "loss": 2.348, "step": 87910 }, { "epoch": 0.33987413214578405, "grad_norm": 0.11228656768798828, "learning_rate": 0.002, "loss": 2.3462, "step": 87920 }, { "epoch": 0.3399127893491673, "grad_norm": 0.1294107437133789, "learning_rate": 0.002, "loss": 2.3495, "step": 87930 }, { "epoch": 0.3399514465525506, "grad_norm": 0.13598628342151642, "learning_rate": 0.002, "loss": 2.3527, "step": 87940 }, { "epoch": 0.33999010375593386, "grad_norm": 0.11039962619543076, "learning_rate": 0.002, "loss": 2.3569, "step": 87950 }, { "epoch": 0.34002876095931717, "grad_norm": 0.1061733067035675, "learning_rate": 0.002, "loss": 2.3551, "step": 87960 }, { "epoch": 0.3400674181627004, "grad_norm": 0.12140204012393951, "learning_rate": 0.002, "loss": 2.3548, "step": 87970 }, { "epoch": 0.3401060753660837, "grad_norm": 0.09958139806985855, "learning_rate": 0.002, "loss": 2.3639, "step": 87980 }, { "epoch": 0.340144732569467, "grad_norm": 0.10445655882358551, "learning_rate": 0.002, "loss": 2.3507, "step": 87990 }, { "epoch": 0.3401833897728503, "grad_norm": 0.12749019265174866, "learning_rate": 0.002, "loss": 2.3425, "step": 88000 }, { "epoch": 0.34022204697623354, "grad_norm": 0.10322877019643784, "learning_rate": 0.002, "loss": 2.3536, "step": 88010 }, { "epoch": 0.34026070417961685, "grad_norm": 0.12513993680477142, "learning_rate": 0.002, "loss": 2.3524, "step": 88020 }, { "epoch": 0.3402993613830001, "grad_norm": 0.10777510702610016, "learning_rate": 0.002, "loss": 2.3482, "step": 88030 }, { "epoch": 0.3403380185863834, "grad_norm": 0.11263205856084824, "learning_rate": 0.002, "loss": 2.3546, "step": 88040 }, { "epoch": 0.34037667578976666, "grad_norm": 0.11279111355543137, "learning_rate": 0.002, "loss": 2.358, "step": 88050 }, { "epoch": 0.34041533299314997, "grad_norm": 0.10997018218040466, "learning_rate": 0.002, "loss": 2.3427, "step": 88060 }, { "epoch": 0.3404539901965332, "grad_norm": 0.12844707071781158, "learning_rate": 0.002, "loss": 2.357, "step": 88070 }, { "epoch": 0.3404926473999165, "grad_norm": 0.10197935253381729, "learning_rate": 0.002, "loss": 2.3763, "step": 88080 }, { "epoch": 0.3405313046032998, "grad_norm": 0.09389302879571915, "learning_rate": 0.002, "loss": 2.3545, "step": 88090 }, { "epoch": 0.34056996180668303, "grad_norm": 0.10149596631526947, "learning_rate": 0.002, "loss": 2.3544, "step": 88100 }, { "epoch": 0.34060861901006634, "grad_norm": 0.10434433817863464, "learning_rate": 0.002, "loss": 2.3456, "step": 88110 }, { "epoch": 0.3406472762134496, "grad_norm": 0.11394469439983368, "learning_rate": 0.002, "loss": 2.3294, "step": 88120 }, { "epoch": 0.3406859334168329, "grad_norm": 0.1021641194820404, "learning_rate": 0.002, "loss": 2.3532, "step": 88130 }, { "epoch": 0.34072459062021615, "grad_norm": 0.11137614399194717, "learning_rate": 0.002, "loss": 2.336, "step": 88140 }, { "epoch": 0.34076324782359946, "grad_norm": 0.1125793531537056, "learning_rate": 0.002, "loss": 2.3569, "step": 88150 }, { "epoch": 0.3408019050269827, "grad_norm": 0.11630513519048691, "learning_rate": 0.002, "loss": 2.3441, "step": 88160 }, { "epoch": 0.340840562230366, "grad_norm": 0.1093737930059433, "learning_rate": 0.002, "loss": 2.3708, "step": 88170 }, { "epoch": 0.34087921943374927, "grad_norm": 0.10950718820095062, "learning_rate": 0.002, "loss": 2.3548, "step": 88180 }, { "epoch": 0.3409178766371326, "grad_norm": 0.12360899150371552, "learning_rate": 0.002, "loss": 2.3363, "step": 88190 }, { "epoch": 0.34095653384051583, "grad_norm": 0.0962943509221077, "learning_rate": 0.002, "loss": 2.343, "step": 88200 }, { "epoch": 0.34099519104389914, "grad_norm": 0.10609276592731476, "learning_rate": 0.002, "loss": 2.3619, "step": 88210 }, { "epoch": 0.3410338482472824, "grad_norm": 0.10679814964532852, "learning_rate": 0.002, "loss": 2.3566, "step": 88220 }, { "epoch": 0.3410725054506657, "grad_norm": 0.1043362095952034, "learning_rate": 0.002, "loss": 2.3489, "step": 88230 }, { "epoch": 0.34111116265404895, "grad_norm": 0.1262073963880539, "learning_rate": 0.002, "loss": 2.3448, "step": 88240 }, { "epoch": 0.34114981985743226, "grad_norm": 0.13010995090007782, "learning_rate": 0.002, "loss": 2.3616, "step": 88250 }, { "epoch": 0.3411884770608155, "grad_norm": 0.10564157366752625, "learning_rate": 0.002, "loss": 2.3645, "step": 88260 }, { "epoch": 0.3412271342641988, "grad_norm": 0.1198716014623642, "learning_rate": 0.002, "loss": 2.3543, "step": 88270 }, { "epoch": 0.34126579146758207, "grad_norm": 0.11889446526765823, "learning_rate": 0.002, "loss": 2.3651, "step": 88280 }, { "epoch": 0.3413044486709653, "grad_norm": 0.10770682245492935, "learning_rate": 0.002, "loss": 2.3417, "step": 88290 }, { "epoch": 0.34134310587434863, "grad_norm": 0.09319154173135757, "learning_rate": 0.002, "loss": 2.3416, "step": 88300 }, { "epoch": 0.3413817630777319, "grad_norm": 0.11082503944635391, "learning_rate": 0.002, "loss": 2.3512, "step": 88310 }, { "epoch": 0.3414204202811152, "grad_norm": 0.12003304809331894, "learning_rate": 0.002, "loss": 2.3496, "step": 88320 }, { "epoch": 0.34145907748449844, "grad_norm": 0.10606896877288818, "learning_rate": 0.002, "loss": 2.3481, "step": 88330 }, { "epoch": 0.34149773468788175, "grad_norm": 0.11933515220880508, "learning_rate": 0.002, "loss": 2.3485, "step": 88340 }, { "epoch": 0.341536391891265, "grad_norm": 0.11698786169290543, "learning_rate": 0.002, "loss": 2.344, "step": 88350 }, { "epoch": 0.3415750490946483, "grad_norm": 0.12531299889087677, "learning_rate": 0.002, "loss": 2.3585, "step": 88360 }, { "epoch": 0.34161370629803156, "grad_norm": 0.12377554923295975, "learning_rate": 0.002, "loss": 2.3637, "step": 88370 }, { "epoch": 0.34165236350141487, "grad_norm": 0.10473481565713882, "learning_rate": 0.002, "loss": 2.3652, "step": 88380 }, { "epoch": 0.3416910207047981, "grad_norm": 0.12325559556484222, "learning_rate": 0.002, "loss": 2.3463, "step": 88390 }, { "epoch": 0.34172967790818143, "grad_norm": 0.09602730721235275, "learning_rate": 0.002, "loss": 2.3438, "step": 88400 }, { "epoch": 0.3417683351115647, "grad_norm": 0.1111978143453598, "learning_rate": 0.002, "loss": 2.3611, "step": 88410 }, { "epoch": 0.341806992314948, "grad_norm": 0.1265242099761963, "learning_rate": 0.002, "loss": 2.3534, "step": 88420 }, { "epoch": 0.34184564951833124, "grad_norm": 0.10700623691082001, "learning_rate": 0.002, "loss": 2.3413, "step": 88430 }, { "epoch": 0.34188430672171455, "grad_norm": 0.10761568695306778, "learning_rate": 0.002, "loss": 2.3528, "step": 88440 }, { "epoch": 0.3419229639250978, "grad_norm": 0.10627592355012894, "learning_rate": 0.002, "loss": 2.3449, "step": 88450 }, { "epoch": 0.3419616211284811, "grad_norm": 0.09872213751077652, "learning_rate": 0.002, "loss": 2.3453, "step": 88460 }, { "epoch": 0.34200027833186436, "grad_norm": 0.1186567172408104, "learning_rate": 0.002, "loss": 2.3539, "step": 88470 }, { "epoch": 0.3420389355352476, "grad_norm": 0.11164540797472, "learning_rate": 0.002, "loss": 2.3485, "step": 88480 }, { "epoch": 0.3420775927386309, "grad_norm": 0.09923987090587616, "learning_rate": 0.002, "loss": 2.3584, "step": 88490 }, { "epoch": 0.3421162499420142, "grad_norm": 0.12012343853712082, "learning_rate": 0.002, "loss": 2.3545, "step": 88500 }, { "epoch": 0.3421549071453975, "grad_norm": 0.13408435881137848, "learning_rate": 0.002, "loss": 2.3545, "step": 88510 }, { "epoch": 0.34219356434878073, "grad_norm": 0.11128830909729004, "learning_rate": 0.002, "loss": 2.3507, "step": 88520 }, { "epoch": 0.34223222155216404, "grad_norm": 0.13357031345367432, "learning_rate": 0.002, "loss": 2.3515, "step": 88530 }, { "epoch": 0.3422708787555473, "grad_norm": 0.1076250970363617, "learning_rate": 0.002, "loss": 2.3467, "step": 88540 }, { "epoch": 0.3423095359589306, "grad_norm": 0.1268393099308014, "learning_rate": 0.002, "loss": 2.3697, "step": 88550 }, { "epoch": 0.34234819316231385, "grad_norm": 0.14251774549484253, "learning_rate": 0.002, "loss": 2.3525, "step": 88560 }, { "epoch": 0.34238685036569716, "grad_norm": 0.10408865660429001, "learning_rate": 0.002, "loss": 2.3637, "step": 88570 }, { "epoch": 0.3424255075690804, "grad_norm": 0.10077012330293655, "learning_rate": 0.002, "loss": 2.3581, "step": 88580 }, { "epoch": 0.3424641647724637, "grad_norm": 0.11243431270122528, "learning_rate": 0.002, "loss": 2.349, "step": 88590 }, { "epoch": 0.34250282197584697, "grad_norm": 0.11361908912658691, "learning_rate": 0.002, "loss": 2.3512, "step": 88600 }, { "epoch": 0.3425414791792303, "grad_norm": 0.10903273522853851, "learning_rate": 0.002, "loss": 2.3544, "step": 88610 }, { "epoch": 0.34258013638261353, "grad_norm": 0.14040608704090118, "learning_rate": 0.002, "loss": 2.3408, "step": 88620 }, { "epoch": 0.34261879358599684, "grad_norm": 0.12683793902397156, "learning_rate": 0.002, "loss": 2.3611, "step": 88630 }, { "epoch": 0.3426574507893801, "grad_norm": 0.11451121419668198, "learning_rate": 0.002, "loss": 2.352, "step": 88640 }, { "epoch": 0.3426961079927634, "grad_norm": 0.10799139738082886, "learning_rate": 0.002, "loss": 2.3601, "step": 88650 }, { "epoch": 0.34273476519614665, "grad_norm": 0.11651720106601715, "learning_rate": 0.002, "loss": 2.3698, "step": 88660 }, { "epoch": 0.3427734223995299, "grad_norm": 0.10717178881168365, "learning_rate": 0.002, "loss": 2.3745, "step": 88670 }, { "epoch": 0.3428120796029132, "grad_norm": 0.11072812229394913, "learning_rate": 0.002, "loss": 2.3603, "step": 88680 }, { "epoch": 0.34285073680629646, "grad_norm": 0.14107677340507507, "learning_rate": 0.002, "loss": 2.3528, "step": 88690 }, { "epoch": 0.34288939400967977, "grad_norm": 0.11483429372310638, "learning_rate": 0.002, "loss": 2.3579, "step": 88700 }, { "epoch": 0.342928051213063, "grad_norm": 0.10340628027915955, "learning_rate": 0.002, "loss": 2.3474, "step": 88710 }, { "epoch": 0.34296670841644633, "grad_norm": 0.10923565924167633, "learning_rate": 0.002, "loss": 2.3635, "step": 88720 }, { "epoch": 0.3430053656198296, "grad_norm": 0.10067480057477951, "learning_rate": 0.002, "loss": 2.3501, "step": 88730 }, { "epoch": 0.3430440228232129, "grad_norm": 0.10290886461734772, "learning_rate": 0.002, "loss": 2.3524, "step": 88740 }, { "epoch": 0.34308268002659614, "grad_norm": 0.11461234837770462, "learning_rate": 0.002, "loss": 2.3428, "step": 88750 }, { "epoch": 0.34312133722997945, "grad_norm": 0.10550641268491745, "learning_rate": 0.002, "loss": 2.3651, "step": 88760 }, { "epoch": 0.3431599944333627, "grad_norm": 0.12442265450954437, "learning_rate": 0.002, "loss": 2.3505, "step": 88770 }, { "epoch": 0.343198651636746, "grad_norm": 0.10196845233440399, "learning_rate": 0.002, "loss": 2.3614, "step": 88780 }, { "epoch": 0.34323730884012926, "grad_norm": 0.11056846380233765, "learning_rate": 0.002, "loss": 2.36, "step": 88790 }, { "epoch": 0.34327596604351257, "grad_norm": 0.09794145077466965, "learning_rate": 0.002, "loss": 2.36, "step": 88800 }, { "epoch": 0.3433146232468958, "grad_norm": 0.10579000413417816, "learning_rate": 0.002, "loss": 2.342, "step": 88810 }, { "epoch": 0.34335328045027913, "grad_norm": 0.09847233444452286, "learning_rate": 0.002, "loss": 2.3479, "step": 88820 }, { "epoch": 0.3433919376536624, "grad_norm": 0.1011461690068245, "learning_rate": 0.002, "loss": 2.3537, "step": 88830 }, { "epoch": 0.34343059485704563, "grad_norm": 0.09516558051109314, "learning_rate": 0.002, "loss": 2.3402, "step": 88840 }, { "epoch": 0.34346925206042894, "grad_norm": 0.12700580060482025, "learning_rate": 0.002, "loss": 2.35, "step": 88850 }, { "epoch": 0.3435079092638122, "grad_norm": 0.10590207576751709, "learning_rate": 0.002, "loss": 2.3344, "step": 88860 }, { "epoch": 0.3435465664671955, "grad_norm": 0.11522980779409409, "learning_rate": 0.002, "loss": 2.3559, "step": 88870 }, { "epoch": 0.34358522367057875, "grad_norm": 0.11492440104484558, "learning_rate": 0.002, "loss": 2.3748, "step": 88880 }, { "epoch": 0.34362388087396206, "grad_norm": 0.10037875920534134, "learning_rate": 0.002, "loss": 2.3543, "step": 88890 }, { "epoch": 0.3436625380773453, "grad_norm": 0.10939756035804749, "learning_rate": 0.002, "loss": 2.3576, "step": 88900 }, { "epoch": 0.3437011952807286, "grad_norm": 0.10372549295425415, "learning_rate": 0.002, "loss": 2.3504, "step": 88910 }, { "epoch": 0.3437398524841119, "grad_norm": 0.2707937955856323, "learning_rate": 0.002, "loss": 2.3511, "step": 88920 }, { "epoch": 0.3437785096874952, "grad_norm": 0.13005994260311127, "learning_rate": 0.002, "loss": 2.3827, "step": 88930 }, { "epoch": 0.34381716689087843, "grad_norm": 0.20814430713653564, "learning_rate": 0.002, "loss": 2.3497, "step": 88940 }, { "epoch": 0.34385582409426174, "grad_norm": 0.09909890592098236, "learning_rate": 0.002, "loss": 2.3645, "step": 88950 }, { "epoch": 0.343894481297645, "grad_norm": 0.12622849643230438, "learning_rate": 0.002, "loss": 2.3675, "step": 88960 }, { "epoch": 0.3439331385010283, "grad_norm": 0.10739883780479431, "learning_rate": 0.002, "loss": 2.3517, "step": 88970 }, { "epoch": 0.34397179570441155, "grad_norm": 0.11089082062244415, "learning_rate": 0.002, "loss": 2.354, "step": 88980 }, { "epoch": 0.34401045290779486, "grad_norm": 0.10172960162162781, "learning_rate": 0.002, "loss": 2.3521, "step": 88990 }, { "epoch": 0.3440491101111781, "grad_norm": 0.11513733118772507, "learning_rate": 0.002, "loss": 2.3681, "step": 89000 }, { "epoch": 0.3440877673145614, "grad_norm": 0.1210126280784607, "learning_rate": 0.002, "loss": 2.3381, "step": 89010 }, { "epoch": 0.3441264245179447, "grad_norm": 0.1345120519399643, "learning_rate": 0.002, "loss": 2.3529, "step": 89020 }, { "epoch": 0.3441650817213279, "grad_norm": 0.11055257171392441, "learning_rate": 0.002, "loss": 2.3492, "step": 89030 }, { "epoch": 0.34420373892471123, "grad_norm": 0.11494717001914978, "learning_rate": 0.002, "loss": 2.3564, "step": 89040 }, { "epoch": 0.3442423961280945, "grad_norm": 0.11111489683389664, "learning_rate": 0.002, "loss": 2.3618, "step": 89050 }, { "epoch": 0.3442810533314778, "grad_norm": 0.10929816216230392, "learning_rate": 0.002, "loss": 2.3432, "step": 89060 }, { "epoch": 0.34431971053486105, "grad_norm": 0.10010305047035217, "learning_rate": 0.002, "loss": 2.3486, "step": 89070 }, { "epoch": 0.34435836773824435, "grad_norm": 0.11715249717235565, "learning_rate": 0.002, "loss": 2.3413, "step": 89080 }, { "epoch": 0.3443970249416276, "grad_norm": 0.11301356554031372, "learning_rate": 0.002, "loss": 2.3559, "step": 89090 }, { "epoch": 0.3444356821450109, "grad_norm": 0.12198659777641296, "learning_rate": 0.002, "loss": 2.3459, "step": 89100 }, { "epoch": 0.34447433934839417, "grad_norm": 0.4755155146121979, "learning_rate": 0.002, "loss": 2.3562, "step": 89110 }, { "epoch": 0.3445129965517775, "grad_norm": 0.11209255456924438, "learning_rate": 0.002, "loss": 2.3698, "step": 89120 }, { "epoch": 0.3445516537551607, "grad_norm": 0.11255837976932526, "learning_rate": 0.002, "loss": 2.3653, "step": 89130 }, { "epoch": 0.34459031095854403, "grad_norm": 0.12965063750743866, "learning_rate": 0.002, "loss": 2.3483, "step": 89140 }, { "epoch": 0.3446289681619273, "grad_norm": 0.1032148078083992, "learning_rate": 0.002, "loss": 2.3613, "step": 89150 }, { "epoch": 0.3446676253653106, "grad_norm": 0.09087871760129929, "learning_rate": 0.002, "loss": 2.3463, "step": 89160 }, { "epoch": 0.34470628256869384, "grad_norm": 0.11577743291854858, "learning_rate": 0.002, "loss": 2.3637, "step": 89170 }, { "epoch": 0.34474493977207715, "grad_norm": 0.10745842754840851, "learning_rate": 0.002, "loss": 2.3449, "step": 89180 }, { "epoch": 0.3447835969754604, "grad_norm": 0.10638607293367386, "learning_rate": 0.002, "loss": 2.3491, "step": 89190 }, { "epoch": 0.3448222541788437, "grad_norm": 0.15563400089740753, "learning_rate": 0.002, "loss": 2.3591, "step": 89200 }, { "epoch": 0.34486091138222696, "grad_norm": 0.11064454168081284, "learning_rate": 0.002, "loss": 2.3446, "step": 89210 }, { "epoch": 0.3448995685856102, "grad_norm": 0.1113789826631546, "learning_rate": 0.002, "loss": 2.3469, "step": 89220 }, { "epoch": 0.3449382257889935, "grad_norm": 0.10496684908866882, "learning_rate": 0.002, "loss": 2.3534, "step": 89230 }, { "epoch": 0.3449768829923768, "grad_norm": 0.11125270277261734, "learning_rate": 0.002, "loss": 2.3437, "step": 89240 }, { "epoch": 0.3450155401957601, "grad_norm": 0.10501991957426071, "learning_rate": 0.002, "loss": 2.3512, "step": 89250 }, { "epoch": 0.34505419739914334, "grad_norm": 0.10074132680892944, "learning_rate": 0.002, "loss": 2.3322, "step": 89260 }, { "epoch": 0.34509285460252664, "grad_norm": 0.1131800040602684, "learning_rate": 0.002, "loss": 2.3702, "step": 89270 }, { "epoch": 0.3451315118059099, "grad_norm": 0.11575585603713989, "learning_rate": 0.002, "loss": 2.3532, "step": 89280 }, { "epoch": 0.3451701690092932, "grad_norm": 0.1217227578163147, "learning_rate": 0.002, "loss": 2.3587, "step": 89290 }, { "epoch": 0.34520882621267646, "grad_norm": 0.10264372825622559, "learning_rate": 0.002, "loss": 2.3323, "step": 89300 }, { "epoch": 0.34524748341605976, "grad_norm": 0.11179578304290771, "learning_rate": 0.002, "loss": 2.3665, "step": 89310 }, { "epoch": 0.345286140619443, "grad_norm": 0.12197361886501312, "learning_rate": 0.002, "loss": 2.3544, "step": 89320 }, { "epoch": 0.3453247978228263, "grad_norm": 0.1156570091843605, "learning_rate": 0.002, "loss": 2.3623, "step": 89330 }, { "epoch": 0.3453634550262096, "grad_norm": 0.12871617078781128, "learning_rate": 0.002, "loss": 2.3593, "step": 89340 }, { "epoch": 0.3454021122295929, "grad_norm": 0.11505485326051712, "learning_rate": 0.002, "loss": 2.3479, "step": 89350 }, { "epoch": 0.34544076943297614, "grad_norm": 0.1145724356174469, "learning_rate": 0.002, "loss": 2.3564, "step": 89360 }, { "epoch": 0.34547942663635944, "grad_norm": 0.11084239184856415, "learning_rate": 0.002, "loss": 2.3575, "step": 89370 }, { "epoch": 0.3455180838397427, "grad_norm": 0.12926331162452698, "learning_rate": 0.002, "loss": 2.3611, "step": 89380 }, { "epoch": 0.345556741043126, "grad_norm": 0.10337939858436584, "learning_rate": 0.002, "loss": 2.361, "step": 89390 }, { "epoch": 0.34559539824650926, "grad_norm": 0.09303661435842514, "learning_rate": 0.002, "loss": 2.3625, "step": 89400 }, { "epoch": 0.3456340554498925, "grad_norm": 0.10564062744379044, "learning_rate": 0.002, "loss": 2.3456, "step": 89410 }, { "epoch": 0.3456727126532758, "grad_norm": 0.11615221202373505, "learning_rate": 0.002, "loss": 2.344, "step": 89420 }, { "epoch": 0.34571136985665907, "grad_norm": 0.11174018681049347, "learning_rate": 0.002, "loss": 2.3519, "step": 89430 }, { "epoch": 0.3457500270600424, "grad_norm": 0.09725714474916458, "learning_rate": 0.002, "loss": 2.3378, "step": 89440 }, { "epoch": 0.3457886842634256, "grad_norm": 0.22712481021881104, "learning_rate": 0.002, "loss": 2.3741, "step": 89450 }, { "epoch": 0.34582734146680894, "grad_norm": 0.09932104498147964, "learning_rate": 0.002, "loss": 2.3679, "step": 89460 }, { "epoch": 0.3458659986701922, "grad_norm": 0.103012815117836, "learning_rate": 0.002, "loss": 2.3497, "step": 89470 }, { "epoch": 0.3459046558735755, "grad_norm": 0.09817779809236526, "learning_rate": 0.002, "loss": 2.3616, "step": 89480 }, { "epoch": 0.34594331307695875, "grad_norm": 0.11735199391841888, "learning_rate": 0.002, "loss": 2.347, "step": 89490 }, { "epoch": 0.34598197028034205, "grad_norm": 0.11065150797367096, "learning_rate": 0.002, "loss": 2.3552, "step": 89500 }, { "epoch": 0.3460206274837253, "grad_norm": 0.10216960310935974, "learning_rate": 0.002, "loss": 2.3581, "step": 89510 }, { "epoch": 0.3460592846871086, "grad_norm": 0.1042240783572197, "learning_rate": 0.002, "loss": 2.3668, "step": 89520 }, { "epoch": 0.34609794189049187, "grad_norm": 0.11820337921380997, "learning_rate": 0.002, "loss": 2.3493, "step": 89530 }, { "epoch": 0.3461365990938752, "grad_norm": 0.10278623551130295, "learning_rate": 0.002, "loss": 2.3658, "step": 89540 }, { "epoch": 0.3461752562972584, "grad_norm": 0.10534875094890594, "learning_rate": 0.002, "loss": 2.3477, "step": 89550 }, { "epoch": 0.34621391350064173, "grad_norm": 0.10304959118366241, "learning_rate": 0.002, "loss": 2.3593, "step": 89560 }, { "epoch": 0.346252570704025, "grad_norm": 0.10242988914251328, "learning_rate": 0.002, "loss": 2.3414, "step": 89570 }, { "epoch": 0.34629122790740824, "grad_norm": 0.11503741890192032, "learning_rate": 0.002, "loss": 2.3552, "step": 89580 }, { "epoch": 0.34632988511079155, "grad_norm": 0.1033119484782219, "learning_rate": 0.002, "loss": 2.3614, "step": 89590 }, { "epoch": 0.3463685423141748, "grad_norm": 0.11573673039674759, "learning_rate": 0.002, "loss": 2.3629, "step": 89600 }, { "epoch": 0.3464071995175581, "grad_norm": 0.10920635610818863, "learning_rate": 0.002, "loss": 2.3462, "step": 89610 }, { "epoch": 0.34644585672094136, "grad_norm": 0.10660285502672195, "learning_rate": 0.002, "loss": 2.3542, "step": 89620 }, { "epoch": 0.34648451392432467, "grad_norm": 0.106463722884655, "learning_rate": 0.002, "loss": 2.3528, "step": 89630 }, { "epoch": 0.3465231711277079, "grad_norm": 0.13084150850772858, "learning_rate": 0.002, "loss": 2.3408, "step": 89640 }, { "epoch": 0.3465618283310912, "grad_norm": 0.13473057746887207, "learning_rate": 0.002, "loss": 2.3509, "step": 89650 }, { "epoch": 0.3466004855344745, "grad_norm": 0.10127367079257965, "learning_rate": 0.002, "loss": 2.3528, "step": 89660 }, { "epoch": 0.3466391427378578, "grad_norm": 0.10174165666103363, "learning_rate": 0.002, "loss": 2.3323, "step": 89670 }, { "epoch": 0.34667779994124104, "grad_norm": 0.102707639336586, "learning_rate": 0.002, "loss": 2.3394, "step": 89680 }, { "epoch": 0.34671645714462435, "grad_norm": 0.10903124511241913, "learning_rate": 0.002, "loss": 2.3568, "step": 89690 }, { "epoch": 0.3467551143480076, "grad_norm": 0.10413419455289841, "learning_rate": 0.002, "loss": 2.3636, "step": 89700 }, { "epoch": 0.3467937715513909, "grad_norm": 0.11480054259300232, "learning_rate": 0.002, "loss": 2.3563, "step": 89710 }, { "epoch": 0.34683242875477416, "grad_norm": 0.1044558733701706, "learning_rate": 0.002, "loss": 2.3465, "step": 89720 }, { "epoch": 0.34687108595815747, "grad_norm": 0.1191057562828064, "learning_rate": 0.002, "loss": 2.3569, "step": 89730 }, { "epoch": 0.3469097431615407, "grad_norm": 0.11219650506973267, "learning_rate": 0.002, "loss": 2.3668, "step": 89740 }, { "epoch": 0.346948400364924, "grad_norm": 0.11830902099609375, "learning_rate": 0.002, "loss": 2.3586, "step": 89750 }, { "epoch": 0.3469870575683073, "grad_norm": 0.09900355339050293, "learning_rate": 0.002, "loss": 2.3517, "step": 89760 }, { "epoch": 0.34702571477169053, "grad_norm": 0.10634540766477585, "learning_rate": 0.002, "loss": 2.3479, "step": 89770 }, { "epoch": 0.34706437197507384, "grad_norm": 0.11318907886743546, "learning_rate": 0.002, "loss": 2.3481, "step": 89780 }, { "epoch": 0.3471030291784571, "grad_norm": 0.10234291106462479, "learning_rate": 0.002, "loss": 2.355, "step": 89790 }, { "epoch": 0.3471416863818404, "grad_norm": 0.11988101899623871, "learning_rate": 0.002, "loss": 2.3472, "step": 89800 }, { "epoch": 0.34718034358522365, "grad_norm": 0.10497704148292542, "learning_rate": 0.002, "loss": 2.35, "step": 89810 }, { "epoch": 0.34721900078860696, "grad_norm": 0.10977409780025482, "learning_rate": 0.002, "loss": 2.3403, "step": 89820 }, { "epoch": 0.3472576579919902, "grad_norm": 0.10474051535129547, "learning_rate": 0.002, "loss": 2.3594, "step": 89830 }, { "epoch": 0.3472963151953735, "grad_norm": 0.10059578716754913, "learning_rate": 0.002, "loss": 2.3549, "step": 89840 }, { "epoch": 0.34733497239875677, "grad_norm": 0.13399332761764526, "learning_rate": 0.002, "loss": 2.35, "step": 89850 }, { "epoch": 0.3473736296021401, "grad_norm": 0.09893032908439636, "learning_rate": 0.002, "loss": 2.3759, "step": 89860 }, { "epoch": 0.34741228680552333, "grad_norm": 0.10207752883434296, "learning_rate": 0.002, "loss": 2.3489, "step": 89870 }, { "epoch": 0.34745094400890664, "grad_norm": 0.10035461187362671, "learning_rate": 0.002, "loss": 2.367, "step": 89880 }, { "epoch": 0.3474896012122899, "grad_norm": 0.1164204478263855, "learning_rate": 0.002, "loss": 2.3653, "step": 89890 }, { "epoch": 0.3475282584156732, "grad_norm": 0.11555982381105423, "learning_rate": 0.002, "loss": 2.3481, "step": 89900 }, { "epoch": 0.34756691561905645, "grad_norm": 0.13107863068580627, "learning_rate": 0.002, "loss": 2.3546, "step": 89910 }, { "epoch": 0.34760557282243976, "grad_norm": 0.0959523543715477, "learning_rate": 0.002, "loss": 2.3539, "step": 89920 }, { "epoch": 0.347644230025823, "grad_norm": 0.1214970275759697, "learning_rate": 0.002, "loss": 2.357, "step": 89930 }, { "epoch": 0.3476828872292063, "grad_norm": 0.1260724663734436, "learning_rate": 0.002, "loss": 2.3563, "step": 89940 }, { "epoch": 0.34772154443258957, "grad_norm": 0.10034012794494629, "learning_rate": 0.002, "loss": 2.3455, "step": 89950 }, { "epoch": 0.3477602016359728, "grad_norm": 0.11203020066022873, "learning_rate": 0.002, "loss": 2.3578, "step": 89960 }, { "epoch": 0.34779885883935613, "grad_norm": 0.12978847324848175, "learning_rate": 0.002, "loss": 2.3559, "step": 89970 }, { "epoch": 0.3478375160427394, "grad_norm": 0.09564714133739471, "learning_rate": 0.002, "loss": 2.3545, "step": 89980 }, { "epoch": 0.3478761732461227, "grad_norm": 0.10480118542909622, "learning_rate": 0.002, "loss": 2.3586, "step": 89990 }, { "epoch": 0.34791483044950594, "grad_norm": 0.09294721484184265, "learning_rate": 0.002, "loss": 2.3413, "step": 90000 }, { "epoch": 0.34795348765288925, "grad_norm": 0.12380876392126083, "learning_rate": 0.002, "loss": 2.3642, "step": 90010 }, { "epoch": 0.3479921448562725, "grad_norm": 0.1039481908082962, "learning_rate": 0.002, "loss": 2.339, "step": 90020 }, { "epoch": 0.3480308020596558, "grad_norm": 0.10823719948530197, "learning_rate": 0.002, "loss": 2.3668, "step": 90030 }, { "epoch": 0.34806945926303906, "grad_norm": 0.09384870529174805, "learning_rate": 0.002, "loss": 2.3437, "step": 90040 }, { "epoch": 0.34810811646642237, "grad_norm": 0.0991966649889946, "learning_rate": 0.002, "loss": 2.3775, "step": 90050 }, { "epoch": 0.3481467736698056, "grad_norm": 0.11492978036403656, "learning_rate": 0.002, "loss": 2.3573, "step": 90060 }, { "epoch": 0.34818543087318893, "grad_norm": 0.11849114298820496, "learning_rate": 0.002, "loss": 2.3551, "step": 90070 }, { "epoch": 0.3482240880765722, "grad_norm": 0.10737583786249161, "learning_rate": 0.002, "loss": 2.3523, "step": 90080 }, { "epoch": 0.3482627452799555, "grad_norm": 0.09581415355205536, "learning_rate": 0.002, "loss": 2.3623, "step": 90090 }, { "epoch": 0.34830140248333874, "grad_norm": 0.11195375770330429, "learning_rate": 0.002, "loss": 2.3463, "step": 90100 }, { "epoch": 0.34834005968672205, "grad_norm": 0.11681769043207169, "learning_rate": 0.002, "loss": 2.3518, "step": 90110 }, { "epoch": 0.3483787168901053, "grad_norm": 0.13141022622585297, "learning_rate": 0.002, "loss": 2.3586, "step": 90120 }, { "epoch": 0.3484173740934886, "grad_norm": 0.10962757468223572, "learning_rate": 0.002, "loss": 2.352, "step": 90130 }, { "epoch": 0.34845603129687186, "grad_norm": 0.11447460949420929, "learning_rate": 0.002, "loss": 2.3403, "step": 90140 }, { "epoch": 0.3484946885002551, "grad_norm": 0.09598016738891602, "learning_rate": 0.002, "loss": 2.3512, "step": 90150 }, { "epoch": 0.3485333457036384, "grad_norm": 0.1261773258447647, "learning_rate": 0.002, "loss": 2.3539, "step": 90160 }, { "epoch": 0.34857200290702167, "grad_norm": 0.11035182327032089, "learning_rate": 0.002, "loss": 2.3645, "step": 90170 }, { "epoch": 0.348610660110405, "grad_norm": 0.10185796022415161, "learning_rate": 0.002, "loss": 2.3576, "step": 90180 }, { "epoch": 0.34864931731378823, "grad_norm": 0.11506524682044983, "learning_rate": 0.002, "loss": 2.3632, "step": 90190 }, { "epoch": 0.34868797451717154, "grad_norm": 0.12680265307426453, "learning_rate": 0.002, "loss": 2.3606, "step": 90200 }, { "epoch": 0.3487266317205548, "grad_norm": 0.10287638753652573, "learning_rate": 0.002, "loss": 2.363, "step": 90210 }, { "epoch": 0.3487652889239381, "grad_norm": 0.1062975525856018, "learning_rate": 0.002, "loss": 2.3525, "step": 90220 }, { "epoch": 0.34880394612732135, "grad_norm": 0.09651152789592743, "learning_rate": 0.002, "loss": 2.3494, "step": 90230 }, { "epoch": 0.34884260333070466, "grad_norm": 0.40010392665863037, "learning_rate": 0.002, "loss": 2.3664, "step": 90240 }, { "epoch": 0.3488812605340879, "grad_norm": 0.12321013957262039, "learning_rate": 0.002, "loss": 2.3528, "step": 90250 }, { "epoch": 0.3489199177374712, "grad_norm": 0.1322363018989563, "learning_rate": 0.002, "loss": 2.3647, "step": 90260 }, { "epoch": 0.34895857494085447, "grad_norm": 0.11214771866798401, "learning_rate": 0.002, "loss": 2.355, "step": 90270 }, { "epoch": 0.3489972321442378, "grad_norm": 0.10390046238899231, "learning_rate": 0.002, "loss": 2.3441, "step": 90280 }, { "epoch": 0.34903588934762103, "grad_norm": 0.0927385538816452, "learning_rate": 0.002, "loss": 2.3767, "step": 90290 }, { "epoch": 0.34907454655100434, "grad_norm": 0.12240613996982574, "learning_rate": 0.002, "loss": 2.3432, "step": 90300 }, { "epoch": 0.3491132037543876, "grad_norm": 0.10265913605690002, "learning_rate": 0.002, "loss": 2.3606, "step": 90310 }, { "epoch": 0.3491518609577709, "grad_norm": 0.1110464334487915, "learning_rate": 0.002, "loss": 2.3411, "step": 90320 }, { "epoch": 0.34919051816115415, "grad_norm": 0.11584184318780899, "learning_rate": 0.002, "loss": 2.3616, "step": 90330 }, { "epoch": 0.3492291753645374, "grad_norm": 0.1128770038485527, "learning_rate": 0.002, "loss": 2.3486, "step": 90340 }, { "epoch": 0.3492678325679207, "grad_norm": 0.09741054475307465, "learning_rate": 0.002, "loss": 2.3599, "step": 90350 }, { "epoch": 0.34930648977130396, "grad_norm": 0.09995657205581665, "learning_rate": 0.002, "loss": 2.3597, "step": 90360 }, { "epoch": 0.34934514697468727, "grad_norm": 0.11530742049217224, "learning_rate": 0.002, "loss": 2.3555, "step": 90370 }, { "epoch": 0.3493838041780705, "grad_norm": 0.11915342509746552, "learning_rate": 0.002, "loss": 2.3555, "step": 90380 }, { "epoch": 0.34942246138145383, "grad_norm": 0.10356609523296356, "learning_rate": 0.002, "loss": 2.3595, "step": 90390 }, { "epoch": 0.3494611185848371, "grad_norm": 0.10015129297971725, "learning_rate": 0.002, "loss": 2.3536, "step": 90400 }, { "epoch": 0.3494997757882204, "grad_norm": 0.11576341837644577, "learning_rate": 0.002, "loss": 2.355, "step": 90410 }, { "epoch": 0.34953843299160364, "grad_norm": 0.10489057749509811, "learning_rate": 0.002, "loss": 2.3471, "step": 90420 }, { "epoch": 0.34957709019498695, "grad_norm": 0.11370553821325302, "learning_rate": 0.002, "loss": 2.3529, "step": 90430 }, { "epoch": 0.3496157473983702, "grad_norm": 0.10937584191560745, "learning_rate": 0.002, "loss": 2.3456, "step": 90440 }, { "epoch": 0.3496544046017535, "grad_norm": 0.11915868520736694, "learning_rate": 0.002, "loss": 2.3668, "step": 90450 }, { "epoch": 0.34969306180513676, "grad_norm": 0.09813954681158066, "learning_rate": 0.002, "loss": 2.3452, "step": 90460 }, { "epoch": 0.34973171900852007, "grad_norm": 0.11999206244945526, "learning_rate": 0.002, "loss": 2.34, "step": 90470 }, { "epoch": 0.3497703762119033, "grad_norm": 0.14466355741024017, "learning_rate": 0.002, "loss": 2.3573, "step": 90480 }, { "epoch": 0.34980903341528663, "grad_norm": 0.09629233181476593, "learning_rate": 0.002, "loss": 2.3514, "step": 90490 }, { "epoch": 0.3498476906186699, "grad_norm": 0.10470282286405563, "learning_rate": 0.002, "loss": 2.3491, "step": 90500 }, { "epoch": 0.34988634782205313, "grad_norm": 0.1057397648692131, "learning_rate": 0.002, "loss": 2.3455, "step": 90510 }, { "epoch": 0.34992500502543644, "grad_norm": 0.1281382441520691, "learning_rate": 0.002, "loss": 2.3611, "step": 90520 }, { "epoch": 0.3499636622288197, "grad_norm": 0.11314621567726135, "learning_rate": 0.002, "loss": 2.3537, "step": 90530 }, { "epoch": 0.350002319432203, "grad_norm": 0.11300663650035858, "learning_rate": 0.002, "loss": 2.3505, "step": 90540 }, { "epoch": 0.35004097663558625, "grad_norm": 0.10833021998405457, "learning_rate": 0.002, "loss": 2.3528, "step": 90550 }, { "epoch": 0.35007963383896956, "grad_norm": 0.09223330020904541, "learning_rate": 0.002, "loss": 2.3364, "step": 90560 }, { "epoch": 0.3501182910423528, "grad_norm": 0.1024753525853157, "learning_rate": 0.002, "loss": 2.3467, "step": 90570 }, { "epoch": 0.3501569482457361, "grad_norm": 0.09533923119306564, "learning_rate": 0.002, "loss": 2.3408, "step": 90580 }, { "epoch": 0.3501956054491194, "grad_norm": 0.11984021961688995, "learning_rate": 0.002, "loss": 2.3533, "step": 90590 }, { "epoch": 0.3502342626525027, "grad_norm": 0.10845741629600525, "learning_rate": 0.002, "loss": 2.3609, "step": 90600 }, { "epoch": 0.35027291985588593, "grad_norm": 0.09411056339740753, "learning_rate": 0.002, "loss": 2.3468, "step": 90610 }, { "epoch": 0.35031157705926924, "grad_norm": 0.10180425643920898, "learning_rate": 0.002, "loss": 2.3533, "step": 90620 }, { "epoch": 0.3503502342626525, "grad_norm": 0.24714742600917816, "learning_rate": 0.002, "loss": 2.3582, "step": 90630 }, { "epoch": 0.3503888914660358, "grad_norm": 0.10621386766433716, "learning_rate": 0.002, "loss": 2.3541, "step": 90640 }, { "epoch": 0.35042754866941905, "grad_norm": 0.09178213775157928, "learning_rate": 0.002, "loss": 2.3438, "step": 90650 }, { "epoch": 0.35046620587280236, "grad_norm": 0.10342089831829071, "learning_rate": 0.002, "loss": 2.3513, "step": 90660 }, { "epoch": 0.3505048630761856, "grad_norm": 0.11341840028762817, "learning_rate": 0.002, "loss": 2.3565, "step": 90670 }, { "epoch": 0.3505435202795689, "grad_norm": 0.18417048454284668, "learning_rate": 0.002, "loss": 2.3653, "step": 90680 }, { "epoch": 0.3505821774829522, "grad_norm": 0.10052413493394852, "learning_rate": 0.002, "loss": 2.3546, "step": 90690 }, { "epoch": 0.3506208346863354, "grad_norm": 0.1067756861448288, "learning_rate": 0.002, "loss": 2.3741, "step": 90700 }, { "epoch": 0.35065949188971873, "grad_norm": 0.10531529039144516, "learning_rate": 0.002, "loss": 2.3531, "step": 90710 }, { "epoch": 0.350698149093102, "grad_norm": 0.09352817386388779, "learning_rate": 0.002, "loss": 2.3768, "step": 90720 }, { "epoch": 0.3507368062964853, "grad_norm": 0.1080123707652092, "learning_rate": 0.002, "loss": 2.3392, "step": 90730 }, { "epoch": 0.35077546349986855, "grad_norm": 0.10450432449579239, "learning_rate": 0.002, "loss": 2.3501, "step": 90740 }, { "epoch": 0.35081412070325185, "grad_norm": 0.11024756729602814, "learning_rate": 0.002, "loss": 2.3512, "step": 90750 }, { "epoch": 0.3508527779066351, "grad_norm": 0.10455089062452316, "learning_rate": 0.002, "loss": 2.3575, "step": 90760 }, { "epoch": 0.3508914351100184, "grad_norm": 0.10989495366811752, "learning_rate": 0.002, "loss": 2.34, "step": 90770 }, { "epoch": 0.35093009231340166, "grad_norm": 0.25357547402381897, "learning_rate": 0.002, "loss": 2.3481, "step": 90780 }, { "epoch": 0.350968749516785, "grad_norm": 0.11106446385383606, "learning_rate": 0.002, "loss": 2.368, "step": 90790 }, { "epoch": 0.3510074067201682, "grad_norm": 0.10034093260765076, "learning_rate": 0.002, "loss": 2.3505, "step": 90800 }, { "epoch": 0.35104606392355153, "grad_norm": 0.10782653838396072, "learning_rate": 0.002, "loss": 2.367, "step": 90810 }, { "epoch": 0.3510847211269348, "grad_norm": 0.1085231602191925, "learning_rate": 0.002, "loss": 2.3294, "step": 90820 }, { "epoch": 0.3511233783303181, "grad_norm": 0.11838402599096298, "learning_rate": 0.002, "loss": 2.3592, "step": 90830 }, { "epoch": 0.35116203553370134, "grad_norm": 0.11609060317277908, "learning_rate": 0.002, "loss": 2.3506, "step": 90840 }, { "epoch": 0.35120069273708465, "grad_norm": 0.12586942315101624, "learning_rate": 0.002, "loss": 2.3441, "step": 90850 }, { "epoch": 0.3512393499404679, "grad_norm": 0.10402925312519073, "learning_rate": 0.002, "loss": 2.3409, "step": 90860 }, { "epoch": 0.3512780071438512, "grad_norm": 0.128147691488266, "learning_rate": 0.002, "loss": 2.3466, "step": 90870 }, { "epoch": 0.35131666434723446, "grad_norm": 0.11510083824396133, "learning_rate": 0.002, "loss": 2.3579, "step": 90880 }, { "epoch": 0.3513553215506177, "grad_norm": 0.10362055897712708, "learning_rate": 0.002, "loss": 2.3472, "step": 90890 }, { "epoch": 0.351393978754001, "grad_norm": 0.09541057795286179, "learning_rate": 0.002, "loss": 2.3498, "step": 90900 }, { "epoch": 0.3514326359573843, "grad_norm": 0.12204904854297638, "learning_rate": 0.002, "loss": 2.3626, "step": 90910 }, { "epoch": 0.3514712931607676, "grad_norm": 0.11423654854297638, "learning_rate": 0.002, "loss": 2.3581, "step": 90920 }, { "epoch": 0.35150995036415084, "grad_norm": 0.11182309687137604, "learning_rate": 0.002, "loss": 2.3443, "step": 90930 }, { "epoch": 0.35154860756753414, "grad_norm": 0.11528758704662323, "learning_rate": 0.002, "loss": 2.3388, "step": 90940 }, { "epoch": 0.3515872647709174, "grad_norm": 0.11533716320991516, "learning_rate": 0.002, "loss": 2.3592, "step": 90950 }, { "epoch": 0.3516259219743007, "grad_norm": 0.103913314640522, "learning_rate": 0.002, "loss": 2.3626, "step": 90960 }, { "epoch": 0.35166457917768396, "grad_norm": 0.10832104831933975, "learning_rate": 0.002, "loss": 2.3425, "step": 90970 }, { "epoch": 0.35170323638106726, "grad_norm": 0.12107133865356445, "learning_rate": 0.002, "loss": 2.3596, "step": 90980 }, { "epoch": 0.3517418935844505, "grad_norm": 0.10678855329751968, "learning_rate": 0.002, "loss": 2.3667, "step": 90990 }, { "epoch": 0.3517805507878338, "grad_norm": 0.10596594214439392, "learning_rate": 0.002, "loss": 2.3533, "step": 91000 }, { "epoch": 0.3518192079912171, "grad_norm": 0.11420460045337677, "learning_rate": 0.002, "loss": 2.3664, "step": 91010 }, { "epoch": 0.3518578651946004, "grad_norm": 0.38878318667411804, "learning_rate": 0.002, "loss": 2.3416, "step": 91020 }, { "epoch": 0.35189652239798364, "grad_norm": 0.0983964204788208, "learning_rate": 0.002, "loss": 2.3455, "step": 91030 }, { "epoch": 0.35193517960136694, "grad_norm": 0.10400891304016113, "learning_rate": 0.002, "loss": 2.3496, "step": 91040 }, { "epoch": 0.3519738368047502, "grad_norm": 0.11215876042842865, "learning_rate": 0.002, "loss": 2.3537, "step": 91050 }, { "epoch": 0.3520124940081335, "grad_norm": 0.12313003838062286, "learning_rate": 0.002, "loss": 2.3536, "step": 91060 }, { "epoch": 0.35205115121151676, "grad_norm": 0.11094030737876892, "learning_rate": 0.002, "loss": 2.3447, "step": 91070 }, { "epoch": 0.3520898084149, "grad_norm": 0.0991196483373642, "learning_rate": 0.002, "loss": 2.3686, "step": 91080 }, { "epoch": 0.3521284656182833, "grad_norm": 0.10023964941501617, "learning_rate": 0.002, "loss": 2.3519, "step": 91090 }, { "epoch": 0.35216712282166657, "grad_norm": 0.10561109334230423, "learning_rate": 0.002, "loss": 2.3558, "step": 91100 }, { "epoch": 0.3522057800250499, "grad_norm": 0.11194334179162979, "learning_rate": 0.002, "loss": 2.3622, "step": 91110 }, { "epoch": 0.3522444372284331, "grad_norm": 0.11674809455871582, "learning_rate": 0.002, "loss": 2.3537, "step": 91120 }, { "epoch": 0.35228309443181643, "grad_norm": 0.09680789709091187, "learning_rate": 0.002, "loss": 2.3585, "step": 91130 }, { "epoch": 0.3523217516351997, "grad_norm": 0.11687670648097992, "learning_rate": 0.002, "loss": 2.3538, "step": 91140 }, { "epoch": 0.352360408838583, "grad_norm": 0.11844884604215622, "learning_rate": 0.002, "loss": 2.3611, "step": 91150 }, { "epoch": 0.35239906604196625, "grad_norm": 0.13373969495296478, "learning_rate": 0.002, "loss": 2.3355, "step": 91160 }, { "epoch": 0.35243772324534955, "grad_norm": 0.10809897631406784, "learning_rate": 0.002, "loss": 2.3416, "step": 91170 }, { "epoch": 0.3524763804487328, "grad_norm": 0.10605595260858536, "learning_rate": 0.002, "loss": 2.3493, "step": 91180 }, { "epoch": 0.3525150376521161, "grad_norm": 0.10121627897024155, "learning_rate": 0.002, "loss": 2.3464, "step": 91190 }, { "epoch": 0.35255369485549937, "grad_norm": 0.11170769482851028, "learning_rate": 0.002, "loss": 2.3445, "step": 91200 }, { "epoch": 0.3525923520588827, "grad_norm": 0.12599629163742065, "learning_rate": 0.002, "loss": 2.3528, "step": 91210 }, { "epoch": 0.3526310092622659, "grad_norm": 0.09302575886249542, "learning_rate": 0.002, "loss": 2.35, "step": 91220 }, { "epoch": 0.35266966646564923, "grad_norm": 0.13696546852588654, "learning_rate": 0.002, "loss": 2.3556, "step": 91230 }, { "epoch": 0.3527083236690325, "grad_norm": 0.09611720591783524, "learning_rate": 0.002, "loss": 2.3482, "step": 91240 }, { "epoch": 0.35274698087241574, "grad_norm": 0.10214751213788986, "learning_rate": 0.002, "loss": 2.3426, "step": 91250 }, { "epoch": 0.35278563807579905, "grad_norm": 0.10894928127527237, "learning_rate": 0.002, "loss": 2.3581, "step": 91260 }, { "epoch": 0.3528242952791823, "grad_norm": 0.10802995413541794, "learning_rate": 0.002, "loss": 2.3428, "step": 91270 }, { "epoch": 0.3528629524825656, "grad_norm": 0.1036207526922226, "learning_rate": 0.002, "loss": 2.3461, "step": 91280 }, { "epoch": 0.35290160968594886, "grad_norm": 0.11407134681940079, "learning_rate": 0.002, "loss": 2.3535, "step": 91290 }, { "epoch": 0.35294026688933217, "grad_norm": 0.10703326016664505, "learning_rate": 0.002, "loss": 2.3605, "step": 91300 }, { "epoch": 0.3529789240927154, "grad_norm": 0.1012360006570816, "learning_rate": 0.002, "loss": 2.3455, "step": 91310 }, { "epoch": 0.3530175812960987, "grad_norm": 0.11459990590810776, "learning_rate": 0.002, "loss": 2.3521, "step": 91320 }, { "epoch": 0.353056238499482, "grad_norm": 0.1016920953989029, "learning_rate": 0.002, "loss": 2.3508, "step": 91330 }, { "epoch": 0.3530948957028653, "grad_norm": 0.11714129894971848, "learning_rate": 0.002, "loss": 2.3518, "step": 91340 }, { "epoch": 0.35313355290624854, "grad_norm": 0.10821188241243362, "learning_rate": 0.002, "loss": 2.364, "step": 91350 }, { "epoch": 0.35317221010963185, "grad_norm": 0.11678121238946915, "learning_rate": 0.002, "loss": 2.3591, "step": 91360 }, { "epoch": 0.3532108673130151, "grad_norm": 0.11290333420038223, "learning_rate": 0.002, "loss": 2.347, "step": 91370 }, { "epoch": 0.3532495245163984, "grad_norm": 0.10532896965742111, "learning_rate": 0.002, "loss": 2.356, "step": 91380 }, { "epoch": 0.35328818171978166, "grad_norm": 0.10636654496192932, "learning_rate": 0.002, "loss": 2.3547, "step": 91390 }, { "epoch": 0.35332683892316497, "grad_norm": 0.09490149468183517, "learning_rate": 0.002, "loss": 2.3431, "step": 91400 }, { "epoch": 0.3533654961265482, "grad_norm": 0.09918931126594543, "learning_rate": 0.002, "loss": 2.362, "step": 91410 }, { "epoch": 0.3534041533299315, "grad_norm": 0.10238192975521088, "learning_rate": 0.002, "loss": 2.3599, "step": 91420 }, { "epoch": 0.3534428105333148, "grad_norm": 0.12480217218399048, "learning_rate": 0.002, "loss": 2.3514, "step": 91430 }, { "epoch": 0.35348146773669803, "grad_norm": 0.10058391094207764, "learning_rate": 0.002, "loss": 2.3465, "step": 91440 }, { "epoch": 0.35352012494008134, "grad_norm": 0.10655000805854797, "learning_rate": 0.002, "loss": 2.3492, "step": 91450 }, { "epoch": 0.3535587821434646, "grad_norm": 0.10399568825960159, "learning_rate": 0.002, "loss": 2.3381, "step": 91460 }, { "epoch": 0.3535974393468479, "grad_norm": 0.1104901060461998, "learning_rate": 0.002, "loss": 2.3443, "step": 91470 }, { "epoch": 0.35363609655023115, "grad_norm": 0.11655103415250778, "learning_rate": 0.002, "loss": 2.3635, "step": 91480 }, { "epoch": 0.35367475375361446, "grad_norm": 0.11704276502132416, "learning_rate": 0.002, "loss": 2.3451, "step": 91490 }, { "epoch": 0.3537134109569977, "grad_norm": 0.10143926739692688, "learning_rate": 0.002, "loss": 2.3465, "step": 91500 }, { "epoch": 0.353752068160381, "grad_norm": 0.11378287523984909, "learning_rate": 0.002, "loss": 2.3457, "step": 91510 }, { "epoch": 0.35379072536376427, "grad_norm": 0.11769437789916992, "learning_rate": 0.002, "loss": 2.358, "step": 91520 }, { "epoch": 0.3538293825671476, "grad_norm": 0.10199916362762451, "learning_rate": 0.002, "loss": 2.3499, "step": 91530 }, { "epoch": 0.35386803977053083, "grad_norm": 0.12485165148973465, "learning_rate": 0.002, "loss": 2.3399, "step": 91540 }, { "epoch": 0.35390669697391414, "grad_norm": 0.12636171281337738, "learning_rate": 0.002, "loss": 2.3605, "step": 91550 }, { "epoch": 0.3539453541772974, "grad_norm": 0.10836539417505264, "learning_rate": 0.002, "loss": 2.3717, "step": 91560 }, { "epoch": 0.3539840113806807, "grad_norm": 0.12219023704528809, "learning_rate": 0.002, "loss": 2.3542, "step": 91570 }, { "epoch": 0.35402266858406395, "grad_norm": 0.1115482971072197, "learning_rate": 0.002, "loss": 2.3584, "step": 91580 }, { "epoch": 0.35406132578744726, "grad_norm": 0.09615295380353928, "learning_rate": 0.002, "loss": 2.3542, "step": 91590 }, { "epoch": 0.3540999829908305, "grad_norm": 0.11683586239814758, "learning_rate": 0.002, "loss": 2.3404, "step": 91600 }, { "epoch": 0.3541386401942138, "grad_norm": 0.10799077898263931, "learning_rate": 0.002, "loss": 2.3568, "step": 91610 }, { "epoch": 0.35417729739759707, "grad_norm": 0.1008571982383728, "learning_rate": 0.002, "loss": 2.3398, "step": 91620 }, { "epoch": 0.3542159546009803, "grad_norm": 0.12394775450229645, "learning_rate": 0.002, "loss": 2.3552, "step": 91630 }, { "epoch": 0.35425461180436363, "grad_norm": 0.0992874875664711, "learning_rate": 0.002, "loss": 2.3558, "step": 91640 }, { "epoch": 0.3542932690077469, "grad_norm": 0.10617563128471375, "learning_rate": 0.002, "loss": 2.3646, "step": 91650 }, { "epoch": 0.3543319262111302, "grad_norm": 0.11367245763540268, "learning_rate": 0.002, "loss": 2.3412, "step": 91660 }, { "epoch": 0.35437058341451344, "grad_norm": 0.10529954731464386, "learning_rate": 0.002, "loss": 2.3544, "step": 91670 }, { "epoch": 0.35440924061789675, "grad_norm": 0.10327640175819397, "learning_rate": 0.002, "loss": 2.3528, "step": 91680 }, { "epoch": 0.35444789782128, "grad_norm": 0.12126418948173523, "learning_rate": 0.002, "loss": 2.362, "step": 91690 }, { "epoch": 0.3544865550246633, "grad_norm": 0.10129111260175705, "learning_rate": 0.002, "loss": 2.3595, "step": 91700 }, { "epoch": 0.35452521222804656, "grad_norm": 0.11337399482727051, "learning_rate": 0.002, "loss": 2.3591, "step": 91710 }, { "epoch": 0.35456386943142987, "grad_norm": 0.1090293824672699, "learning_rate": 0.002, "loss": 2.3581, "step": 91720 }, { "epoch": 0.3546025266348131, "grad_norm": 0.11311867833137512, "learning_rate": 0.002, "loss": 2.3514, "step": 91730 }, { "epoch": 0.3546411838381964, "grad_norm": 0.11192495375871658, "learning_rate": 0.002, "loss": 2.351, "step": 91740 }, { "epoch": 0.3546798410415797, "grad_norm": 0.11630705744028091, "learning_rate": 0.002, "loss": 2.3476, "step": 91750 }, { "epoch": 0.354718498244963, "grad_norm": 0.09618744254112244, "learning_rate": 0.002, "loss": 2.3545, "step": 91760 }, { "epoch": 0.35475715544834624, "grad_norm": 0.10563170164823532, "learning_rate": 0.002, "loss": 2.3375, "step": 91770 }, { "epoch": 0.35479581265172955, "grad_norm": 0.10414636135101318, "learning_rate": 0.002, "loss": 2.3428, "step": 91780 }, { "epoch": 0.3548344698551128, "grad_norm": 0.10870873928070068, "learning_rate": 0.002, "loss": 2.3589, "step": 91790 }, { "epoch": 0.3548731270584961, "grad_norm": 0.1033695712685585, "learning_rate": 0.002, "loss": 2.3632, "step": 91800 }, { "epoch": 0.35491178426187936, "grad_norm": 0.10390590131282806, "learning_rate": 0.002, "loss": 2.3373, "step": 91810 }, { "epoch": 0.3549504414652626, "grad_norm": 0.11860582232475281, "learning_rate": 0.002, "loss": 2.3524, "step": 91820 }, { "epoch": 0.3549890986686459, "grad_norm": 0.10430839657783508, "learning_rate": 0.002, "loss": 2.3552, "step": 91830 }, { "epoch": 0.35502775587202917, "grad_norm": 0.10912937670946121, "learning_rate": 0.002, "loss": 2.342, "step": 91840 }, { "epoch": 0.3550664130754125, "grad_norm": 0.11180725693702698, "learning_rate": 0.002, "loss": 2.3376, "step": 91850 }, { "epoch": 0.35510507027879573, "grad_norm": 0.1123897135257721, "learning_rate": 0.002, "loss": 2.3614, "step": 91860 }, { "epoch": 0.35514372748217904, "grad_norm": 0.10792334377765656, "learning_rate": 0.002, "loss": 2.3434, "step": 91870 }, { "epoch": 0.3551823846855623, "grad_norm": 0.12448374181985855, "learning_rate": 0.002, "loss": 2.3547, "step": 91880 }, { "epoch": 0.3552210418889456, "grad_norm": 0.11364062875509262, "learning_rate": 0.002, "loss": 2.3474, "step": 91890 }, { "epoch": 0.35525969909232885, "grad_norm": 0.10941498726606369, "learning_rate": 0.002, "loss": 2.3498, "step": 91900 }, { "epoch": 0.35529835629571216, "grad_norm": 0.10665152221918106, "learning_rate": 0.002, "loss": 2.3673, "step": 91910 }, { "epoch": 0.3553370134990954, "grad_norm": 0.10886060446500778, "learning_rate": 0.002, "loss": 2.3433, "step": 91920 }, { "epoch": 0.3553756707024787, "grad_norm": 0.113824762403965, "learning_rate": 0.002, "loss": 2.3775, "step": 91930 }, { "epoch": 0.35541432790586197, "grad_norm": 0.11197502166032791, "learning_rate": 0.002, "loss": 2.3339, "step": 91940 }, { "epoch": 0.3554529851092453, "grad_norm": 0.11675450950860977, "learning_rate": 0.002, "loss": 2.353, "step": 91950 }, { "epoch": 0.35549164231262853, "grad_norm": 0.12733623385429382, "learning_rate": 0.002, "loss": 2.3565, "step": 91960 }, { "epoch": 0.35553029951601184, "grad_norm": 0.12649857997894287, "learning_rate": 0.002, "loss": 2.3361, "step": 91970 }, { "epoch": 0.3555689567193951, "grad_norm": 0.10965298861265182, "learning_rate": 0.002, "loss": 2.3472, "step": 91980 }, { "epoch": 0.35560761392277834, "grad_norm": 0.11883385479450226, "learning_rate": 0.002, "loss": 2.3688, "step": 91990 }, { "epoch": 0.35564627112616165, "grad_norm": 0.10928390920162201, "learning_rate": 0.002, "loss": 2.332, "step": 92000 }, { "epoch": 0.3556849283295449, "grad_norm": 0.10265020281076431, "learning_rate": 0.002, "loss": 2.3601, "step": 92010 }, { "epoch": 0.3557235855329282, "grad_norm": 0.11968455463647842, "learning_rate": 0.002, "loss": 2.3545, "step": 92020 }, { "epoch": 0.35576224273631146, "grad_norm": 0.1736990213394165, "learning_rate": 0.002, "loss": 2.3518, "step": 92030 }, { "epoch": 0.35580089993969477, "grad_norm": 0.10316959768533707, "learning_rate": 0.002, "loss": 2.3738, "step": 92040 }, { "epoch": 0.355839557143078, "grad_norm": 0.10016027837991714, "learning_rate": 0.002, "loss": 2.3622, "step": 92050 }, { "epoch": 0.35587821434646133, "grad_norm": 0.11082405596971512, "learning_rate": 0.002, "loss": 2.3375, "step": 92060 }, { "epoch": 0.3559168715498446, "grad_norm": 0.11873820424079895, "learning_rate": 0.002, "loss": 2.3659, "step": 92070 }, { "epoch": 0.3559555287532279, "grad_norm": 0.13147886097431183, "learning_rate": 0.002, "loss": 2.3506, "step": 92080 }, { "epoch": 0.35599418595661114, "grad_norm": 0.29662472009658813, "learning_rate": 0.002, "loss": 2.3529, "step": 92090 }, { "epoch": 0.35603284315999445, "grad_norm": 0.1175631433725357, "learning_rate": 0.002, "loss": 2.3533, "step": 92100 }, { "epoch": 0.3560715003633777, "grad_norm": 0.10309901833534241, "learning_rate": 0.002, "loss": 2.3626, "step": 92110 }, { "epoch": 0.356110157566761, "grad_norm": 0.0919211357831955, "learning_rate": 0.002, "loss": 2.34, "step": 92120 }, { "epoch": 0.35614881477014426, "grad_norm": 0.09981719404459, "learning_rate": 0.002, "loss": 2.3381, "step": 92130 }, { "epoch": 0.35618747197352757, "grad_norm": 0.1206381544470787, "learning_rate": 0.002, "loss": 2.3414, "step": 92140 }, { "epoch": 0.3562261291769108, "grad_norm": 0.12230429798364639, "learning_rate": 0.002, "loss": 2.3286, "step": 92150 }, { "epoch": 0.35626478638029413, "grad_norm": 0.10440311580896378, "learning_rate": 0.002, "loss": 2.357, "step": 92160 }, { "epoch": 0.3563034435836774, "grad_norm": 0.11227347701787949, "learning_rate": 0.002, "loss": 2.349, "step": 92170 }, { "epoch": 0.35634210078706063, "grad_norm": 0.1011897549033165, "learning_rate": 0.002, "loss": 2.3524, "step": 92180 }, { "epoch": 0.35638075799044394, "grad_norm": 0.11445823311805725, "learning_rate": 0.002, "loss": 2.366, "step": 92190 }, { "epoch": 0.3564194151938272, "grad_norm": 0.0916280522942543, "learning_rate": 0.002, "loss": 2.3551, "step": 92200 }, { "epoch": 0.3564580723972105, "grad_norm": 0.1168455183506012, "learning_rate": 0.002, "loss": 2.3498, "step": 92210 }, { "epoch": 0.35649672960059375, "grad_norm": 0.12422209978103638, "learning_rate": 0.002, "loss": 2.348, "step": 92220 }, { "epoch": 0.35653538680397706, "grad_norm": 0.10890578478574753, "learning_rate": 0.002, "loss": 2.3701, "step": 92230 }, { "epoch": 0.3565740440073603, "grad_norm": 0.11009981483221054, "learning_rate": 0.002, "loss": 2.3597, "step": 92240 }, { "epoch": 0.3566127012107436, "grad_norm": 0.0971464216709137, "learning_rate": 0.002, "loss": 2.3392, "step": 92250 }, { "epoch": 0.3566513584141269, "grad_norm": 0.10866507887840271, "learning_rate": 0.002, "loss": 2.3507, "step": 92260 }, { "epoch": 0.3566900156175102, "grad_norm": 0.10748264193534851, "learning_rate": 0.002, "loss": 2.3545, "step": 92270 }, { "epoch": 0.35672867282089343, "grad_norm": 0.12036815285682678, "learning_rate": 0.002, "loss": 2.3427, "step": 92280 }, { "epoch": 0.35676733002427674, "grad_norm": 0.1034461110830307, "learning_rate": 0.002, "loss": 2.3518, "step": 92290 }, { "epoch": 0.35680598722766, "grad_norm": 0.13951030373573303, "learning_rate": 0.002, "loss": 2.3743, "step": 92300 }, { "epoch": 0.3568446444310433, "grad_norm": 0.0967438593506813, "learning_rate": 0.002, "loss": 2.3665, "step": 92310 }, { "epoch": 0.35688330163442655, "grad_norm": 0.10530319064855576, "learning_rate": 0.002, "loss": 2.3465, "step": 92320 }, { "epoch": 0.35692195883780986, "grad_norm": 0.10578849166631699, "learning_rate": 0.002, "loss": 2.3465, "step": 92330 }, { "epoch": 0.3569606160411931, "grad_norm": 0.10831478238105774, "learning_rate": 0.002, "loss": 2.3431, "step": 92340 }, { "epoch": 0.3569992732445764, "grad_norm": 0.10773373395204544, "learning_rate": 0.002, "loss": 2.3726, "step": 92350 }, { "epoch": 0.3570379304479597, "grad_norm": 0.11242599040269852, "learning_rate": 0.002, "loss": 2.3544, "step": 92360 }, { "epoch": 0.3570765876513429, "grad_norm": 0.12345028668642044, "learning_rate": 0.002, "loss": 2.3494, "step": 92370 }, { "epoch": 0.35711524485472623, "grad_norm": 0.10369107127189636, "learning_rate": 0.002, "loss": 2.354, "step": 92380 }, { "epoch": 0.3571539020581095, "grad_norm": 0.1744854897260666, "learning_rate": 0.002, "loss": 2.3592, "step": 92390 }, { "epoch": 0.3571925592614928, "grad_norm": 0.09829877316951752, "learning_rate": 0.002, "loss": 2.3398, "step": 92400 }, { "epoch": 0.35723121646487604, "grad_norm": 0.12784981727600098, "learning_rate": 0.002, "loss": 2.3539, "step": 92410 }, { "epoch": 0.35726987366825935, "grad_norm": 0.10792503505945206, "learning_rate": 0.002, "loss": 2.3614, "step": 92420 }, { "epoch": 0.3573085308716426, "grad_norm": 0.10034052282571793, "learning_rate": 0.002, "loss": 2.3408, "step": 92430 }, { "epoch": 0.3573471880750259, "grad_norm": 0.13357554376125336, "learning_rate": 0.002, "loss": 2.357, "step": 92440 }, { "epoch": 0.35738584527840916, "grad_norm": 0.28310683369636536, "learning_rate": 0.002, "loss": 2.3505, "step": 92450 }, { "epoch": 0.35742450248179247, "grad_norm": 0.1128840446472168, "learning_rate": 0.002, "loss": 2.3569, "step": 92460 }, { "epoch": 0.3574631596851757, "grad_norm": 0.14639827609062195, "learning_rate": 0.002, "loss": 2.3529, "step": 92470 }, { "epoch": 0.35750181688855903, "grad_norm": 0.11009307205677032, "learning_rate": 0.002, "loss": 2.3455, "step": 92480 }, { "epoch": 0.3575404740919423, "grad_norm": 0.10203824937343597, "learning_rate": 0.002, "loss": 2.3532, "step": 92490 }, { "epoch": 0.3575791312953256, "grad_norm": 0.11197981238365173, "learning_rate": 0.002, "loss": 2.3412, "step": 92500 }, { "epoch": 0.35761778849870884, "grad_norm": 0.1011284589767456, "learning_rate": 0.002, "loss": 2.3434, "step": 92510 }, { "epoch": 0.35765644570209215, "grad_norm": 0.11905599385499954, "learning_rate": 0.002, "loss": 2.354, "step": 92520 }, { "epoch": 0.3576951029054754, "grad_norm": 0.10511179268360138, "learning_rate": 0.002, "loss": 2.3629, "step": 92530 }, { "epoch": 0.3577337601088587, "grad_norm": 0.10166637599468231, "learning_rate": 0.002, "loss": 2.3434, "step": 92540 }, { "epoch": 0.35777241731224196, "grad_norm": 0.11526691913604736, "learning_rate": 0.002, "loss": 2.3454, "step": 92550 }, { "epoch": 0.3578110745156252, "grad_norm": 0.12250806391239166, "learning_rate": 0.002, "loss": 2.3349, "step": 92560 }, { "epoch": 0.3578497317190085, "grad_norm": 0.10860671103000641, "learning_rate": 0.002, "loss": 2.371, "step": 92570 }, { "epoch": 0.3578883889223918, "grad_norm": 0.11007049679756165, "learning_rate": 0.002, "loss": 2.3603, "step": 92580 }, { "epoch": 0.3579270461257751, "grad_norm": 0.11075068265199661, "learning_rate": 0.002, "loss": 2.3588, "step": 92590 }, { "epoch": 0.35796570332915834, "grad_norm": 0.09433702379465103, "learning_rate": 0.002, "loss": 2.3437, "step": 92600 }, { "epoch": 0.35800436053254164, "grad_norm": 0.09903281182050705, "learning_rate": 0.002, "loss": 2.3641, "step": 92610 }, { "epoch": 0.3580430177359249, "grad_norm": 0.12536199390888214, "learning_rate": 0.002, "loss": 2.3419, "step": 92620 }, { "epoch": 0.3580816749393082, "grad_norm": 0.10655872523784637, "learning_rate": 0.002, "loss": 2.3441, "step": 92630 }, { "epoch": 0.35812033214269146, "grad_norm": 0.09485611319541931, "learning_rate": 0.002, "loss": 2.3402, "step": 92640 }, { "epoch": 0.35815898934607476, "grad_norm": 0.11137279123067856, "learning_rate": 0.002, "loss": 2.3451, "step": 92650 }, { "epoch": 0.358197646549458, "grad_norm": 0.11436645686626434, "learning_rate": 0.002, "loss": 2.3583, "step": 92660 }, { "epoch": 0.3582363037528413, "grad_norm": 0.12434104084968567, "learning_rate": 0.002, "loss": 2.3636, "step": 92670 }, { "epoch": 0.3582749609562246, "grad_norm": 0.10417872667312622, "learning_rate": 0.002, "loss": 2.3479, "step": 92680 }, { "epoch": 0.3583136181596079, "grad_norm": 0.10744566470384598, "learning_rate": 0.002, "loss": 2.3484, "step": 92690 }, { "epoch": 0.35835227536299114, "grad_norm": 0.09762832522392273, "learning_rate": 0.002, "loss": 2.3604, "step": 92700 }, { "epoch": 0.35839093256637444, "grad_norm": 0.12386251240968704, "learning_rate": 0.002, "loss": 2.3504, "step": 92710 }, { "epoch": 0.3584295897697577, "grad_norm": 0.09850963950157166, "learning_rate": 0.002, "loss": 2.3537, "step": 92720 }, { "epoch": 0.358468246973141, "grad_norm": 0.12494229525327682, "learning_rate": 0.002, "loss": 2.3566, "step": 92730 }, { "epoch": 0.35850690417652425, "grad_norm": 0.1048731803894043, "learning_rate": 0.002, "loss": 2.3369, "step": 92740 }, { "epoch": 0.3585455613799075, "grad_norm": 0.11765920370817184, "learning_rate": 0.002, "loss": 2.3748, "step": 92750 }, { "epoch": 0.3585842185832908, "grad_norm": 0.11133372783660889, "learning_rate": 0.002, "loss": 2.3525, "step": 92760 }, { "epoch": 0.35862287578667407, "grad_norm": 0.10941710323095322, "learning_rate": 0.002, "loss": 2.3691, "step": 92770 }, { "epoch": 0.3586615329900574, "grad_norm": 0.10292758792638779, "learning_rate": 0.002, "loss": 2.3583, "step": 92780 }, { "epoch": 0.3587001901934406, "grad_norm": 0.09566858410835266, "learning_rate": 0.002, "loss": 2.3568, "step": 92790 }, { "epoch": 0.35873884739682393, "grad_norm": 0.1483062505722046, "learning_rate": 0.002, "loss": 2.3578, "step": 92800 }, { "epoch": 0.3587775046002072, "grad_norm": 0.11509834229946136, "learning_rate": 0.002, "loss": 2.3458, "step": 92810 }, { "epoch": 0.3588161618035905, "grad_norm": 0.09733151644468307, "learning_rate": 0.002, "loss": 2.3717, "step": 92820 }, { "epoch": 0.35885481900697375, "grad_norm": 0.09017828851938248, "learning_rate": 0.002, "loss": 2.3558, "step": 92830 }, { "epoch": 0.35889347621035705, "grad_norm": 0.10793974995613098, "learning_rate": 0.002, "loss": 2.3553, "step": 92840 }, { "epoch": 0.3589321334137403, "grad_norm": 0.10240405797958374, "learning_rate": 0.002, "loss": 2.3558, "step": 92850 }, { "epoch": 0.3589707906171236, "grad_norm": 0.09181945770978928, "learning_rate": 0.002, "loss": 2.3628, "step": 92860 }, { "epoch": 0.35900944782050687, "grad_norm": 0.1125139519572258, "learning_rate": 0.002, "loss": 2.3406, "step": 92870 }, { "epoch": 0.3590481050238902, "grad_norm": 0.09814155846834183, "learning_rate": 0.002, "loss": 2.3412, "step": 92880 }, { "epoch": 0.3590867622272734, "grad_norm": 0.12032115459442139, "learning_rate": 0.002, "loss": 2.3487, "step": 92890 }, { "epoch": 0.35912541943065673, "grad_norm": 0.10703227669000626, "learning_rate": 0.002, "loss": 2.3468, "step": 92900 }, { "epoch": 0.35916407663404, "grad_norm": 0.11409148573875427, "learning_rate": 0.002, "loss": 2.3504, "step": 92910 }, { "epoch": 0.35920273383742324, "grad_norm": 0.11414051055908203, "learning_rate": 0.002, "loss": 2.3544, "step": 92920 }, { "epoch": 0.35924139104080655, "grad_norm": 0.10449480265378952, "learning_rate": 0.002, "loss": 2.3505, "step": 92930 }, { "epoch": 0.3592800482441898, "grad_norm": 0.1115059182047844, "learning_rate": 0.002, "loss": 2.3375, "step": 92940 }, { "epoch": 0.3593187054475731, "grad_norm": 0.11202369630336761, "learning_rate": 0.002, "loss": 2.3652, "step": 92950 }, { "epoch": 0.35935736265095636, "grad_norm": 0.10093695670366287, "learning_rate": 0.002, "loss": 2.3574, "step": 92960 }, { "epoch": 0.35939601985433967, "grad_norm": 0.10710848122835159, "learning_rate": 0.002, "loss": 2.3385, "step": 92970 }, { "epoch": 0.3594346770577229, "grad_norm": 0.11425944417715073, "learning_rate": 0.002, "loss": 2.3426, "step": 92980 }, { "epoch": 0.3594733342611062, "grad_norm": 0.12227421253919601, "learning_rate": 0.002, "loss": 2.3515, "step": 92990 }, { "epoch": 0.3595119914644895, "grad_norm": 0.10208730399608612, "learning_rate": 0.002, "loss": 2.3605, "step": 93000 }, { "epoch": 0.3595506486678728, "grad_norm": 0.1007249504327774, "learning_rate": 0.002, "loss": 2.3646, "step": 93010 }, { "epoch": 0.35958930587125604, "grad_norm": 0.10328896343708038, "learning_rate": 0.002, "loss": 2.352, "step": 93020 }, { "epoch": 0.35962796307463935, "grad_norm": 0.10905642807483673, "learning_rate": 0.002, "loss": 2.3499, "step": 93030 }, { "epoch": 0.3596666202780226, "grad_norm": 0.11170773953199387, "learning_rate": 0.002, "loss": 2.3414, "step": 93040 }, { "epoch": 0.3597052774814059, "grad_norm": 0.09574315696954727, "learning_rate": 0.002, "loss": 2.3437, "step": 93050 }, { "epoch": 0.35974393468478916, "grad_norm": 0.1102205142378807, "learning_rate": 0.002, "loss": 2.3503, "step": 93060 }, { "epoch": 0.35978259188817246, "grad_norm": 0.10546736419200897, "learning_rate": 0.002, "loss": 2.3448, "step": 93070 }, { "epoch": 0.3598212490915557, "grad_norm": 0.11542915552854538, "learning_rate": 0.002, "loss": 2.3553, "step": 93080 }, { "epoch": 0.359859906294939, "grad_norm": 0.11506014317274094, "learning_rate": 0.002, "loss": 2.3529, "step": 93090 }, { "epoch": 0.3598985634983223, "grad_norm": 0.09840735793113708, "learning_rate": 0.002, "loss": 2.3594, "step": 93100 }, { "epoch": 0.35993722070170553, "grad_norm": 0.09480399638414383, "learning_rate": 0.002, "loss": 2.3487, "step": 93110 }, { "epoch": 0.35997587790508884, "grad_norm": 0.12192349880933762, "learning_rate": 0.002, "loss": 2.336, "step": 93120 }, { "epoch": 0.3600145351084721, "grad_norm": 0.103517085313797, "learning_rate": 0.002, "loss": 2.3587, "step": 93130 }, { "epoch": 0.3600531923118554, "grad_norm": 0.10403070598840714, "learning_rate": 0.002, "loss": 2.3486, "step": 93140 }, { "epoch": 0.36009184951523865, "grad_norm": 0.09937585890293121, "learning_rate": 0.002, "loss": 2.3556, "step": 93150 }, { "epoch": 0.36013050671862196, "grad_norm": 0.10563259571790695, "learning_rate": 0.002, "loss": 2.3465, "step": 93160 }, { "epoch": 0.3601691639220052, "grad_norm": 0.11061891913414001, "learning_rate": 0.002, "loss": 2.3672, "step": 93170 }, { "epoch": 0.3602078211253885, "grad_norm": 0.11007753014564514, "learning_rate": 0.002, "loss": 2.3692, "step": 93180 }, { "epoch": 0.36024647832877177, "grad_norm": 0.111729197204113, "learning_rate": 0.002, "loss": 2.3341, "step": 93190 }, { "epoch": 0.3602851355321551, "grad_norm": 0.09627048671245575, "learning_rate": 0.002, "loss": 2.3533, "step": 93200 }, { "epoch": 0.36032379273553833, "grad_norm": 0.12040142714977264, "learning_rate": 0.002, "loss": 2.349, "step": 93210 }, { "epoch": 0.36036244993892164, "grad_norm": 0.11313582956790924, "learning_rate": 0.002, "loss": 2.3522, "step": 93220 }, { "epoch": 0.3604011071423049, "grad_norm": 0.11437612771987915, "learning_rate": 0.002, "loss": 2.3474, "step": 93230 }, { "epoch": 0.3604397643456882, "grad_norm": 0.10665919631719589, "learning_rate": 0.002, "loss": 2.3587, "step": 93240 }, { "epoch": 0.36047842154907145, "grad_norm": 0.10124839842319489, "learning_rate": 0.002, "loss": 2.3605, "step": 93250 }, { "epoch": 0.36051707875245476, "grad_norm": 0.11167119443416595, "learning_rate": 0.002, "loss": 2.3605, "step": 93260 }, { "epoch": 0.360555735955838, "grad_norm": 0.09860999882221222, "learning_rate": 0.002, "loss": 2.352, "step": 93270 }, { "epoch": 0.3605943931592213, "grad_norm": 0.11736667901277542, "learning_rate": 0.002, "loss": 2.3604, "step": 93280 }, { "epoch": 0.36063305036260457, "grad_norm": 0.10274846106767654, "learning_rate": 0.002, "loss": 2.3543, "step": 93290 }, { "epoch": 0.3606717075659878, "grad_norm": 0.10463331639766693, "learning_rate": 0.002, "loss": 2.3455, "step": 93300 }, { "epoch": 0.36071036476937113, "grad_norm": 0.10493109375238419, "learning_rate": 0.002, "loss": 2.3552, "step": 93310 }, { "epoch": 0.3607490219727544, "grad_norm": 0.09283468127250671, "learning_rate": 0.002, "loss": 2.3504, "step": 93320 }, { "epoch": 0.3607876791761377, "grad_norm": 0.11488457024097443, "learning_rate": 0.002, "loss": 2.3528, "step": 93330 }, { "epoch": 0.36082633637952094, "grad_norm": 0.11543713510036469, "learning_rate": 0.002, "loss": 2.3517, "step": 93340 }, { "epoch": 0.36086499358290425, "grad_norm": 0.10279402881860733, "learning_rate": 0.002, "loss": 2.338, "step": 93350 }, { "epoch": 0.3609036507862875, "grad_norm": 0.09951184689998627, "learning_rate": 0.002, "loss": 2.3351, "step": 93360 }, { "epoch": 0.3609423079896708, "grad_norm": 0.12271188944578171, "learning_rate": 0.002, "loss": 2.344, "step": 93370 }, { "epoch": 0.36098096519305406, "grad_norm": 0.09874992072582245, "learning_rate": 0.002, "loss": 2.3624, "step": 93380 }, { "epoch": 0.36101962239643737, "grad_norm": 0.09354733675718307, "learning_rate": 0.002, "loss": 2.3561, "step": 93390 }, { "epoch": 0.3610582795998206, "grad_norm": 0.1220618411898613, "learning_rate": 0.002, "loss": 2.3382, "step": 93400 }, { "epoch": 0.3610969368032039, "grad_norm": 0.1061968058347702, "learning_rate": 0.002, "loss": 2.3527, "step": 93410 }, { "epoch": 0.3611355940065872, "grad_norm": 0.10884738713502884, "learning_rate": 0.002, "loss": 2.3604, "step": 93420 }, { "epoch": 0.3611742512099705, "grad_norm": 0.11903372406959534, "learning_rate": 0.002, "loss": 2.3522, "step": 93430 }, { "epoch": 0.36121290841335374, "grad_norm": 0.11023060977458954, "learning_rate": 0.002, "loss": 2.3532, "step": 93440 }, { "epoch": 0.36125156561673705, "grad_norm": 0.10786955803632736, "learning_rate": 0.002, "loss": 2.3525, "step": 93450 }, { "epoch": 0.3612902228201203, "grad_norm": 0.14083674550056458, "learning_rate": 0.002, "loss": 2.3487, "step": 93460 }, { "epoch": 0.3613288800235036, "grad_norm": 0.10121428966522217, "learning_rate": 0.002, "loss": 2.3576, "step": 93470 }, { "epoch": 0.36136753722688686, "grad_norm": 0.1078413799405098, "learning_rate": 0.002, "loss": 2.3561, "step": 93480 }, { "epoch": 0.3614061944302701, "grad_norm": 0.1193716898560524, "learning_rate": 0.002, "loss": 2.3381, "step": 93490 }, { "epoch": 0.3614448516336534, "grad_norm": 0.10673259943723679, "learning_rate": 0.002, "loss": 2.3568, "step": 93500 }, { "epoch": 0.36148350883703667, "grad_norm": 0.10572133958339691, "learning_rate": 0.002, "loss": 2.3602, "step": 93510 }, { "epoch": 0.36152216604042, "grad_norm": 0.10543181002140045, "learning_rate": 0.002, "loss": 2.3412, "step": 93520 }, { "epoch": 0.36156082324380323, "grad_norm": 0.09912768006324768, "learning_rate": 0.002, "loss": 2.3561, "step": 93530 }, { "epoch": 0.36159948044718654, "grad_norm": 0.09820456057786942, "learning_rate": 0.002, "loss": 2.3485, "step": 93540 }, { "epoch": 0.3616381376505698, "grad_norm": 0.10524751991033554, "learning_rate": 0.002, "loss": 2.354, "step": 93550 }, { "epoch": 0.3616767948539531, "grad_norm": 0.09994732588529587, "learning_rate": 0.002, "loss": 2.3489, "step": 93560 }, { "epoch": 0.36171545205733635, "grad_norm": 0.10982286185026169, "learning_rate": 0.002, "loss": 2.3504, "step": 93570 }, { "epoch": 0.36175410926071966, "grad_norm": 0.09946732968091965, "learning_rate": 0.002, "loss": 2.3566, "step": 93580 }, { "epoch": 0.3617927664641029, "grad_norm": 0.09362184256315231, "learning_rate": 0.002, "loss": 2.3413, "step": 93590 }, { "epoch": 0.3618314236674862, "grad_norm": 0.10326167941093445, "learning_rate": 0.002, "loss": 2.349, "step": 93600 }, { "epoch": 0.36187008087086947, "grad_norm": 0.12770886719226837, "learning_rate": 0.002, "loss": 2.3512, "step": 93610 }, { "epoch": 0.3619087380742528, "grad_norm": 0.10822170972824097, "learning_rate": 0.002, "loss": 2.344, "step": 93620 }, { "epoch": 0.36194739527763603, "grad_norm": 0.09560711681842804, "learning_rate": 0.002, "loss": 2.3402, "step": 93630 }, { "epoch": 0.36198605248101934, "grad_norm": 0.1178121343255043, "learning_rate": 0.002, "loss": 2.3526, "step": 93640 }, { "epoch": 0.3620247096844026, "grad_norm": 0.11329429596662521, "learning_rate": 0.002, "loss": 2.3668, "step": 93650 }, { "epoch": 0.36206336688778584, "grad_norm": 0.11173105239868164, "learning_rate": 0.002, "loss": 2.3545, "step": 93660 }, { "epoch": 0.36210202409116915, "grad_norm": 0.09975706785917282, "learning_rate": 0.002, "loss": 2.3368, "step": 93670 }, { "epoch": 0.3621406812945524, "grad_norm": 0.10781252384185791, "learning_rate": 0.002, "loss": 2.3575, "step": 93680 }, { "epoch": 0.3621793384979357, "grad_norm": 0.11460427939891815, "learning_rate": 0.002, "loss": 2.3504, "step": 93690 }, { "epoch": 0.36221799570131896, "grad_norm": 0.10809577256441116, "learning_rate": 0.002, "loss": 2.3696, "step": 93700 }, { "epoch": 0.36225665290470227, "grad_norm": 0.1188875287771225, "learning_rate": 0.002, "loss": 2.3578, "step": 93710 }, { "epoch": 0.3622953101080855, "grad_norm": 0.128997340798378, "learning_rate": 0.002, "loss": 2.3473, "step": 93720 }, { "epoch": 0.36233396731146883, "grad_norm": 0.1061416044831276, "learning_rate": 0.002, "loss": 2.3528, "step": 93730 }, { "epoch": 0.3623726245148521, "grad_norm": 0.10427704453468323, "learning_rate": 0.002, "loss": 2.3388, "step": 93740 }, { "epoch": 0.3624112817182354, "grad_norm": 0.10611186176538467, "learning_rate": 0.002, "loss": 2.3702, "step": 93750 }, { "epoch": 0.36244993892161864, "grad_norm": 0.10901859402656555, "learning_rate": 0.002, "loss": 2.3373, "step": 93760 }, { "epoch": 0.36248859612500195, "grad_norm": 0.13348504900932312, "learning_rate": 0.002, "loss": 2.3502, "step": 93770 }, { "epoch": 0.3625272533283852, "grad_norm": 0.09349879622459412, "learning_rate": 0.002, "loss": 2.3625, "step": 93780 }, { "epoch": 0.3625659105317685, "grad_norm": 0.10900308191776276, "learning_rate": 0.002, "loss": 2.3447, "step": 93790 }, { "epoch": 0.36260456773515176, "grad_norm": 0.12062409520149231, "learning_rate": 0.002, "loss": 2.3583, "step": 93800 }, { "epoch": 0.36264322493853507, "grad_norm": 0.12152603268623352, "learning_rate": 0.002, "loss": 2.3519, "step": 93810 }, { "epoch": 0.3626818821419183, "grad_norm": 0.09724075347185135, "learning_rate": 0.002, "loss": 2.3619, "step": 93820 }, { "epoch": 0.36272053934530163, "grad_norm": 0.10395587980747223, "learning_rate": 0.002, "loss": 2.3516, "step": 93830 }, { "epoch": 0.3627591965486849, "grad_norm": 0.0977049246430397, "learning_rate": 0.002, "loss": 2.3559, "step": 93840 }, { "epoch": 0.36279785375206813, "grad_norm": 0.10224491357803345, "learning_rate": 0.002, "loss": 2.3536, "step": 93850 }, { "epoch": 0.36283651095545144, "grad_norm": 0.1509615033864975, "learning_rate": 0.002, "loss": 2.347, "step": 93860 }, { "epoch": 0.3628751681588347, "grad_norm": 0.1311606913805008, "learning_rate": 0.002, "loss": 2.365, "step": 93870 }, { "epoch": 0.362913825362218, "grad_norm": 0.11439214646816254, "learning_rate": 0.002, "loss": 2.3474, "step": 93880 }, { "epoch": 0.36295248256560125, "grad_norm": 0.1027848944067955, "learning_rate": 0.002, "loss": 2.3499, "step": 93890 }, { "epoch": 0.36299113976898456, "grad_norm": 0.11457744240760803, "learning_rate": 0.002, "loss": 2.3574, "step": 93900 }, { "epoch": 0.3630297969723678, "grad_norm": 0.1078697144985199, "learning_rate": 0.002, "loss": 2.3373, "step": 93910 }, { "epoch": 0.3630684541757511, "grad_norm": 0.09145821630954742, "learning_rate": 0.002, "loss": 2.372, "step": 93920 }, { "epoch": 0.3631071113791344, "grad_norm": 0.1060747280716896, "learning_rate": 0.002, "loss": 2.3364, "step": 93930 }, { "epoch": 0.3631457685825177, "grad_norm": 0.1224551796913147, "learning_rate": 0.002, "loss": 2.368, "step": 93940 }, { "epoch": 0.36318442578590093, "grad_norm": 0.10747087746858597, "learning_rate": 0.002, "loss": 2.363, "step": 93950 }, { "epoch": 0.36322308298928424, "grad_norm": 0.1146809458732605, "learning_rate": 0.002, "loss": 2.3491, "step": 93960 }, { "epoch": 0.3632617401926675, "grad_norm": 0.12076695263385773, "learning_rate": 0.002, "loss": 2.3484, "step": 93970 }, { "epoch": 0.3633003973960508, "grad_norm": 0.10855092853307724, "learning_rate": 0.002, "loss": 2.3586, "step": 93980 }, { "epoch": 0.36333905459943405, "grad_norm": 0.10646027326583862, "learning_rate": 0.002, "loss": 2.3575, "step": 93990 }, { "epoch": 0.36337771180281736, "grad_norm": 0.11337734013795853, "learning_rate": 0.002, "loss": 2.3672, "step": 94000 }, { "epoch": 0.3634163690062006, "grad_norm": 0.10778078436851501, "learning_rate": 0.002, "loss": 2.3509, "step": 94010 }, { "epoch": 0.3634550262095839, "grad_norm": 0.10317351669073105, "learning_rate": 0.002, "loss": 2.3405, "step": 94020 }, { "epoch": 0.3634936834129672, "grad_norm": 0.12111659348011017, "learning_rate": 0.002, "loss": 2.3449, "step": 94030 }, { "epoch": 0.3635323406163504, "grad_norm": 0.10069271922111511, "learning_rate": 0.002, "loss": 2.3472, "step": 94040 }, { "epoch": 0.36357099781973373, "grad_norm": 0.09737294912338257, "learning_rate": 0.002, "loss": 2.3477, "step": 94050 }, { "epoch": 0.363609655023117, "grad_norm": 0.09887672960758209, "learning_rate": 0.002, "loss": 2.3493, "step": 94060 }, { "epoch": 0.3636483122265003, "grad_norm": 0.0944209098815918, "learning_rate": 0.002, "loss": 2.3347, "step": 94070 }, { "epoch": 0.36368696942988354, "grad_norm": 0.11983916163444519, "learning_rate": 0.002, "loss": 2.3717, "step": 94080 }, { "epoch": 0.36372562663326685, "grad_norm": 0.12243504822254181, "learning_rate": 0.002, "loss": 2.3604, "step": 94090 }, { "epoch": 0.3637642838366501, "grad_norm": 0.12480605393648148, "learning_rate": 0.002, "loss": 2.3378, "step": 94100 }, { "epoch": 0.3638029410400334, "grad_norm": 0.09351043403148651, "learning_rate": 0.002, "loss": 2.3578, "step": 94110 }, { "epoch": 0.36384159824341666, "grad_norm": 0.09889024496078491, "learning_rate": 0.002, "loss": 2.3416, "step": 94120 }, { "epoch": 0.36388025544679997, "grad_norm": 0.11944614350795746, "learning_rate": 0.002, "loss": 2.3438, "step": 94130 }, { "epoch": 0.3639189126501832, "grad_norm": 0.12215811014175415, "learning_rate": 0.002, "loss": 2.3451, "step": 94140 }, { "epoch": 0.36395756985356653, "grad_norm": 0.11237761378288269, "learning_rate": 0.002, "loss": 2.3643, "step": 94150 }, { "epoch": 0.3639962270569498, "grad_norm": 0.09652663767337799, "learning_rate": 0.002, "loss": 2.3469, "step": 94160 }, { "epoch": 0.3640348842603331, "grad_norm": 0.11002325266599655, "learning_rate": 0.002, "loss": 2.3507, "step": 94170 }, { "epoch": 0.36407354146371634, "grad_norm": 0.10044171661138535, "learning_rate": 0.002, "loss": 2.3676, "step": 94180 }, { "epoch": 0.36411219866709965, "grad_norm": 0.12201712280511856, "learning_rate": 0.002, "loss": 2.3496, "step": 94190 }, { "epoch": 0.3641508558704829, "grad_norm": 0.0983533188700676, "learning_rate": 0.002, "loss": 2.3461, "step": 94200 }, { "epoch": 0.3641895130738662, "grad_norm": 0.11045312136411667, "learning_rate": 0.002, "loss": 2.3424, "step": 94210 }, { "epoch": 0.36422817027724946, "grad_norm": 0.10729973763227463, "learning_rate": 0.002, "loss": 2.3422, "step": 94220 }, { "epoch": 0.3642668274806327, "grad_norm": 0.11313141137361526, "learning_rate": 0.002, "loss": 2.3538, "step": 94230 }, { "epoch": 0.364305484684016, "grad_norm": 0.11352071911096573, "learning_rate": 0.002, "loss": 2.3574, "step": 94240 }, { "epoch": 0.3643441418873993, "grad_norm": 0.11024627834558487, "learning_rate": 0.002, "loss": 2.3549, "step": 94250 }, { "epoch": 0.3643827990907826, "grad_norm": 0.12387119978666306, "learning_rate": 0.002, "loss": 2.3324, "step": 94260 }, { "epoch": 0.36442145629416584, "grad_norm": 0.1023462638258934, "learning_rate": 0.002, "loss": 2.3592, "step": 94270 }, { "epoch": 0.36446011349754914, "grad_norm": 0.11274252831935883, "learning_rate": 0.002, "loss": 2.338, "step": 94280 }, { "epoch": 0.3644987707009324, "grad_norm": 0.09633929282426834, "learning_rate": 0.002, "loss": 2.35, "step": 94290 }, { "epoch": 0.3645374279043157, "grad_norm": 0.10494520515203476, "learning_rate": 0.002, "loss": 2.3669, "step": 94300 }, { "epoch": 0.36457608510769896, "grad_norm": 0.11165018379688263, "learning_rate": 0.002, "loss": 2.3346, "step": 94310 }, { "epoch": 0.36461474231108226, "grad_norm": 0.10920723527669907, "learning_rate": 0.002, "loss": 2.3415, "step": 94320 }, { "epoch": 0.3646533995144655, "grad_norm": 0.12387372553348541, "learning_rate": 0.002, "loss": 2.3582, "step": 94330 }, { "epoch": 0.3646920567178488, "grad_norm": 0.11350051313638687, "learning_rate": 0.002, "loss": 2.3381, "step": 94340 }, { "epoch": 0.3647307139212321, "grad_norm": 0.11592475324869156, "learning_rate": 0.002, "loss": 2.3489, "step": 94350 }, { "epoch": 0.3647693711246154, "grad_norm": 0.12059365212917328, "learning_rate": 0.002, "loss": 2.3499, "step": 94360 }, { "epoch": 0.36480802832799863, "grad_norm": 0.10482364147901535, "learning_rate": 0.002, "loss": 2.3552, "step": 94370 }, { "epoch": 0.36484668553138194, "grad_norm": 0.10916123539209366, "learning_rate": 0.002, "loss": 2.34, "step": 94380 }, { "epoch": 0.3648853427347652, "grad_norm": 0.10865961015224457, "learning_rate": 0.002, "loss": 2.3632, "step": 94390 }, { "epoch": 0.3649239999381485, "grad_norm": 0.11367253959178925, "learning_rate": 0.002, "loss": 2.3328, "step": 94400 }, { "epoch": 0.36496265714153175, "grad_norm": 0.10054226964712143, "learning_rate": 0.002, "loss": 2.3555, "step": 94410 }, { "epoch": 0.365001314344915, "grad_norm": 0.1019655168056488, "learning_rate": 0.002, "loss": 2.3511, "step": 94420 }, { "epoch": 0.3650399715482983, "grad_norm": 0.12992607057094574, "learning_rate": 0.002, "loss": 2.3533, "step": 94430 }, { "epoch": 0.36507862875168157, "grad_norm": 0.10929842293262482, "learning_rate": 0.002, "loss": 2.3462, "step": 94440 }, { "epoch": 0.3651172859550649, "grad_norm": 0.12828373908996582, "learning_rate": 0.002, "loss": 2.3441, "step": 94450 }, { "epoch": 0.3651559431584481, "grad_norm": 0.12689423561096191, "learning_rate": 0.002, "loss": 2.3353, "step": 94460 }, { "epoch": 0.36519460036183143, "grad_norm": 0.1337389349937439, "learning_rate": 0.002, "loss": 2.3545, "step": 94470 }, { "epoch": 0.3652332575652147, "grad_norm": 0.09126801043748856, "learning_rate": 0.002, "loss": 2.3575, "step": 94480 }, { "epoch": 0.365271914768598, "grad_norm": 0.118788942694664, "learning_rate": 0.002, "loss": 2.3693, "step": 94490 }, { "epoch": 0.36531057197198125, "grad_norm": 0.09804191440343857, "learning_rate": 0.002, "loss": 2.3651, "step": 94500 }, { "epoch": 0.36534922917536455, "grad_norm": 0.10007265210151672, "learning_rate": 0.002, "loss": 2.3547, "step": 94510 }, { "epoch": 0.3653878863787478, "grad_norm": 0.11969096213579178, "learning_rate": 0.002, "loss": 2.3507, "step": 94520 }, { "epoch": 0.3654265435821311, "grad_norm": 0.1275913268327713, "learning_rate": 0.002, "loss": 2.3553, "step": 94530 }, { "epoch": 0.36546520078551437, "grad_norm": 0.1127488911151886, "learning_rate": 0.002, "loss": 2.3535, "step": 94540 }, { "epoch": 0.3655038579888977, "grad_norm": 0.10983943939208984, "learning_rate": 0.002, "loss": 2.3626, "step": 94550 }, { "epoch": 0.3655425151922809, "grad_norm": 0.11403996497392654, "learning_rate": 0.002, "loss": 2.3417, "step": 94560 }, { "epoch": 0.36558117239566423, "grad_norm": 0.10751233249902725, "learning_rate": 0.002, "loss": 2.3744, "step": 94570 }, { "epoch": 0.3656198295990475, "grad_norm": 0.10286374390125275, "learning_rate": 0.002, "loss": 2.3314, "step": 94580 }, { "epoch": 0.36565848680243074, "grad_norm": 0.10275772213935852, "learning_rate": 0.002, "loss": 2.3555, "step": 94590 }, { "epoch": 0.36569714400581405, "grad_norm": 0.12379030883312225, "learning_rate": 0.002, "loss": 2.3511, "step": 94600 }, { "epoch": 0.3657358012091973, "grad_norm": 0.10697554796934128, "learning_rate": 0.002, "loss": 2.3499, "step": 94610 }, { "epoch": 0.3657744584125806, "grad_norm": 0.1183970719575882, "learning_rate": 0.002, "loss": 2.366, "step": 94620 }, { "epoch": 0.36581311561596386, "grad_norm": 0.10810043662786484, "learning_rate": 0.002, "loss": 2.3624, "step": 94630 }, { "epoch": 0.36585177281934717, "grad_norm": 0.08997925370931625, "learning_rate": 0.002, "loss": 2.3436, "step": 94640 }, { "epoch": 0.3658904300227304, "grad_norm": 0.12436822801828384, "learning_rate": 0.002, "loss": 2.3633, "step": 94650 }, { "epoch": 0.3659290872261137, "grad_norm": 0.11453136801719666, "learning_rate": 0.002, "loss": 2.3772, "step": 94660 }, { "epoch": 0.365967744429497, "grad_norm": 0.09779436886310577, "learning_rate": 0.002, "loss": 2.3416, "step": 94670 }, { "epoch": 0.3660064016328803, "grad_norm": 0.1175207644701004, "learning_rate": 0.002, "loss": 2.3656, "step": 94680 }, { "epoch": 0.36604505883626354, "grad_norm": 0.09444441646337509, "learning_rate": 0.002, "loss": 2.335, "step": 94690 }, { "epoch": 0.36608371603964684, "grad_norm": 0.11687050014734268, "learning_rate": 0.002, "loss": 2.3375, "step": 94700 }, { "epoch": 0.3661223732430301, "grad_norm": 0.11369027197360992, "learning_rate": 0.002, "loss": 2.3588, "step": 94710 }, { "epoch": 0.3661610304464134, "grad_norm": 0.09931481629610062, "learning_rate": 0.002, "loss": 2.3512, "step": 94720 }, { "epoch": 0.36619968764979666, "grad_norm": 0.12874948978424072, "learning_rate": 0.002, "loss": 2.3492, "step": 94730 }, { "epoch": 0.36623834485317996, "grad_norm": 0.1392941176891327, "learning_rate": 0.002, "loss": 2.3443, "step": 94740 }, { "epoch": 0.3662770020565632, "grad_norm": 0.1356460601091385, "learning_rate": 0.002, "loss": 2.3414, "step": 94750 }, { "epoch": 0.3663156592599465, "grad_norm": 0.10736415535211563, "learning_rate": 0.002, "loss": 2.3489, "step": 94760 }, { "epoch": 0.3663543164633298, "grad_norm": 0.11838210374116898, "learning_rate": 0.002, "loss": 2.352, "step": 94770 }, { "epoch": 0.36639297366671303, "grad_norm": 0.1028166115283966, "learning_rate": 0.002, "loss": 2.352, "step": 94780 }, { "epoch": 0.36643163087009634, "grad_norm": 0.1099080815911293, "learning_rate": 0.002, "loss": 2.3494, "step": 94790 }, { "epoch": 0.3664702880734796, "grad_norm": 0.12541049718856812, "learning_rate": 0.002, "loss": 2.3578, "step": 94800 }, { "epoch": 0.3665089452768629, "grad_norm": 0.11674153059720993, "learning_rate": 0.002, "loss": 2.3549, "step": 94810 }, { "epoch": 0.36654760248024615, "grad_norm": 0.10327841341495514, "learning_rate": 0.002, "loss": 2.3468, "step": 94820 }, { "epoch": 0.36658625968362946, "grad_norm": 0.13201890885829926, "learning_rate": 0.002, "loss": 2.3412, "step": 94830 }, { "epoch": 0.3666249168870127, "grad_norm": 0.09061075001955032, "learning_rate": 0.002, "loss": 2.3428, "step": 94840 }, { "epoch": 0.366663574090396, "grad_norm": 0.13644355535507202, "learning_rate": 0.002, "loss": 2.3562, "step": 94850 }, { "epoch": 0.36670223129377927, "grad_norm": 0.11915605515241623, "learning_rate": 0.002, "loss": 2.3466, "step": 94860 }, { "epoch": 0.3667408884971626, "grad_norm": 0.11097453534603119, "learning_rate": 0.002, "loss": 2.3446, "step": 94870 }, { "epoch": 0.36677954570054583, "grad_norm": 0.13492131233215332, "learning_rate": 0.002, "loss": 2.3571, "step": 94880 }, { "epoch": 0.36681820290392914, "grad_norm": 0.10164101421833038, "learning_rate": 0.002, "loss": 2.3485, "step": 94890 }, { "epoch": 0.3668568601073124, "grad_norm": 0.09530888497829437, "learning_rate": 0.002, "loss": 2.3557, "step": 94900 }, { "epoch": 0.3668955173106957, "grad_norm": 0.12775802612304688, "learning_rate": 0.002, "loss": 2.3545, "step": 94910 }, { "epoch": 0.36693417451407895, "grad_norm": 0.10944028943777084, "learning_rate": 0.002, "loss": 2.344, "step": 94920 }, { "epoch": 0.36697283171746226, "grad_norm": 0.1615549772977829, "learning_rate": 0.002, "loss": 2.3554, "step": 94930 }, { "epoch": 0.3670114889208455, "grad_norm": 0.117291659116745, "learning_rate": 0.002, "loss": 2.3514, "step": 94940 }, { "epoch": 0.3670501461242288, "grad_norm": 0.11271423101425171, "learning_rate": 0.002, "loss": 2.3562, "step": 94950 }, { "epoch": 0.36708880332761207, "grad_norm": 0.09867943078279495, "learning_rate": 0.002, "loss": 2.3497, "step": 94960 }, { "epoch": 0.3671274605309953, "grad_norm": 0.12499125301837921, "learning_rate": 0.002, "loss": 2.357, "step": 94970 }, { "epoch": 0.3671661177343786, "grad_norm": 0.11344917863607407, "learning_rate": 0.002, "loss": 2.3385, "step": 94980 }, { "epoch": 0.3672047749377619, "grad_norm": 0.11792797595262527, "learning_rate": 0.002, "loss": 2.3564, "step": 94990 }, { "epoch": 0.3672434321411452, "grad_norm": 0.11155661940574646, "learning_rate": 0.002, "loss": 2.3484, "step": 95000 }, { "epoch": 0.36728208934452844, "grad_norm": 0.09080757200717926, "learning_rate": 0.002, "loss": 2.3425, "step": 95010 }, { "epoch": 0.36732074654791175, "grad_norm": 0.10427747666835785, "learning_rate": 0.002, "loss": 2.3488, "step": 95020 }, { "epoch": 0.367359403751295, "grad_norm": 0.10497792810201645, "learning_rate": 0.002, "loss": 2.3315, "step": 95030 }, { "epoch": 0.3673980609546783, "grad_norm": 0.09480317682027817, "learning_rate": 0.002, "loss": 2.3339, "step": 95040 }, { "epoch": 0.36743671815806156, "grad_norm": 0.11391110718250275, "learning_rate": 0.002, "loss": 2.3622, "step": 95050 }, { "epoch": 0.36747537536144487, "grad_norm": 0.10386598110198975, "learning_rate": 0.002, "loss": 2.3414, "step": 95060 }, { "epoch": 0.3675140325648281, "grad_norm": 0.1177930235862732, "learning_rate": 0.002, "loss": 2.3537, "step": 95070 }, { "epoch": 0.3675526897682114, "grad_norm": 0.11678066849708557, "learning_rate": 0.002, "loss": 2.3425, "step": 95080 }, { "epoch": 0.3675913469715947, "grad_norm": 0.09995070099830627, "learning_rate": 0.002, "loss": 2.3455, "step": 95090 }, { "epoch": 0.367630004174978, "grad_norm": 0.10244379937648773, "learning_rate": 0.002, "loss": 2.3555, "step": 95100 }, { "epoch": 0.36766866137836124, "grad_norm": 0.10045316070318222, "learning_rate": 0.002, "loss": 2.3432, "step": 95110 }, { "epoch": 0.36770731858174455, "grad_norm": 0.10228868573904037, "learning_rate": 0.002, "loss": 2.3374, "step": 95120 }, { "epoch": 0.3677459757851278, "grad_norm": 0.10760489106178284, "learning_rate": 0.002, "loss": 2.3487, "step": 95130 }, { "epoch": 0.3677846329885111, "grad_norm": 0.10703897476196289, "learning_rate": 0.002, "loss": 2.3474, "step": 95140 }, { "epoch": 0.36782329019189436, "grad_norm": 0.12178358435630798, "learning_rate": 0.002, "loss": 2.351, "step": 95150 }, { "epoch": 0.3678619473952776, "grad_norm": 0.09344667941331863, "learning_rate": 0.002, "loss": 2.3511, "step": 95160 }, { "epoch": 0.3679006045986609, "grad_norm": 0.11168822646141052, "learning_rate": 0.002, "loss": 2.3727, "step": 95170 }, { "epoch": 0.36793926180204417, "grad_norm": 0.1050405502319336, "learning_rate": 0.002, "loss": 2.3608, "step": 95180 }, { "epoch": 0.3679779190054275, "grad_norm": 0.12630388140678406, "learning_rate": 0.002, "loss": 2.3373, "step": 95190 }, { "epoch": 0.36801657620881073, "grad_norm": 0.11250712722539902, "learning_rate": 0.002, "loss": 2.3561, "step": 95200 }, { "epoch": 0.36805523341219404, "grad_norm": 0.104097880423069, "learning_rate": 0.002, "loss": 2.3499, "step": 95210 }, { "epoch": 0.3680938906155773, "grad_norm": 0.11244583129882812, "learning_rate": 0.002, "loss": 2.3591, "step": 95220 }, { "epoch": 0.3681325478189606, "grad_norm": 0.09972363710403442, "learning_rate": 0.002, "loss": 2.3422, "step": 95230 }, { "epoch": 0.36817120502234385, "grad_norm": 0.10571520775556564, "learning_rate": 0.002, "loss": 2.3652, "step": 95240 }, { "epoch": 0.36820986222572716, "grad_norm": 0.1095057874917984, "learning_rate": 0.002, "loss": 2.3528, "step": 95250 }, { "epoch": 0.3682485194291104, "grad_norm": 0.1093902736902237, "learning_rate": 0.002, "loss": 2.3503, "step": 95260 }, { "epoch": 0.3682871766324937, "grad_norm": 0.11586953699588776, "learning_rate": 0.002, "loss": 2.3539, "step": 95270 }, { "epoch": 0.36832583383587697, "grad_norm": 0.10942596942186356, "learning_rate": 0.002, "loss": 2.3456, "step": 95280 }, { "epoch": 0.3683644910392603, "grad_norm": 0.09717775136232376, "learning_rate": 0.002, "loss": 2.3522, "step": 95290 }, { "epoch": 0.36840314824264353, "grad_norm": 0.10185949504375458, "learning_rate": 0.002, "loss": 2.3472, "step": 95300 }, { "epoch": 0.36844180544602684, "grad_norm": 0.11331840604543686, "learning_rate": 0.002, "loss": 2.344, "step": 95310 }, { "epoch": 0.3684804626494101, "grad_norm": 0.10007768869400024, "learning_rate": 0.002, "loss": 2.3595, "step": 95320 }, { "epoch": 0.36851911985279334, "grad_norm": 0.10249575227499008, "learning_rate": 0.002, "loss": 2.3488, "step": 95330 }, { "epoch": 0.36855777705617665, "grad_norm": 0.12019418925046921, "learning_rate": 0.002, "loss": 2.3486, "step": 95340 }, { "epoch": 0.3685964342595599, "grad_norm": 0.11633682996034622, "learning_rate": 0.002, "loss": 2.3443, "step": 95350 }, { "epoch": 0.3686350914629432, "grad_norm": 0.12141241133213043, "learning_rate": 0.002, "loss": 2.3511, "step": 95360 }, { "epoch": 0.36867374866632646, "grad_norm": 0.09715086966753006, "learning_rate": 0.002, "loss": 2.3453, "step": 95370 }, { "epoch": 0.36871240586970977, "grad_norm": 0.11704263091087341, "learning_rate": 0.002, "loss": 2.3536, "step": 95380 }, { "epoch": 0.368751063073093, "grad_norm": 0.11834775656461716, "learning_rate": 0.002, "loss": 2.3454, "step": 95390 }, { "epoch": 0.36878972027647633, "grad_norm": 0.105399951338768, "learning_rate": 0.002, "loss": 2.3375, "step": 95400 }, { "epoch": 0.3688283774798596, "grad_norm": 0.11063943058252335, "learning_rate": 0.002, "loss": 2.3465, "step": 95410 }, { "epoch": 0.3688670346832429, "grad_norm": 0.12205120921134949, "learning_rate": 0.002, "loss": 2.3563, "step": 95420 }, { "epoch": 0.36890569188662614, "grad_norm": 0.1160108670592308, "learning_rate": 0.002, "loss": 2.356, "step": 95430 }, { "epoch": 0.36894434909000945, "grad_norm": 0.09953995794057846, "learning_rate": 0.002, "loss": 2.3448, "step": 95440 }, { "epoch": 0.3689830062933927, "grad_norm": 0.09703753143548965, "learning_rate": 0.002, "loss": 2.3579, "step": 95450 }, { "epoch": 0.369021663496776, "grad_norm": 0.11698596179485321, "learning_rate": 0.002, "loss": 2.3519, "step": 95460 }, { "epoch": 0.36906032070015926, "grad_norm": 0.11900828778743744, "learning_rate": 0.002, "loss": 2.3554, "step": 95470 }, { "epoch": 0.36909897790354257, "grad_norm": 0.11115144938230515, "learning_rate": 0.002, "loss": 2.3485, "step": 95480 }, { "epoch": 0.3691376351069258, "grad_norm": 0.10782202333211899, "learning_rate": 0.002, "loss": 2.3579, "step": 95490 }, { "epoch": 0.36917629231030913, "grad_norm": 0.11181306093931198, "learning_rate": 0.002, "loss": 2.3418, "step": 95500 }, { "epoch": 0.3692149495136924, "grad_norm": 0.11659783869981766, "learning_rate": 0.002, "loss": 2.3489, "step": 95510 }, { "epoch": 0.36925360671707563, "grad_norm": 0.11101959645748138, "learning_rate": 0.002, "loss": 2.3627, "step": 95520 }, { "epoch": 0.36929226392045894, "grad_norm": 0.11048574000597, "learning_rate": 0.002, "loss": 2.3452, "step": 95530 }, { "epoch": 0.3693309211238422, "grad_norm": 0.10598281770944595, "learning_rate": 0.002, "loss": 2.3259, "step": 95540 }, { "epoch": 0.3693695783272255, "grad_norm": 0.1005745604634285, "learning_rate": 0.002, "loss": 2.3479, "step": 95550 }, { "epoch": 0.36940823553060875, "grad_norm": 0.1077663004398346, "learning_rate": 0.002, "loss": 2.3356, "step": 95560 }, { "epoch": 0.36944689273399206, "grad_norm": 0.09539022296667099, "learning_rate": 0.002, "loss": 2.3513, "step": 95570 }, { "epoch": 0.3694855499373753, "grad_norm": 0.12170499563217163, "learning_rate": 0.002, "loss": 2.3465, "step": 95580 }, { "epoch": 0.3695242071407586, "grad_norm": 0.09968260675668716, "learning_rate": 0.002, "loss": 2.3503, "step": 95590 }, { "epoch": 0.3695628643441419, "grad_norm": 0.10713895410299301, "learning_rate": 0.002, "loss": 2.3415, "step": 95600 }, { "epoch": 0.3696015215475252, "grad_norm": 0.11493389308452606, "learning_rate": 0.002, "loss": 2.3542, "step": 95610 }, { "epoch": 0.36964017875090843, "grad_norm": 0.1029747948050499, "learning_rate": 0.002, "loss": 2.3526, "step": 95620 }, { "epoch": 0.36967883595429174, "grad_norm": 0.1195148453116417, "learning_rate": 0.002, "loss": 2.3534, "step": 95630 }, { "epoch": 0.369717493157675, "grad_norm": 0.10771917551755905, "learning_rate": 0.002, "loss": 2.353, "step": 95640 }, { "epoch": 0.3697561503610583, "grad_norm": 0.12105977535247803, "learning_rate": 0.002, "loss": 2.3492, "step": 95650 }, { "epoch": 0.36979480756444155, "grad_norm": 0.10347715020179749, "learning_rate": 0.002, "loss": 2.3491, "step": 95660 }, { "epoch": 0.36983346476782486, "grad_norm": 0.10475093871355057, "learning_rate": 0.002, "loss": 2.3559, "step": 95670 }, { "epoch": 0.3698721219712081, "grad_norm": 0.12026255577802658, "learning_rate": 0.002, "loss": 2.3386, "step": 95680 }, { "epoch": 0.3699107791745914, "grad_norm": 0.11982334405183792, "learning_rate": 0.002, "loss": 2.3449, "step": 95690 }, { "epoch": 0.36994943637797467, "grad_norm": 0.10484941303730011, "learning_rate": 0.002, "loss": 2.3564, "step": 95700 }, { "epoch": 0.3699880935813579, "grad_norm": 0.10729681700468063, "learning_rate": 0.002, "loss": 2.3529, "step": 95710 }, { "epoch": 0.37002675078474123, "grad_norm": 0.09477420151233673, "learning_rate": 0.002, "loss": 2.3583, "step": 95720 }, { "epoch": 0.3700654079881245, "grad_norm": 0.1019349992275238, "learning_rate": 0.002, "loss": 2.3624, "step": 95730 }, { "epoch": 0.3701040651915078, "grad_norm": 0.10930332541465759, "learning_rate": 0.002, "loss": 2.3676, "step": 95740 }, { "epoch": 0.37014272239489104, "grad_norm": 0.09593915194272995, "learning_rate": 0.002, "loss": 2.3498, "step": 95750 }, { "epoch": 0.37018137959827435, "grad_norm": 0.11683354526758194, "learning_rate": 0.002, "loss": 2.3487, "step": 95760 }, { "epoch": 0.3702200368016576, "grad_norm": 0.10252895206212997, "learning_rate": 0.002, "loss": 2.3569, "step": 95770 }, { "epoch": 0.3702586940050409, "grad_norm": 0.10136279463768005, "learning_rate": 0.002, "loss": 2.3367, "step": 95780 }, { "epoch": 0.37029735120842416, "grad_norm": 0.13326722383499146, "learning_rate": 0.002, "loss": 2.3494, "step": 95790 }, { "epoch": 0.37033600841180747, "grad_norm": 0.14020223915576935, "learning_rate": 0.002, "loss": 2.3478, "step": 95800 }, { "epoch": 0.3703746656151907, "grad_norm": 0.09939390420913696, "learning_rate": 0.002, "loss": 2.3572, "step": 95810 }, { "epoch": 0.37041332281857403, "grad_norm": 0.10651414841413498, "learning_rate": 0.002, "loss": 2.3523, "step": 95820 }, { "epoch": 0.3704519800219573, "grad_norm": 0.10730873793363571, "learning_rate": 0.002, "loss": 2.3512, "step": 95830 }, { "epoch": 0.3704906372253406, "grad_norm": 0.10960599780082703, "learning_rate": 0.002, "loss": 2.3292, "step": 95840 }, { "epoch": 0.37052929442872384, "grad_norm": 0.11024501174688339, "learning_rate": 0.002, "loss": 2.3545, "step": 95850 }, { "epoch": 0.37056795163210715, "grad_norm": 0.11730218678712845, "learning_rate": 0.002, "loss": 2.356, "step": 95860 }, { "epoch": 0.3706066088354904, "grad_norm": 0.10243767499923706, "learning_rate": 0.002, "loss": 2.3515, "step": 95870 }, { "epoch": 0.3706452660388737, "grad_norm": 0.11166820675134659, "learning_rate": 0.002, "loss": 2.3549, "step": 95880 }, { "epoch": 0.37068392324225696, "grad_norm": 0.120213583111763, "learning_rate": 0.002, "loss": 2.354, "step": 95890 }, { "epoch": 0.3707225804456402, "grad_norm": 0.09946276247501373, "learning_rate": 0.002, "loss": 2.3409, "step": 95900 }, { "epoch": 0.3707612376490235, "grad_norm": 0.11692880094051361, "learning_rate": 0.002, "loss": 2.3444, "step": 95910 }, { "epoch": 0.3707998948524068, "grad_norm": 0.11046019941568375, "learning_rate": 0.002, "loss": 2.3632, "step": 95920 }, { "epoch": 0.3708385520557901, "grad_norm": 0.10743413865566254, "learning_rate": 0.002, "loss": 2.3579, "step": 95930 }, { "epoch": 0.37087720925917333, "grad_norm": 0.10488501936197281, "learning_rate": 0.002, "loss": 2.3397, "step": 95940 }, { "epoch": 0.37091586646255664, "grad_norm": 0.1552240252494812, "learning_rate": 0.002, "loss": 2.3625, "step": 95950 }, { "epoch": 0.3709545236659399, "grad_norm": 0.09831106662750244, "learning_rate": 0.002, "loss": 2.3469, "step": 95960 }, { "epoch": 0.3709931808693232, "grad_norm": 0.1153578832745552, "learning_rate": 0.002, "loss": 2.3553, "step": 95970 }, { "epoch": 0.37103183807270645, "grad_norm": 0.1132926493883133, "learning_rate": 0.002, "loss": 2.3595, "step": 95980 }, { "epoch": 0.37107049527608976, "grad_norm": 0.1189347356557846, "learning_rate": 0.002, "loss": 2.3661, "step": 95990 }, { "epoch": 0.371109152479473, "grad_norm": 0.10806018859148026, "learning_rate": 0.002, "loss": 2.3446, "step": 96000 }, { "epoch": 0.3711478096828563, "grad_norm": 0.11607592552900314, "learning_rate": 0.002, "loss": 2.3509, "step": 96010 }, { "epoch": 0.3711864668862396, "grad_norm": 0.0974685400724411, "learning_rate": 0.002, "loss": 2.3493, "step": 96020 }, { "epoch": 0.3712251240896229, "grad_norm": 0.22955001890659332, "learning_rate": 0.002, "loss": 2.3762, "step": 96030 }, { "epoch": 0.37126378129300613, "grad_norm": 0.10110989212989807, "learning_rate": 0.002, "loss": 2.3448, "step": 96040 }, { "epoch": 0.37130243849638944, "grad_norm": 0.10315662622451782, "learning_rate": 0.002, "loss": 2.3483, "step": 96050 }, { "epoch": 0.3713410956997727, "grad_norm": 0.09571203589439392, "learning_rate": 0.002, "loss": 2.3402, "step": 96060 }, { "epoch": 0.37137975290315595, "grad_norm": 0.10977359116077423, "learning_rate": 0.002, "loss": 2.3474, "step": 96070 }, { "epoch": 0.37141841010653925, "grad_norm": 0.27224186062812805, "learning_rate": 0.002, "loss": 2.3624, "step": 96080 }, { "epoch": 0.3714570673099225, "grad_norm": 0.11507481336593628, "learning_rate": 0.002, "loss": 2.3618, "step": 96090 }, { "epoch": 0.3714957245133058, "grad_norm": 0.1136784628033638, "learning_rate": 0.002, "loss": 2.3525, "step": 96100 }, { "epoch": 0.37153438171668907, "grad_norm": 0.12112376093864441, "learning_rate": 0.002, "loss": 2.3424, "step": 96110 }, { "epoch": 0.3715730389200724, "grad_norm": 0.10242711007595062, "learning_rate": 0.002, "loss": 2.3549, "step": 96120 }, { "epoch": 0.3716116961234556, "grad_norm": 0.1104845181107521, "learning_rate": 0.002, "loss": 2.3529, "step": 96130 }, { "epoch": 0.37165035332683893, "grad_norm": 0.10000521689653397, "learning_rate": 0.002, "loss": 2.358, "step": 96140 }, { "epoch": 0.3716890105302222, "grad_norm": 0.10198475420475006, "learning_rate": 0.002, "loss": 2.3567, "step": 96150 }, { "epoch": 0.3717276677336055, "grad_norm": 0.1174432635307312, "learning_rate": 0.002, "loss": 2.337, "step": 96160 }, { "epoch": 0.37176632493698875, "grad_norm": 0.1013440415263176, "learning_rate": 0.002, "loss": 2.3573, "step": 96170 }, { "epoch": 0.37180498214037205, "grad_norm": 0.10320638865232468, "learning_rate": 0.002, "loss": 2.3394, "step": 96180 }, { "epoch": 0.3718436393437553, "grad_norm": 0.11725156009197235, "learning_rate": 0.002, "loss": 2.3567, "step": 96190 }, { "epoch": 0.3718822965471386, "grad_norm": 0.10073649883270264, "learning_rate": 0.002, "loss": 2.3545, "step": 96200 }, { "epoch": 0.37192095375052187, "grad_norm": 0.09045589715242386, "learning_rate": 0.002, "loss": 2.3444, "step": 96210 }, { "epoch": 0.3719596109539052, "grad_norm": 0.11545506864786148, "learning_rate": 0.002, "loss": 2.364, "step": 96220 }, { "epoch": 0.3719982681572884, "grad_norm": 0.1079561710357666, "learning_rate": 0.002, "loss": 2.3541, "step": 96230 }, { "epoch": 0.37203692536067173, "grad_norm": 0.1202242448925972, "learning_rate": 0.002, "loss": 2.3545, "step": 96240 }, { "epoch": 0.372075582564055, "grad_norm": 0.09573031216859818, "learning_rate": 0.002, "loss": 2.342, "step": 96250 }, { "epoch": 0.37211423976743824, "grad_norm": 0.11349551379680634, "learning_rate": 0.002, "loss": 2.3419, "step": 96260 }, { "epoch": 0.37215289697082155, "grad_norm": 0.11149785667657852, "learning_rate": 0.002, "loss": 2.3487, "step": 96270 }, { "epoch": 0.3721915541742048, "grad_norm": 0.09543846547603607, "learning_rate": 0.002, "loss": 2.3536, "step": 96280 }, { "epoch": 0.3722302113775881, "grad_norm": 0.10158144682645798, "learning_rate": 0.002, "loss": 2.3599, "step": 96290 }, { "epoch": 0.37226886858097136, "grad_norm": 0.13857035338878632, "learning_rate": 0.002, "loss": 2.369, "step": 96300 }, { "epoch": 0.37230752578435466, "grad_norm": 0.11624547839164734, "learning_rate": 0.002, "loss": 2.3413, "step": 96310 }, { "epoch": 0.3723461829877379, "grad_norm": 0.10812583565711975, "learning_rate": 0.002, "loss": 2.3781, "step": 96320 }, { "epoch": 0.3723848401911212, "grad_norm": 0.09926456212997437, "learning_rate": 0.002, "loss": 2.3631, "step": 96330 }, { "epoch": 0.3724234973945045, "grad_norm": 0.10067030787467957, "learning_rate": 0.002, "loss": 2.3522, "step": 96340 }, { "epoch": 0.3724621545978878, "grad_norm": 0.09444724768400192, "learning_rate": 0.002, "loss": 2.3443, "step": 96350 }, { "epoch": 0.37250081180127104, "grad_norm": 0.1284225434064865, "learning_rate": 0.002, "loss": 2.3533, "step": 96360 }, { "epoch": 0.37253946900465434, "grad_norm": 0.09563818573951721, "learning_rate": 0.002, "loss": 2.3554, "step": 96370 }, { "epoch": 0.3725781262080376, "grad_norm": 0.10342410206794739, "learning_rate": 0.002, "loss": 2.3532, "step": 96380 }, { "epoch": 0.3726167834114209, "grad_norm": 0.1257498413324356, "learning_rate": 0.002, "loss": 2.3444, "step": 96390 }, { "epoch": 0.37265544061480416, "grad_norm": 0.10211283713579178, "learning_rate": 0.002, "loss": 2.3674, "step": 96400 }, { "epoch": 0.37269409781818746, "grad_norm": 0.10875606536865234, "learning_rate": 0.002, "loss": 2.3545, "step": 96410 }, { "epoch": 0.3727327550215707, "grad_norm": 0.11335258930921555, "learning_rate": 0.002, "loss": 2.3412, "step": 96420 }, { "epoch": 0.372771412224954, "grad_norm": 0.1230175718665123, "learning_rate": 0.002, "loss": 2.3337, "step": 96430 }, { "epoch": 0.3728100694283373, "grad_norm": 0.10346999764442444, "learning_rate": 0.002, "loss": 2.3286, "step": 96440 }, { "epoch": 0.37284872663172053, "grad_norm": 0.1104331836104393, "learning_rate": 0.002, "loss": 2.3658, "step": 96450 }, { "epoch": 0.37288738383510384, "grad_norm": 0.11010728776454926, "learning_rate": 0.002, "loss": 2.3423, "step": 96460 }, { "epoch": 0.3729260410384871, "grad_norm": 0.09920106828212738, "learning_rate": 0.002, "loss": 2.3559, "step": 96470 }, { "epoch": 0.3729646982418704, "grad_norm": 0.1061813235282898, "learning_rate": 0.002, "loss": 2.3535, "step": 96480 }, { "epoch": 0.37300335544525365, "grad_norm": 0.12769728899002075, "learning_rate": 0.002, "loss": 2.3541, "step": 96490 }, { "epoch": 0.37304201264863696, "grad_norm": 0.11152181774377823, "learning_rate": 0.002, "loss": 2.3504, "step": 96500 }, { "epoch": 0.3730806698520202, "grad_norm": 0.10381684452295303, "learning_rate": 0.002, "loss": 2.346, "step": 96510 }, { "epoch": 0.3731193270554035, "grad_norm": 0.09493334591388702, "learning_rate": 0.002, "loss": 2.3438, "step": 96520 }, { "epoch": 0.37315798425878677, "grad_norm": 0.10465577989816666, "learning_rate": 0.002, "loss": 2.3538, "step": 96530 }, { "epoch": 0.3731966414621701, "grad_norm": 0.11680374294519424, "learning_rate": 0.002, "loss": 2.3496, "step": 96540 }, { "epoch": 0.37323529866555333, "grad_norm": 0.10015437752008438, "learning_rate": 0.002, "loss": 2.3448, "step": 96550 }, { "epoch": 0.37327395586893664, "grad_norm": 0.10573911666870117, "learning_rate": 0.002, "loss": 2.3496, "step": 96560 }, { "epoch": 0.3733126130723199, "grad_norm": 0.10745342075824738, "learning_rate": 0.002, "loss": 2.3601, "step": 96570 }, { "epoch": 0.3733512702757032, "grad_norm": 0.10386376827955246, "learning_rate": 0.002, "loss": 2.35, "step": 96580 }, { "epoch": 0.37338992747908645, "grad_norm": 0.10528868436813354, "learning_rate": 0.002, "loss": 2.3563, "step": 96590 }, { "epoch": 0.37342858468246976, "grad_norm": 0.10415124893188477, "learning_rate": 0.002, "loss": 2.3606, "step": 96600 }, { "epoch": 0.373467241885853, "grad_norm": 0.12020357698202133, "learning_rate": 0.002, "loss": 2.3529, "step": 96610 }, { "epoch": 0.3735058990892363, "grad_norm": 0.10863015800714493, "learning_rate": 0.002, "loss": 2.3552, "step": 96620 }, { "epoch": 0.37354455629261957, "grad_norm": 0.09396566450595856, "learning_rate": 0.002, "loss": 2.3423, "step": 96630 }, { "epoch": 0.3735832134960028, "grad_norm": 0.11533330380916595, "learning_rate": 0.002, "loss": 2.3439, "step": 96640 }, { "epoch": 0.3736218706993861, "grad_norm": 0.10272518545389175, "learning_rate": 0.002, "loss": 2.3517, "step": 96650 }, { "epoch": 0.3736605279027694, "grad_norm": 0.08855467289686203, "learning_rate": 0.002, "loss": 2.3461, "step": 96660 }, { "epoch": 0.3736991851061527, "grad_norm": 0.12185222655534744, "learning_rate": 0.002, "loss": 2.3716, "step": 96670 }, { "epoch": 0.37373784230953594, "grad_norm": 0.09124171733856201, "learning_rate": 0.002, "loss": 2.3395, "step": 96680 }, { "epoch": 0.37377649951291925, "grad_norm": 0.10895459353923798, "learning_rate": 0.002, "loss": 2.3423, "step": 96690 }, { "epoch": 0.3738151567163025, "grad_norm": 0.1075105369091034, "learning_rate": 0.002, "loss": 2.3497, "step": 96700 }, { "epoch": 0.3738538139196858, "grad_norm": 0.08626224845647812, "learning_rate": 0.002, "loss": 2.3507, "step": 96710 }, { "epoch": 0.37389247112306906, "grad_norm": 0.0954337790608406, "learning_rate": 0.002, "loss": 2.3644, "step": 96720 }, { "epoch": 0.37393112832645237, "grad_norm": 0.10353606939315796, "learning_rate": 0.002, "loss": 2.347, "step": 96730 }, { "epoch": 0.3739697855298356, "grad_norm": 0.10286654531955719, "learning_rate": 0.002, "loss": 2.353, "step": 96740 }, { "epoch": 0.3740084427332189, "grad_norm": 0.09858877211809158, "learning_rate": 0.002, "loss": 2.3404, "step": 96750 }, { "epoch": 0.3740470999366022, "grad_norm": 0.11728513985872269, "learning_rate": 0.002, "loss": 2.3472, "step": 96760 }, { "epoch": 0.3740857571399855, "grad_norm": 0.13198749721050262, "learning_rate": 0.002, "loss": 2.3585, "step": 96770 }, { "epoch": 0.37412441434336874, "grad_norm": 0.10547465831041336, "learning_rate": 0.002, "loss": 2.3568, "step": 96780 }, { "epoch": 0.37416307154675205, "grad_norm": 0.1052117720246315, "learning_rate": 0.002, "loss": 2.3529, "step": 96790 }, { "epoch": 0.3742017287501353, "grad_norm": 0.10624329000711441, "learning_rate": 0.002, "loss": 2.3591, "step": 96800 }, { "epoch": 0.3742403859535186, "grad_norm": 0.10447991639375687, "learning_rate": 0.002, "loss": 2.35, "step": 96810 }, { "epoch": 0.37427904315690186, "grad_norm": 0.10218934714794159, "learning_rate": 0.002, "loss": 2.3585, "step": 96820 }, { "epoch": 0.3743177003602851, "grad_norm": 0.12363198399543762, "learning_rate": 0.002, "loss": 2.342, "step": 96830 }, { "epoch": 0.3743563575636684, "grad_norm": 0.1127898246049881, "learning_rate": 0.002, "loss": 2.3523, "step": 96840 }, { "epoch": 0.37439501476705167, "grad_norm": 0.11404630541801453, "learning_rate": 0.002, "loss": 2.3423, "step": 96850 }, { "epoch": 0.374433671970435, "grad_norm": 0.10721984505653381, "learning_rate": 0.002, "loss": 2.3557, "step": 96860 }, { "epoch": 0.37447232917381823, "grad_norm": 0.10237617790699005, "learning_rate": 0.002, "loss": 2.3584, "step": 96870 }, { "epoch": 0.37451098637720154, "grad_norm": 0.11818091571331024, "learning_rate": 0.002, "loss": 2.3561, "step": 96880 }, { "epoch": 0.3745496435805848, "grad_norm": 0.12402646988630295, "learning_rate": 0.002, "loss": 2.3449, "step": 96890 }, { "epoch": 0.3745883007839681, "grad_norm": 0.10971760004758835, "learning_rate": 0.002, "loss": 2.3535, "step": 96900 }, { "epoch": 0.37462695798735135, "grad_norm": 0.11073453724384308, "learning_rate": 0.002, "loss": 2.3506, "step": 96910 }, { "epoch": 0.37466561519073466, "grad_norm": 0.5414279103279114, "learning_rate": 0.002, "loss": 2.3554, "step": 96920 }, { "epoch": 0.3747042723941179, "grad_norm": 0.12150266021490097, "learning_rate": 0.002, "loss": 2.3575, "step": 96930 }, { "epoch": 0.3747429295975012, "grad_norm": 0.09668447822332382, "learning_rate": 0.002, "loss": 2.3323, "step": 96940 }, { "epoch": 0.37478158680088447, "grad_norm": 0.11486414819955826, "learning_rate": 0.002, "loss": 2.3584, "step": 96950 }, { "epoch": 0.3748202440042678, "grad_norm": 0.13404735922813416, "learning_rate": 0.002, "loss": 2.3544, "step": 96960 }, { "epoch": 0.37485890120765103, "grad_norm": 0.11293788254261017, "learning_rate": 0.002, "loss": 2.3423, "step": 96970 }, { "epoch": 0.37489755841103434, "grad_norm": 0.0979723185300827, "learning_rate": 0.002, "loss": 2.3491, "step": 96980 }, { "epoch": 0.3749362156144176, "grad_norm": 0.10279171913862228, "learning_rate": 0.002, "loss": 2.3592, "step": 96990 }, { "epoch": 0.37497487281780084, "grad_norm": 0.11414069682359695, "learning_rate": 0.002, "loss": 2.33, "step": 97000 }, { "epoch": 0.37501353002118415, "grad_norm": 0.11273612082004547, "learning_rate": 0.002, "loss": 2.3421, "step": 97010 }, { "epoch": 0.3750521872245674, "grad_norm": 0.10220120847225189, "learning_rate": 0.002, "loss": 2.3559, "step": 97020 }, { "epoch": 0.3750908444279507, "grad_norm": 0.1266205608844757, "learning_rate": 0.002, "loss": 2.3566, "step": 97030 }, { "epoch": 0.37512950163133396, "grad_norm": 0.09308404475450516, "learning_rate": 0.002, "loss": 2.347, "step": 97040 }, { "epoch": 0.37516815883471727, "grad_norm": 0.11220557242631912, "learning_rate": 0.002, "loss": 2.3548, "step": 97050 }, { "epoch": 0.3752068160381005, "grad_norm": 0.12289377301931381, "learning_rate": 0.002, "loss": 2.3505, "step": 97060 }, { "epoch": 0.37524547324148383, "grad_norm": 0.11952238529920578, "learning_rate": 0.002, "loss": 2.3454, "step": 97070 }, { "epoch": 0.3752841304448671, "grad_norm": 0.0957283079624176, "learning_rate": 0.002, "loss": 2.3505, "step": 97080 }, { "epoch": 0.3753227876482504, "grad_norm": 0.11071771383285522, "learning_rate": 0.002, "loss": 2.3541, "step": 97090 }, { "epoch": 0.37536144485163364, "grad_norm": 0.10729561746120453, "learning_rate": 0.002, "loss": 2.3503, "step": 97100 }, { "epoch": 0.37540010205501695, "grad_norm": 0.25892820954322815, "learning_rate": 0.002, "loss": 2.3349, "step": 97110 }, { "epoch": 0.3754387592584002, "grad_norm": 0.09794148802757263, "learning_rate": 0.002, "loss": 2.3371, "step": 97120 }, { "epoch": 0.3754774164617835, "grad_norm": 0.09345117211341858, "learning_rate": 0.002, "loss": 2.3518, "step": 97130 }, { "epoch": 0.37551607366516676, "grad_norm": 0.11832363903522491, "learning_rate": 0.002, "loss": 2.3549, "step": 97140 }, { "epoch": 0.37555473086855007, "grad_norm": 0.1080976128578186, "learning_rate": 0.002, "loss": 2.3694, "step": 97150 }, { "epoch": 0.3755933880719333, "grad_norm": 0.09897181391716003, "learning_rate": 0.002, "loss": 2.3728, "step": 97160 }, { "epoch": 0.37563204527531663, "grad_norm": 0.0907154381275177, "learning_rate": 0.002, "loss": 2.3394, "step": 97170 }, { "epoch": 0.3756707024786999, "grad_norm": 0.13835717737674713, "learning_rate": 0.002, "loss": 2.3596, "step": 97180 }, { "epoch": 0.37570935968208313, "grad_norm": 0.10165028274059296, "learning_rate": 0.002, "loss": 2.3525, "step": 97190 }, { "epoch": 0.37574801688546644, "grad_norm": 0.11934266984462738, "learning_rate": 0.002, "loss": 2.3415, "step": 97200 }, { "epoch": 0.3757866740888497, "grad_norm": 0.11578059196472168, "learning_rate": 0.002, "loss": 2.3467, "step": 97210 }, { "epoch": 0.375825331292233, "grad_norm": 0.12091308832168579, "learning_rate": 0.002, "loss": 2.3341, "step": 97220 }, { "epoch": 0.37586398849561625, "grad_norm": 0.12344370037317276, "learning_rate": 0.002, "loss": 2.3447, "step": 97230 }, { "epoch": 0.37590264569899956, "grad_norm": 0.12931908667087555, "learning_rate": 0.002, "loss": 2.3479, "step": 97240 }, { "epoch": 0.3759413029023828, "grad_norm": 0.10830745846033096, "learning_rate": 0.002, "loss": 2.3611, "step": 97250 }, { "epoch": 0.3759799601057661, "grad_norm": 0.10483557730913162, "learning_rate": 0.002, "loss": 2.3576, "step": 97260 }, { "epoch": 0.37601861730914937, "grad_norm": 0.10018157958984375, "learning_rate": 0.002, "loss": 2.3605, "step": 97270 }, { "epoch": 0.3760572745125327, "grad_norm": 0.10008195042610168, "learning_rate": 0.002, "loss": 2.3484, "step": 97280 }, { "epoch": 0.37609593171591593, "grad_norm": 0.09947829693555832, "learning_rate": 0.002, "loss": 2.3604, "step": 97290 }, { "epoch": 0.37613458891929924, "grad_norm": 0.10209108889102936, "learning_rate": 0.002, "loss": 2.3415, "step": 97300 }, { "epoch": 0.3761732461226825, "grad_norm": 0.1302970051765442, "learning_rate": 0.002, "loss": 2.3489, "step": 97310 }, { "epoch": 0.3762119033260658, "grad_norm": 0.1283721923828125, "learning_rate": 0.002, "loss": 2.3672, "step": 97320 }, { "epoch": 0.37625056052944905, "grad_norm": 0.11452938616275787, "learning_rate": 0.002, "loss": 2.3435, "step": 97330 }, { "epoch": 0.37628921773283236, "grad_norm": 0.11943821609020233, "learning_rate": 0.002, "loss": 2.3591, "step": 97340 }, { "epoch": 0.3763278749362156, "grad_norm": 0.09788057953119278, "learning_rate": 0.002, "loss": 2.3467, "step": 97350 }, { "epoch": 0.3763665321395989, "grad_norm": 0.10299163311719894, "learning_rate": 0.002, "loss": 2.3498, "step": 97360 }, { "epoch": 0.37640518934298217, "grad_norm": 0.0997253954410553, "learning_rate": 0.002, "loss": 2.3572, "step": 97370 }, { "epoch": 0.3764438465463654, "grad_norm": 0.10356666147708893, "learning_rate": 0.002, "loss": 2.3516, "step": 97380 }, { "epoch": 0.37648250374974873, "grad_norm": 0.12059180438518524, "learning_rate": 0.002, "loss": 2.354, "step": 97390 }, { "epoch": 0.376521160953132, "grad_norm": 0.10693185776472092, "learning_rate": 0.002, "loss": 2.3663, "step": 97400 }, { "epoch": 0.3765598181565153, "grad_norm": 0.12091413140296936, "learning_rate": 0.002, "loss": 2.3531, "step": 97410 }, { "epoch": 0.37659847535989854, "grad_norm": 0.1141573116183281, "learning_rate": 0.002, "loss": 2.3362, "step": 97420 }, { "epoch": 0.37663713256328185, "grad_norm": 0.10382256656885147, "learning_rate": 0.002, "loss": 2.3343, "step": 97430 }, { "epoch": 0.3766757897666651, "grad_norm": 0.11503864079713821, "learning_rate": 0.002, "loss": 2.3558, "step": 97440 }, { "epoch": 0.3767144469700484, "grad_norm": 0.11092833429574966, "learning_rate": 0.002, "loss": 2.337, "step": 97450 }, { "epoch": 0.37675310417343166, "grad_norm": 0.09959018975496292, "learning_rate": 0.002, "loss": 2.3491, "step": 97460 }, { "epoch": 0.37679176137681497, "grad_norm": 0.11717210710048676, "learning_rate": 0.002, "loss": 2.3555, "step": 97470 }, { "epoch": 0.3768304185801982, "grad_norm": 0.11616809666156769, "learning_rate": 0.002, "loss": 2.3538, "step": 97480 }, { "epoch": 0.37686907578358153, "grad_norm": 0.09150540083646774, "learning_rate": 0.002, "loss": 2.3619, "step": 97490 }, { "epoch": 0.3769077329869648, "grad_norm": 0.09806869924068451, "learning_rate": 0.002, "loss": 2.3424, "step": 97500 }, { "epoch": 0.3769463901903481, "grad_norm": 0.12199116498231888, "learning_rate": 0.002, "loss": 2.3563, "step": 97510 }, { "epoch": 0.37698504739373134, "grad_norm": 0.10591573268175125, "learning_rate": 0.002, "loss": 2.3487, "step": 97520 }, { "epoch": 0.37702370459711465, "grad_norm": 0.145875483751297, "learning_rate": 0.002, "loss": 2.3412, "step": 97530 }, { "epoch": 0.3770623618004979, "grad_norm": 0.10147275775671005, "learning_rate": 0.002, "loss": 2.3406, "step": 97540 }, { "epoch": 0.3771010190038812, "grad_norm": 0.11346987634897232, "learning_rate": 0.002, "loss": 2.3495, "step": 97550 }, { "epoch": 0.37713967620726446, "grad_norm": 0.10580125451087952, "learning_rate": 0.002, "loss": 2.3378, "step": 97560 }, { "epoch": 0.3771783334106477, "grad_norm": 0.10647939890623093, "learning_rate": 0.002, "loss": 2.368, "step": 97570 }, { "epoch": 0.377216990614031, "grad_norm": 0.11025557667016983, "learning_rate": 0.002, "loss": 2.3399, "step": 97580 }, { "epoch": 0.3772556478174143, "grad_norm": 0.099567711353302, "learning_rate": 0.002, "loss": 2.3472, "step": 97590 }, { "epoch": 0.3772943050207976, "grad_norm": 0.10200417786836624, "learning_rate": 0.002, "loss": 2.3509, "step": 97600 }, { "epoch": 0.37733296222418083, "grad_norm": 0.10742084681987762, "learning_rate": 0.002, "loss": 2.3452, "step": 97610 }, { "epoch": 0.37737161942756414, "grad_norm": 0.08838263899087906, "learning_rate": 0.002, "loss": 2.3501, "step": 97620 }, { "epoch": 0.3774102766309474, "grad_norm": 0.12436746060848236, "learning_rate": 0.002, "loss": 2.352, "step": 97630 }, { "epoch": 0.3774489338343307, "grad_norm": 0.10953371971845627, "learning_rate": 0.002, "loss": 2.3456, "step": 97640 }, { "epoch": 0.37748759103771395, "grad_norm": 0.08956970274448395, "learning_rate": 0.002, "loss": 2.337, "step": 97650 }, { "epoch": 0.37752624824109726, "grad_norm": 0.11406704038381577, "learning_rate": 0.002, "loss": 2.3572, "step": 97660 }, { "epoch": 0.3775649054444805, "grad_norm": 0.10393359512090683, "learning_rate": 0.002, "loss": 2.3529, "step": 97670 }, { "epoch": 0.3776035626478638, "grad_norm": 0.11221380531787872, "learning_rate": 0.002, "loss": 2.3532, "step": 97680 }, { "epoch": 0.3776422198512471, "grad_norm": 0.09847605228424072, "learning_rate": 0.002, "loss": 2.3451, "step": 97690 }, { "epoch": 0.3776808770546304, "grad_norm": 0.12601011991500854, "learning_rate": 0.002, "loss": 2.3606, "step": 97700 }, { "epoch": 0.37771953425801363, "grad_norm": 0.11960670351982117, "learning_rate": 0.002, "loss": 2.3541, "step": 97710 }, { "epoch": 0.37775819146139694, "grad_norm": 0.09933385998010635, "learning_rate": 0.002, "loss": 2.3454, "step": 97720 }, { "epoch": 0.3777968486647802, "grad_norm": 0.11257601529359818, "learning_rate": 0.002, "loss": 2.358, "step": 97730 }, { "epoch": 0.37783550586816345, "grad_norm": 0.11432036757469177, "learning_rate": 0.002, "loss": 2.3571, "step": 97740 }, { "epoch": 0.37787416307154675, "grad_norm": 0.12867693603038788, "learning_rate": 0.002, "loss": 2.3631, "step": 97750 }, { "epoch": 0.37791282027493, "grad_norm": 0.10882619023323059, "learning_rate": 0.002, "loss": 2.3476, "step": 97760 }, { "epoch": 0.3779514774783133, "grad_norm": 0.10990484058856964, "learning_rate": 0.002, "loss": 2.3644, "step": 97770 }, { "epoch": 0.37799013468169657, "grad_norm": 0.11563392728567123, "learning_rate": 0.002, "loss": 2.3472, "step": 97780 }, { "epoch": 0.3780287918850799, "grad_norm": 0.10057725757360458, "learning_rate": 0.002, "loss": 2.3506, "step": 97790 }, { "epoch": 0.3780674490884631, "grad_norm": 0.10217918455600739, "learning_rate": 0.002, "loss": 2.3541, "step": 97800 }, { "epoch": 0.37810610629184643, "grad_norm": 0.11344746500253677, "learning_rate": 0.002, "loss": 2.3605, "step": 97810 }, { "epoch": 0.3781447634952297, "grad_norm": 0.11891037225723267, "learning_rate": 0.002, "loss": 2.355, "step": 97820 }, { "epoch": 0.378183420698613, "grad_norm": 0.10988009721040726, "learning_rate": 0.002, "loss": 2.3482, "step": 97830 }, { "epoch": 0.37822207790199625, "grad_norm": 0.11649468541145325, "learning_rate": 0.002, "loss": 2.357, "step": 97840 }, { "epoch": 0.37826073510537955, "grad_norm": 0.09838244318962097, "learning_rate": 0.002, "loss": 2.3361, "step": 97850 }, { "epoch": 0.3782993923087628, "grad_norm": 0.100824736058712, "learning_rate": 0.002, "loss": 2.3509, "step": 97860 }, { "epoch": 0.3783380495121461, "grad_norm": 0.1081569567322731, "learning_rate": 0.002, "loss": 2.3479, "step": 97870 }, { "epoch": 0.37837670671552937, "grad_norm": 0.11926387250423431, "learning_rate": 0.002, "loss": 2.3593, "step": 97880 }, { "epoch": 0.3784153639189127, "grad_norm": 0.1074013039469719, "learning_rate": 0.002, "loss": 2.3545, "step": 97890 }, { "epoch": 0.3784540211222959, "grad_norm": 0.11106620728969574, "learning_rate": 0.002, "loss": 2.344, "step": 97900 }, { "epoch": 0.37849267832567923, "grad_norm": 0.12460718303918839, "learning_rate": 0.002, "loss": 2.3637, "step": 97910 }, { "epoch": 0.3785313355290625, "grad_norm": 0.1140846386551857, "learning_rate": 0.002, "loss": 2.3559, "step": 97920 }, { "epoch": 0.37856999273244574, "grad_norm": 0.12423646450042725, "learning_rate": 0.002, "loss": 2.3596, "step": 97930 }, { "epoch": 0.37860864993582904, "grad_norm": 0.10674221813678741, "learning_rate": 0.002, "loss": 2.3311, "step": 97940 }, { "epoch": 0.3786473071392123, "grad_norm": 0.09802944213151932, "learning_rate": 0.002, "loss": 2.3467, "step": 97950 }, { "epoch": 0.3786859643425956, "grad_norm": 0.09465181082487106, "learning_rate": 0.002, "loss": 2.3567, "step": 97960 }, { "epoch": 0.37872462154597886, "grad_norm": 0.10393911600112915, "learning_rate": 0.002, "loss": 2.3499, "step": 97970 }, { "epoch": 0.37876327874936216, "grad_norm": 0.11896202713251114, "learning_rate": 0.002, "loss": 2.3637, "step": 97980 }, { "epoch": 0.3788019359527454, "grad_norm": 0.10339158028364182, "learning_rate": 0.002, "loss": 2.3424, "step": 97990 }, { "epoch": 0.3788405931561287, "grad_norm": 0.1181914359331131, "learning_rate": 0.002, "loss": 2.3518, "step": 98000 }, { "epoch": 0.378879250359512, "grad_norm": 0.11918921768665314, "learning_rate": 0.002, "loss": 2.3467, "step": 98010 }, { "epoch": 0.3789179075628953, "grad_norm": 0.10368076711893082, "learning_rate": 0.002, "loss": 2.347, "step": 98020 }, { "epoch": 0.37895656476627854, "grad_norm": 0.10133563727140427, "learning_rate": 0.002, "loss": 2.3555, "step": 98030 }, { "epoch": 0.37899522196966184, "grad_norm": 0.11930687725543976, "learning_rate": 0.002, "loss": 2.3561, "step": 98040 }, { "epoch": 0.3790338791730451, "grad_norm": 0.12275891751050949, "learning_rate": 0.002, "loss": 2.3624, "step": 98050 }, { "epoch": 0.3790725363764284, "grad_norm": 0.10572463274002075, "learning_rate": 0.002, "loss": 2.352, "step": 98060 }, { "epoch": 0.37911119357981166, "grad_norm": 0.10790557414293289, "learning_rate": 0.002, "loss": 2.3521, "step": 98070 }, { "epoch": 0.37914985078319496, "grad_norm": 0.11652061343193054, "learning_rate": 0.002, "loss": 2.3636, "step": 98080 }, { "epoch": 0.3791885079865782, "grad_norm": 0.10054761916399002, "learning_rate": 0.002, "loss": 2.3591, "step": 98090 }, { "epoch": 0.3792271651899615, "grad_norm": 0.10746383666992188, "learning_rate": 0.002, "loss": 2.3663, "step": 98100 }, { "epoch": 0.3792658223933448, "grad_norm": 0.12173549830913544, "learning_rate": 0.002, "loss": 2.3517, "step": 98110 }, { "epoch": 0.37930447959672803, "grad_norm": 0.11230204999446869, "learning_rate": 0.002, "loss": 2.359, "step": 98120 }, { "epoch": 0.37934313680011134, "grad_norm": 0.09881207346916199, "learning_rate": 0.002, "loss": 2.3403, "step": 98130 }, { "epoch": 0.3793817940034946, "grad_norm": 1.9445171356201172, "learning_rate": 0.002, "loss": 2.354, "step": 98140 }, { "epoch": 0.3794204512068779, "grad_norm": 0.11113037914037704, "learning_rate": 0.002, "loss": 2.3644, "step": 98150 }, { "epoch": 0.37945910841026115, "grad_norm": 0.11721598356962204, "learning_rate": 0.002, "loss": 2.3487, "step": 98160 }, { "epoch": 0.37949776561364446, "grad_norm": 0.1077088937163353, "learning_rate": 0.002, "loss": 2.3349, "step": 98170 }, { "epoch": 0.3795364228170277, "grad_norm": 0.10289087891578674, "learning_rate": 0.002, "loss": 2.3417, "step": 98180 }, { "epoch": 0.379575080020411, "grad_norm": 0.0997520238161087, "learning_rate": 0.002, "loss": 2.3554, "step": 98190 }, { "epoch": 0.37961373722379427, "grad_norm": 0.10366350412368774, "learning_rate": 0.002, "loss": 2.3613, "step": 98200 }, { "epoch": 0.3796523944271776, "grad_norm": 0.10660743713378906, "learning_rate": 0.002, "loss": 2.3437, "step": 98210 }, { "epoch": 0.3796910516305608, "grad_norm": 0.1255519837141037, "learning_rate": 0.002, "loss": 2.3435, "step": 98220 }, { "epoch": 0.37972970883394414, "grad_norm": 0.12160088866949081, "learning_rate": 0.002, "loss": 2.3397, "step": 98230 }, { "epoch": 0.3797683660373274, "grad_norm": 0.10024891793727875, "learning_rate": 0.002, "loss": 2.352, "step": 98240 }, { "epoch": 0.3798070232407107, "grad_norm": 0.11600322276353836, "learning_rate": 0.002, "loss": 2.349, "step": 98250 }, { "epoch": 0.37984568044409395, "grad_norm": 0.09131622314453125, "learning_rate": 0.002, "loss": 2.3384, "step": 98260 }, { "epoch": 0.37988433764747725, "grad_norm": 0.08803276717662811, "learning_rate": 0.002, "loss": 2.3514, "step": 98270 }, { "epoch": 0.3799229948508605, "grad_norm": 0.0936739444732666, "learning_rate": 0.002, "loss": 2.3647, "step": 98280 }, { "epoch": 0.3799616520542438, "grad_norm": 0.10160307586193085, "learning_rate": 0.002, "loss": 2.3574, "step": 98290 }, { "epoch": 0.38000030925762707, "grad_norm": 0.11466259509325027, "learning_rate": 0.002, "loss": 2.3501, "step": 98300 }, { "epoch": 0.3800389664610103, "grad_norm": 0.0952131450176239, "learning_rate": 0.002, "loss": 2.369, "step": 98310 }, { "epoch": 0.3800776236643936, "grad_norm": 0.10741259157657623, "learning_rate": 0.002, "loss": 2.3453, "step": 98320 }, { "epoch": 0.3801162808677769, "grad_norm": 0.09484761208295822, "learning_rate": 0.002, "loss": 2.3529, "step": 98330 }, { "epoch": 0.3801549380711602, "grad_norm": 0.1288508027791977, "learning_rate": 0.002, "loss": 2.351, "step": 98340 }, { "epoch": 0.38019359527454344, "grad_norm": 0.09343413263559341, "learning_rate": 0.002, "loss": 2.3547, "step": 98350 }, { "epoch": 0.38023225247792675, "grad_norm": 0.10034056752920151, "learning_rate": 0.002, "loss": 2.342, "step": 98360 }, { "epoch": 0.38027090968131, "grad_norm": 0.10348794609308243, "learning_rate": 0.002, "loss": 2.366, "step": 98370 }, { "epoch": 0.3803095668846933, "grad_norm": 0.09628686308860779, "learning_rate": 0.002, "loss": 2.3542, "step": 98380 }, { "epoch": 0.38034822408807656, "grad_norm": 0.1153346374630928, "learning_rate": 0.002, "loss": 2.3458, "step": 98390 }, { "epoch": 0.38038688129145987, "grad_norm": 0.1036255732178688, "learning_rate": 0.002, "loss": 2.3361, "step": 98400 }, { "epoch": 0.3804255384948431, "grad_norm": 0.12434010207653046, "learning_rate": 0.002, "loss": 2.3445, "step": 98410 }, { "epoch": 0.3804641956982264, "grad_norm": 0.09980366379022598, "learning_rate": 0.002, "loss": 2.3456, "step": 98420 }, { "epoch": 0.3805028529016097, "grad_norm": 0.09620928019285202, "learning_rate": 0.002, "loss": 2.3442, "step": 98430 }, { "epoch": 0.380541510104993, "grad_norm": 0.11719270050525665, "learning_rate": 0.002, "loss": 2.3444, "step": 98440 }, { "epoch": 0.38058016730837624, "grad_norm": 0.0989764928817749, "learning_rate": 0.002, "loss": 2.3436, "step": 98450 }, { "epoch": 0.38061882451175955, "grad_norm": 0.11647021770477295, "learning_rate": 0.002, "loss": 2.3452, "step": 98460 }, { "epoch": 0.3806574817151428, "grad_norm": 0.11862388253211975, "learning_rate": 0.002, "loss": 2.3412, "step": 98470 }, { "epoch": 0.3806961389185261, "grad_norm": 0.09813261777162552, "learning_rate": 0.002, "loss": 2.3605, "step": 98480 }, { "epoch": 0.38073479612190936, "grad_norm": 0.10625061392784119, "learning_rate": 0.002, "loss": 2.3605, "step": 98490 }, { "epoch": 0.3807734533252926, "grad_norm": 0.11874415725469589, "learning_rate": 0.002, "loss": 2.3392, "step": 98500 }, { "epoch": 0.3808121105286759, "grad_norm": 0.12056658416986465, "learning_rate": 0.002, "loss": 2.3444, "step": 98510 }, { "epoch": 0.38085076773205917, "grad_norm": 0.10916418582201004, "learning_rate": 0.002, "loss": 2.3609, "step": 98520 }, { "epoch": 0.3808894249354425, "grad_norm": 0.13108888268470764, "learning_rate": 0.002, "loss": 2.353, "step": 98530 }, { "epoch": 0.38092808213882573, "grad_norm": 0.10709444433450699, "learning_rate": 0.002, "loss": 2.3561, "step": 98540 }, { "epoch": 0.38096673934220904, "grad_norm": 0.09459146857261658, "learning_rate": 0.002, "loss": 2.3544, "step": 98550 }, { "epoch": 0.3810053965455923, "grad_norm": 0.10232074558734894, "learning_rate": 0.002, "loss": 2.3479, "step": 98560 }, { "epoch": 0.3810440537489756, "grad_norm": 0.12341094017028809, "learning_rate": 0.002, "loss": 2.3538, "step": 98570 }, { "epoch": 0.38108271095235885, "grad_norm": 0.10351862013339996, "learning_rate": 0.002, "loss": 2.3462, "step": 98580 }, { "epoch": 0.38112136815574216, "grad_norm": 0.12595586478710175, "learning_rate": 0.002, "loss": 2.352, "step": 98590 }, { "epoch": 0.3811600253591254, "grad_norm": 0.08833569288253784, "learning_rate": 0.002, "loss": 2.3651, "step": 98600 }, { "epoch": 0.3811986825625087, "grad_norm": 0.09319626539945602, "learning_rate": 0.002, "loss": 2.3598, "step": 98610 }, { "epoch": 0.38123733976589197, "grad_norm": 0.1339443475008011, "learning_rate": 0.002, "loss": 2.3705, "step": 98620 }, { "epoch": 0.3812759969692753, "grad_norm": 0.12461947649717331, "learning_rate": 0.002, "loss": 2.342, "step": 98630 }, { "epoch": 0.38131465417265853, "grad_norm": 0.10771634429693222, "learning_rate": 0.002, "loss": 2.3391, "step": 98640 }, { "epoch": 0.38135331137604184, "grad_norm": 0.1092999279499054, "learning_rate": 0.002, "loss": 2.3428, "step": 98650 }, { "epoch": 0.3813919685794251, "grad_norm": 0.10708318650722504, "learning_rate": 0.002, "loss": 2.3493, "step": 98660 }, { "epoch": 0.38143062578280834, "grad_norm": 0.11872132867574692, "learning_rate": 0.002, "loss": 2.3525, "step": 98670 }, { "epoch": 0.38146928298619165, "grad_norm": 0.10649701207876205, "learning_rate": 0.002, "loss": 2.3624, "step": 98680 }, { "epoch": 0.3815079401895749, "grad_norm": 0.0995868369936943, "learning_rate": 0.002, "loss": 2.3612, "step": 98690 }, { "epoch": 0.3815465973929582, "grad_norm": 0.10507569462060928, "learning_rate": 0.002, "loss": 2.3568, "step": 98700 }, { "epoch": 0.38158525459634146, "grad_norm": 0.0972597673535347, "learning_rate": 0.002, "loss": 2.3476, "step": 98710 }, { "epoch": 0.38162391179972477, "grad_norm": 0.47026678919792175, "learning_rate": 0.002, "loss": 2.3579, "step": 98720 }, { "epoch": 0.381662569003108, "grad_norm": 0.10154223442077637, "learning_rate": 0.002, "loss": 2.3655, "step": 98730 }, { "epoch": 0.38170122620649133, "grad_norm": 0.10038376599550247, "learning_rate": 0.002, "loss": 2.3575, "step": 98740 }, { "epoch": 0.3817398834098746, "grad_norm": 0.11687665432691574, "learning_rate": 0.002, "loss": 2.351, "step": 98750 }, { "epoch": 0.3817785406132579, "grad_norm": 0.09308470785617828, "learning_rate": 0.002, "loss": 2.3534, "step": 98760 }, { "epoch": 0.38181719781664114, "grad_norm": 0.11552231013774872, "learning_rate": 0.002, "loss": 2.3443, "step": 98770 }, { "epoch": 0.38185585502002445, "grad_norm": 0.13166840374469757, "learning_rate": 0.002, "loss": 2.3241, "step": 98780 }, { "epoch": 0.3818945122234077, "grad_norm": 0.13677500188350677, "learning_rate": 0.002, "loss": 2.3516, "step": 98790 }, { "epoch": 0.381933169426791, "grad_norm": 0.13627128303050995, "learning_rate": 0.002, "loss": 2.3494, "step": 98800 }, { "epoch": 0.38197182663017426, "grad_norm": 0.09993304312229156, "learning_rate": 0.002, "loss": 2.358, "step": 98810 }, { "epoch": 0.38201048383355757, "grad_norm": 0.10988759994506836, "learning_rate": 0.002, "loss": 2.352, "step": 98820 }, { "epoch": 0.3820491410369408, "grad_norm": 0.11517474800348282, "learning_rate": 0.002, "loss": 2.3379, "step": 98830 }, { "epoch": 0.38208779824032413, "grad_norm": 0.10560702532529831, "learning_rate": 0.002, "loss": 2.3492, "step": 98840 }, { "epoch": 0.3821264554437074, "grad_norm": 0.11196266114711761, "learning_rate": 0.002, "loss": 2.3409, "step": 98850 }, { "epoch": 0.38216511264709063, "grad_norm": 0.10116425156593323, "learning_rate": 0.002, "loss": 2.3617, "step": 98860 }, { "epoch": 0.38220376985047394, "grad_norm": 0.11830340325832367, "learning_rate": 0.002, "loss": 2.3666, "step": 98870 }, { "epoch": 0.3822424270538572, "grad_norm": 0.11167599260807037, "learning_rate": 0.002, "loss": 2.3457, "step": 98880 }, { "epoch": 0.3822810842572405, "grad_norm": 0.11318913847208023, "learning_rate": 0.002, "loss": 2.3534, "step": 98890 }, { "epoch": 0.38231974146062375, "grad_norm": 0.12292078137397766, "learning_rate": 0.002, "loss": 2.3667, "step": 98900 }, { "epoch": 0.38235839866400706, "grad_norm": 0.11362916976213455, "learning_rate": 0.002, "loss": 2.3601, "step": 98910 }, { "epoch": 0.3823970558673903, "grad_norm": 0.13096804916858673, "learning_rate": 0.002, "loss": 2.3631, "step": 98920 }, { "epoch": 0.3824357130707736, "grad_norm": 0.1046454980969429, "learning_rate": 0.002, "loss": 2.3495, "step": 98930 }, { "epoch": 0.38247437027415687, "grad_norm": 0.11374887824058533, "learning_rate": 0.002, "loss": 2.3336, "step": 98940 }, { "epoch": 0.3825130274775402, "grad_norm": 0.1099177896976471, "learning_rate": 0.002, "loss": 2.3395, "step": 98950 }, { "epoch": 0.38255168468092343, "grad_norm": 0.10790438205003738, "learning_rate": 0.002, "loss": 2.3443, "step": 98960 }, { "epoch": 0.38259034188430674, "grad_norm": 0.2521016001701355, "learning_rate": 0.002, "loss": 2.3604, "step": 98970 }, { "epoch": 0.38262899908769, "grad_norm": 0.09931904077529907, "learning_rate": 0.002, "loss": 2.3424, "step": 98980 }, { "epoch": 0.3826676562910733, "grad_norm": 0.11894132196903229, "learning_rate": 0.002, "loss": 2.3424, "step": 98990 }, { "epoch": 0.38270631349445655, "grad_norm": 0.12821850180625916, "learning_rate": 0.002, "loss": 2.3543, "step": 99000 }, { "epoch": 0.38274497069783986, "grad_norm": 0.10607095062732697, "learning_rate": 0.002, "loss": 2.3441, "step": 99010 }, { "epoch": 0.3827836279012231, "grad_norm": 0.09565222263336182, "learning_rate": 0.002, "loss": 2.3441, "step": 99020 }, { "epoch": 0.3828222851046064, "grad_norm": 0.11812412738800049, "learning_rate": 0.002, "loss": 2.3651, "step": 99030 }, { "epoch": 0.38286094230798967, "grad_norm": 0.12434925884008408, "learning_rate": 0.002, "loss": 2.3536, "step": 99040 }, { "epoch": 0.3828995995113729, "grad_norm": 0.09686396270990372, "learning_rate": 0.002, "loss": 2.3606, "step": 99050 }, { "epoch": 0.38293825671475623, "grad_norm": 0.1269039660692215, "learning_rate": 0.002, "loss": 2.3439, "step": 99060 }, { "epoch": 0.3829769139181395, "grad_norm": 0.09965300559997559, "learning_rate": 0.002, "loss": 2.3529, "step": 99070 }, { "epoch": 0.3830155711215228, "grad_norm": 0.09235601127147675, "learning_rate": 0.002, "loss": 2.3576, "step": 99080 }, { "epoch": 0.38305422832490604, "grad_norm": 0.10772426426410675, "learning_rate": 0.002, "loss": 2.3319, "step": 99090 }, { "epoch": 0.38309288552828935, "grad_norm": 0.1444188356399536, "learning_rate": 0.002, "loss": 2.3436, "step": 99100 }, { "epoch": 0.3831315427316726, "grad_norm": 0.11507777869701385, "learning_rate": 0.002, "loss": 2.3402, "step": 99110 }, { "epoch": 0.3831701999350559, "grad_norm": 0.11316626518964767, "learning_rate": 0.002, "loss": 2.3571, "step": 99120 }, { "epoch": 0.38320885713843916, "grad_norm": 0.11571101099252701, "learning_rate": 0.002, "loss": 2.3585, "step": 99130 }, { "epoch": 0.38324751434182247, "grad_norm": 0.0992959663271904, "learning_rate": 0.002, "loss": 2.3627, "step": 99140 }, { "epoch": 0.3832861715452057, "grad_norm": 0.12370272725820541, "learning_rate": 0.002, "loss": 2.3487, "step": 99150 }, { "epoch": 0.38332482874858903, "grad_norm": 0.0909237340092659, "learning_rate": 0.002, "loss": 2.3398, "step": 99160 }, { "epoch": 0.3833634859519723, "grad_norm": 0.13060149550437927, "learning_rate": 0.002, "loss": 2.359, "step": 99170 }, { "epoch": 0.3834021431553556, "grad_norm": 0.1097990944981575, "learning_rate": 0.002, "loss": 2.3511, "step": 99180 }, { "epoch": 0.38344080035873884, "grad_norm": 0.12019643187522888, "learning_rate": 0.002, "loss": 2.3563, "step": 99190 }, { "epoch": 0.38347945756212215, "grad_norm": 0.10726134479045868, "learning_rate": 0.002, "loss": 2.3526, "step": 99200 }, { "epoch": 0.3835181147655054, "grad_norm": 0.11693156510591507, "learning_rate": 0.002, "loss": 2.37, "step": 99210 }, { "epoch": 0.3835567719688887, "grad_norm": 0.10005655139684677, "learning_rate": 0.002, "loss": 2.3648, "step": 99220 }, { "epoch": 0.38359542917227196, "grad_norm": 0.10546668618917465, "learning_rate": 0.002, "loss": 2.3511, "step": 99230 }, { "epoch": 0.3836340863756552, "grad_norm": 0.11036413162946701, "learning_rate": 0.002, "loss": 2.3456, "step": 99240 }, { "epoch": 0.3836727435790385, "grad_norm": 0.09516773372888565, "learning_rate": 0.002, "loss": 2.349, "step": 99250 }, { "epoch": 0.3837114007824218, "grad_norm": 0.10512733459472656, "learning_rate": 0.002, "loss": 2.3508, "step": 99260 }, { "epoch": 0.3837500579858051, "grad_norm": 0.1009531170129776, "learning_rate": 0.002, "loss": 2.3521, "step": 99270 }, { "epoch": 0.38378871518918833, "grad_norm": 0.29204094409942627, "learning_rate": 0.002, "loss": 2.3476, "step": 99280 }, { "epoch": 0.38382737239257164, "grad_norm": 0.1206972673535347, "learning_rate": 0.002, "loss": 2.36, "step": 99290 }, { "epoch": 0.3838660295959549, "grad_norm": 0.09276767075061798, "learning_rate": 0.002, "loss": 2.3647, "step": 99300 }, { "epoch": 0.3839046867993382, "grad_norm": 0.09996117651462555, "learning_rate": 0.002, "loss": 2.3559, "step": 99310 }, { "epoch": 0.38394334400272145, "grad_norm": 0.09887313842773438, "learning_rate": 0.002, "loss": 2.3615, "step": 99320 }, { "epoch": 0.38398200120610476, "grad_norm": 0.10435585677623749, "learning_rate": 0.002, "loss": 2.3481, "step": 99330 }, { "epoch": 0.384020658409488, "grad_norm": 0.10483123362064362, "learning_rate": 0.002, "loss": 2.3395, "step": 99340 }, { "epoch": 0.3840593156128713, "grad_norm": 0.10192303359508514, "learning_rate": 0.002, "loss": 2.3609, "step": 99350 }, { "epoch": 0.3840979728162546, "grad_norm": 0.1300593763589859, "learning_rate": 0.002, "loss": 2.3706, "step": 99360 }, { "epoch": 0.3841366300196379, "grad_norm": 0.10089082270860672, "learning_rate": 0.002, "loss": 2.3627, "step": 99370 }, { "epoch": 0.38417528722302113, "grad_norm": 0.11565294116735458, "learning_rate": 0.002, "loss": 2.3452, "step": 99380 }, { "epoch": 0.38421394442640444, "grad_norm": 0.10272479802370071, "learning_rate": 0.002, "loss": 2.3495, "step": 99390 }, { "epoch": 0.3842526016297877, "grad_norm": 0.12943372130393982, "learning_rate": 0.002, "loss": 2.3626, "step": 99400 }, { "epoch": 0.38429125883317095, "grad_norm": 0.11217237263917923, "learning_rate": 0.002, "loss": 2.3465, "step": 99410 }, { "epoch": 0.38432991603655425, "grad_norm": 0.12317653745412827, "learning_rate": 0.002, "loss": 2.3647, "step": 99420 }, { "epoch": 0.3843685732399375, "grad_norm": 0.10992909222841263, "learning_rate": 0.002, "loss": 2.365, "step": 99430 }, { "epoch": 0.3844072304433208, "grad_norm": 0.11047405749559402, "learning_rate": 0.002, "loss": 2.3682, "step": 99440 }, { "epoch": 0.38444588764670407, "grad_norm": 0.10105373710393906, "learning_rate": 0.002, "loss": 2.353, "step": 99450 }, { "epoch": 0.3844845448500874, "grad_norm": 0.12573686242103577, "learning_rate": 0.002, "loss": 2.3561, "step": 99460 }, { "epoch": 0.3845232020534706, "grad_norm": 0.12352735549211502, "learning_rate": 0.002, "loss": 2.3446, "step": 99470 }, { "epoch": 0.38456185925685393, "grad_norm": 0.1070810928940773, "learning_rate": 0.002, "loss": 2.3613, "step": 99480 }, { "epoch": 0.3846005164602372, "grad_norm": 0.10397505760192871, "learning_rate": 0.002, "loss": 2.3495, "step": 99490 }, { "epoch": 0.3846391736636205, "grad_norm": 0.1048097237944603, "learning_rate": 0.002, "loss": 2.375, "step": 99500 }, { "epoch": 0.38467783086700374, "grad_norm": 0.10362658649682999, "learning_rate": 0.002, "loss": 2.3469, "step": 99510 }, { "epoch": 0.38471648807038705, "grad_norm": 0.1036435216665268, "learning_rate": 0.002, "loss": 2.3525, "step": 99520 }, { "epoch": 0.3847551452737703, "grad_norm": 0.11805278807878494, "learning_rate": 0.002, "loss": 2.3403, "step": 99530 }, { "epoch": 0.3847938024771536, "grad_norm": 0.11448401212692261, "learning_rate": 0.002, "loss": 2.34, "step": 99540 }, { "epoch": 0.38483245968053686, "grad_norm": 0.10449942946434021, "learning_rate": 0.002, "loss": 2.3523, "step": 99550 }, { "epoch": 0.3848711168839202, "grad_norm": 0.08898783475160599, "learning_rate": 0.002, "loss": 2.3542, "step": 99560 }, { "epoch": 0.3849097740873034, "grad_norm": 0.12039284408092499, "learning_rate": 0.002, "loss": 2.3386, "step": 99570 }, { "epoch": 0.38494843129068673, "grad_norm": 0.09976094961166382, "learning_rate": 0.002, "loss": 2.339, "step": 99580 }, { "epoch": 0.38498708849407, "grad_norm": 0.10564619302749634, "learning_rate": 0.002, "loss": 2.3476, "step": 99590 }, { "epoch": 0.38502574569745324, "grad_norm": 0.10846693813800812, "learning_rate": 0.002, "loss": 2.358, "step": 99600 }, { "epoch": 0.38506440290083654, "grad_norm": 0.10320775210857391, "learning_rate": 0.002, "loss": 2.3449, "step": 99610 }, { "epoch": 0.3851030601042198, "grad_norm": 0.09724020957946777, "learning_rate": 0.002, "loss": 2.3413, "step": 99620 }, { "epoch": 0.3851417173076031, "grad_norm": 0.1234230324625969, "learning_rate": 0.002, "loss": 2.3598, "step": 99630 }, { "epoch": 0.38518037451098636, "grad_norm": 0.10737951844930649, "learning_rate": 0.002, "loss": 2.3435, "step": 99640 }, { "epoch": 0.38521903171436966, "grad_norm": 0.10597635805606842, "learning_rate": 0.002, "loss": 2.3516, "step": 99650 }, { "epoch": 0.3852576889177529, "grad_norm": 0.10188396275043488, "learning_rate": 0.002, "loss": 2.3468, "step": 99660 }, { "epoch": 0.3852963461211362, "grad_norm": 0.10658920556306839, "learning_rate": 0.002, "loss": 2.36, "step": 99670 }, { "epoch": 0.3853350033245195, "grad_norm": 0.11044390499591827, "learning_rate": 0.002, "loss": 2.3544, "step": 99680 }, { "epoch": 0.3853736605279028, "grad_norm": 0.09951883554458618, "learning_rate": 0.002, "loss": 2.3418, "step": 99690 }, { "epoch": 0.38541231773128604, "grad_norm": 0.1322626769542694, "learning_rate": 0.002, "loss": 2.3566, "step": 99700 }, { "epoch": 0.38545097493466934, "grad_norm": 0.10381346940994263, "learning_rate": 0.002, "loss": 2.3437, "step": 99710 }, { "epoch": 0.3854896321380526, "grad_norm": 0.09962229430675507, "learning_rate": 0.002, "loss": 2.3455, "step": 99720 }, { "epoch": 0.3855282893414359, "grad_norm": 0.1005672961473465, "learning_rate": 0.002, "loss": 2.3502, "step": 99730 }, { "epoch": 0.38556694654481916, "grad_norm": 0.11470509320497513, "learning_rate": 0.002, "loss": 2.3409, "step": 99740 }, { "epoch": 0.38560560374820246, "grad_norm": 0.09590152651071548, "learning_rate": 0.002, "loss": 2.3534, "step": 99750 }, { "epoch": 0.3856442609515857, "grad_norm": 0.12328245490789413, "learning_rate": 0.002, "loss": 2.3471, "step": 99760 }, { "epoch": 0.385682918154969, "grad_norm": 0.10069391131401062, "learning_rate": 0.002, "loss": 2.3697, "step": 99770 }, { "epoch": 0.3857215753583523, "grad_norm": 0.11742331087589264, "learning_rate": 0.002, "loss": 2.3389, "step": 99780 }, { "epoch": 0.3857602325617355, "grad_norm": 0.10783084481954575, "learning_rate": 0.002, "loss": 2.3516, "step": 99790 }, { "epoch": 0.38579888976511884, "grad_norm": 0.10142330825328827, "learning_rate": 0.002, "loss": 2.3556, "step": 99800 }, { "epoch": 0.3858375469685021, "grad_norm": 0.10665824264287949, "learning_rate": 0.002, "loss": 2.3471, "step": 99810 }, { "epoch": 0.3858762041718854, "grad_norm": 0.1259702444076538, "learning_rate": 0.002, "loss": 2.3443, "step": 99820 }, { "epoch": 0.38591486137526865, "grad_norm": 0.08524170517921448, "learning_rate": 0.002, "loss": 2.3453, "step": 99830 }, { "epoch": 0.38595351857865196, "grad_norm": 0.10294031351804733, "learning_rate": 0.002, "loss": 2.3562, "step": 99840 }, { "epoch": 0.3859921757820352, "grad_norm": 0.091279037296772, "learning_rate": 0.002, "loss": 2.3615, "step": 99850 }, { "epoch": 0.3860308329854185, "grad_norm": 0.13400551676750183, "learning_rate": 0.002, "loss": 2.3447, "step": 99860 }, { "epoch": 0.38606949018880177, "grad_norm": 0.11633538454771042, "learning_rate": 0.002, "loss": 2.341, "step": 99870 }, { "epoch": 0.3861081473921851, "grad_norm": 0.09606362134218216, "learning_rate": 0.002, "loss": 2.3507, "step": 99880 }, { "epoch": 0.3861468045955683, "grad_norm": 0.1037554070353508, "learning_rate": 0.002, "loss": 2.3339, "step": 99890 }, { "epoch": 0.38618546179895163, "grad_norm": 0.10212548822164536, "learning_rate": 0.002, "loss": 2.338, "step": 99900 }, { "epoch": 0.3862241190023349, "grad_norm": 0.10313539206981659, "learning_rate": 0.002, "loss": 2.3452, "step": 99910 }, { "epoch": 0.3862627762057182, "grad_norm": 0.12302293628454208, "learning_rate": 0.002, "loss": 2.3687, "step": 99920 }, { "epoch": 0.38630143340910145, "grad_norm": 0.12717850506305695, "learning_rate": 0.002, "loss": 2.3598, "step": 99930 }, { "epoch": 0.38634009061248475, "grad_norm": 0.11817540228366852, "learning_rate": 0.002, "loss": 2.3537, "step": 99940 }, { "epoch": 0.386378747815868, "grad_norm": 0.1065082773566246, "learning_rate": 0.002, "loss": 2.3583, "step": 99950 }, { "epoch": 0.3864174050192513, "grad_norm": 0.11863639950752258, "learning_rate": 0.002, "loss": 2.3699, "step": 99960 }, { "epoch": 0.38645606222263457, "grad_norm": 0.10106471925973892, "learning_rate": 0.002, "loss": 2.3521, "step": 99970 }, { "epoch": 0.3864947194260178, "grad_norm": 0.10772629082202911, "learning_rate": 0.002, "loss": 2.357, "step": 99980 }, { "epoch": 0.3865333766294011, "grad_norm": 0.11508717387914658, "learning_rate": 0.002, "loss": 2.3613, "step": 99990 }, { "epoch": 0.3865720338327844, "grad_norm": 0.12177982926368713, "learning_rate": 0.002, "loss": 2.3544, "step": 100000 }, { "epoch": 0.3866106910361677, "grad_norm": 0.10621000081300735, "learning_rate": 0.002, "loss": 2.355, "step": 100010 }, { "epoch": 0.38664934823955094, "grad_norm": 0.09023228287696838, "learning_rate": 0.002, "loss": 2.3398, "step": 100020 }, { "epoch": 0.38668800544293425, "grad_norm": 0.13165788352489471, "learning_rate": 0.002, "loss": 2.353, "step": 100030 }, { "epoch": 0.3867266626463175, "grad_norm": 0.10812313854694366, "learning_rate": 0.002, "loss": 2.3567, "step": 100040 }, { "epoch": 0.3867653198497008, "grad_norm": 0.1083117350935936, "learning_rate": 0.002, "loss": 2.3651, "step": 100050 }, { "epoch": 0.38680397705308406, "grad_norm": 0.12282432615756989, "learning_rate": 0.002, "loss": 2.3435, "step": 100060 }, { "epoch": 0.38684263425646737, "grad_norm": 0.10567369312047958, "learning_rate": 0.002, "loss": 2.3682, "step": 100070 }, { "epoch": 0.3868812914598506, "grad_norm": 0.13557782769203186, "learning_rate": 0.002, "loss": 2.3436, "step": 100080 }, { "epoch": 0.3869199486632339, "grad_norm": 0.0997626781463623, "learning_rate": 0.002, "loss": 2.3415, "step": 100090 }, { "epoch": 0.3869586058666172, "grad_norm": 0.11340288817882538, "learning_rate": 0.002, "loss": 2.3433, "step": 100100 }, { "epoch": 0.3869972630700005, "grad_norm": 0.11692352592945099, "learning_rate": 0.002, "loss": 2.365, "step": 100110 }, { "epoch": 0.38703592027338374, "grad_norm": 0.10254278779029846, "learning_rate": 0.002, "loss": 2.349, "step": 100120 }, { "epoch": 0.38707457747676705, "grad_norm": 0.1118793785572052, "learning_rate": 0.002, "loss": 2.3528, "step": 100130 }, { "epoch": 0.3871132346801503, "grad_norm": 0.11556681990623474, "learning_rate": 0.002, "loss": 2.3475, "step": 100140 }, { "epoch": 0.3871518918835336, "grad_norm": 0.12200041115283966, "learning_rate": 0.002, "loss": 2.3454, "step": 100150 }, { "epoch": 0.38719054908691686, "grad_norm": 0.125214621424675, "learning_rate": 0.002, "loss": 2.3604, "step": 100160 }, { "epoch": 0.3872292062903001, "grad_norm": 0.10922552645206451, "learning_rate": 0.002, "loss": 2.3383, "step": 100170 }, { "epoch": 0.3872678634936834, "grad_norm": 0.09976062923669815, "learning_rate": 0.002, "loss": 2.358, "step": 100180 }, { "epoch": 0.38730652069706667, "grad_norm": 0.10824978351593018, "learning_rate": 0.002, "loss": 2.3426, "step": 100190 }, { "epoch": 0.38734517790045, "grad_norm": 0.09569326043128967, "learning_rate": 0.002, "loss": 2.3456, "step": 100200 }, { "epoch": 0.38738383510383323, "grad_norm": 0.10602735728025436, "learning_rate": 0.002, "loss": 2.3337, "step": 100210 }, { "epoch": 0.38742249230721654, "grad_norm": 0.09490755200386047, "learning_rate": 0.002, "loss": 2.344, "step": 100220 }, { "epoch": 0.3874611495105998, "grad_norm": 0.100970558822155, "learning_rate": 0.002, "loss": 2.3477, "step": 100230 }, { "epoch": 0.3874998067139831, "grad_norm": 0.10085583478212357, "learning_rate": 0.002, "loss": 2.3495, "step": 100240 }, { "epoch": 0.38753846391736635, "grad_norm": 0.09461560845375061, "learning_rate": 0.002, "loss": 2.3436, "step": 100250 }, { "epoch": 0.38757712112074966, "grad_norm": 0.12680354714393616, "learning_rate": 0.002, "loss": 2.3358, "step": 100260 }, { "epoch": 0.3876157783241329, "grad_norm": 0.11424418538808823, "learning_rate": 0.002, "loss": 2.3644, "step": 100270 }, { "epoch": 0.3876544355275162, "grad_norm": 0.1029893308877945, "learning_rate": 0.002, "loss": 2.3513, "step": 100280 }, { "epoch": 0.38769309273089947, "grad_norm": 0.11332143098115921, "learning_rate": 0.002, "loss": 2.3487, "step": 100290 }, { "epoch": 0.3877317499342828, "grad_norm": 0.11435232311487198, "learning_rate": 0.002, "loss": 2.3515, "step": 100300 }, { "epoch": 0.38777040713766603, "grad_norm": 0.09768123924732208, "learning_rate": 0.002, "loss": 2.357, "step": 100310 }, { "epoch": 0.38780906434104934, "grad_norm": 0.11493542790412903, "learning_rate": 0.002, "loss": 2.358, "step": 100320 }, { "epoch": 0.3878477215444326, "grad_norm": 0.09657100588083267, "learning_rate": 0.002, "loss": 2.3491, "step": 100330 }, { "epoch": 0.38788637874781584, "grad_norm": 0.11080711334943771, "learning_rate": 0.002, "loss": 2.3483, "step": 100340 }, { "epoch": 0.38792503595119915, "grad_norm": 0.12045042961835861, "learning_rate": 0.002, "loss": 2.3463, "step": 100350 }, { "epoch": 0.3879636931545824, "grad_norm": 0.12552835047245026, "learning_rate": 0.002, "loss": 2.3519, "step": 100360 }, { "epoch": 0.3880023503579657, "grad_norm": 0.10142803192138672, "learning_rate": 0.002, "loss": 2.3563, "step": 100370 }, { "epoch": 0.38804100756134896, "grad_norm": 0.09850650280714035, "learning_rate": 0.002, "loss": 2.3424, "step": 100380 }, { "epoch": 0.38807966476473227, "grad_norm": 0.09998472779989243, "learning_rate": 0.002, "loss": 2.3501, "step": 100390 }, { "epoch": 0.3881183219681155, "grad_norm": 0.11177127808332443, "learning_rate": 0.002, "loss": 2.3461, "step": 100400 }, { "epoch": 0.38815697917149883, "grad_norm": 0.11975325644016266, "learning_rate": 0.002, "loss": 2.3539, "step": 100410 }, { "epoch": 0.3881956363748821, "grad_norm": 0.10338979959487915, "learning_rate": 0.002, "loss": 2.3457, "step": 100420 }, { "epoch": 0.3882342935782654, "grad_norm": 0.10348150879144669, "learning_rate": 0.002, "loss": 2.3322, "step": 100430 }, { "epoch": 0.38827295078164864, "grad_norm": 0.09385969489812851, "learning_rate": 0.002, "loss": 2.3364, "step": 100440 }, { "epoch": 0.38831160798503195, "grad_norm": 0.11644785851240158, "learning_rate": 0.002, "loss": 2.3507, "step": 100450 }, { "epoch": 0.3883502651884152, "grad_norm": 0.09925386309623718, "learning_rate": 0.002, "loss": 2.3319, "step": 100460 }, { "epoch": 0.3883889223917985, "grad_norm": 0.1058383509516716, "learning_rate": 0.002, "loss": 2.3368, "step": 100470 }, { "epoch": 0.38842757959518176, "grad_norm": 0.11195088922977448, "learning_rate": 0.002, "loss": 2.3567, "step": 100480 }, { "epoch": 0.38846623679856507, "grad_norm": 0.11256535351276398, "learning_rate": 0.002, "loss": 2.3495, "step": 100490 }, { "epoch": 0.3885048940019483, "grad_norm": 0.1299486607313156, "learning_rate": 0.002, "loss": 2.3463, "step": 100500 }, { "epoch": 0.3885435512053316, "grad_norm": 0.10605520009994507, "learning_rate": 0.002, "loss": 2.357, "step": 100510 }, { "epoch": 0.3885822084087149, "grad_norm": 0.1053491160273552, "learning_rate": 0.002, "loss": 2.3392, "step": 100520 }, { "epoch": 0.38862086561209813, "grad_norm": 0.10584665834903717, "learning_rate": 0.002, "loss": 2.3673, "step": 100530 }, { "epoch": 0.38865952281548144, "grad_norm": 0.12226726114749908, "learning_rate": 0.002, "loss": 2.3576, "step": 100540 }, { "epoch": 0.3886981800188647, "grad_norm": 0.09885209053754807, "learning_rate": 0.002, "loss": 2.359, "step": 100550 }, { "epoch": 0.388736837222248, "grad_norm": 0.11455842852592468, "learning_rate": 0.002, "loss": 2.3347, "step": 100560 }, { "epoch": 0.38877549442563125, "grad_norm": 0.10637041181325912, "learning_rate": 0.002, "loss": 2.3457, "step": 100570 }, { "epoch": 0.38881415162901456, "grad_norm": 0.12398865073919296, "learning_rate": 0.002, "loss": 2.3565, "step": 100580 }, { "epoch": 0.3888528088323978, "grad_norm": 0.9685719013214111, "learning_rate": 0.002, "loss": 2.3601, "step": 100590 }, { "epoch": 0.3888914660357811, "grad_norm": 0.21529839932918549, "learning_rate": 0.002, "loss": 2.3701, "step": 100600 }, { "epoch": 0.38893012323916437, "grad_norm": 0.10525954514741898, "learning_rate": 0.002, "loss": 2.3658, "step": 100610 }, { "epoch": 0.3889687804425477, "grad_norm": 0.0974649041891098, "learning_rate": 0.002, "loss": 2.3566, "step": 100620 }, { "epoch": 0.38900743764593093, "grad_norm": 0.10220787674188614, "learning_rate": 0.002, "loss": 2.3444, "step": 100630 }, { "epoch": 0.38904609484931424, "grad_norm": 0.11424384266138077, "learning_rate": 0.002, "loss": 2.338, "step": 100640 }, { "epoch": 0.3890847520526975, "grad_norm": 0.09804636985063553, "learning_rate": 0.002, "loss": 2.3421, "step": 100650 }, { "epoch": 0.3891234092560808, "grad_norm": 0.10460495948791504, "learning_rate": 0.002, "loss": 2.3541, "step": 100660 }, { "epoch": 0.38916206645946405, "grad_norm": 0.09209802001714706, "learning_rate": 0.002, "loss": 2.3446, "step": 100670 }, { "epoch": 0.38920072366284736, "grad_norm": 0.1093793734908104, "learning_rate": 0.002, "loss": 2.3431, "step": 100680 }, { "epoch": 0.3892393808662306, "grad_norm": 0.1261608600616455, "learning_rate": 0.002, "loss": 2.363, "step": 100690 }, { "epoch": 0.3892780380696139, "grad_norm": 0.1179034411907196, "learning_rate": 0.002, "loss": 2.3649, "step": 100700 }, { "epoch": 0.38931669527299717, "grad_norm": 0.10968802869319916, "learning_rate": 0.002, "loss": 2.3619, "step": 100710 }, { "epoch": 0.3893553524763804, "grad_norm": 0.10325589776039124, "learning_rate": 0.002, "loss": 2.3519, "step": 100720 }, { "epoch": 0.38939400967976373, "grad_norm": 0.1065792515873909, "learning_rate": 0.002, "loss": 2.3419, "step": 100730 }, { "epoch": 0.389432666883147, "grad_norm": 0.12430709600448608, "learning_rate": 0.002, "loss": 2.3603, "step": 100740 }, { "epoch": 0.3894713240865303, "grad_norm": 0.12091759592294693, "learning_rate": 0.002, "loss": 2.3484, "step": 100750 }, { "epoch": 0.38950998128991354, "grad_norm": 0.10765664279460907, "learning_rate": 0.002, "loss": 2.3393, "step": 100760 }, { "epoch": 0.38954863849329685, "grad_norm": 0.09883479028940201, "learning_rate": 0.002, "loss": 2.3605, "step": 100770 }, { "epoch": 0.3895872956966801, "grad_norm": 0.12051030993461609, "learning_rate": 0.002, "loss": 2.342, "step": 100780 }, { "epoch": 0.3896259529000634, "grad_norm": 0.1364177167415619, "learning_rate": 0.002, "loss": 2.36, "step": 100790 }, { "epoch": 0.38966461010344666, "grad_norm": 0.11860163509845734, "learning_rate": 0.002, "loss": 2.3469, "step": 100800 }, { "epoch": 0.38970326730682997, "grad_norm": 0.09373585134744644, "learning_rate": 0.002, "loss": 2.3593, "step": 100810 }, { "epoch": 0.3897419245102132, "grad_norm": 0.10079554468393326, "learning_rate": 0.002, "loss": 2.3548, "step": 100820 }, { "epoch": 0.38978058171359653, "grad_norm": 0.1616877168416977, "learning_rate": 0.002, "loss": 2.3458, "step": 100830 }, { "epoch": 0.3898192389169798, "grad_norm": 0.11990071833133698, "learning_rate": 0.002, "loss": 2.3505, "step": 100840 }, { "epoch": 0.3898578961203631, "grad_norm": 0.1280861496925354, "learning_rate": 0.002, "loss": 2.3584, "step": 100850 }, { "epoch": 0.38989655332374634, "grad_norm": 0.10243765264749527, "learning_rate": 0.002, "loss": 2.3412, "step": 100860 }, { "epoch": 0.38993521052712965, "grad_norm": 0.10121989250183105, "learning_rate": 0.002, "loss": 2.3458, "step": 100870 }, { "epoch": 0.3899738677305129, "grad_norm": 0.12439978867769241, "learning_rate": 0.002, "loss": 2.3637, "step": 100880 }, { "epoch": 0.3900125249338962, "grad_norm": 0.10615507513284683, "learning_rate": 0.002, "loss": 2.3552, "step": 100890 }, { "epoch": 0.39005118213727946, "grad_norm": 0.10463973879814148, "learning_rate": 0.002, "loss": 2.3467, "step": 100900 }, { "epoch": 0.3900898393406627, "grad_norm": 0.13091835379600525, "learning_rate": 0.002, "loss": 2.373, "step": 100910 }, { "epoch": 0.390128496544046, "grad_norm": 0.11568377912044525, "learning_rate": 0.002, "loss": 2.3499, "step": 100920 }, { "epoch": 0.3901671537474293, "grad_norm": 0.11564431339502335, "learning_rate": 0.002, "loss": 2.3529, "step": 100930 }, { "epoch": 0.3902058109508126, "grad_norm": 0.11365380883216858, "learning_rate": 0.002, "loss": 2.3602, "step": 100940 }, { "epoch": 0.39024446815419583, "grad_norm": 0.11408404260873795, "learning_rate": 0.002, "loss": 2.3638, "step": 100950 }, { "epoch": 0.39028312535757914, "grad_norm": 0.10143405199050903, "learning_rate": 0.002, "loss": 2.3463, "step": 100960 }, { "epoch": 0.3903217825609624, "grad_norm": 0.11916525661945343, "learning_rate": 0.002, "loss": 2.3344, "step": 100970 }, { "epoch": 0.3903604397643457, "grad_norm": 0.11719028651714325, "learning_rate": 0.002, "loss": 2.3601, "step": 100980 }, { "epoch": 0.39039909696772895, "grad_norm": 0.1009218841791153, "learning_rate": 0.002, "loss": 2.3485, "step": 100990 }, { "epoch": 0.39043775417111226, "grad_norm": 0.17523200809955597, "learning_rate": 0.002, "loss": 2.3587, "step": 101000 }, { "epoch": 0.3904764113744955, "grad_norm": 0.11103260517120361, "learning_rate": 0.002, "loss": 2.3415, "step": 101010 }, { "epoch": 0.3905150685778788, "grad_norm": 0.09755750745534897, "learning_rate": 0.002, "loss": 2.3474, "step": 101020 }, { "epoch": 0.3905537257812621, "grad_norm": 0.09089493751525879, "learning_rate": 0.002, "loss": 2.3647, "step": 101030 }, { "epoch": 0.3905923829846454, "grad_norm": 0.09701880812644958, "learning_rate": 0.002, "loss": 2.3554, "step": 101040 }, { "epoch": 0.39063104018802863, "grad_norm": 0.10882467031478882, "learning_rate": 0.002, "loss": 2.339, "step": 101050 }, { "epoch": 0.39066969739141194, "grad_norm": 0.11161111295223236, "learning_rate": 0.002, "loss": 2.3589, "step": 101060 }, { "epoch": 0.3907083545947952, "grad_norm": 0.11580555140972137, "learning_rate": 0.002, "loss": 2.3537, "step": 101070 }, { "epoch": 0.39074701179817845, "grad_norm": 0.10042113810777664, "learning_rate": 0.002, "loss": 2.3564, "step": 101080 }, { "epoch": 0.39078566900156175, "grad_norm": 0.12678062915802002, "learning_rate": 0.002, "loss": 2.3567, "step": 101090 }, { "epoch": 0.390824326204945, "grad_norm": 0.09452252835035324, "learning_rate": 0.002, "loss": 2.3595, "step": 101100 }, { "epoch": 0.3908629834083283, "grad_norm": 0.10759731382131577, "learning_rate": 0.002, "loss": 2.3494, "step": 101110 }, { "epoch": 0.39090164061171156, "grad_norm": 0.11984322965145111, "learning_rate": 0.002, "loss": 2.3671, "step": 101120 }, { "epoch": 0.3909402978150949, "grad_norm": 0.11291025578975677, "learning_rate": 0.002, "loss": 2.3592, "step": 101130 }, { "epoch": 0.3909789550184781, "grad_norm": 0.1867348998785019, "learning_rate": 0.002, "loss": 2.3457, "step": 101140 }, { "epoch": 0.39101761222186143, "grad_norm": 0.10820949822664261, "learning_rate": 0.002, "loss": 2.3613, "step": 101150 }, { "epoch": 0.3910562694252447, "grad_norm": 0.11150885373353958, "learning_rate": 0.002, "loss": 2.3521, "step": 101160 }, { "epoch": 0.391094926628628, "grad_norm": 0.12619318068027496, "learning_rate": 0.002, "loss": 2.3607, "step": 101170 }, { "epoch": 0.39113358383201124, "grad_norm": 0.09870398789644241, "learning_rate": 0.002, "loss": 2.3581, "step": 101180 }, { "epoch": 0.39117224103539455, "grad_norm": 0.1280161589384079, "learning_rate": 0.002, "loss": 2.3513, "step": 101190 }, { "epoch": 0.3912108982387778, "grad_norm": 0.10799254477024078, "learning_rate": 0.002, "loss": 2.3613, "step": 101200 }, { "epoch": 0.3912495554421611, "grad_norm": 0.10901007801294327, "learning_rate": 0.002, "loss": 2.3712, "step": 101210 }, { "epoch": 0.39128821264554436, "grad_norm": 0.10274651646614075, "learning_rate": 0.002, "loss": 2.3452, "step": 101220 }, { "epoch": 0.39132686984892767, "grad_norm": 0.11202777922153473, "learning_rate": 0.002, "loss": 2.3497, "step": 101230 }, { "epoch": 0.3913655270523109, "grad_norm": 0.10203932970762253, "learning_rate": 0.002, "loss": 2.3467, "step": 101240 }, { "epoch": 0.39140418425569423, "grad_norm": 0.10965485125780106, "learning_rate": 0.002, "loss": 2.3605, "step": 101250 }, { "epoch": 0.3914428414590775, "grad_norm": 0.10451711714267731, "learning_rate": 0.002, "loss": 2.342, "step": 101260 }, { "epoch": 0.39148149866246074, "grad_norm": 0.10822159796953201, "learning_rate": 0.002, "loss": 2.3509, "step": 101270 }, { "epoch": 0.39152015586584404, "grad_norm": 0.10773058980703354, "learning_rate": 0.002, "loss": 2.3638, "step": 101280 }, { "epoch": 0.3915588130692273, "grad_norm": 0.11595533788204193, "learning_rate": 0.002, "loss": 2.3638, "step": 101290 }, { "epoch": 0.3915974702726106, "grad_norm": 0.0982164740562439, "learning_rate": 0.002, "loss": 2.3481, "step": 101300 }, { "epoch": 0.39163612747599386, "grad_norm": 0.11945968121290207, "learning_rate": 0.002, "loss": 2.3412, "step": 101310 }, { "epoch": 0.39167478467937716, "grad_norm": 0.09924999624490738, "learning_rate": 0.002, "loss": 2.3493, "step": 101320 }, { "epoch": 0.3917134418827604, "grad_norm": 0.13069695234298706, "learning_rate": 0.002, "loss": 2.3508, "step": 101330 }, { "epoch": 0.3917520990861437, "grad_norm": 0.10410667210817337, "learning_rate": 0.002, "loss": 2.3591, "step": 101340 }, { "epoch": 0.391790756289527, "grad_norm": 0.11061934381723404, "learning_rate": 0.002, "loss": 2.3555, "step": 101350 }, { "epoch": 0.3918294134929103, "grad_norm": 0.11253561824560165, "learning_rate": 0.002, "loss": 2.3545, "step": 101360 }, { "epoch": 0.39186807069629354, "grad_norm": 0.09618701040744781, "learning_rate": 0.002, "loss": 2.3468, "step": 101370 }, { "epoch": 0.39190672789967684, "grad_norm": 0.10598044097423553, "learning_rate": 0.002, "loss": 2.3497, "step": 101380 }, { "epoch": 0.3919453851030601, "grad_norm": 0.11163297295570374, "learning_rate": 0.002, "loss": 2.34, "step": 101390 }, { "epoch": 0.3919840423064434, "grad_norm": 0.1058531403541565, "learning_rate": 0.002, "loss": 2.3408, "step": 101400 }, { "epoch": 0.39202269950982666, "grad_norm": 0.10144970566034317, "learning_rate": 0.002, "loss": 2.3474, "step": 101410 }, { "epoch": 0.39206135671320996, "grad_norm": 0.1309688538312912, "learning_rate": 0.002, "loss": 2.3583, "step": 101420 }, { "epoch": 0.3921000139165932, "grad_norm": 0.0972873792052269, "learning_rate": 0.002, "loss": 2.3533, "step": 101430 }, { "epoch": 0.3921386711199765, "grad_norm": 0.11298844963312149, "learning_rate": 0.002, "loss": 2.3403, "step": 101440 }, { "epoch": 0.3921773283233598, "grad_norm": 0.09406055510044098, "learning_rate": 0.002, "loss": 2.3698, "step": 101450 }, { "epoch": 0.392215985526743, "grad_norm": 0.10956672579050064, "learning_rate": 0.002, "loss": 2.3538, "step": 101460 }, { "epoch": 0.39225464273012633, "grad_norm": 0.1047091856598854, "learning_rate": 0.002, "loss": 2.3302, "step": 101470 }, { "epoch": 0.3922932999335096, "grad_norm": 0.09906767308712006, "learning_rate": 0.002, "loss": 2.3663, "step": 101480 }, { "epoch": 0.3923319571368929, "grad_norm": 0.1101418063044548, "learning_rate": 0.002, "loss": 2.3578, "step": 101490 }, { "epoch": 0.39237061434027615, "grad_norm": 0.10937904566526413, "learning_rate": 0.002, "loss": 2.3506, "step": 101500 }, { "epoch": 0.39240927154365945, "grad_norm": 0.09774620085954666, "learning_rate": 0.002, "loss": 2.3523, "step": 101510 }, { "epoch": 0.3924479287470427, "grad_norm": 0.11182210594415665, "learning_rate": 0.002, "loss": 2.3593, "step": 101520 }, { "epoch": 0.392486585950426, "grad_norm": 0.11913948506116867, "learning_rate": 0.002, "loss": 2.3594, "step": 101530 }, { "epoch": 0.39252524315380927, "grad_norm": 0.1034182757139206, "learning_rate": 0.002, "loss": 2.3509, "step": 101540 }, { "epoch": 0.3925639003571926, "grad_norm": 0.11861073225736618, "learning_rate": 0.002, "loss": 2.3367, "step": 101550 }, { "epoch": 0.3926025575605758, "grad_norm": 0.10756992548704147, "learning_rate": 0.002, "loss": 2.344, "step": 101560 }, { "epoch": 0.39264121476395913, "grad_norm": 0.10961413383483887, "learning_rate": 0.002, "loss": 2.3586, "step": 101570 }, { "epoch": 0.3926798719673424, "grad_norm": 0.10385117679834366, "learning_rate": 0.002, "loss": 2.355, "step": 101580 }, { "epoch": 0.3927185291707257, "grad_norm": 0.10954749584197998, "learning_rate": 0.002, "loss": 2.3539, "step": 101590 }, { "epoch": 0.39275718637410895, "grad_norm": 0.10971171408891678, "learning_rate": 0.002, "loss": 2.3477, "step": 101600 }, { "epoch": 0.39279584357749225, "grad_norm": 0.10529763251543045, "learning_rate": 0.002, "loss": 2.3511, "step": 101610 }, { "epoch": 0.3928345007808755, "grad_norm": 0.10678326338529587, "learning_rate": 0.002, "loss": 2.3542, "step": 101620 }, { "epoch": 0.3928731579842588, "grad_norm": 0.1138482615351677, "learning_rate": 0.002, "loss": 2.3512, "step": 101630 }, { "epoch": 0.39291181518764207, "grad_norm": 0.10951939970254898, "learning_rate": 0.002, "loss": 2.3525, "step": 101640 }, { "epoch": 0.3929504723910253, "grad_norm": 0.10590513050556183, "learning_rate": 0.002, "loss": 2.3471, "step": 101650 }, { "epoch": 0.3929891295944086, "grad_norm": 0.10488978028297424, "learning_rate": 0.002, "loss": 2.357, "step": 101660 }, { "epoch": 0.3930277867977919, "grad_norm": 0.1191963478922844, "learning_rate": 0.002, "loss": 2.3513, "step": 101670 }, { "epoch": 0.3930664440011752, "grad_norm": 0.10693497955799103, "learning_rate": 0.002, "loss": 2.3418, "step": 101680 }, { "epoch": 0.39310510120455844, "grad_norm": 0.1340564340353012, "learning_rate": 0.002, "loss": 2.3491, "step": 101690 }, { "epoch": 0.39314375840794175, "grad_norm": 0.13421568274497986, "learning_rate": 0.002, "loss": 2.3455, "step": 101700 }, { "epoch": 0.393182415611325, "grad_norm": 0.10439509898424149, "learning_rate": 0.002, "loss": 2.3378, "step": 101710 }, { "epoch": 0.3932210728147083, "grad_norm": 0.1057930588722229, "learning_rate": 0.002, "loss": 2.3632, "step": 101720 }, { "epoch": 0.39325973001809156, "grad_norm": 0.11698611825704575, "learning_rate": 0.002, "loss": 2.3447, "step": 101730 }, { "epoch": 0.39329838722147487, "grad_norm": 0.12837250530719757, "learning_rate": 0.002, "loss": 2.3534, "step": 101740 }, { "epoch": 0.3933370444248581, "grad_norm": 0.09668096154928207, "learning_rate": 0.002, "loss": 2.3451, "step": 101750 }, { "epoch": 0.3933757016282414, "grad_norm": 0.1062922403216362, "learning_rate": 0.002, "loss": 2.356, "step": 101760 }, { "epoch": 0.3934143588316247, "grad_norm": 0.10767418146133423, "learning_rate": 0.002, "loss": 2.3467, "step": 101770 }, { "epoch": 0.393453016035008, "grad_norm": 0.1008111760020256, "learning_rate": 0.002, "loss": 2.3526, "step": 101780 }, { "epoch": 0.39349167323839124, "grad_norm": 0.1303732544183731, "learning_rate": 0.002, "loss": 2.3563, "step": 101790 }, { "epoch": 0.39353033044177455, "grad_norm": 0.10353969037532806, "learning_rate": 0.002, "loss": 2.3452, "step": 101800 }, { "epoch": 0.3935689876451578, "grad_norm": 0.0988667905330658, "learning_rate": 0.002, "loss": 2.3572, "step": 101810 }, { "epoch": 0.39360764484854105, "grad_norm": 0.10777206718921661, "learning_rate": 0.002, "loss": 2.3503, "step": 101820 }, { "epoch": 0.39364630205192436, "grad_norm": 0.14098067581653595, "learning_rate": 0.002, "loss": 2.3521, "step": 101830 }, { "epoch": 0.3936849592553076, "grad_norm": 0.10238955169916153, "learning_rate": 0.002, "loss": 2.3519, "step": 101840 }, { "epoch": 0.3937236164586909, "grad_norm": 0.1051107719540596, "learning_rate": 0.002, "loss": 2.3434, "step": 101850 }, { "epoch": 0.39376227366207417, "grad_norm": 0.1134108379483223, "learning_rate": 0.002, "loss": 2.3354, "step": 101860 }, { "epoch": 0.3938009308654575, "grad_norm": 0.11773721873760223, "learning_rate": 0.002, "loss": 2.3692, "step": 101870 }, { "epoch": 0.39383958806884073, "grad_norm": 0.11069432646036148, "learning_rate": 0.002, "loss": 2.3494, "step": 101880 }, { "epoch": 0.39387824527222404, "grad_norm": 0.10330471396446228, "learning_rate": 0.002, "loss": 2.3426, "step": 101890 }, { "epoch": 0.3939169024756073, "grad_norm": 0.09684577584266663, "learning_rate": 0.002, "loss": 2.3536, "step": 101900 }, { "epoch": 0.3939555596789906, "grad_norm": 0.11704834550619125, "learning_rate": 0.002, "loss": 2.3623, "step": 101910 }, { "epoch": 0.39399421688237385, "grad_norm": 0.109725721180439, "learning_rate": 0.002, "loss": 2.3518, "step": 101920 }, { "epoch": 0.39403287408575716, "grad_norm": 0.09688954055309296, "learning_rate": 0.002, "loss": 2.3528, "step": 101930 }, { "epoch": 0.3940715312891404, "grad_norm": 0.13549424707889557, "learning_rate": 0.002, "loss": 2.3574, "step": 101940 }, { "epoch": 0.3941101884925237, "grad_norm": 0.09275535494089127, "learning_rate": 0.002, "loss": 2.3498, "step": 101950 }, { "epoch": 0.39414884569590697, "grad_norm": 0.09769611805677414, "learning_rate": 0.002, "loss": 2.3538, "step": 101960 }, { "epoch": 0.3941875028992903, "grad_norm": 0.1042463481426239, "learning_rate": 0.002, "loss": 2.346, "step": 101970 }, { "epoch": 0.39422616010267353, "grad_norm": 0.10281561315059662, "learning_rate": 0.002, "loss": 2.3554, "step": 101980 }, { "epoch": 0.39426481730605684, "grad_norm": 0.11353082209825516, "learning_rate": 0.002, "loss": 2.3381, "step": 101990 }, { "epoch": 0.3943034745094401, "grad_norm": 0.11525498330593109, "learning_rate": 0.002, "loss": 2.3479, "step": 102000 }, { "epoch": 0.39434213171282334, "grad_norm": 0.09364749491214752, "learning_rate": 0.002, "loss": 2.3435, "step": 102010 }, { "epoch": 0.39438078891620665, "grad_norm": 0.09514220058917999, "learning_rate": 0.002, "loss": 2.3519, "step": 102020 }, { "epoch": 0.3944194461195899, "grad_norm": 0.09396491944789886, "learning_rate": 0.002, "loss": 2.3584, "step": 102030 }, { "epoch": 0.3944581033229732, "grad_norm": 0.09882369637489319, "learning_rate": 0.002, "loss": 2.3494, "step": 102040 }, { "epoch": 0.39449676052635646, "grad_norm": 0.10531751811504364, "learning_rate": 0.002, "loss": 2.3565, "step": 102050 }, { "epoch": 0.39453541772973977, "grad_norm": 0.10298800468444824, "learning_rate": 0.002, "loss": 2.3588, "step": 102060 }, { "epoch": 0.394574074933123, "grad_norm": 0.10079105943441391, "learning_rate": 0.002, "loss": 2.3572, "step": 102070 }, { "epoch": 0.39461273213650633, "grad_norm": 0.1270875334739685, "learning_rate": 0.002, "loss": 2.3524, "step": 102080 }, { "epoch": 0.3946513893398896, "grad_norm": 0.1297849863767624, "learning_rate": 0.002, "loss": 2.3694, "step": 102090 }, { "epoch": 0.3946900465432729, "grad_norm": 0.09605436772108078, "learning_rate": 0.002, "loss": 2.3425, "step": 102100 }, { "epoch": 0.39472870374665614, "grad_norm": 0.09594700485467911, "learning_rate": 0.002, "loss": 2.3424, "step": 102110 }, { "epoch": 0.39476736095003945, "grad_norm": 0.1143462061882019, "learning_rate": 0.002, "loss": 2.3477, "step": 102120 }, { "epoch": 0.3948060181534227, "grad_norm": 0.11425799131393433, "learning_rate": 0.002, "loss": 2.3529, "step": 102130 }, { "epoch": 0.394844675356806, "grad_norm": 0.10427649319171906, "learning_rate": 0.002, "loss": 2.3552, "step": 102140 }, { "epoch": 0.39488333256018926, "grad_norm": 0.09673214703798294, "learning_rate": 0.002, "loss": 2.3413, "step": 102150 }, { "epoch": 0.39492198976357257, "grad_norm": 0.10224836319684982, "learning_rate": 0.002, "loss": 2.3486, "step": 102160 }, { "epoch": 0.3949606469669558, "grad_norm": 0.09987068176269531, "learning_rate": 0.002, "loss": 2.3489, "step": 102170 }, { "epoch": 0.3949993041703391, "grad_norm": 0.10207951813936234, "learning_rate": 0.002, "loss": 2.351, "step": 102180 }, { "epoch": 0.3950379613737224, "grad_norm": 0.11631299555301666, "learning_rate": 0.002, "loss": 2.3507, "step": 102190 }, { "epoch": 0.39507661857710563, "grad_norm": 0.10433251410722733, "learning_rate": 0.002, "loss": 2.3438, "step": 102200 }, { "epoch": 0.39511527578048894, "grad_norm": 0.10600210726261139, "learning_rate": 0.002, "loss": 2.3562, "step": 102210 }, { "epoch": 0.3951539329838722, "grad_norm": 0.09988721460103989, "learning_rate": 0.002, "loss": 2.3407, "step": 102220 }, { "epoch": 0.3951925901872555, "grad_norm": 0.10811429470777512, "learning_rate": 0.002, "loss": 2.3435, "step": 102230 }, { "epoch": 0.39523124739063875, "grad_norm": 0.10436130315065384, "learning_rate": 0.002, "loss": 2.368, "step": 102240 }, { "epoch": 0.39526990459402206, "grad_norm": 0.10150985419750214, "learning_rate": 0.002, "loss": 2.3616, "step": 102250 }, { "epoch": 0.3953085617974053, "grad_norm": 0.10846556723117828, "learning_rate": 0.002, "loss": 2.3493, "step": 102260 }, { "epoch": 0.3953472190007886, "grad_norm": 0.10766957700252533, "learning_rate": 0.002, "loss": 2.3477, "step": 102270 }, { "epoch": 0.39538587620417187, "grad_norm": 0.12314067780971527, "learning_rate": 0.002, "loss": 2.3528, "step": 102280 }, { "epoch": 0.3954245334075552, "grad_norm": 0.1232890635728836, "learning_rate": 0.002, "loss": 2.3519, "step": 102290 }, { "epoch": 0.39546319061093843, "grad_norm": 0.11686611920595169, "learning_rate": 0.002, "loss": 2.3394, "step": 102300 }, { "epoch": 0.39550184781432174, "grad_norm": 0.093016117811203, "learning_rate": 0.002, "loss": 2.3598, "step": 102310 }, { "epoch": 0.395540505017705, "grad_norm": 0.11799125373363495, "learning_rate": 0.002, "loss": 2.349, "step": 102320 }, { "epoch": 0.3955791622210883, "grad_norm": 0.10260337591171265, "learning_rate": 0.002, "loss": 2.3445, "step": 102330 }, { "epoch": 0.39561781942447155, "grad_norm": 0.12818340957164764, "learning_rate": 0.002, "loss": 2.3507, "step": 102340 }, { "epoch": 0.39565647662785486, "grad_norm": 0.09108906984329224, "learning_rate": 0.002, "loss": 2.3402, "step": 102350 }, { "epoch": 0.3956951338312381, "grad_norm": 0.10101552307605743, "learning_rate": 0.002, "loss": 2.3391, "step": 102360 }, { "epoch": 0.3957337910346214, "grad_norm": 0.1083957850933075, "learning_rate": 0.002, "loss": 2.342, "step": 102370 }, { "epoch": 0.39577244823800467, "grad_norm": 0.10751020908355713, "learning_rate": 0.002, "loss": 2.3399, "step": 102380 }, { "epoch": 0.3958111054413879, "grad_norm": 0.12719103693962097, "learning_rate": 0.002, "loss": 2.3603, "step": 102390 }, { "epoch": 0.39584976264477123, "grad_norm": 0.09584097564220428, "learning_rate": 0.002, "loss": 2.3504, "step": 102400 }, { "epoch": 0.3958884198481545, "grad_norm": 0.11484284698963165, "learning_rate": 0.002, "loss": 2.3417, "step": 102410 }, { "epoch": 0.3959270770515378, "grad_norm": 0.10826011002063751, "learning_rate": 0.002, "loss": 2.3706, "step": 102420 }, { "epoch": 0.39596573425492104, "grad_norm": 0.10017026960849762, "learning_rate": 0.002, "loss": 2.3452, "step": 102430 }, { "epoch": 0.39600439145830435, "grad_norm": 0.10661051422357559, "learning_rate": 0.002, "loss": 2.334, "step": 102440 }, { "epoch": 0.3960430486616876, "grad_norm": 0.10739060491323471, "learning_rate": 0.002, "loss": 2.3385, "step": 102450 }, { "epoch": 0.3960817058650709, "grad_norm": 0.1067962646484375, "learning_rate": 0.002, "loss": 2.3383, "step": 102460 }, { "epoch": 0.39612036306845416, "grad_norm": 0.11817949265241623, "learning_rate": 0.002, "loss": 2.3583, "step": 102470 }, { "epoch": 0.39615902027183747, "grad_norm": 0.11644504964351654, "learning_rate": 0.002, "loss": 2.3568, "step": 102480 }, { "epoch": 0.3961976774752207, "grad_norm": 0.11023860424757004, "learning_rate": 0.002, "loss": 2.3504, "step": 102490 }, { "epoch": 0.39623633467860403, "grad_norm": 0.3637651205062866, "learning_rate": 0.002, "loss": 2.3418, "step": 102500 }, { "epoch": 0.3962749918819873, "grad_norm": 0.0973176658153534, "learning_rate": 0.002, "loss": 2.3475, "step": 102510 }, { "epoch": 0.3963136490853706, "grad_norm": 0.10590552538633347, "learning_rate": 0.002, "loss": 2.3443, "step": 102520 }, { "epoch": 0.39635230628875384, "grad_norm": 0.10570419579744339, "learning_rate": 0.002, "loss": 2.355, "step": 102530 }, { "epoch": 0.39639096349213715, "grad_norm": 0.09971023350954056, "learning_rate": 0.002, "loss": 2.3535, "step": 102540 }, { "epoch": 0.3964296206955204, "grad_norm": 0.10089641809463501, "learning_rate": 0.002, "loss": 2.3513, "step": 102550 }, { "epoch": 0.3964682778989037, "grad_norm": 0.12613315880298615, "learning_rate": 0.002, "loss": 2.3457, "step": 102560 }, { "epoch": 0.39650693510228696, "grad_norm": 0.10003890842199326, "learning_rate": 0.002, "loss": 2.3381, "step": 102570 }, { "epoch": 0.3965455923056702, "grad_norm": 0.12416119128465652, "learning_rate": 0.002, "loss": 2.3561, "step": 102580 }, { "epoch": 0.3965842495090535, "grad_norm": 0.10028557479381561, "learning_rate": 0.002, "loss": 2.3375, "step": 102590 }, { "epoch": 0.3966229067124368, "grad_norm": 0.11924396455287933, "learning_rate": 0.002, "loss": 2.3465, "step": 102600 }, { "epoch": 0.3966615639158201, "grad_norm": 0.09663531929254532, "learning_rate": 0.002, "loss": 2.3528, "step": 102610 }, { "epoch": 0.39670022111920333, "grad_norm": 0.09673736989498138, "learning_rate": 0.002, "loss": 2.3511, "step": 102620 }, { "epoch": 0.39673887832258664, "grad_norm": 0.10709283500909805, "learning_rate": 0.002, "loss": 2.3508, "step": 102630 }, { "epoch": 0.3967775355259699, "grad_norm": 0.104097880423069, "learning_rate": 0.002, "loss": 2.3481, "step": 102640 }, { "epoch": 0.3968161927293532, "grad_norm": 0.11647637188434601, "learning_rate": 0.002, "loss": 2.3618, "step": 102650 }, { "epoch": 0.39685484993273645, "grad_norm": 0.09082239121198654, "learning_rate": 0.002, "loss": 2.361, "step": 102660 }, { "epoch": 0.39689350713611976, "grad_norm": 0.10260343551635742, "learning_rate": 0.002, "loss": 2.3493, "step": 102670 }, { "epoch": 0.396932164339503, "grad_norm": 0.09805385768413544, "learning_rate": 0.002, "loss": 2.3531, "step": 102680 }, { "epoch": 0.3969708215428863, "grad_norm": 0.11476589739322662, "learning_rate": 0.002, "loss": 2.3401, "step": 102690 }, { "epoch": 0.3970094787462696, "grad_norm": 0.13257639110088348, "learning_rate": 0.002, "loss": 2.3345, "step": 102700 }, { "epoch": 0.3970481359496529, "grad_norm": 0.12035766243934631, "learning_rate": 0.002, "loss": 2.3586, "step": 102710 }, { "epoch": 0.39708679315303613, "grad_norm": 0.09962354600429535, "learning_rate": 0.002, "loss": 2.3388, "step": 102720 }, { "epoch": 0.39712545035641944, "grad_norm": 0.11391833424568176, "learning_rate": 0.002, "loss": 2.3499, "step": 102730 }, { "epoch": 0.3971641075598027, "grad_norm": 0.09128336608409882, "learning_rate": 0.002, "loss": 2.333, "step": 102740 }, { "epoch": 0.39720276476318594, "grad_norm": 0.10476367920637131, "learning_rate": 0.002, "loss": 2.3402, "step": 102750 }, { "epoch": 0.39724142196656925, "grad_norm": 0.11733968555927277, "learning_rate": 0.002, "loss": 2.3464, "step": 102760 }, { "epoch": 0.3972800791699525, "grad_norm": 0.0970330610871315, "learning_rate": 0.002, "loss": 2.3602, "step": 102770 }, { "epoch": 0.3973187363733358, "grad_norm": 0.10360578447580338, "learning_rate": 0.002, "loss": 2.3499, "step": 102780 }, { "epoch": 0.39735739357671906, "grad_norm": 0.11639165133237839, "learning_rate": 0.002, "loss": 2.3466, "step": 102790 }, { "epoch": 0.3973960507801024, "grad_norm": 0.1210801899433136, "learning_rate": 0.002, "loss": 2.3356, "step": 102800 }, { "epoch": 0.3974347079834856, "grad_norm": 0.09269449859857559, "learning_rate": 0.002, "loss": 2.3434, "step": 102810 }, { "epoch": 0.39747336518686893, "grad_norm": 0.14338408410549164, "learning_rate": 0.002, "loss": 2.3497, "step": 102820 }, { "epoch": 0.3975120223902522, "grad_norm": 0.09909947961568832, "learning_rate": 0.002, "loss": 2.3512, "step": 102830 }, { "epoch": 0.3975506795936355, "grad_norm": 0.1005893275141716, "learning_rate": 0.002, "loss": 2.3548, "step": 102840 }, { "epoch": 0.39758933679701874, "grad_norm": 0.11026202142238617, "learning_rate": 0.002, "loss": 2.3484, "step": 102850 }, { "epoch": 0.39762799400040205, "grad_norm": 0.12471762299537659, "learning_rate": 0.002, "loss": 2.3611, "step": 102860 }, { "epoch": 0.3976666512037853, "grad_norm": 0.10944908857345581, "learning_rate": 0.002, "loss": 2.3485, "step": 102870 }, { "epoch": 0.3977053084071686, "grad_norm": 0.10404592752456665, "learning_rate": 0.002, "loss": 2.3514, "step": 102880 }, { "epoch": 0.39774396561055186, "grad_norm": 0.12181949615478516, "learning_rate": 0.002, "loss": 2.3637, "step": 102890 }, { "epoch": 0.39778262281393517, "grad_norm": 0.10199355334043503, "learning_rate": 0.002, "loss": 2.3307, "step": 102900 }, { "epoch": 0.3978212800173184, "grad_norm": 0.10170946270227432, "learning_rate": 0.002, "loss": 2.3581, "step": 102910 }, { "epoch": 0.39785993722070173, "grad_norm": 0.12267200648784637, "learning_rate": 0.002, "loss": 2.3589, "step": 102920 }, { "epoch": 0.397898594424085, "grad_norm": 0.11017567664384842, "learning_rate": 0.002, "loss": 2.3386, "step": 102930 }, { "epoch": 0.39793725162746824, "grad_norm": 0.1563769429922104, "learning_rate": 0.002, "loss": 2.3444, "step": 102940 }, { "epoch": 0.39797590883085154, "grad_norm": 0.10438354313373566, "learning_rate": 0.002, "loss": 2.3502, "step": 102950 }, { "epoch": 0.3980145660342348, "grad_norm": 0.10634902864694595, "learning_rate": 0.002, "loss": 2.3623, "step": 102960 }, { "epoch": 0.3980532232376181, "grad_norm": 0.0988369733095169, "learning_rate": 0.002, "loss": 2.34, "step": 102970 }, { "epoch": 0.39809188044100136, "grad_norm": 0.09615012258291245, "learning_rate": 0.002, "loss": 2.3484, "step": 102980 }, { "epoch": 0.39813053764438466, "grad_norm": 0.11945393681526184, "learning_rate": 0.002, "loss": 2.3369, "step": 102990 }, { "epoch": 0.3981691948477679, "grad_norm": 0.10153481364250183, "learning_rate": 0.002, "loss": 2.3499, "step": 103000 }, { "epoch": 0.3982078520511512, "grad_norm": 0.10002585500478745, "learning_rate": 0.002, "loss": 2.3519, "step": 103010 }, { "epoch": 0.3982465092545345, "grad_norm": 0.11555618792772293, "learning_rate": 0.002, "loss": 2.3507, "step": 103020 }, { "epoch": 0.3982851664579178, "grad_norm": 0.10872345417737961, "learning_rate": 0.002, "loss": 2.3404, "step": 103030 }, { "epoch": 0.39832382366130104, "grad_norm": 0.11528029292821884, "learning_rate": 0.002, "loss": 2.3606, "step": 103040 }, { "epoch": 0.39836248086468434, "grad_norm": 0.10003827512264252, "learning_rate": 0.002, "loss": 2.329, "step": 103050 }, { "epoch": 0.3984011380680676, "grad_norm": 0.11331178992986679, "learning_rate": 0.002, "loss": 2.3398, "step": 103060 }, { "epoch": 0.3984397952714509, "grad_norm": 0.12022735923528671, "learning_rate": 0.002, "loss": 2.3465, "step": 103070 }, { "epoch": 0.39847845247483415, "grad_norm": 0.10411456227302551, "learning_rate": 0.002, "loss": 2.3571, "step": 103080 }, { "epoch": 0.39851710967821746, "grad_norm": 0.10312678664922714, "learning_rate": 0.002, "loss": 2.3434, "step": 103090 }, { "epoch": 0.3985557668816007, "grad_norm": 0.11242202669382095, "learning_rate": 0.002, "loss": 2.3419, "step": 103100 }, { "epoch": 0.398594424084984, "grad_norm": 0.10822474956512451, "learning_rate": 0.002, "loss": 2.3318, "step": 103110 }, { "epoch": 0.3986330812883673, "grad_norm": 0.10457213968038559, "learning_rate": 0.002, "loss": 2.3593, "step": 103120 }, { "epoch": 0.3986717384917505, "grad_norm": 0.11331282556056976, "learning_rate": 0.002, "loss": 2.3561, "step": 103130 }, { "epoch": 0.39871039569513383, "grad_norm": 0.10345172882080078, "learning_rate": 0.002, "loss": 2.3394, "step": 103140 }, { "epoch": 0.3987490528985171, "grad_norm": 0.09362413734197617, "learning_rate": 0.002, "loss": 2.3497, "step": 103150 }, { "epoch": 0.3987877101019004, "grad_norm": 0.09951414912939072, "learning_rate": 0.002, "loss": 2.3505, "step": 103160 }, { "epoch": 0.39882636730528365, "grad_norm": 0.10795580595731735, "learning_rate": 0.002, "loss": 2.3455, "step": 103170 }, { "epoch": 0.39886502450866695, "grad_norm": 0.10748409479856491, "learning_rate": 0.002, "loss": 2.3509, "step": 103180 }, { "epoch": 0.3989036817120502, "grad_norm": 0.10139096528291702, "learning_rate": 0.002, "loss": 2.3517, "step": 103190 }, { "epoch": 0.3989423389154335, "grad_norm": 0.10073640942573547, "learning_rate": 0.002, "loss": 2.3432, "step": 103200 }, { "epoch": 0.39898099611881677, "grad_norm": 0.09807011485099792, "learning_rate": 0.002, "loss": 2.3585, "step": 103210 }, { "epoch": 0.3990196533222001, "grad_norm": 0.10768158733844757, "learning_rate": 0.002, "loss": 2.3434, "step": 103220 }, { "epoch": 0.3990583105255833, "grad_norm": 0.10457302629947662, "learning_rate": 0.002, "loss": 2.3529, "step": 103230 }, { "epoch": 0.39909696772896663, "grad_norm": 0.12398704141378403, "learning_rate": 0.002, "loss": 2.3441, "step": 103240 }, { "epoch": 0.3991356249323499, "grad_norm": 0.0993892028927803, "learning_rate": 0.002, "loss": 2.3473, "step": 103250 }, { "epoch": 0.3991742821357332, "grad_norm": 0.10782822966575623, "learning_rate": 0.002, "loss": 2.3513, "step": 103260 }, { "epoch": 0.39921293933911645, "grad_norm": 0.10682012140750885, "learning_rate": 0.002, "loss": 2.3567, "step": 103270 }, { "epoch": 0.39925159654249975, "grad_norm": 0.10554670542478561, "learning_rate": 0.002, "loss": 2.3378, "step": 103280 }, { "epoch": 0.399290253745883, "grad_norm": 0.11510240286588669, "learning_rate": 0.002, "loss": 2.3545, "step": 103290 }, { "epoch": 0.3993289109492663, "grad_norm": 0.10856513679027557, "learning_rate": 0.002, "loss": 2.3552, "step": 103300 }, { "epoch": 0.39936756815264957, "grad_norm": 0.10347574204206467, "learning_rate": 0.002, "loss": 2.3527, "step": 103310 }, { "epoch": 0.3994062253560328, "grad_norm": 1.0058751106262207, "learning_rate": 0.002, "loss": 2.3423, "step": 103320 }, { "epoch": 0.3994448825594161, "grad_norm": 0.13041211664676666, "learning_rate": 0.002, "loss": 2.3599, "step": 103330 }, { "epoch": 0.3994835397627994, "grad_norm": 0.10741354525089264, "learning_rate": 0.002, "loss": 2.3544, "step": 103340 }, { "epoch": 0.3995221969661827, "grad_norm": 0.11092893034219742, "learning_rate": 0.002, "loss": 2.3549, "step": 103350 }, { "epoch": 0.39956085416956594, "grad_norm": 0.08905737102031708, "learning_rate": 0.002, "loss": 2.3547, "step": 103360 }, { "epoch": 0.39959951137294925, "grad_norm": 0.10552625358104706, "learning_rate": 0.002, "loss": 2.3537, "step": 103370 }, { "epoch": 0.3996381685763325, "grad_norm": 0.10402737557888031, "learning_rate": 0.002, "loss": 2.3484, "step": 103380 }, { "epoch": 0.3996768257797158, "grad_norm": 0.1106201559305191, "learning_rate": 0.002, "loss": 2.334, "step": 103390 }, { "epoch": 0.39971548298309906, "grad_norm": 0.1093304455280304, "learning_rate": 0.002, "loss": 2.3487, "step": 103400 }, { "epoch": 0.39975414018648237, "grad_norm": 0.09958748519420624, "learning_rate": 0.002, "loss": 2.3538, "step": 103410 }, { "epoch": 0.3997927973898656, "grad_norm": 0.11973224580287933, "learning_rate": 0.002, "loss": 2.3302, "step": 103420 }, { "epoch": 0.3998314545932489, "grad_norm": 0.10258115828037262, "learning_rate": 0.002, "loss": 2.3517, "step": 103430 }, { "epoch": 0.3998701117966322, "grad_norm": 0.3839898705482483, "learning_rate": 0.002, "loss": 2.328, "step": 103440 }, { "epoch": 0.3999087690000155, "grad_norm": 0.11322489380836487, "learning_rate": 0.002, "loss": 2.3522, "step": 103450 }, { "epoch": 0.39994742620339874, "grad_norm": 0.10540080815553665, "learning_rate": 0.002, "loss": 2.3561, "step": 103460 }, { "epoch": 0.39998608340678204, "grad_norm": 0.11972621828317642, "learning_rate": 0.002, "loss": 2.359, "step": 103470 }, { "epoch": 0.4000247406101653, "grad_norm": 0.11651024967432022, "learning_rate": 0.002, "loss": 2.349, "step": 103480 }, { "epoch": 0.40006339781354855, "grad_norm": 0.11100133508443832, "learning_rate": 0.002, "loss": 2.3435, "step": 103490 }, { "epoch": 0.40010205501693186, "grad_norm": 0.11071529984474182, "learning_rate": 0.002, "loss": 2.3312, "step": 103500 }, { "epoch": 0.4001407122203151, "grad_norm": 0.10063956677913666, "learning_rate": 0.002, "loss": 2.3264, "step": 103510 }, { "epoch": 0.4001793694236984, "grad_norm": 0.12929409742355347, "learning_rate": 0.002, "loss": 2.3438, "step": 103520 }, { "epoch": 0.40021802662708167, "grad_norm": 0.12283274531364441, "learning_rate": 0.002, "loss": 2.352, "step": 103530 }, { "epoch": 0.400256683830465, "grad_norm": 0.09345348924398422, "learning_rate": 0.002, "loss": 2.3707, "step": 103540 }, { "epoch": 0.40029534103384823, "grad_norm": 0.1171153336763382, "learning_rate": 0.002, "loss": 2.3561, "step": 103550 }, { "epoch": 0.40033399823723154, "grad_norm": 0.10845030099153519, "learning_rate": 0.002, "loss": 2.3435, "step": 103560 }, { "epoch": 0.4003726554406148, "grad_norm": 0.1053931936621666, "learning_rate": 0.002, "loss": 2.3443, "step": 103570 }, { "epoch": 0.4004113126439981, "grad_norm": 0.13287438452243805, "learning_rate": 0.002, "loss": 2.3542, "step": 103580 }, { "epoch": 0.40044996984738135, "grad_norm": 0.10590993613004684, "learning_rate": 0.002, "loss": 2.3439, "step": 103590 }, { "epoch": 0.40048862705076466, "grad_norm": 0.10484614968299866, "learning_rate": 0.002, "loss": 2.3386, "step": 103600 }, { "epoch": 0.4005272842541479, "grad_norm": 0.09930567443370819, "learning_rate": 0.002, "loss": 2.3601, "step": 103610 }, { "epoch": 0.4005659414575312, "grad_norm": 0.1418037712574005, "learning_rate": 0.002, "loss": 2.3417, "step": 103620 }, { "epoch": 0.40060459866091447, "grad_norm": 0.11796095222234726, "learning_rate": 0.002, "loss": 2.3513, "step": 103630 }, { "epoch": 0.4006432558642978, "grad_norm": 0.10199625045061111, "learning_rate": 0.002, "loss": 2.3547, "step": 103640 }, { "epoch": 0.40068191306768103, "grad_norm": 0.11979342997074127, "learning_rate": 0.002, "loss": 2.3509, "step": 103650 }, { "epoch": 0.40072057027106434, "grad_norm": 0.1384526789188385, "learning_rate": 0.002, "loss": 2.3395, "step": 103660 }, { "epoch": 0.4007592274744476, "grad_norm": 0.09928222000598907, "learning_rate": 0.002, "loss": 2.3528, "step": 103670 }, { "epoch": 0.40079788467783084, "grad_norm": 0.10234517604112625, "learning_rate": 0.002, "loss": 2.3394, "step": 103680 }, { "epoch": 0.40083654188121415, "grad_norm": 0.10509918630123138, "learning_rate": 0.002, "loss": 2.342, "step": 103690 }, { "epoch": 0.4008751990845974, "grad_norm": 0.12255855649709702, "learning_rate": 0.002, "loss": 2.3573, "step": 103700 }, { "epoch": 0.4009138562879807, "grad_norm": 0.10494012385606766, "learning_rate": 0.002, "loss": 2.3472, "step": 103710 }, { "epoch": 0.40095251349136396, "grad_norm": 0.09403003007173538, "learning_rate": 0.002, "loss": 2.3393, "step": 103720 }, { "epoch": 0.40099117069474727, "grad_norm": 0.1168377697467804, "learning_rate": 0.002, "loss": 2.356, "step": 103730 }, { "epoch": 0.4010298278981305, "grad_norm": 0.10256733745336533, "learning_rate": 0.002, "loss": 2.3514, "step": 103740 }, { "epoch": 0.4010684851015138, "grad_norm": 0.11215299367904663, "learning_rate": 0.002, "loss": 2.3463, "step": 103750 }, { "epoch": 0.4011071423048971, "grad_norm": 0.10858602821826935, "learning_rate": 0.002, "loss": 2.3551, "step": 103760 }, { "epoch": 0.4011457995082804, "grad_norm": 0.10192175954580307, "learning_rate": 0.002, "loss": 2.3526, "step": 103770 }, { "epoch": 0.40118445671166364, "grad_norm": 0.10576779395341873, "learning_rate": 0.002, "loss": 2.352, "step": 103780 }, { "epoch": 0.40122311391504695, "grad_norm": 0.12473911792039871, "learning_rate": 0.002, "loss": 2.3245, "step": 103790 }, { "epoch": 0.4012617711184302, "grad_norm": 0.10249465703964233, "learning_rate": 0.002, "loss": 2.3651, "step": 103800 }, { "epoch": 0.4013004283218135, "grad_norm": 0.11050129681825638, "learning_rate": 0.002, "loss": 2.3407, "step": 103810 }, { "epoch": 0.40133908552519676, "grad_norm": 0.11205238103866577, "learning_rate": 0.002, "loss": 2.3554, "step": 103820 }, { "epoch": 0.40137774272858007, "grad_norm": 0.12690870463848114, "learning_rate": 0.002, "loss": 2.3516, "step": 103830 }, { "epoch": 0.4014163999319633, "grad_norm": 0.10052044689655304, "learning_rate": 0.002, "loss": 2.3428, "step": 103840 }, { "epoch": 0.4014550571353466, "grad_norm": 0.10286065191030502, "learning_rate": 0.002, "loss": 2.3471, "step": 103850 }, { "epoch": 0.4014937143387299, "grad_norm": 0.11306209862232208, "learning_rate": 0.002, "loss": 2.3508, "step": 103860 }, { "epoch": 0.40153237154211313, "grad_norm": 0.10765783488750458, "learning_rate": 0.002, "loss": 2.3359, "step": 103870 }, { "epoch": 0.40157102874549644, "grad_norm": 0.0955042615532875, "learning_rate": 0.002, "loss": 2.328, "step": 103880 }, { "epoch": 0.4016096859488797, "grad_norm": 0.10637594014406204, "learning_rate": 0.002, "loss": 2.3285, "step": 103890 }, { "epoch": 0.401648343152263, "grad_norm": 0.1104235053062439, "learning_rate": 0.002, "loss": 2.3579, "step": 103900 }, { "epoch": 0.40168700035564625, "grad_norm": 0.09370309114456177, "learning_rate": 0.002, "loss": 2.3597, "step": 103910 }, { "epoch": 0.40172565755902956, "grad_norm": 0.12469807267189026, "learning_rate": 0.002, "loss": 2.3485, "step": 103920 }, { "epoch": 0.4017643147624128, "grad_norm": 0.12554563581943512, "learning_rate": 0.002, "loss": 2.3642, "step": 103930 }, { "epoch": 0.4018029719657961, "grad_norm": 0.10871759802103043, "learning_rate": 0.002, "loss": 2.357, "step": 103940 }, { "epoch": 0.40184162916917937, "grad_norm": 0.10306213796138763, "learning_rate": 0.002, "loss": 2.3645, "step": 103950 }, { "epoch": 0.4018802863725627, "grad_norm": 0.11437251418828964, "learning_rate": 0.002, "loss": 2.3589, "step": 103960 }, { "epoch": 0.40191894357594593, "grad_norm": 0.09903883934020996, "learning_rate": 0.002, "loss": 2.3531, "step": 103970 }, { "epoch": 0.40195760077932924, "grad_norm": 0.10756850987672806, "learning_rate": 0.002, "loss": 2.3352, "step": 103980 }, { "epoch": 0.4019962579827125, "grad_norm": 0.10248870402574539, "learning_rate": 0.002, "loss": 2.3492, "step": 103990 }, { "epoch": 0.4020349151860958, "grad_norm": 0.11387482285499573, "learning_rate": 0.002, "loss": 2.3575, "step": 104000 }, { "epoch": 0.40207357238947905, "grad_norm": 0.10296018421649933, "learning_rate": 0.002, "loss": 2.3484, "step": 104010 }, { "epoch": 0.40211222959286236, "grad_norm": 0.11493543535470963, "learning_rate": 0.002, "loss": 2.3519, "step": 104020 }, { "epoch": 0.4021508867962456, "grad_norm": 0.11589168012142181, "learning_rate": 0.002, "loss": 2.3637, "step": 104030 }, { "epoch": 0.4021895439996289, "grad_norm": 0.09854254126548767, "learning_rate": 0.002, "loss": 2.3477, "step": 104040 }, { "epoch": 0.40222820120301217, "grad_norm": 0.10222586244344711, "learning_rate": 0.002, "loss": 2.3519, "step": 104050 }, { "epoch": 0.4022668584063954, "grad_norm": 0.11131079494953156, "learning_rate": 0.002, "loss": 2.3577, "step": 104060 }, { "epoch": 0.40230551560977873, "grad_norm": 0.11473394930362701, "learning_rate": 0.002, "loss": 2.3509, "step": 104070 }, { "epoch": 0.402344172813162, "grad_norm": 0.10697763413190842, "learning_rate": 0.002, "loss": 2.3433, "step": 104080 }, { "epoch": 0.4023828300165453, "grad_norm": 0.12346579879522324, "learning_rate": 0.002, "loss": 2.3614, "step": 104090 }, { "epoch": 0.40242148721992854, "grad_norm": 0.11302345246076584, "learning_rate": 0.002, "loss": 2.3522, "step": 104100 }, { "epoch": 0.40246014442331185, "grad_norm": 0.09973450005054474, "learning_rate": 0.002, "loss": 2.3495, "step": 104110 }, { "epoch": 0.4024988016266951, "grad_norm": 0.10161063820123672, "learning_rate": 0.002, "loss": 2.3274, "step": 104120 }, { "epoch": 0.4025374588300784, "grad_norm": 0.10558301210403442, "learning_rate": 0.002, "loss": 2.3555, "step": 104130 }, { "epoch": 0.40257611603346166, "grad_norm": 0.10620249807834625, "learning_rate": 0.002, "loss": 2.3285, "step": 104140 }, { "epoch": 0.40261477323684497, "grad_norm": 0.10933729261159897, "learning_rate": 0.002, "loss": 2.3449, "step": 104150 }, { "epoch": 0.4026534304402282, "grad_norm": 0.11010383814573288, "learning_rate": 0.002, "loss": 2.3423, "step": 104160 }, { "epoch": 0.40269208764361153, "grad_norm": 0.10498680174350739, "learning_rate": 0.002, "loss": 2.3579, "step": 104170 }, { "epoch": 0.4027307448469948, "grad_norm": 0.1227358728647232, "learning_rate": 0.002, "loss": 2.353, "step": 104180 }, { "epoch": 0.4027694020503781, "grad_norm": 0.1105983778834343, "learning_rate": 0.002, "loss": 2.3538, "step": 104190 }, { "epoch": 0.40280805925376134, "grad_norm": 0.12710198760032654, "learning_rate": 0.002, "loss": 2.3577, "step": 104200 }, { "epoch": 0.40284671645714465, "grad_norm": 0.10611477494239807, "learning_rate": 0.002, "loss": 2.3573, "step": 104210 }, { "epoch": 0.4028853736605279, "grad_norm": 0.10403914749622345, "learning_rate": 0.002, "loss": 2.3503, "step": 104220 }, { "epoch": 0.4029240308639112, "grad_norm": 0.10941457003355026, "learning_rate": 0.002, "loss": 2.344, "step": 104230 }, { "epoch": 0.40296268806729446, "grad_norm": 0.10502897948026657, "learning_rate": 0.002, "loss": 2.3481, "step": 104240 }, { "epoch": 0.4030013452706777, "grad_norm": 0.10019665956497192, "learning_rate": 0.002, "loss": 2.3431, "step": 104250 }, { "epoch": 0.403040002474061, "grad_norm": 0.13944409787654877, "learning_rate": 0.002, "loss": 2.3311, "step": 104260 }, { "epoch": 0.4030786596774443, "grad_norm": 0.101788729429245, "learning_rate": 0.002, "loss": 2.3625, "step": 104270 }, { "epoch": 0.4031173168808276, "grad_norm": 0.10548502206802368, "learning_rate": 0.002, "loss": 2.3582, "step": 104280 }, { "epoch": 0.40315597408421083, "grad_norm": 0.11913346499204636, "learning_rate": 0.002, "loss": 2.3496, "step": 104290 }, { "epoch": 0.40319463128759414, "grad_norm": 0.10414118319749832, "learning_rate": 0.002, "loss": 2.3485, "step": 104300 }, { "epoch": 0.4032332884909774, "grad_norm": 0.10877209901809692, "learning_rate": 0.002, "loss": 2.3501, "step": 104310 }, { "epoch": 0.4032719456943607, "grad_norm": 0.10864819586277008, "learning_rate": 0.002, "loss": 2.3545, "step": 104320 }, { "epoch": 0.40331060289774395, "grad_norm": 0.09370414912700653, "learning_rate": 0.002, "loss": 2.356, "step": 104330 }, { "epoch": 0.40334926010112726, "grad_norm": 0.0959504172205925, "learning_rate": 0.002, "loss": 2.3601, "step": 104340 }, { "epoch": 0.4033879173045105, "grad_norm": 0.11955036967992783, "learning_rate": 0.002, "loss": 2.357, "step": 104350 }, { "epoch": 0.4034265745078938, "grad_norm": 0.10818187147378922, "learning_rate": 0.002, "loss": 2.3492, "step": 104360 }, { "epoch": 0.4034652317112771, "grad_norm": 0.10869024693965912, "learning_rate": 0.002, "loss": 2.3574, "step": 104370 }, { "epoch": 0.4035038889146604, "grad_norm": 0.11019674688577652, "learning_rate": 0.002, "loss": 2.3394, "step": 104380 }, { "epoch": 0.40354254611804363, "grad_norm": 0.1048496887087822, "learning_rate": 0.002, "loss": 2.3346, "step": 104390 }, { "epoch": 0.40358120332142694, "grad_norm": 0.10483365505933762, "learning_rate": 0.002, "loss": 2.3519, "step": 104400 }, { "epoch": 0.4036198605248102, "grad_norm": 0.10269634425640106, "learning_rate": 0.002, "loss": 2.3422, "step": 104410 }, { "epoch": 0.40365851772819344, "grad_norm": 0.10694144666194916, "learning_rate": 0.002, "loss": 2.3357, "step": 104420 }, { "epoch": 0.40369717493157675, "grad_norm": 0.09867972135543823, "learning_rate": 0.002, "loss": 2.3463, "step": 104430 }, { "epoch": 0.40373583213496, "grad_norm": 0.1175709143280983, "learning_rate": 0.002, "loss": 2.3493, "step": 104440 }, { "epoch": 0.4037744893383433, "grad_norm": 0.09954724460840225, "learning_rate": 0.002, "loss": 2.3433, "step": 104450 }, { "epoch": 0.40381314654172656, "grad_norm": 0.12176381796598434, "learning_rate": 0.002, "loss": 2.3507, "step": 104460 }, { "epoch": 0.40385180374510987, "grad_norm": 0.11036128550767899, "learning_rate": 0.002, "loss": 2.3549, "step": 104470 }, { "epoch": 0.4038904609484931, "grad_norm": 0.10182004421949387, "learning_rate": 0.002, "loss": 2.3391, "step": 104480 }, { "epoch": 0.40392911815187643, "grad_norm": 0.11092185229063034, "learning_rate": 0.002, "loss": 2.3645, "step": 104490 }, { "epoch": 0.4039677753552597, "grad_norm": 0.0999113991856575, "learning_rate": 0.002, "loss": 2.3424, "step": 104500 }, { "epoch": 0.404006432558643, "grad_norm": 0.10253775119781494, "learning_rate": 0.002, "loss": 2.3449, "step": 104510 }, { "epoch": 0.40404508976202624, "grad_norm": 0.10981032252311707, "learning_rate": 0.002, "loss": 2.3534, "step": 104520 }, { "epoch": 0.40408374696540955, "grad_norm": 0.09996292740106583, "learning_rate": 0.002, "loss": 2.3502, "step": 104530 }, { "epoch": 0.4041224041687928, "grad_norm": 0.13263949751853943, "learning_rate": 0.002, "loss": 2.3655, "step": 104540 }, { "epoch": 0.4041610613721761, "grad_norm": 0.11656218022108078, "learning_rate": 0.002, "loss": 2.3354, "step": 104550 }, { "epoch": 0.40419971857555936, "grad_norm": 0.11897653341293335, "learning_rate": 0.002, "loss": 2.342, "step": 104560 }, { "epoch": 0.40423837577894267, "grad_norm": 0.10586295276880264, "learning_rate": 0.002, "loss": 2.3396, "step": 104570 }, { "epoch": 0.4042770329823259, "grad_norm": 0.10061287879943848, "learning_rate": 0.002, "loss": 2.3519, "step": 104580 }, { "epoch": 0.40431569018570923, "grad_norm": 0.1381102204322815, "learning_rate": 0.002, "loss": 2.3482, "step": 104590 }, { "epoch": 0.4043543473890925, "grad_norm": 0.11627264320850372, "learning_rate": 0.002, "loss": 2.3416, "step": 104600 }, { "epoch": 0.40439300459247574, "grad_norm": 0.10314036905765533, "learning_rate": 0.002, "loss": 2.3503, "step": 104610 }, { "epoch": 0.40443166179585904, "grad_norm": 0.10095860809087753, "learning_rate": 0.002, "loss": 2.3408, "step": 104620 }, { "epoch": 0.4044703189992423, "grad_norm": 0.1049952283501625, "learning_rate": 0.002, "loss": 2.3421, "step": 104630 }, { "epoch": 0.4045089762026256, "grad_norm": 0.11221372336149216, "learning_rate": 0.002, "loss": 2.3511, "step": 104640 }, { "epoch": 0.40454763340600886, "grad_norm": 0.10091055184602737, "learning_rate": 0.002, "loss": 2.3514, "step": 104650 }, { "epoch": 0.40458629060939216, "grad_norm": 0.09377772361040115, "learning_rate": 0.002, "loss": 2.3452, "step": 104660 }, { "epoch": 0.4046249478127754, "grad_norm": 0.09856727719306946, "learning_rate": 0.002, "loss": 2.3433, "step": 104670 }, { "epoch": 0.4046636050161587, "grad_norm": 0.11405321210622787, "learning_rate": 0.002, "loss": 2.357, "step": 104680 }, { "epoch": 0.404702262219542, "grad_norm": 0.10105791687965393, "learning_rate": 0.002, "loss": 2.3443, "step": 104690 }, { "epoch": 0.4047409194229253, "grad_norm": 0.10604265332221985, "learning_rate": 0.002, "loss": 2.3482, "step": 104700 }, { "epoch": 0.40477957662630853, "grad_norm": 0.10855577141046524, "learning_rate": 0.002, "loss": 2.3437, "step": 104710 }, { "epoch": 0.40481823382969184, "grad_norm": 0.11611035466194153, "learning_rate": 0.002, "loss": 2.3353, "step": 104720 }, { "epoch": 0.4048568910330751, "grad_norm": 0.1073804497718811, "learning_rate": 0.002, "loss": 2.3625, "step": 104730 }, { "epoch": 0.4048955482364584, "grad_norm": 0.10479304194450378, "learning_rate": 0.002, "loss": 2.3321, "step": 104740 }, { "epoch": 0.40493420543984165, "grad_norm": 0.1819019615650177, "learning_rate": 0.002, "loss": 2.3437, "step": 104750 }, { "epoch": 0.40497286264322496, "grad_norm": 0.11257249116897583, "learning_rate": 0.002, "loss": 2.3606, "step": 104760 }, { "epoch": 0.4050115198466082, "grad_norm": 0.11527591943740845, "learning_rate": 0.002, "loss": 2.3507, "step": 104770 }, { "epoch": 0.4050501770499915, "grad_norm": 0.10990086197853088, "learning_rate": 0.002, "loss": 2.3531, "step": 104780 }, { "epoch": 0.4050888342533748, "grad_norm": 0.11660205572843552, "learning_rate": 0.002, "loss": 2.3661, "step": 104790 }, { "epoch": 0.405127491456758, "grad_norm": 0.08872044831514359, "learning_rate": 0.002, "loss": 2.3542, "step": 104800 }, { "epoch": 0.40516614866014133, "grad_norm": 0.12553000450134277, "learning_rate": 0.002, "loss": 2.3634, "step": 104810 }, { "epoch": 0.4052048058635246, "grad_norm": 0.09895485639572144, "learning_rate": 0.002, "loss": 2.348, "step": 104820 }, { "epoch": 0.4052434630669079, "grad_norm": 0.11463747918605804, "learning_rate": 0.002, "loss": 2.3511, "step": 104830 }, { "epoch": 0.40528212027029115, "grad_norm": 0.09791713953018188, "learning_rate": 0.002, "loss": 2.3493, "step": 104840 }, { "epoch": 0.40532077747367445, "grad_norm": 0.13168174028396606, "learning_rate": 0.002, "loss": 2.3657, "step": 104850 }, { "epoch": 0.4053594346770577, "grad_norm": 0.10816147923469543, "learning_rate": 0.002, "loss": 2.3585, "step": 104860 }, { "epoch": 0.405398091880441, "grad_norm": 0.10885234922170639, "learning_rate": 0.002, "loss": 2.3653, "step": 104870 }, { "epoch": 0.40543674908382427, "grad_norm": 0.09620843082666397, "learning_rate": 0.002, "loss": 2.3429, "step": 104880 }, { "epoch": 0.4054754062872076, "grad_norm": 0.10717884451150894, "learning_rate": 0.002, "loss": 2.3448, "step": 104890 }, { "epoch": 0.4055140634905908, "grad_norm": 0.5260806679725647, "learning_rate": 0.002, "loss": 2.3414, "step": 104900 }, { "epoch": 0.40555272069397413, "grad_norm": 0.11013922840356827, "learning_rate": 0.002, "loss": 2.3537, "step": 104910 }, { "epoch": 0.4055913778973574, "grad_norm": 0.09905974566936493, "learning_rate": 0.002, "loss": 2.3377, "step": 104920 }, { "epoch": 0.4056300351007407, "grad_norm": 0.09774720668792725, "learning_rate": 0.002, "loss": 2.3507, "step": 104930 }, { "epoch": 0.40566869230412395, "grad_norm": 0.10519535094499588, "learning_rate": 0.002, "loss": 2.3528, "step": 104940 }, { "epoch": 0.40570734950750725, "grad_norm": 0.12374908477067947, "learning_rate": 0.002, "loss": 2.3538, "step": 104950 }, { "epoch": 0.4057460067108905, "grad_norm": 0.10747280716896057, "learning_rate": 0.002, "loss": 2.3314, "step": 104960 }, { "epoch": 0.4057846639142738, "grad_norm": 0.10043985396623611, "learning_rate": 0.002, "loss": 2.3495, "step": 104970 }, { "epoch": 0.40582332111765707, "grad_norm": 0.1304740309715271, "learning_rate": 0.002, "loss": 2.3428, "step": 104980 }, { "epoch": 0.4058619783210403, "grad_norm": 0.1062544658780098, "learning_rate": 0.002, "loss": 2.3591, "step": 104990 }, { "epoch": 0.4059006355244236, "grad_norm": 0.10171884298324585, "learning_rate": 0.002, "loss": 2.336, "step": 105000 }, { "epoch": 0.4059392927278069, "grad_norm": 0.11481379717588425, "learning_rate": 0.002, "loss": 2.3618, "step": 105010 }, { "epoch": 0.4059779499311902, "grad_norm": 0.10549386590719223, "learning_rate": 0.002, "loss": 2.3462, "step": 105020 }, { "epoch": 0.40601660713457344, "grad_norm": 0.1036456972360611, "learning_rate": 0.002, "loss": 2.3427, "step": 105030 }, { "epoch": 0.40605526433795675, "grad_norm": 0.0944003090262413, "learning_rate": 0.002, "loss": 2.3458, "step": 105040 }, { "epoch": 0.40609392154134, "grad_norm": 0.11297862231731415, "learning_rate": 0.002, "loss": 2.352, "step": 105050 }, { "epoch": 0.4061325787447233, "grad_norm": 0.11371901631355286, "learning_rate": 0.002, "loss": 2.3389, "step": 105060 }, { "epoch": 0.40617123594810656, "grad_norm": 0.1074351891875267, "learning_rate": 0.002, "loss": 2.3662, "step": 105070 }, { "epoch": 0.40620989315148986, "grad_norm": 0.1140996515750885, "learning_rate": 0.002, "loss": 2.3538, "step": 105080 }, { "epoch": 0.4062485503548731, "grad_norm": 0.10436806827783585, "learning_rate": 0.002, "loss": 2.3539, "step": 105090 }, { "epoch": 0.4062872075582564, "grad_norm": 0.10116159915924072, "learning_rate": 0.002, "loss": 2.3362, "step": 105100 }, { "epoch": 0.4063258647616397, "grad_norm": 0.11260448396205902, "learning_rate": 0.002, "loss": 2.3585, "step": 105110 }, { "epoch": 0.406364521965023, "grad_norm": 0.10626557469367981, "learning_rate": 0.002, "loss": 2.3435, "step": 105120 }, { "epoch": 0.40640317916840624, "grad_norm": 0.11319101601839066, "learning_rate": 0.002, "loss": 2.3566, "step": 105130 }, { "epoch": 0.40644183637178954, "grad_norm": 0.11202666908502579, "learning_rate": 0.002, "loss": 2.3538, "step": 105140 }, { "epoch": 0.4064804935751728, "grad_norm": 0.1096389889717102, "learning_rate": 0.002, "loss": 2.3415, "step": 105150 }, { "epoch": 0.40651915077855605, "grad_norm": 0.10176476836204529, "learning_rate": 0.002, "loss": 2.3483, "step": 105160 }, { "epoch": 0.40655780798193936, "grad_norm": 0.10365190356969833, "learning_rate": 0.002, "loss": 2.3405, "step": 105170 }, { "epoch": 0.4065964651853226, "grad_norm": 0.11331445723772049, "learning_rate": 0.002, "loss": 2.3339, "step": 105180 }, { "epoch": 0.4066351223887059, "grad_norm": 0.11339357495307922, "learning_rate": 0.002, "loss": 2.3496, "step": 105190 }, { "epoch": 0.40667377959208917, "grad_norm": 0.09283099323511124, "learning_rate": 0.002, "loss": 2.3639, "step": 105200 }, { "epoch": 0.4067124367954725, "grad_norm": 0.12029348313808441, "learning_rate": 0.002, "loss": 2.3449, "step": 105210 }, { "epoch": 0.40675109399885573, "grad_norm": 0.11738674342632294, "learning_rate": 0.002, "loss": 2.3463, "step": 105220 }, { "epoch": 0.40678975120223904, "grad_norm": 0.09112228453159332, "learning_rate": 0.002, "loss": 2.3512, "step": 105230 }, { "epoch": 0.4068284084056223, "grad_norm": 0.1028887927532196, "learning_rate": 0.002, "loss": 2.3435, "step": 105240 }, { "epoch": 0.4068670656090056, "grad_norm": 0.10166403651237488, "learning_rate": 0.002, "loss": 2.3462, "step": 105250 }, { "epoch": 0.40690572281238885, "grad_norm": 0.10761390626430511, "learning_rate": 0.002, "loss": 2.3445, "step": 105260 }, { "epoch": 0.40694438001577216, "grad_norm": 0.10484203696250916, "learning_rate": 0.002, "loss": 2.3511, "step": 105270 }, { "epoch": 0.4069830372191554, "grad_norm": 0.0940113440155983, "learning_rate": 0.002, "loss": 2.3489, "step": 105280 }, { "epoch": 0.4070216944225387, "grad_norm": 0.11318383365869522, "learning_rate": 0.002, "loss": 2.3612, "step": 105290 }, { "epoch": 0.40706035162592197, "grad_norm": 0.10878942161798477, "learning_rate": 0.002, "loss": 2.3434, "step": 105300 }, { "epoch": 0.4070990088293053, "grad_norm": 0.12262070178985596, "learning_rate": 0.002, "loss": 2.3554, "step": 105310 }, { "epoch": 0.4071376660326885, "grad_norm": 0.11862120032310486, "learning_rate": 0.002, "loss": 2.348, "step": 105320 }, { "epoch": 0.40717632323607184, "grad_norm": 0.11528993397951126, "learning_rate": 0.002, "loss": 2.3545, "step": 105330 }, { "epoch": 0.4072149804394551, "grad_norm": 0.10077010840177536, "learning_rate": 0.002, "loss": 2.3454, "step": 105340 }, { "epoch": 0.40725363764283834, "grad_norm": 0.0977274626493454, "learning_rate": 0.002, "loss": 2.328, "step": 105350 }, { "epoch": 0.40729229484622165, "grad_norm": 0.0942181721329689, "learning_rate": 0.002, "loss": 2.3426, "step": 105360 }, { "epoch": 0.4073309520496049, "grad_norm": 0.0979325994849205, "learning_rate": 0.002, "loss": 2.335, "step": 105370 }, { "epoch": 0.4073696092529882, "grad_norm": 0.11181334406137466, "learning_rate": 0.002, "loss": 2.3587, "step": 105380 }, { "epoch": 0.40740826645637146, "grad_norm": 0.1126566082239151, "learning_rate": 0.002, "loss": 2.3506, "step": 105390 }, { "epoch": 0.40744692365975477, "grad_norm": 0.12782098352909088, "learning_rate": 0.002, "loss": 2.3396, "step": 105400 }, { "epoch": 0.407485580863138, "grad_norm": 0.10336902737617493, "learning_rate": 0.002, "loss": 2.3358, "step": 105410 }, { "epoch": 0.4075242380665213, "grad_norm": 0.09775971621274948, "learning_rate": 0.002, "loss": 2.3478, "step": 105420 }, { "epoch": 0.4075628952699046, "grad_norm": 0.10696987807750702, "learning_rate": 0.002, "loss": 2.3407, "step": 105430 }, { "epoch": 0.4076015524732879, "grad_norm": 0.12094870954751968, "learning_rate": 0.002, "loss": 2.3425, "step": 105440 }, { "epoch": 0.40764020967667114, "grad_norm": 0.1064288467168808, "learning_rate": 0.002, "loss": 2.3398, "step": 105450 }, { "epoch": 0.40767886688005445, "grad_norm": 0.10916435718536377, "learning_rate": 0.002, "loss": 2.3523, "step": 105460 }, { "epoch": 0.4077175240834377, "grad_norm": 0.10927937179803848, "learning_rate": 0.002, "loss": 2.3608, "step": 105470 }, { "epoch": 0.407756181286821, "grad_norm": 0.12412333488464355, "learning_rate": 0.002, "loss": 2.3545, "step": 105480 }, { "epoch": 0.40779483849020426, "grad_norm": 0.11743910610675812, "learning_rate": 0.002, "loss": 2.3486, "step": 105490 }, { "epoch": 0.40783349569358757, "grad_norm": 0.09887317568063736, "learning_rate": 0.002, "loss": 2.3456, "step": 105500 }, { "epoch": 0.4078721528969708, "grad_norm": 0.15866301953792572, "learning_rate": 0.002, "loss": 2.3594, "step": 105510 }, { "epoch": 0.4079108101003541, "grad_norm": 0.10922886431217194, "learning_rate": 0.002, "loss": 2.3385, "step": 105520 }, { "epoch": 0.4079494673037374, "grad_norm": 0.10216683894395828, "learning_rate": 0.002, "loss": 2.3522, "step": 105530 }, { "epoch": 0.40798812450712063, "grad_norm": 0.12087800353765488, "learning_rate": 0.002, "loss": 2.3521, "step": 105540 }, { "epoch": 0.40802678171050394, "grad_norm": 0.10817883163690567, "learning_rate": 0.002, "loss": 2.359, "step": 105550 }, { "epoch": 0.4080654389138872, "grad_norm": 0.11017916351556778, "learning_rate": 0.002, "loss": 2.3543, "step": 105560 }, { "epoch": 0.4081040961172705, "grad_norm": 0.10396047681570053, "learning_rate": 0.002, "loss": 2.3534, "step": 105570 }, { "epoch": 0.40814275332065375, "grad_norm": 0.11955048143863678, "learning_rate": 0.002, "loss": 2.3489, "step": 105580 }, { "epoch": 0.40818141052403706, "grad_norm": 0.10369185358285904, "learning_rate": 0.002, "loss": 2.3553, "step": 105590 }, { "epoch": 0.4082200677274203, "grad_norm": 0.10445476323366165, "learning_rate": 0.002, "loss": 2.3484, "step": 105600 }, { "epoch": 0.4082587249308036, "grad_norm": 0.09951169043779373, "learning_rate": 0.002, "loss": 2.3569, "step": 105610 }, { "epoch": 0.40829738213418687, "grad_norm": 0.09170377254486084, "learning_rate": 0.002, "loss": 2.3492, "step": 105620 }, { "epoch": 0.4083360393375702, "grad_norm": 0.1086997240781784, "learning_rate": 0.002, "loss": 2.3529, "step": 105630 }, { "epoch": 0.40837469654095343, "grad_norm": 0.1046295091509819, "learning_rate": 0.002, "loss": 2.3394, "step": 105640 }, { "epoch": 0.40841335374433674, "grad_norm": 0.10843697935342789, "learning_rate": 0.002, "loss": 2.3363, "step": 105650 }, { "epoch": 0.40845201094772, "grad_norm": 0.09829798340797424, "learning_rate": 0.002, "loss": 2.3529, "step": 105660 }, { "epoch": 0.4084906681511033, "grad_norm": 0.11206449568271637, "learning_rate": 0.002, "loss": 2.3456, "step": 105670 }, { "epoch": 0.40852932535448655, "grad_norm": 0.10997811704874039, "learning_rate": 0.002, "loss": 2.3559, "step": 105680 }, { "epoch": 0.40856798255786986, "grad_norm": 0.10579683631658554, "learning_rate": 0.002, "loss": 2.3296, "step": 105690 }, { "epoch": 0.4086066397612531, "grad_norm": 0.10083601623773575, "learning_rate": 0.002, "loss": 2.3565, "step": 105700 }, { "epoch": 0.4086452969646364, "grad_norm": 0.09944254159927368, "learning_rate": 0.002, "loss": 2.3737, "step": 105710 }, { "epoch": 0.40868395416801967, "grad_norm": 0.0935181975364685, "learning_rate": 0.002, "loss": 2.3503, "step": 105720 }, { "epoch": 0.4087226113714029, "grad_norm": 0.11061915010213852, "learning_rate": 0.002, "loss": 2.3473, "step": 105730 }, { "epoch": 0.40876126857478623, "grad_norm": 0.11106288433074951, "learning_rate": 0.002, "loss": 2.3546, "step": 105740 }, { "epoch": 0.4087999257781695, "grad_norm": 0.10588667541742325, "learning_rate": 0.002, "loss": 2.3618, "step": 105750 }, { "epoch": 0.4088385829815528, "grad_norm": 0.10737384855747223, "learning_rate": 0.002, "loss": 2.3539, "step": 105760 }, { "epoch": 0.40887724018493604, "grad_norm": 0.1374361664056778, "learning_rate": 0.002, "loss": 2.3534, "step": 105770 }, { "epoch": 0.40891589738831935, "grad_norm": 0.11525832861661911, "learning_rate": 0.002, "loss": 2.3564, "step": 105780 }, { "epoch": 0.4089545545917026, "grad_norm": 0.10367967188358307, "learning_rate": 0.002, "loss": 2.3534, "step": 105790 }, { "epoch": 0.4089932117950859, "grad_norm": 0.10809770226478577, "learning_rate": 0.002, "loss": 2.3342, "step": 105800 }, { "epoch": 0.40903186899846916, "grad_norm": 0.10791625082492828, "learning_rate": 0.002, "loss": 2.3589, "step": 105810 }, { "epoch": 0.40907052620185247, "grad_norm": 0.12326755374670029, "learning_rate": 0.002, "loss": 2.3564, "step": 105820 }, { "epoch": 0.4091091834052357, "grad_norm": 0.10126736760139465, "learning_rate": 0.002, "loss": 2.339, "step": 105830 }, { "epoch": 0.40914784060861903, "grad_norm": 0.13051722943782806, "learning_rate": 0.002, "loss": 2.351, "step": 105840 }, { "epoch": 0.4091864978120023, "grad_norm": 0.10089803487062454, "learning_rate": 0.002, "loss": 2.3439, "step": 105850 }, { "epoch": 0.4092251550153856, "grad_norm": 0.11042924225330353, "learning_rate": 0.002, "loss": 2.3478, "step": 105860 }, { "epoch": 0.40926381221876884, "grad_norm": 0.09863913804292679, "learning_rate": 0.002, "loss": 2.3605, "step": 105870 }, { "epoch": 0.40930246942215215, "grad_norm": 0.10707473754882812, "learning_rate": 0.002, "loss": 2.3313, "step": 105880 }, { "epoch": 0.4093411266255354, "grad_norm": 0.0968175083398819, "learning_rate": 0.002, "loss": 2.3495, "step": 105890 }, { "epoch": 0.40937978382891865, "grad_norm": 0.1127745509147644, "learning_rate": 0.002, "loss": 2.3431, "step": 105900 }, { "epoch": 0.40941844103230196, "grad_norm": 0.11764765530824661, "learning_rate": 0.002, "loss": 2.3459, "step": 105910 }, { "epoch": 0.4094570982356852, "grad_norm": 0.09807364642620087, "learning_rate": 0.002, "loss": 2.3473, "step": 105920 }, { "epoch": 0.4094957554390685, "grad_norm": 0.11089298129081726, "learning_rate": 0.002, "loss": 2.3479, "step": 105930 }, { "epoch": 0.4095344126424518, "grad_norm": 0.11690270900726318, "learning_rate": 0.002, "loss": 2.3332, "step": 105940 }, { "epoch": 0.4095730698458351, "grad_norm": 0.11261983960866928, "learning_rate": 0.002, "loss": 2.3547, "step": 105950 }, { "epoch": 0.40961172704921833, "grad_norm": 0.11782555282115936, "learning_rate": 0.002, "loss": 2.3497, "step": 105960 }, { "epoch": 0.40965038425260164, "grad_norm": 0.11236891895532608, "learning_rate": 0.002, "loss": 2.3326, "step": 105970 }, { "epoch": 0.4096890414559849, "grad_norm": 0.1086883395910263, "learning_rate": 0.002, "loss": 2.3577, "step": 105980 }, { "epoch": 0.4097276986593682, "grad_norm": 0.1255752593278885, "learning_rate": 0.002, "loss": 2.3254, "step": 105990 }, { "epoch": 0.40976635586275145, "grad_norm": 0.11962178349494934, "learning_rate": 0.002, "loss": 2.3524, "step": 106000 }, { "epoch": 0.40980501306613476, "grad_norm": 0.130793496966362, "learning_rate": 0.002, "loss": 2.3656, "step": 106010 }, { "epoch": 0.409843670269518, "grad_norm": 0.09094803780317307, "learning_rate": 0.002, "loss": 2.3475, "step": 106020 }, { "epoch": 0.4098823274729013, "grad_norm": 0.11750062555074692, "learning_rate": 0.002, "loss": 2.3532, "step": 106030 }, { "epoch": 0.40992098467628457, "grad_norm": 0.09984748065471649, "learning_rate": 0.002, "loss": 2.3656, "step": 106040 }, { "epoch": 0.4099596418796679, "grad_norm": 0.1055670976638794, "learning_rate": 0.002, "loss": 2.3412, "step": 106050 }, { "epoch": 0.40999829908305113, "grad_norm": 0.10549233108758926, "learning_rate": 0.002, "loss": 2.348, "step": 106060 }, { "epoch": 0.41003695628643444, "grad_norm": 0.09887228906154633, "learning_rate": 0.002, "loss": 2.3573, "step": 106070 }, { "epoch": 0.4100756134898177, "grad_norm": 0.09559593349695206, "learning_rate": 0.002, "loss": 2.3444, "step": 106080 }, { "epoch": 0.41011427069320094, "grad_norm": 0.10596460849046707, "learning_rate": 0.002, "loss": 2.3452, "step": 106090 }, { "epoch": 0.41015292789658425, "grad_norm": 0.11016621440649033, "learning_rate": 0.002, "loss": 2.3659, "step": 106100 }, { "epoch": 0.4101915850999675, "grad_norm": 0.10938017815351486, "learning_rate": 0.002, "loss": 2.3378, "step": 106110 }, { "epoch": 0.4102302423033508, "grad_norm": 0.11935988813638687, "learning_rate": 0.002, "loss": 2.3586, "step": 106120 }, { "epoch": 0.41026889950673406, "grad_norm": 0.10754488408565521, "learning_rate": 0.002, "loss": 2.344, "step": 106130 }, { "epoch": 0.41030755671011737, "grad_norm": 0.11438973993062973, "learning_rate": 0.002, "loss": 2.3479, "step": 106140 }, { "epoch": 0.4103462139135006, "grad_norm": 0.09833820909261703, "learning_rate": 0.002, "loss": 2.3496, "step": 106150 }, { "epoch": 0.41038487111688393, "grad_norm": 0.1259237676858902, "learning_rate": 0.002, "loss": 2.3348, "step": 106160 }, { "epoch": 0.4104235283202672, "grad_norm": 0.10356998443603516, "learning_rate": 0.002, "loss": 2.3631, "step": 106170 }, { "epoch": 0.4104621855236505, "grad_norm": 0.13003289699554443, "learning_rate": 0.002, "loss": 2.3369, "step": 106180 }, { "epoch": 0.41050084272703374, "grad_norm": 0.11426402628421783, "learning_rate": 0.002, "loss": 2.3419, "step": 106190 }, { "epoch": 0.41053949993041705, "grad_norm": 0.11478512734174728, "learning_rate": 0.002, "loss": 2.3433, "step": 106200 }, { "epoch": 0.4105781571338003, "grad_norm": 0.1121613010764122, "learning_rate": 0.002, "loss": 2.358, "step": 106210 }, { "epoch": 0.4106168143371836, "grad_norm": 0.12235704064369202, "learning_rate": 0.002, "loss": 2.3441, "step": 106220 }, { "epoch": 0.41065547154056686, "grad_norm": 0.10301785916090012, "learning_rate": 0.002, "loss": 2.3512, "step": 106230 }, { "epoch": 0.41069412874395017, "grad_norm": 0.10899627953767776, "learning_rate": 0.002, "loss": 2.3441, "step": 106240 }, { "epoch": 0.4107327859473334, "grad_norm": 0.13639208674430847, "learning_rate": 0.002, "loss": 2.3489, "step": 106250 }, { "epoch": 0.41077144315071673, "grad_norm": 0.10303498804569244, "learning_rate": 0.002, "loss": 2.3563, "step": 106260 }, { "epoch": 0.4108101003541, "grad_norm": 0.09691531956195831, "learning_rate": 0.002, "loss": 2.3406, "step": 106270 }, { "epoch": 0.41084875755748324, "grad_norm": 0.11968454718589783, "learning_rate": 0.002, "loss": 2.3516, "step": 106280 }, { "epoch": 0.41088741476086654, "grad_norm": 0.11995863914489746, "learning_rate": 0.002, "loss": 2.3474, "step": 106290 }, { "epoch": 0.4109260719642498, "grad_norm": 0.10339125245809555, "learning_rate": 0.002, "loss": 2.3406, "step": 106300 }, { "epoch": 0.4109647291676331, "grad_norm": 0.11860562115907669, "learning_rate": 0.002, "loss": 2.3597, "step": 106310 }, { "epoch": 0.41100338637101635, "grad_norm": 0.11929059028625488, "learning_rate": 0.002, "loss": 2.3402, "step": 106320 }, { "epoch": 0.41104204357439966, "grad_norm": 0.11013022810220718, "learning_rate": 0.002, "loss": 2.3339, "step": 106330 }, { "epoch": 0.4110807007777829, "grad_norm": 0.09725578874349594, "learning_rate": 0.002, "loss": 2.3372, "step": 106340 }, { "epoch": 0.4111193579811662, "grad_norm": 0.10636448115110397, "learning_rate": 0.002, "loss": 2.3587, "step": 106350 }, { "epoch": 0.4111580151845495, "grad_norm": 0.11153136938810349, "learning_rate": 0.002, "loss": 2.3533, "step": 106360 }, { "epoch": 0.4111966723879328, "grad_norm": 0.10090118646621704, "learning_rate": 0.002, "loss": 2.3385, "step": 106370 }, { "epoch": 0.41123532959131603, "grad_norm": 0.112342469394207, "learning_rate": 0.002, "loss": 2.3394, "step": 106380 }, { "epoch": 0.41127398679469934, "grad_norm": 0.10965652018785477, "learning_rate": 0.002, "loss": 2.3713, "step": 106390 }, { "epoch": 0.4113126439980826, "grad_norm": 0.11831896007061005, "learning_rate": 0.002, "loss": 2.3502, "step": 106400 }, { "epoch": 0.4113513012014659, "grad_norm": 0.09602269530296326, "learning_rate": 0.002, "loss": 2.3472, "step": 106410 }, { "epoch": 0.41138995840484915, "grad_norm": 0.10719896852970123, "learning_rate": 0.002, "loss": 2.3364, "step": 106420 }, { "epoch": 0.41142861560823246, "grad_norm": 0.11348539590835571, "learning_rate": 0.002, "loss": 2.3306, "step": 106430 }, { "epoch": 0.4114672728116157, "grad_norm": 0.12239199876785278, "learning_rate": 0.002, "loss": 2.3539, "step": 106440 }, { "epoch": 0.411505930014999, "grad_norm": 0.12341707199811935, "learning_rate": 0.002, "loss": 2.3679, "step": 106450 }, { "epoch": 0.4115445872183823, "grad_norm": 0.10860970616340637, "learning_rate": 0.002, "loss": 2.3431, "step": 106460 }, { "epoch": 0.4115832444217655, "grad_norm": 0.1317441314458847, "learning_rate": 0.002, "loss": 2.3407, "step": 106470 }, { "epoch": 0.41162190162514883, "grad_norm": 0.11049704253673553, "learning_rate": 0.002, "loss": 2.3588, "step": 106480 }, { "epoch": 0.4116605588285321, "grad_norm": 0.12230638414621353, "learning_rate": 0.002, "loss": 2.3514, "step": 106490 }, { "epoch": 0.4116992160319154, "grad_norm": 0.09292108565568924, "learning_rate": 0.002, "loss": 2.3477, "step": 106500 }, { "epoch": 0.41173787323529865, "grad_norm": 0.11424490809440613, "learning_rate": 0.002, "loss": 2.3365, "step": 106510 }, { "epoch": 0.41177653043868195, "grad_norm": 0.1115012913942337, "learning_rate": 0.002, "loss": 2.3585, "step": 106520 }, { "epoch": 0.4118151876420652, "grad_norm": 0.09740482270717621, "learning_rate": 0.002, "loss": 2.3456, "step": 106530 }, { "epoch": 0.4118538448454485, "grad_norm": 0.09662970900535583, "learning_rate": 0.002, "loss": 2.3507, "step": 106540 }, { "epoch": 0.41189250204883177, "grad_norm": 0.12155226618051529, "learning_rate": 0.002, "loss": 2.3337, "step": 106550 }, { "epoch": 0.4119311592522151, "grad_norm": 0.1143067330121994, "learning_rate": 0.002, "loss": 2.3374, "step": 106560 }, { "epoch": 0.4119698164555983, "grad_norm": 0.12124653905630112, "learning_rate": 0.002, "loss": 2.3429, "step": 106570 }, { "epoch": 0.41200847365898163, "grad_norm": 0.10572107136249542, "learning_rate": 0.002, "loss": 2.3562, "step": 106580 }, { "epoch": 0.4120471308623649, "grad_norm": 0.12428019195795059, "learning_rate": 0.002, "loss": 2.3612, "step": 106590 }, { "epoch": 0.4120857880657482, "grad_norm": 0.10228022933006287, "learning_rate": 0.002, "loss": 2.347, "step": 106600 }, { "epoch": 0.41212444526913145, "grad_norm": 0.11307167261838913, "learning_rate": 0.002, "loss": 2.3483, "step": 106610 }, { "epoch": 0.41216310247251475, "grad_norm": 0.10472637414932251, "learning_rate": 0.002, "loss": 2.3332, "step": 106620 }, { "epoch": 0.412201759675898, "grad_norm": 0.1070481687784195, "learning_rate": 0.002, "loss": 2.358, "step": 106630 }, { "epoch": 0.4122404168792813, "grad_norm": 0.10893788933753967, "learning_rate": 0.002, "loss": 2.3681, "step": 106640 }, { "epoch": 0.41227907408266457, "grad_norm": 0.09896333515644073, "learning_rate": 0.002, "loss": 2.3593, "step": 106650 }, { "epoch": 0.4123177312860478, "grad_norm": 0.10149840265512466, "learning_rate": 0.002, "loss": 2.3555, "step": 106660 }, { "epoch": 0.4123563884894311, "grad_norm": 0.10944268852472305, "learning_rate": 0.002, "loss": 2.3465, "step": 106670 }, { "epoch": 0.4123950456928144, "grad_norm": 0.10874912887811661, "learning_rate": 0.002, "loss": 2.3433, "step": 106680 }, { "epoch": 0.4124337028961977, "grad_norm": 0.12328887730836868, "learning_rate": 0.002, "loss": 2.3292, "step": 106690 }, { "epoch": 0.41247236009958094, "grad_norm": 0.10866201668977737, "learning_rate": 0.002, "loss": 2.3565, "step": 106700 }, { "epoch": 0.41251101730296424, "grad_norm": 0.10595963895320892, "learning_rate": 0.002, "loss": 2.3555, "step": 106710 }, { "epoch": 0.4125496745063475, "grad_norm": 0.12083906680345535, "learning_rate": 0.002, "loss": 2.3574, "step": 106720 }, { "epoch": 0.4125883317097308, "grad_norm": 0.10444987565279007, "learning_rate": 0.002, "loss": 2.3488, "step": 106730 }, { "epoch": 0.41262698891311406, "grad_norm": 0.11359119415283203, "learning_rate": 0.002, "loss": 2.3572, "step": 106740 }, { "epoch": 0.41266564611649736, "grad_norm": 0.10856324434280396, "learning_rate": 0.002, "loss": 2.342, "step": 106750 }, { "epoch": 0.4127043033198806, "grad_norm": 0.09387435019016266, "learning_rate": 0.002, "loss": 2.356, "step": 106760 }, { "epoch": 0.4127429605232639, "grad_norm": 0.12010136246681213, "learning_rate": 0.002, "loss": 2.3497, "step": 106770 }, { "epoch": 0.4127816177266472, "grad_norm": 0.11715513467788696, "learning_rate": 0.002, "loss": 2.3397, "step": 106780 }, { "epoch": 0.4128202749300305, "grad_norm": 0.10977223515510559, "learning_rate": 0.002, "loss": 2.355, "step": 106790 }, { "epoch": 0.41285893213341374, "grad_norm": 0.10523109883069992, "learning_rate": 0.002, "loss": 2.3382, "step": 106800 }, { "epoch": 0.41289758933679704, "grad_norm": 0.09810391813516617, "learning_rate": 0.002, "loss": 2.3568, "step": 106810 }, { "epoch": 0.4129362465401803, "grad_norm": 0.1089128777384758, "learning_rate": 0.002, "loss": 2.3367, "step": 106820 }, { "epoch": 0.41297490374356355, "grad_norm": 0.09619535505771637, "learning_rate": 0.002, "loss": 2.3425, "step": 106830 }, { "epoch": 0.41301356094694686, "grad_norm": 0.10872559249401093, "learning_rate": 0.002, "loss": 2.3339, "step": 106840 }, { "epoch": 0.4130522181503301, "grad_norm": 0.11064718663692474, "learning_rate": 0.002, "loss": 2.3388, "step": 106850 }, { "epoch": 0.4130908753537134, "grad_norm": 0.0972963497042656, "learning_rate": 0.002, "loss": 2.3552, "step": 106860 }, { "epoch": 0.41312953255709667, "grad_norm": 0.1047365665435791, "learning_rate": 0.002, "loss": 2.3462, "step": 106870 }, { "epoch": 0.41316818976048, "grad_norm": 0.1001274511218071, "learning_rate": 0.002, "loss": 2.3473, "step": 106880 }, { "epoch": 0.41320684696386323, "grad_norm": 0.12085501104593277, "learning_rate": 0.002, "loss": 2.341, "step": 106890 }, { "epoch": 0.41324550416724654, "grad_norm": 0.09879327565431595, "learning_rate": 0.002, "loss": 2.347, "step": 106900 }, { "epoch": 0.4132841613706298, "grad_norm": 0.10587608814239502, "learning_rate": 0.002, "loss": 2.3594, "step": 106910 }, { "epoch": 0.4133228185740131, "grad_norm": 0.12716144323349, "learning_rate": 0.002, "loss": 2.3623, "step": 106920 }, { "epoch": 0.41336147577739635, "grad_norm": 0.11554614454507828, "learning_rate": 0.002, "loss": 2.3376, "step": 106930 }, { "epoch": 0.41340013298077966, "grad_norm": 0.09272408485412598, "learning_rate": 0.002, "loss": 2.3414, "step": 106940 }, { "epoch": 0.4134387901841629, "grad_norm": 0.11044779419898987, "learning_rate": 0.002, "loss": 2.3351, "step": 106950 }, { "epoch": 0.4134774473875462, "grad_norm": 0.1153455302119255, "learning_rate": 0.002, "loss": 2.3477, "step": 106960 }, { "epoch": 0.41351610459092947, "grad_norm": 0.10462018847465515, "learning_rate": 0.002, "loss": 2.3429, "step": 106970 }, { "epoch": 0.4135547617943128, "grad_norm": 0.10811108350753784, "learning_rate": 0.002, "loss": 2.3415, "step": 106980 }, { "epoch": 0.413593418997696, "grad_norm": 0.10190818458795547, "learning_rate": 0.002, "loss": 2.3452, "step": 106990 }, { "epoch": 0.41363207620107934, "grad_norm": 0.11271360516548157, "learning_rate": 0.002, "loss": 2.3469, "step": 107000 }, { "epoch": 0.4136707334044626, "grad_norm": 0.09467026591300964, "learning_rate": 0.002, "loss": 2.3379, "step": 107010 }, { "epoch": 0.41370939060784584, "grad_norm": 0.11904910951852798, "learning_rate": 0.002, "loss": 2.3503, "step": 107020 }, { "epoch": 0.41374804781122915, "grad_norm": 0.09495735913515091, "learning_rate": 0.002, "loss": 2.3514, "step": 107030 }, { "epoch": 0.4137867050146124, "grad_norm": 0.14995649456977844, "learning_rate": 0.002, "loss": 2.3553, "step": 107040 }, { "epoch": 0.4138253622179957, "grad_norm": 0.09876397252082825, "learning_rate": 0.002, "loss": 2.3485, "step": 107050 }, { "epoch": 0.41386401942137896, "grad_norm": 0.1055774912238121, "learning_rate": 0.002, "loss": 2.333, "step": 107060 }, { "epoch": 0.41390267662476227, "grad_norm": 0.10995199531316757, "learning_rate": 0.002, "loss": 2.3413, "step": 107070 }, { "epoch": 0.4139413338281455, "grad_norm": 0.10983167588710785, "learning_rate": 0.002, "loss": 2.3379, "step": 107080 }, { "epoch": 0.4139799910315288, "grad_norm": 0.11265630275011063, "learning_rate": 0.002, "loss": 2.3509, "step": 107090 }, { "epoch": 0.4140186482349121, "grad_norm": 0.11353180557489395, "learning_rate": 0.002, "loss": 2.3504, "step": 107100 }, { "epoch": 0.4140573054382954, "grad_norm": 0.100088931620121, "learning_rate": 0.002, "loss": 2.338, "step": 107110 }, { "epoch": 0.41409596264167864, "grad_norm": 0.09908263385295868, "learning_rate": 0.002, "loss": 2.3355, "step": 107120 }, { "epoch": 0.41413461984506195, "grad_norm": 0.10703715682029724, "learning_rate": 0.002, "loss": 2.3465, "step": 107130 }, { "epoch": 0.4141732770484452, "grad_norm": 0.1116807758808136, "learning_rate": 0.002, "loss": 2.3588, "step": 107140 }, { "epoch": 0.4142119342518285, "grad_norm": 0.1046561673283577, "learning_rate": 0.002, "loss": 2.3453, "step": 107150 }, { "epoch": 0.41425059145521176, "grad_norm": 0.09532133489847183, "learning_rate": 0.002, "loss": 2.3491, "step": 107160 }, { "epoch": 0.41428924865859507, "grad_norm": 0.10459303855895996, "learning_rate": 0.002, "loss": 2.3517, "step": 107170 }, { "epoch": 0.4143279058619783, "grad_norm": 0.12302389740943909, "learning_rate": 0.002, "loss": 2.3454, "step": 107180 }, { "epoch": 0.4143665630653616, "grad_norm": 0.11073164641857147, "learning_rate": 0.002, "loss": 2.3505, "step": 107190 }, { "epoch": 0.4144052202687449, "grad_norm": 0.1090584397315979, "learning_rate": 0.002, "loss": 2.354, "step": 107200 }, { "epoch": 0.41444387747212813, "grad_norm": 0.1135617047548294, "learning_rate": 0.002, "loss": 2.3395, "step": 107210 }, { "epoch": 0.41448253467551144, "grad_norm": 0.10791900008916855, "learning_rate": 0.002, "loss": 2.3493, "step": 107220 }, { "epoch": 0.4145211918788947, "grad_norm": 0.10380267351865768, "learning_rate": 0.002, "loss": 2.3507, "step": 107230 }, { "epoch": 0.414559849082278, "grad_norm": 0.10777231305837631, "learning_rate": 0.002, "loss": 2.3442, "step": 107240 }, { "epoch": 0.41459850628566125, "grad_norm": 0.10638809949159622, "learning_rate": 0.002, "loss": 2.3573, "step": 107250 }, { "epoch": 0.41463716348904456, "grad_norm": 0.09734046459197998, "learning_rate": 0.002, "loss": 2.3388, "step": 107260 }, { "epoch": 0.4146758206924278, "grad_norm": 0.10681114345788956, "learning_rate": 0.002, "loss": 2.3592, "step": 107270 }, { "epoch": 0.4147144778958111, "grad_norm": 0.11719990521669388, "learning_rate": 0.002, "loss": 2.3404, "step": 107280 }, { "epoch": 0.41475313509919437, "grad_norm": 0.11085479706525803, "learning_rate": 0.002, "loss": 2.3513, "step": 107290 }, { "epoch": 0.4147917923025777, "grad_norm": 0.11180248111486435, "learning_rate": 0.002, "loss": 2.3529, "step": 107300 }, { "epoch": 0.41483044950596093, "grad_norm": 0.11796993762254715, "learning_rate": 0.002, "loss": 2.3635, "step": 107310 }, { "epoch": 0.41486910670934424, "grad_norm": 0.09738306701183319, "learning_rate": 0.002, "loss": 2.3523, "step": 107320 }, { "epoch": 0.4149077639127275, "grad_norm": 0.12929093837738037, "learning_rate": 0.002, "loss": 2.3297, "step": 107330 }, { "epoch": 0.4149464211161108, "grad_norm": 0.10308624804019928, "learning_rate": 0.002, "loss": 2.3718, "step": 107340 }, { "epoch": 0.41498507831949405, "grad_norm": 0.11912292242050171, "learning_rate": 0.002, "loss": 2.3554, "step": 107350 }, { "epoch": 0.41502373552287736, "grad_norm": 0.09912280738353729, "learning_rate": 0.002, "loss": 2.3404, "step": 107360 }, { "epoch": 0.4150623927262606, "grad_norm": 0.10287779569625854, "learning_rate": 0.002, "loss": 2.3406, "step": 107370 }, { "epoch": 0.4151010499296439, "grad_norm": 0.1134791225194931, "learning_rate": 0.002, "loss": 2.3682, "step": 107380 }, { "epoch": 0.41513970713302717, "grad_norm": 0.12196041643619537, "learning_rate": 0.002, "loss": 2.3469, "step": 107390 }, { "epoch": 0.4151783643364104, "grad_norm": 0.10882119834423065, "learning_rate": 0.002, "loss": 2.332, "step": 107400 }, { "epoch": 0.41521702153979373, "grad_norm": 0.11768088489770889, "learning_rate": 0.002, "loss": 2.3299, "step": 107410 }, { "epoch": 0.415255678743177, "grad_norm": 0.10867585241794586, "learning_rate": 0.002, "loss": 2.3515, "step": 107420 }, { "epoch": 0.4152943359465603, "grad_norm": 0.10891842097043991, "learning_rate": 0.002, "loss": 2.3422, "step": 107430 }, { "epoch": 0.41533299314994354, "grad_norm": 0.11367695778608322, "learning_rate": 0.002, "loss": 2.3409, "step": 107440 }, { "epoch": 0.41537165035332685, "grad_norm": 0.11949791759252548, "learning_rate": 0.002, "loss": 2.3414, "step": 107450 }, { "epoch": 0.4154103075567101, "grad_norm": 0.10001518577337265, "learning_rate": 0.002, "loss": 2.3342, "step": 107460 }, { "epoch": 0.4154489647600934, "grad_norm": 0.0977843701839447, "learning_rate": 0.002, "loss": 2.358, "step": 107470 }, { "epoch": 0.41548762196347666, "grad_norm": 0.12059198319911957, "learning_rate": 0.002, "loss": 2.3651, "step": 107480 }, { "epoch": 0.41552627916685997, "grad_norm": 0.11329180002212524, "learning_rate": 0.002, "loss": 2.3451, "step": 107490 }, { "epoch": 0.4155649363702432, "grad_norm": 0.11127132177352905, "learning_rate": 0.002, "loss": 2.355, "step": 107500 }, { "epoch": 0.41560359357362653, "grad_norm": 0.11088550090789795, "learning_rate": 0.002, "loss": 2.3583, "step": 107510 }, { "epoch": 0.4156422507770098, "grad_norm": 0.10160195827484131, "learning_rate": 0.002, "loss": 2.3584, "step": 107520 }, { "epoch": 0.4156809079803931, "grad_norm": 0.10547743737697601, "learning_rate": 0.002, "loss": 2.351, "step": 107530 }, { "epoch": 0.41571956518377634, "grad_norm": 0.09817437827587128, "learning_rate": 0.002, "loss": 2.343, "step": 107540 }, { "epoch": 0.41575822238715965, "grad_norm": 0.10416698455810547, "learning_rate": 0.002, "loss": 2.3451, "step": 107550 }, { "epoch": 0.4157968795905429, "grad_norm": 0.11405066400766373, "learning_rate": 0.002, "loss": 2.3433, "step": 107560 }, { "epoch": 0.41583553679392615, "grad_norm": 0.10036198049783707, "learning_rate": 0.002, "loss": 2.3466, "step": 107570 }, { "epoch": 0.41587419399730946, "grad_norm": 0.09698426723480225, "learning_rate": 0.002, "loss": 2.3395, "step": 107580 }, { "epoch": 0.4159128512006927, "grad_norm": 0.1098899245262146, "learning_rate": 0.002, "loss": 2.3518, "step": 107590 }, { "epoch": 0.415951508404076, "grad_norm": 0.10067665576934814, "learning_rate": 0.002, "loss": 2.3658, "step": 107600 }, { "epoch": 0.4159901656074593, "grad_norm": 0.12178201973438263, "learning_rate": 0.002, "loss": 2.3306, "step": 107610 }, { "epoch": 0.4160288228108426, "grad_norm": 0.10082918405532837, "learning_rate": 0.002, "loss": 2.357, "step": 107620 }, { "epoch": 0.41606748001422583, "grad_norm": 0.10588428378105164, "learning_rate": 0.002, "loss": 2.3578, "step": 107630 }, { "epoch": 0.41610613721760914, "grad_norm": 0.11798638105392456, "learning_rate": 0.002, "loss": 2.3551, "step": 107640 }, { "epoch": 0.4161447944209924, "grad_norm": 0.10255949944257736, "learning_rate": 0.002, "loss": 2.3435, "step": 107650 }, { "epoch": 0.4161834516243757, "grad_norm": 0.11976686865091324, "learning_rate": 0.002, "loss": 2.3638, "step": 107660 }, { "epoch": 0.41622210882775895, "grad_norm": 0.09483418613672256, "learning_rate": 0.002, "loss": 2.3455, "step": 107670 }, { "epoch": 0.41626076603114226, "grad_norm": 0.09426402300596237, "learning_rate": 0.002, "loss": 2.3471, "step": 107680 }, { "epoch": 0.4162994232345255, "grad_norm": 0.09577522426843643, "learning_rate": 0.002, "loss": 2.3387, "step": 107690 }, { "epoch": 0.4163380804379088, "grad_norm": 0.09289302676916122, "learning_rate": 0.002, "loss": 2.3468, "step": 107700 }, { "epoch": 0.41637673764129207, "grad_norm": 0.11215560138225555, "learning_rate": 0.002, "loss": 2.3457, "step": 107710 }, { "epoch": 0.4164153948446754, "grad_norm": 0.10089147090911865, "learning_rate": 0.002, "loss": 2.3452, "step": 107720 }, { "epoch": 0.41645405204805863, "grad_norm": 0.10375631600618362, "learning_rate": 0.002, "loss": 2.3361, "step": 107730 }, { "epoch": 0.41649270925144194, "grad_norm": 0.1042468249797821, "learning_rate": 0.002, "loss": 2.343, "step": 107740 }, { "epoch": 0.4165313664548252, "grad_norm": 0.1084354892373085, "learning_rate": 0.002, "loss": 2.3627, "step": 107750 }, { "epoch": 0.41657002365820844, "grad_norm": 0.10817048698663712, "learning_rate": 0.002, "loss": 2.3597, "step": 107760 }, { "epoch": 0.41660868086159175, "grad_norm": 0.09909099340438843, "learning_rate": 0.002, "loss": 2.3604, "step": 107770 }, { "epoch": 0.416647338064975, "grad_norm": 0.09349465370178223, "learning_rate": 0.002, "loss": 2.3344, "step": 107780 }, { "epoch": 0.4166859952683583, "grad_norm": 0.12862420082092285, "learning_rate": 0.002, "loss": 2.3654, "step": 107790 }, { "epoch": 0.41672465247174156, "grad_norm": 0.10976967960596085, "learning_rate": 0.002, "loss": 2.3322, "step": 107800 }, { "epoch": 0.41676330967512487, "grad_norm": 0.17169824242591858, "learning_rate": 0.002, "loss": 2.3661, "step": 107810 }, { "epoch": 0.4168019668785081, "grad_norm": 0.09951794892549515, "learning_rate": 0.002, "loss": 2.3454, "step": 107820 }, { "epoch": 0.41684062408189143, "grad_norm": 0.11703871935606003, "learning_rate": 0.002, "loss": 2.3452, "step": 107830 }, { "epoch": 0.4168792812852747, "grad_norm": 0.10398931801319122, "learning_rate": 0.002, "loss": 2.3482, "step": 107840 }, { "epoch": 0.416917938488658, "grad_norm": 0.09673362970352173, "learning_rate": 0.002, "loss": 2.3557, "step": 107850 }, { "epoch": 0.41695659569204124, "grad_norm": 0.11419400572776794, "learning_rate": 0.002, "loss": 2.3478, "step": 107860 }, { "epoch": 0.41699525289542455, "grad_norm": 0.126008540391922, "learning_rate": 0.002, "loss": 2.3499, "step": 107870 }, { "epoch": 0.4170339100988078, "grad_norm": 0.11196989566087723, "learning_rate": 0.002, "loss": 2.3528, "step": 107880 }, { "epoch": 0.4170725673021911, "grad_norm": 0.10262865573167801, "learning_rate": 0.002, "loss": 2.3723, "step": 107890 }, { "epoch": 0.41711122450557436, "grad_norm": 0.10403737425804138, "learning_rate": 0.002, "loss": 2.3515, "step": 107900 }, { "epoch": 0.41714988170895767, "grad_norm": 0.09810178726911545, "learning_rate": 0.002, "loss": 2.3601, "step": 107910 }, { "epoch": 0.4171885389123409, "grad_norm": 0.101883165538311, "learning_rate": 0.002, "loss": 2.3388, "step": 107920 }, { "epoch": 0.41722719611572423, "grad_norm": 0.12212926894426346, "learning_rate": 0.002, "loss": 2.3532, "step": 107930 }, { "epoch": 0.4172658533191075, "grad_norm": 0.11113600432872772, "learning_rate": 0.002, "loss": 2.3426, "step": 107940 }, { "epoch": 0.41730451052249073, "grad_norm": 0.10756055265665054, "learning_rate": 0.002, "loss": 2.3512, "step": 107950 }, { "epoch": 0.41734316772587404, "grad_norm": 0.10574343800544739, "learning_rate": 0.002, "loss": 2.3361, "step": 107960 }, { "epoch": 0.4173818249292573, "grad_norm": 0.10672122985124588, "learning_rate": 0.002, "loss": 2.3578, "step": 107970 }, { "epoch": 0.4174204821326406, "grad_norm": 0.11252713203430176, "learning_rate": 0.002, "loss": 2.3539, "step": 107980 }, { "epoch": 0.41745913933602385, "grad_norm": 0.1140674352645874, "learning_rate": 0.002, "loss": 2.3539, "step": 107990 }, { "epoch": 0.41749779653940716, "grad_norm": 0.13871759176254272, "learning_rate": 0.002, "loss": 2.3498, "step": 108000 }, { "epoch": 0.4175364537427904, "grad_norm": 0.10054554790258408, "learning_rate": 0.002, "loss": 2.3522, "step": 108010 }, { "epoch": 0.4175751109461737, "grad_norm": 0.10804137587547302, "learning_rate": 0.002, "loss": 2.3413, "step": 108020 }, { "epoch": 0.417613768149557, "grad_norm": 0.1006598025560379, "learning_rate": 0.002, "loss": 2.3494, "step": 108030 }, { "epoch": 0.4176524253529403, "grad_norm": 0.14845651388168335, "learning_rate": 0.002, "loss": 2.3473, "step": 108040 }, { "epoch": 0.41769108255632353, "grad_norm": 0.10627973079681396, "learning_rate": 0.002, "loss": 2.3555, "step": 108050 }, { "epoch": 0.41772973975970684, "grad_norm": 0.11973363161087036, "learning_rate": 0.002, "loss": 2.3426, "step": 108060 }, { "epoch": 0.4177683969630901, "grad_norm": 0.12248005717992783, "learning_rate": 0.002, "loss": 2.3405, "step": 108070 }, { "epoch": 0.4178070541664734, "grad_norm": 0.09348124265670776, "learning_rate": 0.002, "loss": 2.3404, "step": 108080 }, { "epoch": 0.41784571136985665, "grad_norm": 0.10690181702375412, "learning_rate": 0.002, "loss": 2.3448, "step": 108090 }, { "epoch": 0.41788436857323996, "grad_norm": 0.10468243807554245, "learning_rate": 0.002, "loss": 2.3371, "step": 108100 }, { "epoch": 0.4179230257766232, "grad_norm": 0.1052987203001976, "learning_rate": 0.002, "loss": 2.3635, "step": 108110 }, { "epoch": 0.4179616829800065, "grad_norm": 0.1134672611951828, "learning_rate": 0.002, "loss": 2.3641, "step": 108120 }, { "epoch": 0.4180003401833898, "grad_norm": 0.10740668326616287, "learning_rate": 0.002, "loss": 2.3512, "step": 108130 }, { "epoch": 0.418038997386773, "grad_norm": 0.12365444004535675, "learning_rate": 0.002, "loss": 2.3523, "step": 108140 }, { "epoch": 0.41807765459015633, "grad_norm": 0.10862985253334045, "learning_rate": 0.002, "loss": 2.3554, "step": 108150 }, { "epoch": 0.4181163117935396, "grad_norm": 0.10410787910223007, "learning_rate": 0.002, "loss": 2.3448, "step": 108160 }, { "epoch": 0.4181549689969229, "grad_norm": 0.09590797871351242, "learning_rate": 0.002, "loss": 2.3325, "step": 108170 }, { "epoch": 0.41819362620030615, "grad_norm": 0.11057563126087189, "learning_rate": 0.002, "loss": 2.3505, "step": 108180 }, { "epoch": 0.41823228340368945, "grad_norm": 0.09386027604341507, "learning_rate": 0.002, "loss": 2.3624, "step": 108190 }, { "epoch": 0.4182709406070727, "grad_norm": 0.11849407851696014, "learning_rate": 0.002, "loss": 2.3491, "step": 108200 }, { "epoch": 0.418309597810456, "grad_norm": 0.10359001904726028, "learning_rate": 0.002, "loss": 2.3526, "step": 108210 }, { "epoch": 0.41834825501383927, "grad_norm": 0.10447119176387787, "learning_rate": 0.002, "loss": 2.375, "step": 108220 }, { "epoch": 0.4183869122172226, "grad_norm": 0.10291597247123718, "learning_rate": 0.002, "loss": 2.3531, "step": 108230 }, { "epoch": 0.4184255694206058, "grad_norm": 0.12071507424116135, "learning_rate": 0.002, "loss": 2.3396, "step": 108240 }, { "epoch": 0.41846422662398913, "grad_norm": 0.12983140349388123, "learning_rate": 0.002, "loss": 2.3614, "step": 108250 }, { "epoch": 0.4185028838273724, "grad_norm": 0.0902920514345169, "learning_rate": 0.002, "loss": 2.3469, "step": 108260 }, { "epoch": 0.4185415410307557, "grad_norm": 0.107839435338974, "learning_rate": 0.002, "loss": 2.363, "step": 108270 }, { "epoch": 0.41858019823413894, "grad_norm": 0.11176130920648575, "learning_rate": 0.002, "loss": 2.3528, "step": 108280 }, { "epoch": 0.41861885543752225, "grad_norm": 0.11270838975906372, "learning_rate": 0.002, "loss": 2.341, "step": 108290 }, { "epoch": 0.4186575126409055, "grad_norm": 0.10063161700963974, "learning_rate": 0.002, "loss": 2.3665, "step": 108300 }, { "epoch": 0.4186961698442888, "grad_norm": 0.10945228487253189, "learning_rate": 0.002, "loss": 2.3571, "step": 108310 }, { "epoch": 0.41873482704767206, "grad_norm": 0.11797590553760529, "learning_rate": 0.002, "loss": 2.3408, "step": 108320 }, { "epoch": 0.4187734842510553, "grad_norm": 0.1306760460138321, "learning_rate": 0.002, "loss": 2.3509, "step": 108330 }, { "epoch": 0.4188121414544386, "grad_norm": 0.11011867225170135, "learning_rate": 0.002, "loss": 2.3382, "step": 108340 }, { "epoch": 0.4188507986578219, "grad_norm": 0.10377843677997589, "learning_rate": 0.002, "loss": 2.3435, "step": 108350 }, { "epoch": 0.4188894558612052, "grad_norm": 0.12875615060329437, "learning_rate": 0.002, "loss": 2.355, "step": 108360 }, { "epoch": 0.41892811306458844, "grad_norm": 0.10549406707286835, "learning_rate": 0.002, "loss": 2.3609, "step": 108370 }, { "epoch": 0.41896677026797174, "grad_norm": 0.12035589665174484, "learning_rate": 0.002, "loss": 2.3564, "step": 108380 }, { "epoch": 0.419005427471355, "grad_norm": 0.11725050956010818, "learning_rate": 0.002, "loss": 2.3532, "step": 108390 }, { "epoch": 0.4190440846747383, "grad_norm": 0.09805501252412796, "learning_rate": 0.002, "loss": 2.3554, "step": 108400 }, { "epoch": 0.41908274187812156, "grad_norm": 0.11357685923576355, "learning_rate": 0.002, "loss": 2.3641, "step": 108410 }, { "epoch": 0.41912139908150486, "grad_norm": 0.13415689766407013, "learning_rate": 0.002, "loss": 2.3745, "step": 108420 }, { "epoch": 0.4191600562848881, "grad_norm": 0.10046670585870743, "learning_rate": 0.002, "loss": 2.343, "step": 108430 }, { "epoch": 0.4191987134882714, "grad_norm": 0.11026618629693985, "learning_rate": 0.002, "loss": 2.3523, "step": 108440 }, { "epoch": 0.4192373706916547, "grad_norm": 0.10505351424217224, "learning_rate": 0.002, "loss": 2.3426, "step": 108450 }, { "epoch": 0.419276027895038, "grad_norm": 0.114077128469944, "learning_rate": 0.002, "loss": 2.3497, "step": 108460 }, { "epoch": 0.41931468509842124, "grad_norm": 0.11202042549848557, "learning_rate": 0.002, "loss": 2.3577, "step": 108470 }, { "epoch": 0.41935334230180454, "grad_norm": 0.11489993333816528, "learning_rate": 0.002, "loss": 2.3519, "step": 108480 }, { "epoch": 0.4193919995051878, "grad_norm": 0.11202628910541534, "learning_rate": 0.002, "loss": 2.3509, "step": 108490 }, { "epoch": 0.41943065670857105, "grad_norm": 0.10482347756624222, "learning_rate": 0.002, "loss": 2.3501, "step": 108500 }, { "epoch": 0.41946931391195436, "grad_norm": 0.10478508472442627, "learning_rate": 0.002, "loss": 2.3578, "step": 108510 }, { "epoch": 0.4195079711153376, "grad_norm": 0.10179168730974197, "learning_rate": 0.002, "loss": 2.3296, "step": 108520 }, { "epoch": 0.4195466283187209, "grad_norm": 0.10658016055822372, "learning_rate": 0.002, "loss": 2.3668, "step": 108530 }, { "epoch": 0.41958528552210417, "grad_norm": 0.12446645647287369, "learning_rate": 0.002, "loss": 2.3294, "step": 108540 }, { "epoch": 0.4196239427254875, "grad_norm": 0.11415350437164307, "learning_rate": 0.002, "loss": 2.3553, "step": 108550 }, { "epoch": 0.4196625999288707, "grad_norm": 0.11967852711677551, "learning_rate": 0.002, "loss": 2.3456, "step": 108560 }, { "epoch": 0.41970125713225404, "grad_norm": 0.11305100470781326, "learning_rate": 0.002, "loss": 2.3589, "step": 108570 }, { "epoch": 0.4197399143356373, "grad_norm": 0.09932440519332886, "learning_rate": 0.002, "loss": 2.3465, "step": 108580 }, { "epoch": 0.4197785715390206, "grad_norm": 0.10772234946489334, "learning_rate": 0.002, "loss": 2.3591, "step": 108590 }, { "epoch": 0.41981722874240385, "grad_norm": 0.1011858731508255, "learning_rate": 0.002, "loss": 2.3531, "step": 108600 }, { "epoch": 0.41985588594578716, "grad_norm": 0.10514667630195618, "learning_rate": 0.002, "loss": 2.3512, "step": 108610 }, { "epoch": 0.4198945431491704, "grad_norm": 0.0941380187869072, "learning_rate": 0.002, "loss": 2.35, "step": 108620 }, { "epoch": 0.4199332003525537, "grad_norm": 0.10625889152288437, "learning_rate": 0.002, "loss": 2.3528, "step": 108630 }, { "epoch": 0.41997185755593697, "grad_norm": 0.12152034789323807, "learning_rate": 0.002, "loss": 2.3466, "step": 108640 }, { "epoch": 0.4200105147593203, "grad_norm": 0.11208917945623398, "learning_rate": 0.002, "loss": 2.3539, "step": 108650 }, { "epoch": 0.4200491719627035, "grad_norm": 0.10172926634550095, "learning_rate": 0.002, "loss": 2.3526, "step": 108660 }, { "epoch": 0.42008782916608683, "grad_norm": 0.10808614641427994, "learning_rate": 0.002, "loss": 2.3345, "step": 108670 }, { "epoch": 0.4201264863694701, "grad_norm": 0.10961339622735977, "learning_rate": 0.002, "loss": 2.3289, "step": 108680 }, { "epoch": 0.42016514357285334, "grad_norm": 0.09312523901462555, "learning_rate": 0.002, "loss": 2.3537, "step": 108690 }, { "epoch": 0.42020380077623665, "grad_norm": 0.09796839952468872, "learning_rate": 0.002, "loss": 2.3574, "step": 108700 }, { "epoch": 0.4202424579796199, "grad_norm": 0.11329052597284317, "learning_rate": 0.002, "loss": 2.356, "step": 108710 }, { "epoch": 0.4202811151830032, "grad_norm": 0.11482075601816177, "learning_rate": 0.002, "loss": 2.3464, "step": 108720 }, { "epoch": 0.42031977238638646, "grad_norm": 0.11668254435062408, "learning_rate": 0.002, "loss": 2.3491, "step": 108730 }, { "epoch": 0.42035842958976977, "grad_norm": 0.0998915284872055, "learning_rate": 0.002, "loss": 2.3495, "step": 108740 }, { "epoch": 0.420397086793153, "grad_norm": 0.11637427657842636, "learning_rate": 0.002, "loss": 2.3627, "step": 108750 }, { "epoch": 0.4204357439965363, "grad_norm": 0.10949467122554779, "learning_rate": 0.002, "loss": 2.3509, "step": 108760 }, { "epoch": 0.4204744011999196, "grad_norm": 0.10018786042928696, "learning_rate": 0.002, "loss": 2.3462, "step": 108770 }, { "epoch": 0.4205130584033029, "grad_norm": 0.12903913855552673, "learning_rate": 0.002, "loss": 2.3491, "step": 108780 }, { "epoch": 0.42055171560668614, "grad_norm": 0.0959692895412445, "learning_rate": 0.002, "loss": 2.3595, "step": 108790 }, { "epoch": 0.42059037281006945, "grad_norm": 0.11546919494867325, "learning_rate": 0.002, "loss": 2.3285, "step": 108800 }, { "epoch": 0.4206290300134527, "grad_norm": 0.10684224218130112, "learning_rate": 0.002, "loss": 2.3435, "step": 108810 }, { "epoch": 0.420667687216836, "grad_norm": 0.10135342925786972, "learning_rate": 0.002, "loss": 2.3655, "step": 108820 }, { "epoch": 0.42070634442021926, "grad_norm": 0.09952717274427414, "learning_rate": 0.002, "loss": 2.3381, "step": 108830 }, { "epoch": 0.42074500162360257, "grad_norm": 0.11792684346437454, "learning_rate": 0.002, "loss": 2.3423, "step": 108840 }, { "epoch": 0.4207836588269858, "grad_norm": 0.108956478536129, "learning_rate": 0.002, "loss": 2.3554, "step": 108850 }, { "epoch": 0.4208223160303691, "grad_norm": 0.1198933944106102, "learning_rate": 0.002, "loss": 2.3654, "step": 108860 }, { "epoch": 0.4208609732337524, "grad_norm": 0.09738875180482864, "learning_rate": 0.002, "loss": 2.3467, "step": 108870 }, { "epoch": 0.42089963043713563, "grad_norm": 0.10686634480953217, "learning_rate": 0.002, "loss": 2.3481, "step": 108880 }, { "epoch": 0.42093828764051894, "grad_norm": 0.09045381098985672, "learning_rate": 0.002, "loss": 2.3429, "step": 108890 }, { "epoch": 0.4209769448439022, "grad_norm": 0.0957198441028595, "learning_rate": 0.002, "loss": 2.3418, "step": 108900 }, { "epoch": 0.4210156020472855, "grad_norm": 0.10955697298049927, "learning_rate": 0.002, "loss": 2.3521, "step": 108910 }, { "epoch": 0.42105425925066875, "grad_norm": 0.10267753154039383, "learning_rate": 0.002, "loss": 2.3425, "step": 108920 }, { "epoch": 0.42109291645405206, "grad_norm": 0.09929932653903961, "learning_rate": 0.002, "loss": 2.3417, "step": 108930 }, { "epoch": 0.4211315736574353, "grad_norm": 0.10407166928052902, "learning_rate": 0.002, "loss": 2.3425, "step": 108940 }, { "epoch": 0.4211702308608186, "grad_norm": 0.11709648370742798, "learning_rate": 0.002, "loss": 2.3407, "step": 108950 }, { "epoch": 0.42120888806420187, "grad_norm": 0.09369426965713501, "learning_rate": 0.002, "loss": 2.3509, "step": 108960 }, { "epoch": 0.4212475452675852, "grad_norm": 0.1129760593175888, "learning_rate": 0.002, "loss": 2.3559, "step": 108970 }, { "epoch": 0.42128620247096843, "grad_norm": 0.10691390931606293, "learning_rate": 0.002, "loss": 2.3287, "step": 108980 }, { "epoch": 0.42132485967435174, "grad_norm": 0.11483962833881378, "learning_rate": 0.002, "loss": 2.3512, "step": 108990 }, { "epoch": 0.421363516877735, "grad_norm": 0.11706017702817917, "learning_rate": 0.002, "loss": 2.3566, "step": 109000 }, { "epoch": 0.4214021740811183, "grad_norm": 0.09031584113836288, "learning_rate": 0.002, "loss": 2.3477, "step": 109010 }, { "epoch": 0.42144083128450155, "grad_norm": 0.1046423614025116, "learning_rate": 0.002, "loss": 2.3497, "step": 109020 }, { "epoch": 0.42147948848788486, "grad_norm": 0.10996993631124496, "learning_rate": 0.002, "loss": 2.3472, "step": 109030 }, { "epoch": 0.4215181456912681, "grad_norm": 0.10464781522750854, "learning_rate": 0.002, "loss": 2.3583, "step": 109040 }, { "epoch": 0.4215568028946514, "grad_norm": 0.10103486478328705, "learning_rate": 0.002, "loss": 2.3409, "step": 109050 }, { "epoch": 0.42159546009803467, "grad_norm": 0.11956936120986938, "learning_rate": 0.002, "loss": 2.3334, "step": 109060 }, { "epoch": 0.4216341173014179, "grad_norm": 0.10601706057786942, "learning_rate": 0.002, "loss": 2.369, "step": 109070 }, { "epoch": 0.42167277450480123, "grad_norm": 0.11325680464506149, "learning_rate": 0.002, "loss": 2.3444, "step": 109080 }, { "epoch": 0.4217114317081845, "grad_norm": 0.11050796508789062, "learning_rate": 0.002, "loss": 2.3613, "step": 109090 }, { "epoch": 0.4217500889115678, "grad_norm": 0.11937505006790161, "learning_rate": 0.002, "loss": 2.3501, "step": 109100 }, { "epoch": 0.42178874611495104, "grad_norm": 0.10740794986486435, "learning_rate": 0.002, "loss": 2.3562, "step": 109110 }, { "epoch": 0.42182740331833435, "grad_norm": 0.10873711109161377, "learning_rate": 0.002, "loss": 2.3541, "step": 109120 }, { "epoch": 0.4218660605217176, "grad_norm": 0.09789435565471649, "learning_rate": 0.002, "loss": 2.3579, "step": 109130 }, { "epoch": 0.4219047177251009, "grad_norm": 0.12149273604154587, "learning_rate": 0.002, "loss": 2.3501, "step": 109140 }, { "epoch": 0.42194337492848416, "grad_norm": 0.12185237556695938, "learning_rate": 0.002, "loss": 2.349, "step": 109150 }, { "epoch": 0.42198203213186747, "grad_norm": 0.10570470988750458, "learning_rate": 0.002, "loss": 2.3531, "step": 109160 }, { "epoch": 0.4220206893352507, "grad_norm": 0.11031711846590042, "learning_rate": 0.002, "loss": 2.3539, "step": 109170 }, { "epoch": 0.42205934653863403, "grad_norm": 0.1117752268910408, "learning_rate": 0.002, "loss": 2.3317, "step": 109180 }, { "epoch": 0.4220980037420173, "grad_norm": 0.10328806936740875, "learning_rate": 0.002, "loss": 2.3696, "step": 109190 }, { "epoch": 0.4221366609454006, "grad_norm": 0.11777733266353607, "learning_rate": 0.002, "loss": 2.3464, "step": 109200 }, { "epoch": 0.42217531814878384, "grad_norm": 0.10123222321271896, "learning_rate": 0.002, "loss": 2.3405, "step": 109210 }, { "epoch": 0.42221397535216715, "grad_norm": 0.09601963311433792, "learning_rate": 0.002, "loss": 2.3509, "step": 109220 }, { "epoch": 0.4222526325555504, "grad_norm": 0.10355490446090698, "learning_rate": 0.002, "loss": 2.3536, "step": 109230 }, { "epoch": 0.42229128975893365, "grad_norm": 0.12778176367282867, "learning_rate": 0.002, "loss": 2.3596, "step": 109240 }, { "epoch": 0.42232994696231696, "grad_norm": 0.1110726073384285, "learning_rate": 0.002, "loss": 2.3386, "step": 109250 }, { "epoch": 0.4223686041657002, "grad_norm": 0.10866408050060272, "learning_rate": 0.002, "loss": 2.3444, "step": 109260 }, { "epoch": 0.4224072613690835, "grad_norm": 0.10042106360197067, "learning_rate": 0.002, "loss": 2.3554, "step": 109270 }, { "epoch": 0.42244591857246677, "grad_norm": 0.11243009567260742, "learning_rate": 0.002, "loss": 2.3618, "step": 109280 }, { "epoch": 0.4224845757758501, "grad_norm": 0.10251044481992722, "learning_rate": 0.002, "loss": 2.3435, "step": 109290 }, { "epoch": 0.42252323297923333, "grad_norm": 0.0979798436164856, "learning_rate": 0.002, "loss": 2.3526, "step": 109300 }, { "epoch": 0.42256189018261664, "grad_norm": 0.1106082871556282, "learning_rate": 0.002, "loss": 2.3547, "step": 109310 }, { "epoch": 0.4226005473859999, "grad_norm": 0.11181111633777618, "learning_rate": 0.002, "loss": 2.3472, "step": 109320 }, { "epoch": 0.4226392045893832, "grad_norm": 0.09771327674388885, "learning_rate": 0.002, "loss": 2.3531, "step": 109330 }, { "epoch": 0.42267786179276645, "grad_norm": 0.11357173323631287, "learning_rate": 0.002, "loss": 2.3352, "step": 109340 }, { "epoch": 0.42271651899614976, "grad_norm": 0.10991322249174118, "learning_rate": 0.002, "loss": 2.354, "step": 109350 }, { "epoch": 0.422755176199533, "grad_norm": 0.09731178730726242, "learning_rate": 0.002, "loss": 2.3335, "step": 109360 }, { "epoch": 0.4227938334029163, "grad_norm": 0.09690798819065094, "learning_rate": 0.002, "loss": 2.3525, "step": 109370 }, { "epoch": 0.42283249060629957, "grad_norm": 0.09169499576091766, "learning_rate": 0.002, "loss": 2.3482, "step": 109380 }, { "epoch": 0.4228711478096829, "grad_norm": 0.11018433421850204, "learning_rate": 0.002, "loss": 2.3484, "step": 109390 }, { "epoch": 0.42290980501306613, "grad_norm": 0.10204283148050308, "learning_rate": 0.002, "loss": 2.3658, "step": 109400 }, { "epoch": 0.42294846221644944, "grad_norm": 0.12003272771835327, "learning_rate": 0.002, "loss": 2.3538, "step": 109410 }, { "epoch": 0.4229871194198327, "grad_norm": 0.10572025179862976, "learning_rate": 0.002, "loss": 2.3661, "step": 109420 }, { "epoch": 0.42302577662321594, "grad_norm": 0.11625831574201584, "learning_rate": 0.002, "loss": 2.346, "step": 109430 }, { "epoch": 0.42306443382659925, "grad_norm": 0.11393840610980988, "learning_rate": 0.002, "loss": 2.3351, "step": 109440 }, { "epoch": 0.4231030910299825, "grad_norm": 0.10184957087039948, "learning_rate": 0.002, "loss": 2.3567, "step": 109450 }, { "epoch": 0.4231417482333658, "grad_norm": 0.11889674514532089, "learning_rate": 0.002, "loss": 2.343, "step": 109460 }, { "epoch": 0.42318040543674906, "grad_norm": 0.09927832335233688, "learning_rate": 0.002, "loss": 2.3461, "step": 109470 }, { "epoch": 0.42321906264013237, "grad_norm": 0.09848331660032272, "learning_rate": 0.002, "loss": 2.3355, "step": 109480 }, { "epoch": 0.4232577198435156, "grad_norm": 0.1316344141960144, "learning_rate": 0.002, "loss": 2.3462, "step": 109490 }, { "epoch": 0.42329637704689893, "grad_norm": 0.09667333960533142, "learning_rate": 0.002, "loss": 2.354, "step": 109500 }, { "epoch": 0.4233350342502822, "grad_norm": 0.11364229023456573, "learning_rate": 0.002, "loss": 2.336, "step": 109510 }, { "epoch": 0.4233736914536655, "grad_norm": 0.09983102232217789, "learning_rate": 0.002, "loss": 2.3514, "step": 109520 }, { "epoch": 0.42341234865704874, "grad_norm": 0.10466358810663223, "learning_rate": 0.002, "loss": 2.3386, "step": 109530 }, { "epoch": 0.42345100586043205, "grad_norm": 0.17259664833545685, "learning_rate": 0.002, "loss": 2.3316, "step": 109540 }, { "epoch": 0.4234896630638153, "grad_norm": 0.11260571330785751, "learning_rate": 0.002, "loss": 2.3571, "step": 109550 }, { "epoch": 0.4235283202671986, "grad_norm": 0.11814837157726288, "learning_rate": 0.002, "loss": 2.3375, "step": 109560 }, { "epoch": 0.42356697747058186, "grad_norm": 0.11993677169084549, "learning_rate": 0.002, "loss": 2.3481, "step": 109570 }, { "epoch": 0.42360563467396517, "grad_norm": 0.12143225222826004, "learning_rate": 0.002, "loss": 2.3552, "step": 109580 }, { "epoch": 0.4236442918773484, "grad_norm": 0.10307957977056503, "learning_rate": 0.002, "loss": 2.3395, "step": 109590 }, { "epoch": 0.42368294908073173, "grad_norm": 0.09886326640844345, "learning_rate": 0.002, "loss": 2.3591, "step": 109600 }, { "epoch": 0.423721606284115, "grad_norm": 0.10816150903701782, "learning_rate": 0.002, "loss": 2.342, "step": 109610 }, { "epoch": 0.42376026348749823, "grad_norm": 0.09745252877473831, "learning_rate": 0.002, "loss": 2.3412, "step": 109620 }, { "epoch": 0.42379892069088154, "grad_norm": 0.11790237575769424, "learning_rate": 0.002, "loss": 2.344, "step": 109630 }, { "epoch": 0.4238375778942648, "grad_norm": 0.11370725184679031, "learning_rate": 0.002, "loss": 2.3505, "step": 109640 }, { "epoch": 0.4238762350976481, "grad_norm": 0.1040082573890686, "learning_rate": 0.002, "loss": 2.3275, "step": 109650 }, { "epoch": 0.42391489230103135, "grad_norm": 0.12172899395227432, "learning_rate": 0.002, "loss": 2.3443, "step": 109660 }, { "epoch": 0.42395354950441466, "grad_norm": 0.12151210010051727, "learning_rate": 0.002, "loss": 2.3519, "step": 109670 }, { "epoch": 0.4239922067077979, "grad_norm": 0.09635493159294128, "learning_rate": 0.002, "loss": 2.3467, "step": 109680 }, { "epoch": 0.4240308639111812, "grad_norm": 0.1302810162305832, "learning_rate": 0.002, "loss": 2.3633, "step": 109690 }, { "epoch": 0.4240695211145645, "grad_norm": 0.1121583878993988, "learning_rate": 0.002, "loss": 2.3324, "step": 109700 }, { "epoch": 0.4241081783179478, "grad_norm": 0.10202564299106598, "learning_rate": 0.002, "loss": 2.3531, "step": 109710 }, { "epoch": 0.42414683552133103, "grad_norm": 0.11339136958122253, "learning_rate": 0.002, "loss": 2.3609, "step": 109720 }, { "epoch": 0.42418549272471434, "grad_norm": 0.10718164592981339, "learning_rate": 0.002, "loss": 2.3531, "step": 109730 }, { "epoch": 0.4242241499280976, "grad_norm": 0.11814633756875992, "learning_rate": 0.002, "loss": 2.3498, "step": 109740 }, { "epoch": 0.4242628071314809, "grad_norm": 0.1083814725279808, "learning_rate": 0.002, "loss": 2.3351, "step": 109750 }, { "epoch": 0.42430146433486415, "grad_norm": 0.10616223514080048, "learning_rate": 0.002, "loss": 2.3415, "step": 109760 }, { "epoch": 0.42434012153824746, "grad_norm": 0.1067415177822113, "learning_rate": 0.002, "loss": 2.3579, "step": 109770 }, { "epoch": 0.4243787787416307, "grad_norm": 0.1123105064034462, "learning_rate": 0.002, "loss": 2.3614, "step": 109780 }, { "epoch": 0.424417435945014, "grad_norm": 0.10695965588092804, "learning_rate": 0.002, "loss": 2.3441, "step": 109790 }, { "epoch": 0.4244560931483973, "grad_norm": 0.11023906618356705, "learning_rate": 0.002, "loss": 2.3445, "step": 109800 }, { "epoch": 0.4244947503517805, "grad_norm": 0.11257080733776093, "learning_rate": 0.002, "loss": 2.3386, "step": 109810 }, { "epoch": 0.42453340755516383, "grad_norm": 0.12878021597862244, "learning_rate": 0.002, "loss": 2.3517, "step": 109820 }, { "epoch": 0.4245720647585471, "grad_norm": 0.10409737378358841, "learning_rate": 0.002, "loss": 2.35, "step": 109830 }, { "epoch": 0.4246107219619304, "grad_norm": 0.11641906946897507, "learning_rate": 0.002, "loss": 2.3551, "step": 109840 }, { "epoch": 0.42464937916531365, "grad_norm": 0.10731948912143707, "learning_rate": 0.002, "loss": 2.3548, "step": 109850 }, { "epoch": 0.42468803636869695, "grad_norm": 0.1166120246052742, "learning_rate": 0.002, "loss": 2.3554, "step": 109860 }, { "epoch": 0.4247266935720802, "grad_norm": 0.1179104596376419, "learning_rate": 0.002, "loss": 2.3506, "step": 109870 }, { "epoch": 0.4247653507754635, "grad_norm": 0.10144560039043427, "learning_rate": 0.002, "loss": 2.3473, "step": 109880 }, { "epoch": 0.42480400797884676, "grad_norm": 0.10600446164608002, "learning_rate": 0.002, "loss": 2.3329, "step": 109890 }, { "epoch": 0.4248426651822301, "grad_norm": 0.10021763294935226, "learning_rate": 0.002, "loss": 2.3429, "step": 109900 }, { "epoch": 0.4248813223856133, "grad_norm": 0.12647105753421783, "learning_rate": 0.002, "loss": 2.3543, "step": 109910 }, { "epoch": 0.42491997958899663, "grad_norm": 0.10528162866830826, "learning_rate": 0.002, "loss": 2.3453, "step": 109920 }, { "epoch": 0.4249586367923799, "grad_norm": 0.09694304317235947, "learning_rate": 0.002, "loss": 2.3644, "step": 109930 }, { "epoch": 0.4249972939957632, "grad_norm": 0.12218614667654037, "learning_rate": 0.002, "loss": 2.3579, "step": 109940 }, { "epoch": 0.42503595119914644, "grad_norm": 0.09037092328071594, "learning_rate": 0.002, "loss": 2.3445, "step": 109950 }, { "epoch": 0.42507460840252975, "grad_norm": 0.10441536456346512, "learning_rate": 0.002, "loss": 2.3544, "step": 109960 }, { "epoch": 0.425113265605913, "grad_norm": 0.09831688553094864, "learning_rate": 0.002, "loss": 2.3357, "step": 109970 }, { "epoch": 0.42515192280929626, "grad_norm": 0.13294687867164612, "learning_rate": 0.002, "loss": 2.3375, "step": 109980 }, { "epoch": 0.42519058001267956, "grad_norm": 0.10908882319927216, "learning_rate": 0.002, "loss": 2.3431, "step": 109990 }, { "epoch": 0.4252292372160628, "grad_norm": 0.12384915351867676, "learning_rate": 0.002, "loss": 2.3564, "step": 110000 }, { "epoch": 0.4252678944194461, "grad_norm": 0.09297414124011993, "learning_rate": 0.002, "loss": 2.3509, "step": 110010 }, { "epoch": 0.4253065516228294, "grad_norm": 0.10207641124725342, "learning_rate": 0.002, "loss": 2.3426, "step": 110020 }, { "epoch": 0.4253452088262127, "grad_norm": 0.09486313164234161, "learning_rate": 0.002, "loss": 2.3438, "step": 110030 }, { "epoch": 0.42538386602959594, "grad_norm": 0.10835953801870346, "learning_rate": 0.002, "loss": 2.3405, "step": 110040 }, { "epoch": 0.42542252323297924, "grad_norm": 0.10523147881031036, "learning_rate": 0.002, "loss": 2.3474, "step": 110050 }, { "epoch": 0.4254611804363625, "grad_norm": 0.10926689207553864, "learning_rate": 0.002, "loss": 2.3545, "step": 110060 }, { "epoch": 0.4254998376397458, "grad_norm": 0.10223328322172165, "learning_rate": 0.002, "loss": 2.3487, "step": 110070 }, { "epoch": 0.42553849484312906, "grad_norm": 0.1133745089173317, "learning_rate": 0.002, "loss": 2.3612, "step": 110080 }, { "epoch": 0.42557715204651236, "grad_norm": 0.11308858543634415, "learning_rate": 0.002, "loss": 2.3338, "step": 110090 }, { "epoch": 0.4256158092498956, "grad_norm": 0.10442328453063965, "learning_rate": 0.002, "loss": 2.3581, "step": 110100 }, { "epoch": 0.4256544664532789, "grad_norm": 0.1094212755560875, "learning_rate": 0.002, "loss": 2.3425, "step": 110110 }, { "epoch": 0.4256931236566622, "grad_norm": 0.1028483510017395, "learning_rate": 0.002, "loss": 2.347, "step": 110120 }, { "epoch": 0.4257317808600455, "grad_norm": 0.10775095969438553, "learning_rate": 0.002, "loss": 2.343, "step": 110130 }, { "epoch": 0.42577043806342874, "grad_norm": 0.10518835484981537, "learning_rate": 0.002, "loss": 2.3298, "step": 110140 }, { "epoch": 0.42580909526681204, "grad_norm": 0.11880367249250412, "learning_rate": 0.002, "loss": 2.3509, "step": 110150 }, { "epoch": 0.4258477524701953, "grad_norm": 0.10569731891155243, "learning_rate": 0.002, "loss": 2.3559, "step": 110160 }, { "epoch": 0.42588640967357855, "grad_norm": 0.10588982701301575, "learning_rate": 0.002, "loss": 2.3342, "step": 110170 }, { "epoch": 0.42592506687696186, "grad_norm": 0.10703343152999878, "learning_rate": 0.002, "loss": 2.3548, "step": 110180 }, { "epoch": 0.4259637240803451, "grad_norm": 0.10179496556520462, "learning_rate": 0.002, "loss": 2.3371, "step": 110190 }, { "epoch": 0.4260023812837284, "grad_norm": 0.11455044895410538, "learning_rate": 0.002, "loss": 2.3536, "step": 110200 }, { "epoch": 0.42604103848711167, "grad_norm": 0.1237921491265297, "learning_rate": 0.002, "loss": 2.3574, "step": 110210 }, { "epoch": 0.426079695690495, "grad_norm": 0.10922089219093323, "learning_rate": 0.002, "loss": 2.3555, "step": 110220 }, { "epoch": 0.4261183528938782, "grad_norm": 0.11585081368684769, "learning_rate": 0.002, "loss": 2.3486, "step": 110230 }, { "epoch": 0.42615701009726153, "grad_norm": 0.09888070076704025, "learning_rate": 0.002, "loss": 2.3485, "step": 110240 }, { "epoch": 0.4261956673006448, "grad_norm": 0.1655680388212204, "learning_rate": 0.002, "loss": 2.3673, "step": 110250 }, { "epoch": 0.4262343245040281, "grad_norm": 0.11325088143348694, "learning_rate": 0.002, "loss": 2.358, "step": 110260 }, { "epoch": 0.42627298170741135, "grad_norm": 0.13202178478240967, "learning_rate": 0.002, "loss": 2.3502, "step": 110270 }, { "epoch": 0.42631163891079465, "grad_norm": 0.11255593597888947, "learning_rate": 0.002, "loss": 2.3533, "step": 110280 }, { "epoch": 0.4263502961141779, "grad_norm": 0.1038581058382988, "learning_rate": 0.002, "loss": 2.3572, "step": 110290 }, { "epoch": 0.4263889533175612, "grad_norm": 0.11428995430469513, "learning_rate": 0.002, "loss": 2.3389, "step": 110300 }, { "epoch": 0.42642761052094447, "grad_norm": 0.10118252038955688, "learning_rate": 0.002, "loss": 2.3621, "step": 110310 }, { "epoch": 0.4264662677243278, "grad_norm": 0.10601963847875595, "learning_rate": 0.002, "loss": 2.3574, "step": 110320 }, { "epoch": 0.426504924927711, "grad_norm": 0.11903627216815948, "learning_rate": 0.002, "loss": 2.3437, "step": 110330 }, { "epoch": 0.42654358213109433, "grad_norm": 0.10223052650690079, "learning_rate": 0.002, "loss": 2.3382, "step": 110340 }, { "epoch": 0.4265822393344776, "grad_norm": 0.3356073796749115, "learning_rate": 0.002, "loss": 2.3438, "step": 110350 }, { "epoch": 0.42662089653786084, "grad_norm": 0.10752939432859421, "learning_rate": 0.002, "loss": 2.3453, "step": 110360 }, { "epoch": 0.42665955374124415, "grad_norm": 0.10979057103395462, "learning_rate": 0.002, "loss": 2.3411, "step": 110370 }, { "epoch": 0.4266982109446274, "grad_norm": 0.10704939812421799, "learning_rate": 0.002, "loss": 2.3621, "step": 110380 }, { "epoch": 0.4267368681480107, "grad_norm": 0.09147896617650986, "learning_rate": 0.002, "loss": 2.3419, "step": 110390 }, { "epoch": 0.42677552535139396, "grad_norm": 0.14855432510375977, "learning_rate": 0.002, "loss": 2.3615, "step": 110400 }, { "epoch": 0.42681418255477727, "grad_norm": 0.11184597760438919, "learning_rate": 0.002, "loss": 2.3471, "step": 110410 }, { "epoch": 0.4268528397581605, "grad_norm": 0.08798114955425262, "learning_rate": 0.002, "loss": 2.3265, "step": 110420 }, { "epoch": 0.4268914969615438, "grad_norm": 0.10517899692058563, "learning_rate": 0.002, "loss": 2.344, "step": 110430 }, { "epoch": 0.4269301541649271, "grad_norm": 0.09603867679834366, "learning_rate": 0.002, "loss": 2.3397, "step": 110440 }, { "epoch": 0.4269688113683104, "grad_norm": 0.10815239697694778, "learning_rate": 0.002, "loss": 2.3593, "step": 110450 }, { "epoch": 0.42700746857169364, "grad_norm": 0.09845545142889023, "learning_rate": 0.002, "loss": 2.3596, "step": 110460 }, { "epoch": 0.42704612577507695, "grad_norm": 0.0918077826499939, "learning_rate": 0.002, "loss": 2.3604, "step": 110470 }, { "epoch": 0.4270847829784602, "grad_norm": 0.11321431398391724, "learning_rate": 0.002, "loss": 2.3489, "step": 110480 }, { "epoch": 0.4271234401818435, "grad_norm": 0.11410441249608994, "learning_rate": 0.002, "loss": 2.3365, "step": 110490 }, { "epoch": 0.42716209738522676, "grad_norm": 0.09834035485982895, "learning_rate": 0.002, "loss": 2.3396, "step": 110500 }, { "epoch": 0.42720075458861007, "grad_norm": 0.1138286218047142, "learning_rate": 0.002, "loss": 2.354, "step": 110510 }, { "epoch": 0.4272394117919933, "grad_norm": 0.11498411744832993, "learning_rate": 0.002, "loss": 2.3399, "step": 110520 }, { "epoch": 0.4272780689953766, "grad_norm": 0.11137287318706512, "learning_rate": 0.002, "loss": 2.35, "step": 110530 }, { "epoch": 0.4273167261987599, "grad_norm": 0.10131389647722244, "learning_rate": 0.002, "loss": 2.3383, "step": 110540 }, { "epoch": 0.42735538340214313, "grad_norm": 0.10402471572160721, "learning_rate": 0.002, "loss": 2.3467, "step": 110550 }, { "epoch": 0.42739404060552644, "grad_norm": 0.11300648748874664, "learning_rate": 0.002, "loss": 2.3507, "step": 110560 }, { "epoch": 0.4274326978089097, "grad_norm": 0.09731554985046387, "learning_rate": 0.002, "loss": 2.3599, "step": 110570 }, { "epoch": 0.427471355012293, "grad_norm": 0.1155574768781662, "learning_rate": 0.002, "loss": 2.3509, "step": 110580 }, { "epoch": 0.42751001221567625, "grad_norm": 0.10829325020313263, "learning_rate": 0.002, "loss": 2.3406, "step": 110590 }, { "epoch": 0.42754866941905956, "grad_norm": 0.10412159562110901, "learning_rate": 0.002, "loss": 2.3427, "step": 110600 }, { "epoch": 0.4275873266224428, "grad_norm": 0.11188670992851257, "learning_rate": 0.002, "loss": 2.3462, "step": 110610 }, { "epoch": 0.4276259838258261, "grad_norm": 0.1147642582654953, "learning_rate": 0.002, "loss": 2.3496, "step": 110620 }, { "epoch": 0.42766464102920937, "grad_norm": 0.11237642914056778, "learning_rate": 0.002, "loss": 2.3365, "step": 110630 }, { "epoch": 0.4277032982325927, "grad_norm": 0.13152167201042175, "learning_rate": 0.002, "loss": 2.3537, "step": 110640 }, { "epoch": 0.42774195543597593, "grad_norm": 0.10723838210105896, "learning_rate": 0.002, "loss": 2.3465, "step": 110650 }, { "epoch": 0.42778061263935924, "grad_norm": 0.10246115922927856, "learning_rate": 0.002, "loss": 2.3433, "step": 110660 }, { "epoch": 0.4278192698427425, "grad_norm": 0.11409907788038254, "learning_rate": 0.002, "loss": 2.3466, "step": 110670 }, { "epoch": 0.4278579270461258, "grad_norm": 0.11728665977716446, "learning_rate": 0.002, "loss": 2.3485, "step": 110680 }, { "epoch": 0.42789658424950905, "grad_norm": 0.12186159938573837, "learning_rate": 0.002, "loss": 2.3283, "step": 110690 }, { "epoch": 0.42793524145289236, "grad_norm": 0.10480479896068573, "learning_rate": 0.002, "loss": 2.3352, "step": 110700 }, { "epoch": 0.4279738986562756, "grad_norm": 0.10415849089622498, "learning_rate": 0.002, "loss": 2.3387, "step": 110710 }, { "epoch": 0.4280125558596589, "grad_norm": 0.10350564867258072, "learning_rate": 0.002, "loss": 2.3446, "step": 110720 }, { "epoch": 0.42805121306304217, "grad_norm": 0.10367821902036667, "learning_rate": 0.002, "loss": 2.3496, "step": 110730 }, { "epoch": 0.4280898702664254, "grad_norm": 0.10381826758384705, "learning_rate": 0.002, "loss": 2.3301, "step": 110740 }, { "epoch": 0.42812852746980873, "grad_norm": 0.10306168347597122, "learning_rate": 0.002, "loss": 2.3759, "step": 110750 }, { "epoch": 0.428167184673192, "grad_norm": 0.12796978652477264, "learning_rate": 0.002, "loss": 2.3525, "step": 110760 }, { "epoch": 0.4282058418765753, "grad_norm": 0.10191497951745987, "learning_rate": 0.002, "loss": 2.3476, "step": 110770 }, { "epoch": 0.42824449907995854, "grad_norm": 0.10926347225904465, "learning_rate": 0.002, "loss": 2.3494, "step": 110780 }, { "epoch": 0.42828315628334185, "grad_norm": 0.10035771876573563, "learning_rate": 0.002, "loss": 2.364, "step": 110790 }, { "epoch": 0.4283218134867251, "grad_norm": 0.10627880692481995, "learning_rate": 0.002, "loss": 2.3487, "step": 110800 }, { "epoch": 0.4283604706901084, "grad_norm": 0.10428674519062042, "learning_rate": 0.002, "loss": 2.3654, "step": 110810 }, { "epoch": 0.42839912789349166, "grad_norm": 0.11040990799665451, "learning_rate": 0.002, "loss": 2.3414, "step": 110820 }, { "epoch": 0.42843778509687497, "grad_norm": 0.09725743532180786, "learning_rate": 0.002, "loss": 2.3443, "step": 110830 }, { "epoch": 0.4284764423002582, "grad_norm": 0.10322292149066925, "learning_rate": 0.002, "loss": 2.3426, "step": 110840 }, { "epoch": 0.42851509950364153, "grad_norm": 0.12217211723327637, "learning_rate": 0.002, "loss": 2.3415, "step": 110850 }, { "epoch": 0.4285537567070248, "grad_norm": 0.1275206059217453, "learning_rate": 0.002, "loss": 2.3452, "step": 110860 }, { "epoch": 0.4285924139104081, "grad_norm": 0.11368425190448761, "learning_rate": 0.002, "loss": 2.3503, "step": 110870 }, { "epoch": 0.42863107111379134, "grad_norm": 0.10147102922201157, "learning_rate": 0.002, "loss": 2.3644, "step": 110880 }, { "epoch": 0.42866972831717465, "grad_norm": 0.0996956005692482, "learning_rate": 0.002, "loss": 2.3377, "step": 110890 }, { "epoch": 0.4287083855205579, "grad_norm": 0.3376283645629883, "learning_rate": 0.002, "loss": 2.3564, "step": 110900 }, { "epoch": 0.42874704272394115, "grad_norm": 0.13506639003753662, "learning_rate": 0.002, "loss": 2.3571, "step": 110910 }, { "epoch": 0.42878569992732446, "grad_norm": 0.10100368410348892, "learning_rate": 0.002, "loss": 2.3641, "step": 110920 }, { "epoch": 0.4288243571307077, "grad_norm": 0.09164312481880188, "learning_rate": 0.002, "loss": 2.3457, "step": 110930 }, { "epoch": 0.428863014334091, "grad_norm": 0.12385259568691254, "learning_rate": 0.002, "loss": 2.3536, "step": 110940 }, { "epoch": 0.42890167153747427, "grad_norm": 0.1286320984363556, "learning_rate": 0.002, "loss": 2.3554, "step": 110950 }, { "epoch": 0.4289403287408576, "grad_norm": 0.11350031942129135, "learning_rate": 0.002, "loss": 2.3563, "step": 110960 }, { "epoch": 0.42897898594424083, "grad_norm": 0.10365412384271622, "learning_rate": 0.002, "loss": 2.3453, "step": 110970 }, { "epoch": 0.42901764314762414, "grad_norm": 0.1234087273478508, "learning_rate": 0.002, "loss": 2.3447, "step": 110980 }, { "epoch": 0.4290563003510074, "grad_norm": 0.11059076339006424, "learning_rate": 0.002, "loss": 2.3395, "step": 110990 }, { "epoch": 0.4290949575543907, "grad_norm": 0.09807975590229034, "learning_rate": 0.002, "loss": 2.36, "step": 111000 }, { "epoch": 0.42913361475777395, "grad_norm": 0.12637066841125488, "learning_rate": 0.002, "loss": 2.3516, "step": 111010 }, { "epoch": 0.42917227196115726, "grad_norm": 0.08945341408252716, "learning_rate": 0.002, "loss": 2.3569, "step": 111020 }, { "epoch": 0.4292109291645405, "grad_norm": 0.10467803478240967, "learning_rate": 0.002, "loss": 2.3432, "step": 111030 }, { "epoch": 0.4292495863679238, "grad_norm": 0.09956691414117813, "learning_rate": 0.002, "loss": 2.3476, "step": 111040 }, { "epoch": 0.42928824357130707, "grad_norm": 0.1146853044629097, "learning_rate": 0.002, "loss": 2.3485, "step": 111050 }, { "epoch": 0.4293269007746904, "grad_norm": 0.10632400959730148, "learning_rate": 0.002, "loss": 2.3482, "step": 111060 }, { "epoch": 0.42936555797807363, "grad_norm": 0.09750671684741974, "learning_rate": 0.002, "loss": 2.3558, "step": 111070 }, { "epoch": 0.42940421518145694, "grad_norm": 0.10774068534374237, "learning_rate": 0.002, "loss": 2.3436, "step": 111080 }, { "epoch": 0.4294428723848402, "grad_norm": 0.09877938777208328, "learning_rate": 0.002, "loss": 2.3504, "step": 111090 }, { "epoch": 0.42948152958822344, "grad_norm": 0.09424333274364471, "learning_rate": 0.002, "loss": 2.3495, "step": 111100 }, { "epoch": 0.42952018679160675, "grad_norm": 0.1110386922955513, "learning_rate": 0.002, "loss": 2.3479, "step": 111110 }, { "epoch": 0.42955884399499, "grad_norm": 0.10476934909820557, "learning_rate": 0.002, "loss": 2.3414, "step": 111120 }, { "epoch": 0.4295975011983733, "grad_norm": 0.11968885362148285, "learning_rate": 0.002, "loss": 2.3501, "step": 111130 }, { "epoch": 0.42963615840175656, "grad_norm": 0.10490576177835464, "learning_rate": 0.002, "loss": 2.344, "step": 111140 }, { "epoch": 0.42967481560513987, "grad_norm": 0.09844032675027847, "learning_rate": 0.002, "loss": 2.3489, "step": 111150 }, { "epoch": 0.4297134728085231, "grad_norm": 0.11125150322914124, "learning_rate": 0.002, "loss": 2.3482, "step": 111160 }, { "epoch": 0.42975213001190643, "grad_norm": 0.09913921356201172, "learning_rate": 0.002, "loss": 2.3568, "step": 111170 }, { "epoch": 0.4297907872152897, "grad_norm": 0.11865279078483582, "learning_rate": 0.002, "loss": 2.35, "step": 111180 }, { "epoch": 0.429829444418673, "grad_norm": 0.10810180008411407, "learning_rate": 0.002, "loss": 2.3425, "step": 111190 }, { "epoch": 0.42986810162205624, "grad_norm": 0.10936781018972397, "learning_rate": 0.002, "loss": 2.3254, "step": 111200 }, { "epoch": 0.42990675882543955, "grad_norm": 0.10779301077127457, "learning_rate": 0.002, "loss": 2.3379, "step": 111210 }, { "epoch": 0.4299454160288228, "grad_norm": 0.10778950154781342, "learning_rate": 0.002, "loss": 2.338, "step": 111220 }, { "epoch": 0.4299840732322061, "grad_norm": 0.10507801920175552, "learning_rate": 0.002, "loss": 2.3405, "step": 111230 }, { "epoch": 0.43002273043558936, "grad_norm": 0.10925329476594925, "learning_rate": 0.002, "loss": 2.3448, "step": 111240 }, { "epoch": 0.43006138763897267, "grad_norm": 0.11180612444877625, "learning_rate": 0.002, "loss": 2.344, "step": 111250 }, { "epoch": 0.4301000448423559, "grad_norm": 0.10876971483230591, "learning_rate": 0.002, "loss": 2.3457, "step": 111260 }, { "epoch": 0.43013870204573923, "grad_norm": 0.09879743307828903, "learning_rate": 0.002, "loss": 2.352, "step": 111270 }, { "epoch": 0.4301773592491225, "grad_norm": 0.1002826914191246, "learning_rate": 0.002, "loss": 2.3515, "step": 111280 }, { "epoch": 0.43021601645250573, "grad_norm": 0.10194902122020721, "learning_rate": 0.002, "loss": 2.3434, "step": 111290 }, { "epoch": 0.43025467365588904, "grad_norm": 0.13493533432483673, "learning_rate": 0.002, "loss": 2.3495, "step": 111300 }, { "epoch": 0.4302933308592723, "grad_norm": 0.10006940364837646, "learning_rate": 0.002, "loss": 2.3468, "step": 111310 }, { "epoch": 0.4303319880626556, "grad_norm": 0.10241258889436722, "learning_rate": 0.002, "loss": 2.3571, "step": 111320 }, { "epoch": 0.43037064526603885, "grad_norm": 0.10722459852695465, "learning_rate": 0.002, "loss": 2.3375, "step": 111330 }, { "epoch": 0.43040930246942216, "grad_norm": 0.11641300469636917, "learning_rate": 0.002, "loss": 2.3466, "step": 111340 }, { "epoch": 0.4304479596728054, "grad_norm": 0.11539609730243683, "learning_rate": 0.002, "loss": 2.3537, "step": 111350 }, { "epoch": 0.4304866168761887, "grad_norm": 0.10943485796451569, "learning_rate": 0.002, "loss": 2.357, "step": 111360 }, { "epoch": 0.430525274079572, "grad_norm": 0.09740972518920898, "learning_rate": 0.002, "loss": 2.3355, "step": 111370 }, { "epoch": 0.4305639312829553, "grad_norm": 0.11207325756549835, "learning_rate": 0.002, "loss": 2.3262, "step": 111380 }, { "epoch": 0.43060258848633853, "grad_norm": 0.11338721215724945, "learning_rate": 0.002, "loss": 2.3469, "step": 111390 }, { "epoch": 0.43064124568972184, "grad_norm": 0.09858185797929764, "learning_rate": 0.002, "loss": 2.3483, "step": 111400 }, { "epoch": 0.4306799028931051, "grad_norm": 0.10881024599075317, "learning_rate": 0.002, "loss": 2.3455, "step": 111410 }, { "epoch": 0.4307185600964884, "grad_norm": 0.10683208703994751, "learning_rate": 0.002, "loss": 2.346, "step": 111420 }, { "epoch": 0.43075721729987165, "grad_norm": 0.09608019888401031, "learning_rate": 0.002, "loss": 2.3569, "step": 111430 }, { "epoch": 0.43079587450325496, "grad_norm": 0.08652333915233612, "learning_rate": 0.002, "loss": 2.3482, "step": 111440 }, { "epoch": 0.4308345317066382, "grad_norm": 0.11403698474168777, "learning_rate": 0.002, "loss": 2.3477, "step": 111450 }, { "epoch": 0.4308731889100215, "grad_norm": 0.10857101529836655, "learning_rate": 0.002, "loss": 2.3497, "step": 111460 }, { "epoch": 0.4309118461134048, "grad_norm": 0.10597855597734451, "learning_rate": 0.002, "loss": 2.3457, "step": 111470 }, { "epoch": 0.430950503316788, "grad_norm": 0.0985519140958786, "learning_rate": 0.002, "loss": 2.3549, "step": 111480 }, { "epoch": 0.43098916052017133, "grad_norm": 0.14048723876476288, "learning_rate": 0.002, "loss": 2.3544, "step": 111490 }, { "epoch": 0.4310278177235546, "grad_norm": 0.09988056868314743, "learning_rate": 0.002, "loss": 2.3426, "step": 111500 }, { "epoch": 0.4310664749269379, "grad_norm": 0.10084901750087738, "learning_rate": 0.002, "loss": 2.342, "step": 111510 }, { "epoch": 0.43110513213032114, "grad_norm": 0.11319395154714584, "learning_rate": 0.002, "loss": 2.3418, "step": 111520 }, { "epoch": 0.43114378933370445, "grad_norm": 0.11077584326267242, "learning_rate": 0.002, "loss": 2.3345, "step": 111530 }, { "epoch": 0.4311824465370877, "grad_norm": 0.10431598871946335, "learning_rate": 0.002, "loss": 2.3522, "step": 111540 }, { "epoch": 0.431221103740471, "grad_norm": 0.1093798279762268, "learning_rate": 0.002, "loss": 2.3559, "step": 111550 }, { "epoch": 0.43125976094385426, "grad_norm": 0.11040189862251282, "learning_rate": 0.002, "loss": 2.3619, "step": 111560 }, { "epoch": 0.43129841814723757, "grad_norm": 0.09806570410728455, "learning_rate": 0.002, "loss": 2.3432, "step": 111570 }, { "epoch": 0.4313370753506208, "grad_norm": 0.11054784804582596, "learning_rate": 0.002, "loss": 2.3403, "step": 111580 }, { "epoch": 0.43137573255400413, "grad_norm": 0.11820884793996811, "learning_rate": 0.002, "loss": 2.3608, "step": 111590 }, { "epoch": 0.4314143897573874, "grad_norm": 0.10436604171991348, "learning_rate": 0.002, "loss": 2.3557, "step": 111600 }, { "epoch": 0.4314530469607707, "grad_norm": 0.12815545499324799, "learning_rate": 0.002, "loss": 2.3587, "step": 111610 }, { "epoch": 0.43149170416415394, "grad_norm": 0.090470090508461, "learning_rate": 0.002, "loss": 2.3498, "step": 111620 }, { "epoch": 0.43153036136753725, "grad_norm": 0.09031805396080017, "learning_rate": 0.002, "loss": 2.3318, "step": 111630 }, { "epoch": 0.4315690185709205, "grad_norm": 0.10491258651018143, "learning_rate": 0.002, "loss": 2.3553, "step": 111640 }, { "epoch": 0.43160767577430376, "grad_norm": 0.0920221358537674, "learning_rate": 0.002, "loss": 2.3513, "step": 111650 }, { "epoch": 0.43164633297768706, "grad_norm": 0.1239825040102005, "learning_rate": 0.002, "loss": 2.3388, "step": 111660 }, { "epoch": 0.4316849901810703, "grad_norm": 0.10015200078487396, "learning_rate": 0.002, "loss": 2.3545, "step": 111670 }, { "epoch": 0.4317236473844536, "grad_norm": 0.125696063041687, "learning_rate": 0.002, "loss": 2.3531, "step": 111680 }, { "epoch": 0.4317623045878369, "grad_norm": 0.11201971024274826, "learning_rate": 0.002, "loss": 2.3542, "step": 111690 }, { "epoch": 0.4318009617912202, "grad_norm": 0.10140449553728104, "learning_rate": 0.002, "loss": 2.3512, "step": 111700 }, { "epoch": 0.43183961899460344, "grad_norm": 0.10589606314897537, "learning_rate": 0.002, "loss": 2.3531, "step": 111710 }, { "epoch": 0.43187827619798674, "grad_norm": 0.10249914228916168, "learning_rate": 0.002, "loss": 2.3578, "step": 111720 }, { "epoch": 0.43191693340137, "grad_norm": 0.10037936270236969, "learning_rate": 0.002, "loss": 2.3435, "step": 111730 }, { "epoch": 0.4319555906047533, "grad_norm": 0.09914037585258484, "learning_rate": 0.002, "loss": 2.345, "step": 111740 }, { "epoch": 0.43199424780813656, "grad_norm": 0.09331130981445312, "learning_rate": 0.002, "loss": 2.3476, "step": 111750 }, { "epoch": 0.43203290501151986, "grad_norm": 0.1144673079252243, "learning_rate": 0.002, "loss": 2.3505, "step": 111760 }, { "epoch": 0.4320715622149031, "grad_norm": 0.13211724162101746, "learning_rate": 0.002, "loss": 2.3392, "step": 111770 }, { "epoch": 0.4321102194182864, "grad_norm": 0.11853010207414627, "learning_rate": 0.002, "loss": 2.3584, "step": 111780 }, { "epoch": 0.4321488766216697, "grad_norm": 0.10304831713438034, "learning_rate": 0.002, "loss": 2.3398, "step": 111790 }, { "epoch": 0.432187533825053, "grad_norm": 0.102897509932518, "learning_rate": 0.002, "loss": 2.3595, "step": 111800 }, { "epoch": 0.43222619102843624, "grad_norm": 0.10276034474372864, "learning_rate": 0.002, "loss": 2.3392, "step": 111810 }, { "epoch": 0.43226484823181954, "grad_norm": 0.10357806831598282, "learning_rate": 0.002, "loss": 2.3514, "step": 111820 }, { "epoch": 0.4323035054352028, "grad_norm": 0.10969896614551544, "learning_rate": 0.002, "loss": 2.3526, "step": 111830 }, { "epoch": 0.43234216263858605, "grad_norm": 0.1067805290222168, "learning_rate": 0.002, "loss": 2.3302, "step": 111840 }, { "epoch": 0.43238081984196935, "grad_norm": 0.10147634893655777, "learning_rate": 0.002, "loss": 2.3486, "step": 111850 }, { "epoch": 0.4324194770453526, "grad_norm": 0.13259513676166534, "learning_rate": 0.002, "loss": 2.3546, "step": 111860 }, { "epoch": 0.4324581342487359, "grad_norm": 0.09387633949518204, "learning_rate": 0.002, "loss": 2.3536, "step": 111870 }, { "epoch": 0.43249679145211917, "grad_norm": 0.10201136022806168, "learning_rate": 0.002, "loss": 2.3434, "step": 111880 }, { "epoch": 0.4325354486555025, "grad_norm": 0.11776763200759888, "learning_rate": 0.002, "loss": 2.3431, "step": 111890 }, { "epoch": 0.4325741058588857, "grad_norm": 0.1148151308298111, "learning_rate": 0.002, "loss": 2.343, "step": 111900 }, { "epoch": 0.43261276306226903, "grad_norm": 0.11505398154258728, "learning_rate": 0.002, "loss": 2.344, "step": 111910 }, { "epoch": 0.4326514202656523, "grad_norm": 0.10341343283653259, "learning_rate": 0.002, "loss": 2.355, "step": 111920 }, { "epoch": 0.4326900774690356, "grad_norm": 0.1147983968257904, "learning_rate": 0.002, "loss": 2.3383, "step": 111930 }, { "epoch": 0.43272873467241885, "grad_norm": 0.08892353624105453, "learning_rate": 0.002, "loss": 2.3551, "step": 111940 }, { "epoch": 0.43276739187580215, "grad_norm": 0.09897063672542572, "learning_rate": 0.002, "loss": 2.3437, "step": 111950 }, { "epoch": 0.4328060490791854, "grad_norm": 0.12240561097860336, "learning_rate": 0.002, "loss": 2.3492, "step": 111960 }, { "epoch": 0.4328447062825687, "grad_norm": 0.10131847858428955, "learning_rate": 0.002, "loss": 2.3543, "step": 111970 }, { "epoch": 0.43288336348595197, "grad_norm": 0.10665290057659149, "learning_rate": 0.002, "loss": 2.3556, "step": 111980 }, { "epoch": 0.4329220206893353, "grad_norm": 0.09720829874277115, "learning_rate": 0.002, "loss": 2.3449, "step": 111990 }, { "epoch": 0.4329606778927185, "grad_norm": 0.10075974464416504, "learning_rate": 0.002, "loss": 2.346, "step": 112000 }, { "epoch": 0.43299933509610183, "grad_norm": 0.14467671513557434, "learning_rate": 0.002, "loss": 2.3404, "step": 112010 }, { "epoch": 0.4330379922994851, "grad_norm": 0.09993736445903778, "learning_rate": 0.002, "loss": 2.3616, "step": 112020 }, { "epoch": 0.43307664950286834, "grad_norm": 0.12287890911102295, "learning_rate": 0.002, "loss": 2.3543, "step": 112030 }, { "epoch": 0.43311530670625165, "grad_norm": 0.13064667582511902, "learning_rate": 0.002, "loss": 2.3541, "step": 112040 }, { "epoch": 0.4331539639096349, "grad_norm": 0.150161474943161, "learning_rate": 0.002, "loss": 2.3701, "step": 112050 }, { "epoch": 0.4331926211130182, "grad_norm": 0.24644172191619873, "learning_rate": 0.002, "loss": 2.3769, "step": 112060 }, { "epoch": 0.43323127831640146, "grad_norm": 0.09621595591306686, "learning_rate": 0.002, "loss": 2.3632, "step": 112070 }, { "epoch": 0.43326993551978477, "grad_norm": 0.11061085760593414, "learning_rate": 0.002, "loss": 2.3629, "step": 112080 }, { "epoch": 0.433308592723168, "grad_norm": 0.11562249809503555, "learning_rate": 0.002, "loss": 2.3397, "step": 112090 }, { "epoch": 0.4333472499265513, "grad_norm": 0.10313281416893005, "learning_rate": 0.002, "loss": 2.3323, "step": 112100 }, { "epoch": 0.4333859071299346, "grad_norm": 0.09896907210350037, "learning_rate": 0.002, "loss": 2.3433, "step": 112110 }, { "epoch": 0.4334245643333179, "grad_norm": 0.09867633134126663, "learning_rate": 0.002, "loss": 2.341, "step": 112120 }, { "epoch": 0.43346322153670114, "grad_norm": 0.38379985094070435, "learning_rate": 0.002, "loss": 2.3458, "step": 112130 }, { "epoch": 0.43350187874008445, "grad_norm": 0.14632292091846466, "learning_rate": 0.002, "loss": 2.345, "step": 112140 }, { "epoch": 0.4335405359434677, "grad_norm": 0.12974530458450317, "learning_rate": 0.002, "loss": 2.3416, "step": 112150 }, { "epoch": 0.433579193146851, "grad_norm": 0.1288261115550995, "learning_rate": 0.002, "loss": 2.349, "step": 112160 }, { "epoch": 0.43361785035023426, "grad_norm": 0.09830410033464432, "learning_rate": 0.002, "loss": 2.3692, "step": 112170 }, { "epoch": 0.43365650755361757, "grad_norm": 0.09210671484470367, "learning_rate": 0.002, "loss": 2.3444, "step": 112180 }, { "epoch": 0.4336951647570008, "grad_norm": 0.16515584290027618, "learning_rate": 0.002, "loss": 2.3379, "step": 112190 }, { "epoch": 0.4337338219603841, "grad_norm": 0.10097946971654892, "learning_rate": 0.002, "loss": 2.3442, "step": 112200 }, { "epoch": 0.4337724791637674, "grad_norm": 0.12167806178331375, "learning_rate": 0.002, "loss": 2.3735, "step": 112210 }, { "epoch": 0.43381113636715063, "grad_norm": 0.11054915189743042, "learning_rate": 0.002, "loss": 2.3444, "step": 112220 }, { "epoch": 0.43384979357053394, "grad_norm": 0.10262811928987503, "learning_rate": 0.002, "loss": 2.3524, "step": 112230 }, { "epoch": 0.4338884507739172, "grad_norm": 0.11559311300516129, "learning_rate": 0.002, "loss": 2.3516, "step": 112240 }, { "epoch": 0.4339271079773005, "grad_norm": 0.09575275331735611, "learning_rate": 0.002, "loss": 2.3483, "step": 112250 }, { "epoch": 0.43396576518068375, "grad_norm": 0.10230062901973724, "learning_rate": 0.002, "loss": 2.3283, "step": 112260 }, { "epoch": 0.43400442238406706, "grad_norm": 0.10631681233644485, "learning_rate": 0.002, "loss": 2.3532, "step": 112270 }, { "epoch": 0.4340430795874503, "grad_norm": 0.14762724936008453, "learning_rate": 0.002, "loss": 2.3423, "step": 112280 }, { "epoch": 0.4340817367908336, "grad_norm": 0.1018281877040863, "learning_rate": 0.002, "loss": 2.3342, "step": 112290 }, { "epoch": 0.43412039399421687, "grad_norm": 0.10437997430562973, "learning_rate": 0.002, "loss": 2.3341, "step": 112300 }, { "epoch": 0.4341590511976002, "grad_norm": 0.10592590272426605, "learning_rate": 0.002, "loss": 2.3524, "step": 112310 }, { "epoch": 0.43419770840098343, "grad_norm": 0.09188766032457352, "learning_rate": 0.002, "loss": 2.3535, "step": 112320 }, { "epoch": 0.43423636560436674, "grad_norm": 0.11880189925432205, "learning_rate": 0.002, "loss": 2.3559, "step": 112330 }, { "epoch": 0.43427502280775, "grad_norm": 0.11617539077997208, "learning_rate": 0.002, "loss": 2.3524, "step": 112340 }, { "epoch": 0.4343136800111333, "grad_norm": 0.13290619850158691, "learning_rate": 0.002, "loss": 2.3431, "step": 112350 }, { "epoch": 0.43435233721451655, "grad_norm": 0.10754197090864182, "learning_rate": 0.002, "loss": 2.3493, "step": 112360 }, { "epoch": 0.43439099441789986, "grad_norm": 0.11557207256555557, "learning_rate": 0.002, "loss": 2.3544, "step": 112370 }, { "epoch": 0.4344296516212831, "grad_norm": 0.11429212987422943, "learning_rate": 0.002, "loss": 2.3433, "step": 112380 }, { "epoch": 0.4344683088246664, "grad_norm": 0.12526215612888336, "learning_rate": 0.002, "loss": 2.3603, "step": 112390 }, { "epoch": 0.43450696602804967, "grad_norm": 0.10692817717790604, "learning_rate": 0.002, "loss": 2.3356, "step": 112400 }, { "epoch": 0.4345456232314329, "grad_norm": 0.11485128104686737, "learning_rate": 0.002, "loss": 2.3399, "step": 112410 }, { "epoch": 0.43458428043481623, "grad_norm": 0.1053088828921318, "learning_rate": 0.002, "loss": 2.3498, "step": 112420 }, { "epoch": 0.4346229376381995, "grad_norm": 0.09685841202735901, "learning_rate": 0.002, "loss": 2.3388, "step": 112430 }, { "epoch": 0.4346615948415828, "grad_norm": 0.11361964792013168, "learning_rate": 0.002, "loss": 2.3423, "step": 112440 }, { "epoch": 0.43470025204496604, "grad_norm": 0.10083848237991333, "learning_rate": 0.002, "loss": 2.3554, "step": 112450 }, { "epoch": 0.43473890924834935, "grad_norm": 0.1087498739361763, "learning_rate": 0.002, "loss": 2.3646, "step": 112460 }, { "epoch": 0.4347775664517326, "grad_norm": 0.09891539812088013, "learning_rate": 0.002, "loss": 2.3656, "step": 112470 }, { "epoch": 0.4348162236551159, "grad_norm": 0.09671245515346527, "learning_rate": 0.002, "loss": 2.3526, "step": 112480 }, { "epoch": 0.43485488085849916, "grad_norm": 0.09899502247571945, "learning_rate": 0.002, "loss": 2.3532, "step": 112490 }, { "epoch": 0.43489353806188247, "grad_norm": 0.1066652461886406, "learning_rate": 0.002, "loss": 2.3637, "step": 112500 }, { "epoch": 0.4349321952652657, "grad_norm": 0.09613171964883804, "learning_rate": 0.002, "loss": 2.3508, "step": 112510 }, { "epoch": 0.434970852468649, "grad_norm": 0.10856905579566956, "learning_rate": 0.002, "loss": 2.3496, "step": 112520 }, { "epoch": 0.4350095096720323, "grad_norm": 0.09104996919631958, "learning_rate": 0.002, "loss": 2.3531, "step": 112530 }, { "epoch": 0.4350481668754156, "grad_norm": 0.12133617699146271, "learning_rate": 0.002, "loss": 2.3507, "step": 112540 }, { "epoch": 0.43508682407879884, "grad_norm": 0.10629113763570786, "learning_rate": 0.002, "loss": 2.3591, "step": 112550 }, { "epoch": 0.43512548128218215, "grad_norm": 0.10068577527999878, "learning_rate": 0.002, "loss": 2.3522, "step": 112560 }, { "epoch": 0.4351641384855654, "grad_norm": 0.11508452892303467, "learning_rate": 0.002, "loss": 2.3461, "step": 112570 }, { "epoch": 0.43520279568894865, "grad_norm": 0.09170261025428772, "learning_rate": 0.002, "loss": 2.3371, "step": 112580 }, { "epoch": 0.43524145289233196, "grad_norm": 0.1024169772863388, "learning_rate": 0.002, "loss": 2.3505, "step": 112590 }, { "epoch": 0.4352801100957152, "grad_norm": 0.11252007633447647, "learning_rate": 0.002, "loss": 2.3418, "step": 112600 }, { "epoch": 0.4353187672990985, "grad_norm": 0.10464402288198471, "learning_rate": 0.002, "loss": 2.3536, "step": 112610 }, { "epoch": 0.43535742450248177, "grad_norm": 0.10425686091184616, "learning_rate": 0.002, "loss": 2.3517, "step": 112620 }, { "epoch": 0.4353960817058651, "grad_norm": 0.12892098724842072, "learning_rate": 0.002, "loss": 2.3728, "step": 112630 }, { "epoch": 0.43543473890924833, "grad_norm": 0.11767508834600449, "learning_rate": 0.002, "loss": 2.328, "step": 112640 }, { "epoch": 0.43547339611263164, "grad_norm": 0.09892310202121735, "learning_rate": 0.002, "loss": 2.3412, "step": 112650 }, { "epoch": 0.4355120533160149, "grad_norm": 0.11061010509729385, "learning_rate": 0.002, "loss": 2.3697, "step": 112660 }, { "epoch": 0.4355507105193982, "grad_norm": 0.10130377113819122, "learning_rate": 0.002, "loss": 2.3586, "step": 112670 }, { "epoch": 0.43558936772278145, "grad_norm": 0.12196584790945053, "learning_rate": 0.002, "loss": 2.352, "step": 112680 }, { "epoch": 0.43562802492616476, "grad_norm": 0.10804404318332672, "learning_rate": 0.002, "loss": 2.3437, "step": 112690 }, { "epoch": 0.435666682129548, "grad_norm": 0.11497610062360764, "learning_rate": 0.002, "loss": 2.3535, "step": 112700 }, { "epoch": 0.4357053393329313, "grad_norm": 0.10728848725557327, "learning_rate": 0.002, "loss": 2.3606, "step": 112710 }, { "epoch": 0.43574399653631457, "grad_norm": 0.11768807470798492, "learning_rate": 0.002, "loss": 2.3582, "step": 112720 }, { "epoch": 0.4357826537396979, "grad_norm": 0.1070348247885704, "learning_rate": 0.002, "loss": 2.3555, "step": 112730 }, { "epoch": 0.43582131094308113, "grad_norm": 0.09731265902519226, "learning_rate": 0.002, "loss": 2.3547, "step": 112740 }, { "epoch": 0.43585996814646444, "grad_norm": 0.11236754804849625, "learning_rate": 0.002, "loss": 2.3472, "step": 112750 }, { "epoch": 0.4358986253498477, "grad_norm": 0.10371419787406921, "learning_rate": 0.002, "loss": 2.3436, "step": 112760 }, { "epoch": 0.43593728255323094, "grad_norm": 0.09995149075984955, "learning_rate": 0.002, "loss": 2.3436, "step": 112770 }, { "epoch": 0.43597593975661425, "grad_norm": 0.09958817064762115, "learning_rate": 0.002, "loss": 2.3304, "step": 112780 }, { "epoch": 0.4360145969599975, "grad_norm": 0.1253259927034378, "learning_rate": 0.002, "loss": 2.3501, "step": 112790 }, { "epoch": 0.4360532541633808, "grad_norm": 0.10836859792470932, "learning_rate": 0.002, "loss": 2.3353, "step": 112800 }, { "epoch": 0.43609191136676406, "grad_norm": 0.09147986769676208, "learning_rate": 0.002, "loss": 2.3522, "step": 112810 }, { "epoch": 0.43613056857014737, "grad_norm": 0.09477005153894424, "learning_rate": 0.002, "loss": 2.3429, "step": 112820 }, { "epoch": 0.4361692257735306, "grad_norm": 0.1311466097831726, "learning_rate": 0.002, "loss": 2.3561, "step": 112830 }, { "epoch": 0.43620788297691393, "grad_norm": 0.12641702592372894, "learning_rate": 0.002, "loss": 2.355, "step": 112840 }, { "epoch": 0.4362465401802972, "grad_norm": 0.09795732796192169, "learning_rate": 0.002, "loss": 2.3309, "step": 112850 }, { "epoch": 0.4362851973836805, "grad_norm": 0.11147502809762955, "learning_rate": 0.002, "loss": 2.3579, "step": 112860 }, { "epoch": 0.43632385458706374, "grad_norm": 0.10798709094524384, "learning_rate": 0.002, "loss": 2.3436, "step": 112870 }, { "epoch": 0.43636251179044705, "grad_norm": 0.11606871336698532, "learning_rate": 0.002, "loss": 2.3532, "step": 112880 }, { "epoch": 0.4364011689938303, "grad_norm": 0.10911009460687637, "learning_rate": 0.002, "loss": 2.3539, "step": 112890 }, { "epoch": 0.4364398261972136, "grad_norm": 0.11125440150499344, "learning_rate": 0.002, "loss": 2.3428, "step": 112900 }, { "epoch": 0.43647848340059686, "grad_norm": 0.10836660861968994, "learning_rate": 0.002, "loss": 2.3428, "step": 112910 }, { "epoch": 0.43651714060398017, "grad_norm": 0.0975693017244339, "learning_rate": 0.002, "loss": 2.3543, "step": 112920 }, { "epoch": 0.4365557978073634, "grad_norm": 0.10515625029802322, "learning_rate": 0.002, "loss": 2.3407, "step": 112930 }, { "epoch": 0.43659445501074673, "grad_norm": 0.09607485681772232, "learning_rate": 0.002, "loss": 2.3437, "step": 112940 }, { "epoch": 0.43663311221413, "grad_norm": 0.08798225969076157, "learning_rate": 0.002, "loss": 2.3508, "step": 112950 }, { "epoch": 0.43667176941751323, "grad_norm": 0.11401963979005814, "learning_rate": 0.002, "loss": 2.3396, "step": 112960 }, { "epoch": 0.43671042662089654, "grad_norm": 0.09933006763458252, "learning_rate": 0.002, "loss": 2.3549, "step": 112970 }, { "epoch": 0.4367490838242798, "grad_norm": 0.10628395527601242, "learning_rate": 0.002, "loss": 2.3298, "step": 112980 }, { "epoch": 0.4367877410276631, "grad_norm": 0.0965963825583458, "learning_rate": 0.002, "loss": 2.3502, "step": 112990 }, { "epoch": 0.43682639823104635, "grad_norm": 0.10263420641422272, "learning_rate": 0.002, "loss": 2.3329, "step": 113000 }, { "epoch": 0.43686505543442966, "grad_norm": 0.10733772069215775, "learning_rate": 0.002, "loss": 2.3551, "step": 113010 }, { "epoch": 0.4369037126378129, "grad_norm": 0.09779093414545059, "learning_rate": 0.002, "loss": 2.3382, "step": 113020 }, { "epoch": 0.4369423698411962, "grad_norm": 0.10382230579853058, "learning_rate": 0.002, "loss": 2.3411, "step": 113030 }, { "epoch": 0.4369810270445795, "grad_norm": 0.2067124843597412, "learning_rate": 0.002, "loss": 2.3471, "step": 113040 }, { "epoch": 0.4370196842479628, "grad_norm": 0.11711885035037994, "learning_rate": 0.002, "loss": 2.3572, "step": 113050 }, { "epoch": 0.43705834145134603, "grad_norm": 0.10530546307563782, "learning_rate": 0.002, "loss": 2.3445, "step": 113060 }, { "epoch": 0.43709699865472934, "grad_norm": 0.10417833924293518, "learning_rate": 0.002, "loss": 2.3484, "step": 113070 }, { "epoch": 0.4371356558581126, "grad_norm": 0.09886866062879562, "learning_rate": 0.002, "loss": 2.352, "step": 113080 }, { "epoch": 0.4371743130614959, "grad_norm": 0.12263850122690201, "learning_rate": 0.002, "loss": 2.35, "step": 113090 }, { "epoch": 0.43721297026487915, "grad_norm": 0.09797932207584381, "learning_rate": 0.002, "loss": 2.344, "step": 113100 }, { "epoch": 0.43725162746826246, "grad_norm": 0.12123297899961472, "learning_rate": 0.002, "loss": 2.3624, "step": 113110 }, { "epoch": 0.4372902846716457, "grad_norm": 0.09888701885938644, "learning_rate": 0.002, "loss": 2.3538, "step": 113120 }, { "epoch": 0.437328941875029, "grad_norm": 0.10616670548915863, "learning_rate": 0.002, "loss": 2.3493, "step": 113130 }, { "epoch": 0.4373675990784123, "grad_norm": 0.10324602574110031, "learning_rate": 0.002, "loss": 2.3488, "step": 113140 }, { "epoch": 0.4374062562817955, "grad_norm": 0.13094359636306763, "learning_rate": 0.002, "loss": 2.3543, "step": 113150 }, { "epoch": 0.43744491348517883, "grad_norm": 0.12776924669742584, "learning_rate": 0.002, "loss": 2.3454, "step": 113160 }, { "epoch": 0.4374835706885621, "grad_norm": 0.11114688962697983, "learning_rate": 0.002, "loss": 2.3413, "step": 113170 }, { "epoch": 0.4375222278919454, "grad_norm": 0.10153741389513016, "learning_rate": 0.002, "loss": 2.3395, "step": 113180 }, { "epoch": 0.43756088509532864, "grad_norm": 0.0986599400639534, "learning_rate": 0.002, "loss": 2.3544, "step": 113190 }, { "epoch": 0.43759954229871195, "grad_norm": 0.10165860503911972, "learning_rate": 0.002, "loss": 2.3466, "step": 113200 }, { "epoch": 0.4376381995020952, "grad_norm": 0.11720269918441772, "learning_rate": 0.002, "loss": 2.344, "step": 113210 }, { "epoch": 0.4376768567054785, "grad_norm": 0.11033625155687332, "learning_rate": 0.002, "loss": 2.3429, "step": 113220 }, { "epoch": 0.43771551390886176, "grad_norm": 0.5819602012634277, "learning_rate": 0.002, "loss": 2.3526, "step": 113230 }, { "epoch": 0.43775417111224507, "grad_norm": 0.11656757444143295, "learning_rate": 0.002, "loss": 2.3477, "step": 113240 }, { "epoch": 0.4377928283156283, "grad_norm": 0.10410122573375702, "learning_rate": 0.002, "loss": 2.3518, "step": 113250 }, { "epoch": 0.43783148551901163, "grad_norm": 0.0943315178155899, "learning_rate": 0.002, "loss": 2.3365, "step": 113260 }, { "epoch": 0.4378701427223949, "grad_norm": 0.18185904622077942, "learning_rate": 0.002, "loss": 2.3581, "step": 113270 }, { "epoch": 0.4379087999257782, "grad_norm": 0.10419418662786484, "learning_rate": 0.002, "loss": 2.3601, "step": 113280 }, { "epoch": 0.43794745712916144, "grad_norm": 0.11047537624835968, "learning_rate": 0.002, "loss": 2.3369, "step": 113290 }, { "epoch": 0.43798611433254475, "grad_norm": 0.12000361829996109, "learning_rate": 0.002, "loss": 2.345, "step": 113300 }, { "epoch": 0.438024771535928, "grad_norm": 0.11603201925754547, "learning_rate": 0.002, "loss": 2.3523, "step": 113310 }, { "epoch": 0.43806342873931126, "grad_norm": 0.10754760354757309, "learning_rate": 0.002, "loss": 2.3561, "step": 113320 }, { "epoch": 0.43810208594269456, "grad_norm": 0.10942035913467407, "learning_rate": 0.002, "loss": 2.3472, "step": 113330 }, { "epoch": 0.4381407431460778, "grad_norm": 0.11428725719451904, "learning_rate": 0.002, "loss": 2.3465, "step": 113340 }, { "epoch": 0.4381794003494611, "grad_norm": 0.11185505986213684, "learning_rate": 0.002, "loss": 2.347, "step": 113350 }, { "epoch": 0.4382180575528444, "grad_norm": 0.10271166265010834, "learning_rate": 0.002, "loss": 2.3506, "step": 113360 }, { "epoch": 0.4382567147562277, "grad_norm": 0.09774298220872879, "learning_rate": 0.002, "loss": 2.3547, "step": 113370 }, { "epoch": 0.43829537195961094, "grad_norm": 0.11461903899908066, "learning_rate": 0.002, "loss": 2.3414, "step": 113380 }, { "epoch": 0.43833402916299424, "grad_norm": 0.12484222650527954, "learning_rate": 0.002, "loss": 2.347, "step": 113390 }, { "epoch": 0.4383726863663775, "grad_norm": 0.11602794378995895, "learning_rate": 0.002, "loss": 2.3405, "step": 113400 }, { "epoch": 0.4384113435697608, "grad_norm": 0.111027292907238, "learning_rate": 0.002, "loss": 2.3616, "step": 113410 }, { "epoch": 0.43845000077314406, "grad_norm": 0.08952493220567703, "learning_rate": 0.002, "loss": 2.3641, "step": 113420 }, { "epoch": 0.43848865797652736, "grad_norm": 0.10877718031406403, "learning_rate": 0.002, "loss": 2.3502, "step": 113430 }, { "epoch": 0.4385273151799106, "grad_norm": 0.11232464015483856, "learning_rate": 0.002, "loss": 2.3405, "step": 113440 }, { "epoch": 0.4385659723832939, "grad_norm": 0.10439947247505188, "learning_rate": 0.002, "loss": 2.3651, "step": 113450 }, { "epoch": 0.4386046295866772, "grad_norm": 0.10224592685699463, "learning_rate": 0.002, "loss": 2.3633, "step": 113460 }, { "epoch": 0.4386432867900605, "grad_norm": 0.10106103122234344, "learning_rate": 0.002, "loss": 2.3427, "step": 113470 }, { "epoch": 0.43868194399344373, "grad_norm": 0.1027471050620079, "learning_rate": 0.002, "loss": 2.3515, "step": 113480 }, { "epoch": 0.43872060119682704, "grad_norm": 0.10447635501623154, "learning_rate": 0.002, "loss": 2.3483, "step": 113490 }, { "epoch": 0.4387592584002103, "grad_norm": 0.10832199454307556, "learning_rate": 0.002, "loss": 2.3481, "step": 113500 }, { "epoch": 0.43879791560359355, "grad_norm": 0.375355064868927, "learning_rate": 0.002, "loss": 2.3476, "step": 113510 }, { "epoch": 0.43883657280697685, "grad_norm": 0.12057796865701675, "learning_rate": 0.002, "loss": 2.3561, "step": 113520 }, { "epoch": 0.4388752300103601, "grad_norm": 0.1038069799542427, "learning_rate": 0.002, "loss": 2.36, "step": 113530 }, { "epoch": 0.4389138872137434, "grad_norm": 0.09568572789430618, "learning_rate": 0.002, "loss": 2.3671, "step": 113540 }, { "epoch": 0.43895254441712667, "grad_norm": 0.10505779832601547, "learning_rate": 0.002, "loss": 2.3521, "step": 113550 }, { "epoch": 0.43899120162051, "grad_norm": 0.12001272290945053, "learning_rate": 0.002, "loss": 2.338, "step": 113560 }, { "epoch": 0.4390298588238932, "grad_norm": 0.1079864352941513, "learning_rate": 0.002, "loss": 2.335, "step": 113570 }, { "epoch": 0.43906851602727653, "grad_norm": 0.09099382907152176, "learning_rate": 0.002, "loss": 2.3517, "step": 113580 }, { "epoch": 0.4391071732306598, "grad_norm": 0.09558238834142685, "learning_rate": 0.002, "loss": 2.347, "step": 113590 }, { "epoch": 0.4391458304340431, "grad_norm": 0.10473648458719254, "learning_rate": 0.002, "loss": 2.3546, "step": 113600 }, { "epoch": 0.43918448763742635, "grad_norm": 0.0934872105717659, "learning_rate": 0.002, "loss": 2.346, "step": 113610 }, { "epoch": 0.43922314484080965, "grad_norm": 0.1030026376247406, "learning_rate": 0.002, "loss": 2.3505, "step": 113620 }, { "epoch": 0.4392618020441929, "grad_norm": 0.11831945925951004, "learning_rate": 0.002, "loss": 2.3493, "step": 113630 }, { "epoch": 0.4393004592475762, "grad_norm": 0.10450316220521927, "learning_rate": 0.002, "loss": 2.3444, "step": 113640 }, { "epoch": 0.43933911645095947, "grad_norm": 0.11427527666091919, "learning_rate": 0.002, "loss": 2.3662, "step": 113650 }, { "epoch": 0.4393777736543428, "grad_norm": 0.10517244786024094, "learning_rate": 0.002, "loss": 2.3382, "step": 113660 }, { "epoch": 0.439416430857726, "grad_norm": 0.1093309074640274, "learning_rate": 0.002, "loss": 2.3507, "step": 113670 }, { "epoch": 0.43945508806110933, "grad_norm": 0.1326448619365692, "learning_rate": 0.002, "loss": 2.3454, "step": 113680 }, { "epoch": 0.4394937452644926, "grad_norm": 0.10119234025478363, "learning_rate": 0.002, "loss": 2.3261, "step": 113690 }, { "epoch": 0.43953240246787584, "grad_norm": 0.10754355788230896, "learning_rate": 0.002, "loss": 2.362, "step": 113700 }, { "epoch": 0.43957105967125915, "grad_norm": 0.09021865576505661, "learning_rate": 0.002, "loss": 2.351, "step": 113710 }, { "epoch": 0.4396097168746424, "grad_norm": 0.11766369640827179, "learning_rate": 0.002, "loss": 2.3391, "step": 113720 }, { "epoch": 0.4396483740780257, "grad_norm": 0.11109083145856857, "learning_rate": 0.002, "loss": 2.3572, "step": 113730 }, { "epoch": 0.43968703128140896, "grad_norm": 0.09245065599679947, "learning_rate": 0.002, "loss": 2.356, "step": 113740 }, { "epoch": 0.43972568848479227, "grad_norm": 0.11703155189752579, "learning_rate": 0.002, "loss": 2.358, "step": 113750 }, { "epoch": 0.4397643456881755, "grad_norm": 0.10193216800689697, "learning_rate": 0.002, "loss": 2.352, "step": 113760 }, { "epoch": 0.4398030028915588, "grad_norm": 0.1027296856045723, "learning_rate": 0.002, "loss": 2.347, "step": 113770 }, { "epoch": 0.4398416600949421, "grad_norm": 0.11095414310693741, "learning_rate": 0.002, "loss": 2.3482, "step": 113780 }, { "epoch": 0.4398803172983254, "grad_norm": 0.12157269567251205, "learning_rate": 0.002, "loss": 2.3376, "step": 113790 }, { "epoch": 0.43991897450170864, "grad_norm": 0.12535102665424347, "learning_rate": 0.002, "loss": 2.3632, "step": 113800 }, { "epoch": 0.43995763170509194, "grad_norm": 0.09027545899152756, "learning_rate": 0.002, "loss": 2.3553, "step": 113810 }, { "epoch": 0.4399962889084752, "grad_norm": 0.16895151138305664, "learning_rate": 0.002, "loss": 2.3574, "step": 113820 }, { "epoch": 0.4400349461118585, "grad_norm": 0.18759407103061676, "learning_rate": 0.002, "loss": 2.36, "step": 113830 }, { "epoch": 0.44007360331524176, "grad_norm": 0.10348519682884216, "learning_rate": 0.002, "loss": 2.3544, "step": 113840 }, { "epoch": 0.44011226051862506, "grad_norm": 0.10666997730731964, "learning_rate": 0.002, "loss": 2.3379, "step": 113850 }, { "epoch": 0.4401509177220083, "grad_norm": 0.10717163980007172, "learning_rate": 0.002, "loss": 2.3509, "step": 113860 }, { "epoch": 0.4401895749253916, "grad_norm": 0.09396836906671524, "learning_rate": 0.002, "loss": 2.3378, "step": 113870 }, { "epoch": 0.4402282321287749, "grad_norm": 0.12772560119628906, "learning_rate": 0.002, "loss": 2.3447, "step": 113880 }, { "epoch": 0.44026688933215813, "grad_norm": 0.11134528368711472, "learning_rate": 0.002, "loss": 2.3572, "step": 113890 }, { "epoch": 0.44030554653554144, "grad_norm": 0.10744709521532059, "learning_rate": 0.002, "loss": 2.3462, "step": 113900 }, { "epoch": 0.4403442037389247, "grad_norm": 0.11233912408351898, "learning_rate": 0.002, "loss": 2.3646, "step": 113910 }, { "epoch": 0.440382860942308, "grad_norm": 0.10462363809347153, "learning_rate": 0.002, "loss": 2.3417, "step": 113920 }, { "epoch": 0.44042151814569125, "grad_norm": 0.1127980500459671, "learning_rate": 0.002, "loss": 2.3487, "step": 113930 }, { "epoch": 0.44046017534907456, "grad_norm": 0.09823184460401535, "learning_rate": 0.002, "loss": 2.334, "step": 113940 }, { "epoch": 0.4404988325524578, "grad_norm": 0.08919413387775421, "learning_rate": 0.002, "loss": 2.3429, "step": 113950 }, { "epoch": 0.4405374897558411, "grad_norm": 0.10333508253097534, "learning_rate": 0.002, "loss": 2.3342, "step": 113960 }, { "epoch": 0.44057614695922437, "grad_norm": 0.11753777414560318, "learning_rate": 0.002, "loss": 2.3501, "step": 113970 }, { "epoch": 0.4406148041626077, "grad_norm": 0.11250483244657516, "learning_rate": 0.002, "loss": 2.3469, "step": 113980 }, { "epoch": 0.44065346136599093, "grad_norm": 0.10561315715312958, "learning_rate": 0.002, "loss": 2.3482, "step": 113990 }, { "epoch": 0.44069211856937424, "grad_norm": 0.09429409354925156, "learning_rate": 0.002, "loss": 2.3404, "step": 114000 }, { "epoch": 0.4407307757727575, "grad_norm": 0.09992121905088425, "learning_rate": 0.002, "loss": 2.3557, "step": 114010 }, { "epoch": 0.4407694329761408, "grad_norm": 0.10621625185012817, "learning_rate": 0.002, "loss": 2.363, "step": 114020 }, { "epoch": 0.44080809017952405, "grad_norm": 0.11136475950479507, "learning_rate": 0.002, "loss": 2.3479, "step": 114030 }, { "epoch": 0.44084674738290736, "grad_norm": 0.10999602824449539, "learning_rate": 0.002, "loss": 2.3381, "step": 114040 }, { "epoch": 0.4408854045862906, "grad_norm": 0.12542486190795898, "learning_rate": 0.002, "loss": 2.3327, "step": 114050 }, { "epoch": 0.44092406178967386, "grad_norm": 0.10794009268283844, "learning_rate": 0.002, "loss": 2.3465, "step": 114060 }, { "epoch": 0.44096271899305717, "grad_norm": 0.11233028769493103, "learning_rate": 0.002, "loss": 2.3642, "step": 114070 }, { "epoch": 0.4410013761964404, "grad_norm": 0.10456335544586182, "learning_rate": 0.002, "loss": 2.3469, "step": 114080 }, { "epoch": 0.4410400333998237, "grad_norm": 0.10484910011291504, "learning_rate": 0.002, "loss": 2.3463, "step": 114090 }, { "epoch": 0.441078690603207, "grad_norm": 0.09089253842830658, "learning_rate": 0.002, "loss": 2.3549, "step": 114100 }, { "epoch": 0.4411173478065903, "grad_norm": 0.10500083863735199, "learning_rate": 0.002, "loss": 2.3565, "step": 114110 }, { "epoch": 0.44115600500997354, "grad_norm": 0.12656979262828827, "learning_rate": 0.002, "loss": 2.3539, "step": 114120 }, { "epoch": 0.44119466221335685, "grad_norm": 0.1000988706946373, "learning_rate": 0.002, "loss": 2.3393, "step": 114130 }, { "epoch": 0.4412333194167401, "grad_norm": 0.0932462066411972, "learning_rate": 0.002, "loss": 2.3504, "step": 114140 }, { "epoch": 0.4412719766201234, "grad_norm": 0.14996211230754852, "learning_rate": 0.002, "loss": 2.3496, "step": 114150 }, { "epoch": 0.44131063382350666, "grad_norm": 0.0966678187251091, "learning_rate": 0.002, "loss": 2.3482, "step": 114160 }, { "epoch": 0.44134929102688997, "grad_norm": 0.10398583114147186, "learning_rate": 0.002, "loss": 2.3585, "step": 114170 }, { "epoch": 0.4413879482302732, "grad_norm": 0.09673210978507996, "learning_rate": 0.002, "loss": 2.3444, "step": 114180 }, { "epoch": 0.4414266054336565, "grad_norm": 0.10650409013032913, "learning_rate": 0.002, "loss": 2.3534, "step": 114190 }, { "epoch": 0.4414652626370398, "grad_norm": 0.10166922956705093, "learning_rate": 0.002, "loss": 2.3603, "step": 114200 }, { "epoch": 0.4415039198404231, "grad_norm": 0.11047915369272232, "learning_rate": 0.002, "loss": 2.3513, "step": 114210 }, { "epoch": 0.44154257704380634, "grad_norm": 0.09343116730451584, "learning_rate": 0.002, "loss": 2.3282, "step": 114220 }, { "epoch": 0.44158123424718965, "grad_norm": 0.10396973788738251, "learning_rate": 0.002, "loss": 2.3511, "step": 114230 }, { "epoch": 0.4416198914505729, "grad_norm": 0.10365176200866699, "learning_rate": 0.002, "loss": 2.3455, "step": 114240 }, { "epoch": 0.44165854865395615, "grad_norm": 0.11292040348052979, "learning_rate": 0.002, "loss": 2.3703, "step": 114250 }, { "epoch": 0.44169720585733946, "grad_norm": 0.10333476215600967, "learning_rate": 0.002, "loss": 2.3524, "step": 114260 }, { "epoch": 0.4417358630607227, "grad_norm": 0.08777636289596558, "learning_rate": 0.002, "loss": 2.3417, "step": 114270 }, { "epoch": 0.441774520264106, "grad_norm": 0.12246387451887131, "learning_rate": 0.002, "loss": 2.3544, "step": 114280 }, { "epoch": 0.44181317746748927, "grad_norm": 0.10196879506111145, "learning_rate": 0.002, "loss": 2.3574, "step": 114290 }, { "epoch": 0.4418518346708726, "grad_norm": 0.1406039446592331, "learning_rate": 0.002, "loss": 2.348, "step": 114300 }, { "epoch": 0.44189049187425583, "grad_norm": 0.09151335805654526, "learning_rate": 0.002, "loss": 2.3497, "step": 114310 }, { "epoch": 0.44192914907763914, "grad_norm": 0.12211549282073975, "learning_rate": 0.002, "loss": 2.3398, "step": 114320 }, { "epoch": 0.4419678062810224, "grad_norm": 0.103364959359169, "learning_rate": 0.002, "loss": 2.3584, "step": 114330 }, { "epoch": 0.4420064634844057, "grad_norm": 0.095526784658432, "learning_rate": 0.002, "loss": 2.3352, "step": 114340 }, { "epoch": 0.44204512068778895, "grad_norm": 0.10776218771934509, "learning_rate": 0.002, "loss": 2.3543, "step": 114350 }, { "epoch": 0.44208377789117226, "grad_norm": 0.12116540968418121, "learning_rate": 0.002, "loss": 2.3519, "step": 114360 }, { "epoch": 0.4421224350945555, "grad_norm": 0.10187121480703354, "learning_rate": 0.002, "loss": 2.3501, "step": 114370 }, { "epoch": 0.4421610922979388, "grad_norm": 0.12751945853233337, "learning_rate": 0.002, "loss": 2.3477, "step": 114380 }, { "epoch": 0.44219974950132207, "grad_norm": 0.12061352282762527, "learning_rate": 0.002, "loss": 2.357, "step": 114390 }, { "epoch": 0.4422384067047054, "grad_norm": 0.09569384902715683, "learning_rate": 0.002, "loss": 2.3312, "step": 114400 }, { "epoch": 0.44227706390808863, "grad_norm": 0.10532968491315842, "learning_rate": 0.002, "loss": 2.3421, "step": 114410 }, { "epoch": 0.44231572111147194, "grad_norm": 0.1037789061665535, "learning_rate": 0.002, "loss": 2.3721, "step": 114420 }, { "epoch": 0.4423543783148552, "grad_norm": 0.09751158207654953, "learning_rate": 0.002, "loss": 2.3616, "step": 114430 }, { "epoch": 0.44239303551823844, "grad_norm": 0.11524856090545654, "learning_rate": 0.002, "loss": 2.3361, "step": 114440 }, { "epoch": 0.44243169272162175, "grad_norm": 0.10979925096035004, "learning_rate": 0.002, "loss": 2.3329, "step": 114450 }, { "epoch": 0.442470349925005, "grad_norm": 0.1250380575656891, "learning_rate": 0.002, "loss": 2.3446, "step": 114460 }, { "epoch": 0.4425090071283883, "grad_norm": 0.11559075117111206, "learning_rate": 0.002, "loss": 2.3404, "step": 114470 }, { "epoch": 0.44254766433177156, "grad_norm": 0.10545665770769119, "learning_rate": 0.002, "loss": 2.3554, "step": 114480 }, { "epoch": 0.44258632153515487, "grad_norm": 0.13213802874088287, "learning_rate": 0.002, "loss": 2.3595, "step": 114490 }, { "epoch": 0.4426249787385381, "grad_norm": 0.10205340385437012, "learning_rate": 0.002, "loss": 2.3537, "step": 114500 }, { "epoch": 0.44266363594192143, "grad_norm": 0.10072396695613861, "learning_rate": 0.002, "loss": 2.3409, "step": 114510 }, { "epoch": 0.4427022931453047, "grad_norm": 0.10048835724592209, "learning_rate": 0.002, "loss": 2.3412, "step": 114520 }, { "epoch": 0.442740950348688, "grad_norm": 0.10904062539339066, "learning_rate": 0.002, "loss": 2.3362, "step": 114530 }, { "epoch": 0.44277960755207124, "grad_norm": 0.09817338734865189, "learning_rate": 0.002, "loss": 2.3388, "step": 114540 }, { "epoch": 0.44281826475545455, "grad_norm": 0.12179743498563766, "learning_rate": 0.002, "loss": 2.3625, "step": 114550 }, { "epoch": 0.4428569219588378, "grad_norm": 0.11451654881238937, "learning_rate": 0.002, "loss": 2.3547, "step": 114560 }, { "epoch": 0.4428955791622211, "grad_norm": 0.11346857994794846, "learning_rate": 0.002, "loss": 2.3437, "step": 114570 }, { "epoch": 0.44293423636560436, "grad_norm": 0.13127179443836212, "learning_rate": 0.002, "loss": 2.3539, "step": 114580 }, { "epoch": 0.44297289356898767, "grad_norm": 0.10564865916967392, "learning_rate": 0.002, "loss": 2.3505, "step": 114590 }, { "epoch": 0.4430115507723709, "grad_norm": 0.09019293636083603, "learning_rate": 0.002, "loss": 2.3454, "step": 114600 }, { "epoch": 0.44305020797575423, "grad_norm": 0.12438137829303741, "learning_rate": 0.002, "loss": 2.3509, "step": 114610 }, { "epoch": 0.4430888651791375, "grad_norm": 0.12015697360038757, "learning_rate": 0.002, "loss": 2.357, "step": 114620 }, { "epoch": 0.44312752238252073, "grad_norm": 0.10316593199968338, "learning_rate": 0.002, "loss": 2.3535, "step": 114630 }, { "epoch": 0.44316617958590404, "grad_norm": 0.0989539623260498, "learning_rate": 0.002, "loss": 2.3449, "step": 114640 }, { "epoch": 0.4432048367892873, "grad_norm": 0.10684391111135483, "learning_rate": 0.002, "loss": 2.3419, "step": 114650 }, { "epoch": 0.4432434939926706, "grad_norm": 0.09858309477567673, "learning_rate": 0.002, "loss": 2.3573, "step": 114660 }, { "epoch": 0.44328215119605385, "grad_norm": 0.12590645253658295, "learning_rate": 0.002, "loss": 2.3556, "step": 114670 }, { "epoch": 0.44332080839943716, "grad_norm": 0.1031150296330452, "learning_rate": 0.002, "loss": 2.3508, "step": 114680 }, { "epoch": 0.4433594656028204, "grad_norm": 0.09509457647800446, "learning_rate": 0.002, "loss": 2.3414, "step": 114690 }, { "epoch": 0.4433981228062037, "grad_norm": 0.11310373246669769, "learning_rate": 0.002, "loss": 2.3483, "step": 114700 }, { "epoch": 0.443436780009587, "grad_norm": 0.10985482484102249, "learning_rate": 0.002, "loss": 2.3406, "step": 114710 }, { "epoch": 0.4434754372129703, "grad_norm": 0.11310135573148727, "learning_rate": 0.002, "loss": 2.3514, "step": 114720 }, { "epoch": 0.44351409441635353, "grad_norm": 0.10724176466464996, "learning_rate": 0.002, "loss": 2.347, "step": 114730 }, { "epoch": 0.44355275161973684, "grad_norm": 0.10607755929231644, "learning_rate": 0.002, "loss": 2.3451, "step": 114740 }, { "epoch": 0.4435914088231201, "grad_norm": 0.28465360403060913, "learning_rate": 0.002, "loss": 2.346, "step": 114750 }, { "epoch": 0.4436300660265034, "grad_norm": 0.1293918639421463, "learning_rate": 0.002, "loss": 2.3459, "step": 114760 }, { "epoch": 0.44366872322988665, "grad_norm": 0.1216643676161766, "learning_rate": 0.002, "loss": 2.348, "step": 114770 }, { "epoch": 0.44370738043326996, "grad_norm": 0.10900459438562393, "learning_rate": 0.002, "loss": 2.3563, "step": 114780 }, { "epoch": 0.4437460376366532, "grad_norm": 0.10473953932523727, "learning_rate": 0.002, "loss": 2.3514, "step": 114790 }, { "epoch": 0.4437846948400365, "grad_norm": 0.12707144021987915, "learning_rate": 0.002, "loss": 2.3306, "step": 114800 }, { "epoch": 0.44382335204341977, "grad_norm": 0.11088820546865463, "learning_rate": 0.002, "loss": 2.3432, "step": 114810 }, { "epoch": 0.443862009246803, "grad_norm": 0.10142777115106583, "learning_rate": 0.002, "loss": 2.3505, "step": 114820 }, { "epoch": 0.44390066645018633, "grad_norm": 0.10695470869541168, "learning_rate": 0.002, "loss": 2.3554, "step": 114830 }, { "epoch": 0.4439393236535696, "grad_norm": 0.09565724432468414, "learning_rate": 0.002, "loss": 2.3473, "step": 114840 }, { "epoch": 0.4439779808569529, "grad_norm": 0.10378613322973251, "learning_rate": 0.002, "loss": 2.3574, "step": 114850 }, { "epoch": 0.44401663806033614, "grad_norm": 0.1171041876077652, "learning_rate": 0.002, "loss": 2.3485, "step": 114860 }, { "epoch": 0.44405529526371945, "grad_norm": 0.11488772928714752, "learning_rate": 0.002, "loss": 2.3485, "step": 114870 }, { "epoch": 0.4440939524671027, "grad_norm": 0.11186092346906662, "learning_rate": 0.002, "loss": 2.3466, "step": 114880 }, { "epoch": 0.444132609670486, "grad_norm": 0.10943606495857239, "learning_rate": 0.002, "loss": 2.3709, "step": 114890 }, { "epoch": 0.44417126687386926, "grad_norm": 0.11419028788805008, "learning_rate": 0.002, "loss": 2.339, "step": 114900 }, { "epoch": 0.44420992407725257, "grad_norm": 0.09883607923984528, "learning_rate": 0.002, "loss": 2.35, "step": 114910 }, { "epoch": 0.4442485812806358, "grad_norm": 0.10349875688552856, "learning_rate": 0.002, "loss": 2.3494, "step": 114920 }, { "epoch": 0.44428723848401913, "grad_norm": 0.09061745554208755, "learning_rate": 0.002, "loss": 2.3505, "step": 114930 }, { "epoch": 0.4443258956874024, "grad_norm": 0.1337675154209137, "learning_rate": 0.002, "loss": 2.3498, "step": 114940 }, { "epoch": 0.4443645528907857, "grad_norm": 0.10015002638101578, "learning_rate": 0.002, "loss": 2.3433, "step": 114950 }, { "epoch": 0.44440321009416894, "grad_norm": 0.12010683864355087, "learning_rate": 0.002, "loss": 2.3521, "step": 114960 }, { "epoch": 0.44444186729755225, "grad_norm": 0.11135932058095932, "learning_rate": 0.002, "loss": 2.3416, "step": 114970 }, { "epoch": 0.4444805245009355, "grad_norm": 0.09960712492465973, "learning_rate": 0.002, "loss": 2.3386, "step": 114980 }, { "epoch": 0.44451918170431876, "grad_norm": 0.10387620329856873, "learning_rate": 0.002, "loss": 2.3601, "step": 114990 }, { "epoch": 0.44455783890770206, "grad_norm": 0.10625986009836197, "learning_rate": 0.002, "loss": 2.355, "step": 115000 }, { "epoch": 0.4445964961110853, "grad_norm": 0.11116498708724976, "learning_rate": 0.002, "loss": 2.3674, "step": 115010 }, { "epoch": 0.4446351533144686, "grad_norm": 0.12776482105255127, "learning_rate": 0.002, "loss": 2.3434, "step": 115020 }, { "epoch": 0.4446738105178519, "grad_norm": 0.10398684442043304, "learning_rate": 0.002, "loss": 2.3374, "step": 115030 }, { "epoch": 0.4447124677212352, "grad_norm": 0.1710490733385086, "learning_rate": 0.002, "loss": 2.3552, "step": 115040 }, { "epoch": 0.44475112492461844, "grad_norm": 0.10064823180437088, "learning_rate": 0.002, "loss": 2.364, "step": 115050 }, { "epoch": 0.44478978212800174, "grad_norm": 0.11258933693170547, "learning_rate": 0.002, "loss": 2.3504, "step": 115060 }, { "epoch": 0.444828439331385, "grad_norm": 0.10680379718542099, "learning_rate": 0.002, "loss": 2.3498, "step": 115070 }, { "epoch": 0.4448670965347683, "grad_norm": 0.11084065586328506, "learning_rate": 0.002, "loss": 2.3578, "step": 115080 }, { "epoch": 0.44490575373815155, "grad_norm": 0.09208144247531891, "learning_rate": 0.002, "loss": 2.3594, "step": 115090 }, { "epoch": 0.44494441094153486, "grad_norm": 0.09825203567743301, "learning_rate": 0.002, "loss": 2.3447, "step": 115100 }, { "epoch": 0.4449830681449181, "grad_norm": 0.09931337088346481, "learning_rate": 0.002, "loss": 2.3491, "step": 115110 }, { "epoch": 0.4450217253483014, "grad_norm": 0.11392378062009811, "learning_rate": 0.002, "loss": 2.3546, "step": 115120 }, { "epoch": 0.4450603825516847, "grad_norm": 0.09332132339477539, "learning_rate": 0.002, "loss": 2.3514, "step": 115130 }, { "epoch": 0.445099039755068, "grad_norm": 0.10715038329362869, "learning_rate": 0.002, "loss": 2.3586, "step": 115140 }, { "epoch": 0.44513769695845123, "grad_norm": 0.11407089233398438, "learning_rate": 0.002, "loss": 2.3462, "step": 115150 }, { "epoch": 0.44517635416183454, "grad_norm": 0.1000797227025032, "learning_rate": 0.002, "loss": 2.3409, "step": 115160 }, { "epoch": 0.4452150113652178, "grad_norm": 0.10885439068078995, "learning_rate": 0.002, "loss": 2.3638, "step": 115170 }, { "epoch": 0.44525366856860105, "grad_norm": 0.09690678864717484, "learning_rate": 0.002, "loss": 2.3354, "step": 115180 }, { "epoch": 0.44529232577198435, "grad_norm": 0.18793104588985443, "learning_rate": 0.002, "loss": 2.3381, "step": 115190 }, { "epoch": 0.4453309829753676, "grad_norm": 0.1313229501247406, "learning_rate": 0.002, "loss": 2.3421, "step": 115200 }, { "epoch": 0.4453696401787509, "grad_norm": 0.10109826177358627, "learning_rate": 0.002, "loss": 2.3463, "step": 115210 }, { "epoch": 0.44540829738213417, "grad_norm": 0.11365307122468948, "learning_rate": 0.002, "loss": 2.3599, "step": 115220 }, { "epoch": 0.4454469545855175, "grad_norm": 0.10179514437913895, "learning_rate": 0.002, "loss": 2.3361, "step": 115230 }, { "epoch": 0.4454856117889007, "grad_norm": 0.10260636359453201, "learning_rate": 0.002, "loss": 2.3481, "step": 115240 }, { "epoch": 0.44552426899228403, "grad_norm": 0.10231651365756989, "learning_rate": 0.002, "loss": 2.3431, "step": 115250 }, { "epoch": 0.4455629261956673, "grad_norm": 0.09702098369598389, "learning_rate": 0.002, "loss": 2.3304, "step": 115260 }, { "epoch": 0.4456015833990506, "grad_norm": 0.09935378283262253, "learning_rate": 0.002, "loss": 2.3493, "step": 115270 }, { "epoch": 0.44564024060243385, "grad_norm": 0.10840914398431778, "learning_rate": 0.002, "loss": 2.3482, "step": 115280 }, { "epoch": 0.44567889780581715, "grad_norm": 0.10769101232290268, "learning_rate": 0.002, "loss": 2.35, "step": 115290 }, { "epoch": 0.4457175550092004, "grad_norm": 0.10299813747406006, "learning_rate": 0.002, "loss": 2.3576, "step": 115300 }, { "epoch": 0.4457562122125837, "grad_norm": 0.10182159394025803, "learning_rate": 0.002, "loss": 2.3524, "step": 115310 }, { "epoch": 0.44579486941596697, "grad_norm": 0.10414126515388489, "learning_rate": 0.002, "loss": 2.3466, "step": 115320 }, { "epoch": 0.4458335266193503, "grad_norm": 0.10255824774503708, "learning_rate": 0.002, "loss": 2.3382, "step": 115330 }, { "epoch": 0.4458721838227335, "grad_norm": 0.09214485436677933, "learning_rate": 0.002, "loss": 2.3541, "step": 115340 }, { "epoch": 0.44591084102611683, "grad_norm": 0.11276800185441971, "learning_rate": 0.002, "loss": 2.3318, "step": 115350 }, { "epoch": 0.4459494982295001, "grad_norm": 0.10205808281898499, "learning_rate": 0.002, "loss": 2.3512, "step": 115360 }, { "epoch": 0.44598815543288334, "grad_norm": 0.15296092629432678, "learning_rate": 0.002, "loss": 2.3532, "step": 115370 }, { "epoch": 0.44602681263626665, "grad_norm": 0.10987062007188797, "learning_rate": 0.002, "loss": 2.3445, "step": 115380 }, { "epoch": 0.4460654698396499, "grad_norm": 0.11441478133201599, "learning_rate": 0.002, "loss": 2.3403, "step": 115390 }, { "epoch": 0.4461041270430332, "grad_norm": 0.107122503221035, "learning_rate": 0.002, "loss": 2.3351, "step": 115400 }, { "epoch": 0.44614278424641646, "grad_norm": 0.1339341700077057, "learning_rate": 0.002, "loss": 2.3536, "step": 115410 }, { "epoch": 0.44618144144979976, "grad_norm": 0.11719755083322525, "learning_rate": 0.002, "loss": 2.3639, "step": 115420 }, { "epoch": 0.446220098653183, "grad_norm": 0.1091657504439354, "learning_rate": 0.002, "loss": 2.3568, "step": 115430 }, { "epoch": 0.4462587558565663, "grad_norm": 0.10963524878025055, "learning_rate": 0.002, "loss": 2.3539, "step": 115440 }, { "epoch": 0.4462974130599496, "grad_norm": 0.11549586802721024, "learning_rate": 0.002, "loss": 2.3603, "step": 115450 }, { "epoch": 0.4463360702633329, "grad_norm": 0.12684787809848785, "learning_rate": 0.002, "loss": 2.3617, "step": 115460 }, { "epoch": 0.44637472746671614, "grad_norm": 0.11087344586849213, "learning_rate": 0.002, "loss": 2.3387, "step": 115470 }, { "epoch": 0.44641338467009944, "grad_norm": 0.10655102878808975, "learning_rate": 0.002, "loss": 2.346, "step": 115480 }, { "epoch": 0.4464520418734827, "grad_norm": 0.14614103734493256, "learning_rate": 0.002, "loss": 2.3555, "step": 115490 }, { "epoch": 0.446490699076866, "grad_norm": 0.09883083403110504, "learning_rate": 0.002, "loss": 2.3519, "step": 115500 }, { "epoch": 0.44652935628024926, "grad_norm": 0.1201692447066307, "learning_rate": 0.002, "loss": 2.3536, "step": 115510 }, { "epoch": 0.44656801348363256, "grad_norm": 0.10232232511043549, "learning_rate": 0.002, "loss": 2.3429, "step": 115520 }, { "epoch": 0.4466066706870158, "grad_norm": 0.1238783597946167, "learning_rate": 0.002, "loss": 2.3643, "step": 115530 }, { "epoch": 0.4466453278903991, "grad_norm": 0.10835893452167511, "learning_rate": 0.002, "loss": 2.3554, "step": 115540 }, { "epoch": 0.4466839850937824, "grad_norm": 0.10148458927869797, "learning_rate": 0.002, "loss": 2.3421, "step": 115550 }, { "epoch": 0.44672264229716563, "grad_norm": 0.10020631551742554, "learning_rate": 0.002, "loss": 2.3417, "step": 115560 }, { "epoch": 0.44676129950054894, "grad_norm": 0.10603410005569458, "learning_rate": 0.002, "loss": 2.3526, "step": 115570 }, { "epoch": 0.4467999567039322, "grad_norm": 0.10621050745248795, "learning_rate": 0.002, "loss": 2.3445, "step": 115580 }, { "epoch": 0.4468386139073155, "grad_norm": 0.10222116112709045, "learning_rate": 0.002, "loss": 2.3538, "step": 115590 }, { "epoch": 0.44687727111069875, "grad_norm": 0.09559807181358337, "learning_rate": 0.002, "loss": 2.3577, "step": 115600 }, { "epoch": 0.44691592831408206, "grad_norm": 0.09763092547655106, "learning_rate": 0.002, "loss": 2.3526, "step": 115610 }, { "epoch": 0.4469545855174653, "grad_norm": 0.1289861798286438, "learning_rate": 0.002, "loss": 2.3427, "step": 115620 }, { "epoch": 0.4469932427208486, "grad_norm": 0.10587324947118759, "learning_rate": 0.002, "loss": 2.3509, "step": 115630 }, { "epoch": 0.44703189992423187, "grad_norm": 0.09899197518825531, "learning_rate": 0.002, "loss": 2.3478, "step": 115640 }, { "epoch": 0.4470705571276152, "grad_norm": 0.09850729256868362, "learning_rate": 0.002, "loss": 2.3431, "step": 115650 }, { "epoch": 0.44710921433099843, "grad_norm": 0.12134348601102829, "learning_rate": 0.002, "loss": 2.3483, "step": 115660 }, { "epoch": 0.44714787153438174, "grad_norm": 0.11509352177381516, "learning_rate": 0.002, "loss": 2.3482, "step": 115670 }, { "epoch": 0.447186528737765, "grad_norm": 0.08912909030914307, "learning_rate": 0.002, "loss": 2.352, "step": 115680 }, { "epoch": 0.4472251859411483, "grad_norm": 0.11786410212516785, "learning_rate": 0.002, "loss": 2.3436, "step": 115690 }, { "epoch": 0.44726384314453155, "grad_norm": 0.09729806333780289, "learning_rate": 0.002, "loss": 2.3503, "step": 115700 }, { "epoch": 0.44730250034791486, "grad_norm": 0.09955301135778427, "learning_rate": 0.002, "loss": 2.3506, "step": 115710 }, { "epoch": 0.4473411575512981, "grad_norm": 0.1184239611029625, "learning_rate": 0.002, "loss": 2.3361, "step": 115720 }, { "epoch": 0.44737981475468136, "grad_norm": 0.1331377476453781, "learning_rate": 0.002, "loss": 2.3446, "step": 115730 }, { "epoch": 0.44741847195806467, "grad_norm": 0.11014413833618164, "learning_rate": 0.002, "loss": 2.3591, "step": 115740 }, { "epoch": 0.4474571291614479, "grad_norm": 0.10066406428813934, "learning_rate": 0.002, "loss": 2.3567, "step": 115750 }, { "epoch": 0.4474957863648312, "grad_norm": 0.09046106040477753, "learning_rate": 0.002, "loss": 2.3526, "step": 115760 }, { "epoch": 0.4475344435682145, "grad_norm": 0.12679710984230042, "learning_rate": 0.002, "loss": 2.3449, "step": 115770 }, { "epoch": 0.4475731007715978, "grad_norm": 0.10072343051433563, "learning_rate": 0.002, "loss": 2.3474, "step": 115780 }, { "epoch": 0.44761175797498104, "grad_norm": 0.10047661513090134, "learning_rate": 0.002, "loss": 2.3428, "step": 115790 }, { "epoch": 0.44765041517836435, "grad_norm": 0.09472613781690598, "learning_rate": 0.002, "loss": 2.349, "step": 115800 }, { "epoch": 0.4476890723817476, "grad_norm": 0.10094655305147171, "learning_rate": 0.002, "loss": 2.3524, "step": 115810 }, { "epoch": 0.4477277295851309, "grad_norm": 0.1129259541630745, "learning_rate": 0.002, "loss": 2.3462, "step": 115820 }, { "epoch": 0.44776638678851416, "grad_norm": 0.2693578898906708, "learning_rate": 0.002, "loss": 2.357, "step": 115830 }, { "epoch": 0.44780504399189747, "grad_norm": 0.11392874270677567, "learning_rate": 0.002, "loss": 2.3587, "step": 115840 }, { "epoch": 0.4478437011952807, "grad_norm": 0.13713346421718597, "learning_rate": 0.002, "loss": 2.3445, "step": 115850 }, { "epoch": 0.447882358398664, "grad_norm": 0.110591359436512, "learning_rate": 0.002, "loss": 2.356, "step": 115860 }, { "epoch": 0.4479210156020473, "grad_norm": 0.10545430332422256, "learning_rate": 0.002, "loss": 2.3679, "step": 115870 }, { "epoch": 0.4479596728054306, "grad_norm": 0.1109808161854744, "learning_rate": 0.002, "loss": 2.3597, "step": 115880 }, { "epoch": 0.44799833000881384, "grad_norm": 0.10120480507612228, "learning_rate": 0.002, "loss": 2.3602, "step": 115890 }, { "epoch": 0.44803698721219715, "grad_norm": 0.12015772610902786, "learning_rate": 0.002, "loss": 2.3505, "step": 115900 }, { "epoch": 0.4480756444155804, "grad_norm": 0.1165841743350029, "learning_rate": 0.002, "loss": 2.3575, "step": 115910 }, { "epoch": 0.44811430161896365, "grad_norm": 0.11997988820075989, "learning_rate": 0.002, "loss": 2.3347, "step": 115920 }, { "epoch": 0.44815295882234696, "grad_norm": 0.09375101327896118, "learning_rate": 0.002, "loss": 2.3514, "step": 115930 }, { "epoch": 0.4481916160257302, "grad_norm": 0.10501191020011902, "learning_rate": 0.002, "loss": 2.3593, "step": 115940 }, { "epoch": 0.4482302732291135, "grad_norm": 0.11917419731616974, "learning_rate": 0.002, "loss": 2.3378, "step": 115950 }, { "epoch": 0.44826893043249677, "grad_norm": 0.10625232756137848, "learning_rate": 0.002, "loss": 2.3461, "step": 115960 }, { "epoch": 0.4483075876358801, "grad_norm": 0.1068049892783165, "learning_rate": 0.002, "loss": 2.3435, "step": 115970 }, { "epoch": 0.44834624483926333, "grad_norm": 0.11341347545385361, "learning_rate": 0.002, "loss": 2.3522, "step": 115980 }, { "epoch": 0.44838490204264664, "grad_norm": 0.10867293179035187, "learning_rate": 0.002, "loss": 2.3332, "step": 115990 }, { "epoch": 0.4484235592460299, "grad_norm": 0.110105499625206, "learning_rate": 0.002, "loss": 2.3627, "step": 116000 }, { "epoch": 0.4484622164494132, "grad_norm": 0.11558972299098969, "learning_rate": 0.002, "loss": 2.348, "step": 116010 }, { "epoch": 0.44850087365279645, "grad_norm": 0.12024278193712234, "learning_rate": 0.002, "loss": 2.3357, "step": 116020 }, { "epoch": 0.44853953085617976, "grad_norm": 0.09318528324365616, "learning_rate": 0.002, "loss": 2.3423, "step": 116030 }, { "epoch": 0.448578188059563, "grad_norm": 0.11388210207223892, "learning_rate": 0.002, "loss": 2.3489, "step": 116040 }, { "epoch": 0.4486168452629463, "grad_norm": 0.10485441237688065, "learning_rate": 0.002, "loss": 2.3587, "step": 116050 }, { "epoch": 0.44865550246632957, "grad_norm": 0.10908669233322144, "learning_rate": 0.002, "loss": 2.3396, "step": 116060 }, { "epoch": 0.4486941596697129, "grad_norm": 0.11425649374723434, "learning_rate": 0.002, "loss": 2.3415, "step": 116070 }, { "epoch": 0.44873281687309613, "grad_norm": 0.09546269476413727, "learning_rate": 0.002, "loss": 2.3421, "step": 116080 }, { "epoch": 0.44877147407647944, "grad_norm": 0.10710975527763367, "learning_rate": 0.002, "loss": 2.3496, "step": 116090 }, { "epoch": 0.4488101312798627, "grad_norm": 0.12068114429712296, "learning_rate": 0.002, "loss": 2.3428, "step": 116100 }, { "epoch": 0.44884878848324594, "grad_norm": 0.10486262291669846, "learning_rate": 0.002, "loss": 2.3592, "step": 116110 }, { "epoch": 0.44888744568662925, "grad_norm": 0.10621446371078491, "learning_rate": 0.002, "loss": 2.3364, "step": 116120 }, { "epoch": 0.4489261028900125, "grad_norm": 0.09761782735586166, "learning_rate": 0.002, "loss": 2.3584, "step": 116130 }, { "epoch": 0.4489647600933958, "grad_norm": 0.10787412524223328, "learning_rate": 0.002, "loss": 2.3388, "step": 116140 }, { "epoch": 0.44900341729677906, "grad_norm": 0.11185034364461899, "learning_rate": 0.002, "loss": 2.359, "step": 116150 }, { "epoch": 0.44904207450016237, "grad_norm": 0.10436546057462692, "learning_rate": 0.002, "loss": 2.356, "step": 116160 }, { "epoch": 0.4490807317035456, "grad_norm": 0.1262926459312439, "learning_rate": 0.002, "loss": 2.3689, "step": 116170 }, { "epoch": 0.44911938890692893, "grad_norm": 0.09157133102416992, "learning_rate": 0.002, "loss": 2.3444, "step": 116180 }, { "epoch": 0.4491580461103122, "grad_norm": 0.1170177310705185, "learning_rate": 0.002, "loss": 2.3467, "step": 116190 }, { "epoch": 0.4491967033136955, "grad_norm": 0.10202539712190628, "learning_rate": 0.002, "loss": 2.3464, "step": 116200 }, { "epoch": 0.44923536051707874, "grad_norm": 0.11776548624038696, "learning_rate": 0.002, "loss": 2.343, "step": 116210 }, { "epoch": 0.44927401772046205, "grad_norm": 0.09582582116127014, "learning_rate": 0.002, "loss": 2.3562, "step": 116220 }, { "epoch": 0.4493126749238453, "grad_norm": 0.09644076973199844, "learning_rate": 0.002, "loss": 2.3462, "step": 116230 }, { "epoch": 0.4493513321272286, "grad_norm": 0.11195076256990433, "learning_rate": 0.002, "loss": 2.3457, "step": 116240 }, { "epoch": 0.44938998933061186, "grad_norm": 0.11746636778116226, "learning_rate": 0.002, "loss": 2.3443, "step": 116250 }, { "epoch": 0.44942864653399517, "grad_norm": 0.11495273560285568, "learning_rate": 0.002, "loss": 2.3496, "step": 116260 }, { "epoch": 0.4494673037373784, "grad_norm": 0.12250115722417831, "learning_rate": 0.002, "loss": 2.33, "step": 116270 }, { "epoch": 0.44950596094076173, "grad_norm": 0.10969327390193939, "learning_rate": 0.002, "loss": 2.3563, "step": 116280 }, { "epoch": 0.449544618144145, "grad_norm": 0.10717156529426575, "learning_rate": 0.002, "loss": 2.3603, "step": 116290 }, { "epoch": 0.44958327534752823, "grad_norm": 0.12968280911445618, "learning_rate": 0.002, "loss": 2.3493, "step": 116300 }, { "epoch": 0.44962193255091154, "grad_norm": 0.10125463455915451, "learning_rate": 0.002, "loss": 2.3449, "step": 116310 }, { "epoch": 0.4496605897542948, "grad_norm": 0.10570531338453293, "learning_rate": 0.002, "loss": 2.349, "step": 116320 }, { "epoch": 0.4496992469576781, "grad_norm": 0.10413254052400589, "learning_rate": 0.002, "loss": 2.3495, "step": 116330 }, { "epoch": 0.44973790416106135, "grad_norm": 0.11833761632442474, "learning_rate": 0.002, "loss": 2.3456, "step": 116340 }, { "epoch": 0.44977656136444466, "grad_norm": 0.09715671837329865, "learning_rate": 0.002, "loss": 2.3552, "step": 116350 }, { "epoch": 0.4498152185678279, "grad_norm": 0.09942399710416794, "learning_rate": 0.002, "loss": 2.3426, "step": 116360 }, { "epoch": 0.4498538757712112, "grad_norm": 0.1249130591750145, "learning_rate": 0.002, "loss": 2.3496, "step": 116370 }, { "epoch": 0.4498925329745945, "grad_norm": 0.11511149257421494, "learning_rate": 0.002, "loss": 2.3466, "step": 116380 }, { "epoch": 0.4499311901779778, "grad_norm": 0.117387555539608, "learning_rate": 0.002, "loss": 2.3386, "step": 116390 }, { "epoch": 0.44996984738136103, "grad_norm": 0.11409103870391846, "learning_rate": 0.002, "loss": 2.3438, "step": 116400 }, { "epoch": 0.45000850458474434, "grad_norm": 0.09024668484926224, "learning_rate": 0.002, "loss": 2.3389, "step": 116410 }, { "epoch": 0.4500471617881276, "grad_norm": 0.12117420881986618, "learning_rate": 0.002, "loss": 2.3368, "step": 116420 }, { "epoch": 0.4500858189915109, "grad_norm": 0.12811824679374695, "learning_rate": 0.002, "loss": 2.3953, "step": 116430 }, { "epoch": 0.45012447619489415, "grad_norm": 0.09323813021183014, "learning_rate": 0.002, "loss": 2.3483, "step": 116440 }, { "epoch": 0.45016313339827746, "grad_norm": 0.10673702508211136, "learning_rate": 0.002, "loss": 2.3493, "step": 116450 }, { "epoch": 0.4502017906016607, "grad_norm": 0.1002158671617508, "learning_rate": 0.002, "loss": 2.3544, "step": 116460 }, { "epoch": 0.450240447805044, "grad_norm": 0.08856503665447235, "learning_rate": 0.002, "loss": 2.3517, "step": 116470 }, { "epoch": 0.45027910500842727, "grad_norm": 0.14465701580047607, "learning_rate": 0.002, "loss": 2.3474, "step": 116480 }, { "epoch": 0.4503177622118105, "grad_norm": 0.11650863289833069, "learning_rate": 0.002, "loss": 2.3515, "step": 116490 }, { "epoch": 0.45035641941519383, "grad_norm": 0.10695286840200424, "learning_rate": 0.002, "loss": 2.3462, "step": 116500 }, { "epoch": 0.4503950766185771, "grad_norm": 0.11719808727502823, "learning_rate": 0.002, "loss": 2.3455, "step": 116510 }, { "epoch": 0.4504337338219604, "grad_norm": 0.11228390038013458, "learning_rate": 0.002, "loss": 2.3469, "step": 116520 }, { "epoch": 0.45047239102534364, "grad_norm": 0.10490066558122635, "learning_rate": 0.002, "loss": 2.3452, "step": 116530 }, { "epoch": 0.45051104822872695, "grad_norm": 0.10596594959497452, "learning_rate": 0.002, "loss": 2.336, "step": 116540 }, { "epoch": 0.4505497054321102, "grad_norm": 0.10827124118804932, "learning_rate": 0.002, "loss": 2.3508, "step": 116550 }, { "epoch": 0.4505883626354935, "grad_norm": 0.09110372513532639, "learning_rate": 0.002, "loss": 2.3497, "step": 116560 }, { "epoch": 0.45062701983887676, "grad_norm": 0.11297664791345596, "learning_rate": 0.002, "loss": 2.3647, "step": 116570 }, { "epoch": 0.45066567704226007, "grad_norm": 0.10109726339578629, "learning_rate": 0.002, "loss": 2.352, "step": 116580 }, { "epoch": 0.4507043342456433, "grad_norm": 0.12487292289733887, "learning_rate": 0.002, "loss": 2.3543, "step": 116590 }, { "epoch": 0.45074299144902663, "grad_norm": 0.11954599618911743, "learning_rate": 0.002, "loss": 2.3537, "step": 116600 }, { "epoch": 0.4507816486524099, "grad_norm": 0.11460407823324203, "learning_rate": 0.002, "loss": 2.3357, "step": 116610 }, { "epoch": 0.4508203058557932, "grad_norm": 0.1472836583852768, "learning_rate": 0.002, "loss": 2.3581, "step": 116620 }, { "epoch": 0.45085896305917644, "grad_norm": 0.11723505705595016, "learning_rate": 0.002, "loss": 2.3472, "step": 116630 }, { "epoch": 0.45089762026255975, "grad_norm": 0.11846832185983658, "learning_rate": 0.002, "loss": 2.3527, "step": 116640 }, { "epoch": 0.450936277465943, "grad_norm": 0.10739443451166153, "learning_rate": 0.002, "loss": 2.3441, "step": 116650 }, { "epoch": 0.45097493466932626, "grad_norm": 0.0924198254942894, "learning_rate": 0.002, "loss": 2.3431, "step": 116660 }, { "epoch": 0.45101359187270956, "grad_norm": 0.09949216991662979, "learning_rate": 0.002, "loss": 2.3506, "step": 116670 }, { "epoch": 0.4510522490760928, "grad_norm": 0.10973398387432098, "learning_rate": 0.002, "loss": 2.3496, "step": 116680 }, { "epoch": 0.4510909062794761, "grad_norm": 0.10357681661844254, "learning_rate": 0.002, "loss": 2.3357, "step": 116690 }, { "epoch": 0.4511295634828594, "grad_norm": 0.11836923658847809, "learning_rate": 0.002, "loss": 2.3656, "step": 116700 }, { "epoch": 0.4511682206862427, "grad_norm": 0.09963072091341019, "learning_rate": 0.002, "loss": 2.353, "step": 116710 }, { "epoch": 0.45120687788962593, "grad_norm": 0.10210157185792923, "learning_rate": 0.002, "loss": 2.3493, "step": 116720 }, { "epoch": 0.45124553509300924, "grad_norm": 0.12839585542678833, "learning_rate": 0.002, "loss": 2.3523, "step": 116730 }, { "epoch": 0.4512841922963925, "grad_norm": 0.10952276736497879, "learning_rate": 0.002, "loss": 2.354, "step": 116740 }, { "epoch": 0.4513228494997758, "grad_norm": 0.10620319843292236, "learning_rate": 0.002, "loss": 2.3471, "step": 116750 }, { "epoch": 0.45136150670315905, "grad_norm": 0.13847695291042328, "learning_rate": 0.002, "loss": 2.3441, "step": 116760 }, { "epoch": 0.45140016390654236, "grad_norm": 0.1073933020234108, "learning_rate": 0.002, "loss": 2.3433, "step": 116770 }, { "epoch": 0.4514388211099256, "grad_norm": 0.11338157206773758, "learning_rate": 0.002, "loss": 2.3564, "step": 116780 }, { "epoch": 0.4514774783133089, "grad_norm": 0.10555189102888107, "learning_rate": 0.002, "loss": 2.3434, "step": 116790 }, { "epoch": 0.4515161355166922, "grad_norm": 0.0956127718091011, "learning_rate": 0.002, "loss": 2.3508, "step": 116800 }, { "epoch": 0.4515547927200755, "grad_norm": 0.10321953892707825, "learning_rate": 0.002, "loss": 2.3462, "step": 116810 }, { "epoch": 0.45159344992345873, "grad_norm": 0.12122472375631332, "learning_rate": 0.002, "loss": 2.3335, "step": 116820 }, { "epoch": 0.45163210712684204, "grad_norm": 0.12436135113239288, "learning_rate": 0.002, "loss": 2.3637, "step": 116830 }, { "epoch": 0.4516707643302253, "grad_norm": 0.1039997860789299, "learning_rate": 0.002, "loss": 2.3518, "step": 116840 }, { "epoch": 0.45170942153360855, "grad_norm": 0.11182855814695358, "learning_rate": 0.002, "loss": 2.3453, "step": 116850 }, { "epoch": 0.45174807873699185, "grad_norm": 0.11596731841564178, "learning_rate": 0.002, "loss": 2.3545, "step": 116860 }, { "epoch": 0.4517867359403751, "grad_norm": 0.10430099815130234, "learning_rate": 0.002, "loss": 2.3475, "step": 116870 }, { "epoch": 0.4518253931437584, "grad_norm": 0.11345577985048294, "learning_rate": 0.002, "loss": 2.3508, "step": 116880 }, { "epoch": 0.45186405034714167, "grad_norm": 0.10795613378286362, "learning_rate": 0.002, "loss": 2.3487, "step": 116890 }, { "epoch": 0.451902707550525, "grad_norm": 0.13586848974227905, "learning_rate": 0.002, "loss": 2.3502, "step": 116900 }, { "epoch": 0.4519413647539082, "grad_norm": 0.1062573790550232, "learning_rate": 0.002, "loss": 2.3357, "step": 116910 }, { "epoch": 0.45198002195729153, "grad_norm": 0.11689643561840057, "learning_rate": 0.002, "loss": 2.336, "step": 116920 }, { "epoch": 0.4520186791606748, "grad_norm": 0.11215382069349289, "learning_rate": 0.002, "loss": 2.3354, "step": 116930 }, { "epoch": 0.4520573363640581, "grad_norm": 0.10423940420150757, "learning_rate": 0.002, "loss": 2.3413, "step": 116940 }, { "epoch": 0.45209599356744135, "grad_norm": 0.11624042689800262, "learning_rate": 0.002, "loss": 2.3468, "step": 116950 }, { "epoch": 0.45213465077082465, "grad_norm": 0.10433658957481384, "learning_rate": 0.002, "loss": 2.3414, "step": 116960 }, { "epoch": 0.4521733079742079, "grad_norm": 0.10768541693687439, "learning_rate": 0.002, "loss": 2.3377, "step": 116970 }, { "epoch": 0.4522119651775912, "grad_norm": 0.10143450647592545, "learning_rate": 0.002, "loss": 2.3554, "step": 116980 }, { "epoch": 0.45225062238097447, "grad_norm": 0.1296466588973999, "learning_rate": 0.002, "loss": 2.3581, "step": 116990 }, { "epoch": 0.4522892795843578, "grad_norm": 0.13517975807189941, "learning_rate": 0.002, "loss": 2.3392, "step": 117000 }, { "epoch": 0.452327936787741, "grad_norm": 0.10195448994636536, "learning_rate": 0.002, "loss": 2.3331, "step": 117010 }, { "epoch": 0.45236659399112433, "grad_norm": 0.1134767085313797, "learning_rate": 0.002, "loss": 2.3358, "step": 117020 }, { "epoch": 0.4524052511945076, "grad_norm": 0.11139202117919922, "learning_rate": 0.002, "loss": 2.3391, "step": 117030 }, { "epoch": 0.45244390839789084, "grad_norm": 0.11013373732566833, "learning_rate": 0.002, "loss": 2.3339, "step": 117040 }, { "epoch": 0.45248256560127414, "grad_norm": 0.11789267510175705, "learning_rate": 0.002, "loss": 2.3499, "step": 117050 }, { "epoch": 0.4525212228046574, "grad_norm": 0.10006678104400635, "learning_rate": 0.002, "loss": 2.356, "step": 117060 }, { "epoch": 0.4525598800080407, "grad_norm": 0.10965073108673096, "learning_rate": 0.002, "loss": 2.3575, "step": 117070 }, { "epoch": 0.45259853721142396, "grad_norm": 0.10759425163269043, "learning_rate": 0.002, "loss": 2.3499, "step": 117080 }, { "epoch": 0.45263719441480726, "grad_norm": 0.1084289476275444, "learning_rate": 0.002, "loss": 2.3455, "step": 117090 }, { "epoch": 0.4526758516181905, "grad_norm": 0.10556847602128983, "learning_rate": 0.002, "loss": 2.346, "step": 117100 }, { "epoch": 0.4527145088215738, "grad_norm": 0.11164150387048721, "learning_rate": 0.002, "loss": 2.3518, "step": 117110 }, { "epoch": 0.4527531660249571, "grad_norm": 0.12561391294002533, "learning_rate": 0.002, "loss": 2.3447, "step": 117120 }, { "epoch": 0.4527918232283404, "grad_norm": 0.09579742699861526, "learning_rate": 0.002, "loss": 2.3489, "step": 117130 }, { "epoch": 0.45283048043172364, "grad_norm": 0.11333385109901428, "learning_rate": 0.002, "loss": 2.3412, "step": 117140 }, { "epoch": 0.45286913763510694, "grad_norm": 0.1069633960723877, "learning_rate": 0.002, "loss": 2.355, "step": 117150 }, { "epoch": 0.4529077948384902, "grad_norm": 0.10790170729160309, "learning_rate": 0.002, "loss": 2.3562, "step": 117160 }, { "epoch": 0.4529464520418735, "grad_norm": 0.11147533357143402, "learning_rate": 0.002, "loss": 2.3553, "step": 117170 }, { "epoch": 0.45298510924525676, "grad_norm": 0.1039792075753212, "learning_rate": 0.002, "loss": 2.3464, "step": 117180 }, { "epoch": 0.45302376644864006, "grad_norm": 0.10051432996988297, "learning_rate": 0.002, "loss": 2.3324, "step": 117190 }, { "epoch": 0.4530624236520233, "grad_norm": 0.09797567129135132, "learning_rate": 0.002, "loss": 2.3394, "step": 117200 }, { "epoch": 0.4531010808554066, "grad_norm": 0.10028452426195145, "learning_rate": 0.002, "loss": 2.3347, "step": 117210 }, { "epoch": 0.4531397380587899, "grad_norm": 0.09154338389635086, "learning_rate": 0.002, "loss": 2.35, "step": 117220 }, { "epoch": 0.45317839526217313, "grad_norm": 0.1039407029747963, "learning_rate": 0.002, "loss": 2.3453, "step": 117230 }, { "epoch": 0.45321705246555644, "grad_norm": 0.10166822373867035, "learning_rate": 0.002, "loss": 2.3501, "step": 117240 }, { "epoch": 0.4532557096689397, "grad_norm": 0.12528187036514282, "learning_rate": 0.002, "loss": 2.346, "step": 117250 }, { "epoch": 0.453294366872323, "grad_norm": 0.10523614287376404, "learning_rate": 0.002, "loss": 2.3418, "step": 117260 }, { "epoch": 0.45333302407570625, "grad_norm": 0.09844973683357239, "learning_rate": 0.002, "loss": 2.3483, "step": 117270 }, { "epoch": 0.45337168127908956, "grad_norm": 0.11845612525939941, "learning_rate": 0.002, "loss": 2.3498, "step": 117280 }, { "epoch": 0.4534103384824728, "grad_norm": 0.11746399104595184, "learning_rate": 0.002, "loss": 2.3406, "step": 117290 }, { "epoch": 0.4534489956858561, "grad_norm": 0.11793000251054764, "learning_rate": 0.002, "loss": 2.3497, "step": 117300 }, { "epoch": 0.45348765288923937, "grad_norm": 0.11313361674547195, "learning_rate": 0.002, "loss": 2.3683, "step": 117310 }, { "epoch": 0.4535263100926227, "grad_norm": 0.1121147871017456, "learning_rate": 0.002, "loss": 2.3525, "step": 117320 }, { "epoch": 0.4535649672960059, "grad_norm": 0.1194700226187706, "learning_rate": 0.002, "loss": 2.3456, "step": 117330 }, { "epoch": 0.45360362449938924, "grad_norm": 0.11465243995189667, "learning_rate": 0.002, "loss": 2.3427, "step": 117340 }, { "epoch": 0.4536422817027725, "grad_norm": 0.11006400734186172, "learning_rate": 0.002, "loss": 2.3514, "step": 117350 }, { "epoch": 0.4536809389061558, "grad_norm": 0.12547846138477325, "learning_rate": 0.002, "loss": 2.3484, "step": 117360 }, { "epoch": 0.45371959610953905, "grad_norm": 0.12957924604415894, "learning_rate": 0.002, "loss": 2.3654, "step": 117370 }, { "epoch": 0.45375825331292235, "grad_norm": 0.09238374978303909, "learning_rate": 0.002, "loss": 2.3346, "step": 117380 }, { "epoch": 0.4537969105163056, "grad_norm": 0.091926708817482, "learning_rate": 0.002, "loss": 2.3443, "step": 117390 }, { "epoch": 0.45383556771968886, "grad_norm": 0.10020801424980164, "learning_rate": 0.002, "loss": 2.3433, "step": 117400 }, { "epoch": 0.45387422492307217, "grad_norm": 0.10550692677497864, "learning_rate": 0.002, "loss": 2.3365, "step": 117410 }, { "epoch": 0.4539128821264554, "grad_norm": 0.10293002426624298, "learning_rate": 0.002, "loss": 2.3569, "step": 117420 }, { "epoch": 0.4539515393298387, "grad_norm": 0.11234964430332184, "learning_rate": 0.002, "loss": 2.3503, "step": 117430 }, { "epoch": 0.453990196533222, "grad_norm": 0.10801722854375839, "learning_rate": 0.002, "loss": 2.3435, "step": 117440 }, { "epoch": 0.4540288537366053, "grad_norm": 0.10281901806592941, "learning_rate": 0.002, "loss": 2.3425, "step": 117450 }, { "epoch": 0.45406751093998854, "grad_norm": 0.11679302155971527, "learning_rate": 0.002, "loss": 2.3536, "step": 117460 }, { "epoch": 0.45410616814337185, "grad_norm": 0.10115484893321991, "learning_rate": 0.002, "loss": 2.3589, "step": 117470 }, { "epoch": 0.4541448253467551, "grad_norm": 0.09839174151420593, "learning_rate": 0.002, "loss": 2.3422, "step": 117480 }, { "epoch": 0.4541834825501384, "grad_norm": 0.10131075978279114, "learning_rate": 0.002, "loss": 2.3501, "step": 117490 }, { "epoch": 0.45422213975352166, "grad_norm": 0.10719174891710281, "learning_rate": 0.002, "loss": 2.3544, "step": 117500 }, { "epoch": 0.45426079695690497, "grad_norm": 0.11066577583551407, "learning_rate": 0.002, "loss": 2.3384, "step": 117510 }, { "epoch": 0.4542994541602882, "grad_norm": 0.11224183440208435, "learning_rate": 0.002, "loss": 2.3402, "step": 117520 }, { "epoch": 0.4543381113636715, "grad_norm": 0.09913508594036102, "learning_rate": 0.002, "loss": 2.3399, "step": 117530 }, { "epoch": 0.4543767685670548, "grad_norm": 0.12280930578708649, "learning_rate": 0.002, "loss": 2.3289, "step": 117540 }, { "epoch": 0.4544154257704381, "grad_norm": 0.11988666653633118, "learning_rate": 0.002, "loss": 2.3304, "step": 117550 }, { "epoch": 0.45445408297382134, "grad_norm": 0.09960682690143585, "learning_rate": 0.002, "loss": 2.348, "step": 117560 }, { "epoch": 0.45449274017720465, "grad_norm": 0.10411369055509567, "learning_rate": 0.002, "loss": 2.3347, "step": 117570 }, { "epoch": 0.4545313973805879, "grad_norm": 0.10298977047204971, "learning_rate": 0.002, "loss": 2.3509, "step": 117580 }, { "epoch": 0.45457005458397115, "grad_norm": 0.10172754526138306, "learning_rate": 0.002, "loss": 2.3508, "step": 117590 }, { "epoch": 0.45460871178735446, "grad_norm": 0.11252429336309433, "learning_rate": 0.002, "loss": 2.3283, "step": 117600 }, { "epoch": 0.4546473689907377, "grad_norm": 0.10582167655229568, "learning_rate": 0.002, "loss": 2.339, "step": 117610 }, { "epoch": 0.454686026194121, "grad_norm": 0.11229343712329865, "learning_rate": 0.002, "loss": 2.3357, "step": 117620 }, { "epoch": 0.45472468339750427, "grad_norm": 0.10725530236959457, "learning_rate": 0.002, "loss": 2.3474, "step": 117630 }, { "epoch": 0.4547633406008876, "grad_norm": 0.11920682340860367, "learning_rate": 0.002, "loss": 2.3413, "step": 117640 }, { "epoch": 0.45480199780427083, "grad_norm": 0.11216481029987335, "learning_rate": 0.002, "loss": 2.3445, "step": 117650 }, { "epoch": 0.45484065500765414, "grad_norm": 0.10069788247346878, "learning_rate": 0.002, "loss": 2.3267, "step": 117660 }, { "epoch": 0.4548793122110374, "grad_norm": 0.1241251602768898, "learning_rate": 0.002, "loss": 2.3407, "step": 117670 }, { "epoch": 0.4549179694144207, "grad_norm": 0.11363086104393005, "learning_rate": 0.002, "loss": 2.3462, "step": 117680 }, { "epoch": 0.45495662661780395, "grad_norm": 0.1133902296423912, "learning_rate": 0.002, "loss": 2.336, "step": 117690 }, { "epoch": 0.45499528382118726, "grad_norm": 0.10708627104759216, "learning_rate": 0.002, "loss": 2.3537, "step": 117700 }, { "epoch": 0.4550339410245705, "grad_norm": 0.10656888037919998, "learning_rate": 0.002, "loss": 2.3328, "step": 117710 }, { "epoch": 0.4550725982279538, "grad_norm": 0.09741010516881943, "learning_rate": 0.002, "loss": 2.3494, "step": 117720 }, { "epoch": 0.45511125543133707, "grad_norm": 0.10540743917226791, "learning_rate": 0.002, "loss": 2.3548, "step": 117730 }, { "epoch": 0.4551499126347204, "grad_norm": 0.10656697303056717, "learning_rate": 0.002, "loss": 2.3426, "step": 117740 }, { "epoch": 0.45518856983810363, "grad_norm": 0.10528460144996643, "learning_rate": 0.002, "loss": 2.3499, "step": 117750 }, { "epoch": 0.45522722704148694, "grad_norm": 0.1179598867893219, "learning_rate": 0.002, "loss": 2.3401, "step": 117760 }, { "epoch": 0.4552658842448702, "grad_norm": 0.12058539688587189, "learning_rate": 0.002, "loss": 2.3495, "step": 117770 }, { "epoch": 0.45530454144825344, "grad_norm": 0.12516923248767853, "learning_rate": 0.002, "loss": 2.3463, "step": 117780 }, { "epoch": 0.45534319865163675, "grad_norm": 0.1060568243265152, "learning_rate": 0.002, "loss": 2.3567, "step": 117790 }, { "epoch": 0.45538185585502, "grad_norm": 0.11407436430454254, "learning_rate": 0.002, "loss": 2.3466, "step": 117800 }, { "epoch": 0.4554205130584033, "grad_norm": 0.10866278409957886, "learning_rate": 0.002, "loss": 2.3496, "step": 117810 }, { "epoch": 0.45545917026178656, "grad_norm": 0.10732495784759521, "learning_rate": 0.002, "loss": 2.3515, "step": 117820 }, { "epoch": 0.45549782746516987, "grad_norm": 0.11802234500646591, "learning_rate": 0.002, "loss": 2.3488, "step": 117830 }, { "epoch": 0.4555364846685531, "grad_norm": 0.11777086555957794, "learning_rate": 0.002, "loss": 2.3478, "step": 117840 }, { "epoch": 0.45557514187193643, "grad_norm": 0.10621151328086853, "learning_rate": 0.002, "loss": 2.3478, "step": 117850 }, { "epoch": 0.4556137990753197, "grad_norm": 0.09682837873697281, "learning_rate": 0.002, "loss": 2.3502, "step": 117860 }, { "epoch": 0.455652456278703, "grad_norm": 0.11486103385686874, "learning_rate": 0.002, "loss": 2.3562, "step": 117870 }, { "epoch": 0.45569111348208624, "grad_norm": 0.10555505752563477, "learning_rate": 0.002, "loss": 2.3419, "step": 117880 }, { "epoch": 0.45572977068546955, "grad_norm": 0.12206471711397171, "learning_rate": 0.002, "loss": 2.347, "step": 117890 }, { "epoch": 0.4557684278888528, "grad_norm": 0.10294688493013382, "learning_rate": 0.002, "loss": 2.3567, "step": 117900 }, { "epoch": 0.4558070850922361, "grad_norm": 0.09726481139659882, "learning_rate": 0.002, "loss": 2.3555, "step": 117910 }, { "epoch": 0.45584574229561936, "grad_norm": 0.11977904289960861, "learning_rate": 0.002, "loss": 2.3287, "step": 117920 }, { "epoch": 0.45588439949900267, "grad_norm": 0.10810381174087524, "learning_rate": 0.002, "loss": 2.3294, "step": 117930 }, { "epoch": 0.4559230567023859, "grad_norm": 0.12523780763149261, "learning_rate": 0.002, "loss": 2.3468, "step": 117940 }, { "epoch": 0.45596171390576923, "grad_norm": 0.10021597892045975, "learning_rate": 0.002, "loss": 2.3551, "step": 117950 }, { "epoch": 0.4560003711091525, "grad_norm": 0.11003972589969635, "learning_rate": 0.002, "loss": 2.3508, "step": 117960 }, { "epoch": 0.45603902831253573, "grad_norm": 0.10835086554288864, "learning_rate": 0.002, "loss": 2.3401, "step": 117970 }, { "epoch": 0.45607768551591904, "grad_norm": 0.11682787537574768, "learning_rate": 0.002, "loss": 2.3494, "step": 117980 }, { "epoch": 0.4561163427193023, "grad_norm": 0.10805117338895798, "learning_rate": 0.002, "loss": 2.3444, "step": 117990 }, { "epoch": 0.4561549999226856, "grad_norm": 0.11354291439056396, "learning_rate": 0.002, "loss": 2.3304, "step": 118000 }, { "epoch": 0.45619365712606885, "grad_norm": 0.09457213431596756, "learning_rate": 0.002, "loss": 2.3532, "step": 118010 }, { "epoch": 0.45623231432945216, "grad_norm": 0.11127649247646332, "learning_rate": 0.002, "loss": 2.3478, "step": 118020 }, { "epoch": 0.4562709715328354, "grad_norm": 0.1210864931344986, "learning_rate": 0.002, "loss": 2.3504, "step": 118030 }, { "epoch": 0.4563096287362187, "grad_norm": 0.10766540467739105, "learning_rate": 0.002, "loss": 2.3491, "step": 118040 }, { "epoch": 0.45634828593960197, "grad_norm": 0.09710508584976196, "learning_rate": 0.002, "loss": 2.3497, "step": 118050 }, { "epoch": 0.4563869431429853, "grad_norm": 0.10217183083295822, "learning_rate": 0.002, "loss": 2.3426, "step": 118060 }, { "epoch": 0.45642560034636853, "grad_norm": 0.11859377473592758, "learning_rate": 0.002, "loss": 2.3398, "step": 118070 }, { "epoch": 0.45646425754975184, "grad_norm": 0.11485655605792999, "learning_rate": 0.002, "loss": 2.3591, "step": 118080 }, { "epoch": 0.4565029147531351, "grad_norm": 0.10804521292448044, "learning_rate": 0.002, "loss": 2.3404, "step": 118090 }, { "epoch": 0.4565415719565184, "grad_norm": 0.11177793145179749, "learning_rate": 0.002, "loss": 2.3465, "step": 118100 }, { "epoch": 0.45658022915990165, "grad_norm": 0.13285163044929504, "learning_rate": 0.002, "loss": 2.341, "step": 118110 }, { "epoch": 0.45661888636328496, "grad_norm": 0.101204052567482, "learning_rate": 0.002, "loss": 2.3452, "step": 118120 }, { "epoch": 0.4566575435666682, "grad_norm": 0.10703273862600327, "learning_rate": 0.002, "loss": 2.3533, "step": 118130 }, { "epoch": 0.4566962007700515, "grad_norm": 0.10772348195314407, "learning_rate": 0.002, "loss": 2.3519, "step": 118140 }, { "epoch": 0.45673485797343477, "grad_norm": 0.1055298000574112, "learning_rate": 0.002, "loss": 2.3385, "step": 118150 }, { "epoch": 0.456773515176818, "grad_norm": 0.10721943527460098, "learning_rate": 0.002, "loss": 2.3467, "step": 118160 }, { "epoch": 0.45681217238020133, "grad_norm": 0.12398935854434967, "learning_rate": 0.002, "loss": 2.3412, "step": 118170 }, { "epoch": 0.4568508295835846, "grad_norm": 0.10382281243801117, "learning_rate": 0.002, "loss": 2.3567, "step": 118180 }, { "epoch": 0.4568894867869679, "grad_norm": 0.09265174716711044, "learning_rate": 0.002, "loss": 2.3536, "step": 118190 }, { "epoch": 0.45692814399035114, "grad_norm": 0.09246476739645004, "learning_rate": 0.002, "loss": 2.3362, "step": 118200 }, { "epoch": 0.45696680119373445, "grad_norm": 0.10811164975166321, "learning_rate": 0.002, "loss": 2.3462, "step": 118210 }, { "epoch": 0.4570054583971177, "grad_norm": 0.11175240576267242, "learning_rate": 0.002, "loss": 2.3289, "step": 118220 }, { "epoch": 0.457044115600501, "grad_norm": 0.11679556965827942, "learning_rate": 0.002, "loss": 2.3645, "step": 118230 }, { "epoch": 0.45708277280388426, "grad_norm": 0.11951835453510284, "learning_rate": 0.002, "loss": 2.3431, "step": 118240 }, { "epoch": 0.45712143000726757, "grad_norm": 0.09550821036100388, "learning_rate": 0.002, "loss": 2.346, "step": 118250 }, { "epoch": 0.4571600872106508, "grad_norm": 0.12797249853610992, "learning_rate": 0.002, "loss": 2.3494, "step": 118260 }, { "epoch": 0.45719874441403413, "grad_norm": 0.1069146916270256, "learning_rate": 0.002, "loss": 2.3502, "step": 118270 }, { "epoch": 0.4572374016174174, "grad_norm": 0.11669927090406418, "learning_rate": 0.002, "loss": 2.3206, "step": 118280 }, { "epoch": 0.4572760588208007, "grad_norm": 0.09638587385416031, "learning_rate": 0.002, "loss": 2.3469, "step": 118290 }, { "epoch": 0.45731471602418394, "grad_norm": 0.10993216931819916, "learning_rate": 0.002, "loss": 2.3351, "step": 118300 }, { "epoch": 0.45735337322756725, "grad_norm": 0.10969716310501099, "learning_rate": 0.002, "loss": 2.3398, "step": 118310 }, { "epoch": 0.4573920304309505, "grad_norm": 0.10108502954244614, "learning_rate": 0.002, "loss": 2.3347, "step": 118320 }, { "epoch": 0.45743068763433375, "grad_norm": 0.11068734526634216, "learning_rate": 0.002, "loss": 2.3538, "step": 118330 }, { "epoch": 0.45746934483771706, "grad_norm": 0.09397805482149124, "learning_rate": 0.002, "loss": 2.3478, "step": 118340 }, { "epoch": 0.4575080020411003, "grad_norm": 0.10946174710988998, "learning_rate": 0.002, "loss": 2.3429, "step": 118350 }, { "epoch": 0.4575466592444836, "grad_norm": 0.11118168383836746, "learning_rate": 0.002, "loss": 2.3615, "step": 118360 }, { "epoch": 0.4575853164478669, "grad_norm": 0.11338678002357483, "learning_rate": 0.002, "loss": 2.3413, "step": 118370 }, { "epoch": 0.4576239736512502, "grad_norm": 0.10616643726825714, "learning_rate": 0.002, "loss": 2.3433, "step": 118380 }, { "epoch": 0.45766263085463343, "grad_norm": 0.0960552766919136, "learning_rate": 0.002, "loss": 2.3616, "step": 118390 }, { "epoch": 0.45770128805801674, "grad_norm": 0.10193514823913574, "learning_rate": 0.002, "loss": 2.3407, "step": 118400 }, { "epoch": 0.4577399452614, "grad_norm": 0.10242512077093124, "learning_rate": 0.002, "loss": 2.3477, "step": 118410 }, { "epoch": 0.4577786024647833, "grad_norm": 0.09158801287412643, "learning_rate": 0.002, "loss": 2.3538, "step": 118420 }, { "epoch": 0.45781725966816655, "grad_norm": 0.11092250794172287, "learning_rate": 0.002, "loss": 2.3571, "step": 118430 }, { "epoch": 0.45785591687154986, "grad_norm": 0.11364707350730896, "learning_rate": 0.002, "loss": 2.3492, "step": 118440 }, { "epoch": 0.4578945740749331, "grad_norm": 0.10631339997053146, "learning_rate": 0.002, "loss": 2.3242, "step": 118450 }, { "epoch": 0.4579332312783164, "grad_norm": 0.11272207647562027, "learning_rate": 0.002, "loss": 2.3383, "step": 118460 }, { "epoch": 0.4579718884816997, "grad_norm": 0.109578438103199, "learning_rate": 0.002, "loss": 2.3511, "step": 118470 }, { "epoch": 0.458010545685083, "grad_norm": 0.09565993398427963, "learning_rate": 0.002, "loss": 2.361, "step": 118480 }, { "epoch": 0.45804920288846623, "grad_norm": 0.1042008176445961, "learning_rate": 0.002, "loss": 2.3478, "step": 118490 }, { "epoch": 0.45808786009184954, "grad_norm": 0.09246491640806198, "learning_rate": 0.002, "loss": 2.3429, "step": 118500 }, { "epoch": 0.4581265172952328, "grad_norm": 0.0987660214304924, "learning_rate": 0.002, "loss": 2.3572, "step": 118510 }, { "epoch": 0.45816517449861605, "grad_norm": 0.1291125863790512, "learning_rate": 0.002, "loss": 2.3561, "step": 118520 }, { "epoch": 0.45820383170199935, "grad_norm": 0.11253218352794647, "learning_rate": 0.002, "loss": 2.3506, "step": 118530 }, { "epoch": 0.4582424889053826, "grad_norm": 0.10364680737257004, "learning_rate": 0.002, "loss": 2.3461, "step": 118540 }, { "epoch": 0.4582811461087659, "grad_norm": 0.09472087025642395, "learning_rate": 0.002, "loss": 2.3511, "step": 118550 }, { "epoch": 0.45831980331214917, "grad_norm": 0.11546590924263, "learning_rate": 0.002, "loss": 2.35, "step": 118560 }, { "epoch": 0.4583584605155325, "grad_norm": 0.11254972964525223, "learning_rate": 0.002, "loss": 2.3534, "step": 118570 }, { "epoch": 0.4583971177189157, "grad_norm": 0.11288855969905853, "learning_rate": 0.002, "loss": 2.3435, "step": 118580 }, { "epoch": 0.45843577492229903, "grad_norm": 0.10879432410001755, "learning_rate": 0.002, "loss": 2.3417, "step": 118590 }, { "epoch": 0.4584744321256823, "grad_norm": 0.10375107824802399, "learning_rate": 0.002, "loss": 2.3446, "step": 118600 }, { "epoch": 0.4585130893290656, "grad_norm": 0.10116180032491684, "learning_rate": 0.002, "loss": 2.342, "step": 118610 }, { "epoch": 0.45855174653244885, "grad_norm": 0.13571816682815552, "learning_rate": 0.002, "loss": 2.3482, "step": 118620 }, { "epoch": 0.45859040373583215, "grad_norm": 0.10424613207578659, "learning_rate": 0.002, "loss": 2.3584, "step": 118630 }, { "epoch": 0.4586290609392154, "grad_norm": 0.10607179999351501, "learning_rate": 0.002, "loss": 2.3517, "step": 118640 }, { "epoch": 0.4586677181425987, "grad_norm": 0.10820239782333374, "learning_rate": 0.002, "loss": 2.3531, "step": 118650 }, { "epoch": 0.45870637534598196, "grad_norm": 0.11633388698101044, "learning_rate": 0.002, "loss": 2.3609, "step": 118660 }, { "epoch": 0.4587450325493653, "grad_norm": 0.10916215181350708, "learning_rate": 0.002, "loss": 2.351, "step": 118670 }, { "epoch": 0.4587836897527485, "grad_norm": 0.11252785474061966, "learning_rate": 0.002, "loss": 2.3425, "step": 118680 }, { "epoch": 0.45882234695613183, "grad_norm": 0.11671953648328781, "learning_rate": 0.002, "loss": 2.3526, "step": 118690 }, { "epoch": 0.4588610041595151, "grad_norm": 0.09520015120506287, "learning_rate": 0.002, "loss": 2.352, "step": 118700 }, { "epoch": 0.45889966136289834, "grad_norm": 0.09781093150377274, "learning_rate": 0.002, "loss": 2.3465, "step": 118710 }, { "epoch": 0.45893831856628164, "grad_norm": 0.10954856872558594, "learning_rate": 0.002, "loss": 2.351, "step": 118720 }, { "epoch": 0.4589769757696649, "grad_norm": 0.11329742521047592, "learning_rate": 0.002, "loss": 2.3618, "step": 118730 }, { "epoch": 0.4590156329730482, "grad_norm": 0.13266512751579285, "learning_rate": 0.002, "loss": 2.3635, "step": 118740 }, { "epoch": 0.45905429017643146, "grad_norm": 0.10615793615579605, "learning_rate": 0.002, "loss": 2.3506, "step": 118750 }, { "epoch": 0.45909294737981476, "grad_norm": 0.09207666665315628, "learning_rate": 0.002, "loss": 2.3457, "step": 118760 }, { "epoch": 0.459131604583198, "grad_norm": 0.1197124645113945, "learning_rate": 0.002, "loss": 2.3473, "step": 118770 }, { "epoch": 0.4591702617865813, "grad_norm": 0.10759207606315613, "learning_rate": 0.002, "loss": 2.3648, "step": 118780 }, { "epoch": 0.4592089189899646, "grad_norm": 0.09776882082223892, "learning_rate": 0.002, "loss": 2.352, "step": 118790 }, { "epoch": 0.4592475761933479, "grad_norm": 0.10756875574588776, "learning_rate": 0.002, "loss": 2.3406, "step": 118800 }, { "epoch": 0.45928623339673114, "grad_norm": 0.11720873415470123, "learning_rate": 0.002, "loss": 2.3495, "step": 118810 }, { "epoch": 0.45932489060011444, "grad_norm": 0.23412977159023285, "learning_rate": 0.002, "loss": 2.3467, "step": 118820 }, { "epoch": 0.4593635478034977, "grad_norm": 0.11052930355072021, "learning_rate": 0.002, "loss": 2.3535, "step": 118830 }, { "epoch": 0.459402205006881, "grad_norm": 0.09955132752656937, "learning_rate": 0.002, "loss": 2.3543, "step": 118840 }, { "epoch": 0.45944086221026426, "grad_norm": 0.10409852862358093, "learning_rate": 0.002, "loss": 2.3294, "step": 118850 }, { "epoch": 0.45947951941364756, "grad_norm": 0.1087784692645073, "learning_rate": 0.002, "loss": 2.3561, "step": 118860 }, { "epoch": 0.4595181766170308, "grad_norm": 0.1173655092716217, "learning_rate": 0.002, "loss": 2.341, "step": 118870 }, { "epoch": 0.4595568338204141, "grad_norm": 0.09329547733068466, "learning_rate": 0.002, "loss": 2.3432, "step": 118880 }, { "epoch": 0.4595954910237974, "grad_norm": 0.12537939846515656, "learning_rate": 0.002, "loss": 2.3463, "step": 118890 }, { "epoch": 0.45963414822718063, "grad_norm": 0.09709435701370239, "learning_rate": 0.002, "loss": 2.3545, "step": 118900 }, { "epoch": 0.45967280543056394, "grad_norm": 0.10174285620450974, "learning_rate": 0.002, "loss": 2.349, "step": 118910 }, { "epoch": 0.4597114626339472, "grad_norm": 0.11365412175655365, "learning_rate": 0.002, "loss": 2.3466, "step": 118920 }, { "epoch": 0.4597501198373305, "grad_norm": 0.10826610773801804, "learning_rate": 0.002, "loss": 2.3463, "step": 118930 }, { "epoch": 0.45978877704071375, "grad_norm": 0.11835940182209015, "learning_rate": 0.002, "loss": 2.3339, "step": 118940 }, { "epoch": 0.45982743424409706, "grad_norm": 0.11163625121116638, "learning_rate": 0.002, "loss": 2.3435, "step": 118950 }, { "epoch": 0.4598660914474803, "grad_norm": 0.09987737238407135, "learning_rate": 0.002, "loss": 2.3463, "step": 118960 }, { "epoch": 0.4599047486508636, "grad_norm": 0.10360037535429001, "learning_rate": 0.002, "loss": 2.358, "step": 118970 }, { "epoch": 0.45994340585424687, "grad_norm": 0.10967511683702469, "learning_rate": 0.002, "loss": 2.3448, "step": 118980 }, { "epoch": 0.4599820630576302, "grad_norm": 0.11492753028869629, "learning_rate": 0.002, "loss": 2.3489, "step": 118990 }, { "epoch": 0.4600207202610134, "grad_norm": 0.10047702491283417, "learning_rate": 0.002, "loss": 2.3319, "step": 119000 }, { "epoch": 0.46005937746439673, "grad_norm": 0.10438437014818192, "learning_rate": 0.002, "loss": 2.3651, "step": 119010 }, { "epoch": 0.46009803466778, "grad_norm": 0.11283348500728607, "learning_rate": 0.002, "loss": 2.3478, "step": 119020 }, { "epoch": 0.4601366918711633, "grad_norm": 0.11161735653877258, "learning_rate": 0.002, "loss": 2.3545, "step": 119030 }, { "epoch": 0.46017534907454655, "grad_norm": 0.11847127974033356, "learning_rate": 0.002, "loss": 2.3403, "step": 119040 }, { "epoch": 0.46021400627792985, "grad_norm": 0.11654791980981827, "learning_rate": 0.002, "loss": 2.3484, "step": 119050 }, { "epoch": 0.4602526634813131, "grad_norm": 0.1058049201965332, "learning_rate": 0.002, "loss": 2.3404, "step": 119060 }, { "epoch": 0.46029132068469636, "grad_norm": 0.12000872939825058, "learning_rate": 0.002, "loss": 2.3519, "step": 119070 }, { "epoch": 0.46032997788807967, "grad_norm": 0.11992703378200531, "learning_rate": 0.002, "loss": 2.3214, "step": 119080 }, { "epoch": 0.4603686350914629, "grad_norm": 0.12298092991113663, "learning_rate": 0.002, "loss": 2.3496, "step": 119090 }, { "epoch": 0.4604072922948462, "grad_norm": 0.0997709259390831, "learning_rate": 0.002, "loss": 2.3529, "step": 119100 }, { "epoch": 0.4604459494982295, "grad_norm": 0.11276139318943024, "learning_rate": 0.002, "loss": 2.3401, "step": 119110 }, { "epoch": 0.4604846067016128, "grad_norm": 0.09762197732925415, "learning_rate": 0.002, "loss": 2.3397, "step": 119120 }, { "epoch": 0.46052326390499604, "grad_norm": 0.1194218322634697, "learning_rate": 0.002, "loss": 2.3589, "step": 119130 }, { "epoch": 0.46056192110837935, "grad_norm": 0.10560303181409836, "learning_rate": 0.002, "loss": 2.3472, "step": 119140 }, { "epoch": 0.4606005783117626, "grad_norm": 0.10876715183258057, "learning_rate": 0.002, "loss": 2.3369, "step": 119150 }, { "epoch": 0.4606392355151459, "grad_norm": 0.11960139125585556, "learning_rate": 0.002, "loss": 2.3552, "step": 119160 }, { "epoch": 0.46067789271852916, "grad_norm": 0.11625376343727112, "learning_rate": 0.002, "loss": 2.3406, "step": 119170 }, { "epoch": 0.46071654992191247, "grad_norm": 0.11519750207662582, "learning_rate": 0.002, "loss": 2.3422, "step": 119180 }, { "epoch": 0.4607552071252957, "grad_norm": 0.12271353602409363, "learning_rate": 0.002, "loss": 2.3526, "step": 119190 }, { "epoch": 0.460793864328679, "grad_norm": 0.08917110413312912, "learning_rate": 0.002, "loss": 2.3483, "step": 119200 }, { "epoch": 0.4608325215320623, "grad_norm": 0.09352830797433853, "learning_rate": 0.002, "loss": 2.347, "step": 119210 }, { "epoch": 0.4608711787354456, "grad_norm": 0.10510279983282089, "learning_rate": 0.002, "loss": 2.3572, "step": 119220 }, { "epoch": 0.46090983593882884, "grad_norm": 0.10788548737764359, "learning_rate": 0.002, "loss": 2.3625, "step": 119230 }, { "epoch": 0.46094849314221215, "grad_norm": 0.10123184323310852, "learning_rate": 0.002, "loss": 2.3431, "step": 119240 }, { "epoch": 0.4609871503455954, "grad_norm": 0.1042218804359436, "learning_rate": 0.002, "loss": 2.3399, "step": 119250 }, { "epoch": 0.46102580754897865, "grad_norm": 0.09787441790103912, "learning_rate": 0.002, "loss": 2.3562, "step": 119260 }, { "epoch": 0.46106446475236196, "grad_norm": 0.0965803861618042, "learning_rate": 0.002, "loss": 2.3534, "step": 119270 }, { "epoch": 0.4611031219557452, "grad_norm": 0.11218937486410141, "learning_rate": 0.002, "loss": 2.354, "step": 119280 }, { "epoch": 0.4611417791591285, "grad_norm": 0.0979524776339531, "learning_rate": 0.002, "loss": 2.352, "step": 119290 }, { "epoch": 0.46118043636251177, "grad_norm": 0.1069769412279129, "learning_rate": 0.002, "loss": 2.3406, "step": 119300 }, { "epoch": 0.4612190935658951, "grad_norm": 0.09180501848459244, "learning_rate": 0.002, "loss": 2.3395, "step": 119310 }, { "epoch": 0.46125775076927833, "grad_norm": 0.12309186905622482, "learning_rate": 0.002, "loss": 2.3464, "step": 119320 }, { "epoch": 0.46129640797266164, "grad_norm": 0.10278813540935516, "learning_rate": 0.002, "loss": 2.352, "step": 119330 }, { "epoch": 0.4613350651760449, "grad_norm": 0.16567133367061615, "learning_rate": 0.002, "loss": 2.352, "step": 119340 }, { "epoch": 0.4613737223794282, "grad_norm": 0.1133866161108017, "learning_rate": 0.002, "loss": 2.3396, "step": 119350 }, { "epoch": 0.46141237958281145, "grad_norm": 0.10620232671499252, "learning_rate": 0.002, "loss": 2.3354, "step": 119360 }, { "epoch": 0.46145103678619476, "grad_norm": 0.1399594396352768, "learning_rate": 0.002, "loss": 2.3562, "step": 119370 }, { "epoch": 0.461489693989578, "grad_norm": 0.1203150525689125, "learning_rate": 0.002, "loss": 2.3537, "step": 119380 }, { "epoch": 0.4615283511929613, "grad_norm": 0.10792354494333267, "learning_rate": 0.002, "loss": 2.3627, "step": 119390 }, { "epoch": 0.46156700839634457, "grad_norm": 0.09377129375934601, "learning_rate": 0.002, "loss": 2.3605, "step": 119400 }, { "epoch": 0.4616056655997279, "grad_norm": 0.1000465452671051, "learning_rate": 0.002, "loss": 2.3667, "step": 119410 }, { "epoch": 0.46164432280311113, "grad_norm": 0.11150357872247696, "learning_rate": 0.002, "loss": 2.3634, "step": 119420 }, { "epoch": 0.46168298000649444, "grad_norm": 0.10797961056232452, "learning_rate": 0.002, "loss": 2.3539, "step": 119430 }, { "epoch": 0.4617216372098777, "grad_norm": 0.11019590497016907, "learning_rate": 0.002, "loss": 2.3525, "step": 119440 }, { "epoch": 0.46176029441326094, "grad_norm": 0.11343441158533096, "learning_rate": 0.002, "loss": 2.3611, "step": 119450 }, { "epoch": 0.46179895161664425, "grad_norm": 0.09743047505617142, "learning_rate": 0.002, "loss": 2.3466, "step": 119460 }, { "epoch": 0.4618376088200275, "grad_norm": 0.13833467662334442, "learning_rate": 0.002, "loss": 2.3548, "step": 119470 }, { "epoch": 0.4618762660234108, "grad_norm": 0.09775927662849426, "learning_rate": 0.002, "loss": 2.3387, "step": 119480 }, { "epoch": 0.46191492322679406, "grad_norm": 0.10678848624229431, "learning_rate": 0.002, "loss": 2.3516, "step": 119490 }, { "epoch": 0.46195358043017737, "grad_norm": 0.10635264962911606, "learning_rate": 0.002, "loss": 2.3587, "step": 119500 }, { "epoch": 0.4619922376335606, "grad_norm": 0.11372309923171997, "learning_rate": 0.002, "loss": 2.359, "step": 119510 }, { "epoch": 0.46203089483694393, "grad_norm": 0.10508405417203903, "learning_rate": 0.002, "loss": 2.3529, "step": 119520 }, { "epoch": 0.4620695520403272, "grad_norm": 0.1135542243719101, "learning_rate": 0.002, "loss": 2.3336, "step": 119530 }, { "epoch": 0.4621082092437105, "grad_norm": 0.09791223704814911, "learning_rate": 0.002, "loss": 2.362, "step": 119540 }, { "epoch": 0.46214686644709374, "grad_norm": 0.10771148651838303, "learning_rate": 0.002, "loss": 2.3492, "step": 119550 }, { "epoch": 0.46218552365047705, "grad_norm": 0.10225772112607956, "learning_rate": 0.002, "loss": 2.3427, "step": 119560 }, { "epoch": 0.4622241808538603, "grad_norm": 0.1033085361123085, "learning_rate": 0.002, "loss": 2.3597, "step": 119570 }, { "epoch": 0.4622628380572436, "grad_norm": 0.10448309034109116, "learning_rate": 0.002, "loss": 2.346, "step": 119580 }, { "epoch": 0.46230149526062686, "grad_norm": 0.1192668080329895, "learning_rate": 0.002, "loss": 2.3377, "step": 119590 }, { "epoch": 0.46234015246401017, "grad_norm": 0.1274099349975586, "learning_rate": 0.002, "loss": 2.3508, "step": 119600 }, { "epoch": 0.4623788096673934, "grad_norm": 0.11079815030097961, "learning_rate": 0.002, "loss": 2.3439, "step": 119610 }, { "epoch": 0.4624174668707767, "grad_norm": 0.11107289791107178, "learning_rate": 0.002, "loss": 2.3431, "step": 119620 }, { "epoch": 0.46245612407416, "grad_norm": 0.11228213459253311, "learning_rate": 0.002, "loss": 2.3373, "step": 119630 }, { "epoch": 0.46249478127754323, "grad_norm": 0.09557343274354935, "learning_rate": 0.002, "loss": 2.3397, "step": 119640 }, { "epoch": 0.46253343848092654, "grad_norm": 0.11540845036506653, "learning_rate": 0.002, "loss": 2.3707, "step": 119650 }, { "epoch": 0.4625720956843098, "grad_norm": 0.11660248786211014, "learning_rate": 0.002, "loss": 2.3294, "step": 119660 }, { "epoch": 0.4626107528876931, "grad_norm": 0.12330517917871475, "learning_rate": 0.002, "loss": 2.3663, "step": 119670 }, { "epoch": 0.46264941009107635, "grad_norm": 0.09484302997589111, "learning_rate": 0.002, "loss": 2.3533, "step": 119680 }, { "epoch": 0.46268806729445966, "grad_norm": 0.11608867347240448, "learning_rate": 0.002, "loss": 2.3398, "step": 119690 }, { "epoch": 0.4627267244978429, "grad_norm": 0.09752960503101349, "learning_rate": 0.002, "loss": 2.3502, "step": 119700 }, { "epoch": 0.4627653817012262, "grad_norm": 0.1270848512649536, "learning_rate": 0.002, "loss": 2.3442, "step": 119710 }, { "epoch": 0.46280403890460947, "grad_norm": 0.0989656075835228, "learning_rate": 0.002, "loss": 2.3457, "step": 119720 }, { "epoch": 0.4628426961079928, "grad_norm": 0.1194680854678154, "learning_rate": 0.002, "loss": 2.3427, "step": 119730 }, { "epoch": 0.46288135331137603, "grad_norm": 0.10372116416692734, "learning_rate": 0.002, "loss": 2.3528, "step": 119740 }, { "epoch": 0.46292001051475934, "grad_norm": 0.10059253126382828, "learning_rate": 0.002, "loss": 2.343, "step": 119750 }, { "epoch": 0.4629586677181426, "grad_norm": 0.11401177197694778, "learning_rate": 0.002, "loss": 2.3381, "step": 119760 }, { "epoch": 0.4629973249215259, "grad_norm": 0.17351770401000977, "learning_rate": 0.002, "loss": 2.3623, "step": 119770 }, { "epoch": 0.46303598212490915, "grad_norm": 0.11136732250452042, "learning_rate": 0.002, "loss": 2.3508, "step": 119780 }, { "epoch": 0.46307463932829246, "grad_norm": 0.10837043076753616, "learning_rate": 0.002, "loss": 2.3576, "step": 119790 }, { "epoch": 0.4631132965316757, "grad_norm": 0.10472016781568527, "learning_rate": 0.002, "loss": 2.3448, "step": 119800 }, { "epoch": 0.46315195373505896, "grad_norm": 0.09861727058887482, "learning_rate": 0.002, "loss": 2.3541, "step": 119810 }, { "epoch": 0.46319061093844227, "grad_norm": 0.10650566965341568, "learning_rate": 0.002, "loss": 2.3583, "step": 119820 }, { "epoch": 0.4632292681418255, "grad_norm": 0.12352261692285538, "learning_rate": 0.002, "loss": 2.3553, "step": 119830 }, { "epoch": 0.46326792534520883, "grad_norm": 0.10251409560441971, "learning_rate": 0.002, "loss": 2.3404, "step": 119840 }, { "epoch": 0.4633065825485921, "grad_norm": 0.10705935209989548, "learning_rate": 0.002, "loss": 2.352, "step": 119850 }, { "epoch": 0.4633452397519754, "grad_norm": 0.1259768307209015, "learning_rate": 0.002, "loss": 2.338, "step": 119860 }, { "epoch": 0.46338389695535864, "grad_norm": 0.11686693131923676, "learning_rate": 0.002, "loss": 2.351, "step": 119870 }, { "epoch": 0.46342255415874195, "grad_norm": 0.11357530206441879, "learning_rate": 0.002, "loss": 2.3338, "step": 119880 }, { "epoch": 0.4634612113621252, "grad_norm": 0.10879925638437271, "learning_rate": 0.002, "loss": 2.3583, "step": 119890 }, { "epoch": 0.4634998685655085, "grad_norm": 0.09799963235855103, "learning_rate": 0.002, "loss": 2.3174, "step": 119900 }, { "epoch": 0.46353852576889176, "grad_norm": 0.11603069305419922, "learning_rate": 0.002, "loss": 2.3475, "step": 119910 }, { "epoch": 0.46357718297227507, "grad_norm": 0.11923540383577347, "learning_rate": 0.002, "loss": 2.3391, "step": 119920 }, { "epoch": 0.4636158401756583, "grad_norm": 0.10628203302621841, "learning_rate": 0.002, "loss": 2.3452, "step": 119930 }, { "epoch": 0.46365449737904163, "grad_norm": 0.10463366657495499, "learning_rate": 0.002, "loss": 2.3308, "step": 119940 }, { "epoch": 0.4636931545824249, "grad_norm": 0.11544150114059448, "learning_rate": 0.002, "loss": 2.3537, "step": 119950 }, { "epoch": 0.4637318117858082, "grad_norm": 0.10797338932752609, "learning_rate": 0.002, "loss": 2.3496, "step": 119960 }, { "epoch": 0.46377046898919144, "grad_norm": 0.09882956743240356, "learning_rate": 0.002, "loss": 2.3531, "step": 119970 }, { "epoch": 0.46380912619257475, "grad_norm": 0.10176735371351242, "learning_rate": 0.002, "loss": 2.3493, "step": 119980 }, { "epoch": 0.463847783395958, "grad_norm": 0.12177812308073044, "learning_rate": 0.002, "loss": 2.3378, "step": 119990 }, { "epoch": 0.46388644059934125, "grad_norm": 0.10070200264453888, "learning_rate": 0.002, "loss": 2.371, "step": 120000 }, { "epoch": 0.46392509780272456, "grad_norm": 0.08883144706487656, "learning_rate": 0.002, "loss": 2.3444, "step": 120010 }, { "epoch": 0.4639637550061078, "grad_norm": 0.11577492207288742, "learning_rate": 0.002, "loss": 2.3492, "step": 120020 }, { "epoch": 0.4640024122094911, "grad_norm": 0.09652300924062729, "learning_rate": 0.002, "loss": 2.3565, "step": 120030 }, { "epoch": 0.4640410694128744, "grad_norm": 0.10715745389461517, "learning_rate": 0.002, "loss": 2.3195, "step": 120040 }, { "epoch": 0.4640797266162577, "grad_norm": 0.1004326194524765, "learning_rate": 0.002, "loss": 2.332, "step": 120050 }, { "epoch": 0.46411838381964093, "grad_norm": 0.09592131525278091, "learning_rate": 0.002, "loss": 2.3606, "step": 120060 }, { "epoch": 0.46415704102302424, "grad_norm": 0.13525542616844177, "learning_rate": 0.002, "loss": 2.3631, "step": 120070 }, { "epoch": 0.4641956982264075, "grad_norm": 0.11197216808795929, "learning_rate": 0.002, "loss": 2.3549, "step": 120080 }, { "epoch": 0.4642343554297908, "grad_norm": 0.11991088837385178, "learning_rate": 0.002, "loss": 2.3513, "step": 120090 }, { "epoch": 0.46427301263317405, "grad_norm": 0.12654219567775726, "learning_rate": 0.002, "loss": 2.349, "step": 120100 }, { "epoch": 0.46431166983655736, "grad_norm": 0.10239842534065247, "learning_rate": 0.002, "loss": 2.3629, "step": 120110 }, { "epoch": 0.4643503270399406, "grad_norm": 0.12372080236673355, "learning_rate": 0.002, "loss": 2.3529, "step": 120120 }, { "epoch": 0.4643889842433239, "grad_norm": 0.10336051136255264, "learning_rate": 0.002, "loss": 2.3278, "step": 120130 }, { "epoch": 0.4644276414467072, "grad_norm": 0.11218868941068649, "learning_rate": 0.002, "loss": 2.3449, "step": 120140 }, { "epoch": 0.4644662986500905, "grad_norm": 0.10370685160160065, "learning_rate": 0.002, "loss": 2.3573, "step": 120150 }, { "epoch": 0.46450495585347373, "grad_norm": 0.09634726494550705, "learning_rate": 0.002, "loss": 2.3508, "step": 120160 }, { "epoch": 0.46454361305685704, "grad_norm": 0.132036954164505, "learning_rate": 0.002, "loss": 2.3314, "step": 120170 }, { "epoch": 0.4645822702602403, "grad_norm": 0.14281754195690155, "learning_rate": 0.002, "loss": 2.357, "step": 120180 }, { "epoch": 0.46462092746362355, "grad_norm": 0.11279735714197159, "learning_rate": 0.002, "loss": 2.3491, "step": 120190 }, { "epoch": 0.46465958466700685, "grad_norm": 0.09632159769535065, "learning_rate": 0.002, "loss": 2.3431, "step": 120200 }, { "epoch": 0.4646982418703901, "grad_norm": 0.09436524659395218, "learning_rate": 0.002, "loss": 2.3589, "step": 120210 }, { "epoch": 0.4647368990737734, "grad_norm": 0.12093179672956467, "learning_rate": 0.002, "loss": 2.3438, "step": 120220 }, { "epoch": 0.46477555627715667, "grad_norm": 0.1084708645939827, "learning_rate": 0.002, "loss": 2.3446, "step": 120230 }, { "epoch": 0.46481421348054, "grad_norm": 0.11820647865533829, "learning_rate": 0.002, "loss": 2.3459, "step": 120240 }, { "epoch": 0.4648528706839232, "grad_norm": 0.12325529754161835, "learning_rate": 0.002, "loss": 2.3425, "step": 120250 }, { "epoch": 0.46489152788730653, "grad_norm": 0.10659166425466537, "learning_rate": 0.002, "loss": 2.3333, "step": 120260 }, { "epoch": 0.4649301850906898, "grad_norm": 0.11430692672729492, "learning_rate": 0.002, "loss": 2.3372, "step": 120270 }, { "epoch": 0.4649688422940731, "grad_norm": 0.12177309393882751, "learning_rate": 0.002, "loss": 2.3333, "step": 120280 }, { "epoch": 0.46500749949745634, "grad_norm": 0.12660956382751465, "learning_rate": 0.002, "loss": 2.349, "step": 120290 }, { "epoch": 0.46504615670083965, "grad_norm": 0.08679116517305374, "learning_rate": 0.002, "loss": 2.3455, "step": 120300 }, { "epoch": 0.4650848139042229, "grad_norm": 0.09967411309480667, "learning_rate": 0.002, "loss": 2.3548, "step": 120310 }, { "epoch": 0.4651234711076062, "grad_norm": 0.11712680757045746, "learning_rate": 0.002, "loss": 2.3478, "step": 120320 }, { "epoch": 0.46516212831098946, "grad_norm": 0.11553264409303665, "learning_rate": 0.002, "loss": 2.339, "step": 120330 }, { "epoch": 0.46520078551437277, "grad_norm": 0.11434249579906464, "learning_rate": 0.002, "loss": 2.3471, "step": 120340 }, { "epoch": 0.465239442717756, "grad_norm": 0.1029350757598877, "learning_rate": 0.002, "loss": 2.3602, "step": 120350 }, { "epoch": 0.46527809992113933, "grad_norm": 0.11101929098367691, "learning_rate": 0.002, "loss": 2.3427, "step": 120360 }, { "epoch": 0.4653167571245226, "grad_norm": 0.10793200880289078, "learning_rate": 0.002, "loss": 2.3347, "step": 120370 }, { "epoch": 0.46535541432790584, "grad_norm": 0.1273803412914276, "learning_rate": 0.002, "loss": 2.3477, "step": 120380 }, { "epoch": 0.46539407153128914, "grad_norm": 0.14132443070411682, "learning_rate": 0.002, "loss": 2.356, "step": 120390 }, { "epoch": 0.4654327287346724, "grad_norm": 0.10636676847934723, "learning_rate": 0.002, "loss": 2.3364, "step": 120400 }, { "epoch": 0.4654713859380557, "grad_norm": 0.0932055413722992, "learning_rate": 0.002, "loss": 2.3443, "step": 120410 }, { "epoch": 0.46551004314143896, "grad_norm": 0.1264282763004303, "learning_rate": 0.002, "loss": 2.3538, "step": 120420 }, { "epoch": 0.46554870034482226, "grad_norm": 0.10335405170917511, "learning_rate": 0.002, "loss": 2.3538, "step": 120430 }, { "epoch": 0.4655873575482055, "grad_norm": 0.10554596036672592, "learning_rate": 0.002, "loss": 2.3388, "step": 120440 }, { "epoch": 0.4656260147515888, "grad_norm": 0.12399185448884964, "learning_rate": 0.002, "loss": 2.3505, "step": 120450 }, { "epoch": 0.4656646719549721, "grad_norm": 0.0971696674823761, "learning_rate": 0.002, "loss": 2.3328, "step": 120460 }, { "epoch": 0.4657033291583554, "grad_norm": 0.11128117889165878, "learning_rate": 0.002, "loss": 2.3567, "step": 120470 }, { "epoch": 0.46574198636173864, "grad_norm": 0.09656219929456711, "learning_rate": 0.002, "loss": 2.3431, "step": 120480 }, { "epoch": 0.46578064356512194, "grad_norm": 0.09573480486869812, "learning_rate": 0.002, "loss": 2.3513, "step": 120490 }, { "epoch": 0.4658193007685052, "grad_norm": 0.11154934018850327, "learning_rate": 0.002, "loss": 2.3423, "step": 120500 }, { "epoch": 0.4658579579718885, "grad_norm": 0.11035740375518799, "learning_rate": 0.002, "loss": 2.3514, "step": 120510 }, { "epoch": 0.46589661517527176, "grad_norm": 0.09658867120742798, "learning_rate": 0.002, "loss": 2.3465, "step": 120520 }, { "epoch": 0.46593527237865506, "grad_norm": 0.12649919092655182, "learning_rate": 0.002, "loss": 2.3541, "step": 120530 }, { "epoch": 0.4659739295820383, "grad_norm": 0.11713672429323196, "learning_rate": 0.002, "loss": 2.365, "step": 120540 }, { "epoch": 0.4660125867854216, "grad_norm": 0.11696921288967133, "learning_rate": 0.002, "loss": 2.3591, "step": 120550 }, { "epoch": 0.4660512439888049, "grad_norm": 0.10550013184547424, "learning_rate": 0.002, "loss": 2.3504, "step": 120560 }, { "epoch": 0.4660899011921881, "grad_norm": 0.12237689644098282, "learning_rate": 0.002, "loss": 2.3481, "step": 120570 }, { "epoch": 0.46612855839557144, "grad_norm": 0.09278661757707596, "learning_rate": 0.002, "loss": 2.3374, "step": 120580 }, { "epoch": 0.4661672155989547, "grad_norm": 0.11413309723138809, "learning_rate": 0.002, "loss": 2.3337, "step": 120590 }, { "epoch": 0.466205872802338, "grad_norm": 0.13561297953128815, "learning_rate": 0.002, "loss": 2.3362, "step": 120600 }, { "epoch": 0.46624453000572125, "grad_norm": 0.11021458357572556, "learning_rate": 0.002, "loss": 2.3466, "step": 120610 }, { "epoch": 0.46628318720910455, "grad_norm": 0.11534292995929718, "learning_rate": 0.002, "loss": 2.3443, "step": 120620 }, { "epoch": 0.4663218444124878, "grad_norm": 0.10146640241146088, "learning_rate": 0.002, "loss": 2.338, "step": 120630 }, { "epoch": 0.4663605016158711, "grad_norm": 0.11225304752588272, "learning_rate": 0.002, "loss": 2.3185, "step": 120640 }, { "epoch": 0.46639915881925437, "grad_norm": 0.10609450936317444, "learning_rate": 0.002, "loss": 2.3569, "step": 120650 }, { "epoch": 0.4664378160226377, "grad_norm": 0.10264197736978531, "learning_rate": 0.002, "loss": 2.3568, "step": 120660 }, { "epoch": 0.4664764732260209, "grad_norm": 0.09689023345708847, "learning_rate": 0.002, "loss": 2.3484, "step": 120670 }, { "epoch": 0.46651513042940423, "grad_norm": 0.108772873878479, "learning_rate": 0.002, "loss": 2.346, "step": 120680 }, { "epoch": 0.4665537876327875, "grad_norm": 0.1061214953660965, "learning_rate": 0.002, "loss": 2.35, "step": 120690 }, { "epoch": 0.4665924448361708, "grad_norm": 0.10163940489292145, "learning_rate": 0.002, "loss": 2.3439, "step": 120700 }, { "epoch": 0.46663110203955405, "grad_norm": 0.11704286187887192, "learning_rate": 0.002, "loss": 2.3516, "step": 120710 }, { "epoch": 0.46666975924293735, "grad_norm": 0.11735977232456207, "learning_rate": 0.002, "loss": 2.3503, "step": 120720 }, { "epoch": 0.4667084164463206, "grad_norm": 0.1283426731824875, "learning_rate": 0.002, "loss": 2.3465, "step": 120730 }, { "epoch": 0.46674707364970386, "grad_norm": 0.10897085815668106, "learning_rate": 0.002, "loss": 2.3535, "step": 120740 }, { "epoch": 0.46678573085308717, "grad_norm": 0.10965883731842041, "learning_rate": 0.002, "loss": 2.3483, "step": 120750 }, { "epoch": 0.4668243880564704, "grad_norm": 0.10167060047388077, "learning_rate": 0.002, "loss": 2.3452, "step": 120760 }, { "epoch": 0.4668630452598537, "grad_norm": 0.10578198730945587, "learning_rate": 0.002, "loss": 2.348, "step": 120770 }, { "epoch": 0.466901702463237, "grad_norm": 0.10897000133991241, "learning_rate": 0.002, "loss": 2.3515, "step": 120780 }, { "epoch": 0.4669403596666203, "grad_norm": 0.0896591916680336, "learning_rate": 0.002, "loss": 2.3425, "step": 120790 }, { "epoch": 0.46697901687000354, "grad_norm": 0.11927013844251633, "learning_rate": 0.002, "loss": 2.3439, "step": 120800 }, { "epoch": 0.46701767407338685, "grad_norm": 0.10862559825181961, "learning_rate": 0.002, "loss": 2.3506, "step": 120810 }, { "epoch": 0.4670563312767701, "grad_norm": 0.13308750092983246, "learning_rate": 0.002, "loss": 2.3566, "step": 120820 }, { "epoch": 0.4670949884801534, "grad_norm": 0.09521917998790741, "learning_rate": 0.002, "loss": 2.3494, "step": 120830 }, { "epoch": 0.46713364568353666, "grad_norm": 0.10568059235811234, "learning_rate": 0.002, "loss": 2.3319, "step": 120840 }, { "epoch": 0.46717230288691997, "grad_norm": 0.1008000299334526, "learning_rate": 0.002, "loss": 2.3467, "step": 120850 }, { "epoch": 0.4672109600903032, "grad_norm": 0.33562374114990234, "learning_rate": 0.002, "loss": 2.3406, "step": 120860 }, { "epoch": 0.4672496172936865, "grad_norm": 0.12237101048231125, "learning_rate": 0.002, "loss": 2.3465, "step": 120870 }, { "epoch": 0.4672882744970698, "grad_norm": 0.10964284092187881, "learning_rate": 0.002, "loss": 2.3588, "step": 120880 }, { "epoch": 0.4673269317004531, "grad_norm": 0.09678712487220764, "learning_rate": 0.002, "loss": 2.3486, "step": 120890 }, { "epoch": 0.46736558890383634, "grad_norm": 0.11241725087165833, "learning_rate": 0.002, "loss": 2.3378, "step": 120900 }, { "epoch": 0.46740424610721965, "grad_norm": 0.10393916815519333, "learning_rate": 0.002, "loss": 2.3507, "step": 120910 }, { "epoch": 0.4674429033106029, "grad_norm": 0.11094099283218384, "learning_rate": 0.002, "loss": 2.3328, "step": 120920 }, { "epoch": 0.46748156051398615, "grad_norm": 0.1192275732755661, "learning_rate": 0.002, "loss": 2.349, "step": 120930 }, { "epoch": 0.46752021771736946, "grad_norm": 0.13173754513263702, "learning_rate": 0.002, "loss": 2.3526, "step": 120940 }, { "epoch": 0.4675588749207527, "grad_norm": 0.1284404844045639, "learning_rate": 0.002, "loss": 2.3503, "step": 120950 }, { "epoch": 0.467597532124136, "grad_norm": 0.1140296459197998, "learning_rate": 0.002, "loss": 2.3333, "step": 120960 }, { "epoch": 0.46763618932751927, "grad_norm": 0.10858230292797089, "learning_rate": 0.002, "loss": 2.3568, "step": 120970 }, { "epoch": 0.4676748465309026, "grad_norm": 0.14189264178276062, "learning_rate": 0.002, "loss": 2.3406, "step": 120980 }, { "epoch": 0.46771350373428583, "grad_norm": 0.10394598543643951, "learning_rate": 0.002, "loss": 2.3366, "step": 120990 }, { "epoch": 0.46775216093766914, "grad_norm": 0.10648351162672043, "learning_rate": 0.002, "loss": 2.3414, "step": 121000 }, { "epoch": 0.4677908181410524, "grad_norm": 0.09850535541772842, "learning_rate": 0.002, "loss": 2.338, "step": 121010 }, { "epoch": 0.4678294753444357, "grad_norm": 0.09851415455341339, "learning_rate": 0.002, "loss": 2.3567, "step": 121020 }, { "epoch": 0.46786813254781895, "grad_norm": 0.1181519404053688, "learning_rate": 0.002, "loss": 2.3463, "step": 121030 }, { "epoch": 0.46790678975120226, "grad_norm": 0.10987094044685364, "learning_rate": 0.002, "loss": 2.3476, "step": 121040 }, { "epoch": 0.4679454469545855, "grad_norm": 0.10694500803947449, "learning_rate": 0.002, "loss": 2.3415, "step": 121050 }, { "epoch": 0.4679841041579688, "grad_norm": 0.1143125668168068, "learning_rate": 0.002, "loss": 2.3585, "step": 121060 }, { "epoch": 0.46802276136135207, "grad_norm": 0.12903359532356262, "learning_rate": 0.002, "loss": 2.3428, "step": 121070 }, { "epoch": 0.4680614185647354, "grad_norm": 0.09866703301668167, "learning_rate": 0.002, "loss": 2.3535, "step": 121080 }, { "epoch": 0.46810007576811863, "grad_norm": 0.11391236633062363, "learning_rate": 0.002, "loss": 2.3565, "step": 121090 }, { "epoch": 0.46813873297150194, "grad_norm": 0.0955762043595314, "learning_rate": 0.002, "loss": 2.3468, "step": 121100 }, { "epoch": 0.4681773901748852, "grad_norm": 0.11622832715511322, "learning_rate": 0.002, "loss": 2.3424, "step": 121110 }, { "epoch": 0.46821604737826844, "grad_norm": 0.11479691416025162, "learning_rate": 0.002, "loss": 2.3571, "step": 121120 }, { "epoch": 0.46825470458165175, "grad_norm": 0.09819727391004562, "learning_rate": 0.002, "loss": 2.3472, "step": 121130 }, { "epoch": 0.468293361785035, "grad_norm": 0.11308932304382324, "learning_rate": 0.002, "loss": 2.3433, "step": 121140 }, { "epoch": 0.4683320189884183, "grad_norm": 0.10155956447124481, "learning_rate": 0.002, "loss": 2.3483, "step": 121150 }, { "epoch": 0.46837067619180156, "grad_norm": 0.11019018292427063, "learning_rate": 0.002, "loss": 2.3362, "step": 121160 }, { "epoch": 0.46840933339518487, "grad_norm": 0.10457126796245575, "learning_rate": 0.002, "loss": 2.3459, "step": 121170 }, { "epoch": 0.4684479905985681, "grad_norm": 0.11174652725458145, "learning_rate": 0.002, "loss": 2.3289, "step": 121180 }, { "epoch": 0.46848664780195143, "grad_norm": 0.10429858416318893, "learning_rate": 0.002, "loss": 2.3257, "step": 121190 }, { "epoch": 0.4685253050053347, "grad_norm": 0.09792893379926682, "learning_rate": 0.002, "loss": 2.3638, "step": 121200 }, { "epoch": 0.468563962208718, "grad_norm": 0.10697484761476517, "learning_rate": 0.002, "loss": 2.3347, "step": 121210 }, { "epoch": 0.46860261941210124, "grad_norm": 0.10429663211107254, "learning_rate": 0.002, "loss": 2.3296, "step": 121220 }, { "epoch": 0.46864127661548455, "grad_norm": 0.09487517178058624, "learning_rate": 0.002, "loss": 2.3356, "step": 121230 }, { "epoch": 0.4686799338188678, "grad_norm": 0.10496007651090622, "learning_rate": 0.002, "loss": 2.3458, "step": 121240 }, { "epoch": 0.4687185910222511, "grad_norm": 0.11141261458396912, "learning_rate": 0.002, "loss": 2.3594, "step": 121250 }, { "epoch": 0.46875724822563436, "grad_norm": 0.10244353115558624, "learning_rate": 0.002, "loss": 2.3608, "step": 121260 }, { "epoch": 0.46879590542901767, "grad_norm": 0.09761404246091843, "learning_rate": 0.002, "loss": 2.3354, "step": 121270 }, { "epoch": 0.4688345626324009, "grad_norm": 0.10012496262788773, "learning_rate": 0.002, "loss": 2.3468, "step": 121280 }, { "epoch": 0.4688732198357842, "grad_norm": 0.10322960466146469, "learning_rate": 0.002, "loss": 2.3442, "step": 121290 }, { "epoch": 0.4689118770391675, "grad_norm": 0.11318720132112503, "learning_rate": 0.002, "loss": 2.341, "step": 121300 }, { "epoch": 0.46895053424255073, "grad_norm": 0.10540834069252014, "learning_rate": 0.002, "loss": 2.3411, "step": 121310 }, { "epoch": 0.46898919144593404, "grad_norm": 0.10919986665248871, "learning_rate": 0.002, "loss": 2.3273, "step": 121320 }, { "epoch": 0.4690278486493173, "grad_norm": 0.11813310533761978, "learning_rate": 0.002, "loss": 2.3348, "step": 121330 }, { "epoch": 0.4690665058527006, "grad_norm": 0.10450516641139984, "learning_rate": 0.002, "loss": 2.3357, "step": 121340 }, { "epoch": 0.46910516305608385, "grad_norm": 0.10707175731658936, "learning_rate": 0.002, "loss": 2.3608, "step": 121350 }, { "epoch": 0.46914382025946716, "grad_norm": 0.1057899221777916, "learning_rate": 0.002, "loss": 2.3408, "step": 121360 }, { "epoch": 0.4691824774628504, "grad_norm": 0.10432370007038116, "learning_rate": 0.002, "loss": 2.3507, "step": 121370 }, { "epoch": 0.4692211346662337, "grad_norm": 0.10156414657831192, "learning_rate": 0.002, "loss": 2.3352, "step": 121380 }, { "epoch": 0.46925979186961697, "grad_norm": 0.11768826842308044, "learning_rate": 0.002, "loss": 2.3571, "step": 121390 }, { "epoch": 0.4692984490730003, "grad_norm": 0.10005386918783188, "learning_rate": 0.002, "loss": 2.3568, "step": 121400 }, { "epoch": 0.46933710627638353, "grad_norm": 0.11784195154905319, "learning_rate": 0.002, "loss": 2.3712, "step": 121410 }, { "epoch": 0.46937576347976684, "grad_norm": 0.10894471406936646, "learning_rate": 0.002, "loss": 2.3414, "step": 121420 }, { "epoch": 0.4694144206831501, "grad_norm": 0.11698637902736664, "learning_rate": 0.002, "loss": 2.3539, "step": 121430 }, { "epoch": 0.4694530778865334, "grad_norm": 0.11290238797664642, "learning_rate": 0.002, "loss": 2.3485, "step": 121440 }, { "epoch": 0.46949173508991665, "grad_norm": 0.11281818896532059, "learning_rate": 0.002, "loss": 2.345, "step": 121450 }, { "epoch": 0.46953039229329996, "grad_norm": 0.10703438520431519, "learning_rate": 0.002, "loss": 2.3379, "step": 121460 }, { "epoch": 0.4695690494966832, "grad_norm": 0.11932453513145447, "learning_rate": 0.002, "loss": 2.3484, "step": 121470 }, { "epoch": 0.46960770670006646, "grad_norm": 0.1095815896987915, "learning_rate": 0.002, "loss": 2.3617, "step": 121480 }, { "epoch": 0.46964636390344977, "grad_norm": 0.13058489561080933, "learning_rate": 0.002, "loss": 2.3591, "step": 121490 }, { "epoch": 0.469685021106833, "grad_norm": 0.09637417644262314, "learning_rate": 0.002, "loss": 2.3582, "step": 121500 }, { "epoch": 0.46972367831021633, "grad_norm": 0.09962920099496841, "learning_rate": 0.002, "loss": 2.3532, "step": 121510 }, { "epoch": 0.4697623355135996, "grad_norm": 0.09866471588611603, "learning_rate": 0.002, "loss": 2.3682, "step": 121520 }, { "epoch": 0.4698009927169829, "grad_norm": 0.1037471741437912, "learning_rate": 0.002, "loss": 2.3428, "step": 121530 }, { "epoch": 0.46983964992036614, "grad_norm": 0.09907791763544083, "learning_rate": 0.002, "loss": 2.3441, "step": 121540 }, { "epoch": 0.46987830712374945, "grad_norm": 0.11282161623239517, "learning_rate": 0.002, "loss": 2.3477, "step": 121550 }, { "epoch": 0.4699169643271327, "grad_norm": 0.11053518950939178, "learning_rate": 0.002, "loss": 2.3499, "step": 121560 }, { "epoch": 0.469955621530516, "grad_norm": 0.10161669552326202, "learning_rate": 0.002, "loss": 2.3522, "step": 121570 }, { "epoch": 0.46999427873389926, "grad_norm": 0.09895357489585876, "learning_rate": 0.002, "loss": 2.3482, "step": 121580 }, { "epoch": 0.47003293593728257, "grad_norm": 0.1091216579079628, "learning_rate": 0.002, "loss": 2.3451, "step": 121590 }, { "epoch": 0.4700715931406658, "grad_norm": 0.10275205224752426, "learning_rate": 0.002, "loss": 2.3629, "step": 121600 }, { "epoch": 0.47011025034404913, "grad_norm": 0.10430356115102768, "learning_rate": 0.002, "loss": 2.3372, "step": 121610 }, { "epoch": 0.4701489075474324, "grad_norm": 0.11154796928167343, "learning_rate": 0.002, "loss": 2.3367, "step": 121620 }, { "epoch": 0.4701875647508157, "grad_norm": 0.11724287271499634, "learning_rate": 0.002, "loss": 2.3429, "step": 121630 }, { "epoch": 0.47022622195419894, "grad_norm": 0.11500924080610275, "learning_rate": 0.002, "loss": 2.3406, "step": 121640 }, { "epoch": 0.47026487915758225, "grad_norm": 0.10347431898117065, "learning_rate": 0.002, "loss": 2.3597, "step": 121650 }, { "epoch": 0.4703035363609655, "grad_norm": 0.11929845064878464, "learning_rate": 0.002, "loss": 2.3562, "step": 121660 }, { "epoch": 0.47034219356434875, "grad_norm": 0.12049948424100876, "learning_rate": 0.002, "loss": 2.33, "step": 121670 }, { "epoch": 0.47038085076773206, "grad_norm": 0.1123681291937828, "learning_rate": 0.002, "loss": 2.3582, "step": 121680 }, { "epoch": 0.4704195079711153, "grad_norm": 0.1003938540816307, "learning_rate": 0.002, "loss": 2.3455, "step": 121690 }, { "epoch": 0.4704581651744986, "grad_norm": 0.10262428224086761, "learning_rate": 0.002, "loss": 2.3588, "step": 121700 }, { "epoch": 0.4704968223778819, "grad_norm": 0.10206400603055954, "learning_rate": 0.002, "loss": 2.3629, "step": 121710 }, { "epoch": 0.4705354795812652, "grad_norm": 0.10609682649374008, "learning_rate": 0.002, "loss": 2.3481, "step": 121720 }, { "epoch": 0.47057413678464843, "grad_norm": 0.10343533009290695, "learning_rate": 0.002, "loss": 2.3464, "step": 121730 }, { "epoch": 0.47061279398803174, "grad_norm": 0.10270936787128448, "learning_rate": 0.002, "loss": 2.3376, "step": 121740 }, { "epoch": 0.470651451191415, "grad_norm": 0.12486699223518372, "learning_rate": 0.002, "loss": 2.3551, "step": 121750 }, { "epoch": 0.4706901083947983, "grad_norm": 0.10693617910146713, "learning_rate": 0.002, "loss": 2.3533, "step": 121760 }, { "epoch": 0.47072876559818155, "grad_norm": 0.1021292507648468, "learning_rate": 0.002, "loss": 2.3507, "step": 121770 }, { "epoch": 0.47076742280156486, "grad_norm": 0.12084102630615234, "learning_rate": 0.002, "loss": 2.3405, "step": 121780 }, { "epoch": 0.4708060800049481, "grad_norm": 0.1022479236125946, "learning_rate": 0.002, "loss": 2.3433, "step": 121790 }, { "epoch": 0.4708447372083314, "grad_norm": 0.09856412559747696, "learning_rate": 0.002, "loss": 2.3399, "step": 121800 }, { "epoch": 0.4708833944117147, "grad_norm": 0.10755357146263123, "learning_rate": 0.002, "loss": 2.351, "step": 121810 }, { "epoch": 0.470922051615098, "grad_norm": 0.1091640517115593, "learning_rate": 0.002, "loss": 2.3647, "step": 121820 }, { "epoch": 0.47096070881848123, "grad_norm": 0.11964549869298935, "learning_rate": 0.002, "loss": 2.3273, "step": 121830 }, { "epoch": 0.47099936602186454, "grad_norm": 0.10800044983625412, "learning_rate": 0.002, "loss": 2.356, "step": 121840 }, { "epoch": 0.4710380232252478, "grad_norm": 0.10545913875102997, "learning_rate": 0.002, "loss": 2.3429, "step": 121850 }, { "epoch": 0.47107668042863104, "grad_norm": 0.10835971683263779, "learning_rate": 0.002, "loss": 2.3359, "step": 121860 }, { "epoch": 0.47111533763201435, "grad_norm": 0.09890252351760864, "learning_rate": 0.002, "loss": 2.3407, "step": 121870 }, { "epoch": 0.4711539948353976, "grad_norm": 0.10678128153085709, "learning_rate": 0.002, "loss": 2.3583, "step": 121880 }, { "epoch": 0.4711926520387809, "grad_norm": 0.10318324714899063, "learning_rate": 0.002, "loss": 2.3425, "step": 121890 }, { "epoch": 0.47123130924216416, "grad_norm": 0.10819438844919205, "learning_rate": 0.002, "loss": 2.3483, "step": 121900 }, { "epoch": 0.4712699664455475, "grad_norm": 0.11838934570550919, "learning_rate": 0.002, "loss": 2.3497, "step": 121910 }, { "epoch": 0.4713086236489307, "grad_norm": 0.10129489004611969, "learning_rate": 0.002, "loss": 2.3559, "step": 121920 }, { "epoch": 0.47134728085231403, "grad_norm": 0.11001982539892197, "learning_rate": 0.002, "loss": 2.3448, "step": 121930 }, { "epoch": 0.4713859380556973, "grad_norm": 0.10711158812046051, "learning_rate": 0.002, "loss": 2.3467, "step": 121940 }, { "epoch": 0.4714245952590806, "grad_norm": 0.10674590617418289, "learning_rate": 0.002, "loss": 2.3365, "step": 121950 }, { "epoch": 0.47146325246246384, "grad_norm": 0.12260083109140396, "learning_rate": 0.002, "loss": 2.3446, "step": 121960 }, { "epoch": 0.47150190966584715, "grad_norm": 0.13318702578544617, "learning_rate": 0.002, "loss": 2.3487, "step": 121970 }, { "epoch": 0.4715405668692304, "grad_norm": 0.10054940730333328, "learning_rate": 0.002, "loss": 2.3365, "step": 121980 }, { "epoch": 0.4715792240726137, "grad_norm": 0.1058686152100563, "learning_rate": 0.002, "loss": 2.3403, "step": 121990 }, { "epoch": 0.47161788127599696, "grad_norm": 0.12064962089061737, "learning_rate": 0.002, "loss": 2.3525, "step": 122000 }, { "epoch": 0.47165653847938027, "grad_norm": 0.10323496162891388, "learning_rate": 0.002, "loss": 2.3454, "step": 122010 }, { "epoch": 0.4716951956827635, "grad_norm": 0.10121781378984451, "learning_rate": 0.002, "loss": 2.3528, "step": 122020 }, { "epoch": 0.47173385288614683, "grad_norm": 0.11574602872133255, "learning_rate": 0.002, "loss": 2.3659, "step": 122030 }, { "epoch": 0.4717725100895301, "grad_norm": 0.11002667993307114, "learning_rate": 0.002, "loss": 2.3598, "step": 122040 }, { "epoch": 0.47181116729291334, "grad_norm": 0.10185150802135468, "learning_rate": 0.002, "loss": 2.3424, "step": 122050 }, { "epoch": 0.47184982449629664, "grad_norm": 0.11052855104207993, "learning_rate": 0.002, "loss": 2.345, "step": 122060 }, { "epoch": 0.4718884816996799, "grad_norm": 0.13415193557739258, "learning_rate": 0.002, "loss": 2.3574, "step": 122070 }, { "epoch": 0.4719271389030632, "grad_norm": 0.1063295528292656, "learning_rate": 0.002, "loss": 2.3464, "step": 122080 }, { "epoch": 0.47196579610644646, "grad_norm": 0.08993332087993622, "learning_rate": 0.002, "loss": 2.3552, "step": 122090 }, { "epoch": 0.47200445330982976, "grad_norm": 0.10279487818479538, "learning_rate": 0.002, "loss": 2.337, "step": 122100 }, { "epoch": 0.472043110513213, "grad_norm": 0.11254022270441055, "learning_rate": 0.002, "loss": 2.354, "step": 122110 }, { "epoch": 0.4720817677165963, "grad_norm": 0.10892201215028763, "learning_rate": 0.002, "loss": 2.3555, "step": 122120 }, { "epoch": 0.4721204249199796, "grad_norm": 0.11184471845626831, "learning_rate": 0.002, "loss": 2.343, "step": 122130 }, { "epoch": 0.4721590821233629, "grad_norm": 0.11071911454200745, "learning_rate": 0.002, "loss": 2.3563, "step": 122140 }, { "epoch": 0.47219773932674614, "grad_norm": 0.120620958507061, "learning_rate": 0.002, "loss": 2.3354, "step": 122150 }, { "epoch": 0.47223639653012944, "grad_norm": 0.10598298162221909, "learning_rate": 0.002, "loss": 2.3522, "step": 122160 }, { "epoch": 0.4722750537335127, "grad_norm": 0.09908302128314972, "learning_rate": 0.002, "loss": 2.3433, "step": 122170 }, { "epoch": 0.472313710936896, "grad_norm": 0.11272618919610977, "learning_rate": 0.002, "loss": 2.3533, "step": 122180 }, { "epoch": 0.47235236814027926, "grad_norm": 0.10746448487043381, "learning_rate": 0.002, "loss": 2.3288, "step": 122190 }, { "epoch": 0.47239102534366256, "grad_norm": 0.11876311153173447, "learning_rate": 0.002, "loss": 2.3555, "step": 122200 }, { "epoch": 0.4724296825470458, "grad_norm": 0.09779126197099686, "learning_rate": 0.002, "loss": 2.3442, "step": 122210 }, { "epoch": 0.4724683397504291, "grad_norm": 0.11250808835029602, "learning_rate": 0.002, "loss": 2.3506, "step": 122220 }, { "epoch": 0.4725069969538124, "grad_norm": 0.09808599948883057, "learning_rate": 0.002, "loss": 2.3504, "step": 122230 }, { "epoch": 0.4725456541571956, "grad_norm": 0.09768354147672653, "learning_rate": 0.002, "loss": 2.3593, "step": 122240 }, { "epoch": 0.47258431136057893, "grad_norm": 0.10772417485713959, "learning_rate": 0.002, "loss": 2.3502, "step": 122250 }, { "epoch": 0.4726229685639622, "grad_norm": 0.11477760970592499, "learning_rate": 0.002, "loss": 2.3583, "step": 122260 }, { "epoch": 0.4726616257673455, "grad_norm": 0.11122085899114609, "learning_rate": 0.002, "loss": 2.3552, "step": 122270 }, { "epoch": 0.47270028297072875, "grad_norm": 0.12852375209331512, "learning_rate": 0.002, "loss": 2.3331, "step": 122280 }, { "epoch": 0.47273894017411205, "grad_norm": 0.16506214439868927, "learning_rate": 0.002, "loss": 2.3941, "step": 122290 }, { "epoch": 0.4727775973774953, "grad_norm": 0.11552702635526657, "learning_rate": 0.002, "loss": 2.3702, "step": 122300 }, { "epoch": 0.4728162545808786, "grad_norm": 0.10295978933572769, "learning_rate": 0.002, "loss": 2.3604, "step": 122310 }, { "epoch": 0.47285491178426187, "grad_norm": 0.33917784690856934, "learning_rate": 0.002, "loss": 2.3405, "step": 122320 }, { "epoch": 0.4728935689876452, "grad_norm": 0.09117474406957626, "learning_rate": 0.002, "loss": 2.3408, "step": 122330 }, { "epoch": 0.4729322261910284, "grad_norm": 0.11995255202054977, "learning_rate": 0.002, "loss": 2.3355, "step": 122340 }, { "epoch": 0.47297088339441173, "grad_norm": 0.10412541031837463, "learning_rate": 0.002, "loss": 2.3506, "step": 122350 }, { "epoch": 0.473009540597795, "grad_norm": 0.10575006902217865, "learning_rate": 0.002, "loss": 2.365, "step": 122360 }, { "epoch": 0.4730481978011783, "grad_norm": 0.09834925830364227, "learning_rate": 0.002, "loss": 2.3466, "step": 122370 }, { "epoch": 0.47308685500456155, "grad_norm": 0.10345424711704254, "learning_rate": 0.002, "loss": 2.3516, "step": 122380 }, { "epoch": 0.47312551220794485, "grad_norm": 0.10761108249425888, "learning_rate": 0.002, "loss": 2.3469, "step": 122390 }, { "epoch": 0.4731641694113281, "grad_norm": 0.13001149892807007, "learning_rate": 0.002, "loss": 2.3357, "step": 122400 }, { "epoch": 0.47320282661471136, "grad_norm": 0.11039768904447556, "learning_rate": 0.002, "loss": 2.3496, "step": 122410 }, { "epoch": 0.47324148381809467, "grad_norm": 0.10438776016235352, "learning_rate": 0.002, "loss": 2.3375, "step": 122420 }, { "epoch": 0.4732801410214779, "grad_norm": 0.11237910389900208, "learning_rate": 0.002, "loss": 2.3474, "step": 122430 }, { "epoch": 0.4733187982248612, "grad_norm": 0.10197488218545914, "learning_rate": 0.002, "loss": 2.3417, "step": 122440 }, { "epoch": 0.4733574554282445, "grad_norm": 0.11044498533010483, "learning_rate": 0.002, "loss": 2.3491, "step": 122450 }, { "epoch": 0.4733961126316278, "grad_norm": 0.12326077371835709, "learning_rate": 0.002, "loss": 2.3427, "step": 122460 }, { "epoch": 0.47343476983501104, "grad_norm": 0.09754885733127594, "learning_rate": 0.002, "loss": 2.3465, "step": 122470 }, { "epoch": 0.47347342703839435, "grad_norm": 0.10445263981819153, "learning_rate": 0.002, "loss": 2.3471, "step": 122480 }, { "epoch": 0.4735120842417776, "grad_norm": 0.09975433349609375, "learning_rate": 0.002, "loss": 2.3444, "step": 122490 }, { "epoch": 0.4735507414451609, "grad_norm": 0.10851839184761047, "learning_rate": 0.002, "loss": 2.3495, "step": 122500 }, { "epoch": 0.47358939864854416, "grad_norm": 0.10483758896589279, "learning_rate": 0.002, "loss": 2.3322, "step": 122510 }, { "epoch": 0.47362805585192747, "grad_norm": 0.1174597442150116, "learning_rate": 0.002, "loss": 2.3339, "step": 122520 }, { "epoch": 0.4736667130553107, "grad_norm": 0.10874351114034653, "learning_rate": 0.002, "loss": 2.3545, "step": 122530 }, { "epoch": 0.473705370258694, "grad_norm": 0.11186617612838745, "learning_rate": 0.002, "loss": 2.3675, "step": 122540 }, { "epoch": 0.4737440274620773, "grad_norm": 0.09507044404745102, "learning_rate": 0.002, "loss": 2.3569, "step": 122550 }, { "epoch": 0.4737826846654606, "grad_norm": 0.09617611765861511, "learning_rate": 0.002, "loss": 2.3413, "step": 122560 }, { "epoch": 0.47382134186884384, "grad_norm": 0.11511634290218353, "learning_rate": 0.002, "loss": 2.3423, "step": 122570 }, { "epoch": 0.47385999907222714, "grad_norm": 0.12867169082164764, "learning_rate": 0.002, "loss": 2.3616, "step": 122580 }, { "epoch": 0.4738986562756104, "grad_norm": 0.11168276518583298, "learning_rate": 0.002, "loss": 2.3473, "step": 122590 }, { "epoch": 0.47393731347899365, "grad_norm": 0.10149930417537689, "learning_rate": 0.002, "loss": 2.3523, "step": 122600 }, { "epoch": 0.47397597068237696, "grad_norm": 0.11204592883586884, "learning_rate": 0.002, "loss": 2.3731, "step": 122610 }, { "epoch": 0.4740146278857602, "grad_norm": 0.11496981233358383, "learning_rate": 0.002, "loss": 2.3426, "step": 122620 }, { "epoch": 0.4740532850891435, "grad_norm": 0.09762315452098846, "learning_rate": 0.002, "loss": 2.3511, "step": 122630 }, { "epoch": 0.47409194229252677, "grad_norm": 0.0918693020939827, "learning_rate": 0.002, "loss": 2.353, "step": 122640 }, { "epoch": 0.4741305994959101, "grad_norm": 0.12558548152446747, "learning_rate": 0.002, "loss": 2.3495, "step": 122650 }, { "epoch": 0.47416925669929333, "grad_norm": 0.10448439419269562, "learning_rate": 0.002, "loss": 2.3519, "step": 122660 }, { "epoch": 0.47420791390267664, "grad_norm": 0.08786539733409882, "learning_rate": 0.002, "loss": 2.3301, "step": 122670 }, { "epoch": 0.4742465711060599, "grad_norm": 0.12373369932174683, "learning_rate": 0.002, "loss": 2.3575, "step": 122680 }, { "epoch": 0.4742852283094432, "grad_norm": 0.10301606357097626, "learning_rate": 0.002, "loss": 2.3425, "step": 122690 }, { "epoch": 0.47432388551282645, "grad_norm": 0.1180417463183403, "learning_rate": 0.002, "loss": 2.3485, "step": 122700 }, { "epoch": 0.47436254271620976, "grad_norm": 0.0997176319360733, "learning_rate": 0.002, "loss": 2.3446, "step": 122710 }, { "epoch": 0.474401199919593, "grad_norm": 0.11349517852067947, "learning_rate": 0.002, "loss": 2.3566, "step": 122720 }, { "epoch": 0.4744398571229763, "grad_norm": 0.11657247692346573, "learning_rate": 0.002, "loss": 2.3765, "step": 122730 }, { "epoch": 0.47447851432635957, "grad_norm": 0.09847909212112427, "learning_rate": 0.002, "loss": 2.3489, "step": 122740 }, { "epoch": 0.4745171715297429, "grad_norm": 0.11855094879865646, "learning_rate": 0.002, "loss": 2.3506, "step": 122750 }, { "epoch": 0.47455582873312613, "grad_norm": 0.10409026592969894, "learning_rate": 0.002, "loss": 2.3444, "step": 122760 }, { "epoch": 0.47459448593650944, "grad_norm": 0.10284674912691116, "learning_rate": 0.002, "loss": 2.3636, "step": 122770 }, { "epoch": 0.4746331431398927, "grad_norm": 0.11371070891618729, "learning_rate": 0.002, "loss": 2.3427, "step": 122780 }, { "epoch": 0.47467180034327594, "grad_norm": 0.10059019923210144, "learning_rate": 0.002, "loss": 2.3563, "step": 122790 }, { "epoch": 0.47471045754665925, "grad_norm": 0.11485622823238373, "learning_rate": 0.002, "loss": 2.3493, "step": 122800 }, { "epoch": 0.4747491147500425, "grad_norm": 0.09271082282066345, "learning_rate": 0.002, "loss": 2.3312, "step": 122810 }, { "epoch": 0.4747877719534258, "grad_norm": 0.10824739187955856, "learning_rate": 0.002, "loss": 2.3415, "step": 122820 }, { "epoch": 0.47482642915680906, "grad_norm": 0.11803317815065384, "learning_rate": 0.002, "loss": 2.3335, "step": 122830 }, { "epoch": 0.47486508636019237, "grad_norm": 0.09994529187679291, "learning_rate": 0.002, "loss": 2.3284, "step": 122840 }, { "epoch": 0.4749037435635756, "grad_norm": 0.11159958690404892, "learning_rate": 0.002, "loss": 2.3363, "step": 122850 }, { "epoch": 0.4749424007669589, "grad_norm": 0.10315810143947601, "learning_rate": 0.002, "loss": 2.333, "step": 122860 }, { "epoch": 0.4749810579703422, "grad_norm": 0.11093452572822571, "learning_rate": 0.002, "loss": 2.3461, "step": 122870 }, { "epoch": 0.4750197151737255, "grad_norm": 0.14293049275875092, "learning_rate": 0.002, "loss": 2.3493, "step": 122880 }, { "epoch": 0.47505837237710874, "grad_norm": 0.10940203070640564, "learning_rate": 0.002, "loss": 2.348, "step": 122890 }, { "epoch": 0.47509702958049205, "grad_norm": 0.11061827093362808, "learning_rate": 0.002, "loss": 2.3407, "step": 122900 }, { "epoch": 0.4751356867838753, "grad_norm": 0.11959835141897202, "learning_rate": 0.002, "loss": 2.3514, "step": 122910 }, { "epoch": 0.4751743439872586, "grad_norm": 0.1079825758934021, "learning_rate": 0.002, "loss": 2.3339, "step": 122920 }, { "epoch": 0.47521300119064186, "grad_norm": 0.11268473416566849, "learning_rate": 0.002, "loss": 2.3529, "step": 122930 }, { "epoch": 0.47525165839402517, "grad_norm": 0.10862148553133011, "learning_rate": 0.002, "loss": 2.3297, "step": 122940 }, { "epoch": 0.4752903155974084, "grad_norm": 0.09216571599245071, "learning_rate": 0.002, "loss": 2.3359, "step": 122950 }, { "epoch": 0.4753289728007917, "grad_norm": 0.11650997400283813, "learning_rate": 0.002, "loss": 2.3552, "step": 122960 }, { "epoch": 0.475367630004175, "grad_norm": 0.09918678551912308, "learning_rate": 0.002, "loss": 2.3585, "step": 122970 }, { "epoch": 0.47540628720755823, "grad_norm": 0.11022733896970749, "learning_rate": 0.002, "loss": 2.3471, "step": 122980 }, { "epoch": 0.47544494441094154, "grad_norm": 0.10531317442655563, "learning_rate": 0.002, "loss": 2.3367, "step": 122990 }, { "epoch": 0.4754836016143248, "grad_norm": 0.11401054263114929, "learning_rate": 0.002, "loss": 2.3466, "step": 123000 }, { "epoch": 0.4755222588177081, "grad_norm": 0.10972213000059128, "learning_rate": 0.002, "loss": 2.3399, "step": 123010 }, { "epoch": 0.47556091602109135, "grad_norm": 0.10765939205884933, "learning_rate": 0.002, "loss": 2.3444, "step": 123020 }, { "epoch": 0.47559957322447466, "grad_norm": 0.09859401732683182, "learning_rate": 0.002, "loss": 2.3427, "step": 123030 }, { "epoch": 0.4756382304278579, "grad_norm": 0.11380176246166229, "learning_rate": 0.002, "loss": 2.3448, "step": 123040 }, { "epoch": 0.4756768876312412, "grad_norm": 0.09912136942148209, "learning_rate": 0.002, "loss": 2.3453, "step": 123050 }, { "epoch": 0.47571554483462447, "grad_norm": 0.1289130300283432, "learning_rate": 0.002, "loss": 2.3452, "step": 123060 }, { "epoch": 0.4757542020380078, "grad_norm": 0.0963532030582428, "learning_rate": 0.002, "loss": 2.333, "step": 123070 }, { "epoch": 0.47579285924139103, "grad_norm": 0.11527842283248901, "learning_rate": 0.002, "loss": 2.3588, "step": 123080 }, { "epoch": 0.47583151644477434, "grad_norm": 0.13533703982830048, "learning_rate": 0.002, "loss": 2.363, "step": 123090 }, { "epoch": 0.4758701736481576, "grad_norm": 0.10471322387456894, "learning_rate": 0.002, "loss": 2.3489, "step": 123100 }, { "epoch": 0.4759088308515409, "grad_norm": 0.10058660060167313, "learning_rate": 0.002, "loss": 2.3424, "step": 123110 }, { "epoch": 0.47594748805492415, "grad_norm": 0.09999233484268188, "learning_rate": 0.002, "loss": 2.3444, "step": 123120 }, { "epoch": 0.47598614525830746, "grad_norm": 0.10986481606960297, "learning_rate": 0.002, "loss": 2.3405, "step": 123130 }, { "epoch": 0.4760248024616907, "grad_norm": 0.10944868624210358, "learning_rate": 0.002, "loss": 2.352, "step": 123140 }, { "epoch": 0.47606345966507396, "grad_norm": 0.09159363061189651, "learning_rate": 0.002, "loss": 2.3502, "step": 123150 }, { "epoch": 0.47610211686845727, "grad_norm": 0.13750354945659637, "learning_rate": 0.002, "loss": 2.3491, "step": 123160 }, { "epoch": 0.4761407740718405, "grad_norm": 0.11780702322721481, "learning_rate": 0.002, "loss": 2.3678, "step": 123170 }, { "epoch": 0.47617943127522383, "grad_norm": 0.12204831838607788, "learning_rate": 0.002, "loss": 2.3433, "step": 123180 }, { "epoch": 0.4762180884786071, "grad_norm": 0.10399141907691956, "learning_rate": 0.002, "loss": 2.339, "step": 123190 }, { "epoch": 0.4762567456819904, "grad_norm": 0.11878559738397598, "learning_rate": 0.002, "loss": 2.3537, "step": 123200 }, { "epoch": 0.47629540288537364, "grad_norm": 0.11291263997554779, "learning_rate": 0.002, "loss": 2.3601, "step": 123210 }, { "epoch": 0.47633406008875695, "grad_norm": 0.12872259318828583, "learning_rate": 0.002, "loss": 2.3399, "step": 123220 }, { "epoch": 0.4763727172921402, "grad_norm": 0.12508279085159302, "learning_rate": 0.002, "loss": 2.3422, "step": 123230 }, { "epoch": 0.4764113744955235, "grad_norm": 0.09549172967672348, "learning_rate": 0.002, "loss": 2.3373, "step": 123240 }, { "epoch": 0.47645003169890676, "grad_norm": 0.10898482799530029, "learning_rate": 0.002, "loss": 2.3441, "step": 123250 }, { "epoch": 0.47648868890229007, "grad_norm": 0.1020859032869339, "learning_rate": 0.002, "loss": 2.3469, "step": 123260 }, { "epoch": 0.4765273461056733, "grad_norm": 0.10750532895326614, "learning_rate": 0.002, "loss": 2.3439, "step": 123270 }, { "epoch": 0.47656600330905663, "grad_norm": 0.10456563532352448, "learning_rate": 0.002, "loss": 2.3456, "step": 123280 }, { "epoch": 0.4766046605124399, "grad_norm": 0.13879376649856567, "learning_rate": 0.002, "loss": 2.3358, "step": 123290 }, { "epoch": 0.4766433177158232, "grad_norm": 0.10747821629047394, "learning_rate": 0.002, "loss": 2.3257, "step": 123300 }, { "epoch": 0.47668197491920644, "grad_norm": 0.09391331672668457, "learning_rate": 0.002, "loss": 2.3406, "step": 123310 }, { "epoch": 0.47672063212258975, "grad_norm": 0.11444292217493057, "learning_rate": 0.002, "loss": 2.3428, "step": 123320 }, { "epoch": 0.476759289325973, "grad_norm": 0.10892808437347412, "learning_rate": 0.002, "loss": 2.3474, "step": 123330 }, { "epoch": 0.47679794652935625, "grad_norm": 0.10487792640924454, "learning_rate": 0.002, "loss": 2.3267, "step": 123340 }, { "epoch": 0.47683660373273956, "grad_norm": 0.10460419952869415, "learning_rate": 0.002, "loss": 2.3421, "step": 123350 }, { "epoch": 0.4768752609361228, "grad_norm": 0.11522373557090759, "learning_rate": 0.002, "loss": 2.3409, "step": 123360 }, { "epoch": 0.4769139181395061, "grad_norm": 0.09701727330684662, "learning_rate": 0.002, "loss": 2.3472, "step": 123370 }, { "epoch": 0.4769525753428894, "grad_norm": 0.10497508198022842, "learning_rate": 0.002, "loss": 2.3479, "step": 123380 }, { "epoch": 0.4769912325462727, "grad_norm": 0.10454043000936508, "learning_rate": 0.002, "loss": 2.3384, "step": 123390 }, { "epoch": 0.47702988974965593, "grad_norm": 0.10469161719083786, "learning_rate": 0.002, "loss": 2.343, "step": 123400 }, { "epoch": 0.47706854695303924, "grad_norm": 0.11177811026573181, "learning_rate": 0.002, "loss": 2.3315, "step": 123410 }, { "epoch": 0.4771072041564225, "grad_norm": 0.13299429416656494, "learning_rate": 0.002, "loss": 2.3389, "step": 123420 }, { "epoch": 0.4771458613598058, "grad_norm": 0.127616286277771, "learning_rate": 0.002, "loss": 2.3459, "step": 123430 }, { "epoch": 0.47718451856318905, "grad_norm": 0.09650331735610962, "learning_rate": 0.002, "loss": 2.3485, "step": 123440 }, { "epoch": 0.47722317576657236, "grad_norm": 0.09756970405578613, "learning_rate": 0.002, "loss": 2.3397, "step": 123450 }, { "epoch": 0.4772618329699556, "grad_norm": 0.12383294105529785, "learning_rate": 0.002, "loss": 2.3422, "step": 123460 }, { "epoch": 0.4773004901733389, "grad_norm": 0.09876053035259247, "learning_rate": 0.002, "loss": 2.3341, "step": 123470 }, { "epoch": 0.4773391473767222, "grad_norm": 0.09043329954147339, "learning_rate": 0.002, "loss": 2.3516, "step": 123480 }, { "epoch": 0.4773778045801055, "grad_norm": 0.11116903275251389, "learning_rate": 0.002, "loss": 2.3446, "step": 123490 }, { "epoch": 0.47741646178348873, "grad_norm": 0.08807190507650375, "learning_rate": 0.002, "loss": 2.3484, "step": 123500 }, { "epoch": 0.47745511898687204, "grad_norm": 0.12583951652050018, "learning_rate": 0.002, "loss": 2.3463, "step": 123510 }, { "epoch": 0.4774937761902553, "grad_norm": 0.10756245255470276, "learning_rate": 0.002, "loss": 2.3375, "step": 123520 }, { "epoch": 0.47753243339363854, "grad_norm": 0.1139204278588295, "learning_rate": 0.002, "loss": 2.3499, "step": 123530 }, { "epoch": 0.47757109059702185, "grad_norm": 0.10103306919336319, "learning_rate": 0.002, "loss": 2.355, "step": 123540 }, { "epoch": 0.4776097478004051, "grad_norm": 0.11110308021306992, "learning_rate": 0.002, "loss": 2.3341, "step": 123550 }, { "epoch": 0.4776484050037884, "grad_norm": 0.11258696764707565, "learning_rate": 0.002, "loss": 2.3538, "step": 123560 }, { "epoch": 0.47768706220717166, "grad_norm": 0.10868509113788605, "learning_rate": 0.002, "loss": 2.3394, "step": 123570 }, { "epoch": 0.47772571941055497, "grad_norm": 0.10797320306301117, "learning_rate": 0.002, "loss": 2.3527, "step": 123580 }, { "epoch": 0.4777643766139382, "grad_norm": 0.1106133833527565, "learning_rate": 0.002, "loss": 2.3531, "step": 123590 }, { "epoch": 0.47780303381732153, "grad_norm": 0.09830787777900696, "learning_rate": 0.002, "loss": 2.3318, "step": 123600 }, { "epoch": 0.4778416910207048, "grad_norm": 0.09236044436693192, "learning_rate": 0.002, "loss": 2.3449, "step": 123610 }, { "epoch": 0.4778803482240881, "grad_norm": 0.0985744521021843, "learning_rate": 0.002, "loss": 2.3505, "step": 123620 }, { "epoch": 0.47791900542747134, "grad_norm": 0.12322218716144562, "learning_rate": 0.002, "loss": 2.3302, "step": 123630 }, { "epoch": 0.47795766263085465, "grad_norm": 0.11628399044275284, "learning_rate": 0.002, "loss": 2.3553, "step": 123640 }, { "epoch": 0.4779963198342379, "grad_norm": 0.09713403135538101, "learning_rate": 0.002, "loss": 2.3436, "step": 123650 }, { "epoch": 0.4780349770376212, "grad_norm": 0.10573708266019821, "learning_rate": 0.002, "loss": 2.3407, "step": 123660 }, { "epoch": 0.47807363424100446, "grad_norm": 0.16359750926494598, "learning_rate": 0.002, "loss": 2.3328, "step": 123670 }, { "epoch": 0.47811229144438777, "grad_norm": 0.10506831854581833, "learning_rate": 0.002, "loss": 2.3445, "step": 123680 }, { "epoch": 0.478150948647771, "grad_norm": 0.10553120076656342, "learning_rate": 0.002, "loss": 2.3517, "step": 123690 }, { "epoch": 0.47818960585115433, "grad_norm": 0.11690998077392578, "learning_rate": 0.002, "loss": 2.3565, "step": 123700 }, { "epoch": 0.4782282630545376, "grad_norm": 0.2327994406223297, "learning_rate": 0.002, "loss": 2.3531, "step": 123710 }, { "epoch": 0.47826692025792084, "grad_norm": 0.11086133122444153, "learning_rate": 0.002, "loss": 2.3433, "step": 123720 }, { "epoch": 0.47830557746130414, "grad_norm": 0.10380079597234726, "learning_rate": 0.002, "loss": 2.3245, "step": 123730 }, { "epoch": 0.4783442346646874, "grad_norm": 0.10399412363767624, "learning_rate": 0.002, "loss": 2.3566, "step": 123740 }, { "epoch": 0.4783828918680707, "grad_norm": 0.11090266704559326, "learning_rate": 0.002, "loss": 2.3514, "step": 123750 }, { "epoch": 0.47842154907145396, "grad_norm": 0.10452635586261749, "learning_rate": 0.002, "loss": 2.3255, "step": 123760 }, { "epoch": 0.47846020627483726, "grad_norm": 0.0872868224978447, "learning_rate": 0.002, "loss": 2.3408, "step": 123770 }, { "epoch": 0.4784988634782205, "grad_norm": 0.09890952706336975, "learning_rate": 0.002, "loss": 2.3275, "step": 123780 }, { "epoch": 0.4785375206816038, "grad_norm": 0.10068909078836441, "learning_rate": 0.002, "loss": 2.347, "step": 123790 }, { "epoch": 0.4785761778849871, "grad_norm": 0.10055209696292877, "learning_rate": 0.002, "loss": 2.3481, "step": 123800 }, { "epoch": 0.4786148350883704, "grad_norm": 0.1076897531747818, "learning_rate": 0.002, "loss": 2.3527, "step": 123810 }, { "epoch": 0.47865349229175363, "grad_norm": 0.11537522822618484, "learning_rate": 0.002, "loss": 2.3465, "step": 123820 }, { "epoch": 0.47869214949513694, "grad_norm": 0.11433115601539612, "learning_rate": 0.002, "loss": 2.34, "step": 123830 }, { "epoch": 0.4787308066985202, "grad_norm": 0.1042742133140564, "learning_rate": 0.002, "loss": 2.3446, "step": 123840 }, { "epoch": 0.4787694639019035, "grad_norm": 0.1332925260066986, "learning_rate": 0.002, "loss": 2.3335, "step": 123850 }, { "epoch": 0.47880812110528675, "grad_norm": 0.0977521538734436, "learning_rate": 0.002, "loss": 2.338, "step": 123860 }, { "epoch": 0.47884677830867006, "grad_norm": 0.09739074110984802, "learning_rate": 0.002, "loss": 2.3566, "step": 123870 }, { "epoch": 0.4788854355120533, "grad_norm": 0.10039547830820084, "learning_rate": 0.002, "loss": 2.3481, "step": 123880 }, { "epoch": 0.47892409271543657, "grad_norm": 0.10721756517887115, "learning_rate": 0.002, "loss": 2.3416, "step": 123890 }, { "epoch": 0.4789627499188199, "grad_norm": 0.09866327792406082, "learning_rate": 0.002, "loss": 2.3465, "step": 123900 }, { "epoch": 0.4790014071222031, "grad_norm": 0.10998773574829102, "learning_rate": 0.002, "loss": 2.3445, "step": 123910 }, { "epoch": 0.47904006432558643, "grad_norm": 0.10089613497257233, "learning_rate": 0.002, "loss": 2.3351, "step": 123920 }, { "epoch": 0.4790787215289697, "grad_norm": 0.09514022618532181, "learning_rate": 0.002, "loss": 2.3484, "step": 123930 }, { "epoch": 0.479117378732353, "grad_norm": 0.11414384096860886, "learning_rate": 0.002, "loss": 2.3532, "step": 123940 }, { "epoch": 0.47915603593573625, "grad_norm": 0.09830380976200104, "learning_rate": 0.002, "loss": 2.3352, "step": 123950 }, { "epoch": 0.47919469313911955, "grad_norm": 0.10151248425245285, "learning_rate": 0.002, "loss": 2.3543, "step": 123960 }, { "epoch": 0.4792333503425028, "grad_norm": 0.09892397373914719, "learning_rate": 0.002, "loss": 2.3471, "step": 123970 }, { "epoch": 0.4792720075458861, "grad_norm": 0.11178483814001083, "learning_rate": 0.002, "loss": 2.3419, "step": 123980 }, { "epoch": 0.47931066474926937, "grad_norm": 0.106821209192276, "learning_rate": 0.002, "loss": 2.3533, "step": 123990 }, { "epoch": 0.4793493219526527, "grad_norm": 0.09730260819196701, "learning_rate": 0.002, "loss": 2.3359, "step": 124000 }, { "epoch": 0.4793879791560359, "grad_norm": 0.10343889147043228, "learning_rate": 0.002, "loss": 2.3529, "step": 124010 }, { "epoch": 0.47942663635941923, "grad_norm": 0.09810855239629745, "learning_rate": 0.002, "loss": 2.3383, "step": 124020 }, { "epoch": 0.4794652935628025, "grad_norm": 0.1482008844614029, "learning_rate": 0.002, "loss": 2.3526, "step": 124030 }, { "epoch": 0.4795039507661858, "grad_norm": 0.11742330342531204, "learning_rate": 0.002, "loss": 2.3545, "step": 124040 }, { "epoch": 0.47954260796956905, "grad_norm": 0.09411367028951645, "learning_rate": 0.002, "loss": 2.3277, "step": 124050 }, { "epoch": 0.47958126517295235, "grad_norm": 0.11513230204582214, "learning_rate": 0.002, "loss": 2.3446, "step": 124060 }, { "epoch": 0.4796199223763356, "grad_norm": 0.11209332942962646, "learning_rate": 0.002, "loss": 2.3554, "step": 124070 }, { "epoch": 0.47965857957971886, "grad_norm": 0.10911991447210312, "learning_rate": 0.002, "loss": 2.3432, "step": 124080 }, { "epoch": 0.47969723678310217, "grad_norm": 0.12374506890773773, "learning_rate": 0.002, "loss": 2.3595, "step": 124090 }, { "epoch": 0.4797358939864854, "grad_norm": 0.1096247136592865, "learning_rate": 0.002, "loss": 2.3648, "step": 124100 }, { "epoch": 0.4797745511898687, "grad_norm": 0.12387961149215698, "learning_rate": 0.002, "loss": 2.3505, "step": 124110 }, { "epoch": 0.479813208393252, "grad_norm": 0.10381369292736053, "learning_rate": 0.002, "loss": 2.3523, "step": 124120 }, { "epoch": 0.4798518655966353, "grad_norm": 0.11828626692295074, "learning_rate": 0.002, "loss": 2.3487, "step": 124130 }, { "epoch": 0.47989052280001854, "grad_norm": 0.1190611943602562, "learning_rate": 0.002, "loss": 2.3348, "step": 124140 }, { "epoch": 0.47992918000340185, "grad_norm": 0.10298268496990204, "learning_rate": 0.002, "loss": 2.3688, "step": 124150 }, { "epoch": 0.4799678372067851, "grad_norm": 0.1297299563884735, "learning_rate": 0.002, "loss": 2.3555, "step": 124160 }, { "epoch": 0.4800064944101684, "grad_norm": 0.11486640572547913, "learning_rate": 0.002, "loss": 2.3313, "step": 124170 }, { "epoch": 0.48004515161355166, "grad_norm": 0.10039033740758896, "learning_rate": 0.002, "loss": 2.3499, "step": 124180 }, { "epoch": 0.48008380881693496, "grad_norm": 0.09772678464651108, "learning_rate": 0.002, "loss": 2.3433, "step": 124190 }, { "epoch": 0.4801224660203182, "grad_norm": 0.16087579727172852, "learning_rate": 0.002, "loss": 2.343, "step": 124200 }, { "epoch": 0.4801611232237015, "grad_norm": 0.11107642948627472, "learning_rate": 0.002, "loss": 2.3482, "step": 124210 }, { "epoch": 0.4801997804270848, "grad_norm": 0.10909386724233627, "learning_rate": 0.002, "loss": 2.3458, "step": 124220 }, { "epoch": 0.4802384376304681, "grad_norm": 0.11320231109857559, "learning_rate": 0.002, "loss": 2.3459, "step": 124230 }, { "epoch": 0.48027709483385134, "grad_norm": 0.10628072172403336, "learning_rate": 0.002, "loss": 2.3398, "step": 124240 }, { "epoch": 0.48031575203723464, "grad_norm": 0.11253766715526581, "learning_rate": 0.002, "loss": 2.3523, "step": 124250 }, { "epoch": 0.4803544092406179, "grad_norm": 0.10708357393741608, "learning_rate": 0.002, "loss": 2.3645, "step": 124260 }, { "epoch": 0.48039306644400115, "grad_norm": 0.09570637345314026, "learning_rate": 0.002, "loss": 2.3393, "step": 124270 }, { "epoch": 0.48043172364738446, "grad_norm": 0.11563023924827576, "learning_rate": 0.002, "loss": 2.3491, "step": 124280 }, { "epoch": 0.4804703808507677, "grad_norm": 0.1064223125576973, "learning_rate": 0.002, "loss": 2.346, "step": 124290 }, { "epoch": 0.480509038054151, "grad_norm": 0.09856715053319931, "learning_rate": 0.002, "loss": 2.3437, "step": 124300 }, { "epoch": 0.48054769525753427, "grad_norm": 0.11088183522224426, "learning_rate": 0.002, "loss": 2.346, "step": 124310 }, { "epoch": 0.4805863524609176, "grad_norm": 0.13400298357009888, "learning_rate": 0.002, "loss": 2.3538, "step": 124320 }, { "epoch": 0.48062500966430083, "grad_norm": 0.1328463852405548, "learning_rate": 0.002, "loss": 2.3395, "step": 124330 }, { "epoch": 0.48066366686768414, "grad_norm": 0.11681412905454636, "learning_rate": 0.002, "loss": 2.3424, "step": 124340 }, { "epoch": 0.4807023240710674, "grad_norm": 0.09900808334350586, "learning_rate": 0.002, "loss": 2.3726, "step": 124350 }, { "epoch": 0.4807409812744507, "grad_norm": 0.12010405212640762, "learning_rate": 0.002, "loss": 2.3411, "step": 124360 }, { "epoch": 0.48077963847783395, "grad_norm": 0.10821161419153214, "learning_rate": 0.002, "loss": 2.3606, "step": 124370 }, { "epoch": 0.48081829568121726, "grad_norm": 0.10221979022026062, "learning_rate": 0.002, "loss": 2.3416, "step": 124380 }, { "epoch": 0.4808569528846005, "grad_norm": 0.119261234998703, "learning_rate": 0.002, "loss": 2.3472, "step": 124390 }, { "epoch": 0.4808956100879838, "grad_norm": 0.10663552582263947, "learning_rate": 0.002, "loss": 2.3438, "step": 124400 }, { "epoch": 0.48093426729136707, "grad_norm": 0.12499512732028961, "learning_rate": 0.002, "loss": 2.3327, "step": 124410 }, { "epoch": 0.4809729244947504, "grad_norm": 0.1076861023902893, "learning_rate": 0.002, "loss": 2.3369, "step": 124420 }, { "epoch": 0.48101158169813363, "grad_norm": 0.11870467662811279, "learning_rate": 0.002, "loss": 2.358, "step": 124430 }, { "epoch": 0.48105023890151694, "grad_norm": 0.1098322793841362, "learning_rate": 0.002, "loss": 2.335, "step": 124440 }, { "epoch": 0.4810888961049002, "grad_norm": 0.10048480331897736, "learning_rate": 0.002, "loss": 2.35, "step": 124450 }, { "epoch": 0.48112755330828344, "grad_norm": 0.10514318197965622, "learning_rate": 0.002, "loss": 2.3347, "step": 124460 }, { "epoch": 0.48116621051166675, "grad_norm": 0.10487706959247589, "learning_rate": 0.002, "loss": 2.3484, "step": 124470 }, { "epoch": 0.48120486771505, "grad_norm": 0.10981044918298721, "learning_rate": 0.002, "loss": 2.3497, "step": 124480 }, { "epoch": 0.4812435249184333, "grad_norm": 0.10225850343704224, "learning_rate": 0.002, "loss": 2.36, "step": 124490 }, { "epoch": 0.48128218212181656, "grad_norm": 0.09725990891456604, "learning_rate": 0.002, "loss": 2.3413, "step": 124500 }, { "epoch": 0.48132083932519987, "grad_norm": 0.10735096782445908, "learning_rate": 0.002, "loss": 2.3443, "step": 124510 }, { "epoch": 0.4813594965285831, "grad_norm": 0.09924648702144623, "learning_rate": 0.002, "loss": 2.3496, "step": 124520 }, { "epoch": 0.4813981537319664, "grad_norm": 0.11738327890634537, "learning_rate": 0.002, "loss": 2.3466, "step": 124530 }, { "epoch": 0.4814368109353497, "grad_norm": 0.0980398878455162, "learning_rate": 0.002, "loss": 2.3513, "step": 124540 }, { "epoch": 0.481475468138733, "grad_norm": 0.12604741752147675, "learning_rate": 0.002, "loss": 2.3528, "step": 124550 }, { "epoch": 0.48151412534211624, "grad_norm": 0.12514206767082214, "learning_rate": 0.002, "loss": 2.3425, "step": 124560 }, { "epoch": 0.48155278254549955, "grad_norm": 0.09885002672672272, "learning_rate": 0.002, "loss": 2.3461, "step": 124570 }, { "epoch": 0.4815914397488828, "grad_norm": 0.10877029597759247, "learning_rate": 0.002, "loss": 2.3544, "step": 124580 }, { "epoch": 0.4816300969522661, "grad_norm": 0.11007339507341385, "learning_rate": 0.002, "loss": 2.3521, "step": 124590 }, { "epoch": 0.48166875415564936, "grad_norm": 0.09676380455493927, "learning_rate": 0.002, "loss": 2.3599, "step": 124600 }, { "epoch": 0.48170741135903267, "grad_norm": 0.09930353611707687, "learning_rate": 0.002, "loss": 2.3617, "step": 124610 }, { "epoch": 0.4817460685624159, "grad_norm": 0.09808854013681412, "learning_rate": 0.002, "loss": 2.3453, "step": 124620 }, { "epoch": 0.4817847257657992, "grad_norm": 0.1069636344909668, "learning_rate": 0.002, "loss": 2.3383, "step": 124630 }, { "epoch": 0.4818233829691825, "grad_norm": 0.10339025408029556, "learning_rate": 0.002, "loss": 2.3319, "step": 124640 }, { "epoch": 0.48186204017256573, "grad_norm": 0.1034790500998497, "learning_rate": 0.002, "loss": 2.3448, "step": 124650 }, { "epoch": 0.48190069737594904, "grad_norm": 0.10819303244352341, "learning_rate": 0.002, "loss": 2.3382, "step": 124660 }, { "epoch": 0.4819393545793323, "grad_norm": 0.10512588918209076, "learning_rate": 0.002, "loss": 2.3304, "step": 124670 }, { "epoch": 0.4819780117827156, "grad_norm": 0.16597643494606018, "learning_rate": 0.002, "loss": 2.3493, "step": 124680 }, { "epoch": 0.48201666898609885, "grad_norm": 0.11072135716676712, "learning_rate": 0.002, "loss": 2.339, "step": 124690 }, { "epoch": 0.48205532618948216, "grad_norm": 0.09571421891450882, "learning_rate": 0.002, "loss": 2.3498, "step": 124700 }, { "epoch": 0.4820939833928654, "grad_norm": 0.10639669746160507, "learning_rate": 0.002, "loss": 2.3429, "step": 124710 }, { "epoch": 0.4821326405962487, "grad_norm": 0.09967085719108582, "learning_rate": 0.002, "loss": 2.3424, "step": 124720 }, { "epoch": 0.48217129779963197, "grad_norm": 0.10771028697490692, "learning_rate": 0.002, "loss": 2.3509, "step": 124730 }, { "epoch": 0.4822099550030153, "grad_norm": 0.09660494327545166, "learning_rate": 0.002, "loss": 2.3508, "step": 124740 }, { "epoch": 0.48224861220639853, "grad_norm": 0.10943452268838882, "learning_rate": 0.002, "loss": 2.3399, "step": 124750 }, { "epoch": 0.48228726940978184, "grad_norm": 0.10899591445922852, "learning_rate": 0.002, "loss": 2.3459, "step": 124760 }, { "epoch": 0.4823259266131651, "grad_norm": 0.1283608227968216, "learning_rate": 0.002, "loss": 2.3606, "step": 124770 }, { "epoch": 0.4823645838165484, "grad_norm": 0.11032495647668839, "learning_rate": 0.002, "loss": 2.3509, "step": 124780 }, { "epoch": 0.48240324101993165, "grad_norm": 0.10227882117033005, "learning_rate": 0.002, "loss": 2.3443, "step": 124790 }, { "epoch": 0.48244189822331496, "grad_norm": 0.10366970300674438, "learning_rate": 0.002, "loss": 2.3584, "step": 124800 }, { "epoch": 0.4824805554266982, "grad_norm": 0.11385279148817062, "learning_rate": 0.002, "loss": 2.3523, "step": 124810 }, { "epoch": 0.48251921263008146, "grad_norm": 0.08986172080039978, "learning_rate": 0.002, "loss": 2.341, "step": 124820 }, { "epoch": 0.48255786983346477, "grad_norm": 0.10763401538133621, "learning_rate": 0.002, "loss": 2.3354, "step": 124830 }, { "epoch": 0.482596527036848, "grad_norm": 0.10179536789655685, "learning_rate": 0.002, "loss": 2.3294, "step": 124840 }, { "epoch": 0.48263518424023133, "grad_norm": 0.1055702343583107, "learning_rate": 0.002, "loss": 2.335, "step": 124850 }, { "epoch": 0.4826738414436146, "grad_norm": 0.140970841050148, "learning_rate": 0.002, "loss": 2.3599, "step": 124860 }, { "epoch": 0.4827124986469979, "grad_norm": 0.10648707300424576, "learning_rate": 0.002, "loss": 2.3525, "step": 124870 }, { "epoch": 0.48275115585038114, "grad_norm": 0.105586476624012, "learning_rate": 0.002, "loss": 2.3567, "step": 124880 }, { "epoch": 0.48278981305376445, "grad_norm": 0.10715337097644806, "learning_rate": 0.002, "loss": 2.346, "step": 124890 }, { "epoch": 0.4828284702571477, "grad_norm": 0.1060248464345932, "learning_rate": 0.002, "loss": 2.3565, "step": 124900 }, { "epoch": 0.482867127460531, "grad_norm": 0.09489964693784714, "learning_rate": 0.002, "loss": 2.3403, "step": 124910 }, { "epoch": 0.48290578466391426, "grad_norm": 0.10424128919839859, "learning_rate": 0.002, "loss": 2.3745, "step": 124920 }, { "epoch": 0.48294444186729757, "grad_norm": 0.10901130735874176, "learning_rate": 0.002, "loss": 2.3555, "step": 124930 }, { "epoch": 0.4829830990706808, "grad_norm": 0.11290793865919113, "learning_rate": 0.002, "loss": 2.3542, "step": 124940 }, { "epoch": 0.48302175627406413, "grad_norm": 0.10265372693538666, "learning_rate": 0.002, "loss": 2.3464, "step": 124950 }, { "epoch": 0.4830604134774474, "grad_norm": 0.10744495689868927, "learning_rate": 0.002, "loss": 2.3611, "step": 124960 }, { "epoch": 0.4830990706808307, "grad_norm": 0.0985356792807579, "learning_rate": 0.002, "loss": 2.3437, "step": 124970 }, { "epoch": 0.48313772788421394, "grad_norm": 0.10968119651079178, "learning_rate": 0.002, "loss": 2.354, "step": 124980 }, { "epoch": 0.48317638508759725, "grad_norm": 0.09952443093061447, "learning_rate": 0.002, "loss": 2.3448, "step": 124990 }, { "epoch": 0.4832150422909805, "grad_norm": 0.11583472788333893, "learning_rate": 0.002, "loss": 2.3333, "step": 125000 }, { "epoch": 0.48325369949436375, "grad_norm": 0.12367252260446548, "learning_rate": 0.002, "loss": 2.3343, "step": 125010 }, { "epoch": 0.48329235669774706, "grad_norm": 0.11487246304750443, "learning_rate": 0.002, "loss": 2.3522, "step": 125020 }, { "epoch": 0.4833310139011303, "grad_norm": 0.10422641038894653, "learning_rate": 0.002, "loss": 2.3481, "step": 125030 }, { "epoch": 0.4833696711045136, "grad_norm": 0.09209615737199783, "learning_rate": 0.002, "loss": 2.3406, "step": 125040 }, { "epoch": 0.4834083283078969, "grad_norm": 0.1276664286851883, "learning_rate": 0.002, "loss": 2.338, "step": 125050 }, { "epoch": 0.4834469855112802, "grad_norm": 0.10056187212467194, "learning_rate": 0.002, "loss": 2.348, "step": 125060 }, { "epoch": 0.48348564271466343, "grad_norm": 0.1188889592885971, "learning_rate": 0.002, "loss": 2.3399, "step": 125070 }, { "epoch": 0.48352429991804674, "grad_norm": 0.11096154153347015, "learning_rate": 0.002, "loss": 2.3417, "step": 125080 }, { "epoch": 0.48356295712143, "grad_norm": 0.119388647377491, "learning_rate": 0.002, "loss": 2.3345, "step": 125090 }, { "epoch": 0.4836016143248133, "grad_norm": 0.09515579789876938, "learning_rate": 0.002, "loss": 2.333, "step": 125100 }, { "epoch": 0.48364027152819655, "grad_norm": 0.09334330260753632, "learning_rate": 0.002, "loss": 2.3476, "step": 125110 }, { "epoch": 0.48367892873157986, "grad_norm": 0.0963970422744751, "learning_rate": 0.002, "loss": 2.3557, "step": 125120 }, { "epoch": 0.4837175859349631, "grad_norm": 0.10869818180799484, "learning_rate": 0.002, "loss": 2.345, "step": 125130 }, { "epoch": 0.4837562431383464, "grad_norm": 0.11080613732337952, "learning_rate": 0.002, "loss": 2.3375, "step": 125140 }, { "epoch": 0.4837949003417297, "grad_norm": 0.11019641160964966, "learning_rate": 0.002, "loss": 2.3242, "step": 125150 }, { "epoch": 0.483833557545113, "grad_norm": 0.11051522940397263, "learning_rate": 0.002, "loss": 2.3512, "step": 125160 }, { "epoch": 0.48387221474849623, "grad_norm": 0.12491607666015625, "learning_rate": 0.002, "loss": 2.3498, "step": 125170 }, { "epoch": 0.48391087195187954, "grad_norm": 0.11003629863262177, "learning_rate": 0.002, "loss": 2.3387, "step": 125180 }, { "epoch": 0.4839495291552628, "grad_norm": 0.10570420324802399, "learning_rate": 0.002, "loss": 2.3461, "step": 125190 }, { "epoch": 0.48398818635864604, "grad_norm": 0.10359431803226471, "learning_rate": 0.002, "loss": 2.357, "step": 125200 }, { "epoch": 0.48402684356202935, "grad_norm": 0.10841794312000275, "learning_rate": 0.002, "loss": 2.3397, "step": 125210 }, { "epoch": 0.4840655007654126, "grad_norm": 0.1006321832537651, "learning_rate": 0.002, "loss": 2.3404, "step": 125220 }, { "epoch": 0.4841041579687959, "grad_norm": 0.1357637345790863, "learning_rate": 0.002, "loss": 2.3489, "step": 125230 }, { "epoch": 0.48414281517217916, "grad_norm": 0.10982130467891693, "learning_rate": 0.002, "loss": 2.3387, "step": 125240 }, { "epoch": 0.48418147237556247, "grad_norm": 0.09725379198789597, "learning_rate": 0.002, "loss": 2.3589, "step": 125250 }, { "epoch": 0.4842201295789457, "grad_norm": 0.11814479529857635, "learning_rate": 0.002, "loss": 2.3421, "step": 125260 }, { "epoch": 0.48425878678232903, "grad_norm": 0.10812816023826599, "learning_rate": 0.002, "loss": 2.3611, "step": 125270 }, { "epoch": 0.4842974439857123, "grad_norm": 0.10966924577951431, "learning_rate": 0.002, "loss": 2.3474, "step": 125280 }, { "epoch": 0.4843361011890956, "grad_norm": 0.1525818407535553, "learning_rate": 0.002, "loss": 2.3569, "step": 125290 }, { "epoch": 0.48437475839247884, "grad_norm": 0.10309594124555588, "learning_rate": 0.002, "loss": 2.3481, "step": 125300 }, { "epoch": 0.48441341559586215, "grad_norm": 0.12443433701992035, "learning_rate": 0.002, "loss": 2.3391, "step": 125310 }, { "epoch": 0.4844520727992454, "grad_norm": 0.0991070494055748, "learning_rate": 0.002, "loss": 2.3312, "step": 125320 }, { "epoch": 0.4844907300026287, "grad_norm": 0.10828681290149689, "learning_rate": 0.002, "loss": 2.3496, "step": 125330 }, { "epoch": 0.48452938720601196, "grad_norm": 0.10650653392076492, "learning_rate": 0.002, "loss": 2.3526, "step": 125340 }, { "epoch": 0.48456804440939527, "grad_norm": 0.10827747732400894, "learning_rate": 0.002, "loss": 2.3504, "step": 125350 }, { "epoch": 0.4846067016127785, "grad_norm": 0.12869510054588318, "learning_rate": 0.002, "loss": 2.3496, "step": 125360 }, { "epoch": 0.48464535881616183, "grad_norm": 0.11589004844427109, "learning_rate": 0.002, "loss": 2.346, "step": 125370 }, { "epoch": 0.4846840160195451, "grad_norm": 0.29300642013549805, "learning_rate": 0.002, "loss": 2.3338, "step": 125380 }, { "epoch": 0.48472267322292834, "grad_norm": 0.0952758640050888, "learning_rate": 0.002, "loss": 2.3479, "step": 125390 }, { "epoch": 0.48476133042631164, "grad_norm": 0.10119640827178955, "learning_rate": 0.002, "loss": 2.3627, "step": 125400 }, { "epoch": 0.4847999876296949, "grad_norm": 0.10181231796741486, "learning_rate": 0.002, "loss": 2.3567, "step": 125410 }, { "epoch": 0.4848386448330782, "grad_norm": 0.10348128527402878, "learning_rate": 0.002, "loss": 2.3318, "step": 125420 }, { "epoch": 0.48487730203646145, "grad_norm": 0.09146574139595032, "learning_rate": 0.002, "loss": 2.3336, "step": 125430 }, { "epoch": 0.48491595923984476, "grad_norm": 0.10100752860307693, "learning_rate": 0.002, "loss": 2.3469, "step": 125440 }, { "epoch": 0.484954616443228, "grad_norm": 0.108201764523983, "learning_rate": 0.002, "loss": 2.3308, "step": 125450 }, { "epoch": 0.4849932736466113, "grad_norm": 0.10299346596002579, "learning_rate": 0.002, "loss": 2.356, "step": 125460 }, { "epoch": 0.4850319308499946, "grad_norm": 0.12397720664739609, "learning_rate": 0.002, "loss": 2.335, "step": 125470 }, { "epoch": 0.4850705880533779, "grad_norm": 0.1161360964179039, "learning_rate": 0.002, "loss": 2.334, "step": 125480 }, { "epoch": 0.48510924525676113, "grad_norm": 0.11088062077760696, "learning_rate": 0.002, "loss": 2.3374, "step": 125490 }, { "epoch": 0.48514790246014444, "grad_norm": 0.12293235212564468, "learning_rate": 0.002, "loss": 2.3517, "step": 125500 }, { "epoch": 0.4851865596635277, "grad_norm": 0.09698466211557388, "learning_rate": 0.002, "loss": 2.3573, "step": 125510 }, { "epoch": 0.485225216866911, "grad_norm": 0.10574761778116226, "learning_rate": 0.002, "loss": 2.3566, "step": 125520 }, { "epoch": 0.48526387407029425, "grad_norm": 0.10661862790584564, "learning_rate": 0.002, "loss": 2.3532, "step": 125530 }, { "epoch": 0.48530253127367756, "grad_norm": 0.1068946123123169, "learning_rate": 0.002, "loss": 2.3565, "step": 125540 }, { "epoch": 0.4853411884770608, "grad_norm": 0.10147390514612198, "learning_rate": 0.002, "loss": 2.338, "step": 125550 }, { "epoch": 0.48537984568044407, "grad_norm": 0.10097593814134598, "learning_rate": 0.002, "loss": 2.336, "step": 125560 }, { "epoch": 0.4854185028838274, "grad_norm": 0.11848179996013641, "learning_rate": 0.002, "loss": 2.3356, "step": 125570 }, { "epoch": 0.4854571600872106, "grad_norm": 0.10089132189750671, "learning_rate": 0.002, "loss": 2.3486, "step": 125580 }, { "epoch": 0.48549581729059393, "grad_norm": 0.12234753370285034, "learning_rate": 0.002, "loss": 2.3488, "step": 125590 }, { "epoch": 0.4855344744939772, "grad_norm": 0.10000978410243988, "learning_rate": 0.002, "loss": 2.3416, "step": 125600 }, { "epoch": 0.4855731316973605, "grad_norm": 0.11176779121160507, "learning_rate": 0.002, "loss": 2.3357, "step": 125610 }, { "epoch": 0.48561178890074375, "grad_norm": 0.11679171770811081, "learning_rate": 0.002, "loss": 2.3386, "step": 125620 }, { "epoch": 0.48565044610412705, "grad_norm": 0.1252773553133011, "learning_rate": 0.002, "loss": 2.3474, "step": 125630 }, { "epoch": 0.4856891033075103, "grad_norm": 0.11438603699207306, "learning_rate": 0.002, "loss": 2.3522, "step": 125640 }, { "epoch": 0.4857277605108936, "grad_norm": 0.11496452242136002, "learning_rate": 0.002, "loss": 2.3519, "step": 125650 }, { "epoch": 0.48576641771427687, "grad_norm": 0.11636804789304733, "learning_rate": 0.002, "loss": 2.3514, "step": 125660 }, { "epoch": 0.4858050749176602, "grad_norm": 0.11302290111780167, "learning_rate": 0.002, "loss": 2.3426, "step": 125670 }, { "epoch": 0.4858437321210434, "grad_norm": 0.1039479523897171, "learning_rate": 0.002, "loss": 2.3376, "step": 125680 }, { "epoch": 0.48588238932442673, "grad_norm": 0.09467640519142151, "learning_rate": 0.002, "loss": 2.3603, "step": 125690 }, { "epoch": 0.48592104652781, "grad_norm": 0.12681657075881958, "learning_rate": 0.002, "loss": 2.3471, "step": 125700 }, { "epoch": 0.4859597037311933, "grad_norm": 0.09812672436237335, "learning_rate": 0.002, "loss": 2.3507, "step": 125710 }, { "epoch": 0.48599836093457655, "grad_norm": 0.1105395182967186, "learning_rate": 0.002, "loss": 2.3451, "step": 125720 }, { "epoch": 0.48603701813795985, "grad_norm": 0.09311152249574661, "learning_rate": 0.002, "loss": 2.3587, "step": 125730 }, { "epoch": 0.4860756753413431, "grad_norm": 0.1042751669883728, "learning_rate": 0.002, "loss": 2.3508, "step": 125740 }, { "epoch": 0.48611433254472636, "grad_norm": 0.11312663555145264, "learning_rate": 0.002, "loss": 2.3596, "step": 125750 }, { "epoch": 0.48615298974810967, "grad_norm": 0.11612541973590851, "learning_rate": 0.002, "loss": 2.3612, "step": 125760 }, { "epoch": 0.4861916469514929, "grad_norm": 0.09691762179136276, "learning_rate": 0.002, "loss": 2.3416, "step": 125770 }, { "epoch": 0.4862303041548762, "grad_norm": 0.11213060468435287, "learning_rate": 0.002, "loss": 2.3374, "step": 125780 }, { "epoch": 0.4862689613582595, "grad_norm": 0.10857041925191879, "learning_rate": 0.002, "loss": 2.3529, "step": 125790 }, { "epoch": 0.4863076185616428, "grad_norm": 0.15382245182991028, "learning_rate": 0.002, "loss": 2.3514, "step": 125800 }, { "epoch": 0.48634627576502604, "grad_norm": 0.10181780904531479, "learning_rate": 0.002, "loss": 2.3462, "step": 125810 }, { "epoch": 0.48638493296840934, "grad_norm": 0.09610498696565628, "learning_rate": 0.002, "loss": 2.3497, "step": 125820 }, { "epoch": 0.4864235901717926, "grad_norm": 0.11725539714097977, "learning_rate": 0.002, "loss": 2.3567, "step": 125830 }, { "epoch": 0.4864622473751759, "grad_norm": 0.09810120612382889, "learning_rate": 0.002, "loss": 2.3567, "step": 125840 }, { "epoch": 0.48650090457855916, "grad_norm": 0.11101103574037552, "learning_rate": 0.002, "loss": 2.3555, "step": 125850 }, { "epoch": 0.48653956178194246, "grad_norm": 0.10683691501617432, "learning_rate": 0.002, "loss": 2.339, "step": 125860 }, { "epoch": 0.4865782189853257, "grad_norm": 0.10464399307966232, "learning_rate": 0.002, "loss": 2.3436, "step": 125870 }, { "epoch": 0.486616876188709, "grad_norm": 0.11023180186748505, "learning_rate": 0.002, "loss": 2.3483, "step": 125880 }, { "epoch": 0.4866555333920923, "grad_norm": 0.11341548711061478, "learning_rate": 0.002, "loss": 2.3427, "step": 125890 }, { "epoch": 0.4866941905954756, "grad_norm": 0.10827740281820297, "learning_rate": 0.002, "loss": 2.3251, "step": 125900 }, { "epoch": 0.48673284779885884, "grad_norm": 0.10786271095275879, "learning_rate": 0.002, "loss": 2.3536, "step": 125910 }, { "epoch": 0.48677150500224214, "grad_norm": 0.0976567268371582, "learning_rate": 0.002, "loss": 2.3365, "step": 125920 }, { "epoch": 0.4868101622056254, "grad_norm": 0.11942003667354584, "learning_rate": 0.002, "loss": 2.3467, "step": 125930 }, { "epoch": 0.48684881940900865, "grad_norm": 0.10531553626060486, "learning_rate": 0.002, "loss": 2.3317, "step": 125940 }, { "epoch": 0.48688747661239196, "grad_norm": 0.1033153161406517, "learning_rate": 0.002, "loss": 2.3432, "step": 125950 }, { "epoch": 0.4869261338157752, "grad_norm": 0.0929771140217781, "learning_rate": 0.002, "loss": 2.3618, "step": 125960 }, { "epoch": 0.4869647910191585, "grad_norm": 0.13447654247283936, "learning_rate": 0.002, "loss": 2.3391, "step": 125970 }, { "epoch": 0.48700344822254177, "grad_norm": 0.11186330020427704, "learning_rate": 0.002, "loss": 2.3398, "step": 125980 }, { "epoch": 0.4870421054259251, "grad_norm": 0.11655236035585403, "learning_rate": 0.002, "loss": 2.3514, "step": 125990 }, { "epoch": 0.48708076262930833, "grad_norm": 0.11176680773496628, "learning_rate": 0.002, "loss": 2.3532, "step": 126000 }, { "epoch": 0.48711941983269164, "grad_norm": 0.11391759663820267, "learning_rate": 0.002, "loss": 2.3466, "step": 126010 }, { "epoch": 0.4871580770360749, "grad_norm": 0.10402825474739075, "learning_rate": 0.002, "loss": 2.3561, "step": 126020 }, { "epoch": 0.4871967342394582, "grad_norm": 0.11938010901212692, "learning_rate": 0.002, "loss": 2.3569, "step": 126030 }, { "epoch": 0.48723539144284145, "grad_norm": 0.10057584196329117, "learning_rate": 0.002, "loss": 2.3582, "step": 126040 }, { "epoch": 0.48727404864622476, "grad_norm": 0.10689040273427963, "learning_rate": 0.002, "loss": 2.3566, "step": 126050 }, { "epoch": 0.487312705849608, "grad_norm": 0.09272339940071106, "learning_rate": 0.002, "loss": 2.3309, "step": 126060 }, { "epoch": 0.4873513630529913, "grad_norm": 0.11007925868034363, "learning_rate": 0.002, "loss": 2.3448, "step": 126070 }, { "epoch": 0.48739002025637457, "grad_norm": 0.09506697207689285, "learning_rate": 0.002, "loss": 2.3471, "step": 126080 }, { "epoch": 0.4874286774597579, "grad_norm": 0.12454500794410706, "learning_rate": 0.002, "loss": 2.3579, "step": 126090 }, { "epoch": 0.4874673346631411, "grad_norm": 0.10810870677232742, "learning_rate": 0.002, "loss": 2.3422, "step": 126100 }, { "epoch": 0.48750599186652444, "grad_norm": 0.10550742596387863, "learning_rate": 0.002, "loss": 2.3567, "step": 126110 }, { "epoch": 0.4875446490699077, "grad_norm": 0.10646315664052963, "learning_rate": 0.002, "loss": 2.3447, "step": 126120 }, { "epoch": 0.48758330627329094, "grad_norm": 0.10827473551034927, "learning_rate": 0.002, "loss": 2.3435, "step": 126130 }, { "epoch": 0.48762196347667425, "grad_norm": 0.11294981837272644, "learning_rate": 0.002, "loss": 2.357, "step": 126140 }, { "epoch": 0.4876606206800575, "grad_norm": 0.10049781948328018, "learning_rate": 0.002, "loss": 2.3499, "step": 126150 }, { "epoch": 0.4876992778834408, "grad_norm": 0.12226846069097519, "learning_rate": 0.002, "loss": 2.3389, "step": 126160 }, { "epoch": 0.48773793508682406, "grad_norm": 0.12626990675926208, "learning_rate": 0.002, "loss": 2.3466, "step": 126170 }, { "epoch": 0.48777659229020737, "grad_norm": 0.1084417849779129, "learning_rate": 0.002, "loss": 2.334, "step": 126180 }, { "epoch": 0.4878152494935906, "grad_norm": 0.11008724570274353, "learning_rate": 0.002, "loss": 2.3594, "step": 126190 }, { "epoch": 0.4878539066969739, "grad_norm": 0.10245554149150848, "learning_rate": 0.002, "loss": 2.3416, "step": 126200 }, { "epoch": 0.4878925639003572, "grad_norm": 0.11041338741779327, "learning_rate": 0.002, "loss": 2.338, "step": 126210 }, { "epoch": 0.4879312211037405, "grad_norm": 0.1254657804965973, "learning_rate": 0.002, "loss": 2.3495, "step": 126220 }, { "epoch": 0.48796987830712374, "grad_norm": 0.10623673349618912, "learning_rate": 0.002, "loss": 2.3488, "step": 126230 }, { "epoch": 0.48800853551050705, "grad_norm": 0.12370909750461578, "learning_rate": 0.002, "loss": 2.348, "step": 126240 }, { "epoch": 0.4880471927138903, "grad_norm": 0.11274704337120056, "learning_rate": 0.002, "loss": 2.3387, "step": 126250 }, { "epoch": 0.4880858499172736, "grad_norm": 0.11869961023330688, "learning_rate": 0.002, "loss": 2.3469, "step": 126260 }, { "epoch": 0.48812450712065686, "grad_norm": 0.1175747886300087, "learning_rate": 0.002, "loss": 2.3399, "step": 126270 }, { "epoch": 0.48816316432404017, "grad_norm": 0.27420172095298767, "learning_rate": 0.002, "loss": 2.3672, "step": 126280 }, { "epoch": 0.4882018215274234, "grad_norm": 0.11592134833335876, "learning_rate": 0.002, "loss": 2.3461, "step": 126290 }, { "epoch": 0.4882404787308067, "grad_norm": 0.1103459820151329, "learning_rate": 0.002, "loss": 2.3453, "step": 126300 }, { "epoch": 0.48827913593419, "grad_norm": 0.09347846359014511, "learning_rate": 0.002, "loss": 2.3295, "step": 126310 }, { "epoch": 0.48831779313757323, "grad_norm": 0.1197313666343689, "learning_rate": 0.002, "loss": 2.3386, "step": 126320 }, { "epoch": 0.48835645034095654, "grad_norm": 0.11836214363574982, "learning_rate": 0.002, "loss": 2.3537, "step": 126330 }, { "epoch": 0.4883951075443398, "grad_norm": 0.10949929803609848, "learning_rate": 0.002, "loss": 2.3324, "step": 126340 }, { "epoch": 0.4884337647477231, "grad_norm": 0.10934491455554962, "learning_rate": 0.002, "loss": 2.3502, "step": 126350 }, { "epoch": 0.48847242195110635, "grad_norm": 0.09713473916053772, "learning_rate": 0.002, "loss": 2.359, "step": 126360 }, { "epoch": 0.48851107915448966, "grad_norm": 0.1307293176651001, "learning_rate": 0.002, "loss": 2.3408, "step": 126370 }, { "epoch": 0.4885497363578729, "grad_norm": 0.10963564366102219, "learning_rate": 0.002, "loss": 2.346, "step": 126380 }, { "epoch": 0.4885883935612562, "grad_norm": 0.10998211055994034, "learning_rate": 0.002, "loss": 2.3465, "step": 126390 }, { "epoch": 0.48862705076463947, "grad_norm": 0.10524381697177887, "learning_rate": 0.002, "loss": 2.3452, "step": 126400 }, { "epoch": 0.4886657079680228, "grad_norm": 0.11870896816253662, "learning_rate": 0.002, "loss": 2.3286, "step": 126410 }, { "epoch": 0.48870436517140603, "grad_norm": 0.11174721270799637, "learning_rate": 0.002, "loss": 2.3412, "step": 126420 }, { "epoch": 0.48874302237478934, "grad_norm": 0.14074304699897766, "learning_rate": 0.002, "loss": 2.3352, "step": 126430 }, { "epoch": 0.4887816795781726, "grad_norm": 0.10730758309364319, "learning_rate": 0.002, "loss": 2.3372, "step": 126440 }, { "epoch": 0.4888203367815559, "grad_norm": 0.10556095838546753, "learning_rate": 0.002, "loss": 2.3419, "step": 126450 }, { "epoch": 0.48885899398493915, "grad_norm": 0.10455413907766342, "learning_rate": 0.002, "loss": 2.3448, "step": 126460 }, { "epoch": 0.48889765118832246, "grad_norm": 0.2152157425880432, "learning_rate": 0.002, "loss": 2.3407, "step": 126470 }, { "epoch": 0.4889363083917057, "grad_norm": 0.10853318870067596, "learning_rate": 0.002, "loss": 2.3448, "step": 126480 }, { "epoch": 0.48897496559508896, "grad_norm": 0.11397414654493332, "learning_rate": 0.002, "loss": 2.3441, "step": 126490 }, { "epoch": 0.48901362279847227, "grad_norm": 0.09437750279903412, "learning_rate": 0.002, "loss": 2.3538, "step": 126500 }, { "epoch": 0.4890522800018555, "grad_norm": 0.10448405146598816, "learning_rate": 0.002, "loss": 2.3559, "step": 126510 }, { "epoch": 0.48909093720523883, "grad_norm": 0.1160251796245575, "learning_rate": 0.002, "loss": 2.3322, "step": 126520 }, { "epoch": 0.4891295944086221, "grad_norm": 0.09574171900749207, "learning_rate": 0.002, "loss": 2.338, "step": 126530 }, { "epoch": 0.4891682516120054, "grad_norm": 0.12774688005447388, "learning_rate": 0.002, "loss": 2.3459, "step": 126540 }, { "epoch": 0.48920690881538864, "grad_norm": 0.106713205575943, "learning_rate": 0.002, "loss": 2.34, "step": 126550 }, { "epoch": 0.48924556601877195, "grad_norm": 0.11310985684394836, "learning_rate": 0.002, "loss": 2.3547, "step": 126560 }, { "epoch": 0.4892842232221552, "grad_norm": 0.10112718492746353, "learning_rate": 0.002, "loss": 2.3447, "step": 126570 }, { "epoch": 0.4893228804255385, "grad_norm": 0.11339341849088669, "learning_rate": 0.002, "loss": 2.3455, "step": 126580 }, { "epoch": 0.48936153762892176, "grad_norm": 0.10261591523885727, "learning_rate": 0.002, "loss": 2.3627, "step": 126590 }, { "epoch": 0.48940019483230507, "grad_norm": 0.10041617602109909, "learning_rate": 0.002, "loss": 2.3497, "step": 126600 }, { "epoch": 0.4894388520356883, "grad_norm": 0.10402018576860428, "learning_rate": 0.002, "loss": 2.3498, "step": 126610 }, { "epoch": 0.48947750923907163, "grad_norm": 0.09408276528120041, "learning_rate": 0.002, "loss": 2.3469, "step": 126620 }, { "epoch": 0.4895161664424549, "grad_norm": 0.10646459460258484, "learning_rate": 0.002, "loss": 2.3395, "step": 126630 }, { "epoch": 0.4895548236458382, "grad_norm": 0.12016189843416214, "learning_rate": 0.002, "loss": 2.3293, "step": 126640 }, { "epoch": 0.48959348084922144, "grad_norm": 0.0968141183257103, "learning_rate": 0.002, "loss": 2.3467, "step": 126650 }, { "epoch": 0.48963213805260475, "grad_norm": 0.10623986274003983, "learning_rate": 0.002, "loss": 2.3387, "step": 126660 }, { "epoch": 0.489670795255988, "grad_norm": 0.10878663510084152, "learning_rate": 0.002, "loss": 2.3476, "step": 126670 }, { "epoch": 0.48970945245937125, "grad_norm": 0.10133571922779083, "learning_rate": 0.002, "loss": 2.3272, "step": 126680 }, { "epoch": 0.48974810966275456, "grad_norm": 0.1187170147895813, "learning_rate": 0.002, "loss": 2.3452, "step": 126690 }, { "epoch": 0.4897867668661378, "grad_norm": 0.09845730662345886, "learning_rate": 0.002, "loss": 2.3534, "step": 126700 }, { "epoch": 0.4898254240695211, "grad_norm": 0.12468785792589188, "learning_rate": 0.002, "loss": 2.3601, "step": 126710 }, { "epoch": 0.4898640812729044, "grad_norm": 0.09631633758544922, "learning_rate": 0.002, "loss": 2.3453, "step": 126720 }, { "epoch": 0.4899027384762877, "grad_norm": 0.10853462666273117, "learning_rate": 0.002, "loss": 2.3336, "step": 126730 }, { "epoch": 0.48994139567967093, "grad_norm": 0.10383742302656174, "learning_rate": 0.002, "loss": 2.3564, "step": 126740 }, { "epoch": 0.48998005288305424, "grad_norm": 0.1175794005393982, "learning_rate": 0.002, "loss": 2.3251, "step": 126750 }, { "epoch": 0.4900187100864375, "grad_norm": 0.10090035945177078, "learning_rate": 0.002, "loss": 2.3436, "step": 126760 }, { "epoch": 0.4900573672898208, "grad_norm": 0.09507987648248672, "learning_rate": 0.002, "loss": 2.3451, "step": 126770 }, { "epoch": 0.49009602449320405, "grad_norm": 0.12469930946826935, "learning_rate": 0.002, "loss": 2.3314, "step": 126780 }, { "epoch": 0.49013468169658736, "grad_norm": 0.10618849843740463, "learning_rate": 0.002, "loss": 2.3398, "step": 126790 }, { "epoch": 0.4901733388999706, "grad_norm": 0.11613799631595612, "learning_rate": 0.002, "loss": 2.3665, "step": 126800 }, { "epoch": 0.4902119961033539, "grad_norm": 0.11558814346790314, "learning_rate": 0.002, "loss": 2.3455, "step": 126810 }, { "epoch": 0.49025065330673717, "grad_norm": 0.11019840091466904, "learning_rate": 0.002, "loss": 2.3232, "step": 126820 }, { "epoch": 0.4902893105101205, "grad_norm": 0.11108105629682541, "learning_rate": 0.002, "loss": 2.3464, "step": 126830 }, { "epoch": 0.49032796771350373, "grad_norm": 0.11231342703104019, "learning_rate": 0.002, "loss": 2.3448, "step": 126840 }, { "epoch": 0.49036662491688704, "grad_norm": 0.10218144208192825, "learning_rate": 0.002, "loss": 2.3412, "step": 126850 }, { "epoch": 0.4904052821202703, "grad_norm": 0.11010507494211197, "learning_rate": 0.002, "loss": 2.3713, "step": 126860 }, { "epoch": 0.49044393932365354, "grad_norm": 0.11340287327766418, "learning_rate": 0.002, "loss": 2.3643, "step": 126870 }, { "epoch": 0.49048259652703685, "grad_norm": 0.09621971100568771, "learning_rate": 0.002, "loss": 2.3508, "step": 126880 }, { "epoch": 0.4905212537304201, "grad_norm": 0.12170908600091934, "learning_rate": 0.002, "loss": 2.3577, "step": 126890 }, { "epoch": 0.4905599109338034, "grad_norm": 0.10535162687301636, "learning_rate": 0.002, "loss": 2.3443, "step": 126900 }, { "epoch": 0.49059856813718666, "grad_norm": 0.1528225988149643, "learning_rate": 0.002, "loss": 2.3412, "step": 126910 }, { "epoch": 0.49063722534056997, "grad_norm": 0.12900884449481964, "learning_rate": 0.002, "loss": 2.3476, "step": 126920 }, { "epoch": 0.4906758825439532, "grad_norm": 0.1097668707370758, "learning_rate": 0.002, "loss": 2.3626, "step": 126930 }, { "epoch": 0.49071453974733653, "grad_norm": 0.10988660156726837, "learning_rate": 0.002, "loss": 2.3426, "step": 126940 }, { "epoch": 0.4907531969507198, "grad_norm": 0.09913916140794754, "learning_rate": 0.002, "loss": 2.3648, "step": 126950 }, { "epoch": 0.4907918541541031, "grad_norm": 0.09544407576322556, "learning_rate": 0.002, "loss": 2.3431, "step": 126960 }, { "epoch": 0.49083051135748634, "grad_norm": 0.11369612067937851, "learning_rate": 0.002, "loss": 2.3533, "step": 126970 }, { "epoch": 0.49086916856086965, "grad_norm": 0.10173063725233078, "learning_rate": 0.002, "loss": 2.3512, "step": 126980 }, { "epoch": 0.4909078257642529, "grad_norm": 0.10611432045698166, "learning_rate": 0.002, "loss": 2.34, "step": 126990 }, { "epoch": 0.4909464829676362, "grad_norm": 0.10398635268211365, "learning_rate": 0.002, "loss": 2.3604, "step": 127000 }, { "epoch": 0.49098514017101946, "grad_norm": 0.11209771037101746, "learning_rate": 0.002, "loss": 2.3575, "step": 127010 }, { "epoch": 0.49102379737440277, "grad_norm": 0.10647541284561157, "learning_rate": 0.002, "loss": 2.3456, "step": 127020 }, { "epoch": 0.491062454577786, "grad_norm": 0.11205994337797165, "learning_rate": 0.002, "loss": 2.3477, "step": 127030 }, { "epoch": 0.49110111178116933, "grad_norm": 0.10731545090675354, "learning_rate": 0.002, "loss": 2.3357, "step": 127040 }, { "epoch": 0.4911397689845526, "grad_norm": 0.13301315903663635, "learning_rate": 0.002, "loss": 2.3374, "step": 127050 }, { "epoch": 0.49117842618793583, "grad_norm": 0.1114644855260849, "learning_rate": 0.002, "loss": 2.3473, "step": 127060 }, { "epoch": 0.49121708339131914, "grad_norm": 0.09928295761346817, "learning_rate": 0.002, "loss": 2.3424, "step": 127070 }, { "epoch": 0.4912557405947024, "grad_norm": 0.14364327490329742, "learning_rate": 0.002, "loss": 2.3439, "step": 127080 }, { "epoch": 0.4912943977980857, "grad_norm": 0.11864378303289413, "learning_rate": 0.002, "loss": 2.3495, "step": 127090 }, { "epoch": 0.49133305500146895, "grad_norm": 0.09710802137851715, "learning_rate": 0.002, "loss": 2.3257, "step": 127100 }, { "epoch": 0.49137171220485226, "grad_norm": 0.10308346152305603, "learning_rate": 0.002, "loss": 2.3465, "step": 127110 }, { "epoch": 0.4914103694082355, "grad_norm": 0.10085774958133698, "learning_rate": 0.002, "loss": 2.3487, "step": 127120 }, { "epoch": 0.4914490266116188, "grad_norm": 0.11910171061754227, "learning_rate": 0.002, "loss": 2.3446, "step": 127130 }, { "epoch": 0.4914876838150021, "grad_norm": 0.11054100096225739, "learning_rate": 0.002, "loss": 2.3389, "step": 127140 }, { "epoch": 0.4915263410183854, "grad_norm": 0.1082221195101738, "learning_rate": 0.002, "loss": 2.3487, "step": 127150 }, { "epoch": 0.49156499822176863, "grad_norm": 0.11659803986549377, "learning_rate": 0.002, "loss": 2.3616, "step": 127160 }, { "epoch": 0.49160365542515194, "grad_norm": 0.11233268678188324, "learning_rate": 0.002, "loss": 2.351, "step": 127170 }, { "epoch": 0.4916423126285352, "grad_norm": 0.10978901386260986, "learning_rate": 0.002, "loss": 2.3406, "step": 127180 }, { "epoch": 0.4916809698319185, "grad_norm": 0.1231280267238617, "learning_rate": 0.002, "loss": 2.3315, "step": 127190 }, { "epoch": 0.49171962703530175, "grad_norm": 0.11095661669969559, "learning_rate": 0.002, "loss": 2.344, "step": 127200 }, { "epoch": 0.49175828423868506, "grad_norm": 0.11212368309497833, "learning_rate": 0.002, "loss": 2.3558, "step": 127210 }, { "epoch": 0.4917969414420683, "grad_norm": 0.12635929882526398, "learning_rate": 0.002, "loss": 2.3519, "step": 127220 }, { "epoch": 0.49183559864545157, "grad_norm": 0.11959027498960495, "learning_rate": 0.002, "loss": 2.3456, "step": 127230 }, { "epoch": 0.4918742558488349, "grad_norm": 0.11061688512563705, "learning_rate": 0.002, "loss": 2.3547, "step": 127240 }, { "epoch": 0.4919129130522181, "grad_norm": 0.09959302842617035, "learning_rate": 0.002, "loss": 2.3521, "step": 127250 }, { "epoch": 0.49195157025560143, "grad_norm": 0.10022852569818497, "learning_rate": 0.002, "loss": 2.3401, "step": 127260 }, { "epoch": 0.4919902274589847, "grad_norm": 0.13130536675453186, "learning_rate": 0.002, "loss": 2.3427, "step": 127270 }, { "epoch": 0.492028884662368, "grad_norm": 0.31289398670196533, "learning_rate": 0.002, "loss": 2.3478, "step": 127280 }, { "epoch": 0.49206754186575125, "grad_norm": 0.09189872443675995, "learning_rate": 0.002, "loss": 2.354, "step": 127290 }, { "epoch": 0.49210619906913455, "grad_norm": 0.11592382937669754, "learning_rate": 0.002, "loss": 2.3511, "step": 127300 }, { "epoch": 0.4921448562725178, "grad_norm": 0.10866158455610275, "learning_rate": 0.002, "loss": 2.3514, "step": 127310 }, { "epoch": 0.4921835134759011, "grad_norm": 0.11046077311038971, "learning_rate": 0.002, "loss": 2.3446, "step": 127320 }, { "epoch": 0.49222217067928437, "grad_norm": 0.11631123721599579, "learning_rate": 0.002, "loss": 2.3453, "step": 127330 }, { "epoch": 0.4922608278826677, "grad_norm": 0.1088087409734726, "learning_rate": 0.002, "loss": 2.3282, "step": 127340 }, { "epoch": 0.4922994850860509, "grad_norm": 0.11409708112478256, "learning_rate": 0.002, "loss": 2.3637, "step": 127350 }, { "epoch": 0.49233814228943423, "grad_norm": 0.10936682671308517, "learning_rate": 0.002, "loss": 2.3619, "step": 127360 }, { "epoch": 0.4923767994928175, "grad_norm": 0.13295626640319824, "learning_rate": 0.002, "loss": 2.347, "step": 127370 }, { "epoch": 0.4924154566962008, "grad_norm": 0.1150369718670845, "learning_rate": 0.002, "loss": 2.3408, "step": 127380 }, { "epoch": 0.49245411389958405, "grad_norm": 0.11830049008131027, "learning_rate": 0.002, "loss": 2.3499, "step": 127390 }, { "epoch": 0.49249277110296735, "grad_norm": 0.10365139693021774, "learning_rate": 0.002, "loss": 2.331, "step": 127400 }, { "epoch": 0.4925314283063506, "grad_norm": 0.10849923640489578, "learning_rate": 0.002, "loss": 2.342, "step": 127410 }, { "epoch": 0.49257008550973386, "grad_norm": 0.11630803346633911, "learning_rate": 0.002, "loss": 2.3479, "step": 127420 }, { "epoch": 0.49260874271311716, "grad_norm": 0.11408057808876038, "learning_rate": 0.002, "loss": 2.3516, "step": 127430 }, { "epoch": 0.4926473999165004, "grad_norm": 0.10046610981225967, "learning_rate": 0.002, "loss": 2.3642, "step": 127440 }, { "epoch": 0.4926860571198837, "grad_norm": 0.1012267991900444, "learning_rate": 0.002, "loss": 2.3305, "step": 127450 }, { "epoch": 0.492724714323267, "grad_norm": 0.11138573288917542, "learning_rate": 0.002, "loss": 2.3532, "step": 127460 }, { "epoch": 0.4927633715266503, "grad_norm": 0.10981214046478271, "learning_rate": 0.002, "loss": 2.3457, "step": 127470 }, { "epoch": 0.49280202873003354, "grad_norm": 0.11571098864078522, "learning_rate": 0.002, "loss": 2.3464, "step": 127480 }, { "epoch": 0.49284068593341684, "grad_norm": 0.11805303394794464, "learning_rate": 0.002, "loss": 2.3544, "step": 127490 }, { "epoch": 0.4928793431368001, "grad_norm": 0.09402794390916824, "learning_rate": 0.002, "loss": 2.3508, "step": 127500 }, { "epoch": 0.4929180003401834, "grad_norm": 0.1224559023976326, "learning_rate": 0.002, "loss": 2.3476, "step": 127510 }, { "epoch": 0.49295665754356666, "grad_norm": 0.09864436089992523, "learning_rate": 0.002, "loss": 2.3435, "step": 127520 }, { "epoch": 0.49299531474694996, "grad_norm": 0.10893671214580536, "learning_rate": 0.002, "loss": 2.346, "step": 127530 }, { "epoch": 0.4930339719503332, "grad_norm": 0.11042977124452591, "learning_rate": 0.002, "loss": 2.3544, "step": 127540 }, { "epoch": 0.4930726291537165, "grad_norm": 0.1300388127565384, "learning_rate": 0.002, "loss": 2.3319, "step": 127550 }, { "epoch": 0.4931112863570998, "grad_norm": 0.10813228785991669, "learning_rate": 0.002, "loss": 2.3638, "step": 127560 }, { "epoch": 0.4931499435604831, "grad_norm": 0.10544244199991226, "learning_rate": 0.002, "loss": 2.3573, "step": 127570 }, { "epoch": 0.49318860076386634, "grad_norm": 0.09903642535209656, "learning_rate": 0.002, "loss": 2.3548, "step": 127580 }, { "epoch": 0.49322725796724964, "grad_norm": 0.11719724535942078, "learning_rate": 0.002, "loss": 2.3382, "step": 127590 }, { "epoch": 0.4932659151706329, "grad_norm": 0.11094460636377335, "learning_rate": 0.002, "loss": 2.3597, "step": 127600 }, { "epoch": 0.49330457237401615, "grad_norm": 0.12684324383735657, "learning_rate": 0.002, "loss": 2.3495, "step": 127610 }, { "epoch": 0.49334322957739946, "grad_norm": 0.11697521805763245, "learning_rate": 0.002, "loss": 2.3403, "step": 127620 }, { "epoch": 0.4933818867807827, "grad_norm": 0.11327816545963287, "learning_rate": 0.002, "loss": 2.3526, "step": 127630 }, { "epoch": 0.493420543984166, "grad_norm": 0.09532005339860916, "learning_rate": 0.002, "loss": 2.3481, "step": 127640 }, { "epoch": 0.49345920118754927, "grad_norm": 0.1013709083199501, "learning_rate": 0.002, "loss": 2.3625, "step": 127650 }, { "epoch": 0.4934978583909326, "grad_norm": 0.09491346776485443, "learning_rate": 0.002, "loss": 2.344, "step": 127660 }, { "epoch": 0.4935365155943158, "grad_norm": 0.11858315020799637, "learning_rate": 0.002, "loss": 2.3539, "step": 127670 }, { "epoch": 0.49357517279769914, "grad_norm": 0.10509752482175827, "learning_rate": 0.002, "loss": 2.3472, "step": 127680 }, { "epoch": 0.4936138300010824, "grad_norm": 0.11521925032138824, "learning_rate": 0.002, "loss": 2.3537, "step": 127690 }, { "epoch": 0.4936524872044657, "grad_norm": 0.11256805062294006, "learning_rate": 0.002, "loss": 2.351, "step": 127700 }, { "epoch": 0.49369114440784895, "grad_norm": 0.11606287211179733, "learning_rate": 0.002, "loss": 2.3513, "step": 127710 }, { "epoch": 0.49372980161123226, "grad_norm": 0.10571981966495514, "learning_rate": 0.002, "loss": 2.3427, "step": 127720 }, { "epoch": 0.4937684588146155, "grad_norm": 0.12093351036310196, "learning_rate": 0.002, "loss": 2.3496, "step": 127730 }, { "epoch": 0.4938071160179988, "grad_norm": 0.1062152162194252, "learning_rate": 0.002, "loss": 2.3265, "step": 127740 }, { "epoch": 0.49384577322138207, "grad_norm": 0.10367966443300247, "learning_rate": 0.002, "loss": 2.3487, "step": 127750 }, { "epoch": 0.4938844304247654, "grad_norm": 0.127140611410141, "learning_rate": 0.002, "loss": 2.3385, "step": 127760 }, { "epoch": 0.4939230876281486, "grad_norm": 0.10023011267185211, "learning_rate": 0.002, "loss": 2.3546, "step": 127770 }, { "epoch": 0.49396174483153193, "grad_norm": 0.10536835342645645, "learning_rate": 0.002, "loss": 2.3428, "step": 127780 }, { "epoch": 0.4940004020349152, "grad_norm": 0.10575197637081146, "learning_rate": 0.002, "loss": 2.3402, "step": 127790 }, { "epoch": 0.49403905923829844, "grad_norm": 0.10220493376255035, "learning_rate": 0.002, "loss": 2.3371, "step": 127800 }, { "epoch": 0.49407771644168175, "grad_norm": 0.10080619156360626, "learning_rate": 0.002, "loss": 2.3647, "step": 127810 }, { "epoch": 0.494116373645065, "grad_norm": 0.10619170218706131, "learning_rate": 0.002, "loss": 2.3444, "step": 127820 }, { "epoch": 0.4941550308484483, "grad_norm": 0.13515208661556244, "learning_rate": 0.002, "loss": 2.3549, "step": 127830 }, { "epoch": 0.49419368805183156, "grad_norm": 0.11757437884807587, "learning_rate": 0.002, "loss": 2.3567, "step": 127840 }, { "epoch": 0.49423234525521487, "grad_norm": 0.10760632157325745, "learning_rate": 0.002, "loss": 2.3383, "step": 127850 }, { "epoch": 0.4942710024585981, "grad_norm": 0.10090624541044235, "learning_rate": 0.002, "loss": 2.3306, "step": 127860 }, { "epoch": 0.4943096596619814, "grad_norm": 0.11018454283475876, "learning_rate": 0.002, "loss": 2.3349, "step": 127870 }, { "epoch": 0.4943483168653647, "grad_norm": 0.1165466159582138, "learning_rate": 0.002, "loss": 2.3524, "step": 127880 }, { "epoch": 0.494386974068748, "grad_norm": 0.1118311733007431, "learning_rate": 0.002, "loss": 2.3531, "step": 127890 }, { "epoch": 0.49442563127213124, "grad_norm": 0.10799223929643631, "learning_rate": 0.002, "loss": 2.3405, "step": 127900 }, { "epoch": 0.49446428847551455, "grad_norm": 0.10268426686525345, "learning_rate": 0.002, "loss": 2.3346, "step": 127910 }, { "epoch": 0.4945029456788978, "grad_norm": 0.11427894234657288, "learning_rate": 0.002, "loss": 2.3423, "step": 127920 }, { "epoch": 0.4945416028822811, "grad_norm": 0.10355595499277115, "learning_rate": 0.002, "loss": 2.3461, "step": 127930 }, { "epoch": 0.49458026008566436, "grad_norm": 0.11134828627109528, "learning_rate": 0.002, "loss": 2.3521, "step": 127940 }, { "epoch": 0.49461891728904767, "grad_norm": 0.10238852351903915, "learning_rate": 0.002, "loss": 2.3465, "step": 127950 }, { "epoch": 0.4946575744924309, "grad_norm": 0.11048177629709244, "learning_rate": 0.002, "loss": 2.3455, "step": 127960 }, { "epoch": 0.49469623169581417, "grad_norm": 0.11856822669506073, "learning_rate": 0.002, "loss": 2.369, "step": 127970 }, { "epoch": 0.4947348888991975, "grad_norm": 0.10213296115398407, "learning_rate": 0.002, "loss": 2.3558, "step": 127980 }, { "epoch": 0.49477354610258073, "grad_norm": 0.11366453021764755, "learning_rate": 0.002, "loss": 2.348, "step": 127990 }, { "epoch": 0.49481220330596404, "grad_norm": 0.10160555690526962, "learning_rate": 0.002, "loss": 2.3448, "step": 128000 }, { "epoch": 0.4948508605093473, "grad_norm": 0.10848793387413025, "learning_rate": 0.002, "loss": 2.3468, "step": 128010 }, { "epoch": 0.4948895177127306, "grad_norm": 0.09237051010131836, "learning_rate": 0.002, "loss": 2.363, "step": 128020 }, { "epoch": 0.49492817491611385, "grad_norm": 0.11140402406454086, "learning_rate": 0.002, "loss": 2.3391, "step": 128030 }, { "epoch": 0.49496683211949716, "grad_norm": 0.109286829829216, "learning_rate": 0.002, "loss": 2.3368, "step": 128040 }, { "epoch": 0.4950054893228804, "grad_norm": 0.1083264946937561, "learning_rate": 0.002, "loss": 2.3517, "step": 128050 }, { "epoch": 0.4950441465262637, "grad_norm": 0.1095544695854187, "learning_rate": 0.002, "loss": 2.3609, "step": 128060 }, { "epoch": 0.49508280372964697, "grad_norm": 0.12022789567708969, "learning_rate": 0.002, "loss": 2.3534, "step": 128070 }, { "epoch": 0.4951214609330303, "grad_norm": 0.10353036969900131, "learning_rate": 0.002, "loss": 2.34, "step": 128080 }, { "epoch": 0.49516011813641353, "grad_norm": 0.1073271781206131, "learning_rate": 0.002, "loss": 2.3561, "step": 128090 }, { "epoch": 0.49519877533979684, "grad_norm": 0.09842212498188019, "learning_rate": 0.002, "loss": 2.3319, "step": 128100 }, { "epoch": 0.4952374325431801, "grad_norm": 0.09421800076961517, "learning_rate": 0.002, "loss": 2.3318, "step": 128110 }, { "epoch": 0.4952760897465634, "grad_norm": 0.10729176551103592, "learning_rate": 0.002, "loss": 2.3343, "step": 128120 }, { "epoch": 0.49531474694994665, "grad_norm": 0.09626664221286774, "learning_rate": 0.002, "loss": 2.3348, "step": 128130 }, { "epoch": 0.49535340415332996, "grad_norm": 0.09824251383543015, "learning_rate": 0.002, "loss": 2.3314, "step": 128140 }, { "epoch": 0.4953920613567132, "grad_norm": 0.1067798063158989, "learning_rate": 0.002, "loss": 2.3388, "step": 128150 }, { "epoch": 0.49543071856009646, "grad_norm": 0.09855811297893524, "learning_rate": 0.002, "loss": 2.3609, "step": 128160 }, { "epoch": 0.49546937576347977, "grad_norm": 0.11580448597669601, "learning_rate": 0.002, "loss": 2.3351, "step": 128170 }, { "epoch": 0.495508032966863, "grad_norm": 0.10759995877742767, "learning_rate": 0.002, "loss": 2.3494, "step": 128180 }, { "epoch": 0.49554669017024633, "grad_norm": 0.09637139737606049, "learning_rate": 0.002, "loss": 2.3365, "step": 128190 }, { "epoch": 0.4955853473736296, "grad_norm": 0.0985492467880249, "learning_rate": 0.002, "loss": 2.3575, "step": 128200 }, { "epoch": 0.4956240045770129, "grad_norm": 0.10081609338521957, "learning_rate": 0.002, "loss": 2.3498, "step": 128210 }, { "epoch": 0.49566266178039614, "grad_norm": 0.10274836421012878, "learning_rate": 0.002, "loss": 2.3351, "step": 128220 }, { "epoch": 0.49570131898377945, "grad_norm": 0.10830628126859665, "learning_rate": 0.002, "loss": 2.347, "step": 128230 }, { "epoch": 0.4957399761871627, "grad_norm": 0.11750640720129013, "learning_rate": 0.002, "loss": 2.3452, "step": 128240 }, { "epoch": 0.495778633390546, "grad_norm": 0.10376337170600891, "learning_rate": 0.002, "loss": 2.3559, "step": 128250 }, { "epoch": 0.49581729059392926, "grad_norm": 0.13361400365829468, "learning_rate": 0.002, "loss": 2.3444, "step": 128260 }, { "epoch": 0.49585594779731257, "grad_norm": 0.11120904237031937, "learning_rate": 0.002, "loss": 2.336, "step": 128270 }, { "epoch": 0.4958946050006958, "grad_norm": 0.10228724032640457, "learning_rate": 0.002, "loss": 2.3362, "step": 128280 }, { "epoch": 0.49593326220407913, "grad_norm": 0.11770356446504593, "learning_rate": 0.002, "loss": 2.332, "step": 128290 }, { "epoch": 0.4959719194074624, "grad_norm": 0.10043749958276749, "learning_rate": 0.002, "loss": 2.3359, "step": 128300 }, { "epoch": 0.4960105766108457, "grad_norm": 0.10765615850687027, "learning_rate": 0.002, "loss": 2.3377, "step": 128310 }, { "epoch": 0.49604923381422894, "grad_norm": 0.11104804277420044, "learning_rate": 0.002, "loss": 2.3448, "step": 128320 }, { "epoch": 0.49608789101761225, "grad_norm": 0.11131534725427628, "learning_rate": 0.002, "loss": 2.3376, "step": 128330 }, { "epoch": 0.4961265482209955, "grad_norm": 0.10066875070333481, "learning_rate": 0.002, "loss": 2.333, "step": 128340 }, { "epoch": 0.49616520542437875, "grad_norm": 0.11946538835763931, "learning_rate": 0.002, "loss": 2.3415, "step": 128350 }, { "epoch": 0.49620386262776206, "grad_norm": 0.10993815213441849, "learning_rate": 0.002, "loss": 2.3514, "step": 128360 }, { "epoch": 0.4962425198311453, "grad_norm": 0.11155513674020767, "learning_rate": 0.002, "loss": 2.3501, "step": 128370 }, { "epoch": 0.4962811770345286, "grad_norm": 0.12873508036136627, "learning_rate": 0.002, "loss": 2.3393, "step": 128380 }, { "epoch": 0.49631983423791187, "grad_norm": 0.1165364608168602, "learning_rate": 0.002, "loss": 2.3527, "step": 128390 }, { "epoch": 0.4963584914412952, "grad_norm": 0.11247461289167404, "learning_rate": 0.002, "loss": 2.343, "step": 128400 }, { "epoch": 0.49639714864467843, "grad_norm": 0.11606108397245407, "learning_rate": 0.002, "loss": 2.3395, "step": 128410 }, { "epoch": 0.49643580584806174, "grad_norm": 0.09354253858327866, "learning_rate": 0.002, "loss": 2.3325, "step": 128420 }, { "epoch": 0.496474463051445, "grad_norm": 0.18941552937030792, "learning_rate": 0.002, "loss": 2.363, "step": 128430 }, { "epoch": 0.4965131202548283, "grad_norm": 0.09419083595275879, "learning_rate": 0.002, "loss": 2.3391, "step": 128440 }, { "epoch": 0.49655177745821155, "grad_norm": 0.11070279777050018, "learning_rate": 0.002, "loss": 2.3535, "step": 128450 }, { "epoch": 0.49659043466159486, "grad_norm": 0.09962885826826096, "learning_rate": 0.002, "loss": 2.3335, "step": 128460 }, { "epoch": 0.4966290918649781, "grad_norm": 0.11432572454214096, "learning_rate": 0.002, "loss": 2.3476, "step": 128470 }, { "epoch": 0.4966677490683614, "grad_norm": 0.11048974096775055, "learning_rate": 0.002, "loss": 2.3523, "step": 128480 }, { "epoch": 0.49670640627174467, "grad_norm": 0.14594443142414093, "learning_rate": 0.002, "loss": 2.3674, "step": 128490 }, { "epoch": 0.496745063475128, "grad_norm": 0.1116267740726471, "learning_rate": 0.002, "loss": 2.3571, "step": 128500 }, { "epoch": 0.49678372067851123, "grad_norm": 0.10765746980905533, "learning_rate": 0.002, "loss": 2.3434, "step": 128510 }, { "epoch": 0.49682237788189454, "grad_norm": 0.10805536806583405, "learning_rate": 0.002, "loss": 2.3619, "step": 128520 }, { "epoch": 0.4968610350852778, "grad_norm": 0.11227666586637497, "learning_rate": 0.002, "loss": 2.3503, "step": 128530 }, { "epoch": 0.49689969228866104, "grad_norm": 0.12149425595998764, "learning_rate": 0.002, "loss": 2.3424, "step": 128540 }, { "epoch": 0.49693834949204435, "grad_norm": 0.10537626594305038, "learning_rate": 0.002, "loss": 2.3395, "step": 128550 }, { "epoch": 0.4969770066954276, "grad_norm": 0.12122012674808502, "learning_rate": 0.002, "loss": 2.345, "step": 128560 }, { "epoch": 0.4970156638988109, "grad_norm": 0.11633715033531189, "learning_rate": 0.002, "loss": 2.3501, "step": 128570 }, { "epoch": 0.49705432110219416, "grad_norm": 0.10596884787082672, "learning_rate": 0.002, "loss": 2.3499, "step": 128580 }, { "epoch": 0.49709297830557747, "grad_norm": 0.12135255336761475, "learning_rate": 0.002, "loss": 2.337, "step": 128590 }, { "epoch": 0.4971316355089607, "grad_norm": 0.11075810343027115, "learning_rate": 0.002, "loss": 2.3376, "step": 128600 }, { "epoch": 0.49717029271234403, "grad_norm": 0.10570067167282104, "learning_rate": 0.002, "loss": 2.3333, "step": 128610 }, { "epoch": 0.4972089499157273, "grad_norm": 0.10998471081256866, "learning_rate": 0.002, "loss": 2.3569, "step": 128620 }, { "epoch": 0.4972476071191106, "grad_norm": 0.12023153156042099, "learning_rate": 0.002, "loss": 2.3533, "step": 128630 }, { "epoch": 0.49728626432249384, "grad_norm": 0.0980997234582901, "learning_rate": 0.002, "loss": 2.3512, "step": 128640 }, { "epoch": 0.49732492152587715, "grad_norm": 0.1252102553844452, "learning_rate": 0.002, "loss": 2.3522, "step": 128650 }, { "epoch": 0.4973635787292604, "grad_norm": 0.10539890080690384, "learning_rate": 0.002, "loss": 2.3408, "step": 128660 }, { "epoch": 0.4974022359326437, "grad_norm": 0.10025840252637863, "learning_rate": 0.002, "loss": 2.348, "step": 128670 }, { "epoch": 0.49744089313602696, "grad_norm": 0.10762711614370346, "learning_rate": 0.002, "loss": 2.3628, "step": 128680 }, { "epoch": 0.49747955033941027, "grad_norm": 0.11508306860923767, "learning_rate": 0.002, "loss": 2.3394, "step": 128690 }, { "epoch": 0.4975182075427935, "grad_norm": 0.10356348007917404, "learning_rate": 0.002, "loss": 2.3595, "step": 128700 }, { "epoch": 0.49755686474617683, "grad_norm": 0.10061529278755188, "learning_rate": 0.002, "loss": 2.3334, "step": 128710 }, { "epoch": 0.4975955219495601, "grad_norm": 0.10842433571815491, "learning_rate": 0.002, "loss": 2.3425, "step": 128720 }, { "epoch": 0.49763417915294333, "grad_norm": 0.12198733538389206, "learning_rate": 0.002, "loss": 2.335, "step": 128730 }, { "epoch": 0.49767283635632664, "grad_norm": 0.10084047168493271, "learning_rate": 0.002, "loss": 2.3404, "step": 128740 }, { "epoch": 0.4977114935597099, "grad_norm": 0.104703888297081, "learning_rate": 0.002, "loss": 2.3465, "step": 128750 }, { "epoch": 0.4977501507630932, "grad_norm": 0.10577589273452759, "learning_rate": 0.002, "loss": 2.3439, "step": 128760 }, { "epoch": 0.49778880796647645, "grad_norm": 0.10533466190099716, "learning_rate": 0.002, "loss": 2.3409, "step": 128770 }, { "epoch": 0.49782746516985976, "grad_norm": 0.10518418252468109, "learning_rate": 0.002, "loss": 2.3411, "step": 128780 }, { "epoch": 0.497866122373243, "grad_norm": 0.10616995394229889, "learning_rate": 0.002, "loss": 2.362, "step": 128790 }, { "epoch": 0.4979047795766263, "grad_norm": 0.10706468671560287, "learning_rate": 0.002, "loss": 2.3381, "step": 128800 }, { "epoch": 0.4979434367800096, "grad_norm": 0.1249483972787857, "learning_rate": 0.002, "loss": 2.3533, "step": 128810 }, { "epoch": 0.4979820939833929, "grad_norm": 0.09538888186216354, "learning_rate": 0.002, "loss": 2.3318, "step": 128820 }, { "epoch": 0.49802075118677613, "grad_norm": 0.10221359133720398, "learning_rate": 0.002, "loss": 2.3467, "step": 128830 }, { "epoch": 0.49805940839015944, "grad_norm": 0.11646133661270142, "learning_rate": 0.002, "loss": 2.3581, "step": 128840 }, { "epoch": 0.4980980655935427, "grad_norm": 0.1112755611538887, "learning_rate": 0.002, "loss": 2.3455, "step": 128850 }, { "epoch": 0.498136722796926, "grad_norm": 0.09283029288053513, "learning_rate": 0.002, "loss": 2.3534, "step": 128860 }, { "epoch": 0.49817538000030925, "grad_norm": 0.13229452073574066, "learning_rate": 0.002, "loss": 2.3417, "step": 128870 }, { "epoch": 0.49821403720369256, "grad_norm": 0.1138056218624115, "learning_rate": 0.002, "loss": 2.3568, "step": 128880 }, { "epoch": 0.4982526944070758, "grad_norm": 0.10786010324954987, "learning_rate": 0.002, "loss": 2.346, "step": 128890 }, { "epoch": 0.49829135161045907, "grad_norm": 0.10349131375551224, "learning_rate": 0.002, "loss": 2.3328, "step": 128900 }, { "epoch": 0.4983300088138424, "grad_norm": 0.1148381382226944, "learning_rate": 0.002, "loss": 2.3559, "step": 128910 }, { "epoch": 0.4983686660172256, "grad_norm": 0.11891456693410873, "learning_rate": 0.002, "loss": 2.3362, "step": 128920 }, { "epoch": 0.49840732322060893, "grad_norm": 0.10900149494409561, "learning_rate": 0.002, "loss": 2.3616, "step": 128930 }, { "epoch": 0.4984459804239922, "grad_norm": 0.10199940949678421, "learning_rate": 0.002, "loss": 2.3409, "step": 128940 }, { "epoch": 0.4984846376273755, "grad_norm": 0.1393444687128067, "learning_rate": 0.002, "loss": 2.3677, "step": 128950 }, { "epoch": 0.49852329483075875, "grad_norm": 0.2699110507965088, "learning_rate": 0.002, "loss": 2.3408, "step": 128960 }, { "epoch": 0.49856195203414205, "grad_norm": 0.0982867032289505, "learning_rate": 0.002, "loss": 2.3642, "step": 128970 }, { "epoch": 0.4986006092375253, "grad_norm": 0.10448093712329865, "learning_rate": 0.002, "loss": 2.3348, "step": 128980 }, { "epoch": 0.4986392664409086, "grad_norm": 0.11845577508211136, "learning_rate": 0.002, "loss": 2.3627, "step": 128990 }, { "epoch": 0.49867792364429187, "grad_norm": 0.0945153683423996, "learning_rate": 0.002, "loss": 2.3531, "step": 129000 }, { "epoch": 0.4987165808476752, "grad_norm": 0.10529087483882904, "learning_rate": 0.002, "loss": 2.3537, "step": 129010 }, { "epoch": 0.4987552380510584, "grad_norm": 0.10217724740505219, "learning_rate": 0.002, "loss": 2.3424, "step": 129020 }, { "epoch": 0.49879389525444173, "grad_norm": 0.10975181311368942, "learning_rate": 0.002, "loss": 2.3403, "step": 129030 }, { "epoch": 0.498832552457825, "grad_norm": 0.11213059723377228, "learning_rate": 0.002, "loss": 2.3499, "step": 129040 }, { "epoch": 0.4988712096612083, "grad_norm": 0.11686363816261292, "learning_rate": 0.002, "loss": 2.3505, "step": 129050 }, { "epoch": 0.49890986686459154, "grad_norm": 0.10080690681934357, "learning_rate": 0.002, "loss": 2.3443, "step": 129060 }, { "epoch": 0.49894852406797485, "grad_norm": 0.12356174737215042, "learning_rate": 0.002, "loss": 2.3446, "step": 129070 }, { "epoch": 0.4989871812713581, "grad_norm": 0.1226542666554451, "learning_rate": 0.002, "loss": 2.3633, "step": 129080 }, { "epoch": 0.49902583847474136, "grad_norm": 0.10394148528575897, "learning_rate": 0.002, "loss": 2.3524, "step": 129090 }, { "epoch": 0.49906449567812466, "grad_norm": 0.1452575922012329, "learning_rate": 0.002, "loss": 2.349, "step": 129100 }, { "epoch": 0.4991031528815079, "grad_norm": 0.11420813202857971, "learning_rate": 0.002, "loss": 2.3515, "step": 129110 }, { "epoch": 0.4991418100848912, "grad_norm": 0.0996331125497818, "learning_rate": 0.002, "loss": 2.3476, "step": 129120 }, { "epoch": 0.4991804672882745, "grad_norm": 0.10831142961978912, "learning_rate": 0.002, "loss": 2.3623, "step": 129130 }, { "epoch": 0.4992191244916578, "grad_norm": 0.121832475066185, "learning_rate": 0.002, "loss": 2.3582, "step": 129140 }, { "epoch": 0.49925778169504104, "grad_norm": 0.10833895206451416, "learning_rate": 0.002, "loss": 2.346, "step": 129150 }, { "epoch": 0.49929643889842434, "grad_norm": 0.10350599884986877, "learning_rate": 0.002, "loss": 2.3452, "step": 129160 }, { "epoch": 0.4993350961018076, "grad_norm": 0.10486671328544617, "learning_rate": 0.002, "loss": 2.3324, "step": 129170 }, { "epoch": 0.4993737533051909, "grad_norm": 0.10589516907930374, "learning_rate": 0.002, "loss": 2.3342, "step": 129180 }, { "epoch": 0.49941241050857416, "grad_norm": 0.10069865733385086, "learning_rate": 0.002, "loss": 2.3555, "step": 129190 }, { "epoch": 0.49945106771195746, "grad_norm": 0.11168158054351807, "learning_rate": 0.002, "loss": 2.3507, "step": 129200 }, { "epoch": 0.4994897249153407, "grad_norm": 0.1095777153968811, "learning_rate": 0.002, "loss": 2.3532, "step": 129210 }, { "epoch": 0.499528382118724, "grad_norm": 0.10152468830347061, "learning_rate": 0.002, "loss": 2.3585, "step": 129220 }, { "epoch": 0.4995670393221073, "grad_norm": 0.11011513322591782, "learning_rate": 0.002, "loss": 2.3481, "step": 129230 }, { "epoch": 0.4996056965254906, "grad_norm": 0.10783960670232773, "learning_rate": 0.002, "loss": 2.3549, "step": 129240 }, { "epoch": 0.49964435372887384, "grad_norm": 0.09583844989538193, "learning_rate": 0.002, "loss": 2.3654, "step": 129250 }, { "epoch": 0.49968301093225714, "grad_norm": 0.10869450867176056, "learning_rate": 0.002, "loss": 2.35, "step": 129260 }, { "epoch": 0.4997216681356404, "grad_norm": 0.10898219048976898, "learning_rate": 0.002, "loss": 2.3451, "step": 129270 }, { "epoch": 0.49976032533902365, "grad_norm": 0.1137361004948616, "learning_rate": 0.002, "loss": 2.3395, "step": 129280 }, { "epoch": 0.49979898254240696, "grad_norm": 0.09693686664104462, "learning_rate": 0.002, "loss": 2.3588, "step": 129290 }, { "epoch": 0.4998376397457902, "grad_norm": 0.10234058648347855, "learning_rate": 0.002, "loss": 2.3518, "step": 129300 }, { "epoch": 0.4998762969491735, "grad_norm": 0.11275883764028549, "learning_rate": 0.002, "loss": 2.342, "step": 129310 }, { "epoch": 0.49991495415255677, "grad_norm": 0.1097671240568161, "learning_rate": 0.002, "loss": 2.3378, "step": 129320 }, { "epoch": 0.4999536113559401, "grad_norm": 0.09512518346309662, "learning_rate": 0.002, "loss": 2.3384, "step": 129330 }, { "epoch": 0.4999922685593233, "grad_norm": 0.10958248376846313, "learning_rate": 0.002, "loss": 2.3431, "step": 129340 }, { "epoch": 0.5000309257627066, "grad_norm": 0.10563676059246063, "learning_rate": 0.002, "loss": 2.3388, "step": 129350 }, { "epoch": 0.5000695829660899, "grad_norm": 0.09640498459339142, "learning_rate": 0.002, "loss": 2.3479, "step": 129360 }, { "epoch": 0.5001082401694732, "grad_norm": 0.11537022888660431, "learning_rate": 0.002, "loss": 2.3456, "step": 129370 }, { "epoch": 0.5001468973728564, "grad_norm": 0.0939837396144867, "learning_rate": 0.002, "loss": 2.3491, "step": 129380 }, { "epoch": 0.5001855545762397, "grad_norm": 0.1068049892783165, "learning_rate": 0.002, "loss": 2.3391, "step": 129390 }, { "epoch": 0.5002242117796231, "grad_norm": 0.12465416640043259, "learning_rate": 0.002, "loss": 2.3503, "step": 129400 }, { "epoch": 0.5002628689830063, "grad_norm": 0.0991302952170372, "learning_rate": 0.002, "loss": 2.3513, "step": 129410 }, { "epoch": 0.5003015261863896, "grad_norm": 0.10132446140050888, "learning_rate": 0.002, "loss": 2.3641, "step": 129420 }, { "epoch": 0.5003401833897728, "grad_norm": 0.11166176944971085, "learning_rate": 0.002, "loss": 2.3338, "step": 129430 }, { "epoch": 0.5003788405931562, "grad_norm": 0.11701422929763794, "learning_rate": 0.002, "loss": 2.3402, "step": 129440 }, { "epoch": 0.5004174977965394, "grad_norm": 0.1262463927268982, "learning_rate": 0.002, "loss": 2.3561, "step": 129450 }, { "epoch": 0.5004561549999227, "grad_norm": 0.10851752758026123, "learning_rate": 0.002, "loss": 2.3507, "step": 129460 }, { "epoch": 0.5004948122033059, "grad_norm": 0.1244642361998558, "learning_rate": 0.002, "loss": 2.3615, "step": 129470 }, { "epoch": 0.5005334694066892, "grad_norm": 0.10973615199327469, "learning_rate": 0.002, "loss": 2.3403, "step": 129480 }, { "epoch": 0.5005721266100726, "grad_norm": 0.11159950494766235, "learning_rate": 0.002, "loss": 2.3573, "step": 129490 }, { "epoch": 0.5006107838134558, "grad_norm": 0.09034644067287445, "learning_rate": 0.002, "loss": 2.3391, "step": 129500 }, { "epoch": 0.5006494410168391, "grad_norm": 0.10297048836946487, "learning_rate": 0.002, "loss": 2.3309, "step": 129510 }, { "epoch": 0.5006880982202223, "grad_norm": 0.10359738022089005, "learning_rate": 0.002, "loss": 2.3375, "step": 129520 }, { "epoch": 0.5007267554236057, "grad_norm": 0.10247504711151123, "learning_rate": 0.002, "loss": 2.3222, "step": 129530 }, { "epoch": 0.5007654126269889, "grad_norm": 0.10605670511722565, "learning_rate": 0.002, "loss": 2.3347, "step": 129540 }, { "epoch": 0.5008040698303722, "grad_norm": 0.10165400058031082, "learning_rate": 0.002, "loss": 2.3467, "step": 129550 }, { "epoch": 0.5008427270337554, "grad_norm": 0.1123841404914856, "learning_rate": 0.002, "loss": 2.3474, "step": 129560 }, { "epoch": 0.5008813842371388, "grad_norm": 0.10684897750616074, "learning_rate": 0.002, "loss": 2.3581, "step": 129570 }, { "epoch": 0.500920041440522, "grad_norm": 0.11832645535469055, "learning_rate": 0.002, "loss": 2.3568, "step": 129580 }, { "epoch": 0.5009586986439053, "grad_norm": 0.10235374420881271, "learning_rate": 0.002, "loss": 2.3493, "step": 129590 }, { "epoch": 0.5009973558472886, "grad_norm": 0.10725278407335281, "learning_rate": 0.002, "loss": 2.3379, "step": 129600 }, { "epoch": 0.5010360130506719, "grad_norm": 0.11649779975414276, "learning_rate": 0.002, "loss": 2.3473, "step": 129610 }, { "epoch": 0.5010746702540552, "grad_norm": 0.10545599460601807, "learning_rate": 0.002, "loss": 2.3432, "step": 129620 }, { "epoch": 0.5011133274574384, "grad_norm": 0.10189778357744217, "learning_rate": 0.002, "loss": 2.3467, "step": 129630 }, { "epoch": 0.5011519846608217, "grad_norm": 0.11193004250526428, "learning_rate": 0.002, "loss": 2.354, "step": 129640 }, { "epoch": 0.5011906418642049, "grad_norm": 0.11135837435722351, "learning_rate": 0.002, "loss": 2.3592, "step": 129650 }, { "epoch": 0.5012292990675883, "grad_norm": 0.10167776793241501, "learning_rate": 0.002, "loss": 2.3379, "step": 129660 }, { "epoch": 0.5012679562709715, "grad_norm": 0.10497588664293289, "learning_rate": 0.002, "loss": 2.3453, "step": 129670 }, { "epoch": 0.5013066134743548, "grad_norm": 0.13115350902080536, "learning_rate": 0.002, "loss": 2.3554, "step": 129680 }, { "epoch": 0.501345270677738, "grad_norm": 0.09938187897205353, "learning_rate": 0.002, "loss": 2.3431, "step": 129690 }, { "epoch": 0.5013839278811214, "grad_norm": 0.09681079536676407, "learning_rate": 0.002, "loss": 2.3413, "step": 129700 }, { "epoch": 0.5014225850845047, "grad_norm": 0.0996105894446373, "learning_rate": 0.002, "loss": 2.3496, "step": 129710 }, { "epoch": 0.5014612422878879, "grad_norm": 0.12206865847110748, "learning_rate": 0.002, "loss": 2.3581, "step": 129720 }, { "epoch": 0.5014998994912712, "grad_norm": 0.10603801906108856, "learning_rate": 0.002, "loss": 2.3437, "step": 129730 }, { "epoch": 0.5015385566946545, "grad_norm": 0.12295734882354736, "learning_rate": 0.002, "loss": 2.3443, "step": 129740 }, { "epoch": 0.5015772138980378, "grad_norm": 0.1133185625076294, "learning_rate": 0.002, "loss": 2.3427, "step": 129750 }, { "epoch": 0.501615871101421, "grad_norm": 0.10275447368621826, "learning_rate": 0.002, "loss": 2.3386, "step": 129760 }, { "epoch": 0.5016545283048043, "grad_norm": 0.10109806805849075, "learning_rate": 0.002, "loss": 2.356, "step": 129770 }, { "epoch": 0.5016931855081876, "grad_norm": 0.09771363437175751, "learning_rate": 0.002, "loss": 2.3376, "step": 129780 }, { "epoch": 0.5017318427115709, "grad_norm": 0.14310520887374878, "learning_rate": 0.002, "loss": 2.3386, "step": 129790 }, { "epoch": 0.5017704999149541, "grad_norm": 0.11893128603696823, "learning_rate": 0.002, "loss": 2.3454, "step": 129800 }, { "epoch": 0.5018091571183374, "grad_norm": 0.09260301291942596, "learning_rate": 0.002, "loss": 2.3441, "step": 129810 }, { "epoch": 0.5018478143217208, "grad_norm": 0.11436281353235245, "learning_rate": 0.002, "loss": 2.3417, "step": 129820 }, { "epoch": 0.501886471525104, "grad_norm": 0.09662805497646332, "learning_rate": 0.002, "loss": 2.345, "step": 129830 }, { "epoch": 0.5019251287284873, "grad_norm": 0.1126139834523201, "learning_rate": 0.002, "loss": 2.3478, "step": 129840 }, { "epoch": 0.5019637859318705, "grad_norm": 0.11318148672580719, "learning_rate": 0.002, "loss": 2.3404, "step": 129850 }, { "epoch": 0.5020024431352538, "grad_norm": 0.11841975152492523, "learning_rate": 0.002, "loss": 2.3504, "step": 129860 }, { "epoch": 0.5020411003386371, "grad_norm": 0.08732382208108902, "learning_rate": 0.002, "loss": 2.34, "step": 129870 }, { "epoch": 0.5020797575420204, "grad_norm": 0.11381547152996063, "learning_rate": 0.002, "loss": 2.336, "step": 129880 }, { "epoch": 0.5021184147454036, "grad_norm": 0.10493790358304977, "learning_rate": 0.002, "loss": 2.3551, "step": 129890 }, { "epoch": 0.5021570719487869, "grad_norm": 0.10133479535579681, "learning_rate": 0.002, "loss": 2.3412, "step": 129900 }, { "epoch": 0.5021957291521703, "grad_norm": 0.10650533437728882, "learning_rate": 0.002, "loss": 2.3401, "step": 129910 }, { "epoch": 0.5022343863555535, "grad_norm": 0.09722936898469925, "learning_rate": 0.002, "loss": 2.3311, "step": 129920 }, { "epoch": 0.5022730435589368, "grad_norm": 0.09154581278562546, "learning_rate": 0.002, "loss": 2.343, "step": 129930 }, { "epoch": 0.50231170076232, "grad_norm": 0.12030097842216492, "learning_rate": 0.002, "loss": 2.336, "step": 129940 }, { "epoch": 0.5023503579657034, "grad_norm": 0.11775567382574081, "learning_rate": 0.002, "loss": 2.3497, "step": 129950 }, { "epoch": 0.5023890151690866, "grad_norm": 0.10541986674070358, "learning_rate": 0.002, "loss": 2.3517, "step": 129960 }, { "epoch": 0.5024276723724699, "grad_norm": 0.12167876213788986, "learning_rate": 0.002, "loss": 2.3467, "step": 129970 }, { "epoch": 0.5024663295758531, "grad_norm": 0.10480458289384842, "learning_rate": 0.002, "loss": 2.347, "step": 129980 }, { "epoch": 0.5025049867792365, "grad_norm": 0.10177701711654663, "learning_rate": 0.002, "loss": 2.3351, "step": 129990 }, { "epoch": 0.5025436439826197, "grad_norm": 0.11270109564065933, "learning_rate": 0.002, "loss": 2.34, "step": 130000 }, { "epoch": 0.502582301186003, "grad_norm": 0.104780413210392, "learning_rate": 0.002, "loss": 2.3352, "step": 130010 }, { "epoch": 0.5026209583893863, "grad_norm": 0.09986669570207596, "learning_rate": 0.002, "loss": 2.3497, "step": 130020 }, { "epoch": 0.5026596155927695, "grad_norm": 0.13555224239826202, "learning_rate": 0.002, "loss": 2.3398, "step": 130030 }, { "epoch": 0.5026982727961529, "grad_norm": 0.10494866967201233, "learning_rate": 0.002, "loss": 2.3414, "step": 130040 }, { "epoch": 0.5027369299995361, "grad_norm": 0.11657863110303879, "learning_rate": 0.002, "loss": 2.3468, "step": 130050 }, { "epoch": 0.5027755872029194, "grad_norm": 0.11362355947494507, "learning_rate": 0.002, "loss": 2.3454, "step": 130060 }, { "epoch": 0.5028142444063026, "grad_norm": 0.09364651143550873, "learning_rate": 0.002, "loss": 2.3564, "step": 130070 }, { "epoch": 0.502852901609686, "grad_norm": 0.11096946895122528, "learning_rate": 0.002, "loss": 2.3254, "step": 130080 }, { "epoch": 0.5028915588130692, "grad_norm": 0.10207531601190567, "learning_rate": 0.002, "loss": 2.3526, "step": 130090 }, { "epoch": 0.5029302160164525, "grad_norm": 0.10843487828969955, "learning_rate": 0.002, "loss": 2.3505, "step": 130100 }, { "epoch": 0.5029688732198357, "grad_norm": 0.11173061281442642, "learning_rate": 0.002, "loss": 2.3371, "step": 130110 }, { "epoch": 0.5030075304232191, "grad_norm": 0.10927116125822067, "learning_rate": 0.002, "loss": 2.3428, "step": 130120 }, { "epoch": 0.5030461876266024, "grad_norm": 0.10148067772388458, "learning_rate": 0.002, "loss": 2.335, "step": 130130 }, { "epoch": 0.5030848448299856, "grad_norm": 0.09976466745138168, "learning_rate": 0.002, "loss": 2.3488, "step": 130140 }, { "epoch": 0.5031235020333689, "grad_norm": 0.10571061074733734, "learning_rate": 0.002, "loss": 2.3509, "step": 130150 }, { "epoch": 0.5031621592367522, "grad_norm": 0.10799040645360947, "learning_rate": 0.002, "loss": 2.3374, "step": 130160 }, { "epoch": 0.5032008164401355, "grad_norm": 0.0908653512597084, "learning_rate": 0.002, "loss": 2.3453, "step": 130170 }, { "epoch": 0.5032394736435187, "grad_norm": 0.10622545331716537, "learning_rate": 0.002, "loss": 2.3468, "step": 130180 }, { "epoch": 0.503278130846902, "grad_norm": 0.11057796329259872, "learning_rate": 0.002, "loss": 2.3315, "step": 130190 }, { "epoch": 0.5033167880502852, "grad_norm": 0.10583652555942535, "learning_rate": 0.002, "loss": 2.3507, "step": 130200 }, { "epoch": 0.5033554452536686, "grad_norm": 0.10508795082569122, "learning_rate": 0.002, "loss": 2.345, "step": 130210 }, { "epoch": 0.5033941024570519, "grad_norm": 0.09605761617422104, "learning_rate": 0.002, "loss": 2.3538, "step": 130220 }, { "epoch": 0.5034327596604351, "grad_norm": 0.11529167741537094, "learning_rate": 0.002, "loss": 2.363, "step": 130230 }, { "epoch": 0.5034714168638184, "grad_norm": 0.09331204742193222, "learning_rate": 0.002, "loss": 2.3525, "step": 130240 }, { "epoch": 0.5035100740672017, "grad_norm": 0.11913208663463593, "learning_rate": 0.002, "loss": 2.3502, "step": 130250 }, { "epoch": 0.503548731270585, "grad_norm": 0.10425525903701782, "learning_rate": 0.002, "loss": 2.3456, "step": 130260 }, { "epoch": 0.5035873884739682, "grad_norm": 0.10939491540193558, "learning_rate": 0.002, "loss": 2.3528, "step": 130270 }, { "epoch": 0.5036260456773515, "grad_norm": 0.09465670585632324, "learning_rate": 0.002, "loss": 2.3385, "step": 130280 }, { "epoch": 0.5036647028807348, "grad_norm": 0.1188560351729393, "learning_rate": 0.002, "loss": 2.3417, "step": 130290 }, { "epoch": 0.5037033600841181, "grad_norm": 0.11721429228782654, "learning_rate": 0.002, "loss": 2.344, "step": 130300 }, { "epoch": 0.5037420172875013, "grad_norm": 0.097111776471138, "learning_rate": 0.002, "loss": 2.3569, "step": 130310 }, { "epoch": 0.5037806744908846, "grad_norm": 0.11451169103384018, "learning_rate": 0.002, "loss": 2.3463, "step": 130320 }, { "epoch": 0.503819331694268, "grad_norm": 0.14097385108470917, "learning_rate": 0.002, "loss": 2.3438, "step": 130330 }, { "epoch": 0.5038579888976512, "grad_norm": 0.10153955221176147, "learning_rate": 0.002, "loss": 2.3453, "step": 130340 }, { "epoch": 0.5038966461010345, "grad_norm": 0.1101381704211235, "learning_rate": 0.002, "loss": 2.3463, "step": 130350 }, { "epoch": 0.5039353033044177, "grad_norm": 0.10483207553625107, "learning_rate": 0.002, "loss": 2.3646, "step": 130360 }, { "epoch": 0.5039739605078011, "grad_norm": 0.10726115107536316, "learning_rate": 0.002, "loss": 2.3517, "step": 130370 }, { "epoch": 0.5040126177111843, "grad_norm": 0.11522553116083145, "learning_rate": 0.002, "loss": 2.3455, "step": 130380 }, { "epoch": 0.5040512749145676, "grad_norm": 0.1336357742547989, "learning_rate": 0.002, "loss": 2.3485, "step": 130390 }, { "epoch": 0.5040899321179508, "grad_norm": 0.10702812671661377, "learning_rate": 0.002, "loss": 2.3275, "step": 130400 }, { "epoch": 0.5041285893213341, "grad_norm": 0.10702671110630035, "learning_rate": 0.002, "loss": 2.3432, "step": 130410 }, { "epoch": 0.5041672465247174, "grad_norm": 0.09648557752370834, "learning_rate": 0.002, "loss": 2.346, "step": 130420 }, { "epoch": 0.5042059037281007, "grad_norm": 0.10503020137548447, "learning_rate": 0.002, "loss": 2.3479, "step": 130430 }, { "epoch": 0.504244560931484, "grad_norm": 0.10085074603557587, "learning_rate": 0.002, "loss": 2.3417, "step": 130440 }, { "epoch": 0.5042832181348672, "grad_norm": 0.10082501918077469, "learning_rate": 0.002, "loss": 2.3487, "step": 130450 }, { "epoch": 0.5043218753382506, "grad_norm": 0.10579691082239151, "learning_rate": 0.002, "loss": 2.3421, "step": 130460 }, { "epoch": 0.5043605325416338, "grad_norm": 0.12046706676483154, "learning_rate": 0.002, "loss": 2.3498, "step": 130470 }, { "epoch": 0.5043991897450171, "grad_norm": 0.10785841941833496, "learning_rate": 0.002, "loss": 2.3379, "step": 130480 }, { "epoch": 0.5044378469484003, "grad_norm": 0.12022893875837326, "learning_rate": 0.002, "loss": 2.3494, "step": 130490 }, { "epoch": 0.5044765041517837, "grad_norm": 0.10601069033145905, "learning_rate": 0.002, "loss": 2.3491, "step": 130500 }, { "epoch": 0.5045151613551669, "grad_norm": 0.10324139893054962, "learning_rate": 0.002, "loss": 2.3373, "step": 130510 }, { "epoch": 0.5045538185585502, "grad_norm": 0.0889090746641159, "learning_rate": 0.002, "loss": 2.3673, "step": 130520 }, { "epoch": 0.5045924757619334, "grad_norm": 0.10269230604171753, "learning_rate": 0.002, "loss": 2.3546, "step": 130530 }, { "epoch": 0.5046311329653168, "grad_norm": 0.10001306235790253, "learning_rate": 0.002, "loss": 2.3286, "step": 130540 }, { "epoch": 0.5046697901687001, "grad_norm": 0.11116419732570648, "learning_rate": 0.002, "loss": 2.3548, "step": 130550 }, { "epoch": 0.5047084473720833, "grad_norm": 0.1122574731707573, "learning_rate": 0.002, "loss": 2.339, "step": 130560 }, { "epoch": 0.5047471045754666, "grad_norm": 0.106594018638134, "learning_rate": 0.002, "loss": 2.3553, "step": 130570 }, { "epoch": 0.5047857617788498, "grad_norm": 0.11066550761461258, "learning_rate": 0.002, "loss": 2.3454, "step": 130580 }, { "epoch": 0.5048244189822332, "grad_norm": 0.09080637246370316, "learning_rate": 0.002, "loss": 2.3591, "step": 130590 }, { "epoch": 0.5048630761856164, "grad_norm": 0.1192973256111145, "learning_rate": 0.002, "loss": 2.3378, "step": 130600 }, { "epoch": 0.5049017333889997, "grad_norm": 0.1105991080403328, "learning_rate": 0.002, "loss": 2.3429, "step": 130610 }, { "epoch": 0.5049403905923829, "grad_norm": 0.20441003143787384, "learning_rate": 0.002, "loss": 2.3438, "step": 130620 }, { "epoch": 0.5049790477957663, "grad_norm": 0.10766471177339554, "learning_rate": 0.002, "loss": 2.3252, "step": 130630 }, { "epoch": 0.5050177049991496, "grad_norm": 0.09288868308067322, "learning_rate": 0.002, "loss": 2.3316, "step": 130640 }, { "epoch": 0.5050563622025328, "grad_norm": 0.10671941936016083, "learning_rate": 0.002, "loss": 2.3342, "step": 130650 }, { "epoch": 0.5050950194059161, "grad_norm": 0.10453400015830994, "learning_rate": 0.002, "loss": 2.35, "step": 130660 }, { "epoch": 0.5051336766092994, "grad_norm": 0.10739947855472565, "learning_rate": 0.002, "loss": 2.3423, "step": 130670 }, { "epoch": 0.5051723338126827, "grad_norm": 0.12816528975963593, "learning_rate": 0.002, "loss": 2.3466, "step": 130680 }, { "epoch": 0.5052109910160659, "grad_norm": 0.09332219511270523, "learning_rate": 0.002, "loss": 2.3443, "step": 130690 }, { "epoch": 0.5052496482194492, "grad_norm": 0.12734173238277435, "learning_rate": 0.002, "loss": 2.3641, "step": 130700 }, { "epoch": 0.5052883054228325, "grad_norm": 0.0906364843249321, "learning_rate": 0.002, "loss": 2.3477, "step": 130710 }, { "epoch": 0.5053269626262158, "grad_norm": 0.10444950312376022, "learning_rate": 0.002, "loss": 2.3506, "step": 130720 }, { "epoch": 0.505365619829599, "grad_norm": 0.10656848549842834, "learning_rate": 0.002, "loss": 2.3521, "step": 130730 }, { "epoch": 0.5054042770329823, "grad_norm": 0.09719150513410568, "learning_rate": 0.002, "loss": 2.3351, "step": 130740 }, { "epoch": 0.5054429342363657, "grad_norm": 0.10844217985868454, "learning_rate": 0.002, "loss": 2.356, "step": 130750 }, { "epoch": 0.5054815914397489, "grad_norm": 0.12327215820550919, "learning_rate": 0.002, "loss": 2.3369, "step": 130760 }, { "epoch": 0.5055202486431322, "grad_norm": 0.10725454241037369, "learning_rate": 0.002, "loss": 2.3433, "step": 130770 }, { "epoch": 0.5055589058465154, "grad_norm": 0.10240764170885086, "learning_rate": 0.002, "loss": 2.3465, "step": 130780 }, { "epoch": 0.5055975630498987, "grad_norm": 0.09825988858938217, "learning_rate": 0.002, "loss": 2.3455, "step": 130790 }, { "epoch": 0.505636220253282, "grad_norm": 0.1060064509510994, "learning_rate": 0.002, "loss": 2.3267, "step": 130800 }, { "epoch": 0.5056748774566653, "grad_norm": 0.10984325408935547, "learning_rate": 0.002, "loss": 2.3431, "step": 130810 }, { "epoch": 0.5057135346600485, "grad_norm": 0.10869551450014114, "learning_rate": 0.002, "loss": 2.3527, "step": 130820 }, { "epoch": 0.5057521918634318, "grad_norm": 0.1109204962849617, "learning_rate": 0.002, "loss": 2.3391, "step": 130830 }, { "epoch": 0.5057908490668152, "grad_norm": 0.1029016375541687, "learning_rate": 0.002, "loss": 2.3541, "step": 130840 }, { "epoch": 0.5058295062701984, "grad_norm": 0.12351314723491669, "learning_rate": 0.002, "loss": 2.3523, "step": 130850 }, { "epoch": 0.5058681634735817, "grad_norm": 0.1079796850681305, "learning_rate": 0.002, "loss": 2.3596, "step": 130860 }, { "epoch": 0.5059068206769649, "grad_norm": 0.10102064162492752, "learning_rate": 0.002, "loss": 2.3362, "step": 130870 }, { "epoch": 0.5059454778803483, "grad_norm": 0.11627139151096344, "learning_rate": 0.002, "loss": 2.35, "step": 130880 }, { "epoch": 0.5059841350837315, "grad_norm": 0.11690767109394073, "learning_rate": 0.002, "loss": 2.3378, "step": 130890 }, { "epoch": 0.5060227922871148, "grad_norm": 0.09732042998075485, "learning_rate": 0.002, "loss": 2.3504, "step": 130900 }, { "epoch": 0.506061449490498, "grad_norm": 0.12113215774297714, "learning_rate": 0.002, "loss": 2.343, "step": 130910 }, { "epoch": 0.5061001066938814, "grad_norm": 0.09814827144145966, "learning_rate": 0.002, "loss": 2.3583, "step": 130920 }, { "epoch": 0.5061387638972646, "grad_norm": 0.10524283349514008, "learning_rate": 0.002, "loss": 2.3481, "step": 130930 }, { "epoch": 0.5061774211006479, "grad_norm": 0.13482894003391266, "learning_rate": 0.002, "loss": 2.356, "step": 130940 }, { "epoch": 0.5062160783040311, "grad_norm": 0.08584418147802353, "learning_rate": 0.002, "loss": 2.357, "step": 130950 }, { "epoch": 0.5062547355074144, "grad_norm": 0.1154203787446022, "learning_rate": 0.002, "loss": 2.3548, "step": 130960 }, { "epoch": 0.5062933927107978, "grad_norm": 0.09992490708827972, "learning_rate": 0.002, "loss": 2.3312, "step": 130970 }, { "epoch": 0.506332049914181, "grad_norm": 0.1054072305560112, "learning_rate": 0.002, "loss": 2.3423, "step": 130980 }, { "epoch": 0.5063707071175643, "grad_norm": 0.11291274428367615, "learning_rate": 0.002, "loss": 2.3328, "step": 130990 }, { "epoch": 0.5064093643209475, "grad_norm": 0.11428273469209671, "learning_rate": 0.002, "loss": 2.3457, "step": 131000 }, { "epoch": 0.5064480215243309, "grad_norm": 0.1516738086938858, "learning_rate": 0.002, "loss": 2.3441, "step": 131010 }, { "epoch": 0.5064866787277141, "grad_norm": 0.10782832652330399, "learning_rate": 0.002, "loss": 2.3374, "step": 131020 }, { "epoch": 0.5065253359310974, "grad_norm": 0.10205426067113876, "learning_rate": 0.002, "loss": 2.3426, "step": 131030 }, { "epoch": 0.5065639931344806, "grad_norm": 0.1182415708899498, "learning_rate": 0.002, "loss": 2.3302, "step": 131040 }, { "epoch": 0.506602650337864, "grad_norm": 0.11133012175559998, "learning_rate": 0.002, "loss": 2.3294, "step": 131050 }, { "epoch": 0.5066413075412473, "grad_norm": 0.09714315831661224, "learning_rate": 0.002, "loss": 2.3436, "step": 131060 }, { "epoch": 0.5066799647446305, "grad_norm": 0.11032336205244064, "learning_rate": 0.002, "loss": 2.3275, "step": 131070 }, { "epoch": 0.5067186219480138, "grad_norm": 0.09999191761016846, "learning_rate": 0.002, "loss": 2.3618, "step": 131080 }, { "epoch": 0.5067572791513971, "grad_norm": 0.09123971313238144, "learning_rate": 0.002, "loss": 2.3448, "step": 131090 }, { "epoch": 0.5067959363547804, "grad_norm": 0.11429370939731598, "learning_rate": 0.002, "loss": 2.3377, "step": 131100 }, { "epoch": 0.5068345935581636, "grad_norm": 0.10124003887176514, "learning_rate": 0.002, "loss": 2.3517, "step": 131110 }, { "epoch": 0.5068732507615469, "grad_norm": 0.09939180314540863, "learning_rate": 0.002, "loss": 2.3257, "step": 131120 }, { "epoch": 0.5069119079649301, "grad_norm": 0.10667199641466141, "learning_rate": 0.002, "loss": 2.3486, "step": 131130 }, { "epoch": 0.5069505651683135, "grad_norm": 0.10647431761026382, "learning_rate": 0.002, "loss": 2.3424, "step": 131140 }, { "epoch": 0.5069892223716967, "grad_norm": 0.10949549823999405, "learning_rate": 0.002, "loss": 2.3576, "step": 131150 }, { "epoch": 0.50702787957508, "grad_norm": 0.11368773132562637, "learning_rate": 0.002, "loss": 2.356, "step": 131160 }, { "epoch": 0.5070665367784633, "grad_norm": 0.10527981072664261, "learning_rate": 0.002, "loss": 2.3445, "step": 131170 }, { "epoch": 0.5071051939818466, "grad_norm": 0.09757716208696365, "learning_rate": 0.002, "loss": 2.3568, "step": 131180 }, { "epoch": 0.5071438511852299, "grad_norm": 0.11202974617481232, "learning_rate": 0.002, "loss": 2.3478, "step": 131190 }, { "epoch": 0.5071825083886131, "grad_norm": 0.10303257405757904, "learning_rate": 0.002, "loss": 2.353, "step": 131200 }, { "epoch": 0.5072211655919964, "grad_norm": 0.11027651280164719, "learning_rate": 0.002, "loss": 2.3493, "step": 131210 }, { "epoch": 0.5072598227953797, "grad_norm": 0.10823901742696762, "learning_rate": 0.002, "loss": 2.3611, "step": 131220 }, { "epoch": 0.507298479998763, "grad_norm": 0.1229693591594696, "learning_rate": 0.002, "loss": 2.3543, "step": 131230 }, { "epoch": 0.5073371372021462, "grad_norm": 0.1097574308514595, "learning_rate": 0.002, "loss": 2.3488, "step": 131240 }, { "epoch": 0.5073757944055295, "grad_norm": 0.11523716151714325, "learning_rate": 0.002, "loss": 2.3298, "step": 131250 }, { "epoch": 0.5074144516089129, "grad_norm": 0.10482656955718994, "learning_rate": 0.002, "loss": 2.3353, "step": 131260 }, { "epoch": 0.5074531088122961, "grad_norm": 0.10720503330230713, "learning_rate": 0.002, "loss": 2.3513, "step": 131270 }, { "epoch": 0.5074917660156794, "grad_norm": 0.10162218660116196, "learning_rate": 0.002, "loss": 2.3397, "step": 131280 }, { "epoch": 0.5075304232190626, "grad_norm": 0.1127084493637085, "learning_rate": 0.002, "loss": 2.3502, "step": 131290 }, { "epoch": 0.507569080422446, "grad_norm": 0.10835334658622742, "learning_rate": 0.002, "loss": 2.3543, "step": 131300 }, { "epoch": 0.5076077376258292, "grad_norm": 0.10543724149465561, "learning_rate": 0.002, "loss": 2.342, "step": 131310 }, { "epoch": 0.5076463948292125, "grad_norm": 0.10903951525688171, "learning_rate": 0.002, "loss": 2.3351, "step": 131320 }, { "epoch": 0.5076850520325957, "grad_norm": 0.11438053846359253, "learning_rate": 0.002, "loss": 2.3424, "step": 131330 }, { "epoch": 0.507723709235979, "grad_norm": 0.11422231048345566, "learning_rate": 0.002, "loss": 2.3463, "step": 131340 }, { "epoch": 0.5077623664393623, "grad_norm": 0.11027810722589493, "learning_rate": 0.002, "loss": 2.349, "step": 131350 }, { "epoch": 0.5078010236427456, "grad_norm": 0.104742631316185, "learning_rate": 0.002, "loss": 2.3431, "step": 131360 }, { "epoch": 0.5078396808461288, "grad_norm": 0.11140044778585434, "learning_rate": 0.002, "loss": 2.3547, "step": 131370 }, { "epoch": 0.5078783380495121, "grad_norm": 0.10416561365127563, "learning_rate": 0.002, "loss": 2.3325, "step": 131380 }, { "epoch": 0.5079169952528955, "grad_norm": 0.11030837148427963, "learning_rate": 0.002, "loss": 2.3608, "step": 131390 }, { "epoch": 0.5079556524562787, "grad_norm": 0.10846863687038422, "learning_rate": 0.002, "loss": 2.3388, "step": 131400 }, { "epoch": 0.507994309659662, "grad_norm": 0.11026975512504578, "learning_rate": 0.002, "loss": 2.3496, "step": 131410 }, { "epoch": 0.5080329668630452, "grad_norm": 0.09743861854076385, "learning_rate": 0.002, "loss": 2.3603, "step": 131420 }, { "epoch": 0.5080716240664286, "grad_norm": 0.13267552852630615, "learning_rate": 0.002, "loss": 2.3398, "step": 131430 }, { "epoch": 0.5081102812698118, "grad_norm": 0.7629449367523193, "learning_rate": 0.002, "loss": 2.3612, "step": 131440 }, { "epoch": 0.5081489384731951, "grad_norm": 0.10051924735307693, "learning_rate": 0.002, "loss": 2.3375, "step": 131450 }, { "epoch": 0.5081875956765783, "grad_norm": 0.11776295304298401, "learning_rate": 0.002, "loss": 2.3412, "step": 131460 }, { "epoch": 0.5082262528799617, "grad_norm": 0.11177230626344681, "learning_rate": 0.002, "loss": 2.3347, "step": 131470 }, { "epoch": 0.508264910083345, "grad_norm": 0.09945464134216309, "learning_rate": 0.002, "loss": 2.3287, "step": 131480 }, { "epoch": 0.5083035672867282, "grad_norm": 0.11460543423891068, "learning_rate": 0.002, "loss": 2.3328, "step": 131490 }, { "epoch": 0.5083422244901115, "grad_norm": 0.12200960516929626, "learning_rate": 0.002, "loss": 2.332, "step": 131500 }, { "epoch": 0.5083808816934947, "grad_norm": 0.10961554944515228, "learning_rate": 0.002, "loss": 2.3645, "step": 131510 }, { "epoch": 0.5084195388968781, "grad_norm": 0.10165112465620041, "learning_rate": 0.002, "loss": 2.3473, "step": 131520 }, { "epoch": 0.5084581961002613, "grad_norm": 0.11187805235385895, "learning_rate": 0.002, "loss": 2.3525, "step": 131530 }, { "epoch": 0.5084968533036446, "grad_norm": 0.10341209173202515, "learning_rate": 0.002, "loss": 2.3472, "step": 131540 }, { "epoch": 0.5085355105070278, "grad_norm": 0.1048155277967453, "learning_rate": 0.002, "loss": 2.3605, "step": 131550 }, { "epoch": 0.5085741677104112, "grad_norm": 0.11941980570554733, "learning_rate": 0.002, "loss": 2.3473, "step": 131560 }, { "epoch": 0.5086128249137944, "grad_norm": 0.10180438309907913, "learning_rate": 0.002, "loss": 2.341, "step": 131570 }, { "epoch": 0.5086514821171777, "grad_norm": 0.1211216077208519, "learning_rate": 0.002, "loss": 2.3439, "step": 131580 }, { "epoch": 0.508690139320561, "grad_norm": 0.10446929931640625, "learning_rate": 0.002, "loss": 2.357, "step": 131590 }, { "epoch": 0.5087287965239443, "grad_norm": 0.11889198422431946, "learning_rate": 0.002, "loss": 2.3569, "step": 131600 }, { "epoch": 0.5087674537273276, "grad_norm": 0.1014224961400032, "learning_rate": 0.002, "loss": 2.3568, "step": 131610 }, { "epoch": 0.5088061109307108, "grad_norm": 0.10770139843225479, "learning_rate": 0.002, "loss": 2.3632, "step": 131620 }, { "epoch": 0.5088447681340941, "grad_norm": 0.11339019238948822, "learning_rate": 0.002, "loss": 2.3567, "step": 131630 }, { "epoch": 0.5088834253374774, "grad_norm": 0.10878494381904602, "learning_rate": 0.002, "loss": 2.3341, "step": 131640 }, { "epoch": 0.5089220825408607, "grad_norm": 0.10915534198284149, "learning_rate": 0.002, "loss": 2.3634, "step": 131650 }, { "epoch": 0.5089607397442439, "grad_norm": 0.1017685979604721, "learning_rate": 0.002, "loss": 2.3483, "step": 131660 }, { "epoch": 0.5089993969476272, "grad_norm": 0.11992502212524414, "learning_rate": 0.002, "loss": 2.3591, "step": 131670 }, { "epoch": 0.5090380541510104, "grad_norm": 0.1117842048406601, "learning_rate": 0.002, "loss": 2.3348, "step": 131680 }, { "epoch": 0.5090767113543938, "grad_norm": 0.10868057608604431, "learning_rate": 0.002, "loss": 2.3441, "step": 131690 }, { "epoch": 0.5091153685577771, "grad_norm": 0.10284089297056198, "learning_rate": 0.002, "loss": 2.3412, "step": 131700 }, { "epoch": 0.5091540257611603, "grad_norm": 0.11913391947746277, "learning_rate": 0.002, "loss": 2.3428, "step": 131710 }, { "epoch": 0.5091926829645436, "grad_norm": 0.10378926992416382, "learning_rate": 0.002, "loss": 2.3481, "step": 131720 }, { "epoch": 0.5092313401679269, "grad_norm": 0.10352809727191925, "learning_rate": 0.002, "loss": 2.3533, "step": 131730 }, { "epoch": 0.5092699973713102, "grad_norm": 0.10070081800222397, "learning_rate": 0.002, "loss": 2.3418, "step": 131740 }, { "epoch": 0.5093086545746934, "grad_norm": 0.10485085844993591, "learning_rate": 0.002, "loss": 2.3521, "step": 131750 }, { "epoch": 0.5093473117780767, "grad_norm": 0.10146090388298035, "learning_rate": 0.002, "loss": 2.3669, "step": 131760 }, { "epoch": 0.50938596898146, "grad_norm": 0.12098173797130585, "learning_rate": 0.002, "loss": 2.3445, "step": 131770 }, { "epoch": 0.5094246261848433, "grad_norm": 0.11403773725032806, "learning_rate": 0.002, "loss": 2.3362, "step": 131780 }, { "epoch": 0.5094632833882266, "grad_norm": 0.09431008994579315, "learning_rate": 0.002, "loss": 2.3456, "step": 131790 }, { "epoch": 0.5095019405916098, "grad_norm": 0.11636164784431458, "learning_rate": 0.002, "loss": 2.3759, "step": 131800 }, { "epoch": 0.5095405977949932, "grad_norm": 0.09959069639444351, "learning_rate": 0.002, "loss": 2.3354, "step": 131810 }, { "epoch": 0.5095792549983764, "grad_norm": 0.11051137000322342, "learning_rate": 0.002, "loss": 2.3538, "step": 131820 }, { "epoch": 0.5096179122017597, "grad_norm": 0.10396791249513626, "learning_rate": 0.002, "loss": 2.349, "step": 131830 }, { "epoch": 0.5096565694051429, "grad_norm": 0.10696316510438919, "learning_rate": 0.002, "loss": 2.3525, "step": 131840 }, { "epoch": 0.5096952266085263, "grad_norm": 0.09929095953702927, "learning_rate": 0.002, "loss": 2.3445, "step": 131850 }, { "epoch": 0.5097338838119095, "grad_norm": 0.11605452746152878, "learning_rate": 0.002, "loss": 2.3421, "step": 131860 }, { "epoch": 0.5097725410152928, "grad_norm": 0.11871679127216339, "learning_rate": 0.002, "loss": 2.3465, "step": 131870 }, { "epoch": 0.509811198218676, "grad_norm": 0.10420700907707214, "learning_rate": 0.002, "loss": 2.3491, "step": 131880 }, { "epoch": 0.5098498554220593, "grad_norm": 0.11169622838497162, "learning_rate": 0.002, "loss": 2.3488, "step": 131890 }, { "epoch": 0.5098885126254427, "grad_norm": 0.09876314550638199, "learning_rate": 0.002, "loss": 2.354, "step": 131900 }, { "epoch": 0.5099271698288259, "grad_norm": 0.10073533654212952, "learning_rate": 0.002, "loss": 2.3474, "step": 131910 }, { "epoch": 0.5099658270322092, "grad_norm": 0.10538259148597717, "learning_rate": 0.002, "loss": 2.3347, "step": 131920 }, { "epoch": 0.5100044842355924, "grad_norm": 0.09020911902189255, "learning_rate": 0.002, "loss": 2.3594, "step": 131930 }, { "epoch": 0.5100431414389758, "grad_norm": 0.11380067467689514, "learning_rate": 0.002, "loss": 2.3375, "step": 131940 }, { "epoch": 0.510081798642359, "grad_norm": 0.1035328358411789, "learning_rate": 0.002, "loss": 2.3438, "step": 131950 }, { "epoch": 0.5101204558457423, "grad_norm": 0.1049763560295105, "learning_rate": 0.002, "loss": 2.3493, "step": 131960 }, { "epoch": 0.5101591130491255, "grad_norm": 0.11328870058059692, "learning_rate": 0.002, "loss": 2.3384, "step": 131970 }, { "epoch": 0.5101977702525089, "grad_norm": 0.11048241704702377, "learning_rate": 0.002, "loss": 2.3265, "step": 131980 }, { "epoch": 0.5102364274558921, "grad_norm": 0.10634758323431015, "learning_rate": 0.002, "loss": 2.3428, "step": 131990 }, { "epoch": 0.5102750846592754, "grad_norm": 0.11657426506280899, "learning_rate": 0.002, "loss": 2.3471, "step": 132000 }, { "epoch": 0.5103137418626587, "grad_norm": 0.1074022501707077, "learning_rate": 0.002, "loss": 2.3451, "step": 132010 }, { "epoch": 0.510352399066042, "grad_norm": 0.0970696434378624, "learning_rate": 0.002, "loss": 2.339, "step": 132020 }, { "epoch": 0.5103910562694253, "grad_norm": 0.10929609090089798, "learning_rate": 0.002, "loss": 2.3534, "step": 132030 }, { "epoch": 0.5104297134728085, "grad_norm": 0.10729232430458069, "learning_rate": 0.002, "loss": 2.339, "step": 132040 }, { "epoch": 0.5104683706761918, "grad_norm": 0.09950549900531769, "learning_rate": 0.002, "loss": 2.3523, "step": 132050 }, { "epoch": 0.510507027879575, "grad_norm": 0.10508369654417038, "learning_rate": 0.002, "loss": 2.3453, "step": 132060 }, { "epoch": 0.5105456850829584, "grad_norm": 0.12266229093074799, "learning_rate": 0.002, "loss": 2.3502, "step": 132070 }, { "epoch": 0.5105843422863416, "grad_norm": 0.10278153419494629, "learning_rate": 0.002, "loss": 2.3443, "step": 132080 }, { "epoch": 0.5106229994897249, "grad_norm": 0.10363687574863434, "learning_rate": 0.002, "loss": 2.3438, "step": 132090 }, { "epoch": 0.5106616566931081, "grad_norm": 0.11429458856582642, "learning_rate": 0.002, "loss": 2.3468, "step": 132100 }, { "epoch": 0.5107003138964915, "grad_norm": 0.0995219498872757, "learning_rate": 0.002, "loss": 2.3265, "step": 132110 }, { "epoch": 0.5107389710998748, "grad_norm": 0.11245020478963852, "learning_rate": 0.002, "loss": 2.3492, "step": 132120 }, { "epoch": 0.510777628303258, "grad_norm": 0.133084237575531, "learning_rate": 0.002, "loss": 2.3407, "step": 132130 }, { "epoch": 0.5108162855066413, "grad_norm": 0.11617525666952133, "learning_rate": 0.002, "loss": 2.3539, "step": 132140 }, { "epoch": 0.5108549427100246, "grad_norm": 0.09348950535058975, "learning_rate": 0.002, "loss": 2.3309, "step": 132150 }, { "epoch": 0.5108935999134079, "grad_norm": 0.1113290935754776, "learning_rate": 0.002, "loss": 2.3389, "step": 132160 }, { "epoch": 0.5109322571167911, "grad_norm": 0.10906348377466202, "learning_rate": 0.002, "loss": 2.3439, "step": 132170 }, { "epoch": 0.5109709143201744, "grad_norm": 0.12325325608253479, "learning_rate": 0.002, "loss": 2.3553, "step": 132180 }, { "epoch": 0.5110095715235577, "grad_norm": 0.1047777459025383, "learning_rate": 0.002, "loss": 2.3496, "step": 132190 }, { "epoch": 0.511048228726941, "grad_norm": 0.10495191812515259, "learning_rate": 0.002, "loss": 2.3363, "step": 132200 }, { "epoch": 0.5110868859303243, "grad_norm": 0.10061822086572647, "learning_rate": 0.002, "loss": 2.3434, "step": 132210 }, { "epoch": 0.5111255431337075, "grad_norm": 0.10761447250843048, "learning_rate": 0.002, "loss": 2.3346, "step": 132220 }, { "epoch": 0.5111642003370909, "grad_norm": 0.10504017025232315, "learning_rate": 0.002, "loss": 2.3448, "step": 132230 }, { "epoch": 0.5112028575404741, "grad_norm": 0.11160441488027573, "learning_rate": 0.002, "loss": 2.3558, "step": 132240 }, { "epoch": 0.5112415147438574, "grad_norm": 0.110514797270298, "learning_rate": 0.002, "loss": 2.3393, "step": 132250 }, { "epoch": 0.5112801719472406, "grad_norm": 0.11630698293447495, "learning_rate": 0.002, "loss": 2.3448, "step": 132260 }, { "epoch": 0.5113188291506239, "grad_norm": 0.11179690808057785, "learning_rate": 0.002, "loss": 2.3476, "step": 132270 }, { "epoch": 0.5113574863540072, "grad_norm": 0.10343910753726959, "learning_rate": 0.002, "loss": 2.3426, "step": 132280 }, { "epoch": 0.5113961435573905, "grad_norm": 0.10784193873405457, "learning_rate": 0.002, "loss": 2.3525, "step": 132290 }, { "epoch": 0.5114348007607737, "grad_norm": 0.10568580776453018, "learning_rate": 0.002, "loss": 2.3599, "step": 132300 }, { "epoch": 0.511473457964157, "grad_norm": 0.11240072548389435, "learning_rate": 0.002, "loss": 2.3401, "step": 132310 }, { "epoch": 0.5115121151675404, "grad_norm": 0.10685410350561142, "learning_rate": 0.002, "loss": 2.3422, "step": 132320 }, { "epoch": 0.5115507723709236, "grad_norm": 0.1029103696346283, "learning_rate": 0.002, "loss": 2.3494, "step": 132330 }, { "epoch": 0.5115894295743069, "grad_norm": 0.09647617489099503, "learning_rate": 0.002, "loss": 2.3366, "step": 132340 }, { "epoch": 0.5116280867776901, "grad_norm": 0.10524403303861618, "learning_rate": 0.002, "loss": 2.3421, "step": 132350 }, { "epoch": 0.5116667439810735, "grad_norm": 0.11678078025579453, "learning_rate": 0.002, "loss": 2.3516, "step": 132360 }, { "epoch": 0.5117054011844567, "grad_norm": 0.11135760694742203, "learning_rate": 0.002, "loss": 2.3424, "step": 132370 }, { "epoch": 0.51174405838784, "grad_norm": 0.10289976000785828, "learning_rate": 0.002, "loss": 2.3456, "step": 132380 }, { "epoch": 0.5117827155912232, "grad_norm": 0.0918273851275444, "learning_rate": 0.002, "loss": 2.341, "step": 132390 }, { "epoch": 0.5118213727946066, "grad_norm": 0.11923960596323013, "learning_rate": 0.002, "loss": 2.3384, "step": 132400 }, { "epoch": 0.5118600299979899, "grad_norm": 0.1056162640452385, "learning_rate": 0.002, "loss": 2.3358, "step": 132410 }, { "epoch": 0.5118986872013731, "grad_norm": 0.10509263724088669, "learning_rate": 0.002, "loss": 2.3657, "step": 132420 }, { "epoch": 0.5119373444047564, "grad_norm": 0.09654008597135544, "learning_rate": 0.002, "loss": 2.3453, "step": 132430 }, { "epoch": 0.5119760016081396, "grad_norm": 0.09543860703706741, "learning_rate": 0.002, "loss": 2.3473, "step": 132440 }, { "epoch": 0.512014658811523, "grad_norm": 0.11143346130847931, "learning_rate": 0.002, "loss": 2.3476, "step": 132450 }, { "epoch": 0.5120533160149062, "grad_norm": 0.1058533564209938, "learning_rate": 0.002, "loss": 2.3607, "step": 132460 }, { "epoch": 0.5120919732182895, "grad_norm": 0.10159727931022644, "learning_rate": 0.002, "loss": 2.3431, "step": 132470 }, { "epoch": 0.5121306304216727, "grad_norm": 0.10234866291284561, "learning_rate": 0.002, "loss": 2.3592, "step": 132480 }, { "epoch": 0.5121692876250561, "grad_norm": 0.1149788424372673, "learning_rate": 0.002, "loss": 2.3564, "step": 132490 }, { "epoch": 0.5122079448284393, "grad_norm": 0.11671125143766403, "learning_rate": 0.002, "loss": 2.3703, "step": 132500 }, { "epoch": 0.5122466020318226, "grad_norm": 0.10762688517570496, "learning_rate": 0.002, "loss": 2.3667, "step": 132510 }, { "epoch": 0.5122852592352058, "grad_norm": 0.10247467458248138, "learning_rate": 0.002, "loss": 2.342, "step": 132520 }, { "epoch": 0.5123239164385892, "grad_norm": 0.12695589661598206, "learning_rate": 0.002, "loss": 2.3261, "step": 132530 }, { "epoch": 0.5123625736419725, "grad_norm": 0.10451260209083557, "learning_rate": 0.002, "loss": 2.3503, "step": 132540 }, { "epoch": 0.5124012308453557, "grad_norm": 0.10632877051830292, "learning_rate": 0.002, "loss": 2.3188, "step": 132550 }, { "epoch": 0.512439888048739, "grad_norm": 0.1000732034444809, "learning_rate": 0.002, "loss": 2.3416, "step": 132560 }, { "epoch": 0.5124785452521223, "grad_norm": 0.12949375808238983, "learning_rate": 0.002, "loss": 2.3434, "step": 132570 }, { "epoch": 0.5125172024555056, "grad_norm": 0.10160006582736969, "learning_rate": 0.002, "loss": 2.3513, "step": 132580 }, { "epoch": 0.5125558596588888, "grad_norm": 0.13972491025924683, "learning_rate": 0.002, "loss": 2.3422, "step": 132590 }, { "epoch": 0.5125945168622721, "grad_norm": 0.09255162626504898, "learning_rate": 0.002, "loss": 2.3573, "step": 132600 }, { "epoch": 0.5126331740656553, "grad_norm": 0.09891640394926071, "learning_rate": 0.002, "loss": 2.3446, "step": 132610 }, { "epoch": 0.5126718312690387, "grad_norm": 0.09517667442560196, "learning_rate": 0.002, "loss": 2.3286, "step": 132620 }, { "epoch": 0.512710488472422, "grad_norm": 0.10801737755537033, "learning_rate": 0.002, "loss": 2.3373, "step": 132630 }, { "epoch": 0.5127491456758052, "grad_norm": 0.10930386185646057, "learning_rate": 0.002, "loss": 2.3453, "step": 132640 }, { "epoch": 0.5127878028791885, "grad_norm": 0.11170261353254318, "learning_rate": 0.002, "loss": 2.3401, "step": 132650 }, { "epoch": 0.5128264600825718, "grad_norm": 0.09209370613098145, "learning_rate": 0.002, "loss": 2.3394, "step": 132660 }, { "epoch": 0.5128651172859551, "grad_norm": 0.10556315630674362, "learning_rate": 0.002, "loss": 2.3415, "step": 132670 }, { "epoch": 0.5129037744893383, "grad_norm": 0.11175452172756195, "learning_rate": 0.002, "loss": 2.3504, "step": 132680 }, { "epoch": 0.5129424316927216, "grad_norm": 0.11552779376506805, "learning_rate": 0.002, "loss": 2.3459, "step": 132690 }, { "epoch": 0.5129810888961049, "grad_norm": 0.10478057712316513, "learning_rate": 0.002, "loss": 2.3474, "step": 132700 }, { "epoch": 0.5130197460994882, "grad_norm": 0.09821119159460068, "learning_rate": 0.002, "loss": 2.335, "step": 132710 }, { "epoch": 0.5130584033028714, "grad_norm": 0.12864336371421814, "learning_rate": 0.002, "loss": 2.3538, "step": 132720 }, { "epoch": 0.5130970605062547, "grad_norm": 0.12022227793931961, "learning_rate": 0.002, "loss": 2.348, "step": 132730 }, { "epoch": 0.5131357177096381, "grad_norm": 0.09698469191789627, "learning_rate": 0.002, "loss": 2.3567, "step": 132740 }, { "epoch": 0.5131743749130213, "grad_norm": 0.10591412335634232, "learning_rate": 0.002, "loss": 2.3411, "step": 132750 }, { "epoch": 0.5132130321164046, "grad_norm": 0.10472962260246277, "learning_rate": 0.002, "loss": 2.3472, "step": 132760 }, { "epoch": 0.5132516893197878, "grad_norm": 0.1055840253829956, "learning_rate": 0.002, "loss": 2.3516, "step": 132770 }, { "epoch": 0.5132903465231712, "grad_norm": 0.10502316057682037, "learning_rate": 0.002, "loss": 2.3507, "step": 132780 }, { "epoch": 0.5133290037265544, "grad_norm": 0.10915154218673706, "learning_rate": 0.002, "loss": 2.3569, "step": 132790 }, { "epoch": 0.5133676609299377, "grad_norm": 0.09325850754976273, "learning_rate": 0.002, "loss": 2.3458, "step": 132800 }, { "epoch": 0.5134063181333209, "grad_norm": 0.11692541092634201, "learning_rate": 0.002, "loss": 2.3459, "step": 132810 }, { "epoch": 0.5134449753367042, "grad_norm": 0.11866552382707596, "learning_rate": 0.002, "loss": 2.3477, "step": 132820 }, { "epoch": 0.5134836325400876, "grad_norm": 0.11210706830024719, "learning_rate": 0.002, "loss": 2.3509, "step": 132830 }, { "epoch": 0.5135222897434708, "grad_norm": 0.09434731304645538, "learning_rate": 0.002, "loss": 2.3406, "step": 132840 }, { "epoch": 0.5135609469468541, "grad_norm": 0.1452234834432602, "learning_rate": 0.002, "loss": 2.3476, "step": 132850 }, { "epoch": 0.5135996041502373, "grad_norm": 0.11293261498212814, "learning_rate": 0.002, "loss": 2.3473, "step": 132860 }, { "epoch": 0.5136382613536207, "grad_norm": 0.10713877528905869, "learning_rate": 0.002, "loss": 2.3553, "step": 132870 }, { "epoch": 0.5136769185570039, "grad_norm": 0.14340457320213318, "learning_rate": 0.002, "loss": 2.3536, "step": 132880 }, { "epoch": 0.5137155757603872, "grad_norm": 0.12019462138414383, "learning_rate": 0.002, "loss": 2.3422, "step": 132890 }, { "epoch": 0.5137542329637704, "grad_norm": 0.10262832045555115, "learning_rate": 0.002, "loss": 2.3528, "step": 132900 }, { "epoch": 0.5137928901671538, "grad_norm": 0.10447874665260315, "learning_rate": 0.002, "loss": 2.3515, "step": 132910 }, { "epoch": 0.513831547370537, "grad_norm": 0.09929387271404266, "learning_rate": 0.002, "loss": 2.3416, "step": 132920 }, { "epoch": 0.5138702045739203, "grad_norm": 0.09785928577184677, "learning_rate": 0.002, "loss": 2.3423, "step": 132930 }, { "epoch": 0.5139088617773035, "grad_norm": 0.11398641020059586, "learning_rate": 0.002, "loss": 2.3529, "step": 132940 }, { "epoch": 0.5139475189806869, "grad_norm": 0.13142992556095123, "learning_rate": 0.002, "loss": 2.3515, "step": 132950 }, { "epoch": 0.5139861761840702, "grad_norm": 0.10426430404186249, "learning_rate": 0.002, "loss": 2.3533, "step": 132960 }, { "epoch": 0.5140248333874534, "grad_norm": 0.0965687558054924, "learning_rate": 0.002, "loss": 2.3593, "step": 132970 }, { "epoch": 0.5140634905908367, "grad_norm": 0.10306452959775925, "learning_rate": 0.002, "loss": 2.3367, "step": 132980 }, { "epoch": 0.5141021477942199, "grad_norm": 0.10615862160921097, "learning_rate": 0.002, "loss": 2.3528, "step": 132990 }, { "epoch": 0.5141408049976033, "grad_norm": 0.10264278203248978, "learning_rate": 0.002, "loss": 2.3515, "step": 133000 }, { "epoch": 0.5141794622009865, "grad_norm": 0.09326222538948059, "learning_rate": 0.002, "loss": 2.3532, "step": 133010 }, { "epoch": 0.5142181194043698, "grad_norm": 0.09489531815052032, "learning_rate": 0.002, "loss": 2.3519, "step": 133020 }, { "epoch": 0.514256776607753, "grad_norm": 0.12088096141815186, "learning_rate": 0.002, "loss": 2.3382, "step": 133030 }, { "epoch": 0.5142954338111364, "grad_norm": 0.10753527283668518, "learning_rate": 0.002, "loss": 2.3427, "step": 133040 }, { "epoch": 0.5143340910145197, "grad_norm": 0.10076984018087387, "learning_rate": 0.002, "loss": 2.3477, "step": 133050 }, { "epoch": 0.5143727482179029, "grad_norm": 0.12687014043331146, "learning_rate": 0.002, "loss": 2.3449, "step": 133060 }, { "epoch": 0.5144114054212862, "grad_norm": 0.1204477995634079, "learning_rate": 0.002, "loss": 2.3344, "step": 133070 }, { "epoch": 0.5144500626246695, "grad_norm": 0.12178194522857666, "learning_rate": 0.002, "loss": 2.3329, "step": 133080 }, { "epoch": 0.5144887198280528, "grad_norm": 0.10629022866487503, "learning_rate": 0.002, "loss": 2.3572, "step": 133090 }, { "epoch": 0.514527377031436, "grad_norm": 0.11349408328533173, "learning_rate": 0.002, "loss": 2.339, "step": 133100 }, { "epoch": 0.5145660342348193, "grad_norm": 0.12075722217559814, "learning_rate": 0.002, "loss": 2.3487, "step": 133110 }, { "epoch": 0.5146046914382026, "grad_norm": 0.10138367116451263, "learning_rate": 0.002, "loss": 2.3547, "step": 133120 }, { "epoch": 0.5146433486415859, "grad_norm": 0.10058567672967911, "learning_rate": 0.002, "loss": 2.3527, "step": 133130 }, { "epoch": 0.5146820058449691, "grad_norm": 0.10954679548740387, "learning_rate": 0.002, "loss": 2.3509, "step": 133140 }, { "epoch": 0.5147206630483524, "grad_norm": 0.10420696437358856, "learning_rate": 0.002, "loss": 2.3478, "step": 133150 }, { "epoch": 0.5147593202517358, "grad_norm": 0.10656526684761047, "learning_rate": 0.002, "loss": 2.3529, "step": 133160 }, { "epoch": 0.514797977455119, "grad_norm": 0.10731308162212372, "learning_rate": 0.002, "loss": 2.3642, "step": 133170 }, { "epoch": 0.5148366346585023, "grad_norm": 0.1107126921415329, "learning_rate": 0.002, "loss": 2.3436, "step": 133180 }, { "epoch": 0.5148752918618855, "grad_norm": 0.10922086238861084, "learning_rate": 0.002, "loss": 2.3456, "step": 133190 }, { "epoch": 0.5149139490652688, "grad_norm": 0.10685280710458755, "learning_rate": 0.002, "loss": 2.344, "step": 133200 }, { "epoch": 0.5149526062686521, "grad_norm": 0.10599326342344284, "learning_rate": 0.002, "loss": 2.3433, "step": 133210 }, { "epoch": 0.5149912634720354, "grad_norm": 0.09880273789167404, "learning_rate": 0.002, "loss": 2.3449, "step": 133220 }, { "epoch": 0.5150299206754186, "grad_norm": 0.11078929156064987, "learning_rate": 0.002, "loss": 2.3342, "step": 133230 }, { "epoch": 0.5150685778788019, "grad_norm": 0.11526217311620712, "learning_rate": 0.002, "loss": 2.3635, "step": 133240 }, { "epoch": 0.5151072350821853, "grad_norm": 0.11422278732061386, "learning_rate": 0.002, "loss": 2.3597, "step": 133250 }, { "epoch": 0.5151458922855685, "grad_norm": 0.10045097023248672, "learning_rate": 0.002, "loss": 2.3453, "step": 133260 }, { "epoch": 0.5151845494889518, "grad_norm": 0.10135480016469955, "learning_rate": 0.002, "loss": 2.3376, "step": 133270 }, { "epoch": 0.515223206692335, "grad_norm": 0.09497664123773575, "learning_rate": 0.002, "loss": 2.3511, "step": 133280 }, { "epoch": 0.5152618638957184, "grad_norm": 0.10189198702573776, "learning_rate": 0.002, "loss": 2.3549, "step": 133290 }, { "epoch": 0.5153005210991016, "grad_norm": 0.112625353038311, "learning_rate": 0.002, "loss": 2.3263, "step": 133300 }, { "epoch": 0.5153391783024849, "grad_norm": 0.12322766333818436, "learning_rate": 0.002, "loss": 2.3417, "step": 133310 }, { "epoch": 0.5153778355058681, "grad_norm": 0.10485317558050156, "learning_rate": 0.002, "loss": 2.3495, "step": 133320 }, { "epoch": 0.5154164927092515, "grad_norm": 0.11122911423444748, "learning_rate": 0.002, "loss": 2.3586, "step": 133330 }, { "epoch": 0.5154551499126347, "grad_norm": 0.1240696832537651, "learning_rate": 0.002, "loss": 2.3473, "step": 133340 }, { "epoch": 0.515493807116018, "grad_norm": 0.09978976100683212, "learning_rate": 0.002, "loss": 2.3503, "step": 133350 }, { "epoch": 0.5155324643194013, "grad_norm": 0.11819953471422195, "learning_rate": 0.002, "loss": 2.3483, "step": 133360 }, { "epoch": 0.5155711215227845, "grad_norm": 0.10220865160226822, "learning_rate": 0.002, "loss": 2.3348, "step": 133370 }, { "epoch": 0.5156097787261679, "grad_norm": 0.11670244485139847, "learning_rate": 0.002, "loss": 2.3428, "step": 133380 }, { "epoch": 0.5156484359295511, "grad_norm": 0.12394854426383972, "learning_rate": 0.002, "loss": 2.3488, "step": 133390 }, { "epoch": 0.5156870931329344, "grad_norm": 0.12229776382446289, "learning_rate": 0.002, "loss": 2.3337, "step": 133400 }, { "epoch": 0.5157257503363176, "grad_norm": 0.1157093346118927, "learning_rate": 0.002, "loss": 2.3519, "step": 133410 }, { "epoch": 0.515764407539701, "grad_norm": 0.11021224409341812, "learning_rate": 0.002, "loss": 2.3366, "step": 133420 }, { "epoch": 0.5158030647430842, "grad_norm": 0.11094634234905243, "learning_rate": 0.002, "loss": 2.3615, "step": 133430 }, { "epoch": 0.5158417219464675, "grad_norm": 0.10788990557193756, "learning_rate": 0.002, "loss": 2.3529, "step": 133440 }, { "epoch": 0.5158803791498507, "grad_norm": 0.10132350027561188, "learning_rate": 0.002, "loss": 2.3417, "step": 133450 }, { "epoch": 0.5159190363532341, "grad_norm": 0.11580069363117218, "learning_rate": 0.002, "loss": 2.3548, "step": 133460 }, { "epoch": 0.5159576935566174, "grad_norm": 0.12661530077457428, "learning_rate": 0.002, "loss": 2.3502, "step": 133470 }, { "epoch": 0.5159963507600006, "grad_norm": 0.1014246866106987, "learning_rate": 0.002, "loss": 2.3595, "step": 133480 }, { "epoch": 0.5160350079633839, "grad_norm": 0.10210530459880829, "learning_rate": 0.002, "loss": 2.3341, "step": 133490 }, { "epoch": 0.5160736651667672, "grad_norm": 0.11067615449428558, "learning_rate": 0.002, "loss": 2.3389, "step": 133500 }, { "epoch": 0.5161123223701505, "grad_norm": 0.17499856650829315, "learning_rate": 0.002, "loss": 2.3611, "step": 133510 }, { "epoch": 0.5161509795735337, "grad_norm": 0.14115779101848602, "learning_rate": 0.002, "loss": 2.3674, "step": 133520 }, { "epoch": 0.516189636776917, "grad_norm": 0.11126365512609482, "learning_rate": 0.002, "loss": 2.3637, "step": 133530 }, { "epoch": 0.5162282939803002, "grad_norm": 0.09723678976297379, "learning_rate": 0.002, "loss": 2.3622, "step": 133540 }, { "epoch": 0.5162669511836836, "grad_norm": 0.09144878387451172, "learning_rate": 0.002, "loss": 2.3586, "step": 133550 }, { "epoch": 0.5163056083870669, "grad_norm": 0.1076786145567894, "learning_rate": 0.002, "loss": 2.3457, "step": 133560 }, { "epoch": 0.5163442655904501, "grad_norm": 0.110333651304245, "learning_rate": 0.002, "loss": 2.3419, "step": 133570 }, { "epoch": 0.5163829227938334, "grad_norm": 0.10729396343231201, "learning_rate": 0.002, "loss": 2.3511, "step": 133580 }, { "epoch": 0.5164215799972167, "grad_norm": 0.1079677864909172, "learning_rate": 0.002, "loss": 2.3423, "step": 133590 }, { "epoch": 0.5164602372006, "grad_norm": 0.11192754656076431, "learning_rate": 0.002, "loss": 2.3531, "step": 133600 }, { "epoch": 0.5164988944039832, "grad_norm": 0.11061155796051025, "learning_rate": 0.002, "loss": 2.3517, "step": 133610 }, { "epoch": 0.5165375516073665, "grad_norm": 0.37179362773895264, "learning_rate": 0.002, "loss": 2.339, "step": 133620 }, { "epoch": 0.5165762088107498, "grad_norm": 0.11642017960548401, "learning_rate": 0.002, "loss": 2.3573, "step": 133630 }, { "epoch": 0.5166148660141331, "grad_norm": 0.10036401450634003, "learning_rate": 0.002, "loss": 2.3452, "step": 133640 }, { "epoch": 0.5166535232175163, "grad_norm": 0.10902401804924011, "learning_rate": 0.002, "loss": 2.3476, "step": 133650 }, { "epoch": 0.5166921804208996, "grad_norm": 0.09906166046857834, "learning_rate": 0.002, "loss": 2.3469, "step": 133660 }, { "epoch": 0.516730837624283, "grad_norm": 0.0989346131682396, "learning_rate": 0.002, "loss": 2.3544, "step": 133670 }, { "epoch": 0.5167694948276662, "grad_norm": 0.1261736899614334, "learning_rate": 0.002, "loss": 2.3417, "step": 133680 }, { "epoch": 0.5168081520310495, "grad_norm": 0.10672534257173538, "learning_rate": 0.002, "loss": 2.3628, "step": 133690 }, { "epoch": 0.5168468092344327, "grad_norm": 0.10923762619495392, "learning_rate": 0.002, "loss": 2.3608, "step": 133700 }, { "epoch": 0.5168854664378161, "grad_norm": 0.10447768867015839, "learning_rate": 0.002, "loss": 2.3324, "step": 133710 }, { "epoch": 0.5169241236411993, "grad_norm": 0.13156598806381226, "learning_rate": 0.002, "loss": 2.3603, "step": 133720 }, { "epoch": 0.5169627808445826, "grad_norm": 0.11636074632406235, "learning_rate": 0.002, "loss": 2.3617, "step": 133730 }, { "epoch": 0.5170014380479658, "grad_norm": 0.093384750187397, "learning_rate": 0.002, "loss": 2.3408, "step": 133740 }, { "epoch": 0.5170400952513491, "grad_norm": 0.10545427352190018, "learning_rate": 0.002, "loss": 2.3559, "step": 133750 }, { "epoch": 0.5170787524547324, "grad_norm": 0.1253446340560913, "learning_rate": 0.002, "loss": 2.3548, "step": 133760 }, { "epoch": 0.5171174096581157, "grad_norm": 0.1060023084282875, "learning_rate": 0.002, "loss": 2.3529, "step": 133770 }, { "epoch": 0.517156066861499, "grad_norm": 0.11571837961673737, "learning_rate": 0.002, "loss": 2.3417, "step": 133780 }, { "epoch": 0.5171947240648822, "grad_norm": 0.13900338113307953, "learning_rate": 0.002, "loss": 2.3551, "step": 133790 }, { "epoch": 0.5172333812682656, "grad_norm": 0.10236191004514694, "learning_rate": 0.002, "loss": 2.3322, "step": 133800 }, { "epoch": 0.5172720384716488, "grad_norm": 0.10067922621965408, "learning_rate": 0.002, "loss": 2.3346, "step": 133810 }, { "epoch": 0.5173106956750321, "grad_norm": 0.10600189864635468, "learning_rate": 0.002, "loss": 2.3415, "step": 133820 }, { "epoch": 0.5173493528784153, "grad_norm": 0.12919996678829193, "learning_rate": 0.002, "loss": 2.3558, "step": 133830 }, { "epoch": 0.5173880100817987, "grad_norm": 0.1171584352850914, "learning_rate": 0.002, "loss": 2.3419, "step": 133840 }, { "epoch": 0.5174266672851819, "grad_norm": 0.12879115343093872, "learning_rate": 0.002, "loss": 2.3447, "step": 133850 }, { "epoch": 0.5174653244885652, "grad_norm": 0.09737744927406311, "learning_rate": 0.002, "loss": 2.3446, "step": 133860 }, { "epoch": 0.5175039816919484, "grad_norm": 0.1005203127861023, "learning_rate": 0.002, "loss": 2.3475, "step": 133870 }, { "epoch": 0.5175426388953318, "grad_norm": 0.10229729115962982, "learning_rate": 0.002, "loss": 2.3508, "step": 133880 }, { "epoch": 0.5175812960987151, "grad_norm": 0.09854832291603088, "learning_rate": 0.002, "loss": 2.3559, "step": 133890 }, { "epoch": 0.5176199533020983, "grad_norm": 0.11200766265392303, "learning_rate": 0.002, "loss": 2.3321, "step": 133900 }, { "epoch": 0.5176586105054816, "grad_norm": 0.10389938205480576, "learning_rate": 0.002, "loss": 2.3456, "step": 133910 }, { "epoch": 0.5176972677088648, "grad_norm": 0.12249651551246643, "learning_rate": 0.002, "loss": 2.3508, "step": 133920 }, { "epoch": 0.5177359249122482, "grad_norm": 0.09284602850675583, "learning_rate": 0.002, "loss": 2.3461, "step": 133930 }, { "epoch": 0.5177745821156314, "grad_norm": 0.12067190557718277, "learning_rate": 0.002, "loss": 2.3448, "step": 133940 }, { "epoch": 0.5178132393190147, "grad_norm": 0.09926807135343552, "learning_rate": 0.002, "loss": 2.3371, "step": 133950 }, { "epoch": 0.5178518965223979, "grad_norm": 0.13991133868694305, "learning_rate": 0.002, "loss": 2.3424, "step": 133960 }, { "epoch": 0.5178905537257813, "grad_norm": 0.11230497807264328, "learning_rate": 0.002, "loss": 2.3389, "step": 133970 }, { "epoch": 0.5179292109291646, "grad_norm": 0.10470463335514069, "learning_rate": 0.002, "loss": 2.3503, "step": 133980 }, { "epoch": 0.5179678681325478, "grad_norm": 0.116326704621315, "learning_rate": 0.002, "loss": 2.3475, "step": 133990 }, { "epoch": 0.5180065253359311, "grad_norm": 0.10669012367725372, "learning_rate": 0.002, "loss": 2.3471, "step": 134000 }, { "epoch": 0.5180451825393144, "grad_norm": 0.10244178026914597, "learning_rate": 0.002, "loss": 2.3438, "step": 134010 }, { "epoch": 0.5180838397426977, "grad_norm": 0.1011168584227562, "learning_rate": 0.002, "loss": 2.3441, "step": 134020 }, { "epoch": 0.5181224969460809, "grad_norm": 0.10668822377920151, "learning_rate": 0.002, "loss": 2.3412, "step": 134030 }, { "epoch": 0.5181611541494642, "grad_norm": 0.108344167470932, "learning_rate": 0.002, "loss": 2.3628, "step": 134040 }, { "epoch": 0.5181998113528475, "grad_norm": 0.1032029315829277, "learning_rate": 0.002, "loss": 2.3443, "step": 134050 }, { "epoch": 0.5182384685562308, "grad_norm": 0.10704102367162704, "learning_rate": 0.002, "loss": 2.3457, "step": 134060 }, { "epoch": 0.518277125759614, "grad_norm": 0.12949079275131226, "learning_rate": 0.002, "loss": 2.3475, "step": 134070 }, { "epoch": 0.5183157829629973, "grad_norm": 0.10617806762456894, "learning_rate": 0.002, "loss": 2.3425, "step": 134080 }, { "epoch": 0.5183544401663805, "grad_norm": 0.09986576437950134, "learning_rate": 0.002, "loss": 2.335, "step": 134090 }, { "epoch": 0.5183930973697639, "grad_norm": 0.15230610966682434, "learning_rate": 0.002, "loss": 2.3492, "step": 134100 }, { "epoch": 0.5184317545731472, "grad_norm": 0.10359156876802444, "learning_rate": 0.002, "loss": 2.3555, "step": 134110 }, { "epoch": 0.5184704117765304, "grad_norm": 0.1316675990819931, "learning_rate": 0.002, "loss": 2.3518, "step": 134120 }, { "epoch": 0.5185090689799137, "grad_norm": 0.10203003883361816, "learning_rate": 0.002, "loss": 2.3388, "step": 134130 }, { "epoch": 0.518547726183297, "grad_norm": 0.11022720485925674, "learning_rate": 0.002, "loss": 2.3452, "step": 134140 }, { "epoch": 0.5185863833866803, "grad_norm": 0.1270035207271576, "learning_rate": 0.002, "loss": 2.359, "step": 134150 }, { "epoch": 0.5186250405900635, "grad_norm": 0.10473570972681046, "learning_rate": 0.002, "loss": 2.3436, "step": 134160 }, { "epoch": 0.5186636977934468, "grad_norm": 0.0970320850610733, "learning_rate": 0.002, "loss": 2.3498, "step": 134170 }, { "epoch": 0.5187023549968302, "grad_norm": 0.09618403017520905, "learning_rate": 0.002, "loss": 2.3655, "step": 134180 }, { "epoch": 0.5187410122002134, "grad_norm": 0.11617468297481537, "learning_rate": 0.002, "loss": 2.3533, "step": 134190 }, { "epoch": 0.5187796694035967, "grad_norm": 0.1210232824087143, "learning_rate": 0.002, "loss": 2.3264, "step": 134200 }, { "epoch": 0.5188183266069799, "grad_norm": 0.12471597641706467, "learning_rate": 0.002, "loss": 2.3425, "step": 134210 }, { "epoch": 0.5188569838103633, "grad_norm": 0.09887123852968216, "learning_rate": 0.002, "loss": 2.354, "step": 134220 }, { "epoch": 0.5188956410137465, "grad_norm": 0.11552924662828445, "learning_rate": 0.002, "loss": 2.3642, "step": 134230 }, { "epoch": 0.5189342982171298, "grad_norm": 0.11775676161050797, "learning_rate": 0.002, "loss": 2.3604, "step": 134240 }, { "epoch": 0.518972955420513, "grad_norm": 0.1122833788394928, "learning_rate": 0.002, "loss": 2.3612, "step": 134250 }, { "epoch": 0.5190116126238964, "grad_norm": 0.11856210976839066, "learning_rate": 0.002, "loss": 2.3508, "step": 134260 }, { "epoch": 0.5190502698272796, "grad_norm": 0.09300824254751205, "learning_rate": 0.002, "loss": 2.3631, "step": 134270 }, { "epoch": 0.5190889270306629, "grad_norm": 0.11704012751579285, "learning_rate": 0.002, "loss": 2.3395, "step": 134280 }, { "epoch": 0.5191275842340461, "grad_norm": 0.10149552673101425, "learning_rate": 0.002, "loss": 2.3564, "step": 134290 }, { "epoch": 0.5191662414374294, "grad_norm": 0.11498774588108063, "learning_rate": 0.002, "loss": 2.3385, "step": 134300 }, { "epoch": 0.5192048986408128, "grad_norm": 0.08841398358345032, "learning_rate": 0.002, "loss": 2.3453, "step": 134310 }, { "epoch": 0.519243555844196, "grad_norm": 0.11097606271505356, "learning_rate": 0.002, "loss": 2.3503, "step": 134320 }, { "epoch": 0.5192822130475793, "grad_norm": 0.11205650866031647, "learning_rate": 0.002, "loss": 2.3577, "step": 134330 }, { "epoch": 0.5193208702509625, "grad_norm": 0.10886071622371674, "learning_rate": 0.002, "loss": 2.329, "step": 134340 }, { "epoch": 0.5193595274543459, "grad_norm": 0.09254167973995209, "learning_rate": 0.002, "loss": 2.3333, "step": 134350 }, { "epoch": 0.5193981846577291, "grad_norm": 0.10301493108272552, "learning_rate": 0.002, "loss": 2.3318, "step": 134360 }, { "epoch": 0.5194368418611124, "grad_norm": 0.11026881635189056, "learning_rate": 0.002, "loss": 2.3325, "step": 134370 }, { "epoch": 0.5194754990644956, "grad_norm": 0.11774619668722153, "learning_rate": 0.002, "loss": 2.3489, "step": 134380 }, { "epoch": 0.519514156267879, "grad_norm": 0.10522367805242538, "learning_rate": 0.002, "loss": 2.3396, "step": 134390 }, { "epoch": 0.5195528134712623, "grad_norm": 0.10401474684476852, "learning_rate": 0.002, "loss": 2.3465, "step": 134400 }, { "epoch": 0.5195914706746455, "grad_norm": 0.1115586906671524, "learning_rate": 0.002, "loss": 2.3584, "step": 134410 }, { "epoch": 0.5196301278780288, "grad_norm": 0.10123543441295624, "learning_rate": 0.002, "loss": 2.3506, "step": 134420 }, { "epoch": 0.5196687850814121, "grad_norm": 0.10290665924549103, "learning_rate": 0.002, "loss": 2.3389, "step": 134430 }, { "epoch": 0.5197074422847954, "grad_norm": 0.11987538635730743, "learning_rate": 0.002, "loss": 2.3292, "step": 134440 }, { "epoch": 0.5197460994881786, "grad_norm": 0.10227857530117035, "learning_rate": 0.002, "loss": 2.3415, "step": 134450 }, { "epoch": 0.5197847566915619, "grad_norm": 0.11408598721027374, "learning_rate": 0.002, "loss": 2.3546, "step": 134460 }, { "epoch": 0.5198234138949451, "grad_norm": 0.12978363037109375, "learning_rate": 0.002, "loss": 2.3407, "step": 134470 }, { "epoch": 0.5198620710983285, "grad_norm": 0.09933390468358994, "learning_rate": 0.002, "loss": 2.336, "step": 134480 }, { "epoch": 0.5199007283017117, "grad_norm": 0.11041887104511261, "learning_rate": 0.002, "loss": 2.3554, "step": 134490 }, { "epoch": 0.519939385505095, "grad_norm": 0.10262308269739151, "learning_rate": 0.002, "loss": 2.3633, "step": 134500 }, { "epoch": 0.5199780427084782, "grad_norm": 0.10440009087324142, "learning_rate": 0.002, "loss": 2.3614, "step": 134510 }, { "epoch": 0.5200166999118616, "grad_norm": 0.104102224111557, "learning_rate": 0.002, "loss": 2.3257, "step": 134520 }, { "epoch": 0.5200553571152449, "grad_norm": 0.10482411086559296, "learning_rate": 0.002, "loss": 2.3562, "step": 134530 }, { "epoch": 0.5200940143186281, "grad_norm": 0.11475711315870285, "learning_rate": 0.002, "loss": 2.3475, "step": 134540 }, { "epoch": 0.5201326715220114, "grad_norm": 0.1048799678683281, "learning_rate": 0.002, "loss": 2.3353, "step": 134550 }, { "epoch": 0.5201713287253947, "grad_norm": 0.1025136187672615, "learning_rate": 0.002, "loss": 2.3388, "step": 134560 }, { "epoch": 0.520209985928778, "grad_norm": 0.09366423636674881, "learning_rate": 0.002, "loss": 2.3467, "step": 134570 }, { "epoch": 0.5202486431321612, "grad_norm": 0.13303522765636444, "learning_rate": 0.002, "loss": 2.3441, "step": 134580 }, { "epoch": 0.5202873003355445, "grad_norm": 0.45014289021492004, "learning_rate": 0.002, "loss": 2.3542, "step": 134590 }, { "epoch": 0.5203259575389279, "grad_norm": 0.11356982588768005, "learning_rate": 0.002, "loss": 2.331, "step": 134600 }, { "epoch": 0.5203646147423111, "grad_norm": 0.10263145714998245, "learning_rate": 0.002, "loss": 2.3502, "step": 134610 }, { "epoch": 0.5204032719456944, "grad_norm": 0.1106531098484993, "learning_rate": 0.002, "loss": 2.3389, "step": 134620 }, { "epoch": 0.5204419291490776, "grad_norm": 0.12091255933046341, "learning_rate": 0.002, "loss": 2.3351, "step": 134630 }, { "epoch": 0.520480586352461, "grad_norm": 0.11186621338129044, "learning_rate": 0.002, "loss": 2.3361, "step": 134640 }, { "epoch": 0.5205192435558442, "grad_norm": 0.09535179287195206, "learning_rate": 0.002, "loss": 2.3395, "step": 134650 }, { "epoch": 0.5205579007592275, "grad_norm": 0.10127545893192291, "learning_rate": 0.002, "loss": 2.3423, "step": 134660 }, { "epoch": 0.5205965579626107, "grad_norm": 0.14372475445270538, "learning_rate": 0.002, "loss": 2.3554, "step": 134670 }, { "epoch": 0.520635215165994, "grad_norm": 0.10775496810674667, "learning_rate": 0.002, "loss": 2.3431, "step": 134680 }, { "epoch": 0.5206738723693773, "grad_norm": 0.10723396390676498, "learning_rate": 0.002, "loss": 2.3513, "step": 134690 }, { "epoch": 0.5207125295727606, "grad_norm": 0.14177311956882477, "learning_rate": 0.002, "loss": 2.3481, "step": 134700 }, { "epoch": 0.5207511867761438, "grad_norm": 0.10543669015169144, "learning_rate": 0.002, "loss": 2.3346, "step": 134710 }, { "epoch": 0.5207898439795271, "grad_norm": 0.12044474482536316, "learning_rate": 0.002, "loss": 2.3492, "step": 134720 }, { "epoch": 0.5208285011829105, "grad_norm": 0.10286377370357513, "learning_rate": 0.002, "loss": 2.3512, "step": 134730 }, { "epoch": 0.5208671583862937, "grad_norm": 0.10837053507566452, "learning_rate": 0.002, "loss": 2.3433, "step": 134740 }, { "epoch": 0.520905815589677, "grad_norm": 0.10814402252435684, "learning_rate": 0.002, "loss": 2.3615, "step": 134750 }, { "epoch": 0.5209444727930602, "grad_norm": 0.15393859148025513, "learning_rate": 0.002, "loss": 2.3438, "step": 134760 }, { "epoch": 0.5209831299964436, "grad_norm": 0.11631506681442261, "learning_rate": 0.002, "loss": 2.3504, "step": 134770 }, { "epoch": 0.5210217871998268, "grad_norm": 0.10414082556962967, "learning_rate": 0.002, "loss": 2.3519, "step": 134780 }, { "epoch": 0.5210604444032101, "grad_norm": 0.11460426449775696, "learning_rate": 0.002, "loss": 2.3545, "step": 134790 }, { "epoch": 0.5210991016065933, "grad_norm": 0.11126938462257385, "learning_rate": 0.002, "loss": 2.3412, "step": 134800 }, { "epoch": 0.5211377588099767, "grad_norm": 0.10752900689840317, "learning_rate": 0.002, "loss": 2.3373, "step": 134810 }, { "epoch": 0.52117641601336, "grad_norm": 0.10768181830644608, "learning_rate": 0.002, "loss": 2.3459, "step": 134820 }, { "epoch": 0.5212150732167432, "grad_norm": 0.12294381111860275, "learning_rate": 0.002, "loss": 2.3591, "step": 134830 }, { "epoch": 0.5212537304201265, "grad_norm": 0.10825446993112564, "learning_rate": 0.002, "loss": 2.3492, "step": 134840 }, { "epoch": 0.5212923876235097, "grad_norm": 0.11277685314416885, "learning_rate": 0.002, "loss": 2.3485, "step": 134850 }, { "epoch": 0.5213310448268931, "grad_norm": 0.10696902871131897, "learning_rate": 0.002, "loss": 2.3472, "step": 134860 }, { "epoch": 0.5213697020302763, "grad_norm": 0.11747557669878006, "learning_rate": 0.002, "loss": 2.3531, "step": 134870 }, { "epoch": 0.5214083592336596, "grad_norm": 0.10795747488737106, "learning_rate": 0.002, "loss": 2.354, "step": 134880 }, { "epoch": 0.5214470164370428, "grad_norm": 0.09496504813432693, "learning_rate": 0.002, "loss": 2.3497, "step": 134890 }, { "epoch": 0.5214856736404262, "grad_norm": 0.09993629902601242, "learning_rate": 0.002, "loss": 2.3547, "step": 134900 }, { "epoch": 0.5215243308438094, "grad_norm": 0.10213123261928558, "learning_rate": 0.002, "loss": 2.3208, "step": 134910 }, { "epoch": 0.5215629880471927, "grad_norm": 0.08959214389324188, "learning_rate": 0.002, "loss": 2.3526, "step": 134920 }, { "epoch": 0.521601645250576, "grad_norm": 0.09359659999608994, "learning_rate": 0.002, "loss": 2.3432, "step": 134930 }, { "epoch": 0.5216403024539593, "grad_norm": 0.10688213258981705, "learning_rate": 0.002, "loss": 2.3412, "step": 134940 }, { "epoch": 0.5216789596573426, "grad_norm": 0.11640635877847672, "learning_rate": 0.002, "loss": 2.3328, "step": 134950 }, { "epoch": 0.5217176168607258, "grad_norm": 0.09887672960758209, "learning_rate": 0.002, "loss": 2.3525, "step": 134960 }, { "epoch": 0.5217562740641091, "grad_norm": 0.10856257379055023, "learning_rate": 0.002, "loss": 2.328, "step": 134970 }, { "epoch": 0.5217949312674924, "grad_norm": 0.11670080572366714, "learning_rate": 0.002, "loss": 2.3565, "step": 134980 }, { "epoch": 0.5218335884708757, "grad_norm": 0.10856100916862488, "learning_rate": 0.002, "loss": 2.3504, "step": 134990 }, { "epoch": 0.5218722456742589, "grad_norm": 0.09527178853750229, "learning_rate": 0.002, "loss": 2.3413, "step": 135000 }, { "epoch": 0.5219109028776422, "grad_norm": 0.11452498286962509, "learning_rate": 0.002, "loss": 2.3411, "step": 135010 }, { "epoch": 0.5219495600810254, "grad_norm": 0.1107560321688652, "learning_rate": 0.002, "loss": 2.3597, "step": 135020 }, { "epoch": 0.5219882172844088, "grad_norm": 0.11319515854120255, "learning_rate": 0.002, "loss": 2.342, "step": 135030 }, { "epoch": 0.5220268744877921, "grad_norm": 0.09755564481019974, "learning_rate": 0.002, "loss": 2.3601, "step": 135040 }, { "epoch": 0.5220655316911753, "grad_norm": 0.11580151319503784, "learning_rate": 0.002, "loss": 2.3785, "step": 135050 }, { "epoch": 0.5221041888945586, "grad_norm": 0.10912448912858963, "learning_rate": 0.002, "loss": 2.3531, "step": 135060 }, { "epoch": 0.5221428460979419, "grad_norm": 0.10042090713977814, "learning_rate": 0.002, "loss": 2.3338, "step": 135070 }, { "epoch": 0.5221815033013252, "grad_norm": 0.09757044166326523, "learning_rate": 0.002, "loss": 2.3441, "step": 135080 }, { "epoch": 0.5222201605047084, "grad_norm": 0.09776173532009125, "learning_rate": 0.002, "loss": 2.367, "step": 135090 }, { "epoch": 0.5222588177080917, "grad_norm": 0.11984714865684509, "learning_rate": 0.002, "loss": 2.3316, "step": 135100 }, { "epoch": 0.522297474911475, "grad_norm": 0.10493864119052887, "learning_rate": 0.002, "loss": 2.3478, "step": 135110 }, { "epoch": 0.5223361321148583, "grad_norm": 0.1006794199347496, "learning_rate": 0.002, "loss": 2.3472, "step": 135120 }, { "epoch": 0.5223747893182416, "grad_norm": 0.12291432172060013, "learning_rate": 0.002, "loss": 2.3522, "step": 135130 }, { "epoch": 0.5224134465216248, "grad_norm": 0.11559300869703293, "learning_rate": 0.002, "loss": 2.3411, "step": 135140 }, { "epoch": 0.5224521037250082, "grad_norm": 0.10269319266080856, "learning_rate": 0.002, "loss": 2.3397, "step": 135150 }, { "epoch": 0.5224907609283914, "grad_norm": 0.11249280720949173, "learning_rate": 0.002, "loss": 2.3435, "step": 135160 }, { "epoch": 0.5225294181317747, "grad_norm": 0.10752243548631668, "learning_rate": 0.002, "loss": 2.3369, "step": 135170 }, { "epoch": 0.5225680753351579, "grad_norm": 0.10211756080389023, "learning_rate": 0.002, "loss": 2.3406, "step": 135180 }, { "epoch": 0.5226067325385413, "grad_norm": 0.12007226049900055, "learning_rate": 0.002, "loss": 2.3464, "step": 135190 }, { "epoch": 0.5226453897419245, "grad_norm": 0.10831714421510696, "learning_rate": 0.002, "loss": 2.3523, "step": 135200 }, { "epoch": 0.5226840469453078, "grad_norm": 0.09663467854261398, "learning_rate": 0.002, "loss": 2.338, "step": 135210 }, { "epoch": 0.522722704148691, "grad_norm": 0.11312247067689896, "learning_rate": 0.002, "loss": 2.3548, "step": 135220 }, { "epoch": 0.5227613613520743, "grad_norm": 0.10021994262933731, "learning_rate": 0.002, "loss": 2.3563, "step": 135230 }, { "epoch": 0.5228000185554577, "grad_norm": 0.12776386737823486, "learning_rate": 0.002, "loss": 2.3397, "step": 135240 }, { "epoch": 0.5228386757588409, "grad_norm": 0.11067812889814377, "learning_rate": 0.002, "loss": 2.3261, "step": 135250 }, { "epoch": 0.5228773329622242, "grad_norm": 0.1040908619761467, "learning_rate": 0.002, "loss": 2.3412, "step": 135260 }, { "epoch": 0.5229159901656074, "grad_norm": 0.11826976388692856, "learning_rate": 0.002, "loss": 2.3357, "step": 135270 }, { "epoch": 0.5229546473689908, "grad_norm": 0.10310089588165283, "learning_rate": 0.002, "loss": 2.3457, "step": 135280 }, { "epoch": 0.522993304572374, "grad_norm": 0.13205021619796753, "learning_rate": 0.002, "loss": 2.3411, "step": 135290 }, { "epoch": 0.5230319617757573, "grad_norm": 0.0964132621884346, "learning_rate": 0.002, "loss": 2.3562, "step": 135300 }, { "epoch": 0.5230706189791405, "grad_norm": 0.10004474967718124, "learning_rate": 0.002, "loss": 2.3497, "step": 135310 }, { "epoch": 0.5231092761825239, "grad_norm": 0.1004033163189888, "learning_rate": 0.002, "loss": 2.3537, "step": 135320 }, { "epoch": 0.5231479333859071, "grad_norm": 0.09789041429758072, "learning_rate": 0.002, "loss": 2.3488, "step": 135330 }, { "epoch": 0.5231865905892904, "grad_norm": 0.13248370587825775, "learning_rate": 0.002, "loss": 2.3518, "step": 135340 }, { "epoch": 0.5232252477926737, "grad_norm": 0.10540876537561417, "learning_rate": 0.002, "loss": 2.3516, "step": 135350 }, { "epoch": 0.523263904996057, "grad_norm": 0.09602527320384979, "learning_rate": 0.002, "loss": 2.3471, "step": 135360 }, { "epoch": 0.5233025621994403, "grad_norm": 0.10474893450737, "learning_rate": 0.002, "loss": 2.3478, "step": 135370 }, { "epoch": 0.5233412194028235, "grad_norm": 0.08683910965919495, "learning_rate": 0.002, "loss": 2.3383, "step": 135380 }, { "epoch": 0.5233798766062068, "grad_norm": 0.14607366919517517, "learning_rate": 0.002, "loss": 2.3361, "step": 135390 }, { "epoch": 0.52341853380959, "grad_norm": 0.10965852439403534, "learning_rate": 0.002, "loss": 2.3436, "step": 135400 }, { "epoch": 0.5234571910129734, "grad_norm": 0.15924514830112457, "learning_rate": 0.002, "loss": 2.3436, "step": 135410 }, { "epoch": 0.5234958482163566, "grad_norm": 0.11714666336774826, "learning_rate": 0.002, "loss": 2.3542, "step": 135420 }, { "epoch": 0.5235345054197399, "grad_norm": 0.09994805604219437, "learning_rate": 0.002, "loss": 2.3427, "step": 135430 }, { "epoch": 0.5235731626231231, "grad_norm": 0.09854995459318161, "learning_rate": 0.002, "loss": 2.3529, "step": 135440 }, { "epoch": 0.5236118198265065, "grad_norm": 0.11116635799407959, "learning_rate": 0.002, "loss": 2.3481, "step": 135450 }, { "epoch": 0.5236504770298898, "grad_norm": 0.10123293846845627, "learning_rate": 0.002, "loss": 2.3573, "step": 135460 }, { "epoch": 0.523689134233273, "grad_norm": 0.10389833152294159, "learning_rate": 0.002, "loss": 2.3496, "step": 135470 }, { "epoch": 0.5237277914366563, "grad_norm": 0.11927933990955353, "learning_rate": 0.002, "loss": 2.3501, "step": 135480 }, { "epoch": 0.5237664486400396, "grad_norm": 0.11789486557245255, "learning_rate": 0.002, "loss": 2.3589, "step": 135490 }, { "epoch": 0.5238051058434229, "grad_norm": 0.08775586634874344, "learning_rate": 0.002, "loss": 2.3453, "step": 135500 }, { "epoch": 0.5238437630468061, "grad_norm": 0.10228339582681656, "learning_rate": 0.002, "loss": 2.3271, "step": 135510 }, { "epoch": 0.5238824202501894, "grad_norm": 0.10572341084480286, "learning_rate": 0.002, "loss": 2.3549, "step": 135520 }, { "epoch": 0.5239210774535727, "grad_norm": 0.10267415642738342, "learning_rate": 0.002, "loss": 2.3353, "step": 135530 }, { "epoch": 0.523959734656956, "grad_norm": 0.10416428744792938, "learning_rate": 0.002, "loss": 2.3569, "step": 135540 }, { "epoch": 0.5239983918603393, "grad_norm": 0.1263497918844223, "learning_rate": 0.002, "loss": 2.3384, "step": 135550 }, { "epoch": 0.5240370490637225, "grad_norm": 0.1601221263408661, "learning_rate": 0.002, "loss": 2.3356, "step": 135560 }, { "epoch": 0.5240757062671059, "grad_norm": 0.11268121749162674, "learning_rate": 0.002, "loss": 2.345, "step": 135570 }, { "epoch": 0.5241143634704891, "grad_norm": 0.1266414374113083, "learning_rate": 0.002, "loss": 2.3352, "step": 135580 }, { "epoch": 0.5241530206738724, "grad_norm": 0.119015634059906, "learning_rate": 0.002, "loss": 2.3536, "step": 135590 }, { "epoch": 0.5241916778772556, "grad_norm": 0.10737226158380508, "learning_rate": 0.002, "loss": 2.3557, "step": 135600 }, { "epoch": 0.5242303350806389, "grad_norm": 0.09539292752742767, "learning_rate": 0.002, "loss": 2.3371, "step": 135610 }, { "epoch": 0.5242689922840222, "grad_norm": 0.09416766464710236, "learning_rate": 0.002, "loss": 2.3606, "step": 135620 }, { "epoch": 0.5243076494874055, "grad_norm": 0.0924883484840393, "learning_rate": 0.002, "loss": 2.346, "step": 135630 }, { "epoch": 0.5243463066907887, "grad_norm": 0.11176314949989319, "learning_rate": 0.002, "loss": 2.3355, "step": 135640 }, { "epoch": 0.524384963894172, "grad_norm": 0.13459579646587372, "learning_rate": 0.002, "loss": 2.3465, "step": 135650 }, { "epoch": 0.5244236210975554, "grad_norm": 0.11950015276670456, "learning_rate": 0.002, "loss": 2.3486, "step": 135660 }, { "epoch": 0.5244622783009386, "grad_norm": 0.12850578129291534, "learning_rate": 0.002, "loss": 2.3457, "step": 135670 }, { "epoch": 0.5245009355043219, "grad_norm": 0.10969886183738708, "learning_rate": 0.002, "loss": 2.3446, "step": 135680 }, { "epoch": 0.5245395927077051, "grad_norm": 0.12006625533103943, "learning_rate": 0.002, "loss": 2.3516, "step": 135690 }, { "epoch": 0.5245782499110885, "grad_norm": 0.11588145047426224, "learning_rate": 0.002, "loss": 2.3398, "step": 135700 }, { "epoch": 0.5246169071144717, "grad_norm": 0.12310832738876343, "learning_rate": 0.002, "loss": 2.347, "step": 135710 }, { "epoch": 0.524655564317855, "grad_norm": 0.09691675752401352, "learning_rate": 0.002, "loss": 2.3287, "step": 135720 }, { "epoch": 0.5246942215212382, "grad_norm": 0.12335009127855301, "learning_rate": 0.002, "loss": 2.3663, "step": 135730 }, { "epoch": 0.5247328787246216, "grad_norm": 0.11544227600097656, "learning_rate": 0.002, "loss": 2.3519, "step": 135740 }, { "epoch": 0.5247715359280049, "grad_norm": 0.12592443823814392, "learning_rate": 0.002, "loss": 2.3542, "step": 135750 }, { "epoch": 0.5248101931313881, "grad_norm": 0.1194482147693634, "learning_rate": 0.002, "loss": 2.3412, "step": 135760 }, { "epoch": 0.5248488503347714, "grad_norm": 0.10435574501752853, "learning_rate": 0.002, "loss": 2.3505, "step": 135770 }, { "epoch": 0.5248875075381546, "grad_norm": 0.10019376128911972, "learning_rate": 0.002, "loss": 2.3346, "step": 135780 }, { "epoch": 0.524926164741538, "grad_norm": 0.10837697237730026, "learning_rate": 0.002, "loss": 2.3377, "step": 135790 }, { "epoch": 0.5249648219449212, "grad_norm": 0.12371774017810822, "learning_rate": 0.002, "loss": 2.3601, "step": 135800 }, { "epoch": 0.5250034791483045, "grad_norm": 0.09747034311294556, "learning_rate": 0.002, "loss": 2.344, "step": 135810 }, { "epoch": 0.5250421363516877, "grad_norm": 0.10710786283016205, "learning_rate": 0.002, "loss": 2.3466, "step": 135820 }, { "epoch": 0.5250807935550711, "grad_norm": 0.11960620433092117, "learning_rate": 0.002, "loss": 2.3476, "step": 135830 }, { "epoch": 0.5251194507584543, "grad_norm": 0.11846979707479477, "learning_rate": 0.002, "loss": 2.348, "step": 135840 }, { "epoch": 0.5251581079618376, "grad_norm": 0.12157097458839417, "learning_rate": 0.002, "loss": 2.3587, "step": 135850 }, { "epoch": 0.5251967651652208, "grad_norm": 0.1204945296049118, "learning_rate": 0.002, "loss": 2.3461, "step": 135860 }, { "epoch": 0.5252354223686042, "grad_norm": 0.10225175321102142, "learning_rate": 0.002, "loss": 2.3512, "step": 135870 }, { "epoch": 0.5252740795719875, "grad_norm": 0.11851023882627487, "learning_rate": 0.002, "loss": 2.3363, "step": 135880 }, { "epoch": 0.5253127367753707, "grad_norm": 0.09924530982971191, "learning_rate": 0.002, "loss": 2.3524, "step": 135890 }, { "epoch": 0.525351393978754, "grad_norm": 0.10862696170806885, "learning_rate": 0.002, "loss": 2.3368, "step": 135900 }, { "epoch": 0.5253900511821373, "grad_norm": 0.09692274034023285, "learning_rate": 0.002, "loss": 2.3539, "step": 135910 }, { "epoch": 0.5254287083855206, "grad_norm": 0.10528989136219025, "learning_rate": 0.002, "loss": 2.3625, "step": 135920 }, { "epoch": 0.5254673655889038, "grad_norm": 0.10078863799571991, "learning_rate": 0.002, "loss": 2.3394, "step": 135930 }, { "epoch": 0.5255060227922871, "grad_norm": 0.1006772443652153, "learning_rate": 0.002, "loss": 2.3499, "step": 135940 }, { "epoch": 0.5255446799956703, "grad_norm": 0.10570927709341049, "learning_rate": 0.002, "loss": 2.3387, "step": 135950 }, { "epoch": 0.5255833371990537, "grad_norm": 0.10867559164762497, "learning_rate": 0.002, "loss": 2.3513, "step": 135960 }, { "epoch": 0.525621994402437, "grad_norm": 0.10470971465110779, "learning_rate": 0.002, "loss": 2.3518, "step": 135970 }, { "epoch": 0.5256606516058202, "grad_norm": 0.10036683082580566, "learning_rate": 0.002, "loss": 2.3558, "step": 135980 }, { "epoch": 0.5256993088092035, "grad_norm": 0.11269212514162064, "learning_rate": 0.002, "loss": 2.3397, "step": 135990 }, { "epoch": 0.5257379660125868, "grad_norm": 0.10223264992237091, "learning_rate": 0.002, "loss": 2.3548, "step": 136000 }, { "epoch": 0.5257766232159701, "grad_norm": 0.12405657023191452, "learning_rate": 0.002, "loss": 2.361, "step": 136010 }, { "epoch": 0.5258152804193533, "grad_norm": 0.11630851775407791, "learning_rate": 0.002, "loss": 2.3476, "step": 136020 }, { "epoch": 0.5258539376227366, "grad_norm": 0.11915447562932968, "learning_rate": 0.002, "loss": 2.3513, "step": 136030 }, { "epoch": 0.5258925948261199, "grad_norm": 0.13056451082229614, "learning_rate": 0.002, "loss": 2.3464, "step": 136040 }, { "epoch": 0.5259312520295032, "grad_norm": 0.12401597201824188, "learning_rate": 0.002, "loss": 2.3514, "step": 136050 }, { "epoch": 0.5259699092328864, "grad_norm": 0.1053740456700325, "learning_rate": 0.002, "loss": 2.3591, "step": 136060 }, { "epoch": 0.5260085664362697, "grad_norm": 0.12475787848234177, "learning_rate": 0.002, "loss": 2.3456, "step": 136070 }, { "epoch": 0.5260472236396531, "grad_norm": 0.10645383596420288, "learning_rate": 0.002, "loss": 2.3372, "step": 136080 }, { "epoch": 0.5260858808430363, "grad_norm": 0.10044417530298233, "learning_rate": 0.002, "loss": 2.3581, "step": 136090 }, { "epoch": 0.5261245380464196, "grad_norm": 0.11066542565822601, "learning_rate": 0.002, "loss": 2.3491, "step": 136100 }, { "epoch": 0.5261631952498028, "grad_norm": 0.1240231841802597, "learning_rate": 0.002, "loss": 2.3595, "step": 136110 }, { "epoch": 0.5262018524531862, "grad_norm": 0.10536878556013107, "learning_rate": 0.002, "loss": 2.3501, "step": 136120 }, { "epoch": 0.5262405096565694, "grad_norm": 0.11693378537893295, "learning_rate": 0.002, "loss": 2.3414, "step": 136130 }, { "epoch": 0.5262791668599527, "grad_norm": 0.11828534305095673, "learning_rate": 0.002, "loss": 2.3562, "step": 136140 }, { "epoch": 0.5263178240633359, "grad_norm": 0.12139234691858292, "learning_rate": 0.002, "loss": 2.3285, "step": 136150 }, { "epoch": 0.5263564812667192, "grad_norm": 0.11577997356653214, "learning_rate": 0.002, "loss": 2.3531, "step": 136160 }, { "epoch": 0.5263951384701026, "grad_norm": 0.09906233102083206, "learning_rate": 0.002, "loss": 2.3515, "step": 136170 }, { "epoch": 0.5264337956734858, "grad_norm": 0.11612803488969803, "learning_rate": 0.002, "loss": 2.3442, "step": 136180 }, { "epoch": 0.5264724528768691, "grad_norm": 0.10978134721517563, "learning_rate": 0.002, "loss": 2.3507, "step": 136190 }, { "epoch": 0.5265111100802523, "grad_norm": 0.12343063950538635, "learning_rate": 0.002, "loss": 2.3327, "step": 136200 }, { "epoch": 0.5265497672836357, "grad_norm": 0.1064305379986763, "learning_rate": 0.002, "loss": 2.3364, "step": 136210 }, { "epoch": 0.5265884244870189, "grad_norm": 0.09350063651800156, "learning_rate": 0.002, "loss": 2.3314, "step": 136220 }, { "epoch": 0.5266270816904022, "grad_norm": 0.10289590060710907, "learning_rate": 0.002, "loss": 2.3348, "step": 136230 }, { "epoch": 0.5266657388937854, "grad_norm": 0.12419820576906204, "learning_rate": 0.002, "loss": 2.3601, "step": 136240 }, { "epoch": 0.5267043960971688, "grad_norm": 0.09969479590654373, "learning_rate": 0.002, "loss": 2.343, "step": 136250 }, { "epoch": 0.526743053300552, "grad_norm": 0.13299071788787842, "learning_rate": 0.002, "loss": 2.348, "step": 136260 }, { "epoch": 0.5267817105039353, "grad_norm": 0.11333530396223068, "learning_rate": 0.002, "loss": 2.3428, "step": 136270 }, { "epoch": 0.5268203677073185, "grad_norm": 0.11156991869211197, "learning_rate": 0.002, "loss": 2.3525, "step": 136280 }, { "epoch": 0.5268590249107019, "grad_norm": 0.10726924240589142, "learning_rate": 0.002, "loss": 2.3368, "step": 136290 }, { "epoch": 0.5268976821140852, "grad_norm": 0.0964454635977745, "learning_rate": 0.002, "loss": 2.3452, "step": 136300 }, { "epoch": 0.5269363393174684, "grad_norm": 0.10612018406391144, "learning_rate": 0.002, "loss": 2.3435, "step": 136310 }, { "epoch": 0.5269749965208517, "grad_norm": 0.10159798711538315, "learning_rate": 0.002, "loss": 2.3504, "step": 136320 }, { "epoch": 0.5270136537242349, "grad_norm": 0.10592789202928543, "learning_rate": 0.002, "loss": 2.3643, "step": 136330 }, { "epoch": 0.5270523109276183, "grad_norm": 0.12359337508678436, "learning_rate": 0.002, "loss": 2.3567, "step": 136340 }, { "epoch": 0.5270909681310015, "grad_norm": 0.11304433643817902, "learning_rate": 0.002, "loss": 2.3726, "step": 136350 }, { "epoch": 0.5271296253343848, "grad_norm": 0.10624481737613678, "learning_rate": 0.002, "loss": 2.327, "step": 136360 }, { "epoch": 0.527168282537768, "grad_norm": 0.10739625990390778, "learning_rate": 0.002, "loss": 2.3489, "step": 136370 }, { "epoch": 0.5272069397411514, "grad_norm": 0.11046303808689117, "learning_rate": 0.002, "loss": 2.3456, "step": 136380 }, { "epoch": 0.5272455969445347, "grad_norm": 0.12088941782712936, "learning_rate": 0.002, "loss": 2.3442, "step": 136390 }, { "epoch": 0.5272842541479179, "grad_norm": 0.10500706732273102, "learning_rate": 0.002, "loss": 2.3379, "step": 136400 }, { "epoch": 0.5273229113513012, "grad_norm": 0.10844823718070984, "learning_rate": 0.002, "loss": 2.3499, "step": 136410 }, { "epoch": 0.5273615685546845, "grad_norm": 0.10434307903051376, "learning_rate": 0.002, "loss": 2.3404, "step": 136420 }, { "epoch": 0.5274002257580678, "grad_norm": 0.09942899644374847, "learning_rate": 0.002, "loss": 2.3474, "step": 136430 }, { "epoch": 0.527438882961451, "grad_norm": 0.09675901383161545, "learning_rate": 0.002, "loss": 2.3616, "step": 136440 }, { "epoch": 0.5274775401648343, "grad_norm": 0.10810984671115875, "learning_rate": 0.002, "loss": 2.3557, "step": 136450 }, { "epoch": 0.5275161973682176, "grad_norm": 0.10786008089780807, "learning_rate": 0.002, "loss": 2.3435, "step": 136460 }, { "epoch": 0.5275548545716009, "grad_norm": 0.11267884075641632, "learning_rate": 0.002, "loss": 2.3458, "step": 136470 }, { "epoch": 0.5275935117749841, "grad_norm": 0.11689767241477966, "learning_rate": 0.002, "loss": 2.3476, "step": 136480 }, { "epoch": 0.5276321689783674, "grad_norm": 0.11251311749219894, "learning_rate": 0.002, "loss": 2.337, "step": 136490 }, { "epoch": 0.5276708261817508, "grad_norm": 0.1003497913479805, "learning_rate": 0.002, "loss": 2.3369, "step": 136500 }, { "epoch": 0.527709483385134, "grad_norm": 0.11317698657512665, "learning_rate": 0.002, "loss": 2.3433, "step": 136510 }, { "epoch": 0.5277481405885173, "grad_norm": 0.10377456247806549, "learning_rate": 0.002, "loss": 2.3351, "step": 136520 }, { "epoch": 0.5277867977919005, "grad_norm": 0.11414996534585953, "learning_rate": 0.002, "loss": 2.3444, "step": 136530 }, { "epoch": 0.5278254549952838, "grad_norm": 0.10959716886281967, "learning_rate": 0.002, "loss": 2.3302, "step": 136540 }, { "epoch": 0.5278641121986671, "grad_norm": 0.11749317497015, "learning_rate": 0.002, "loss": 2.3555, "step": 136550 }, { "epoch": 0.5279027694020504, "grad_norm": 0.10722503066062927, "learning_rate": 0.002, "loss": 2.3353, "step": 136560 }, { "epoch": 0.5279414266054336, "grad_norm": 0.09356562793254852, "learning_rate": 0.002, "loss": 2.3395, "step": 136570 }, { "epoch": 0.5279800838088169, "grad_norm": 0.1076977327466011, "learning_rate": 0.002, "loss": 2.3494, "step": 136580 }, { "epoch": 0.5280187410122003, "grad_norm": 0.10418250411748886, "learning_rate": 0.002, "loss": 2.3477, "step": 136590 }, { "epoch": 0.5280573982155835, "grad_norm": 0.10491588711738586, "learning_rate": 0.002, "loss": 2.3457, "step": 136600 }, { "epoch": 0.5280960554189668, "grad_norm": 0.10587549209594727, "learning_rate": 0.002, "loss": 2.3584, "step": 136610 }, { "epoch": 0.52813471262235, "grad_norm": 0.10736706107854843, "learning_rate": 0.002, "loss": 2.3298, "step": 136620 }, { "epoch": 0.5281733698257334, "grad_norm": 0.09436357021331787, "learning_rate": 0.002, "loss": 2.349, "step": 136630 }, { "epoch": 0.5282120270291166, "grad_norm": 0.10800494253635406, "learning_rate": 0.002, "loss": 2.3487, "step": 136640 }, { "epoch": 0.5282506842324999, "grad_norm": 0.10391030460596085, "learning_rate": 0.002, "loss": 2.3439, "step": 136650 }, { "epoch": 0.5282893414358831, "grad_norm": 0.1147724837064743, "learning_rate": 0.002, "loss": 2.3334, "step": 136660 }, { "epoch": 0.5283279986392665, "grad_norm": 0.10503552854061127, "learning_rate": 0.002, "loss": 2.3493, "step": 136670 }, { "epoch": 0.5283666558426497, "grad_norm": 0.11024164408445358, "learning_rate": 0.002, "loss": 2.3339, "step": 136680 }, { "epoch": 0.528405313046033, "grad_norm": 0.10197118669748306, "learning_rate": 0.002, "loss": 2.3415, "step": 136690 }, { "epoch": 0.5284439702494163, "grad_norm": 0.10183871537446976, "learning_rate": 0.002, "loss": 2.3386, "step": 136700 }, { "epoch": 0.5284826274527995, "grad_norm": 0.10741639137268066, "learning_rate": 0.002, "loss": 2.3302, "step": 136710 }, { "epoch": 0.5285212846561829, "grad_norm": 0.1323143094778061, "learning_rate": 0.002, "loss": 2.3596, "step": 136720 }, { "epoch": 0.5285599418595661, "grad_norm": 0.1085241287946701, "learning_rate": 0.002, "loss": 2.3462, "step": 136730 }, { "epoch": 0.5285985990629494, "grad_norm": 0.10665200650691986, "learning_rate": 0.002, "loss": 2.3483, "step": 136740 }, { "epoch": 0.5286372562663326, "grad_norm": 0.10080068558454514, "learning_rate": 0.002, "loss": 2.3475, "step": 136750 }, { "epoch": 0.528675913469716, "grad_norm": 0.11925096064805984, "learning_rate": 0.002, "loss": 2.3459, "step": 136760 }, { "epoch": 0.5287145706730992, "grad_norm": 0.11695779860019684, "learning_rate": 0.002, "loss": 2.3557, "step": 136770 }, { "epoch": 0.5287532278764825, "grad_norm": 0.12991879880428314, "learning_rate": 0.002, "loss": 2.3462, "step": 136780 }, { "epoch": 0.5287918850798657, "grad_norm": 0.13205960392951965, "learning_rate": 0.002, "loss": 2.3682, "step": 136790 }, { "epoch": 0.5288305422832491, "grad_norm": 0.0976249948143959, "learning_rate": 0.002, "loss": 2.3512, "step": 136800 }, { "epoch": 0.5288691994866324, "grad_norm": 0.11842067539691925, "learning_rate": 0.002, "loss": 2.3383, "step": 136810 }, { "epoch": 0.5289078566900156, "grad_norm": 0.13334208726882935, "learning_rate": 0.002, "loss": 2.3518, "step": 136820 }, { "epoch": 0.5289465138933989, "grad_norm": 0.11997167021036148, "learning_rate": 0.002, "loss": 2.3671, "step": 136830 }, { "epoch": 0.5289851710967822, "grad_norm": 0.09686478972434998, "learning_rate": 0.002, "loss": 2.3481, "step": 136840 }, { "epoch": 0.5290238283001655, "grad_norm": 0.09126406162977219, "learning_rate": 0.002, "loss": 2.3536, "step": 136850 }, { "epoch": 0.5290624855035487, "grad_norm": 0.13515914976596832, "learning_rate": 0.002, "loss": 2.3412, "step": 136860 }, { "epoch": 0.529101142706932, "grad_norm": 0.11011700332164764, "learning_rate": 0.002, "loss": 2.3387, "step": 136870 }, { "epoch": 0.5291397999103152, "grad_norm": 0.09951812773942947, "learning_rate": 0.002, "loss": 2.3441, "step": 136880 }, { "epoch": 0.5291784571136986, "grad_norm": 0.12080974876880646, "learning_rate": 0.002, "loss": 2.3531, "step": 136890 }, { "epoch": 0.5292171143170818, "grad_norm": 0.11308450251817703, "learning_rate": 0.002, "loss": 2.3551, "step": 136900 }, { "epoch": 0.5292557715204651, "grad_norm": 0.1100267842411995, "learning_rate": 0.002, "loss": 2.3398, "step": 136910 }, { "epoch": 0.5292944287238484, "grad_norm": 0.10424294322729111, "learning_rate": 0.002, "loss": 2.3445, "step": 136920 }, { "epoch": 0.5293330859272317, "grad_norm": 0.1197114810347557, "learning_rate": 0.002, "loss": 2.3343, "step": 136930 }, { "epoch": 0.529371743130615, "grad_norm": 0.1036001443862915, "learning_rate": 0.002, "loss": 2.3406, "step": 136940 }, { "epoch": 0.5294104003339982, "grad_norm": 0.10613606870174408, "learning_rate": 0.002, "loss": 2.3485, "step": 136950 }, { "epoch": 0.5294490575373815, "grad_norm": 0.10405551642179489, "learning_rate": 0.002, "loss": 2.3418, "step": 136960 }, { "epoch": 0.5294877147407648, "grad_norm": 0.11526428908109665, "learning_rate": 0.002, "loss": 2.3414, "step": 136970 }, { "epoch": 0.5295263719441481, "grad_norm": 0.10523267835378647, "learning_rate": 0.002, "loss": 2.3567, "step": 136980 }, { "epoch": 0.5295650291475313, "grad_norm": 0.09959837049245834, "learning_rate": 0.002, "loss": 2.3547, "step": 136990 }, { "epoch": 0.5296036863509146, "grad_norm": 0.12185262143611908, "learning_rate": 0.002, "loss": 2.3459, "step": 137000 }, { "epoch": 0.529642343554298, "grad_norm": 0.09950944036245346, "learning_rate": 0.002, "loss": 2.3304, "step": 137010 }, { "epoch": 0.5296810007576812, "grad_norm": 0.1028703823685646, "learning_rate": 0.002, "loss": 2.3465, "step": 137020 }, { "epoch": 0.5297196579610645, "grad_norm": 0.11582040041685104, "learning_rate": 0.002, "loss": 2.3297, "step": 137030 }, { "epoch": 0.5297583151644477, "grad_norm": 0.11042831838130951, "learning_rate": 0.002, "loss": 2.3472, "step": 137040 }, { "epoch": 0.5297969723678311, "grad_norm": 0.108167365193367, "learning_rate": 0.002, "loss": 2.3454, "step": 137050 }, { "epoch": 0.5298356295712143, "grad_norm": 0.12642677128314972, "learning_rate": 0.002, "loss": 2.3506, "step": 137060 }, { "epoch": 0.5298742867745976, "grad_norm": 0.11467591673135757, "learning_rate": 0.002, "loss": 2.345, "step": 137070 }, { "epoch": 0.5299129439779808, "grad_norm": 0.10052400082349777, "learning_rate": 0.002, "loss": 2.3334, "step": 137080 }, { "epoch": 0.5299516011813641, "grad_norm": 0.09801182150840759, "learning_rate": 0.002, "loss": 2.3358, "step": 137090 }, { "epoch": 0.5299902583847474, "grad_norm": 0.11449935287237167, "learning_rate": 0.002, "loss": 2.3598, "step": 137100 }, { "epoch": 0.5300289155881307, "grad_norm": 0.102525994181633, "learning_rate": 0.002, "loss": 2.3554, "step": 137110 }, { "epoch": 0.530067572791514, "grad_norm": 0.09240062534809113, "learning_rate": 0.002, "loss": 2.3654, "step": 137120 }, { "epoch": 0.5301062299948972, "grad_norm": 0.10398975014686584, "learning_rate": 0.002, "loss": 2.3572, "step": 137130 }, { "epoch": 0.5301448871982806, "grad_norm": 0.10602229088544846, "learning_rate": 0.002, "loss": 2.3406, "step": 137140 }, { "epoch": 0.5301835444016638, "grad_norm": 0.10612321645021439, "learning_rate": 0.002, "loss": 2.3483, "step": 137150 }, { "epoch": 0.5302222016050471, "grad_norm": 0.10876964777708054, "learning_rate": 0.002, "loss": 2.3264, "step": 137160 }, { "epoch": 0.5302608588084303, "grad_norm": 0.10908559709787369, "learning_rate": 0.002, "loss": 2.3419, "step": 137170 }, { "epoch": 0.5302995160118137, "grad_norm": 0.10353915393352509, "learning_rate": 0.002, "loss": 2.3564, "step": 137180 }, { "epoch": 0.5303381732151969, "grad_norm": 0.10041562467813492, "learning_rate": 0.002, "loss": 2.3447, "step": 137190 }, { "epoch": 0.5303768304185802, "grad_norm": 0.09648370742797852, "learning_rate": 0.002, "loss": 2.3439, "step": 137200 }, { "epoch": 0.5304154876219634, "grad_norm": 0.11177092790603638, "learning_rate": 0.002, "loss": 2.3592, "step": 137210 }, { "epoch": 0.5304541448253468, "grad_norm": 0.10650645941495895, "learning_rate": 0.002, "loss": 2.3374, "step": 137220 }, { "epoch": 0.5304928020287301, "grad_norm": 0.10645987838506699, "learning_rate": 0.002, "loss": 2.3566, "step": 137230 }, { "epoch": 0.5305314592321133, "grad_norm": 0.10007698833942413, "learning_rate": 0.002, "loss": 2.3473, "step": 137240 }, { "epoch": 0.5305701164354966, "grad_norm": 0.12854336202144623, "learning_rate": 0.002, "loss": 2.3411, "step": 137250 }, { "epoch": 0.5306087736388798, "grad_norm": 0.13256269693374634, "learning_rate": 0.002, "loss": 2.3565, "step": 137260 }, { "epoch": 0.5306474308422632, "grad_norm": 0.12798702716827393, "learning_rate": 0.002, "loss": 2.3233, "step": 137270 }, { "epoch": 0.5306860880456464, "grad_norm": 0.10192005336284637, "learning_rate": 0.002, "loss": 2.3456, "step": 137280 }, { "epoch": 0.5307247452490297, "grad_norm": 0.12237120419740677, "learning_rate": 0.002, "loss": 2.3474, "step": 137290 }, { "epoch": 0.5307634024524129, "grad_norm": 0.09982860833406448, "learning_rate": 0.002, "loss": 2.3349, "step": 137300 }, { "epoch": 0.5308020596557963, "grad_norm": 0.10656183958053589, "learning_rate": 0.002, "loss": 2.3479, "step": 137310 }, { "epoch": 0.5308407168591796, "grad_norm": 0.10240715742111206, "learning_rate": 0.002, "loss": 2.3585, "step": 137320 }, { "epoch": 0.5308793740625628, "grad_norm": 0.12004515528678894, "learning_rate": 0.002, "loss": 2.3649, "step": 137330 }, { "epoch": 0.530918031265946, "grad_norm": 0.11195909231901169, "learning_rate": 0.002, "loss": 2.3377, "step": 137340 }, { "epoch": 0.5309566884693294, "grad_norm": 0.12709975242614746, "learning_rate": 0.002, "loss": 2.3421, "step": 137350 }, { "epoch": 0.5309953456727127, "grad_norm": 0.09677889198064804, "learning_rate": 0.002, "loss": 2.3263, "step": 137360 }, { "epoch": 0.5310340028760959, "grad_norm": 0.10898634046316147, "learning_rate": 0.002, "loss": 2.3503, "step": 137370 }, { "epoch": 0.5310726600794792, "grad_norm": 0.11007361859083176, "learning_rate": 0.002, "loss": 2.3376, "step": 137380 }, { "epoch": 0.5311113172828625, "grad_norm": 0.12255366891622543, "learning_rate": 0.002, "loss": 2.3479, "step": 137390 }, { "epoch": 0.5311499744862458, "grad_norm": 0.1071476936340332, "learning_rate": 0.002, "loss": 2.3363, "step": 137400 }, { "epoch": 0.531188631689629, "grad_norm": 0.12634190917015076, "learning_rate": 0.002, "loss": 2.3297, "step": 137410 }, { "epoch": 0.5312272888930123, "grad_norm": 0.10547083616256714, "learning_rate": 0.002, "loss": 2.3412, "step": 137420 }, { "epoch": 0.5312659460963955, "grad_norm": 0.10647553950548172, "learning_rate": 0.002, "loss": 2.3535, "step": 137430 }, { "epoch": 0.5313046032997789, "grad_norm": 0.09845500439405441, "learning_rate": 0.002, "loss": 2.3652, "step": 137440 }, { "epoch": 0.5313432605031622, "grad_norm": 0.10793498903512955, "learning_rate": 0.002, "loss": 2.363, "step": 137450 }, { "epoch": 0.5313819177065454, "grad_norm": 0.11223926395177841, "learning_rate": 0.002, "loss": 2.349, "step": 137460 }, { "epoch": 0.5314205749099287, "grad_norm": 0.11091437190771103, "learning_rate": 0.002, "loss": 2.3406, "step": 137470 }, { "epoch": 0.531459232113312, "grad_norm": 0.11193855106830597, "learning_rate": 0.002, "loss": 2.3352, "step": 137480 }, { "epoch": 0.5314978893166953, "grad_norm": 0.11525751650333405, "learning_rate": 0.002, "loss": 2.341, "step": 137490 }, { "epoch": 0.5315365465200785, "grad_norm": 0.11154419928789139, "learning_rate": 0.002, "loss": 2.346, "step": 137500 }, { "epoch": 0.5315752037234618, "grad_norm": 0.10130494832992554, "learning_rate": 0.002, "loss": 2.3256, "step": 137510 }, { "epoch": 0.5316138609268451, "grad_norm": 0.1344832181930542, "learning_rate": 0.002, "loss": 2.3243, "step": 137520 }, { "epoch": 0.5316525181302284, "grad_norm": 0.11252513527870178, "learning_rate": 0.002, "loss": 2.3349, "step": 137530 }, { "epoch": 0.5316911753336117, "grad_norm": 0.11794115602970123, "learning_rate": 0.002, "loss": 2.344, "step": 137540 }, { "epoch": 0.5317298325369949, "grad_norm": 0.1194150522351265, "learning_rate": 0.002, "loss": 2.34, "step": 137550 }, { "epoch": 0.5317684897403783, "grad_norm": 0.10883738100528717, "learning_rate": 0.002, "loss": 2.3363, "step": 137560 }, { "epoch": 0.5318071469437615, "grad_norm": 0.11823202669620514, "learning_rate": 0.002, "loss": 2.3455, "step": 137570 }, { "epoch": 0.5318458041471448, "grad_norm": 0.10311403125524521, "learning_rate": 0.002, "loss": 2.3347, "step": 137580 }, { "epoch": 0.531884461350528, "grad_norm": 0.11634775996208191, "learning_rate": 0.002, "loss": 2.3379, "step": 137590 }, { "epoch": 0.5319231185539114, "grad_norm": 0.10529926419258118, "learning_rate": 0.002, "loss": 2.3441, "step": 137600 }, { "epoch": 0.5319617757572946, "grad_norm": 0.10747803002595901, "learning_rate": 0.002, "loss": 2.346, "step": 137610 }, { "epoch": 0.5320004329606779, "grad_norm": 0.12081359326839447, "learning_rate": 0.002, "loss": 2.3401, "step": 137620 }, { "epoch": 0.5320390901640611, "grad_norm": 0.1166115254163742, "learning_rate": 0.002, "loss": 2.3437, "step": 137630 }, { "epoch": 0.5320777473674444, "grad_norm": 0.11433306336402893, "learning_rate": 0.002, "loss": 2.3377, "step": 137640 }, { "epoch": 0.5321164045708278, "grad_norm": 0.11932501196861267, "learning_rate": 0.002, "loss": 2.3262, "step": 137650 }, { "epoch": 0.532155061774211, "grad_norm": 0.12655851244926453, "learning_rate": 0.002, "loss": 2.3535, "step": 137660 }, { "epoch": 0.5321937189775943, "grad_norm": 0.09576230496168137, "learning_rate": 0.002, "loss": 2.342, "step": 137670 }, { "epoch": 0.5322323761809775, "grad_norm": 0.11238545924425125, "learning_rate": 0.002, "loss": 2.3546, "step": 137680 }, { "epoch": 0.5322710333843609, "grad_norm": 0.10102082043886185, "learning_rate": 0.002, "loss": 2.348, "step": 137690 }, { "epoch": 0.5323096905877441, "grad_norm": 0.10141270607709885, "learning_rate": 0.002, "loss": 2.3522, "step": 137700 }, { "epoch": 0.5323483477911274, "grad_norm": 0.09829019010066986, "learning_rate": 0.002, "loss": 2.3459, "step": 137710 }, { "epoch": 0.5323870049945106, "grad_norm": 0.09422247856855392, "learning_rate": 0.002, "loss": 2.3507, "step": 137720 }, { "epoch": 0.532425662197894, "grad_norm": 0.11723770946264267, "learning_rate": 0.002, "loss": 2.3524, "step": 137730 }, { "epoch": 0.5324643194012773, "grad_norm": 0.10489365458488464, "learning_rate": 0.002, "loss": 2.3443, "step": 137740 }, { "epoch": 0.5325029766046605, "grad_norm": 0.12095458060503006, "learning_rate": 0.002, "loss": 2.3452, "step": 137750 }, { "epoch": 0.5325416338080438, "grad_norm": 0.10421379655599594, "learning_rate": 0.002, "loss": 2.3405, "step": 137760 }, { "epoch": 0.5325802910114271, "grad_norm": 0.10324057936668396, "learning_rate": 0.002, "loss": 2.356, "step": 137770 }, { "epoch": 0.5326189482148104, "grad_norm": 0.1105327233672142, "learning_rate": 0.002, "loss": 2.3515, "step": 137780 }, { "epoch": 0.5326576054181936, "grad_norm": 0.11294253170490265, "learning_rate": 0.002, "loss": 2.3444, "step": 137790 }, { "epoch": 0.5326962626215769, "grad_norm": 0.10702069848775864, "learning_rate": 0.002, "loss": 2.3488, "step": 137800 }, { "epoch": 0.5327349198249601, "grad_norm": 0.1080106571316719, "learning_rate": 0.002, "loss": 2.3379, "step": 137810 }, { "epoch": 0.5327735770283435, "grad_norm": 0.0997968465089798, "learning_rate": 0.002, "loss": 2.3284, "step": 137820 }, { "epoch": 0.5328122342317267, "grad_norm": 0.09706050157546997, "learning_rate": 0.002, "loss": 2.3488, "step": 137830 }, { "epoch": 0.53285089143511, "grad_norm": 0.17344297468662262, "learning_rate": 0.002, "loss": 2.3431, "step": 137840 }, { "epoch": 0.5328895486384932, "grad_norm": 0.11641829460859299, "learning_rate": 0.002, "loss": 2.3522, "step": 137850 }, { "epoch": 0.5329282058418766, "grad_norm": 0.12219628691673279, "learning_rate": 0.002, "loss": 2.3457, "step": 137860 }, { "epoch": 0.5329668630452599, "grad_norm": 0.09959860146045685, "learning_rate": 0.002, "loss": 2.3432, "step": 137870 }, { "epoch": 0.5330055202486431, "grad_norm": 0.10274484008550644, "learning_rate": 0.002, "loss": 2.3568, "step": 137880 }, { "epoch": 0.5330441774520264, "grad_norm": 0.11681586503982544, "learning_rate": 0.002, "loss": 2.3422, "step": 137890 }, { "epoch": 0.5330828346554097, "grad_norm": 0.11741876602172852, "learning_rate": 0.002, "loss": 2.337, "step": 137900 }, { "epoch": 0.533121491858793, "grad_norm": 0.10877589136362076, "learning_rate": 0.002, "loss": 2.338, "step": 137910 }, { "epoch": 0.5331601490621762, "grad_norm": 0.12460336089134216, "learning_rate": 0.002, "loss": 2.3437, "step": 137920 }, { "epoch": 0.5331988062655595, "grad_norm": 0.11148268729448318, "learning_rate": 0.002, "loss": 2.3387, "step": 137930 }, { "epoch": 0.5332374634689429, "grad_norm": 0.10965389758348465, "learning_rate": 0.002, "loss": 2.3449, "step": 137940 }, { "epoch": 0.5332761206723261, "grad_norm": 0.10817113518714905, "learning_rate": 0.002, "loss": 2.3489, "step": 137950 }, { "epoch": 0.5333147778757094, "grad_norm": 0.10625895112752914, "learning_rate": 0.002, "loss": 2.3453, "step": 137960 }, { "epoch": 0.5333534350790926, "grad_norm": 0.09788678586483002, "learning_rate": 0.002, "loss": 2.3475, "step": 137970 }, { "epoch": 0.533392092282476, "grad_norm": 0.10641621798276901, "learning_rate": 0.002, "loss": 2.3384, "step": 137980 }, { "epoch": 0.5334307494858592, "grad_norm": 0.11436722427606583, "learning_rate": 0.002, "loss": 2.3487, "step": 137990 }, { "epoch": 0.5334694066892425, "grad_norm": 0.10436925292015076, "learning_rate": 0.002, "loss": 2.3499, "step": 138000 }, { "epoch": 0.5335080638926257, "grad_norm": 0.11858268827199936, "learning_rate": 0.002, "loss": 2.3535, "step": 138010 }, { "epoch": 0.533546721096009, "grad_norm": 0.1955452710390091, "learning_rate": 0.002, "loss": 2.3383, "step": 138020 }, { "epoch": 0.5335853782993923, "grad_norm": 0.11019251495599747, "learning_rate": 0.002, "loss": 2.3329, "step": 138030 }, { "epoch": 0.5336240355027756, "grad_norm": 0.11960510909557343, "learning_rate": 0.002, "loss": 2.3647, "step": 138040 }, { "epoch": 0.5336626927061588, "grad_norm": 0.10244852304458618, "learning_rate": 0.002, "loss": 2.3462, "step": 138050 }, { "epoch": 0.5337013499095421, "grad_norm": 0.0959695354104042, "learning_rate": 0.002, "loss": 2.3612, "step": 138060 }, { "epoch": 0.5337400071129255, "grad_norm": 0.1148761510848999, "learning_rate": 0.002, "loss": 2.3485, "step": 138070 }, { "epoch": 0.5337786643163087, "grad_norm": 0.09662537276744843, "learning_rate": 0.002, "loss": 2.3484, "step": 138080 }, { "epoch": 0.533817321519692, "grad_norm": 0.1158476397395134, "learning_rate": 0.002, "loss": 2.3504, "step": 138090 }, { "epoch": 0.5338559787230752, "grad_norm": 0.11077646166086197, "learning_rate": 0.002, "loss": 2.3554, "step": 138100 }, { "epoch": 0.5338946359264586, "grad_norm": 0.1256052851676941, "learning_rate": 0.002, "loss": 2.3494, "step": 138110 }, { "epoch": 0.5339332931298418, "grad_norm": 0.11398004740476608, "learning_rate": 0.002, "loss": 2.3408, "step": 138120 }, { "epoch": 0.5339719503332251, "grad_norm": 0.10633135586977005, "learning_rate": 0.002, "loss": 2.3523, "step": 138130 }, { "epoch": 0.5340106075366083, "grad_norm": 0.10169097781181335, "learning_rate": 0.002, "loss": 2.3442, "step": 138140 }, { "epoch": 0.5340492647399917, "grad_norm": 0.10960586369037628, "learning_rate": 0.002, "loss": 2.3328, "step": 138150 }, { "epoch": 0.534087921943375, "grad_norm": 0.10723719000816345, "learning_rate": 0.002, "loss": 2.3364, "step": 138160 }, { "epoch": 0.5341265791467582, "grad_norm": 0.10737229138612747, "learning_rate": 0.002, "loss": 2.3421, "step": 138170 }, { "epoch": 0.5341652363501415, "grad_norm": 0.10901413857936859, "learning_rate": 0.002, "loss": 2.3451, "step": 138180 }, { "epoch": 0.5342038935535247, "grad_norm": 0.11041481047868729, "learning_rate": 0.002, "loss": 2.3451, "step": 138190 }, { "epoch": 0.5342425507569081, "grad_norm": 0.10067623108625412, "learning_rate": 0.002, "loss": 2.3469, "step": 138200 }, { "epoch": 0.5342812079602913, "grad_norm": 0.10747084021568298, "learning_rate": 0.002, "loss": 2.3324, "step": 138210 }, { "epoch": 0.5343198651636746, "grad_norm": 0.12419717758893967, "learning_rate": 0.002, "loss": 2.354, "step": 138220 }, { "epoch": 0.5343585223670578, "grad_norm": 0.10230522602796555, "learning_rate": 0.002, "loss": 2.3303, "step": 138230 }, { "epoch": 0.5343971795704412, "grad_norm": 0.0917351171374321, "learning_rate": 0.002, "loss": 2.3542, "step": 138240 }, { "epoch": 0.5344358367738244, "grad_norm": 0.11229658871889114, "learning_rate": 0.002, "loss": 2.3429, "step": 138250 }, { "epoch": 0.5344744939772077, "grad_norm": 0.11508216708898544, "learning_rate": 0.002, "loss": 2.3615, "step": 138260 }, { "epoch": 0.534513151180591, "grad_norm": 0.10804786533117294, "learning_rate": 0.002, "loss": 2.3629, "step": 138270 }, { "epoch": 0.5345518083839743, "grad_norm": 0.10519649088382721, "learning_rate": 0.002, "loss": 2.3536, "step": 138280 }, { "epoch": 0.5345904655873576, "grad_norm": 0.1274058073759079, "learning_rate": 0.002, "loss": 2.3462, "step": 138290 }, { "epoch": 0.5346291227907408, "grad_norm": 0.09675882756710052, "learning_rate": 0.002, "loss": 2.3451, "step": 138300 }, { "epoch": 0.5346677799941241, "grad_norm": 0.09814517945051193, "learning_rate": 0.002, "loss": 2.3327, "step": 138310 }, { "epoch": 0.5347064371975074, "grad_norm": 0.11503762751817703, "learning_rate": 0.002, "loss": 2.3464, "step": 138320 }, { "epoch": 0.5347450944008907, "grad_norm": 0.110122449696064, "learning_rate": 0.002, "loss": 2.3651, "step": 138330 }, { "epoch": 0.5347837516042739, "grad_norm": 0.1314275562763214, "learning_rate": 0.002, "loss": 2.3598, "step": 138340 }, { "epoch": 0.5348224088076572, "grad_norm": 0.09663635492324829, "learning_rate": 0.002, "loss": 2.3289, "step": 138350 }, { "epoch": 0.5348610660110404, "grad_norm": 0.11120118945837021, "learning_rate": 0.002, "loss": 2.3387, "step": 138360 }, { "epoch": 0.5348997232144238, "grad_norm": 0.10792695730924606, "learning_rate": 0.002, "loss": 2.3393, "step": 138370 }, { "epoch": 0.5349383804178071, "grad_norm": 0.1011994257569313, "learning_rate": 0.002, "loss": 2.3422, "step": 138380 }, { "epoch": 0.5349770376211903, "grad_norm": 0.11302940547466278, "learning_rate": 0.002, "loss": 2.3322, "step": 138390 }, { "epoch": 0.5350156948245736, "grad_norm": 0.10505368560552597, "learning_rate": 0.002, "loss": 2.3529, "step": 138400 }, { "epoch": 0.5350543520279569, "grad_norm": 0.11198221147060394, "learning_rate": 0.002, "loss": 2.3378, "step": 138410 }, { "epoch": 0.5350930092313402, "grad_norm": 0.1090167760848999, "learning_rate": 0.002, "loss": 2.3527, "step": 138420 }, { "epoch": 0.5351316664347234, "grad_norm": 0.10570481419563293, "learning_rate": 0.002, "loss": 2.329, "step": 138430 }, { "epoch": 0.5351703236381067, "grad_norm": 0.10702286660671234, "learning_rate": 0.002, "loss": 2.3341, "step": 138440 }, { "epoch": 0.53520898084149, "grad_norm": 0.12096297740936279, "learning_rate": 0.002, "loss": 2.3616, "step": 138450 }, { "epoch": 0.5352476380448733, "grad_norm": 0.12605620920658112, "learning_rate": 0.002, "loss": 2.3585, "step": 138460 }, { "epoch": 0.5352862952482565, "grad_norm": 0.10755420476198196, "learning_rate": 0.002, "loss": 2.33, "step": 138470 }, { "epoch": 0.5353249524516398, "grad_norm": 0.10866609960794449, "learning_rate": 0.002, "loss": 2.3475, "step": 138480 }, { "epoch": 0.5353636096550232, "grad_norm": 0.11253587156534195, "learning_rate": 0.002, "loss": 2.3319, "step": 138490 }, { "epoch": 0.5354022668584064, "grad_norm": 0.12749694287776947, "learning_rate": 0.002, "loss": 2.3502, "step": 138500 }, { "epoch": 0.5354409240617897, "grad_norm": 0.10958480089902878, "learning_rate": 0.002, "loss": 2.3386, "step": 138510 }, { "epoch": 0.5354795812651729, "grad_norm": 0.10303416103124619, "learning_rate": 0.002, "loss": 2.3513, "step": 138520 }, { "epoch": 0.5355182384685563, "grad_norm": 0.11819668114185333, "learning_rate": 0.002, "loss": 2.355, "step": 138530 }, { "epoch": 0.5355568956719395, "grad_norm": 0.2007521092891693, "learning_rate": 0.002, "loss": 2.3351, "step": 138540 }, { "epoch": 0.5355955528753228, "grad_norm": 0.09628603607416153, "learning_rate": 0.002, "loss": 2.3472, "step": 138550 }, { "epoch": 0.535634210078706, "grad_norm": 0.1116054356098175, "learning_rate": 0.002, "loss": 2.337, "step": 138560 }, { "epoch": 0.5356728672820893, "grad_norm": 0.10477752238512039, "learning_rate": 0.002, "loss": 2.3564, "step": 138570 }, { "epoch": 0.5357115244854727, "grad_norm": 0.10349495708942413, "learning_rate": 0.002, "loss": 2.3483, "step": 138580 }, { "epoch": 0.5357501816888559, "grad_norm": 0.10677219182252884, "learning_rate": 0.002, "loss": 2.3532, "step": 138590 }, { "epoch": 0.5357888388922392, "grad_norm": 0.10819147527217865, "learning_rate": 0.002, "loss": 2.3398, "step": 138600 }, { "epoch": 0.5358274960956224, "grad_norm": 0.12032123655080795, "learning_rate": 0.002, "loss": 2.3562, "step": 138610 }, { "epoch": 0.5358661532990058, "grad_norm": 0.10413940250873566, "learning_rate": 0.002, "loss": 2.3371, "step": 138620 }, { "epoch": 0.535904810502389, "grad_norm": 0.10026529431343079, "learning_rate": 0.002, "loss": 2.3428, "step": 138630 }, { "epoch": 0.5359434677057723, "grad_norm": 0.10903637111186981, "learning_rate": 0.002, "loss": 2.3485, "step": 138640 }, { "epoch": 0.5359821249091555, "grad_norm": 0.11362525075674057, "learning_rate": 0.002, "loss": 2.3436, "step": 138650 }, { "epoch": 0.5360207821125389, "grad_norm": 0.10190536081790924, "learning_rate": 0.002, "loss": 2.3578, "step": 138660 }, { "epoch": 0.5360594393159221, "grad_norm": 0.09856315702199936, "learning_rate": 0.002, "loss": 2.3371, "step": 138670 }, { "epoch": 0.5360980965193054, "grad_norm": 0.10943195968866348, "learning_rate": 0.002, "loss": 2.3564, "step": 138680 }, { "epoch": 0.5361367537226887, "grad_norm": 0.11535698920488358, "learning_rate": 0.002, "loss": 2.3485, "step": 138690 }, { "epoch": 0.536175410926072, "grad_norm": 0.10688416659832001, "learning_rate": 0.002, "loss": 2.3621, "step": 138700 }, { "epoch": 0.5362140681294553, "grad_norm": 0.10885433107614517, "learning_rate": 0.002, "loss": 2.348, "step": 138710 }, { "epoch": 0.5362527253328385, "grad_norm": 0.09661147743463516, "learning_rate": 0.002, "loss": 2.3419, "step": 138720 }, { "epoch": 0.5362913825362218, "grad_norm": 0.11588813364505768, "learning_rate": 0.002, "loss": 2.338, "step": 138730 }, { "epoch": 0.536330039739605, "grad_norm": 0.10474442690610886, "learning_rate": 0.002, "loss": 2.3343, "step": 138740 }, { "epoch": 0.5363686969429884, "grad_norm": 0.11134396493434906, "learning_rate": 0.002, "loss": 2.343, "step": 138750 }, { "epoch": 0.5364073541463716, "grad_norm": 0.12790916860103607, "learning_rate": 0.002, "loss": 2.3419, "step": 138760 }, { "epoch": 0.5364460113497549, "grad_norm": 0.11726795136928558, "learning_rate": 0.002, "loss": 2.354, "step": 138770 }, { "epoch": 0.5364846685531381, "grad_norm": 0.11688651889562607, "learning_rate": 0.002, "loss": 2.3526, "step": 138780 }, { "epoch": 0.5365233257565215, "grad_norm": 0.09294089674949646, "learning_rate": 0.002, "loss": 2.3401, "step": 138790 }, { "epoch": 0.5365619829599048, "grad_norm": 0.13146226108074188, "learning_rate": 0.002, "loss": 2.3308, "step": 138800 }, { "epoch": 0.536600640163288, "grad_norm": 0.11804410070180893, "learning_rate": 0.002, "loss": 2.359, "step": 138810 }, { "epoch": 0.5366392973666713, "grad_norm": 0.10523460805416107, "learning_rate": 0.002, "loss": 2.3419, "step": 138820 }, { "epoch": 0.5366779545700546, "grad_norm": 0.1207517683506012, "learning_rate": 0.002, "loss": 2.3605, "step": 138830 }, { "epoch": 0.5367166117734379, "grad_norm": 0.12145643681287766, "learning_rate": 0.002, "loss": 2.3367, "step": 138840 }, { "epoch": 0.5367552689768211, "grad_norm": 0.1467278003692627, "learning_rate": 0.002, "loss": 2.3402, "step": 138850 }, { "epoch": 0.5367939261802044, "grad_norm": 0.10867208987474442, "learning_rate": 0.002, "loss": 2.3427, "step": 138860 }, { "epoch": 0.5368325833835877, "grad_norm": 0.11317738145589828, "learning_rate": 0.002, "loss": 2.342, "step": 138870 }, { "epoch": 0.536871240586971, "grad_norm": 0.10063724964857101, "learning_rate": 0.002, "loss": 2.3357, "step": 138880 }, { "epoch": 0.5369098977903543, "grad_norm": 0.12380929291248322, "learning_rate": 0.002, "loss": 2.3362, "step": 138890 }, { "epoch": 0.5369485549937375, "grad_norm": 0.11145635694265366, "learning_rate": 0.002, "loss": 2.3537, "step": 138900 }, { "epoch": 0.5369872121971209, "grad_norm": 0.1016305610537529, "learning_rate": 0.002, "loss": 2.3399, "step": 138910 }, { "epoch": 0.5370258694005041, "grad_norm": 0.11410214006900787, "learning_rate": 0.002, "loss": 2.3395, "step": 138920 }, { "epoch": 0.5370645266038874, "grad_norm": 0.10275132209062576, "learning_rate": 0.002, "loss": 2.3436, "step": 138930 }, { "epoch": 0.5371031838072706, "grad_norm": 0.11877349019050598, "learning_rate": 0.002, "loss": 2.3428, "step": 138940 }, { "epoch": 0.5371418410106539, "grad_norm": 0.11219332367181778, "learning_rate": 0.002, "loss": 2.3453, "step": 138950 }, { "epoch": 0.5371804982140372, "grad_norm": 0.10791706293821335, "learning_rate": 0.002, "loss": 2.3319, "step": 138960 }, { "epoch": 0.5372191554174205, "grad_norm": 0.11491399258375168, "learning_rate": 0.002, "loss": 2.3536, "step": 138970 }, { "epoch": 0.5372578126208037, "grad_norm": 0.11004562675952911, "learning_rate": 0.002, "loss": 2.3531, "step": 138980 }, { "epoch": 0.537296469824187, "grad_norm": 0.11763054132461548, "learning_rate": 0.002, "loss": 2.3556, "step": 138990 }, { "epoch": 0.5373351270275704, "grad_norm": 0.10310228914022446, "learning_rate": 0.002, "loss": 2.3434, "step": 139000 }, { "epoch": 0.5373737842309536, "grad_norm": 0.10830710083246231, "learning_rate": 0.002, "loss": 2.353, "step": 139010 }, { "epoch": 0.5374124414343369, "grad_norm": 0.0979403480887413, "learning_rate": 0.002, "loss": 2.3495, "step": 139020 }, { "epoch": 0.5374510986377201, "grad_norm": 0.1297398954629898, "learning_rate": 0.002, "loss": 2.3518, "step": 139030 }, { "epoch": 0.5374897558411035, "grad_norm": 0.14436522126197815, "learning_rate": 0.002, "loss": 2.3541, "step": 139040 }, { "epoch": 0.5375284130444867, "grad_norm": 0.09765699505805969, "learning_rate": 0.002, "loss": 2.3468, "step": 139050 }, { "epoch": 0.53756707024787, "grad_norm": 0.10815630108118057, "learning_rate": 0.002, "loss": 2.3464, "step": 139060 }, { "epoch": 0.5376057274512532, "grad_norm": 0.09410147368907928, "learning_rate": 0.002, "loss": 2.3467, "step": 139070 }, { "epoch": 0.5376443846546366, "grad_norm": 0.11704320460557938, "learning_rate": 0.002, "loss": 2.3312, "step": 139080 }, { "epoch": 0.5376830418580199, "grad_norm": 0.10289481282234192, "learning_rate": 0.002, "loss": 2.3419, "step": 139090 }, { "epoch": 0.5377216990614031, "grad_norm": 0.13339246809482574, "learning_rate": 0.002, "loss": 2.3391, "step": 139100 }, { "epoch": 0.5377603562647864, "grad_norm": 0.10395749658346176, "learning_rate": 0.002, "loss": 2.3427, "step": 139110 }, { "epoch": 0.5377990134681696, "grad_norm": 0.10145257413387299, "learning_rate": 0.002, "loss": 2.3543, "step": 139120 }, { "epoch": 0.537837670671553, "grad_norm": 0.1058330237865448, "learning_rate": 0.002, "loss": 2.3447, "step": 139130 }, { "epoch": 0.5378763278749362, "grad_norm": 0.09136255830526352, "learning_rate": 0.002, "loss": 2.3262, "step": 139140 }, { "epoch": 0.5379149850783195, "grad_norm": 0.09894274920225143, "learning_rate": 0.002, "loss": 2.3386, "step": 139150 }, { "epoch": 0.5379536422817027, "grad_norm": 0.11287065595388412, "learning_rate": 0.002, "loss": 2.3554, "step": 139160 }, { "epoch": 0.5379922994850861, "grad_norm": 0.11594919115304947, "learning_rate": 0.002, "loss": 2.3523, "step": 139170 }, { "epoch": 0.5380309566884693, "grad_norm": 0.13460049033164978, "learning_rate": 0.002, "loss": 2.352, "step": 139180 }, { "epoch": 0.5380696138918526, "grad_norm": 0.11856977641582489, "learning_rate": 0.002, "loss": 2.3595, "step": 139190 }, { "epoch": 0.5381082710952358, "grad_norm": 0.10561086982488632, "learning_rate": 0.002, "loss": 2.345, "step": 139200 }, { "epoch": 0.5381469282986192, "grad_norm": 0.1271483302116394, "learning_rate": 0.002, "loss": 2.3507, "step": 139210 }, { "epoch": 0.5381855855020025, "grad_norm": 0.0922880545258522, "learning_rate": 0.002, "loss": 2.3581, "step": 139220 }, { "epoch": 0.5382242427053857, "grad_norm": 0.09948939085006714, "learning_rate": 0.002, "loss": 2.3467, "step": 139230 }, { "epoch": 0.538262899908769, "grad_norm": 0.10929225385189056, "learning_rate": 0.002, "loss": 2.3391, "step": 139240 }, { "epoch": 0.5383015571121523, "grad_norm": 0.11775950342416763, "learning_rate": 0.002, "loss": 2.349, "step": 139250 }, { "epoch": 0.5383402143155356, "grad_norm": 0.1184498593211174, "learning_rate": 0.002, "loss": 2.3444, "step": 139260 }, { "epoch": 0.5383788715189188, "grad_norm": 0.08900095522403717, "learning_rate": 0.002, "loss": 2.3587, "step": 139270 }, { "epoch": 0.5384175287223021, "grad_norm": 0.09807005524635315, "learning_rate": 0.002, "loss": 2.3459, "step": 139280 }, { "epoch": 0.5384561859256853, "grad_norm": 0.11427653580904007, "learning_rate": 0.002, "loss": 2.3526, "step": 139290 }, { "epoch": 0.5384948431290687, "grad_norm": 0.10617062449455261, "learning_rate": 0.002, "loss": 2.3558, "step": 139300 }, { "epoch": 0.538533500332452, "grad_norm": 0.11520641297101974, "learning_rate": 0.002, "loss": 2.3504, "step": 139310 }, { "epoch": 0.5385721575358352, "grad_norm": 0.09916675835847855, "learning_rate": 0.002, "loss": 2.3492, "step": 139320 }, { "epoch": 0.5386108147392185, "grad_norm": 0.09200314432382584, "learning_rate": 0.002, "loss": 2.3446, "step": 139330 }, { "epoch": 0.5386494719426018, "grad_norm": 0.12772326171398163, "learning_rate": 0.002, "loss": 2.342, "step": 139340 }, { "epoch": 0.5386881291459851, "grad_norm": 0.09921281039714813, "learning_rate": 0.002, "loss": 2.3376, "step": 139350 }, { "epoch": 0.5387267863493683, "grad_norm": 0.11318966746330261, "learning_rate": 0.002, "loss": 2.3412, "step": 139360 }, { "epoch": 0.5387654435527516, "grad_norm": 0.10829410701990128, "learning_rate": 0.002, "loss": 2.3288, "step": 139370 }, { "epoch": 0.5388041007561349, "grad_norm": 0.10719124227762222, "learning_rate": 0.002, "loss": 2.3536, "step": 139380 }, { "epoch": 0.5388427579595182, "grad_norm": 0.11400733888149261, "learning_rate": 0.002, "loss": 2.3445, "step": 139390 }, { "epoch": 0.5388814151629014, "grad_norm": 0.10717476904392242, "learning_rate": 0.002, "loss": 2.34, "step": 139400 }, { "epoch": 0.5389200723662847, "grad_norm": 0.11081114411354065, "learning_rate": 0.002, "loss": 2.3396, "step": 139410 }, { "epoch": 0.5389587295696681, "grad_norm": 0.10991322249174118, "learning_rate": 0.002, "loss": 2.3325, "step": 139420 }, { "epoch": 0.5389973867730513, "grad_norm": 0.09669278562068939, "learning_rate": 0.002, "loss": 2.3431, "step": 139430 }, { "epoch": 0.5390360439764346, "grad_norm": 0.11478671431541443, "learning_rate": 0.002, "loss": 2.3377, "step": 139440 }, { "epoch": 0.5390747011798178, "grad_norm": 0.1203511580824852, "learning_rate": 0.002, "loss": 2.3396, "step": 139450 }, { "epoch": 0.5391133583832012, "grad_norm": 0.10512226819992065, "learning_rate": 0.002, "loss": 2.335, "step": 139460 }, { "epoch": 0.5391520155865844, "grad_norm": 0.09710384905338287, "learning_rate": 0.002, "loss": 2.3443, "step": 139470 }, { "epoch": 0.5391906727899677, "grad_norm": 0.29483917355537415, "learning_rate": 0.002, "loss": 2.3516, "step": 139480 }, { "epoch": 0.5392293299933509, "grad_norm": 0.12612727284431458, "learning_rate": 0.002, "loss": 2.352, "step": 139490 }, { "epoch": 0.5392679871967342, "grad_norm": 0.10861579328775406, "learning_rate": 0.002, "loss": 2.3362, "step": 139500 }, { "epoch": 0.5393066444001176, "grad_norm": 0.10819701850414276, "learning_rate": 0.002, "loss": 2.3316, "step": 139510 }, { "epoch": 0.5393453016035008, "grad_norm": 0.10991454869508743, "learning_rate": 0.002, "loss": 2.3422, "step": 139520 }, { "epoch": 0.5393839588068841, "grad_norm": 0.09839512407779694, "learning_rate": 0.002, "loss": 2.3476, "step": 139530 }, { "epoch": 0.5394226160102673, "grad_norm": 0.11368145793676376, "learning_rate": 0.002, "loss": 2.3344, "step": 139540 }, { "epoch": 0.5394612732136507, "grad_norm": 0.10433456301689148, "learning_rate": 0.002, "loss": 2.3393, "step": 139550 }, { "epoch": 0.5394999304170339, "grad_norm": 0.1109948456287384, "learning_rate": 0.002, "loss": 2.3329, "step": 139560 }, { "epoch": 0.5395385876204172, "grad_norm": 0.1037093847990036, "learning_rate": 0.002, "loss": 2.3411, "step": 139570 }, { "epoch": 0.5395772448238004, "grad_norm": 0.10175781697034836, "learning_rate": 0.002, "loss": 2.3379, "step": 139580 }, { "epoch": 0.5396159020271838, "grad_norm": 0.09333990514278412, "learning_rate": 0.002, "loss": 2.3583, "step": 139590 }, { "epoch": 0.539654559230567, "grad_norm": 0.11181159317493439, "learning_rate": 0.002, "loss": 2.3431, "step": 139600 }, { "epoch": 0.5396932164339503, "grad_norm": 0.11220884323120117, "learning_rate": 0.002, "loss": 2.3304, "step": 139610 }, { "epoch": 0.5397318736373335, "grad_norm": 0.09398156404495239, "learning_rate": 0.002, "loss": 2.3492, "step": 139620 }, { "epoch": 0.5397705308407169, "grad_norm": 0.11417990177869797, "learning_rate": 0.002, "loss": 2.3477, "step": 139630 }, { "epoch": 0.5398091880441002, "grad_norm": 0.14919176697731018, "learning_rate": 0.002, "loss": 2.3465, "step": 139640 }, { "epoch": 0.5398478452474834, "grad_norm": 0.11777222901582718, "learning_rate": 0.002, "loss": 2.3387, "step": 139650 }, { "epoch": 0.5398865024508667, "grad_norm": 0.10088865458965302, "learning_rate": 0.002, "loss": 2.3474, "step": 139660 }, { "epoch": 0.5399251596542499, "grad_norm": 0.09098080545663834, "learning_rate": 0.002, "loss": 2.3557, "step": 139670 }, { "epoch": 0.5399638168576333, "grad_norm": 0.11097294092178345, "learning_rate": 0.002, "loss": 2.3439, "step": 139680 }, { "epoch": 0.5400024740610165, "grad_norm": 0.11297319829463959, "learning_rate": 0.002, "loss": 2.3619, "step": 139690 }, { "epoch": 0.5400411312643998, "grad_norm": 0.0978778824210167, "learning_rate": 0.002, "loss": 2.3495, "step": 139700 }, { "epoch": 0.540079788467783, "grad_norm": 0.12271102517843246, "learning_rate": 0.002, "loss": 2.3432, "step": 139710 }, { "epoch": 0.5401184456711664, "grad_norm": 0.10544592142105103, "learning_rate": 0.002, "loss": 2.3373, "step": 139720 }, { "epoch": 0.5401571028745497, "grad_norm": 0.10868457704782486, "learning_rate": 0.002, "loss": 2.3345, "step": 139730 }, { "epoch": 0.5401957600779329, "grad_norm": 0.13596363365650177, "learning_rate": 0.002, "loss": 2.3379, "step": 139740 }, { "epoch": 0.5402344172813162, "grad_norm": 0.10462050884962082, "learning_rate": 0.002, "loss": 2.3467, "step": 139750 }, { "epoch": 0.5402730744846995, "grad_norm": 0.12288188934326172, "learning_rate": 0.002, "loss": 2.3472, "step": 139760 }, { "epoch": 0.5403117316880828, "grad_norm": 0.11072691529989243, "learning_rate": 0.002, "loss": 2.3406, "step": 139770 }, { "epoch": 0.540350388891466, "grad_norm": 0.1335332691669464, "learning_rate": 0.002, "loss": 2.3394, "step": 139780 }, { "epoch": 0.5403890460948493, "grad_norm": 0.10119934380054474, "learning_rate": 0.002, "loss": 2.3524, "step": 139790 }, { "epoch": 0.5404277032982326, "grad_norm": 0.09935948997735977, "learning_rate": 0.002, "loss": 2.3419, "step": 139800 }, { "epoch": 0.5404663605016159, "grad_norm": 0.11677015572786331, "learning_rate": 0.002, "loss": 2.3375, "step": 139810 }, { "epoch": 0.5405050177049991, "grad_norm": 0.10641399770975113, "learning_rate": 0.002, "loss": 2.3386, "step": 139820 }, { "epoch": 0.5405436749083824, "grad_norm": 0.10183258354663849, "learning_rate": 0.002, "loss": 2.3289, "step": 139830 }, { "epoch": 0.5405823321117657, "grad_norm": 0.09802069514989853, "learning_rate": 0.002, "loss": 2.3327, "step": 139840 }, { "epoch": 0.540620989315149, "grad_norm": 0.11229890584945679, "learning_rate": 0.002, "loss": 2.356, "step": 139850 }, { "epoch": 0.5406596465185323, "grad_norm": 0.10374360531568527, "learning_rate": 0.002, "loss": 2.3545, "step": 139860 }, { "epoch": 0.5406983037219155, "grad_norm": 0.10174267739057541, "learning_rate": 0.002, "loss": 2.3438, "step": 139870 }, { "epoch": 0.5407369609252988, "grad_norm": 0.11391658335924149, "learning_rate": 0.002, "loss": 2.3332, "step": 139880 }, { "epoch": 0.5407756181286821, "grad_norm": 0.11875861883163452, "learning_rate": 0.002, "loss": 2.3309, "step": 139890 }, { "epoch": 0.5408142753320654, "grad_norm": 0.10455701500177383, "learning_rate": 0.002, "loss": 2.3351, "step": 139900 }, { "epoch": 0.5408529325354486, "grad_norm": 0.11358092725276947, "learning_rate": 0.002, "loss": 2.3503, "step": 139910 }, { "epoch": 0.5408915897388319, "grad_norm": 0.11409742385149002, "learning_rate": 0.002, "loss": 2.3354, "step": 139920 }, { "epoch": 0.5409302469422153, "grad_norm": 0.14725357294082642, "learning_rate": 0.002, "loss": 2.3486, "step": 139930 }, { "epoch": 0.5409689041455985, "grad_norm": 0.10605788230895996, "learning_rate": 0.002, "loss": 2.3416, "step": 139940 }, { "epoch": 0.5410075613489818, "grad_norm": 0.107200987637043, "learning_rate": 0.002, "loss": 2.3313, "step": 139950 }, { "epoch": 0.541046218552365, "grad_norm": 0.09364209324121475, "learning_rate": 0.002, "loss": 2.3489, "step": 139960 }, { "epoch": 0.5410848757557484, "grad_norm": 0.1474541425704956, "learning_rate": 0.002, "loss": 2.3574, "step": 139970 }, { "epoch": 0.5411235329591316, "grad_norm": 0.10103508085012436, "learning_rate": 0.002, "loss": 2.3651, "step": 139980 }, { "epoch": 0.5411621901625149, "grad_norm": 0.10418781638145447, "learning_rate": 0.002, "loss": 2.3568, "step": 139990 }, { "epoch": 0.5412008473658981, "grad_norm": 0.1143510490655899, "learning_rate": 0.002, "loss": 2.3332, "step": 140000 }, { "epoch": 0.5412395045692815, "grad_norm": 0.10674919933080673, "learning_rate": 0.002, "loss": 2.3424, "step": 140010 }, { "epoch": 0.5412781617726647, "grad_norm": 0.09429977834224701, "learning_rate": 0.002, "loss": 2.3451, "step": 140020 }, { "epoch": 0.541316818976048, "grad_norm": 0.11200029402971268, "learning_rate": 0.002, "loss": 2.3418, "step": 140030 }, { "epoch": 0.5413554761794312, "grad_norm": 0.10863328725099564, "learning_rate": 0.002, "loss": 2.3451, "step": 140040 }, { "epoch": 0.5413941333828145, "grad_norm": 0.09245651960372925, "learning_rate": 0.002, "loss": 2.3211, "step": 140050 }, { "epoch": 0.5414327905861979, "grad_norm": 0.10689567774534225, "learning_rate": 0.002, "loss": 2.3353, "step": 140060 }, { "epoch": 0.5414714477895811, "grad_norm": 0.10819069296121597, "learning_rate": 0.002, "loss": 2.3503, "step": 140070 }, { "epoch": 0.5415101049929644, "grad_norm": 0.10671747475862503, "learning_rate": 0.002, "loss": 2.3397, "step": 140080 }, { "epoch": 0.5415487621963476, "grad_norm": 0.10832956433296204, "learning_rate": 0.002, "loss": 2.3341, "step": 140090 }, { "epoch": 0.541587419399731, "grad_norm": 0.11262453347444534, "learning_rate": 0.002, "loss": 2.3284, "step": 140100 }, { "epoch": 0.5416260766031142, "grad_norm": 0.10590975731611252, "learning_rate": 0.002, "loss": 2.342, "step": 140110 }, { "epoch": 0.5416647338064975, "grad_norm": 0.11854864656925201, "learning_rate": 0.002, "loss": 2.3511, "step": 140120 }, { "epoch": 0.5417033910098807, "grad_norm": 0.10754364728927612, "learning_rate": 0.002, "loss": 2.34, "step": 140130 }, { "epoch": 0.5417420482132641, "grad_norm": 0.10380040109157562, "learning_rate": 0.002, "loss": 2.36, "step": 140140 }, { "epoch": 0.5417807054166474, "grad_norm": 0.11440842598676682, "learning_rate": 0.002, "loss": 2.3328, "step": 140150 }, { "epoch": 0.5418193626200306, "grad_norm": 0.11621855944395065, "learning_rate": 0.002, "loss": 2.3476, "step": 140160 }, { "epoch": 0.5418580198234139, "grad_norm": 0.10840783268213272, "learning_rate": 0.002, "loss": 2.3327, "step": 140170 }, { "epoch": 0.5418966770267972, "grad_norm": 0.11345444619655609, "learning_rate": 0.002, "loss": 2.3567, "step": 140180 }, { "epoch": 0.5419353342301805, "grad_norm": 0.10408294945955276, "learning_rate": 0.002, "loss": 2.3441, "step": 140190 }, { "epoch": 0.5419739914335637, "grad_norm": 0.10988804697990417, "learning_rate": 0.002, "loss": 2.347, "step": 140200 }, { "epoch": 0.542012648636947, "grad_norm": 0.08823379874229431, "learning_rate": 0.002, "loss": 2.3444, "step": 140210 }, { "epoch": 0.5420513058403302, "grad_norm": 0.10055267810821533, "learning_rate": 0.002, "loss": 2.3235, "step": 140220 }, { "epoch": 0.5420899630437136, "grad_norm": 0.10901609808206558, "learning_rate": 0.002, "loss": 2.3364, "step": 140230 }, { "epoch": 0.5421286202470968, "grad_norm": 0.09420198202133179, "learning_rate": 0.002, "loss": 2.343, "step": 140240 }, { "epoch": 0.5421672774504801, "grad_norm": 0.17304375767707825, "learning_rate": 0.002, "loss": 2.3473, "step": 140250 }, { "epoch": 0.5422059346538634, "grad_norm": 0.11952243000268936, "learning_rate": 0.002, "loss": 2.3306, "step": 140260 }, { "epoch": 0.5422445918572467, "grad_norm": 0.09226176142692566, "learning_rate": 0.002, "loss": 2.3485, "step": 140270 }, { "epoch": 0.54228324906063, "grad_norm": 0.10702744871377945, "learning_rate": 0.002, "loss": 2.3407, "step": 140280 }, { "epoch": 0.5423219062640132, "grad_norm": 0.10657176375389099, "learning_rate": 0.002, "loss": 2.3469, "step": 140290 }, { "epoch": 0.5423605634673965, "grad_norm": 0.10938893258571625, "learning_rate": 0.002, "loss": 2.3491, "step": 140300 }, { "epoch": 0.5423992206707798, "grad_norm": 0.10880811512470245, "learning_rate": 0.002, "loss": 2.3332, "step": 140310 }, { "epoch": 0.5424378778741631, "grad_norm": 0.11123374104499817, "learning_rate": 0.002, "loss": 2.3555, "step": 140320 }, { "epoch": 0.5424765350775463, "grad_norm": 0.09306691586971283, "learning_rate": 0.002, "loss": 2.3679, "step": 140330 }, { "epoch": 0.5425151922809296, "grad_norm": 0.11641582101583481, "learning_rate": 0.002, "loss": 2.3255, "step": 140340 }, { "epoch": 0.542553849484313, "grad_norm": 0.11220816522836685, "learning_rate": 0.002, "loss": 2.3498, "step": 140350 }, { "epoch": 0.5425925066876962, "grad_norm": 0.0909360870718956, "learning_rate": 0.002, "loss": 2.3438, "step": 140360 }, { "epoch": 0.5426311638910795, "grad_norm": 0.12731623649597168, "learning_rate": 0.002, "loss": 2.3423, "step": 140370 }, { "epoch": 0.5426698210944627, "grad_norm": 0.12812413275241852, "learning_rate": 0.002, "loss": 2.3338, "step": 140380 }, { "epoch": 0.5427084782978461, "grad_norm": 0.13419029116630554, "learning_rate": 0.002, "loss": 2.3381, "step": 140390 }, { "epoch": 0.5427471355012293, "grad_norm": 0.09206200391054153, "learning_rate": 0.002, "loss": 2.3336, "step": 140400 }, { "epoch": 0.5427857927046126, "grad_norm": 0.09752889722585678, "learning_rate": 0.002, "loss": 2.3382, "step": 140410 }, { "epoch": 0.5428244499079958, "grad_norm": 0.1198781207203865, "learning_rate": 0.002, "loss": 2.349, "step": 140420 }, { "epoch": 0.5428631071113791, "grad_norm": 0.13163580000400543, "learning_rate": 0.002, "loss": 2.3376, "step": 140430 }, { "epoch": 0.5429017643147624, "grad_norm": 0.10607501119375229, "learning_rate": 0.002, "loss": 2.3403, "step": 140440 }, { "epoch": 0.5429404215181457, "grad_norm": 0.10543625801801682, "learning_rate": 0.002, "loss": 2.332, "step": 140450 }, { "epoch": 0.542979078721529, "grad_norm": 0.11482521891593933, "learning_rate": 0.002, "loss": 2.3594, "step": 140460 }, { "epoch": 0.5430177359249122, "grad_norm": 0.09837967902421951, "learning_rate": 0.002, "loss": 2.3502, "step": 140470 }, { "epoch": 0.5430563931282956, "grad_norm": 0.13049250841140747, "learning_rate": 0.002, "loss": 2.3625, "step": 140480 }, { "epoch": 0.5430950503316788, "grad_norm": 0.12102434039115906, "learning_rate": 0.002, "loss": 2.3522, "step": 140490 }, { "epoch": 0.5431337075350621, "grad_norm": 0.10373283177614212, "learning_rate": 0.002, "loss": 2.3371, "step": 140500 }, { "epoch": 0.5431723647384453, "grad_norm": 0.1218980923295021, "learning_rate": 0.002, "loss": 2.3528, "step": 140510 }, { "epoch": 0.5432110219418287, "grad_norm": 0.11484548449516296, "learning_rate": 0.002, "loss": 2.3371, "step": 140520 }, { "epoch": 0.5432496791452119, "grad_norm": 0.09388495236635208, "learning_rate": 0.002, "loss": 2.3544, "step": 140530 }, { "epoch": 0.5432883363485952, "grad_norm": 0.10815958678722382, "learning_rate": 0.002, "loss": 2.3498, "step": 140540 }, { "epoch": 0.5433269935519784, "grad_norm": 0.10376013070344925, "learning_rate": 0.002, "loss": 2.3573, "step": 140550 }, { "epoch": 0.5433656507553618, "grad_norm": 0.1519942432641983, "learning_rate": 0.002, "loss": 2.353, "step": 140560 }, { "epoch": 0.5434043079587451, "grad_norm": 0.09740184247493744, "learning_rate": 0.002, "loss": 2.3444, "step": 140570 }, { "epoch": 0.5434429651621283, "grad_norm": 0.11878722161054611, "learning_rate": 0.002, "loss": 2.337, "step": 140580 }, { "epoch": 0.5434816223655116, "grad_norm": 0.11175355315208435, "learning_rate": 0.002, "loss": 2.3488, "step": 140590 }, { "epoch": 0.5435202795688948, "grad_norm": 0.11709204316139221, "learning_rate": 0.002, "loss": 2.3379, "step": 140600 }, { "epoch": 0.5435589367722782, "grad_norm": 0.10323283821344376, "learning_rate": 0.002, "loss": 2.3494, "step": 140610 }, { "epoch": 0.5435975939756614, "grad_norm": 0.1406097263097763, "learning_rate": 0.002, "loss": 2.3291, "step": 140620 }, { "epoch": 0.5436362511790447, "grad_norm": 0.10124669224023819, "learning_rate": 0.002, "loss": 2.3478, "step": 140630 }, { "epoch": 0.5436749083824279, "grad_norm": 0.10182762145996094, "learning_rate": 0.002, "loss": 2.3523, "step": 140640 }, { "epoch": 0.5437135655858113, "grad_norm": 0.11305060237646103, "learning_rate": 0.002, "loss": 2.3381, "step": 140650 }, { "epoch": 0.5437522227891946, "grad_norm": 0.10381458699703217, "learning_rate": 0.002, "loss": 2.3424, "step": 140660 }, { "epoch": 0.5437908799925778, "grad_norm": 0.10050679743289948, "learning_rate": 0.002, "loss": 2.341, "step": 140670 }, { "epoch": 0.543829537195961, "grad_norm": 0.11455484479665756, "learning_rate": 0.002, "loss": 2.3498, "step": 140680 }, { "epoch": 0.5438681943993444, "grad_norm": 0.12211675196886063, "learning_rate": 0.002, "loss": 2.3544, "step": 140690 }, { "epoch": 0.5439068516027277, "grad_norm": 0.14766447246074677, "learning_rate": 0.002, "loss": 2.34, "step": 140700 }, { "epoch": 0.5439455088061109, "grad_norm": 0.09843506664037704, "learning_rate": 0.002, "loss": 2.3575, "step": 140710 }, { "epoch": 0.5439841660094942, "grad_norm": 0.10439879447221756, "learning_rate": 0.002, "loss": 2.3342, "step": 140720 }, { "epoch": 0.5440228232128775, "grad_norm": 0.09761619567871094, "learning_rate": 0.002, "loss": 2.3471, "step": 140730 }, { "epoch": 0.5440614804162608, "grad_norm": 0.110262431204319, "learning_rate": 0.002, "loss": 2.3337, "step": 140740 }, { "epoch": 0.544100137619644, "grad_norm": 0.11131390184164047, "learning_rate": 0.002, "loss": 2.3427, "step": 140750 }, { "epoch": 0.5441387948230273, "grad_norm": 0.12816110253334045, "learning_rate": 0.002, "loss": 2.3648, "step": 140760 }, { "epoch": 0.5441774520264105, "grad_norm": 0.10264119505882263, "learning_rate": 0.002, "loss": 2.3481, "step": 140770 }, { "epoch": 0.5442161092297939, "grad_norm": 0.12289933860301971, "learning_rate": 0.002, "loss": 2.3388, "step": 140780 }, { "epoch": 0.5442547664331772, "grad_norm": 0.10959749668836594, "learning_rate": 0.002, "loss": 2.3541, "step": 140790 }, { "epoch": 0.5442934236365604, "grad_norm": 0.1002492755651474, "learning_rate": 0.002, "loss": 2.3474, "step": 140800 }, { "epoch": 0.5443320808399437, "grad_norm": 0.11740302294492722, "learning_rate": 0.002, "loss": 2.341, "step": 140810 }, { "epoch": 0.544370738043327, "grad_norm": 0.09524788707494736, "learning_rate": 0.002, "loss": 2.3384, "step": 140820 }, { "epoch": 0.5444093952467103, "grad_norm": 0.10740210115909576, "learning_rate": 0.002, "loss": 2.3444, "step": 140830 }, { "epoch": 0.5444480524500935, "grad_norm": 0.10724620521068573, "learning_rate": 0.002, "loss": 2.3478, "step": 140840 }, { "epoch": 0.5444867096534768, "grad_norm": 0.09376657009124756, "learning_rate": 0.002, "loss": 2.3326, "step": 140850 }, { "epoch": 0.5445253668568601, "grad_norm": 0.12937888503074646, "learning_rate": 0.002, "loss": 2.3559, "step": 140860 }, { "epoch": 0.5445640240602434, "grad_norm": 0.11522895097732544, "learning_rate": 0.002, "loss": 2.3306, "step": 140870 }, { "epoch": 0.5446026812636267, "grad_norm": 0.10400960594415665, "learning_rate": 0.002, "loss": 2.3545, "step": 140880 }, { "epoch": 0.5446413384670099, "grad_norm": 0.126004159450531, "learning_rate": 0.002, "loss": 2.3438, "step": 140890 }, { "epoch": 0.5446799956703933, "grad_norm": 0.11208324134349823, "learning_rate": 0.002, "loss": 2.3387, "step": 140900 }, { "epoch": 0.5447186528737765, "grad_norm": 0.10716670751571655, "learning_rate": 0.002, "loss": 2.362, "step": 140910 }, { "epoch": 0.5447573100771598, "grad_norm": 0.11034978926181793, "learning_rate": 0.002, "loss": 2.3454, "step": 140920 }, { "epoch": 0.544795967280543, "grad_norm": 0.11277556419372559, "learning_rate": 0.002, "loss": 2.3477, "step": 140930 }, { "epoch": 0.5448346244839264, "grad_norm": 0.1075998991727829, "learning_rate": 0.002, "loss": 2.3407, "step": 140940 }, { "epoch": 0.5448732816873096, "grad_norm": 0.12325393408536911, "learning_rate": 0.002, "loss": 2.3281, "step": 140950 }, { "epoch": 0.5449119388906929, "grad_norm": 0.11496102809906006, "learning_rate": 0.002, "loss": 2.3536, "step": 140960 }, { "epoch": 0.5449505960940761, "grad_norm": 0.09571215510368347, "learning_rate": 0.002, "loss": 2.3442, "step": 140970 }, { "epoch": 0.5449892532974594, "grad_norm": 0.10081490874290466, "learning_rate": 0.002, "loss": 2.3392, "step": 140980 }, { "epoch": 0.5450279105008428, "grad_norm": 0.11344999819993973, "learning_rate": 0.002, "loss": 2.3429, "step": 140990 }, { "epoch": 0.545066567704226, "grad_norm": 0.10012887418270111, "learning_rate": 0.002, "loss": 2.3629, "step": 141000 }, { "epoch": 0.5451052249076093, "grad_norm": 0.1071837916970253, "learning_rate": 0.002, "loss": 2.3436, "step": 141010 }, { "epoch": 0.5451438821109925, "grad_norm": 0.11736311763525009, "learning_rate": 0.002, "loss": 2.3266, "step": 141020 }, { "epoch": 0.5451825393143759, "grad_norm": 0.12453530728816986, "learning_rate": 0.002, "loss": 2.347, "step": 141030 }, { "epoch": 0.5452211965177591, "grad_norm": 0.10917795449495316, "learning_rate": 0.002, "loss": 2.3487, "step": 141040 }, { "epoch": 0.5452598537211424, "grad_norm": 0.11465924978256226, "learning_rate": 0.002, "loss": 2.3278, "step": 141050 }, { "epoch": 0.5452985109245256, "grad_norm": 0.11755634099245071, "learning_rate": 0.002, "loss": 2.3352, "step": 141060 }, { "epoch": 0.545337168127909, "grad_norm": 0.13867446780204773, "learning_rate": 0.002, "loss": 2.3271, "step": 141070 }, { "epoch": 0.5453758253312923, "grad_norm": 0.10823734104633331, "learning_rate": 0.002, "loss": 2.3585, "step": 141080 }, { "epoch": 0.5454144825346755, "grad_norm": 0.1211395263671875, "learning_rate": 0.002, "loss": 2.3349, "step": 141090 }, { "epoch": 0.5454531397380588, "grad_norm": 0.10349445044994354, "learning_rate": 0.002, "loss": 2.3473, "step": 141100 }, { "epoch": 0.5454917969414421, "grad_norm": 0.11433000862598419, "learning_rate": 0.002, "loss": 2.3436, "step": 141110 }, { "epoch": 0.5455304541448254, "grad_norm": 0.12612050771713257, "learning_rate": 0.002, "loss": 2.3467, "step": 141120 }, { "epoch": 0.5455691113482086, "grad_norm": 0.09824709594249725, "learning_rate": 0.002, "loss": 2.3549, "step": 141130 }, { "epoch": 0.5456077685515919, "grad_norm": 0.1088489443063736, "learning_rate": 0.002, "loss": 2.3423, "step": 141140 }, { "epoch": 0.5456464257549751, "grad_norm": 0.1025751382112503, "learning_rate": 0.002, "loss": 2.3507, "step": 141150 }, { "epoch": 0.5456850829583585, "grad_norm": 0.10643947124481201, "learning_rate": 0.002, "loss": 2.3407, "step": 141160 }, { "epoch": 0.5457237401617417, "grad_norm": 0.13787996768951416, "learning_rate": 0.002, "loss": 2.3533, "step": 141170 }, { "epoch": 0.545762397365125, "grad_norm": 0.18211092054843903, "learning_rate": 0.002, "loss": 2.3461, "step": 141180 }, { "epoch": 0.5458010545685082, "grad_norm": 0.10266145318746567, "learning_rate": 0.002, "loss": 2.3492, "step": 141190 }, { "epoch": 0.5458397117718916, "grad_norm": 0.1027229055762291, "learning_rate": 0.002, "loss": 2.3451, "step": 141200 }, { "epoch": 0.5458783689752749, "grad_norm": 0.11025464534759521, "learning_rate": 0.002, "loss": 2.341, "step": 141210 }, { "epoch": 0.5459170261786581, "grad_norm": 0.10315735638141632, "learning_rate": 0.002, "loss": 2.3428, "step": 141220 }, { "epoch": 0.5459556833820414, "grad_norm": 0.11743441969156265, "learning_rate": 0.002, "loss": 2.346, "step": 141230 }, { "epoch": 0.5459943405854247, "grad_norm": 0.09454575181007385, "learning_rate": 0.002, "loss": 2.334, "step": 141240 }, { "epoch": 0.546032997788808, "grad_norm": 0.10413283854722977, "learning_rate": 0.002, "loss": 2.3381, "step": 141250 }, { "epoch": 0.5460716549921912, "grad_norm": 0.10902142524719238, "learning_rate": 0.002, "loss": 2.3424, "step": 141260 }, { "epoch": 0.5461103121955745, "grad_norm": 0.10488925874233246, "learning_rate": 0.002, "loss": 2.3499, "step": 141270 }, { "epoch": 0.5461489693989579, "grad_norm": 0.12448426336050034, "learning_rate": 0.002, "loss": 2.3443, "step": 141280 }, { "epoch": 0.5461876266023411, "grad_norm": 0.14548496901988983, "learning_rate": 0.002, "loss": 2.3399, "step": 141290 }, { "epoch": 0.5462262838057244, "grad_norm": 0.09693209081888199, "learning_rate": 0.002, "loss": 2.3433, "step": 141300 }, { "epoch": 0.5462649410091076, "grad_norm": 0.09567166119813919, "learning_rate": 0.002, "loss": 2.3555, "step": 141310 }, { "epoch": 0.546303598212491, "grad_norm": 0.11992914974689484, "learning_rate": 0.002, "loss": 2.3415, "step": 141320 }, { "epoch": 0.5463422554158742, "grad_norm": 0.11967799067497253, "learning_rate": 0.002, "loss": 2.3258, "step": 141330 }, { "epoch": 0.5463809126192575, "grad_norm": 0.11840023845434189, "learning_rate": 0.002, "loss": 2.3435, "step": 141340 }, { "epoch": 0.5464195698226407, "grad_norm": 0.1122565045952797, "learning_rate": 0.002, "loss": 2.3576, "step": 141350 }, { "epoch": 0.546458227026024, "grad_norm": 0.10423989593982697, "learning_rate": 0.002, "loss": 2.3511, "step": 141360 }, { "epoch": 0.5464968842294073, "grad_norm": 0.11209941655397415, "learning_rate": 0.002, "loss": 2.3458, "step": 141370 }, { "epoch": 0.5465355414327906, "grad_norm": 0.1401730328798294, "learning_rate": 0.002, "loss": 2.35, "step": 141380 }, { "epoch": 0.5465741986361738, "grad_norm": 0.10664816200733185, "learning_rate": 0.002, "loss": 2.3541, "step": 141390 }, { "epoch": 0.5466128558395571, "grad_norm": 0.09247737377882004, "learning_rate": 0.002, "loss": 2.3387, "step": 141400 }, { "epoch": 0.5466515130429405, "grad_norm": 0.10387951880693436, "learning_rate": 0.002, "loss": 2.3454, "step": 141410 }, { "epoch": 0.5466901702463237, "grad_norm": 0.10998312383890152, "learning_rate": 0.002, "loss": 2.3434, "step": 141420 }, { "epoch": 0.546728827449707, "grad_norm": 0.10908520966768265, "learning_rate": 0.002, "loss": 2.3567, "step": 141430 }, { "epoch": 0.5467674846530902, "grad_norm": 0.1527799367904663, "learning_rate": 0.002, "loss": 2.358, "step": 141440 }, { "epoch": 0.5468061418564736, "grad_norm": 0.11492700129747391, "learning_rate": 0.002, "loss": 2.3583, "step": 141450 }, { "epoch": 0.5468447990598568, "grad_norm": 0.11756177246570587, "learning_rate": 0.002, "loss": 2.3404, "step": 141460 }, { "epoch": 0.5468834562632401, "grad_norm": 0.10394591093063354, "learning_rate": 0.002, "loss": 2.3569, "step": 141470 }, { "epoch": 0.5469221134666233, "grad_norm": 0.1501491218805313, "learning_rate": 0.002, "loss": 2.3352, "step": 141480 }, { "epoch": 0.5469607706700067, "grad_norm": 0.11308126896619797, "learning_rate": 0.002, "loss": 2.3425, "step": 141490 }, { "epoch": 0.54699942787339, "grad_norm": 0.10572928190231323, "learning_rate": 0.002, "loss": 2.3448, "step": 141500 }, { "epoch": 0.5470380850767732, "grad_norm": 0.11026640981435776, "learning_rate": 0.002, "loss": 2.3589, "step": 141510 }, { "epoch": 0.5470767422801565, "grad_norm": 0.0973174050450325, "learning_rate": 0.002, "loss": 2.3564, "step": 141520 }, { "epoch": 0.5471153994835397, "grad_norm": 0.11233708262443542, "learning_rate": 0.002, "loss": 2.3377, "step": 141530 }, { "epoch": 0.5471540566869231, "grad_norm": 0.10455524176359177, "learning_rate": 0.002, "loss": 2.3356, "step": 141540 }, { "epoch": 0.5471927138903063, "grad_norm": 0.11175559461116791, "learning_rate": 0.002, "loss": 2.3569, "step": 141550 }, { "epoch": 0.5472313710936896, "grad_norm": 0.10089890658855438, "learning_rate": 0.002, "loss": 2.367, "step": 141560 }, { "epoch": 0.5472700282970728, "grad_norm": 0.09284067898988724, "learning_rate": 0.002, "loss": 2.3504, "step": 141570 }, { "epoch": 0.5473086855004562, "grad_norm": 0.11399402469396591, "learning_rate": 0.002, "loss": 2.3307, "step": 141580 }, { "epoch": 0.5473473427038394, "grad_norm": 0.10248827189207077, "learning_rate": 0.002, "loss": 2.3465, "step": 141590 }, { "epoch": 0.5473859999072227, "grad_norm": 0.1427232325077057, "learning_rate": 0.002, "loss": 2.3411, "step": 141600 }, { "epoch": 0.547424657110606, "grad_norm": 0.10681942850351334, "learning_rate": 0.002, "loss": 2.3527, "step": 141610 }, { "epoch": 0.5474633143139893, "grad_norm": 0.11645887047052383, "learning_rate": 0.002, "loss": 2.3482, "step": 141620 }, { "epoch": 0.5475019715173726, "grad_norm": 0.10732848942279816, "learning_rate": 0.002, "loss": 2.3358, "step": 141630 }, { "epoch": 0.5475406287207558, "grad_norm": 0.10308927297592163, "learning_rate": 0.002, "loss": 2.3488, "step": 141640 }, { "epoch": 0.5475792859241391, "grad_norm": 0.11306145042181015, "learning_rate": 0.002, "loss": 2.3503, "step": 141650 }, { "epoch": 0.5476179431275224, "grad_norm": 0.11283697187900543, "learning_rate": 0.002, "loss": 2.3361, "step": 141660 }, { "epoch": 0.5476566003309057, "grad_norm": 0.11596529185771942, "learning_rate": 0.002, "loss": 2.3302, "step": 141670 }, { "epoch": 0.5476952575342889, "grad_norm": 0.10113532841205597, "learning_rate": 0.002, "loss": 2.3462, "step": 141680 }, { "epoch": 0.5477339147376722, "grad_norm": 0.10591016709804535, "learning_rate": 0.002, "loss": 2.3405, "step": 141690 }, { "epoch": 0.5477725719410554, "grad_norm": 0.0996353030204773, "learning_rate": 0.002, "loss": 2.3419, "step": 141700 }, { "epoch": 0.5478112291444388, "grad_norm": 0.11873535811901093, "learning_rate": 0.002, "loss": 2.3414, "step": 141710 }, { "epoch": 0.5478498863478221, "grad_norm": 0.09684333205223083, "learning_rate": 0.002, "loss": 2.3409, "step": 141720 }, { "epoch": 0.5478885435512053, "grad_norm": 0.10586248338222504, "learning_rate": 0.002, "loss": 2.3523, "step": 141730 }, { "epoch": 0.5479272007545886, "grad_norm": 0.10639870911836624, "learning_rate": 0.002, "loss": 2.3477, "step": 141740 }, { "epoch": 0.5479658579579719, "grad_norm": 0.1137106642127037, "learning_rate": 0.002, "loss": 2.3346, "step": 141750 }, { "epoch": 0.5480045151613552, "grad_norm": 0.11402547359466553, "learning_rate": 0.002, "loss": 2.3412, "step": 141760 }, { "epoch": 0.5480431723647384, "grad_norm": 0.11360086500644684, "learning_rate": 0.002, "loss": 2.3438, "step": 141770 }, { "epoch": 0.5480818295681217, "grad_norm": 0.11750371754169464, "learning_rate": 0.002, "loss": 2.353, "step": 141780 }, { "epoch": 0.548120486771505, "grad_norm": 0.1186307743191719, "learning_rate": 0.002, "loss": 2.3573, "step": 141790 }, { "epoch": 0.5481591439748883, "grad_norm": 0.11170139163732529, "learning_rate": 0.002, "loss": 2.3503, "step": 141800 }, { "epoch": 0.5481978011782715, "grad_norm": 0.11253993213176727, "learning_rate": 0.002, "loss": 2.3403, "step": 141810 }, { "epoch": 0.5482364583816548, "grad_norm": 0.129967600107193, "learning_rate": 0.002, "loss": 2.3408, "step": 141820 }, { "epoch": 0.5482751155850382, "grad_norm": 0.10900263488292694, "learning_rate": 0.002, "loss": 2.3559, "step": 141830 }, { "epoch": 0.5483137727884214, "grad_norm": 0.10477101802825928, "learning_rate": 0.002, "loss": 2.3476, "step": 141840 }, { "epoch": 0.5483524299918047, "grad_norm": 0.10287479311227798, "learning_rate": 0.002, "loss": 2.3637, "step": 141850 }, { "epoch": 0.5483910871951879, "grad_norm": 0.10571011900901794, "learning_rate": 0.002, "loss": 2.3506, "step": 141860 }, { "epoch": 0.5484297443985713, "grad_norm": 0.10842036455869675, "learning_rate": 0.002, "loss": 2.3423, "step": 141870 }, { "epoch": 0.5484684016019545, "grad_norm": 0.09585347026586533, "learning_rate": 0.002, "loss": 2.3367, "step": 141880 }, { "epoch": 0.5485070588053378, "grad_norm": 0.10993171483278275, "learning_rate": 0.002, "loss": 2.3438, "step": 141890 }, { "epoch": 0.548545716008721, "grad_norm": 0.10170605778694153, "learning_rate": 0.002, "loss": 2.3393, "step": 141900 }, { "epoch": 0.5485843732121043, "grad_norm": 0.10986624658107758, "learning_rate": 0.002, "loss": 2.3581, "step": 141910 }, { "epoch": 0.5486230304154877, "grad_norm": 0.11059927195310593, "learning_rate": 0.002, "loss": 2.3442, "step": 141920 }, { "epoch": 0.5486616876188709, "grad_norm": 0.11290241032838821, "learning_rate": 0.002, "loss": 2.3513, "step": 141930 }, { "epoch": 0.5487003448222542, "grad_norm": 0.11301394551992416, "learning_rate": 0.002, "loss": 2.3594, "step": 141940 }, { "epoch": 0.5487390020256374, "grad_norm": 0.1104665994644165, "learning_rate": 0.002, "loss": 2.3373, "step": 141950 }, { "epoch": 0.5487776592290208, "grad_norm": 0.10794278979301453, "learning_rate": 0.002, "loss": 2.3428, "step": 141960 }, { "epoch": 0.548816316432404, "grad_norm": 0.10633352398872375, "learning_rate": 0.002, "loss": 2.3414, "step": 141970 }, { "epoch": 0.5488549736357873, "grad_norm": 0.1199800968170166, "learning_rate": 0.002, "loss": 2.3377, "step": 141980 }, { "epoch": 0.5488936308391705, "grad_norm": 0.11475013196468353, "learning_rate": 0.002, "loss": 2.3486, "step": 141990 }, { "epoch": 0.5489322880425539, "grad_norm": 0.10983271151781082, "learning_rate": 0.002, "loss": 2.3365, "step": 142000 }, { "epoch": 0.5489709452459371, "grad_norm": 0.12063111364841461, "learning_rate": 0.002, "loss": 2.3292, "step": 142010 }, { "epoch": 0.5490096024493204, "grad_norm": 0.10083327442407608, "learning_rate": 0.002, "loss": 2.3549, "step": 142020 }, { "epoch": 0.5490482596527037, "grad_norm": 0.12083666026592255, "learning_rate": 0.002, "loss": 2.3444, "step": 142030 }, { "epoch": 0.549086916856087, "grad_norm": 0.09449543058872223, "learning_rate": 0.002, "loss": 2.3342, "step": 142040 }, { "epoch": 0.5491255740594703, "grad_norm": 0.10262036323547363, "learning_rate": 0.002, "loss": 2.3387, "step": 142050 }, { "epoch": 0.5491642312628535, "grad_norm": 0.10588623583316803, "learning_rate": 0.002, "loss": 2.3405, "step": 142060 }, { "epoch": 0.5492028884662368, "grad_norm": 0.10415609180927277, "learning_rate": 0.002, "loss": 2.3358, "step": 142070 }, { "epoch": 0.54924154566962, "grad_norm": 0.12541480362415314, "learning_rate": 0.002, "loss": 2.3398, "step": 142080 }, { "epoch": 0.5492802028730034, "grad_norm": 0.11196579784154892, "learning_rate": 0.002, "loss": 2.3363, "step": 142090 }, { "epoch": 0.5493188600763866, "grad_norm": 0.11203989386558533, "learning_rate": 0.002, "loss": 2.3423, "step": 142100 }, { "epoch": 0.5493575172797699, "grad_norm": 0.09834367036819458, "learning_rate": 0.002, "loss": 2.3439, "step": 142110 }, { "epoch": 0.5493961744831531, "grad_norm": 0.12963052093982697, "learning_rate": 0.002, "loss": 2.3505, "step": 142120 }, { "epoch": 0.5494348316865365, "grad_norm": 0.10904177278280258, "learning_rate": 0.002, "loss": 2.3251, "step": 142130 }, { "epoch": 0.5494734888899198, "grad_norm": 0.13051638007164001, "learning_rate": 0.002, "loss": 2.3468, "step": 142140 }, { "epoch": 0.549512146093303, "grad_norm": 0.11035215109586716, "learning_rate": 0.002, "loss": 2.3381, "step": 142150 }, { "epoch": 0.5495508032966863, "grad_norm": 0.10016104578971863, "learning_rate": 0.002, "loss": 2.3321, "step": 142160 }, { "epoch": 0.5495894605000696, "grad_norm": 0.10304881632328033, "learning_rate": 0.002, "loss": 2.3492, "step": 142170 }, { "epoch": 0.5496281177034529, "grad_norm": 0.09799494594335556, "learning_rate": 0.002, "loss": 2.3497, "step": 142180 }, { "epoch": 0.5496667749068361, "grad_norm": 0.10154926776885986, "learning_rate": 0.002, "loss": 2.3481, "step": 142190 }, { "epoch": 0.5497054321102194, "grad_norm": 0.1152535229921341, "learning_rate": 0.002, "loss": 2.3599, "step": 142200 }, { "epoch": 0.5497440893136027, "grad_norm": 0.100237637758255, "learning_rate": 0.002, "loss": 2.3572, "step": 142210 }, { "epoch": 0.549782746516986, "grad_norm": 0.11598395556211472, "learning_rate": 0.002, "loss": 2.3497, "step": 142220 }, { "epoch": 0.5498214037203693, "grad_norm": 0.10567312687635422, "learning_rate": 0.002, "loss": 2.3395, "step": 142230 }, { "epoch": 0.5498600609237525, "grad_norm": 0.08874915540218353, "learning_rate": 0.002, "loss": 2.3427, "step": 142240 }, { "epoch": 0.5498987181271359, "grad_norm": 0.13708887994289398, "learning_rate": 0.002, "loss": 2.3447, "step": 142250 }, { "epoch": 0.5499373753305191, "grad_norm": 0.12051773816347122, "learning_rate": 0.002, "loss": 2.3675, "step": 142260 }, { "epoch": 0.5499760325339024, "grad_norm": 0.10234910249710083, "learning_rate": 0.002, "loss": 2.3448, "step": 142270 }, { "epoch": 0.5500146897372856, "grad_norm": 0.1016688346862793, "learning_rate": 0.002, "loss": 2.3439, "step": 142280 }, { "epoch": 0.5500533469406689, "grad_norm": 0.11642882972955704, "learning_rate": 0.002, "loss": 2.3534, "step": 142290 }, { "epoch": 0.5500920041440522, "grad_norm": 0.10169527679681778, "learning_rate": 0.002, "loss": 2.3493, "step": 142300 }, { "epoch": 0.5501306613474355, "grad_norm": 0.102230966091156, "learning_rate": 0.002, "loss": 2.3472, "step": 142310 }, { "epoch": 0.5501693185508187, "grad_norm": 0.12380985170602798, "learning_rate": 0.002, "loss": 2.3406, "step": 142320 }, { "epoch": 0.550207975754202, "grad_norm": 0.11084472388029099, "learning_rate": 0.002, "loss": 2.3414, "step": 142330 }, { "epoch": 0.5502466329575854, "grad_norm": 0.14835940301418304, "learning_rate": 0.002, "loss": 2.3565, "step": 142340 }, { "epoch": 0.5502852901609686, "grad_norm": 0.11222419887781143, "learning_rate": 0.002, "loss": 2.3492, "step": 142350 }, { "epoch": 0.5503239473643519, "grad_norm": 0.09371999651193619, "learning_rate": 0.002, "loss": 2.367, "step": 142360 }, { "epoch": 0.5503626045677351, "grad_norm": 0.10161064565181732, "learning_rate": 0.002, "loss": 2.3577, "step": 142370 }, { "epoch": 0.5504012617711185, "grad_norm": 0.10748559236526489, "learning_rate": 0.002, "loss": 2.3405, "step": 142380 }, { "epoch": 0.5504399189745017, "grad_norm": 0.10964936017990112, "learning_rate": 0.002, "loss": 2.3451, "step": 142390 }, { "epoch": 0.550478576177885, "grad_norm": 0.10478832572698593, "learning_rate": 0.002, "loss": 2.3425, "step": 142400 }, { "epoch": 0.5505172333812682, "grad_norm": 0.11970923840999603, "learning_rate": 0.002, "loss": 2.3424, "step": 142410 }, { "epoch": 0.5505558905846516, "grad_norm": 0.1185605600476265, "learning_rate": 0.002, "loss": 2.3506, "step": 142420 }, { "epoch": 0.5505945477880348, "grad_norm": 0.11552104353904724, "learning_rate": 0.002, "loss": 2.3603, "step": 142430 }, { "epoch": 0.5506332049914181, "grad_norm": 0.11067025363445282, "learning_rate": 0.002, "loss": 2.3448, "step": 142440 }, { "epoch": 0.5506718621948014, "grad_norm": 0.11349763721227646, "learning_rate": 0.002, "loss": 2.3496, "step": 142450 }, { "epoch": 0.5507105193981846, "grad_norm": 0.12076409161090851, "learning_rate": 0.002, "loss": 2.3572, "step": 142460 }, { "epoch": 0.550749176601568, "grad_norm": 0.10295160114765167, "learning_rate": 0.002, "loss": 2.3453, "step": 142470 }, { "epoch": 0.5507878338049512, "grad_norm": 0.09861474484205246, "learning_rate": 0.002, "loss": 2.3515, "step": 142480 }, { "epoch": 0.5508264910083345, "grad_norm": 0.09792405366897583, "learning_rate": 0.002, "loss": 2.3562, "step": 142490 }, { "epoch": 0.5508651482117177, "grad_norm": 0.09810902178287506, "learning_rate": 0.002, "loss": 2.3427, "step": 142500 }, { "epoch": 0.5509038054151011, "grad_norm": 0.10388771444559097, "learning_rate": 0.002, "loss": 2.3304, "step": 142510 }, { "epoch": 0.5509424626184843, "grad_norm": 0.11705761402845383, "learning_rate": 0.002, "loss": 2.3411, "step": 142520 }, { "epoch": 0.5509811198218676, "grad_norm": 0.1030333936214447, "learning_rate": 0.002, "loss": 2.3598, "step": 142530 }, { "epoch": 0.5510197770252508, "grad_norm": 0.1025761142373085, "learning_rate": 0.002, "loss": 2.3445, "step": 142540 }, { "epoch": 0.5510584342286342, "grad_norm": 0.09871582686901093, "learning_rate": 0.002, "loss": 2.3388, "step": 142550 }, { "epoch": 0.5510970914320175, "grad_norm": 0.11431854218244553, "learning_rate": 0.002, "loss": 2.3475, "step": 142560 }, { "epoch": 0.5511357486354007, "grad_norm": 0.11743414402008057, "learning_rate": 0.002, "loss": 2.3419, "step": 142570 }, { "epoch": 0.551174405838784, "grad_norm": 0.11136042326688766, "learning_rate": 0.002, "loss": 2.3504, "step": 142580 }, { "epoch": 0.5512130630421673, "grad_norm": 0.11726684123277664, "learning_rate": 0.002, "loss": 2.35, "step": 142590 }, { "epoch": 0.5512517202455506, "grad_norm": 0.11254546046257019, "learning_rate": 0.002, "loss": 2.3391, "step": 142600 }, { "epoch": 0.5512903774489338, "grad_norm": 0.11354956775903702, "learning_rate": 0.002, "loss": 2.358, "step": 142610 }, { "epoch": 0.5513290346523171, "grad_norm": 0.1168670505285263, "learning_rate": 0.002, "loss": 2.3403, "step": 142620 }, { "epoch": 0.5513676918557003, "grad_norm": 0.12499973922967911, "learning_rate": 0.002, "loss": 2.3416, "step": 142630 }, { "epoch": 0.5514063490590837, "grad_norm": 0.0976676344871521, "learning_rate": 0.002, "loss": 2.3528, "step": 142640 }, { "epoch": 0.551445006262467, "grad_norm": 0.11980077624320984, "learning_rate": 0.002, "loss": 2.3405, "step": 142650 }, { "epoch": 0.5514836634658502, "grad_norm": 0.1028711274266243, "learning_rate": 0.002, "loss": 2.3516, "step": 142660 }, { "epoch": 0.5515223206692335, "grad_norm": 0.1479312628507614, "learning_rate": 0.002, "loss": 2.3568, "step": 142670 }, { "epoch": 0.5515609778726168, "grad_norm": 0.11372735351324081, "learning_rate": 0.002, "loss": 2.3503, "step": 142680 }, { "epoch": 0.5515996350760001, "grad_norm": 0.11581075191497803, "learning_rate": 0.002, "loss": 2.3586, "step": 142690 }, { "epoch": 0.5516382922793833, "grad_norm": 0.09935362637042999, "learning_rate": 0.002, "loss": 2.3578, "step": 142700 }, { "epoch": 0.5516769494827666, "grad_norm": 0.10516053438186646, "learning_rate": 0.002, "loss": 2.3376, "step": 142710 }, { "epoch": 0.5517156066861499, "grad_norm": 0.10696021467447281, "learning_rate": 0.002, "loss": 2.3402, "step": 142720 }, { "epoch": 0.5517542638895332, "grad_norm": 0.09497939795255661, "learning_rate": 0.002, "loss": 2.3446, "step": 142730 }, { "epoch": 0.5517929210929164, "grad_norm": 0.10460008680820465, "learning_rate": 0.002, "loss": 2.348, "step": 142740 }, { "epoch": 0.5518315782962997, "grad_norm": 0.134171724319458, "learning_rate": 0.002, "loss": 2.3534, "step": 142750 }, { "epoch": 0.5518702354996831, "grad_norm": 0.10251723229885101, "learning_rate": 0.002, "loss": 2.3324, "step": 142760 }, { "epoch": 0.5519088927030663, "grad_norm": 0.10286859422922134, "learning_rate": 0.002, "loss": 2.3461, "step": 142770 }, { "epoch": 0.5519475499064496, "grad_norm": 0.10067896544933319, "learning_rate": 0.002, "loss": 2.3317, "step": 142780 }, { "epoch": 0.5519862071098328, "grad_norm": 0.11117151379585266, "learning_rate": 0.002, "loss": 2.3496, "step": 142790 }, { "epoch": 0.5520248643132162, "grad_norm": 0.11155954003334045, "learning_rate": 0.002, "loss": 2.3301, "step": 142800 }, { "epoch": 0.5520635215165994, "grad_norm": 0.09828852862119675, "learning_rate": 0.002, "loss": 2.3307, "step": 142810 }, { "epoch": 0.5521021787199827, "grad_norm": 0.09801884740591049, "learning_rate": 0.002, "loss": 2.3196, "step": 142820 }, { "epoch": 0.5521408359233659, "grad_norm": 0.12369256466627121, "learning_rate": 0.002, "loss": 2.3588, "step": 142830 }, { "epoch": 0.5521794931267492, "grad_norm": 0.09979232400655746, "learning_rate": 0.002, "loss": 2.343, "step": 142840 }, { "epoch": 0.5522181503301326, "grad_norm": 0.10395888984203339, "learning_rate": 0.002, "loss": 2.3431, "step": 142850 }, { "epoch": 0.5522568075335158, "grad_norm": 0.09872164577245712, "learning_rate": 0.002, "loss": 2.3536, "step": 142860 }, { "epoch": 0.552295464736899, "grad_norm": 0.10278721898794174, "learning_rate": 0.002, "loss": 2.3381, "step": 142870 }, { "epoch": 0.5523341219402823, "grad_norm": 0.09452217072248459, "learning_rate": 0.002, "loss": 2.3309, "step": 142880 }, { "epoch": 0.5523727791436657, "grad_norm": 0.1081966757774353, "learning_rate": 0.002, "loss": 2.3434, "step": 142890 }, { "epoch": 0.5524114363470489, "grad_norm": 0.10426264256238937, "learning_rate": 0.002, "loss": 2.3391, "step": 142900 }, { "epoch": 0.5524500935504322, "grad_norm": 0.09925641119480133, "learning_rate": 0.002, "loss": 2.3424, "step": 142910 }, { "epoch": 0.5524887507538154, "grad_norm": 0.10067332535982132, "learning_rate": 0.002, "loss": 2.3492, "step": 142920 }, { "epoch": 0.5525274079571988, "grad_norm": 0.10544595122337341, "learning_rate": 0.002, "loss": 2.3525, "step": 142930 }, { "epoch": 0.552566065160582, "grad_norm": 0.12352099269628525, "learning_rate": 0.002, "loss": 2.3508, "step": 142940 }, { "epoch": 0.5526047223639653, "grad_norm": 0.25755220651626587, "learning_rate": 0.002, "loss": 2.3512, "step": 142950 }, { "epoch": 0.5526433795673485, "grad_norm": 0.1176406517624855, "learning_rate": 0.002, "loss": 2.3393, "step": 142960 }, { "epoch": 0.5526820367707319, "grad_norm": 0.09558644145727158, "learning_rate": 0.002, "loss": 2.3616, "step": 142970 }, { "epoch": 0.5527206939741152, "grad_norm": 0.09294940531253815, "learning_rate": 0.002, "loss": 2.3346, "step": 142980 }, { "epoch": 0.5527593511774984, "grad_norm": 0.12759459018707275, "learning_rate": 0.002, "loss": 2.345, "step": 142990 }, { "epoch": 0.5527980083808817, "grad_norm": 0.1065143421292305, "learning_rate": 0.002, "loss": 2.3464, "step": 143000 }, { "epoch": 0.5528366655842649, "grad_norm": 0.10980822890996933, "learning_rate": 0.002, "loss": 2.3251, "step": 143010 }, { "epoch": 0.5528753227876483, "grad_norm": 0.10744861513376236, "learning_rate": 0.002, "loss": 2.3479, "step": 143020 }, { "epoch": 0.5529139799910315, "grad_norm": 0.10682953894138336, "learning_rate": 0.002, "loss": 2.3544, "step": 143030 }, { "epoch": 0.5529526371944148, "grad_norm": 0.1215931624174118, "learning_rate": 0.002, "loss": 2.3382, "step": 143040 }, { "epoch": 0.552991294397798, "grad_norm": 0.10288725048303604, "learning_rate": 0.002, "loss": 2.3416, "step": 143050 }, { "epoch": 0.5530299516011814, "grad_norm": 0.10882063210010529, "learning_rate": 0.002, "loss": 2.35, "step": 143060 }, { "epoch": 0.5530686088045647, "grad_norm": 0.12566843628883362, "learning_rate": 0.002, "loss": 2.3518, "step": 143070 }, { "epoch": 0.5531072660079479, "grad_norm": 0.11138502508401871, "learning_rate": 0.002, "loss": 2.3579, "step": 143080 }, { "epoch": 0.5531459232113312, "grad_norm": 0.12486086040735245, "learning_rate": 0.002, "loss": 2.3382, "step": 143090 }, { "epoch": 0.5531845804147145, "grad_norm": 0.11329344660043716, "learning_rate": 0.002, "loss": 2.3382, "step": 143100 }, { "epoch": 0.5532232376180978, "grad_norm": 0.09787940979003906, "learning_rate": 0.002, "loss": 2.3509, "step": 143110 }, { "epoch": 0.553261894821481, "grad_norm": 0.1143239215016365, "learning_rate": 0.002, "loss": 2.3354, "step": 143120 }, { "epoch": 0.5533005520248643, "grad_norm": 0.10016550123691559, "learning_rate": 0.002, "loss": 2.3573, "step": 143130 }, { "epoch": 0.5533392092282476, "grad_norm": 0.09186196327209473, "learning_rate": 0.002, "loss": 2.3389, "step": 143140 }, { "epoch": 0.5533778664316309, "grad_norm": 0.09927267581224442, "learning_rate": 0.002, "loss": 2.3285, "step": 143150 }, { "epoch": 0.5534165236350141, "grad_norm": 0.10326193273067474, "learning_rate": 0.002, "loss": 2.3479, "step": 143160 }, { "epoch": 0.5534551808383974, "grad_norm": 0.1423591524362564, "learning_rate": 0.002, "loss": 2.338, "step": 143170 }, { "epoch": 0.5534938380417807, "grad_norm": 0.11829890310764313, "learning_rate": 0.002, "loss": 2.3418, "step": 143180 }, { "epoch": 0.553532495245164, "grad_norm": 0.10465212166309357, "learning_rate": 0.002, "loss": 2.3396, "step": 143190 }, { "epoch": 0.5535711524485473, "grad_norm": 0.1198871061205864, "learning_rate": 0.002, "loss": 2.3394, "step": 143200 }, { "epoch": 0.5536098096519305, "grad_norm": 0.11005301028490067, "learning_rate": 0.002, "loss": 2.3302, "step": 143210 }, { "epoch": 0.5536484668553138, "grad_norm": 0.13708800077438354, "learning_rate": 0.002, "loss": 2.3483, "step": 143220 }, { "epoch": 0.5536871240586971, "grad_norm": 0.12035497277975082, "learning_rate": 0.002, "loss": 2.341, "step": 143230 }, { "epoch": 0.5537257812620804, "grad_norm": 0.11575876921415329, "learning_rate": 0.002, "loss": 2.3526, "step": 143240 }, { "epoch": 0.5537644384654636, "grad_norm": 0.11982641369104385, "learning_rate": 0.002, "loss": 2.3398, "step": 143250 }, { "epoch": 0.5538030956688469, "grad_norm": 0.10384172946214676, "learning_rate": 0.002, "loss": 2.3454, "step": 143260 }, { "epoch": 0.5538417528722303, "grad_norm": 0.104493148624897, "learning_rate": 0.002, "loss": 2.3376, "step": 143270 }, { "epoch": 0.5538804100756135, "grad_norm": 0.12926560640335083, "learning_rate": 0.002, "loss": 2.3397, "step": 143280 }, { "epoch": 0.5539190672789968, "grad_norm": 0.09122234582901001, "learning_rate": 0.002, "loss": 2.3469, "step": 143290 }, { "epoch": 0.55395772448238, "grad_norm": 0.10327209532260895, "learning_rate": 0.002, "loss": 2.3455, "step": 143300 }, { "epoch": 0.5539963816857634, "grad_norm": 0.10882581770420074, "learning_rate": 0.002, "loss": 2.3468, "step": 143310 }, { "epoch": 0.5540350388891466, "grad_norm": 0.11400618404150009, "learning_rate": 0.002, "loss": 2.3546, "step": 143320 }, { "epoch": 0.5540736960925299, "grad_norm": 0.1005762591958046, "learning_rate": 0.002, "loss": 2.3396, "step": 143330 }, { "epoch": 0.5541123532959131, "grad_norm": 0.11607342213392258, "learning_rate": 0.002, "loss": 2.3402, "step": 143340 }, { "epoch": 0.5541510104992965, "grad_norm": 0.0930909588932991, "learning_rate": 0.002, "loss": 2.3387, "step": 143350 }, { "epoch": 0.5541896677026797, "grad_norm": 0.10060762614011765, "learning_rate": 0.002, "loss": 2.3409, "step": 143360 }, { "epoch": 0.554228324906063, "grad_norm": 0.11107786744832993, "learning_rate": 0.002, "loss": 2.3583, "step": 143370 }, { "epoch": 0.5542669821094462, "grad_norm": 0.11191900074481964, "learning_rate": 0.002, "loss": 2.3493, "step": 143380 }, { "epoch": 0.5543056393128295, "grad_norm": 0.12472184002399445, "learning_rate": 0.002, "loss": 2.3481, "step": 143390 }, { "epoch": 0.5543442965162129, "grad_norm": 0.11487885564565659, "learning_rate": 0.002, "loss": 2.3433, "step": 143400 }, { "epoch": 0.5543829537195961, "grad_norm": 0.10302220284938812, "learning_rate": 0.002, "loss": 2.3325, "step": 143410 }, { "epoch": 0.5544216109229794, "grad_norm": 0.10976248979568481, "learning_rate": 0.002, "loss": 2.3472, "step": 143420 }, { "epoch": 0.5544602681263626, "grad_norm": 0.11117681115865707, "learning_rate": 0.002, "loss": 2.3494, "step": 143430 }, { "epoch": 0.554498925329746, "grad_norm": 0.11047784239053726, "learning_rate": 0.002, "loss": 2.3515, "step": 143440 }, { "epoch": 0.5545375825331292, "grad_norm": 0.09889400750398636, "learning_rate": 0.002, "loss": 2.3485, "step": 143450 }, { "epoch": 0.5545762397365125, "grad_norm": 0.1134338527917862, "learning_rate": 0.002, "loss": 2.3311, "step": 143460 }, { "epoch": 0.5546148969398957, "grad_norm": 0.10511985421180725, "learning_rate": 0.002, "loss": 2.3548, "step": 143470 }, { "epoch": 0.5546535541432791, "grad_norm": 0.11839264631271362, "learning_rate": 0.002, "loss": 2.3484, "step": 143480 }, { "epoch": 0.5546922113466624, "grad_norm": 0.11493546515703201, "learning_rate": 0.002, "loss": 2.3402, "step": 143490 }, { "epoch": 0.5547308685500456, "grad_norm": 0.09989528357982635, "learning_rate": 0.002, "loss": 2.342, "step": 143500 }, { "epoch": 0.5547695257534289, "grad_norm": 0.10545303672552109, "learning_rate": 0.002, "loss": 2.3281, "step": 143510 }, { "epoch": 0.5548081829568122, "grad_norm": 0.12071103602647781, "learning_rate": 0.002, "loss": 2.3329, "step": 143520 }, { "epoch": 0.5548468401601955, "grad_norm": 0.11775526404380798, "learning_rate": 0.002, "loss": 2.3385, "step": 143530 }, { "epoch": 0.5548854973635787, "grad_norm": 0.12348510324954987, "learning_rate": 0.002, "loss": 2.3464, "step": 143540 }, { "epoch": 0.554924154566962, "grad_norm": 0.11271246522665024, "learning_rate": 0.002, "loss": 2.3403, "step": 143550 }, { "epoch": 0.5549628117703452, "grad_norm": 0.1072176918387413, "learning_rate": 0.002, "loss": 2.3287, "step": 143560 }, { "epoch": 0.5550014689737286, "grad_norm": 0.10169960558414459, "learning_rate": 0.002, "loss": 2.3491, "step": 143570 }, { "epoch": 0.5550401261771118, "grad_norm": 0.10885798186063766, "learning_rate": 0.002, "loss": 2.3486, "step": 143580 }, { "epoch": 0.5550787833804951, "grad_norm": 0.12151875346899033, "learning_rate": 0.002, "loss": 2.3626, "step": 143590 }, { "epoch": 0.5551174405838784, "grad_norm": 0.1123412549495697, "learning_rate": 0.002, "loss": 2.3528, "step": 143600 }, { "epoch": 0.5551560977872617, "grad_norm": 0.09589749574661255, "learning_rate": 0.002, "loss": 2.3317, "step": 143610 }, { "epoch": 0.555194754990645, "grad_norm": 0.11981192231178284, "learning_rate": 0.002, "loss": 2.3489, "step": 143620 }, { "epoch": 0.5552334121940282, "grad_norm": 0.10519280284643173, "learning_rate": 0.002, "loss": 2.3438, "step": 143630 }, { "epoch": 0.5552720693974115, "grad_norm": 0.10929519683122635, "learning_rate": 0.002, "loss": 2.3536, "step": 143640 }, { "epoch": 0.5553107266007948, "grad_norm": 0.10268570482730865, "learning_rate": 0.002, "loss": 2.3473, "step": 143650 }, { "epoch": 0.5553493838041781, "grad_norm": 0.10233426094055176, "learning_rate": 0.002, "loss": 2.3545, "step": 143660 }, { "epoch": 0.5553880410075613, "grad_norm": 0.12025441974401474, "learning_rate": 0.002, "loss": 2.3617, "step": 143670 }, { "epoch": 0.5554266982109446, "grad_norm": 0.11578936129808426, "learning_rate": 0.002, "loss": 2.3333, "step": 143680 }, { "epoch": 0.555465355414328, "grad_norm": 0.10593968629837036, "learning_rate": 0.002, "loss": 2.3387, "step": 143690 }, { "epoch": 0.5555040126177112, "grad_norm": 0.10012836009263992, "learning_rate": 0.002, "loss": 2.3465, "step": 143700 }, { "epoch": 0.5555426698210945, "grad_norm": 0.10861680656671524, "learning_rate": 0.002, "loss": 2.3481, "step": 143710 }, { "epoch": 0.5555813270244777, "grad_norm": 0.10936512798070908, "learning_rate": 0.002, "loss": 2.344, "step": 143720 }, { "epoch": 0.5556199842278611, "grad_norm": 0.11655790358781815, "learning_rate": 0.002, "loss": 2.3458, "step": 143730 }, { "epoch": 0.5556586414312443, "grad_norm": 0.09844601899385452, "learning_rate": 0.002, "loss": 2.3362, "step": 143740 }, { "epoch": 0.5556972986346276, "grad_norm": 0.11050175130367279, "learning_rate": 0.002, "loss": 2.3374, "step": 143750 }, { "epoch": 0.5557359558380108, "grad_norm": 0.10614526271820068, "learning_rate": 0.002, "loss": 2.3321, "step": 143760 }, { "epoch": 0.5557746130413941, "grad_norm": 0.1081186830997467, "learning_rate": 0.002, "loss": 2.3536, "step": 143770 }, { "epoch": 0.5558132702447774, "grad_norm": 0.10302522778511047, "learning_rate": 0.002, "loss": 2.3548, "step": 143780 }, { "epoch": 0.5558519274481607, "grad_norm": 0.10777824372053146, "learning_rate": 0.002, "loss": 2.3475, "step": 143790 }, { "epoch": 0.555890584651544, "grad_norm": 0.09283053129911423, "learning_rate": 0.002, "loss": 2.3359, "step": 143800 }, { "epoch": 0.5559292418549272, "grad_norm": 0.11310338228940964, "learning_rate": 0.002, "loss": 2.3493, "step": 143810 }, { "epoch": 0.5559678990583106, "grad_norm": 0.10526971518993378, "learning_rate": 0.002, "loss": 2.3415, "step": 143820 }, { "epoch": 0.5560065562616938, "grad_norm": 0.09571385383605957, "learning_rate": 0.002, "loss": 2.3568, "step": 143830 }, { "epoch": 0.5560452134650771, "grad_norm": 0.10621003806591034, "learning_rate": 0.002, "loss": 2.3642, "step": 143840 }, { "epoch": 0.5560838706684603, "grad_norm": 0.12751825153827667, "learning_rate": 0.002, "loss": 2.3485, "step": 143850 }, { "epoch": 0.5561225278718437, "grad_norm": 0.09856680035591125, "learning_rate": 0.002, "loss": 2.355, "step": 143860 }, { "epoch": 0.5561611850752269, "grad_norm": 0.120022252202034, "learning_rate": 0.002, "loss": 2.3313, "step": 143870 }, { "epoch": 0.5561998422786102, "grad_norm": 0.10100723803043365, "learning_rate": 0.002, "loss": 2.3521, "step": 143880 }, { "epoch": 0.5562384994819934, "grad_norm": 0.11421933770179749, "learning_rate": 0.002, "loss": 2.3441, "step": 143890 }, { "epoch": 0.5562771566853768, "grad_norm": 0.11160522699356079, "learning_rate": 0.002, "loss": 2.3467, "step": 143900 }, { "epoch": 0.5563158138887601, "grad_norm": 0.10204055905342102, "learning_rate": 0.002, "loss": 2.3452, "step": 143910 }, { "epoch": 0.5563544710921433, "grad_norm": 0.10301145166158676, "learning_rate": 0.002, "loss": 2.3421, "step": 143920 }, { "epoch": 0.5563931282955266, "grad_norm": 0.12065683305263519, "learning_rate": 0.002, "loss": 2.347, "step": 143930 }, { "epoch": 0.5564317854989098, "grad_norm": 0.10993140190839767, "learning_rate": 0.002, "loss": 2.3467, "step": 143940 }, { "epoch": 0.5564704427022932, "grad_norm": 0.12739704549312592, "learning_rate": 0.002, "loss": 2.3555, "step": 143950 }, { "epoch": 0.5565090999056764, "grad_norm": 0.09803897887468338, "learning_rate": 0.002, "loss": 2.3442, "step": 143960 }, { "epoch": 0.5565477571090597, "grad_norm": 0.10325773805379868, "learning_rate": 0.002, "loss": 2.34, "step": 143970 }, { "epoch": 0.5565864143124429, "grad_norm": 0.10811714828014374, "learning_rate": 0.002, "loss": 2.3416, "step": 143980 }, { "epoch": 0.5566250715158263, "grad_norm": 0.13029812276363373, "learning_rate": 0.002, "loss": 2.3531, "step": 143990 }, { "epoch": 0.5566637287192095, "grad_norm": 0.09954417496919632, "learning_rate": 0.002, "loss": 2.3305, "step": 144000 }, { "epoch": 0.5567023859225928, "grad_norm": 0.10553660988807678, "learning_rate": 0.002, "loss": 2.3337, "step": 144010 }, { "epoch": 0.556741043125976, "grad_norm": 0.11054238677024841, "learning_rate": 0.002, "loss": 2.3435, "step": 144020 }, { "epoch": 0.5567797003293594, "grad_norm": 0.10247528553009033, "learning_rate": 0.002, "loss": 2.3255, "step": 144030 }, { "epoch": 0.5568183575327427, "grad_norm": 0.11531244963407516, "learning_rate": 0.002, "loss": 2.3404, "step": 144040 }, { "epoch": 0.5568570147361259, "grad_norm": 0.12554004788398743, "learning_rate": 0.002, "loss": 2.36, "step": 144050 }, { "epoch": 0.5568956719395092, "grad_norm": 0.093537338078022, "learning_rate": 0.002, "loss": 2.3401, "step": 144060 }, { "epoch": 0.5569343291428925, "grad_norm": 0.10666855424642563, "learning_rate": 0.002, "loss": 2.3315, "step": 144070 }, { "epoch": 0.5569729863462758, "grad_norm": 0.10169276595115662, "learning_rate": 0.002, "loss": 2.3288, "step": 144080 }, { "epoch": 0.557011643549659, "grad_norm": 0.11072119325399399, "learning_rate": 0.002, "loss": 2.3367, "step": 144090 }, { "epoch": 0.5570503007530423, "grad_norm": 0.10343354940414429, "learning_rate": 0.002, "loss": 2.3292, "step": 144100 }, { "epoch": 0.5570889579564255, "grad_norm": 0.10450764000415802, "learning_rate": 0.002, "loss": 2.3506, "step": 144110 }, { "epoch": 0.5571276151598089, "grad_norm": 0.10256035625934601, "learning_rate": 0.002, "loss": 2.3392, "step": 144120 }, { "epoch": 0.5571662723631922, "grad_norm": 0.10732540488243103, "learning_rate": 0.002, "loss": 2.3238, "step": 144130 }, { "epoch": 0.5572049295665754, "grad_norm": 0.15823794901371002, "learning_rate": 0.002, "loss": 2.36, "step": 144140 }, { "epoch": 0.5572435867699587, "grad_norm": 0.10332081466913223, "learning_rate": 0.002, "loss": 2.3432, "step": 144150 }, { "epoch": 0.557282243973342, "grad_norm": 0.1082451194524765, "learning_rate": 0.002, "loss": 2.3393, "step": 144160 }, { "epoch": 0.5573209011767253, "grad_norm": 0.12070360034704208, "learning_rate": 0.002, "loss": 2.3398, "step": 144170 }, { "epoch": 0.5573595583801085, "grad_norm": 0.11742536723613739, "learning_rate": 0.002, "loss": 2.354, "step": 144180 }, { "epoch": 0.5573982155834918, "grad_norm": 0.10265970230102539, "learning_rate": 0.002, "loss": 2.3533, "step": 144190 }, { "epoch": 0.5574368727868751, "grad_norm": 0.12360739707946777, "learning_rate": 0.002, "loss": 2.3596, "step": 144200 }, { "epoch": 0.5574755299902584, "grad_norm": 0.11441248655319214, "learning_rate": 0.002, "loss": 2.3529, "step": 144210 }, { "epoch": 0.5575141871936417, "grad_norm": 0.13491380214691162, "learning_rate": 0.002, "loss": 2.3492, "step": 144220 }, { "epoch": 0.5575528443970249, "grad_norm": 0.10316541790962219, "learning_rate": 0.002, "loss": 2.3613, "step": 144230 }, { "epoch": 0.5575915016004083, "grad_norm": 0.11570821702480316, "learning_rate": 0.002, "loss": 2.3288, "step": 144240 }, { "epoch": 0.5576301588037915, "grad_norm": 0.1069808155298233, "learning_rate": 0.002, "loss": 2.3623, "step": 144250 }, { "epoch": 0.5576688160071748, "grad_norm": 0.10772182047367096, "learning_rate": 0.002, "loss": 2.3438, "step": 144260 }, { "epoch": 0.557707473210558, "grad_norm": 0.1067073792219162, "learning_rate": 0.002, "loss": 2.3477, "step": 144270 }, { "epoch": 0.5577461304139414, "grad_norm": 0.10397684574127197, "learning_rate": 0.002, "loss": 2.3333, "step": 144280 }, { "epoch": 0.5577847876173246, "grad_norm": 0.1108739972114563, "learning_rate": 0.002, "loss": 2.3497, "step": 144290 }, { "epoch": 0.5578234448207079, "grad_norm": 0.1144571602344513, "learning_rate": 0.002, "loss": 2.3486, "step": 144300 }, { "epoch": 0.5578621020240911, "grad_norm": 0.13079805672168732, "learning_rate": 0.002, "loss": 2.355, "step": 144310 }, { "epoch": 0.5579007592274744, "grad_norm": 0.10899020731449127, "learning_rate": 0.002, "loss": 2.3541, "step": 144320 }, { "epoch": 0.5579394164308578, "grad_norm": 0.1061665266752243, "learning_rate": 0.002, "loss": 2.3412, "step": 144330 }, { "epoch": 0.557978073634241, "grad_norm": 0.10170763731002808, "learning_rate": 0.002, "loss": 2.3356, "step": 144340 }, { "epoch": 0.5580167308376243, "grad_norm": 0.10558304190635681, "learning_rate": 0.002, "loss": 2.3322, "step": 144350 }, { "epoch": 0.5580553880410075, "grad_norm": 0.13423477113246918, "learning_rate": 0.002, "loss": 2.347, "step": 144360 }, { "epoch": 0.5580940452443909, "grad_norm": 0.11419844627380371, "learning_rate": 0.002, "loss": 2.3536, "step": 144370 }, { "epoch": 0.5581327024477741, "grad_norm": 0.09708640724420547, "learning_rate": 0.002, "loss": 2.3337, "step": 144380 }, { "epoch": 0.5581713596511574, "grad_norm": 0.10306001454591751, "learning_rate": 0.002, "loss": 2.3477, "step": 144390 }, { "epoch": 0.5582100168545406, "grad_norm": 0.10493401437997818, "learning_rate": 0.002, "loss": 2.3359, "step": 144400 }, { "epoch": 0.558248674057924, "grad_norm": 0.11392046511173248, "learning_rate": 0.002, "loss": 2.3407, "step": 144410 }, { "epoch": 0.5582873312613073, "grad_norm": 0.1155625432729721, "learning_rate": 0.002, "loss": 2.3472, "step": 144420 }, { "epoch": 0.5583259884646905, "grad_norm": 0.13787469267845154, "learning_rate": 0.002, "loss": 2.3446, "step": 144430 }, { "epoch": 0.5583646456680738, "grad_norm": 0.10516868531703949, "learning_rate": 0.002, "loss": 2.3533, "step": 144440 }, { "epoch": 0.5584033028714571, "grad_norm": 0.08933563530445099, "learning_rate": 0.002, "loss": 2.3457, "step": 144450 }, { "epoch": 0.5584419600748404, "grad_norm": 0.09746905416250229, "learning_rate": 0.002, "loss": 2.3364, "step": 144460 }, { "epoch": 0.5584806172782236, "grad_norm": 0.11433606594800949, "learning_rate": 0.002, "loss": 2.3441, "step": 144470 }, { "epoch": 0.5585192744816069, "grad_norm": 0.09467669576406479, "learning_rate": 0.002, "loss": 2.3322, "step": 144480 }, { "epoch": 0.5585579316849901, "grad_norm": 0.12223439663648605, "learning_rate": 0.002, "loss": 2.3648, "step": 144490 }, { "epoch": 0.5585965888883735, "grad_norm": 0.18821455538272858, "learning_rate": 0.002, "loss": 2.3394, "step": 144500 }, { "epoch": 0.5586352460917567, "grad_norm": 0.14402391016483307, "learning_rate": 0.002, "loss": 2.3394, "step": 144510 }, { "epoch": 0.55867390329514, "grad_norm": 0.1065588966012001, "learning_rate": 0.002, "loss": 2.3491, "step": 144520 }, { "epoch": 0.5587125604985232, "grad_norm": 0.11623067408800125, "learning_rate": 0.002, "loss": 2.3486, "step": 144530 }, { "epoch": 0.5587512177019066, "grad_norm": 0.23236896097660065, "learning_rate": 0.002, "loss": 2.3721, "step": 144540 }, { "epoch": 0.5587898749052899, "grad_norm": 0.10903756320476532, "learning_rate": 0.002, "loss": 2.3692, "step": 144550 }, { "epoch": 0.5588285321086731, "grad_norm": 0.10570161044597626, "learning_rate": 0.002, "loss": 2.3491, "step": 144560 }, { "epoch": 0.5588671893120564, "grad_norm": 0.11697853356599808, "learning_rate": 0.002, "loss": 2.3429, "step": 144570 }, { "epoch": 0.5589058465154397, "grad_norm": 0.1024005338549614, "learning_rate": 0.002, "loss": 2.3435, "step": 144580 }, { "epoch": 0.558944503718823, "grad_norm": 0.10046295821666718, "learning_rate": 0.002, "loss": 2.3393, "step": 144590 }, { "epoch": 0.5589831609222062, "grad_norm": 0.10596208274364471, "learning_rate": 0.002, "loss": 2.3411, "step": 144600 }, { "epoch": 0.5590218181255895, "grad_norm": 0.10390448570251465, "learning_rate": 0.002, "loss": 2.3444, "step": 144610 }, { "epoch": 0.5590604753289729, "grad_norm": 0.09717035293579102, "learning_rate": 0.002, "loss": 2.3376, "step": 144620 }, { "epoch": 0.5590991325323561, "grad_norm": 0.12094360589981079, "learning_rate": 0.002, "loss": 2.3401, "step": 144630 }, { "epoch": 0.5591377897357394, "grad_norm": 0.09621819853782654, "learning_rate": 0.002, "loss": 2.3349, "step": 144640 }, { "epoch": 0.5591764469391226, "grad_norm": 0.12042798846960068, "learning_rate": 0.002, "loss": 2.3462, "step": 144650 }, { "epoch": 0.559215104142506, "grad_norm": 0.1034356951713562, "learning_rate": 0.002, "loss": 2.3402, "step": 144660 }, { "epoch": 0.5592537613458892, "grad_norm": 0.10839217156171799, "learning_rate": 0.002, "loss": 2.3437, "step": 144670 }, { "epoch": 0.5592924185492725, "grad_norm": 0.10241516679525375, "learning_rate": 0.002, "loss": 2.3433, "step": 144680 }, { "epoch": 0.5593310757526557, "grad_norm": 0.11839167773723602, "learning_rate": 0.002, "loss": 2.3448, "step": 144690 }, { "epoch": 0.559369732956039, "grad_norm": 0.1063704788684845, "learning_rate": 0.002, "loss": 2.3389, "step": 144700 }, { "epoch": 0.5594083901594223, "grad_norm": 0.12920372188091278, "learning_rate": 0.002, "loss": 2.3598, "step": 144710 }, { "epoch": 0.5594470473628056, "grad_norm": 0.1120523139834404, "learning_rate": 0.002, "loss": 2.3433, "step": 144720 }, { "epoch": 0.5594857045661888, "grad_norm": 0.10131628811359406, "learning_rate": 0.002, "loss": 2.324, "step": 144730 }, { "epoch": 0.5595243617695721, "grad_norm": 0.09710616618394852, "learning_rate": 0.002, "loss": 2.3481, "step": 144740 }, { "epoch": 0.5595630189729555, "grad_norm": 0.11539177596569061, "learning_rate": 0.002, "loss": 2.3473, "step": 144750 }, { "epoch": 0.5596016761763387, "grad_norm": 0.11322654783725739, "learning_rate": 0.002, "loss": 2.3466, "step": 144760 }, { "epoch": 0.559640333379722, "grad_norm": 0.09051068872213364, "learning_rate": 0.002, "loss": 2.3574, "step": 144770 }, { "epoch": 0.5596789905831052, "grad_norm": 0.12363360822200775, "learning_rate": 0.002, "loss": 2.3569, "step": 144780 }, { "epoch": 0.5597176477864886, "grad_norm": 0.09123220294713974, "learning_rate": 0.002, "loss": 2.3401, "step": 144790 }, { "epoch": 0.5597563049898718, "grad_norm": 0.10503943264484406, "learning_rate": 0.002, "loss": 2.3551, "step": 144800 }, { "epoch": 0.5597949621932551, "grad_norm": 0.11177346110343933, "learning_rate": 0.002, "loss": 2.3482, "step": 144810 }, { "epoch": 0.5598336193966383, "grad_norm": 0.0991305410861969, "learning_rate": 0.002, "loss": 2.3552, "step": 144820 }, { "epoch": 0.5598722766000217, "grad_norm": 0.09888456016778946, "learning_rate": 0.002, "loss": 2.3436, "step": 144830 }, { "epoch": 0.559910933803405, "grad_norm": 0.10640181601047516, "learning_rate": 0.002, "loss": 2.3566, "step": 144840 }, { "epoch": 0.5599495910067882, "grad_norm": 0.14159761369228363, "learning_rate": 0.002, "loss": 2.3484, "step": 144850 }, { "epoch": 0.5599882482101715, "grad_norm": 0.10540436953306198, "learning_rate": 0.002, "loss": 2.3465, "step": 144860 }, { "epoch": 0.5600269054135547, "grad_norm": 0.10895095765590668, "learning_rate": 0.002, "loss": 2.3355, "step": 144870 }, { "epoch": 0.5600655626169381, "grad_norm": 0.10320727527141571, "learning_rate": 0.002, "loss": 2.355, "step": 144880 }, { "epoch": 0.5601042198203213, "grad_norm": 0.1057295873761177, "learning_rate": 0.002, "loss": 2.3471, "step": 144890 }, { "epoch": 0.5601428770237046, "grad_norm": 0.11810392886400223, "learning_rate": 0.002, "loss": 2.3519, "step": 144900 }, { "epoch": 0.5601815342270878, "grad_norm": 0.09296156466007233, "learning_rate": 0.002, "loss": 2.3572, "step": 144910 }, { "epoch": 0.5602201914304712, "grad_norm": 0.09686148166656494, "learning_rate": 0.002, "loss": 2.34, "step": 144920 }, { "epoch": 0.5602588486338544, "grad_norm": 0.09118534624576569, "learning_rate": 0.002, "loss": 2.3365, "step": 144930 }, { "epoch": 0.5602975058372377, "grad_norm": 0.1013110876083374, "learning_rate": 0.002, "loss": 2.3391, "step": 144940 }, { "epoch": 0.560336163040621, "grad_norm": 0.12402082979679108, "learning_rate": 0.002, "loss": 2.3409, "step": 144950 }, { "epoch": 0.5603748202440043, "grad_norm": 0.10473762452602386, "learning_rate": 0.002, "loss": 2.3482, "step": 144960 }, { "epoch": 0.5604134774473876, "grad_norm": 0.12455492466688156, "learning_rate": 0.002, "loss": 2.3392, "step": 144970 }, { "epoch": 0.5604521346507708, "grad_norm": 0.10267633199691772, "learning_rate": 0.002, "loss": 2.3524, "step": 144980 }, { "epoch": 0.5604907918541541, "grad_norm": 0.12730388343334198, "learning_rate": 0.002, "loss": 2.3455, "step": 144990 }, { "epoch": 0.5605294490575374, "grad_norm": 0.08786030858755112, "learning_rate": 0.002, "loss": 2.3357, "step": 145000 }, { "epoch": 0.5605681062609207, "grad_norm": 0.11927718669176102, "learning_rate": 0.002, "loss": 2.3547, "step": 145010 }, { "epoch": 0.5606067634643039, "grad_norm": 0.1271049529314041, "learning_rate": 0.002, "loss": 2.3533, "step": 145020 }, { "epoch": 0.5606454206676872, "grad_norm": 0.10028399527072906, "learning_rate": 0.002, "loss": 2.3475, "step": 145030 }, { "epoch": 0.5606840778710704, "grad_norm": 0.20037201046943665, "learning_rate": 0.002, "loss": 2.3367, "step": 145040 }, { "epoch": 0.5607227350744538, "grad_norm": 0.10821410268545151, "learning_rate": 0.002, "loss": 2.3519, "step": 145050 }, { "epoch": 0.5607613922778371, "grad_norm": 0.11034255474805832, "learning_rate": 0.002, "loss": 2.3372, "step": 145060 }, { "epoch": 0.5608000494812203, "grad_norm": 0.10494337975978851, "learning_rate": 0.002, "loss": 2.348, "step": 145070 }, { "epoch": 0.5608387066846036, "grad_norm": 0.13450533151626587, "learning_rate": 0.002, "loss": 2.3512, "step": 145080 }, { "epoch": 0.5608773638879869, "grad_norm": 0.09030753374099731, "learning_rate": 0.002, "loss": 2.3492, "step": 145090 }, { "epoch": 0.5609160210913702, "grad_norm": 0.09489750862121582, "learning_rate": 0.002, "loss": 2.3522, "step": 145100 }, { "epoch": 0.5609546782947534, "grad_norm": 0.10595418512821198, "learning_rate": 0.002, "loss": 2.3269, "step": 145110 }, { "epoch": 0.5609933354981367, "grad_norm": 0.09282474219799042, "learning_rate": 0.002, "loss": 2.344, "step": 145120 }, { "epoch": 0.56103199270152, "grad_norm": 0.0995539128780365, "learning_rate": 0.002, "loss": 2.3509, "step": 145130 }, { "epoch": 0.5610706499049033, "grad_norm": 0.1267012655735016, "learning_rate": 0.002, "loss": 2.3551, "step": 145140 }, { "epoch": 0.5611093071082865, "grad_norm": 0.11552777886390686, "learning_rate": 0.002, "loss": 2.3528, "step": 145150 }, { "epoch": 0.5611479643116698, "grad_norm": 0.08912569284439087, "learning_rate": 0.002, "loss": 2.3365, "step": 145160 }, { "epoch": 0.5611866215150532, "grad_norm": 0.19666577875614166, "learning_rate": 0.002, "loss": 2.3532, "step": 145170 }, { "epoch": 0.5612252787184364, "grad_norm": 0.0973074808716774, "learning_rate": 0.002, "loss": 2.3516, "step": 145180 }, { "epoch": 0.5612639359218197, "grad_norm": 0.09786782413721085, "learning_rate": 0.002, "loss": 2.337, "step": 145190 }, { "epoch": 0.5613025931252029, "grad_norm": 0.10890211164951324, "learning_rate": 0.002, "loss": 2.3379, "step": 145200 }, { "epoch": 0.5613412503285863, "grad_norm": 0.10896290838718414, "learning_rate": 0.002, "loss": 2.3643, "step": 145210 }, { "epoch": 0.5613799075319695, "grad_norm": 0.10439836978912354, "learning_rate": 0.002, "loss": 2.3355, "step": 145220 }, { "epoch": 0.5614185647353528, "grad_norm": 0.10249201953411102, "learning_rate": 0.002, "loss": 2.361, "step": 145230 }, { "epoch": 0.561457221938736, "grad_norm": 0.09261146932840347, "learning_rate": 0.002, "loss": 2.3338, "step": 145240 }, { "epoch": 0.5614958791421193, "grad_norm": 0.10369481146335602, "learning_rate": 0.002, "loss": 2.3544, "step": 145250 }, { "epoch": 0.5615345363455027, "grad_norm": 0.12321964651346207, "learning_rate": 0.002, "loss": 2.3478, "step": 145260 }, { "epoch": 0.5615731935488859, "grad_norm": 0.10361948609352112, "learning_rate": 0.002, "loss": 2.345, "step": 145270 }, { "epoch": 0.5616118507522692, "grad_norm": 0.12577545642852783, "learning_rate": 0.002, "loss": 2.3396, "step": 145280 }, { "epoch": 0.5616505079556524, "grad_norm": 0.10888047516345978, "learning_rate": 0.002, "loss": 2.3485, "step": 145290 }, { "epoch": 0.5616891651590358, "grad_norm": 0.09915715456008911, "learning_rate": 0.002, "loss": 2.3502, "step": 145300 }, { "epoch": 0.561727822362419, "grad_norm": 0.11483432352542877, "learning_rate": 0.002, "loss": 2.3377, "step": 145310 }, { "epoch": 0.5617664795658023, "grad_norm": 0.10110090672969818, "learning_rate": 0.002, "loss": 2.3473, "step": 145320 }, { "epoch": 0.5618051367691855, "grad_norm": 0.11618124693632126, "learning_rate": 0.002, "loss": 2.343, "step": 145330 }, { "epoch": 0.5618437939725689, "grad_norm": 0.10940609872341156, "learning_rate": 0.002, "loss": 2.3375, "step": 145340 }, { "epoch": 0.5618824511759521, "grad_norm": 0.10506876558065414, "learning_rate": 0.002, "loss": 2.358, "step": 145350 }, { "epoch": 0.5619211083793354, "grad_norm": 0.1089290902018547, "learning_rate": 0.002, "loss": 2.3523, "step": 145360 }, { "epoch": 0.5619597655827187, "grad_norm": 0.1401052325963974, "learning_rate": 0.002, "loss": 2.3516, "step": 145370 }, { "epoch": 0.561998422786102, "grad_norm": 0.10837940871715546, "learning_rate": 0.002, "loss": 2.36, "step": 145380 }, { "epoch": 0.5620370799894853, "grad_norm": 0.1082291305065155, "learning_rate": 0.002, "loss": 2.3531, "step": 145390 }, { "epoch": 0.5620757371928685, "grad_norm": 0.09650030732154846, "learning_rate": 0.002, "loss": 2.3264, "step": 145400 }, { "epoch": 0.5621143943962518, "grad_norm": 0.13818258047103882, "learning_rate": 0.002, "loss": 2.3471, "step": 145410 }, { "epoch": 0.562153051599635, "grad_norm": 0.11335279047489166, "learning_rate": 0.002, "loss": 2.3354, "step": 145420 }, { "epoch": 0.5621917088030184, "grad_norm": 0.11053285002708435, "learning_rate": 0.002, "loss": 2.353, "step": 145430 }, { "epoch": 0.5622303660064016, "grad_norm": 0.10307853668928146, "learning_rate": 0.002, "loss": 2.3522, "step": 145440 }, { "epoch": 0.5622690232097849, "grad_norm": 0.11353618651628494, "learning_rate": 0.002, "loss": 2.3569, "step": 145450 }, { "epoch": 0.5623076804131681, "grad_norm": 0.11812679469585419, "learning_rate": 0.002, "loss": 2.3578, "step": 145460 }, { "epoch": 0.5623463376165515, "grad_norm": 0.09612657129764557, "learning_rate": 0.002, "loss": 2.3483, "step": 145470 }, { "epoch": 0.5623849948199348, "grad_norm": 0.10407382994890213, "learning_rate": 0.002, "loss": 2.3241, "step": 145480 }, { "epoch": 0.562423652023318, "grad_norm": 0.09928133338689804, "learning_rate": 0.002, "loss": 2.3391, "step": 145490 }, { "epoch": 0.5624623092267013, "grad_norm": 0.0923873707652092, "learning_rate": 0.002, "loss": 2.3481, "step": 145500 }, { "epoch": 0.5625009664300846, "grad_norm": 0.1048966720700264, "learning_rate": 0.002, "loss": 2.3505, "step": 145510 }, { "epoch": 0.5625396236334679, "grad_norm": 0.12021849304437637, "learning_rate": 0.002, "loss": 2.3436, "step": 145520 }, { "epoch": 0.5625782808368511, "grad_norm": 0.1024569571018219, "learning_rate": 0.002, "loss": 2.338, "step": 145530 }, { "epoch": 0.5626169380402344, "grad_norm": 0.10092726349830627, "learning_rate": 0.002, "loss": 2.3354, "step": 145540 }, { "epoch": 0.5626555952436177, "grad_norm": 0.10643120110034943, "learning_rate": 0.002, "loss": 2.331, "step": 145550 }, { "epoch": 0.562694252447001, "grad_norm": 0.11607813090085983, "learning_rate": 0.002, "loss": 2.3463, "step": 145560 }, { "epoch": 0.5627329096503842, "grad_norm": 0.10902582854032516, "learning_rate": 0.002, "loss": 2.3443, "step": 145570 }, { "epoch": 0.5627715668537675, "grad_norm": 0.09896028786897659, "learning_rate": 0.002, "loss": 2.3378, "step": 145580 }, { "epoch": 0.5628102240571508, "grad_norm": 0.0961156114935875, "learning_rate": 0.002, "loss": 2.3589, "step": 145590 }, { "epoch": 0.5628488812605341, "grad_norm": 0.11849000304937363, "learning_rate": 0.002, "loss": 2.3561, "step": 145600 }, { "epoch": 0.5628875384639174, "grad_norm": 0.0874343141913414, "learning_rate": 0.002, "loss": 2.3458, "step": 145610 }, { "epoch": 0.5629261956673006, "grad_norm": 0.13053591549396515, "learning_rate": 0.002, "loss": 2.3598, "step": 145620 }, { "epoch": 0.5629648528706839, "grad_norm": 0.09980365633964539, "learning_rate": 0.002, "loss": 2.3421, "step": 145630 }, { "epoch": 0.5630035100740672, "grad_norm": 0.09870453178882599, "learning_rate": 0.002, "loss": 2.342, "step": 145640 }, { "epoch": 0.5630421672774505, "grad_norm": 0.10686399042606354, "learning_rate": 0.002, "loss": 2.355, "step": 145650 }, { "epoch": 0.5630808244808337, "grad_norm": 0.10777558386325836, "learning_rate": 0.002, "loss": 2.3573, "step": 145660 }, { "epoch": 0.563119481684217, "grad_norm": 0.1021328940987587, "learning_rate": 0.002, "loss": 2.3323, "step": 145670 }, { "epoch": 0.5631581388876004, "grad_norm": 0.10991507768630981, "learning_rate": 0.002, "loss": 2.3429, "step": 145680 }, { "epoch": 0.5631967960909836, "grad_norm": 0.09759745746850967, "learning_rate": 0.002, "loss": 2.3334, "step": 145690 }, { "epoch": 0.5632354532943669, "grad_norm": 0.09556913375854492, "learning_rate": 0.002, "loss": 2.3383, "step": 145700 }, { "epoch": 0.5632741104977501, "grad_norm": 0.11235988140106201, "learning_rate": 0.002, "loss": 2.3493, "step": 145710 }, { "epoch": 0.5633127677011335, "grad_norm": 0.09124507009983063, "learning_rate": 0.002, "loss": 2.3503, "step": 145720 }, { "epoch": 0.5633514249045167, "grad_norm": 0.10627923160791397, "learning_rate": 0.002, "loss": 2.3436, "step": 145730 }, { "epoch": 0.5633900821079, "grad_norm": 0.10545750707387924, "learning_rate": 0.002, "loss": 2.3508, "step": 145740 }, { "epoch": 0.5634287393112832, "grad_norm": 0.09357081353664398, "learning_rate": 0.002, "loss": 2.3408, "step": 145750 }, { "epoch": 0.5634673965146666, "grad_norm": 0.11138079315423965, "learning_rate": 0.002, "loss": 2.3517, "step": 145760 }, { "epoch": 0.5635060537180498, "grad_norm": 0.09826505184173584, "learning_rate": 0.002, "loss": 2.3278, "step": 145770 }, { "epoch": 0.5635447109214331, "grad_norm": 0.10549304634332657, "learning_rate": 0.002, "loss": 2.3634, "step": 145780 }, { "epoch": 0.5635833681248164, "grad_norm": 0.10531755536794662, "learning_rate": 0.002, "loss": 2.346, "step": 145790 }, { "epoch": 0.5636220253281996, "grad_norm": 0.1031210795044899, "learning_rate": 0.002, "loss": 2.3385, "step": 145800 }, { "epoch": 0.563660682531583, "grad_norm": 0.11040626466274261, "learning_rate": 0.002, "loss": 2.3458, "step": 145810 }, { "epoch": 0.5636993397349662, "grad_norm": 0.1013822853565216, "learning_rate": 0.002, "loss": 2.3552, "step": 145820 }, { "epoch": 0.5637379969383495, "grad_norm": 0.11882764846086502, "learning_rate": 0.002, "loss": 2.3408, "step": 145830 }, { "epoch": 0.5637766541417327, "grad_norm": 0.09364783018827438, "learning_rate": 0.002, "loss": 2.3254, "step": 145840 }, { "epoch": 0.5638153113451161, "grad_norm": 0.10282673686742783, "learning_rate": 0.002, "loss": 2.3515, "step": 145850 }, { "epoch": 0.5638539685484993, "grad_norm": 0.11586014181375504, "learning_rate": 0.002, "loss": 2.3356, "step": 145860 }, { "epoch": 0.5638926257518826, "grad_norm": 0.10484866052865982, "learning_rate": 0.002, "loss": 2.3436, "step": 145870 }, { "epoch": 0.5639312829552658, "grad_norm": 0.1112280786037445, "learning_rate": 0.002, "loss": 2.3479, "step": 145880 }, { "epoch": 0.5639699401586492, "grad_norm": 0.0995175763964653, "learning_rate": 0.002, "loss": 2.3511, "step": 145890 }, { "epoch": 0.5640085973620325, "grad_norm": 0.09986388683319092, "learning_rate": 0.002, "loss": 2.3472, "step": 145900 }, { "epoch": 0.5640472545654157, "grad_norm": 0.10015115886926651, "learning_rate": 0.002, "loss": 2.3286, "step": 145910 }, { "epoch": 0.564085911768799, "grad_norm": 0.10942229628562927, "learning_rate": 0.002, "loss": 2.3461, "step": 145920 }, { "epoch": 0.5641245689721823, "grad_norm": 0.10118841379880905, "learning_rate": 0.002, "loss": 2.344, "step": 145930 }, { "epoch": 0.5641632261755656, "grad_norm": 0.09918641299009323, "learning_rate": 0.002, "loss": 2.3557, "step": 145940 }, { "epoch": 0.5642018833789488, "grad_norm": 0.10887645184993744, "learning_rate": 0.002, "loss": 2.3471, "step": 145950 }, { "epoch": 0.5642405405823321, "grad_norm": 0.12971638143062592, "learning_rate": 0.002, "loss": 2.3573, "step": 145960 }, { "epoch": 0.5642791977857153, "grad_norm": 0.10035094618797302, "learning_rate": 0.002, "loss": 2.3408, "step": 145970 }, { "epoch": 0.5643178549890987, "grad_norm": 0.13477998971939087, "learning_rate": 0.002, "loss": 2.3362, "step": 145980 }, { "epoch": 0.564356512192482, "grad_norm": 0.10733260959386826, "learning_rate": 0.002, "loss": 2.3454, "step": 145990 }, { "epoch": 0.5643951693958652, "grad_norm": 0.11192618310451508, "learning_rate": 0.002, "loss": 2.3401, "step": 146000 }, { "epoch": 0.5644338265992485, "grad_norm": 0.12049674242734909, "learning_rate": 0.002, "loss": 2.3467, "step": 146010 }, { "epoch": 0.5644724838026318, "grad_norm": 0.10427054762840271, "learning_rate": 0.002, "loss": 2.3167, "step": 146020 }, { "epoch": 0.5645111410060151, "grad_norm": 0.11584754288196564, "learning_rate": 0.002, "loss": 2.3485, "step": 146030 }, { "epoch": 0.5645497982093983, "grad_norm": 0.1015305295586586, "learning_rate": 0.002, "loss": 2.3444, "step": 146040 }, { "epoch": 0.5645884554127816, "grad_norm": 0.11458880454301834, "learning_rate": 0.002, "loss": 2.3564, "step": 146050 }, { "epoch": 0.5646271126161649, "grad_norm": 0.11445124447345734, "learning_rate": 0.002, "loss": 2.3541, "step": 146060 }, { "epoch": 0.5646657698195482, "grad_norm": 0.10447277128696442, "learning_rate": 0.002, "loss": 2.3499, "step": 146070 }, { "epoch": 0.5647044270229314, "grad_norm": 0.09257570654153824, "learning_rate": 0.002, "loss": 2.347, "step": 146080 }, { "epoch": 0.5647430842263147, "grad_norm": 0.1130877286195755, "learning_rate": 0.002, "loss": 2.3325, "step": 146090 }, { "epoch": 0.5647817414296981, "grad_norm": 0.11452198773622513, "learning_rate": 0.002, "loss": 2.3476, "step": 146100 }, { "epoch": 0.5648203986330813, "grad_norm": 0.09652146697044373, "learning_rate": 0.002, "loss": 2.343, "step": 146110 }, { "epoch": 0.5648590558364646, "grad_norm": 0.1043148934841156, "learning_rate": 0.002, "loss": 2.3415, "step": 146120 }, { "epoch": 0.5648977130398478, "grad_norm": 0.133561372756958, "learning_rate": 0.002, "loss": 2.327, "step": 146130 }, { "epoch": 0.5649363702432312, "grad_norm": 0.10625911504030228, "learning_rate": 0.002, "loss": 2.3323, "step": 146140 }, { "epoch": 0.5649750274466144, "grad_norm": 0.10919652879238129, "learning_rate": 0.002, "loss": 2.3451, "step": 146150 }, { "epoch": 0.5650136846499977, "grad_norm": 0.12570203840732574, "learning_rate": 0.002, "loss": 2.3423, "step": 146160 }, { "epoch": 0.5650523418533809, "grad_norm": 0.114006906747818, "learning_rate": 0.002, "loss": 2.3515, "step": 146170 }, { "epoch": 0.5650909990567642, "grad_norm": 0.10396946966648102, "learning_rate": 0.002, "loss": 2.3498, "step": 146180 }, { "epoch": 0.5651296562601476, "grad_norm": 0.11225318908691406, "learning_rate": 0.002, "loss": 2.3461, "step": 146190 }, { "epoch": 0.5651683134635308, "grad_norm": 0.0972057357430458, "learning_rate": 0.002, "loss": 2.3404, "step": 146200 }, { "epoch": 0.565206970666914, "grad_norm": 0.09723258018493652, "learning_rate": 0.002, "loss": 2.3388, "step": 146210 }, { "epoch": 0.5652456278702973, "grad_norm": 0.12347773462533951, "learning_rate": 0.002, "loss": 2.3454, "step": 146220 }, { "epoch": 0.5652842850736807, "grad_norm": 0.10941901057958603, "learning_rate": 0.002, "loss": 2.3452, "step": 146230 }, { "epoch": 0.5653229422770639, "grad_norm": 0.11385990679264069, "learning_rate": 0.002, "loss": 2.3303, "step": 146240 }, { "epoch": 0.5653615994804472, "grad_norm": 0.09415145963430405, "learning_rate": 0.002, "loss": 2.3635, "step": 146250 }, { "epoch": 0.5654002566838304, "grad_norm": 0.0994042232632637, "learning_rate": 0.002, "loss": 2.3494, "step": 146260 }, { "epoch": 0.5654389138872138, "grad_norm": 0.098891481757164, "learning_rate": 0.002, "loss": 2.3465, "step": 146270 }, { "epoch": 0.565477571090597, "grad_norm": 0.11153262108564377, "learning_rate": 0.002, "loss": 2.3554, "step": 146280 }, { "epoch": 0.5655162282939803, "grad_norm": 0.11282942444086075, "learning_rate": 0.002, "loss": 2.355, "step": 146290 }, { "epoch": 0.5655548854973635, "grad_norm": 0.11050242185592651, "learning_rate": 0.002, "loss": 2.3505, "step": 146300 }, { "epoch": 0.5655935427007469, "grad_norm": 0.10159312933683395, "learning_rate": 0.002, "loss": 2.3393, "step": 146310 }, { "epoch": 0.5656321999041302, "grad_norm": 0.12493990361690521, "learning_rate": 0.002, "loss": 2.3368, "step": 146320 }, { "epoch": 0.5656708571075134, "grad_norm": 0.10932406783103943, "learning_rate": 0.002, "loss": 2.3618, "step": 146330 }, { "epoch": 0.5657095143108967, "grad_norm": 0.10044391453266144, "learning_rate": 0.002, "loss": 2.3475, "step": 146340 }, { "epoch": 0.5657481715142799, "grad_norm": 0.1306449919939041, "learning_rate": 0.002, "loss": 2.3475, "step": 146350 }, { "epoch": 0.5657868287176633, "grad_norm": 0.100794717669487, "learning_rate": 0.002, "loss": 2.3432, "step": 146360 }, { "epoch": 0.5658254859210465, "grad_norm": 0.11264822632074356, "learning_rate": 0.002, "loss": 2.3444, "step": 146370 }, { "epoch": 0.5658641431244298, "grad_norm": 0.100074902176857, "learning_rate": 0.002, "loss": 2.3606, "step": 146380 }, { "epoch": 0.565902800327813, "grad_norm": 0.10754478722810745, "learning_rate": 0.002, "loss": 2.3354, "step": 146390 }, { "epoch": 0.5659414575311964, "grad_norm": 0.14100757241249084, "learning_rate": 0.002, "loss": 2.3425, "step": 146400 }, { "epoch": 0.5659801147345797, "grad_norm": 0.1034492775797844, "learning_rate": 0.002, "loss": 2.363, "step": 146410 }, { "epoch": 0.5660187719379629, "grad_norm": 0.09591642022132874, "learning_rate": 0.002, "loss": 2.3551, "step": 146420 }, { "epoch": 0.5660574291413462, "grad_norm": 0.09747200459241867, "learning_rate": 0.002, "loss": 2.3482, "step": 146430 }, { "epoch": 0.5660960863447295, "grad_norm": 0.11737639456987381, "learning_rate": 0.002, "loss": 2.3432, "step": 146440 }, { "epoch": 0.5661347435481128, "grad_norm": 0.10552874207496643, "learning_rate": 0.002, "loss": 2.3388, "step": 146450 }, { "epoch": 0.566173400751496, "grad_norm": 0.10250578820705414, "learning_rate": 0.002, "loss": 2.3502, "step": 146460 }, { "epoch": 0.5662120579548793, "grad_norm": 0.10936874151229858, "learning_rate": 0.002, "loss": 2.3577, "step": 146470 }, { "epoch": 0.5662507151582626, "grad_norm": 0.10178228467702866, "learning_rate": 0.002, "loss": 2.3351, "step": 146480 }, { "epoch": 0.5662893723616459, "grad_norm": 0.11337237805128098, "learning_rate": 0.002, "loss": 2.3776, "step": 146490 }, { "epoch": 0.5663280295650291, "grad_norm": 0.13309693336486816, "learning_rate": 0.002, "loss": 2.3553, "step": 146500 }, { "epoch": 0.5663666867684124, "grad_norm": 0.10486093163490295, "learning_rate": 0.002, "loss": 2.3407, "step": 146510 }, { "epoch": 0.5664053439717956, "grad_norm": 0.09396681189537048, "learning_rate": 0.002, "loss": 2.341, "step": 146520 }, { "epoch": 0.566444001175179, "grad_norm": 0.11196771264076233, "learning_rate": 0.002, "loss": 2.3532, "step": 146530 }, { "epoch": 0.5664826583785623, "grad_norm": 0.09456878155469894, "learning_rate": 0.002, "loss": 2.3425, "step": 146540 }, { "epoch": 0.5665213155819455, "grad_norm": 0.11470114439725876, "learning_rate": 0.002, "loss": 2.3435, "step": 146550 }, { "epoch": 0.5665599727853288, "grad_norm": 0.1259901076555252, "learning_rate": 0.002, "loss": 2.3567, "step": 146560 }, { "epoch": 0.5665986299887121, "grad_norm": 0.11010333895683289, "learning_rate": 0.002, "loss": 2.3448, "step": 146570 }, { "epoch": 0.5666372871920954, "grad_norm": 0.09986169636249542, "learning_rate": 0.002, "loss": 2.3453, "step": 146580 }, { "epoch": 0.5666759443954786, "grad_norm": 0.11977176368236542, "learning_rate": 0.002, "loss": 2.3573, "step": 146590 }, { "epoch": 0.5667146015988619, "grad_norm": 0.10560222715139389, "learning_rate": 0.002, "loss": 2.3489, "step": 146600 }, { "epoch": 0.5667532588022453, "grad_norm": 0.11105663329362869, "learning_rate": 0.002, "loss": 2.3294, "step": 146610 }, { "epoch": 0.5667919160056285, "grad_norm": 0.11333081126213074, "learning_rate": 0.002, "loss": 2.3446, "step": 146620 }, { "epoch": 0.5668305732090118, "grad_norm": 0.10974690318107605, "learning_rate": 0.002, "loss": 2.3421, "step": 146630 }, { "epoch": 0.566869230412395, "grad_norm": 0.10348519682884216, "learning_rate": 0.002, "loss": 2.3418, "step": 146640 }, { "epoch": 0.5669078876157784, "grad_norm": 0.09863714873790741, "learning_rate": 0.002, "loss": 2.344, "step": 146650 }, { "epoch": 0.5669465448191616, "grad_norm": 0.12337260693311691, "learning_rate": 0.002, "loss": 2.3258, "step": 146660 }, { "epoch": 0.5669852020225449, "grad_norm": 0.11037690192461014, "learning_rate": 0.002, "loss": 2.3487, "step": 146670 }, { "epoch": 0.5670238592259281, "grad_norm": 0.11719117313623428, "learning_rate": 0.002, "loss": 2.3511, "step": 146680 }, { "epoch": 0.5670625164293115, "grad_norm": 0.10750693827867508, "learning_rate": 0.002, "loss": 2.339, "step": 146690 }, { "epoch": 0.5671011736326947, "grad_norm": 0.10234736651182175, "learning_rate": 0.002, "loss": 2.3485, "step": 146700 }, { "epoch": 0.567139830836078, "grad_norm": 0.1213693618774414, "learning_rate": 0.002, "loss": 2.3508, "step": 146710 }, { "epoch": 0.5671784880394612, "grad_norm": 0.11592577397823334, "learning_rate": 0.002, "loss": 2.323, "step": 146720 }, { "epoch": 0.5672171452428445, "grad_norm": 0.11350953578948975, "learning_rate": 0.002, "loss": 2.3438, "step": 146730 }, { "epoch": 0.5672558024462279, "grad_norm": 0.10382720828056335, "learning_rate": 0.002, "loss": 2.3402, "step": 146740 }, { "epoch": 0.5672944596496111, "grad_norm": 0.13582183420658112, "learning_rate": 0.002, "loss": 2.3413, "step": 146750 }, { "epoch": 0.5673331168529944, "grad_norm": 0.10660482197999954, "learning_rate": 0.002, "loss": 2.3495, "step": 146760 }, { "epoch": 0.5673717740563776, "grad_norm": 0.10936098545789719, "learning_rate": 0.002, "loss": 2.3585, "step": 146770 }, { "epoch": 0.567410431259761, "grad_norm": 0.15151578187942505, "learning_rate": 0.002, "loss": 2.3534, "step": 146780 }, { "epoch": 0.5674490884631442, "grad_norm": 0.11021112650632858, "learning_rate": 0.002, "loss": 2.3424, "step": 146790 }, { "epoch": 0.5674877456665275, "grad_norm": 0.10559472441673279, "learning_rate": 0.002, "loss": 2.3534, "step": 146800 }, { "epoch": 0.5675264028699107, "grad_norm": 0.11798600107431412, "learning_rate": 0.002, "loss": 2.3372, "step": 146810 }, { "epoch": 0.5675650600732941, "grad_norm": 0.10911992937326431, "learning_rate": 0.002, "loss": 2.3377, "step": 146820 }, { "epoch": 0.5676037172766774, "grad_norm": 0.1071673184633255, "learning_rate": 0.002, "loss": 2.3439, "step": 146830 }, { "epoch": 0.5676423744800606, "grad_norm": 0.11145402491092682, "learning_rate": 0.002, "loss": 2.3504, "step": 146840 }, { "epoch": 0.5676810316834439, "grad_norm": 0.10432492196559906, "learning_rate": 0.002, "loss": 2.34, "step": 146850 }, { "epoch": 0.5677196888868272, "grad_norm": 0.13461947441101074, "learning_rate": 0.002, "loss": 2.3562, "step": 146860 }, { "epoch": 0.5677583460902105, "grad_norm": 0.10047348588705063, "learning_rate": 0.002, "loss": 2.343, "step": 146870 }, { "epoch": 0.5677970032935937, "grad_norm": 0.11872837692499161, "learning_rate": 0.002, "loss": 2.3395, "step": 146880 }, { "epoch": 0.567835660496977, "grad_norm": 0.13711833953857422, "learning_rate": 0.002, "loss": 2.3544, "step": 146890 }, { "epoch": 0.5678743177003602, "grad_norm": 0.12299994379281998, "learning_rate": 0.002, "loss": 2.3325, "step": 146900 }, { "epoch": 0.5679129749037436, "grad_norm": 0.09880630671977997, "learning_rate": 0.002, "loss": 2.3546, "step": 146910 }, { "epoch": 0.5679516321071268, "grad_norm": 0.11891564726829529, "learning_rate": 0.002, "loss": 2.3448, "step": 146920 }, { "epoch": 0.5679902893105101, "grad_norm": 0.1166873499751091, "learning_rate": 0.002, "loss": 2.3503, "step": 146930 }, { "epoch": 0.5680289465138934, "grad_norm": 0.09265412390232086, "learning_rate": 0.002, "loss": 2.3522, "step": 146940 }, { "epoch": 0.5680676037172767, "grad_norm": 0.10010464489459991, "learning_rate": 0.002, "loss": 2.3412, "step": 146950 }, { "epoch": 0.56810626092066, "grad_norm": 0.10351064801216125, "learning_rate": 0.002, "loss": 2.3547, "step": 146960 }, { "epoch": 0.5681449181240432, "grad_norm": 0.1153855249285698, "learning_rate": 0.002, "loss": 2.3522, "step": 146970 }, { "epoch": 0.5681835753274265, "grad_norm": 0.14736634492874146, "learning_rate": 0.002, "loss": 2.3384, "step": 146980 }, { "epoch": 0.5682222325308098, "grad_norm": 0.1133892834186554, "learning_rate": 0.002, "loss": 2.3432, "step": 146990 }, { "epoch": 0.5682608897341931, "grad_norm": 0.1024647057056427, "learning_rate": 0.002, "loss": 2.3512, "step": 147000 }, { "epoch": 0.5682995469375763, "grad_norm": 0.10568360984325409, "learning_rate": 0.002, "loss": 2.3416, "step": 147010 }, { "epoch": 0.5683382041409596, "grad_norm": 0.10185234993696213, "learning_rate": 0.002, "loss": 2.3532, "step": 147020 }, { "epoch": 0.568376861344343, "grad_norm": 0.10574902594089508, "learning_rate": 0.002, "loss": 2.3495, "step": 147030 }, { "epoch": 0.5684155185477262, "grad_norm": 0.098385289311409, "learning_rate": 0.002, "loss": 2.3351, "step": 147040 }, { "epoch": 0.5684541757511095, "grad_norm": 0.10299337655305862, "learning_rate": 0.002, "loss": 2.3383, "step": 147050 }, { "epoch": 0.5684928329544927, "grad_norm": 0.10877186805009842, "learning_rate": 0.002, "loss": 2.345, "step": 147060 }, { "epoch": 0.5685314901578761, "grad_norm": 0.13419096171855927, "learning_rate": 0.002, "loss": 2.3453, "step": 147070 }, { "epoch": 0.5685701473612593, "grad_norm": 0.10985522717237473, "learning_rate": 0.002, "loss": 2.3508, "step": 147080 }, { "epoch": 0.5686088045646426, "grad_norm": 0.12040858715772629, "learning_rate": 0.002, "loss": 2.3422, "step": 147090 }, { "epoch": 0.5686474617680258, "grad_norm": 0.12113548815250397, "learning_rate": 0.002, "loss": 2.3352, "step": 147100 }, { "epoch": 0.5686861189714091, "grad_norm": 0.09761956334114075, "learning_rate": 0.002, "loss": 2.3396, "step": 147110 }, { "epoch": 0.5687247761747924, "grad_norm": 0.09393589943647385, "learning_rate": 0.002, "loss": 2.3361, "step": 147120 }, { "epoch": 0.5687634333781757, "grad_norm": 0.10053534805774689, "learning_rate": 0.002, "loss": 2.3695, "step": 147130 }, { "epoch": 0.568802090581559, "grad_norm": 0.10242559760808945, "learning_rate": 0.002, "loss": 2.3518, "step": 147140 }, { "epoch": 0.5688407477849422, "grad_norm": 0.2468280792236328, "learning_rate": 0.002, "loss": 2.3345, "step": 147150 }, { "epoch": 0.5688794049883256, "grad_norm": 0.09995192289352417, "learning_rate": 0.002, "loss": 2.3421, "step": 147160 }, { "epoch": 0.5689180621917088, "grad_norm": 0.11786067485809326, "learning_rate": 0.002, "loss": 2.3431, "step": 147170 }, { "epoch": 0.5689567193950921, "grad_norm": 0.10817831009626389, "learning_rate": 0.002, "loss": 2.3491, "step": 147180 }, { "epoch": 0.5689953765984753, "grad_norm": 0.10672974586486816, "learning_rate": 0.002, "loss": 2.3465, "step": 147190 }, { "epoch": 0.5690340338018587, "grad_norm": 0.1374005824327469, "learning_rate": 0.002, "loss": 2.3618, "step": 147200 }, { "epoch": 0.5690726910052419, "grad_norm": 0.0980212464928627, "learning_rate": 0.002, "loss": 2.3421, "step": 147210 }, { "epoch": 0.5691113482086252, "grad_norm": 0.10079023987054825, "learning_rate": 0.002, "loss": 2.3475, "step": 147220 }, { "epoch": 0.5691500054120084, "grad_norm": 0.1042499914765358, "learning_rate": 0.002, "loss": 2.3442, "step": 147230 }, { "epoch": 0.5691886626153918, "grad_norm": 0.11340388655662537, "learning_rate": 0.002, "loss": 2.362, "step": 147240 }, { "epoch": 0.5692273198187751, "grad_norm": 0.09811675548553467, "learning_rate": 0.002, "loss": 2.3442, "step": 147250 }, { "epoch": 0.5692659770221583, "grad_norm": 0.10510072857141495, "learning_rate": 0.002, "loss": 2.3426, "step": 147260 }, { "epoch": 0.5693046342255416, "grad_norm": 0.11670279502868652, "learning_rate": 0.002, "loss": 2.3375, "step": 147270 }, { "epoch": 0.5693432914289248, "grad_norm": 0.1188029870390892, "learning_rate": 0.002, "loss": 2.3451, "step": 147280 }, { "epoch": 0.5693819486323082, "grad_norm": 0.10574861615896225, "learning_rate": 0.002, "loss": 2.3504, "step": 147290 }, { "epoch": 0.5694206058356914, "grad_norm": 0.11340762674808502, "learning_rate": 0.002, "loss": 2.3332, "step": 147300 }, { "epoch": 0.5694592630390747, "grad_norm": 0.09961926937103271, "learning_rate": 0.002, "loss": 2.3405, "step": 147310 }, { "epoch": 0.5694979202424579, "grad_norm": 0.12730400264263153, "learning_rate": 0.002, "loss": 2.3587, "step": 147320 }, { "epoch": 0.5695365774458413, "grad_norm": 0.1308182179927826, "learning_rate": 0.002, "loss": 2.3515, "step": 147330 }, { "epoch": 0.5695752346492245, "grad_norm": 0.10637819021940231, "learning_rate": 0.002, "loss": 2.3403, "step": 147340 }, { "epoch": 0.5696138918526078, "grad_norm": 0.1019187867641449, "learning_rate": 0.002, "loss": 2.3387, "step": 147350 }, { "epoch": 0.569652549055991, "grad_norm": 0.10890976339578629, "learning_rate": 0.002, "loss": 2.3486, "step": 147360 }, { "epoch": 0.5696912062593744, "grad_norm": 0.10864552855491638, "learning_rate": 0.002, "loss": 2.3305, "step": 147370 }, { "epoch": 0.5697298634627577, "grad_norm": 0.10701534152030945, "learning_rate": 0.002, "loss": 2.3427, "step": 147380 }, { "epoch": 0.5697685206661409, "grad_norm": 0.09928543865680695, "learning_rate": 0.002, "loss": 2.3317, "step": 147390 }, { "epoch": 0.5698071778695242, "grad_norm": 0.11758770048618317, "learning_rate": 0.002, "loss": 2.3221, "step": 147400 }, { "epoch": 0.5698458350729075, "grad_norm": 0.10023277997970581, "learning_rate": 0.002, "loss": 2.3322, "step": 147410 }, { "epoch": 0.5698844922762908, "grad_norm": 0.11018215864896774, "learning_rate": 0.002, "loss": 2.3428, "step": 147420 }, { "epoch": 0.569923149479674, "grad_norm": 0.10372017323970795, "learning_rate": 0.002, "loss": 2.3551, "step": 147430 }, { "epoch": 0.5699618066830573, "grad_norm": 0.0958276018500328, "learning_rate": 0.002, "loss": 2.3384, "step": 147440 }, { "epoch": 0.5700004638864405, "grad_norm": 0.11901126801967621, "learning_rate": 0.002, "loss": 2.3512, "step": 147450 }, { "epoch": 0.5700391210898239, "grad_norm": 0.11974016577005386, "learning_rate": 0.002, "loss": 2.3383, "step": 147460 }, { "epoch": 0.5700777782932072, "grad_norm": 0.11292359977960587, "learning_rate": 0.002, "loss": 2.3453, "step": 147470 }, { "epoch": 0.5701164354965904, "grad_norm": 0.10017193853855133, "learning_rate": 0.002, "loss": 2.346, "step": 147480 }, { "epoch": 0.5701550926999737, "grad_norm": 0.10776569694280624, "learning_rate": 0.002, "loss": 2.3451, "step": 147490 }, { "epoch": 0.570193749903357, "grad_norm": 0.11120834201574326, "learning_rate": 0.002, "loss": 2.3542, "step": 147500 }, { "epoch": 0.5702324071067403, "grad_norm": 0.1190958097577095, "learning_rate": 0.002, "loss": 2.346, "step": 147510 }, { "epoch": 0.5702710643101235, "grad_norm": 0.09756813198328018, "learning_rate": 0.002, "loss": 2.3644, "step": 147520 }, { "epoch": 0.5703097215135068, "grad_norm": 0.11825324594974518, "learning_rate": 0.002, "loss": 2.3456, "step": 147530 }, { "epoch": 0.5703483787168901, "grad_norm": 0.1010400578379631, "learning_rate": 0.002, "loss": 2.3379, "step": 147540 }, { "epoch": 0.5703870359202734, "grad_norm": 0.09621983021497726, "learning_rate": 0.002, "loss": 2.337, "step": 147550 }, { "epoch": 0.5704256931236567, "grad_norm": 0.11062465608119965, "learning_rate": 0.002, "loss": 2.3518, "step": 147560 }, { "epoch": 0.5704643503270399, "grad_norm": 0.11987130343914032, "learning_rate": 0.002, "loss": 2.3463, "step": 147570 }, { "epoch": 0.5705030075304233, "grad_norm": 0.11454615741968155, "learning_rate": 0.002, "loss": 2.3388, "step": 147580 }, { "epoch": 0.5705416647338065, "grad_norm": 0.1135939434170723, "learning_rate": 0.002, "loss": 2.3459, "step": 147590 }, { "epoch": 0.5705803219371898, "grad_norm": 0.11390845477581024, "learning_rate": 0.002, "loss": 2.3368, "step": 147600 }, { "epoch": 0.570618979140573, "grad_norm": 0.12084183841943741, "learning_rate": 0.002, "loss": 2.34, "step": 147610 }, { "epoch": 0.5706576363439564, "grad_norm": 0.09719133377075195, "learning_rate": 0.002, "loss": 2.3453, "step": 147620 }, { "epoch": 0.5706962935473396, "grad_norm": 0.10637233406305313, "learning_rate": 0.002, "loss": 2.3333, "step": 147630 }, { "epoch": 0.5707349507507229, "grad_norm": 0.10290329903364182, "learning_rate": 0.002, "loss": 2.3335, "step": 147640 }, { "epoch": 0.5707736079541061, "grad_norm": 0.1075134128332138, "learning_rate": 0.002, "loss": 2.368, "step": 147650 }, { "epoch": 0.5708122651574894, "grad_norm": 0.10845005512237549, "learning_rate": 0.002, "loss": 2.3676, "step": 147660 }, { "epoch": 0.5708509223608728, "grad_norm": 0.14120826125144958, "learning_rate": 0.002, "loss": 2.3368, "step": 147670 }, { "epoch": 0.570889579564256, "grad_norm": 0.11323338747024536, "learning_rate": 0.002, "loss": 2.3391, "step": 147680 }, { "epoch": 0.5709282367676393, "grad_norm": 0.0938161090016365, "learning_rate": 0.002, "loss": 2.3408, "step": 147690 }, { "epoch": 0.5709668939710225, "grad_norm": 0.09869852662086487, "learning_rate": 0.002, "loss": 2.3509, "step": 147700 }, { "epoch": 0.5710055511744059, "grad_norm": 0.12468905746936798, "learning_rate": 0.002, "loss": 2.3589, "step": 147710 }, { "epoch": 0.5710442083777891, "grad_norm": 0.10178691893815994, "learning_rate": 0.002, "loss": 2.3518, "step": 147720 }, { "epoch": 0.5710828655811724, "grad_norm": 0.1340477019548416, "learning_rate": 0.002, "loss": 2.3499, "step": 147730 }, { "epoch": 0.5711215227845556, "grad_norm": 0.11438913643360138, "learning_rate": 0.002, "loss": 2.336, "step": 147740 }, { "epoch": 0.571160179987939, "grad_norm": 0.107391357421875, "learning_rate": 0.002, "loss": 2.3506, "step": 147750 }, { "epoch": 0.5711988371913223, "grad_norm": 0.11345870047807693, "learning_rate": 0.002, "loss": 2.339, "step": 147760 }, { "epoch": 0.5712374943947055, "grad_norm": 0.10152135789394379, "learning_rate": 0.002, "loss": 2.3419, "step": 147770 }, { "epoch": 0.5712761515980888, "grad_norm": 0.11594166606664658, "learning_rate": 0.002, "loss": 2.345, "step": 147780 }, { "epoch": 0.5713148088014721, "grad_norm": 0.11304232478141785, "learning_rate": 0.002, "loss": 2.3465, "step": 147790 }, { "epoch": 0.5713534660048554, "grad_norm": 0.11002659797668457, "learning_rate": 0.002, "loss": 2.3507, "step": 147800 }, { "epoch": 0.5713921232082386, "grad_norm": 0.0949237123131752, "learning_rate": 0.002, "loss": 2.3471, "step": 147810 }, { "epoch": 0.5714307804116219, "grad_norm": 0.13494773209095, "learning_rate": 0.002, "loss": 2.3379, "step": 147820 }, { "epoch": 0.5714694376150051, "grad_norm": 0.10871291905641556, "learning_rate": 0.002, "loss": 2.3466, "step": 147830 }, { "epoch": 0.5715080948183885, "grad_norm": 0.11340344697237015, "learning_rate": 0.002, "loss": 2.3252, "step": 147840 }, { "epoch": 0.5715467520217717, "grad_norm": 0.09544314444065094, "learning_rate": 0.002, "loss": 2.3408, "step": 147850 }, { "epoch": 0.571585409225155, "grad_norm": 0.10063931345939636, "learning_rate": 0.002, "loss": 2.343, "step": 147860 }, { "epoch": 0.5716240664285382, "grad_norm": 0.1033964529633522, "learning_rate": 0.002, "loss": 2.3418, "step": 147870 }, { "epoch": 0.5716627236319216, "grad_norm": 0.10400796681642532, "learning_rate": 0.002, "loss": 2.3416, "step": 147880 }, { "epoch": 0.5717013808353049, "grad_norm": 0.10402462631464005, "learning_rate": 0.002, "loss": 2.3387, "step": 147890 }, { "epoch": 0.5717400380386881, "grad_norm": 0.10717406123876572, "learning_rate": 0.002, "loss": 2.3458, "step": 147900 }, { "epoch": 0.5717786952420714, "grad_norm": 0.10979738086462021, "learning_rate": 0.002, "loss": 2.3417, "step": 147910 }, { "epoch": 0.5718173524454547, "grad_norm": 0.1012766882777214, "learning_rate": 0.002, "loss": 2.3255, "step": 147920 }, { "epoch": 0.571856009648838, "grad_norm": 0.14146389067173004, "learning_rate": 0.002, "loss": 2.3542, "step": 147930 }, { "epoch": 0.5718946668522212, "grad_norm": 0.1255309134721756, "learning_rate": 0.002, "loss": 2.3414, "step": 147940 }, { "epoch": 0.5719333240556045, "grad_norm": 0.09301314502954483, "learning_rate": 0.002, "loss": 2.3323, "step": 147950 }, { "epoch": 0.5719719812589878, "grad_norm": 0.09817476570606232, "learning_rate": 0.002, "loss": 2.3415, "step": 147960 }, { "epoch": 0.5720106384623711, "grad_norm": 0.123959019780159, "learning_rate": 0.002, "loss": 2.3342, "step": 147970 }, { "epoch": 0.5720492956657544, "grad_norm": 0.09742964059114456, "learning_rate": 0.002, "loss": 2.3364, "step": 147980 }, { "epoch": 0.5720879528691376, "grad_norm": 0.10809384286403656, "learning_rate": 0.002, "loss": 2.3485, "step": 147990 }, { "epoch": 0.5721266100725209, "grad_norm": 0.11448068916797638, "learning_rate": 0.002, "loss": 2.3428, "step": 148000 }, { "epoch": 0.5721652672759042, "grad_norm": 0.1087169274687767, "learning_rate": 0.002, "loss": 2.3376, "step": 148010 }, { "epoch": 0.5722039244792875, "grad_norm": 0.10386857390403748, "learning_rate": 0.002, "loss": 2.3533, "step": 148020 }, { "epoch": 0.5722425816826707, "grad_norm": 0.10609959810972214, "learning_rate": 0.002, "loss": 2.341, "step": 148030 }, { "epoch": 0.572281238886054, "grad_norm": 0.12076544761657715, "learning_rate": 0.002, "loss": 2.3383, "step": 148040 }, { "epoch": 0.5723198960894373, "grad_norm": 0.10421552509069443, "learning_rate": 0.002, "loss": 2.3408, "step": 148050 }, { "epoch": 0.5723585532928206, "grad_norm": 0.11697299778461456, "learning_rate": 0.002, "loss": 2.3435, "step": 148060 }, { "epoch": 0.5723972104962038, "grad_norm": 0.09293560683727264, "learning_rate": 0.002, "loss": 2.3331, "step": 148070 }, { "epoch": 0.5724358676995871, "grad_norm": 0.11079003661870956, "learning_rate": 0.002, "loss": 2.3496, "step": 148080 }, { "epoch": 0.5724745249029705, "grad_norm": 0.14255714416503906, "learning_rate": 0.002, "loss": 2.3566, "step": 148090 }, { "epoch": 0.5725131821063537, "grad_norm": 0.09715547412633896, "learning_rate": 0.002, "loss": 2.3631, "step": 148100 }, { "epoch": 0.572551839309737, "grad_norm": 0.10531293600797653, "learning_rate": 0.002, "loss": 2.3483, "step": 148110 }, { "epoch": 0.5725904965131202, "grad_norm": 0.12235134840011597, "learning_rate": 0.002, "loss": 2.3578, "step": 148120 }, { "epoch": 0.5726291537165036, "grad_norm": 0.1346643716096878, "learning_rate": 0.002, "loss": 2.3556, "step": 148130 }, { "epoch": 0.5726678109198868, "grad_norm": 0.12076713144779205, "learning_rate": 0.002, "loss": 2.3409, "step": 148140 }, { "epoch": 0.5727064681232701, "grad_norm": 0.1027338057756424, "learning_rate": 0.002, "loss": 2.3493, "step": 148150 }, { "epoch": 0.5727451253266533, "grad_norm": 0.09555674344301224, "learning_rate": 0.002, "loss": 2.3453, "step": 148160 }, { "epoch": 0.5727837825300367, "grad_norm": 0.09910275042057037, "learning_rate": 0.002, "loss": 2.3507, "step": 148170 }, { "epoch": 0.57282243973342, "grad_norm": 0.1158229410648346, "learning_rate": 0.002, "loss": 2.3607, "step": 148180 }, { "epoch": 0.5728610969368032, "grad_norm": 0.10328315943479538, "learning_rate": 0.002, "loss": 2.355, "step": 148190 }, { "epoch": 0.5728997541401865, "grad_norm": 0.10658963769674301, "learning_rate": 0.002, "loss": 2.3519, "step": 148200 }, { "epoch": 0.5729384113435697, "grad_norm": 0.12963686883449554, "learning_rate": 0.002, "loss": 2.3332, "step": 148210 }, { "epoch": 0.5729770685469531, "grad_norm": 0.10582420974969864, "learning_rate": 0.002, "loss": 2.3435, "step": 148220 }, { "epoch": 0.5730157257503363, "grad_norm": 0.10931842029094696, "learning_rate": 0.002, "loss": 2.3588, "step": 148230 }, { "epoch": 0.5730543829537196, "grad_norm": 0.1136174127459526, "learning_rate": 0.002, "loss": 2.3416, "step": 148240 }, { "epoch": 0.5730930401571028, "grad_norm": 0.09869039803743362, "learning_rate": 0.002, "loss": 2.3447, "step": 148250 }, { "epoch": 0.5731316973604862, "grad_norm": 0.10541972517967224, "learning_rate": 0.002, "loss": 2.3533, "step": 148260 }, { "epoch": 0.5731703545638694, "grad_norm": 0.10672339051961899, "learning_rate": 0.002, "loss": 2.345, "step": 148270 }, { "epoch": 0.5732090117672527, "grad_norm": 0.11145348846912384, "learning_rate": 0.002, "loss": 2.341, "step": 148280 }, { "epoch": 0.573247668970636, "grad_norm": 0.09722515940666199, "learning_rate": 0.002, "loss": 2.3185, "step": 148290 }, { "epoch": 0.5732863261740193, "grad_norm": 0.10645853728055954, "learning_rate": 0.002, "loss": 2.3407, "step": 148300 }, { "epoch": 0.5733249833774026, "grad_norm": 0.11253643035888672, "learning_rate": 0.002, "loss": 2.3363, "step": 148310 }, { "epoch": 0.5733636405807858, "grad_norm": 0.10229386389255524, "learning_rate": 0.002, "loss": 2.3468, "step": 148320 }, { "epoch": 0.5734022977841691, "grad_norm": 0.10213582217693329, "learning_rate": 0.002, "loss": 2.3406, "step": 148330 }, { "epoch": 0.5734409549875524, "grad_norm": 0.10059046000242233, "learning_rate": 0.002, "loss": 2.3482, "step": 148340 }, { "epoch": 0.5734796121909357, "grad_norm": 0.09339335560798645, "learning_rate": 0.002, "loss": 2.3359, "step": 148350 }, { "epoch": 0.5735182693943189, "grad_norm": 0.10184424370527267, "learning_rate": 0.002, "loss": 2.3571, "step": 148360 }, { "epoch": 0.5735569265977022, "grad_norm": 0.09910251945257187, "learning_rate": 0.002, "loss": 2.3388, "step": 148370 }, { "epoch": 0.5735955838010854, "grad_norm": 0.11430943757295609, "learning_rate": 0.002, "loss": 2.3383, "step": 148380 }, { "epoch": 0.5736342410044688, "grad_norm": 0.09297601878643036, "learning_rate": 0.002, "loss": 2.3405, "step": 148390 }, { "epoch": 0.5736728982078521, "grad_norm": 0.11601737886667252, "learning_rate": 0.002, "loss": 2.3307, "step": 148400 }, { "epoch": 0.5737115554112353, "grad_norm": 0.11303595453500748, "learning_rate": 0.002, "loss": 2.3398, "step": 148410 }, { "epoch": 0.5737502126146186, "grad_norm": 0.11738861352205276, "learning_rate": 0.002, "loss": 2.3636, "step": 148420 }, { "epoch": 0.5737888698180019, "grad_norm": 0.09818091988563538, "learning_rate": 0.002, "loss": 2.347, "step": 148430 }, { "epoch": 0.5738275270213852, "grad_norm": 0.1266518533229828, "learning_rate": 0.002, "loss": 2.351, "step": 148440 }, { "epoch": 0.5738661842247684, "grad_norm": 0.10527455806732178, "learning_rate": 0.002, "loss": 2.3466, "step": 148450 }, { "epoch": 0.5739048414281517, "grad_norm": 0.13432396948337555, "learning_rate": 0.002, "loss": 2.3483, "step": 148460 }, { "epoch": 0.573943498631535, "grad_norm": 0.09773876518011093, "learning_rate": 0.002, "loss": 2.333, "step": 148470 }, { "epoch": 0.5739821558349183, "grad_norm": 0.10255984216928482, "learning_rate": 0.002, "loss": 2.3365, "step": 148480 }, { "epoch": 0.5740208130383015, "grad_norm": 0.12200841307640076, "learning_rate": 0.002, "loss": 2.3556, "step": 148490 }, { "epoch": 0.5740594702416848, "grad_norm": 0.10463497787714005, "learning_rate": 0.002, "loss": 2.3549, "step": 148500 }, { "epoch": 0.5740981274450682, "grad_norm": 0.12294262647628784, "learning_rate": 0.002, "loss": 2.3493, "step": 148510 }, { "epoch": 0.5741367846484514, "grad_norm": 0.0980486273765564, "learning_rate": 0.002, "loss": 2.3468, "step": 148520 }, { "epoch": 0.5741754418518347, "grad_norm": 0.10178186744451523, "learning_rate": 0.002, "loss": 2.3345, "step": 148530 }, { "epoch": 0.5742140990552179, "grad_norm": 0.10752306878566742, "learning_rate": 0.002, "loss": 2.3393, "step": 148540 }, { "epoch": 0.5742527562586013, "grad_norm": 0.1123095452785492, "learning_rate": 0.002, "loss": 2.3499, "step": 148550 }, { "epoch": 0.5742914134619845, "grad_norm": 0.09954951703548431, "learning_rate": 0.002, "loss": 2.3444, "step": 148560 }, { "epoch": 0.5743300706653678, "grad_norm": 0.09189984202384949, "learning_rate": 0.002, "loss": 2.3496, "step": 148570 }, { "epoch": 0.574368727868751, "grad_norm": 0.5664579272270203, "learning_rate": 0.002, "loss": 2.3618, "step": 148580 }, { "epoch": 0.5744073850721343, "grad_norm": 0.12876535952091217, "learning_rate": 0.002, "loss": 2.3374, "step": 148590 }, { "epoch": 0.5744460422755177, "grad_norm": 0.11784474551677704, "learning_rate": 0.002, "loss": 2.3481, "step": 148600 }, { "epoch": 0.5744846994789009, "grad_norm": 0.13473601639270782, "learning_rate": 0.002, "loss": 2.343, "step": 148610 }, { "epoch": 0.5745233566822842, "grad_norm": 0.11781080812215805, "learning_rate": 0.002, "loss": 2.3445, "step": 148620 }, { "epoch": 0.5745620138856674, "grad_norm": 0.1007075086236, "learning_rate": 0.002, "loss": 2.3403, "step": 148630 }, { "epoch": 0.5746006710890508, "grad_norm": 0.09426712244749069, "learning_rate": 0.002, "loss": 2.3299, "step": 148640 }, { "epoch": 0.574639328292434, "grad_norm": 0.11516745388507843, "learning_rate": 0.002, "loss": 2.3474, "step": 148650 }, { "epoch": 0.5746779854958173, "grad_norm": 0.09972114115953445, "learning_rate": 0.002, "loss": 2.3321, "step": 148660 }, { "epoch": 0.5747166426992005, "grad_norm": 0.10529178380966187, "learning_rate": 0.002, "loss": 2.3462, "step": 148670 }, { "epoch": 0.5747552999025839, "grad_norm": 0.14836226403713226, "learning_rate": 0.002, "loss": 2.3518, "step": 148680 }, { "epoch": 0.5747939571059671, "grad_norm": 0.12221290916204453, "learning_rate": 0.002, "loss": 2.3553, "step": 148690 }, { "epoch": 0.5748326143093504, "grad_norm": 0.11130478233098984, "learning_rate": 0.002, "loss": 2.3364, "step": 148700 }, { "epoch": 0.5748712715127337, "grad_norm": 0.09555680304765701, "learning_rate": 0.002, "loss": 2.3434, "step": 148710 }, { "epoch": 0.574909928716117, "grad_norm": 0.12878166139125824, "learning_rate": 0.002, "loss": 2.3421, "step": 148720 }, { "epoch": 0.5749485859195003, "grad_norm": 0.10222994536161423, "learning_rate": 0.002, "loss": 2.35, "step": 148730 }, { "epoch": 0.5749872431228835, "grad_norm": 0.13735540211200714, "learning_rate": 0.002, "loss": 2.3382, "step": 148740 }, { "epoch": 0.5750259003262668, "grad_norm": 0.104596346616745, "learning_rate": 0.002, "loss": 2.348, "step": 148750 }, { "epoch": 0.57506455752965, "grad_norm": 0.09962372481822968, "learning_rate": 0.002, "loss": 2.3352, "step": 148760 }, { "epoch": 0.5751032147330334, "grad_norm": 0.10563892126083374, "learning_rate": 0.002, "loss": 2.3393, "step": 148770 }, { "epoch": 0.5751418719364166, "grad_norm": 0.12737879157066345, "learning_rate": 0.002, "loss": 2.352, "step": 148780 }, { "epoch": 0.5751805291397999, "grad_norm": 0.09857510775327682, "learning_rate": 0.002, "loss": 2.3533, "step": 148790 }, { "epoch": 0.5752191863431831, "grad_norm": 0.10216040164232254, "learning_rate": 0.002, "loss": 2.3452, "step": 148800 }, { "epoch": 0.5752578435465665, "grad_norm": 0.10757684707641602, "learning_rate": 0.002, "loss": 2.3499, "step": 148810 }, { "epoch": 0.5752965007499498, "grad_norm": 0.09681134670972824, "learning_rate": 0.002, "loss": 2.3297, "step": 148820 }, { "epoch": 0.575335157953333, "grad_norm": 0.10559449344873428, "learning_rate": 0.002, "loss": 2.3567, "step": 148830 }, { "epoch": 0.5753738151567163, "grad_norm": 0.1202688217163086, "learning_rate": 0.002, "loss": 2.3328, "step": 148840 }, { "epoch": 0.5754124723600996, "grad_norm": 0.12340883910655975, "learning_rate": 0.002, "loss": 2.3408, "step": 148850 }, { "epoch": 0.5754511295634829, "grad_norm": 0.09787190705537796, "learning_rate": 0.002, "loss": 2.3455, "step": 148860 }, { "epoch": 0.5754897867668661, "grad_norm": 0.10570527613162994, "learning_rate": 0.002, "loss": 2.343, "step": 148870 }, { "epoch": 0.5755284439702494, "grad_norm": 0.09977946430444717, "learning_rate": 0.002, "loss": 2.3302, "step": 148880 }, { "epoch": 0.5755671011736327, "grad_norm": 0.10107558965682983, "learning_rate": 0.002, "loss": 2.3557, "step": 148890 }, { "epoch": 0.575605758377016, "grad_norm": 0.11597032845020294, "learning_rate": 0.002, "loss": 2.3528, "step": 148900 }, { "epoch": 0.5756444155803992, "grad_norm": 0.10900797694921494, "learning_rate": 0.002, "loss": 2.3488, "step": 148910 }, { "epoch": 0.5756830727837825, "grad_norm": 0.1132872998714447, "learning_rate": 0.002, "loss": 2.3416, "step": 148920 }, { "epoch": 0.5757217299871658, "grad_norm": 0.11002641916275024, "learning_rate": 0.002, "loss": 2.3328, "step": 148930 }, { "epoch": 0.5757603871905491, "grad_norm": 0.11283540725708008, "learning_rate": 0.002, "loss": 2.3424, "step": 148940 }, { "epoch": 0.5757990443939324, "grad_norm": 0.10150940716266632, "learning_rate": 0.002, "loss": 2.3521, "step": 148950 }, { "epoch": 0.5758377015973156, "grad_norm": 0.14698892831802368, "learning_rate": 0.002, "loss": 2.3498, "step": 148960 }, { "epoch": 0.5758763588006989, "grad_norm": 0.10251089185476303, "learning_rate": 0.002, "loss": 2.3413, "step": 148970 }, { "epoch": 0.5759150160040822, "grad_norm": 0.09909605979919434, "learning_rate": 0.002, "loss": 2.3324, "step": 148980 }, { "epoch": 0.5759536732074655, "grad_norm": 0.10927826166152954, "learning_rate": 0.002, "loss": 2.3517, "step": 148990 }, { "epoch": 0.5759923304108487, "grad_norm": 0.10843043774366379, "learning_rate": 0.002, "loss": 2.3475, "step": 149000 }, { "epoch": 0.576030987614232, "grad_norm": 0.10785437375307083, "learning_rate": 0.002, "loss": 2.3439, "step": 149010 }, { "epoch": 0.5760696448176154, "grad_norm": 0.09829997271299362, "learning_rate": 0.002, "loss": 2.3248, "step": 149020 }, { "epoch": 0.5761083020209986, "grad_norm": 0.09375440329313278, "learning_rate": 0.002, "loss": 2.3494, "step": 149030 }, { "epoch": 0.5761469592243819, "grad_norm": 0.11929398030042648, "learning_rate": 0.002, "loss": 2.3374, "step": 149040 }, { "epoch": 0.5761856164277651, "grad_norm": 0.09872215986251831, "learning_rate": 0.002, "loss": 2.3237, "step": 149050 }, { "epoch": 0.5762242736311485, "grad_norm": 0.10986842960119247, "learning_rate": 0.002, "loss": 2.3438, "step": 149060 }, { "epoch": 0.5762629308345317, "grad_norm": 0.0990709587931633, "learning_rate": 0.002, "loss": 2.3453, "step": 149070 }, { "epoch": 0.576301588037915, "grad_norm": 0.12177325785160065, "learning_rate": 0.002, "loss": 2.3473, "step": 149080 }, { "epoch": 0.5763402452412982, "grad_norm": 0.11904153972864151, "learning_rate": 0.002, "loss": 2.354, "step": 149090 }, { "epoch": 0.5763789024446816, "grad_norm": 0.09998807311058044, "learning_rate": 0.002, "loss": 2.3596, "step": 149100 }, { "epoch": 0.5764175596480648, "grad_norm": 0.12162663042545319, "learning_rate": 0.002, "loss": 2.3501, "step": 149110 }, { "epoch": 0.5764562168514481, "grad_norm": 0.11092780530452728, "learning_rate": 0.002, "loss": 2.3345, "step": 149120 }, { "epoch": 0.5764948740548314, "grad_norm": 0.09261737763881683, "learning_rate": 0.002, "loss": 2.3497, "step": 149130 }, { "epoch": 0.5765335312582146, "grad_norm": 0.1161278486251831, "learning_rate": 0.002, "loss": 2.3445, "step": 149140 }, { "epoch": 0.576572188461598, "grad_norm": 0.09691427648067474, "learning_rate": 0.002, "loss": 2.3388, "step": 149150 }, { "epoch": 0.5766108456649812, "grad_norm": 0.10035521537065506, "learning_rate": 0.002, "loss": 2.3572, "step": 149160 }, { "epoch": 0.5766495028683645, "grad_norm": 0.10514352470636368, "learning_rate": 0.002, "loss": 2.3471, "step": 149170 }, { "epoch": 0.5766881600717477, "grad_norm": 0.21642756462097168, "learning_rate": 0.002, "loss": 2.3502, "step": 149180 }, { "epoch": 0.5767268172751311, "grad_norm": 0.12869301438331604, "learning_rate": 0.002, "loss": 2.3361, "step": 149190 }, { "epoch": 0.5767654744785143, "grad_norm": 0.1055729016661644, "learning_rate": 0.002, "loss": 2.3526, "step": 149200 }, { "epoch": 0.5768041316818976, "grad_norm": 0.0956469178199768, "learning_rate": 0.002, "loss": 2.3425, "step": 149210 }, { "epoch": 0.5768427888852808, "grad_norm": 0.10121183097362518, "learning_rate": 0.002, "loss": 2.3436, "step": 149220 }, { "epoch": 0.5768814460886642, "grad_norm": 0.12185820937156677, "learning_rate": 0.002, "loss": 2.3528, "step": 149230 }, { "epoch": 0.5769201032920475, "grad_norm": 0.11856206506490707, "learning_rate": 0.002, "loss": 2.3403, "step": 149240 }, { "epoch": 0.5769587604954307, "grad_norm": 0.10712940990924835, "learning_rate": 0.002, "loss": 2.3404, "step": 149250 }, { "epoch": 0.576997417698814, "grad_norm": 0.11829926818609238, "learning_rate": 0.002, "loss": 2.3488, "step": 149260 }, { "epoch": 0.5770360749021973, "grad_norm": 0.1022658720612526, "learning_rate": 0.002, "loss": 2.3459, "step": 149270 }, { "epoch": 0.5770747321055806, "grad_norm": 0.11594463884830475, "learning_rate": 0.002, "loss": 2.3487, "step": 149280 }, { "epoch": 0.5771133893089638, "grad_norm": 0.11002032458782196, "learning_rate": 0.002, "loss": 2.3334, "step": 149290 }, { "epoch": 0.5771520465123471, "grad_norm": 0.09113029390573502, "learning_rate": 0.002, "loss": 2.3339, "step": 149300 }, { "epoch": 0.5771907037157303, "grad_norm": 0.11730547249317169, "learning_rate": 0.002, "loss": 2.3498, "step": 149310 }, { "epoch": 0.5772293609191137, "grad_norm": 0.09948255866765976, "learning_rate": 0.002, "loss": 2.3478, "step": 149320 }, { "epoch": 0.577268018122497, "grad_norm": 0.11743677407503128, "learning_rate": 0.002, "loss": 2.3604, "step": 149330 }, { "epoch": 0.5773066753258802, "grad_norm": 0.12467943131923676, "learning_rate": 0.002, "loss": 2.3416, "step": 149340 }, { "epoch": 0.5773453325292635, "grad_norm": 0.1314239203929901, "learning_rate": 0.002, "loss": 2.36, "step": 149350 }, { "epoch": 0.5773839897326468, "grad_norm": 0.1049962267279625, "learning_rate": 0.002, "loss": 2.3584, "step": 149360 }, { "epoch": 0.5774226469360301, "grad_norm": 0.11080177128314972, "learning_rate": 0.002, "loss": 2.3552, "step": 149370 }, { "epoch": 0.5774613041394133, "grad_norm": 0.0954049751162529, "learning_rate": 0.002, "loss": 2.3625, "step": 149380 }, { "epoch": 0.5774999613427966, "grad_norm": 0.12053569406270981, "learning_rate": 0.002, "loss": 2.344, "step": 149390 }, { "epoch": 0.5775386185461799, "grad_norm": 0.10770398378372192, "learning_rate": 0.002, "loss": 2.3415, "step": 149400 }, { "epoch": 0.5775772757495632, "grad_norm": 0.10702679306268692, "learning_rate": 0.002, "loss": 2.3517, "step": 149410 }, { "epoch": 0.5776159329529464, "grad_norm": 0.09334122389554977, "learning_rate": 0.002, "loss": 2.3485, "step": 149420 }, { "epoch": 0.5776545901563297, "grad_norm": 0.11252539604902267, "learning_rate": 0.002, "loss": 2.3606, "step": 149430 }, { "epoch": 0.5776932473597131, "grad_norm": 0.1189262792468071, "learning_rate": 0.002, "loss": 2.3454, "step": 149440 }, { "epoch": 0.5777319045630963, "grad_norm": 0.1360866278409958, "learning_rate": 0.002, "loss": 2.341, "step": 149450 }, { "epoch": 0.5777705617664796, "grad_norm": 0.0987911969423294, "learning_rate": 0.002, "loss": 2.3533, "step": 149460 }, { "epoch": 0.5778092189698628, "grad_norm": 0.11495771259069443, "learning_rate": 0.002, "loss": 2.3485, "step": 149470 }, { "epoch": 0.5778478761732462, "grad_norm": 0.1220431998372078, "learning_rate": 0.002, "loss": 2.343, "step": 149480 }, { "epoch": 0.5778865333766294, "grad_norm": 0.12294565141201019, "learning_rate": 0.002, "loss": 2.3324, "step": 149490 }, { "epoch": 0.5779251905800127, "grad_norm": 0.10714684426784515, "learning_rate": 0.002, "loss": 2.3488, "step": 149500 }, { "epoch": 0.5779638477833959, "grad_norm": 0.11005298048257828, "learning_rate": 0.002, "loss": 2.3476, "step": 149510 }, { "epoch": 0.5780025049867792, "grad_norm": 0.08794310688972473, "learning_rate": 0.002, "loss": 2.3564, "step": 149520 }, { "epoch": 0.5780411621901625, "grad_norm": 0.1080562099814415, "learning_rate": 0.002, "loss": 2.359, "step": 149530 }, { "epoch": 0.5780798193935458, "grad_norm": 0.14582188427448273, "learning_rate": 0.002, "loss": 2.3474, "step": 149540 }, { "epoch": 0.578118476596929, "grad_norm": 0.12180215120315552, "learning_rate": 0.002, "loss": 2.3456, "step": 149550 }, { "epoch": 0.5781571338003123, "grad_norm": 0.10159006714820862, "learning_rate": 0.002, "loss": 2.3489, "step": 149560 }, { "epoch": 0.5781957910036957, "grad_norm": 0.10227713733911514, "learning_rate": 0.002, "loss": 2.3325, "step": 149570 }, { "epoch": 0.5782344482070789, "grad_norm": 0.10744677484035492, "learning_rate": 0.002, "loss": 2.337, "step": 149580 }, { "epoch": 0.5782731054104622, "grad_norm": 0.09768327325582504, "learning_rate": 0.002, "loss": 2.3478, "step": 149590 }, { "epoch": 0.5783117626138454, "grad_norm": 0.10872442275285721, "learning_rate": 0.002, "loss": 2.3591, "step": 149600 }, { "epoch": 0.5783504198172288, "grad_norm": 0.10671534389257431, "learning_rate": 0.002, "loss": 2.3397, "step": 149610 }, { "epoch": 0.578389077020612, "grad_norm": 0.14711715281009674, "learning_rate": 0.002, "loss": 2.3353, "step": 149620 }, { "epoch": 0.5784277342239953, "grad_norm": 0.09580583870410919, "learning_rate": 0.002, "loss": 2.3508, "step": 149630 }, { "epoch": 0.5784663914273785, "grad_norm": 0.0945882648229599, "learning_rate": 0.002, "loss": 2.3525, "step": 149640 }, { "epoch": 0.5785050486307619, "grad_norm": 0.13525830209255219, "learning_rate": 0.002, "loss": 2.3511, "step": 149650 }, { "epoch": 0.5785437058341452, "grad_norm": 0.10206573456525803, "learning_rate": 0.002, "loss": 2.3368, "step": 149660 }, { "epoch": 0.5785823630375284, "grad_norm": 0.09938966482877731, "learning_rate": 0.002, "loss": 2.332, "step": 149670 }, { "epoch": 0.5786210202409117, "grad_norm": 0.10354302078485489, "learning_rate": 0.002, "loss": 2.3382, "step": 149680 }, { "epoch": 0.5786596774442949, "grad_norm": 0.10915887355804443, "learning_rate": 0.002, "loss": 2.3391, "step": 149690 }, { "epoch": 0.5786983346476783, "grad_norm": 0.09858160465955734, "learning_rate": 0.002, "loss": 2.3409, "step": 149700 }, { "epoch": 0.5787369918510615, "grad_norm": 0.09634033590555191, "learning_rate": 0.002, "loss": 2.342, "step": 149710 }, { "epoch": 0.5787756490544448, "grad_norm": 0.10882976651191711, "learning_rate": 0.002, "loss": 2.3418, "step": 149720 }, { "epoch": 0.578814306257828, "grad_norm": 0.12693949043750763, "learning_rate": 0.002, "loss": 2.3465, "step": 149730 }, { "epoch": 0.5788529634612114, "grad_norm": 0.1207302138209343, "learning_rate": 0.002, "loss": 2.3367, "step": 149740 }, { "epoch": 0.5788916206645947, "grad_norm": 0.1069578304886818, "learning_rate": 0.002, "loss": 2.3338, "step": 149750 }, { "epoch": 0.5789302778679779, "grad_norm": 0.10116532444953918, "learning_rate": 0.002, "loss": 2.3622, "step": 149760 }, { "epoch": 0.5789689350713612, "grad_norm": 0.13171598315238953, "learning_rate": 0.002, "loss": 2.3502, "step": 149770 }, { "epoch": 0.5790075922747445, "grad_norm": 0.11328830569982529, "learning_rate": 0.002, "loss": 2.3492, "step": 149780 }, { "epoch": 0.5790462494781278, "grad_norm": 0.10096880793571472, "learning_rate": 0.002, "loss": 2.3355, "step": 149790 }, { "epoch": 0.579084906681511, "grad_norm": 0.11313828825950623, "learning_rate": 0.002, "loss": 2.3454, "step": 149800 }, { "epoch": 0.5791235638848943, "grad_norm": 0.09104368090629578, "learning_rate": 0.002, "loss": 2.3261, "step": 149810 }, { "epoch": 0.5791622210882776, "grad_norm": 0.11752974987030029, "learning_rate": 0.002, "loss": 2.343, "step": 149820 }, { "epoch": 0.5792008782916609, "grad_norm": 0.1244005411863327, "learning_rate": 0.002, "loss": 2.3405, "step": 149830 }, { "epoch": 0.5792395354950441, "grad_norm": 0.10278856009244919, "learning_rate": 0.002, "loss": 2.3326, "step": 149840 }, { "epoch": 0.5792781926984274, "grad_norm": 0.10532396286725998, "learning_rate": 0.002, "loss": 2.336, "step": 149850 }, { "epoch": 0.5793168499018106, "grad_norm": 0.0982581302523613, "learning_rate": 0.002, "loss": 2.3309, "step": 149860 }, { "epoch": 0.579355507105194, "grad_norm": 0.10512799769639969, "learning_rate": 0.002, "loss": 2.3351, "step": 149870 }, { "epoch": 0.5793941643085773, "grad_norm": 0.11000046879053116, "learning_rate": 0.002, "loss": 2.3455, "step": 149880 }, { "epoch": 0.5794328215119605, "grad_norm": 0.11731689423322678, "learning_rate": 0.002, "loss": 2.3521, "step": 149890 }, { "epoch": 0.5794714787153438, "grad_norm": 0.10304244607686996, "learning_rate": 0.002, "loss": 2.3401, "step": 149900 }, { "epoch": 0.5795101359187271, "grad_norm": 0.10752136260271072, "learning_rate": 0.002, "loss": 2.3402, "step": 149910 }, { "epoch": 0.5795487931221104, "grad_norm": 0.10894905775785446, "learning_rate": 0.002, "loss": 2.3354, "step": 149920 }, { "epoch": 0.5795874503254936, "grad_norm": 0.09900854527950287, "learning_rate": 0.002, "loss": 2.3439, "step": 149930 }, { "epoch": 0.5796261075288769, "grad_norm": 0.10060838609933853, "learning_rate": 0.002, "loss": 2.3343, "step": 149940 }, { "epoch": 0.5796647647322603, "grad_norm": 0.0998382493853569, "learning_rate": 0.002, "loss": 2.3553, "step": 149950 }, { "epoch": 0.5797034219356435, "grad_norm": 0.09682802110910416, "learning_rate": 0.002, "loss": 2.3511, "step": 149960 }, { "epoch": 0.5797420791390268, "grad_norm": 0.1006351038813591, "learning_rate": 0.002, "loss": 2.3646, "step": 149970 }, { "epoch": 0.57978073634241, "grad_norm": 0.1387142390012741, "learning_rate": 0.002, "loss": 2.3441, "step": 149980 }, { "epoch": 0.5798193935457934, "grad_norm": 0.11638208478689194, "learning_rate": 0.002, "loss": 2.3343, "step": 149990 }, { "epoch": 0.5798580507491766, "grad_norm": 0.0977184846997261, "learning_rate": 0.002, "loss": 2.3286, "step": 150000 }, { "epoch": 0.5798967079525599, "grad_norm": 0.13405273854732513, "learning_rate": 0.002, "loss": 2.3341, "step": 150010 }, { "epoch": 0.5799353651559431, "grad_norm": 0.09380652010440826, "learning_rate": 0.002, "loss": 2.3365, "step": 150020 }, { "epoch": 0.5799740223593265, "grad_norm": 0.10067908465862274, "learning_rate": 0.002, "loss": 2.3434, "step": 150030 }, { "epoch": 0.5800126795627097, "grad_norm": 0.10465842485427856, "learning_rate": 0.002, "loss": 2.3446, "step": 150040 }, { "epoch": 0.580051336766093, "grad_norm": 0.12888339161872864, "learning_rate": 0.002, "loss": 2.3305, "step": 150050 }, { "epoch": 0.5800899939694762, "grad_norm": 0.11086847633123398, "learning_rate": 0.002, "loss": 2.3495, "step": 150060 }, { "epoch": 0.5801286511728595, "grad_norm": 0.1055656149983406, "learning_rate": 0.002, "loss": 2.3454, "step": 150070 }, { "epoch": 0.5801673083762429, "grad_norm": 0.10147044062614441, "learning_rate": 0.002, "loss": 2.3472, "step": 150080 }, { "epoch": 0.5802059655796261, "grad_norm": 0.09590164572000504, "learning_rate": 0.002, "loss": 2.3328, "step": 150090 }, { "epoch": 0.5802446227830094, "grad_norm": 0.11232464015483856, "learning_rate": 0.002, "loss": 2.3413, "step": 150100 }, { "epoch": 0.5802832799863926, "grad_norm": 0.11140336096286774, "learning_rate": 0.002, "loss": 2.3546, "step": 150110 }, { "epoch": 0.580321937189776, "grad_norm": 0.12357457727193832, "learning_rate": 0.002, "loss": 2.3557, "step": 150120 }, { "epoch": 0.5803605943931592, "grad_norm": 0.10456366837024689, "learning_rate": 0.002, "loss": 2.3393, "step": 150130 }, { "epoch": 0.5803992515965425, "grad_norm": 0.11628726124763489, "learning_rate": 0.002, "loss": 2.3535, "step": 150140 }, { "epoch": 0.5804379087999257, "grad_norm": 0.10711704194545746, "learning_rate": 0.002, "loss": 2.352, "step": 150150 }, { "epoch": 0.5804765660033091, "grad_norm": 0.11025909334421158, "learning_rate": 0.002, "loss": 2.3476, "step": 150160 }, { "epoch": 0.5805152232066924, "grad_norm": 0.11634379625320435, "learning_rate": 0.002, "loss": 2.3364, "step": 150170 }, { "epoch": 0.5805538804100756, "grad_norm": 0.10299687832593918, "learning_rate": 0.002, "loss": 2.3344, "step": 150180 }, { "epoch": 0.5805925376134589, "grad_norm": 0.11979073286056519, "learning_rate": 0.002, "loss": 2.3515, "step": 150190 }, { "epoch": 0.5806311948168422, "grad_norm": 0.10962650924921036, "learning_rate": 0.002, "loss": 2.336, "step": 150200 }, { "epoch": 0.5806698520202255, "grad_norm": 0.09615093469619751, "learning_rate": 0.002, "loss": 2.3421, "step": 150210 }, { "epoch": 0.5807085092236087, "grad_norm": 0.12243734300136566, "learning_rate": 0.002, "loss": 2.3428, "step": 150220 }, { "epoch": 0.580747166426992, "grad_norm": 0.10823040455579758, "learning_rate": 0.002, "loss": 2.343, "step": 150230 }, { "epoch": 0.5807858236303752, "grad_norm": 0.09089124947786331, "learning_rate": 0.002, "loss": 2.3402, "step": 150240 }, { "epoch": 0.5808244808337586, "grad_norm": 0.14514900743961334, "learning_rate": 0.002, "loss": 2.3508, "step": 150250 }, { "epoch": 0.5808631380371418, "grad_norm": 0.10589548200368881, "learning_rate": 0.002, "loss": 2.3571, "step": 150260 }, { "epoch": 0.5809017952405251, "grad_norm": 0.1101815328001976, "learning_rate": 0.002, "loss": 2.3348, "step": 150270 }, { "epoch": 0.5809404524439084, "grad_norm": 0.10971608012914658, "learning_rate": 0.002, "loss": 2.3501, "step": 150280 }, { "epoch": 0.5809791096472917, "grad_norm": 0.11273403465747833, "learning_rate": 0.002, "loss": 2.3273, "step": 150290 }, { "epoch": 0.581017766850675, "grad_norm": 0.1006007120013237, "learning_rate": 0.002, "loss": 2.3502, "step": 150300 }, { "epoch": 0.5810564240540582, "grad_norm": 0.10495468974113464, "learning_rate": 0.002, "loss": 2.3601, "step": 150310 }, { "epoch": 0.5810950812574415, "grad_norm": 0.1348286271095276, "learning_rate": 0.002, "loss": 2.3486, "step": 150320 }, { "epoch": 0.5811337384608248, "grad_norm": 0.09644333273172379, "learning_rate": 0.002, "loss": 2.3431, "step": 150330 }, { "epoch": 0.5811723956642081, "grad_norm": 0.1011551171541214, "learning_rate": 0.002, "loss": 2.3525, "step": 150340 }, { "epoch": 0.5812110528675913, "grad_norm": 0.102486751973629, "learning_rate": 0.002, "loss": 2.3362, "step": 150350 }, { "epoch": 0.5812497100709746, "grad_norm": 0.10425101220607758, "learning_rate": 0.002, "loss": 2.3436, "step": 150360 }, { "epoch": 0.581288367274358, "grad_norm": 0.11448422074317932, "learning_rate": 0.002, "loss": 2.3432, "step": 150370 }, { "epoch": 0.5813270244777412, "grad_norm": 0.11644108593463898, "learning_rate": 0.002, "loss": 2.3416, "step": 150380 }, { "epoch": 0.5813656816811245, "grad_norm": 0.11712195724248886, "learning_rate": 0.002, "loss": 2.3374, "step": 150390 }, { "epoch": 0.5814043388845077, "grad_norm": 0.11532865464687347, "learning_rate": 0.002, "loss": 2.3277, "step": 150400 }, { "epoch": 0.5814429960878911, "grad_norm": 0.09724882245063782, "learning_rate": 0.002, "loss": 2.3368, "step": 150410 }, { "epoch": 0.5814816532912743, "grad_norm": 0.09928759932518005, "learning_rate": 0.002, "loss": 2.3366, "step": 150420 }, { "epoch": 0.5815203104946576, "grad_norm": 0.11150534451007843, "learning_rate": 0.002, "loss": 2.3588, "step": 150430 }, { "epoch": 0.5815589676980408, "grad_norm": 0.10577598959207535, "learning_rate": 0.002, "loss": 2.3521, "step": 150440 }, { "epoch": 0.5815976249014241, "grad_norm": 0.10753431171178818, "learning_rate": 0.002, "loss": 2.3275, "step": 150450 }, { "epoch": 0.5816362821048074, "grad_norm": 0.10999209433794022, "learning_rate": 0.002, "loss": 2.3529, "step": 150460 }, { "epoch": 0.5816749393081907, "grad_norm": 0.11631323397159576, "learning_rate": 0.002, "loss": 2.3441, "step": 150470 }, { "epoch": 0.581713596511574, "grad_norm": 0.10544676333665848, "learning_rate": 0.002, "loss": 2.3411, "step": 150480 }, { "epoch": 0.5817522537149572, "grad_norm": 0.12632738053798676, "learning_rate": 0.002, "loss": 2.3536, "step": 150490 }, { "epoch": 0.5817909109183406, "grad_norm": 0.11686243116855621, "learning_rate": 0.002, "loss": 2.3479, "step": 150500 }, { "epoch": 0.5818295681217238, "grad_norm": 0.09398223459720612, "learning_rate": 0.002, "loss": 2.3417, "step": 150510 }, { "epoch": 0.5818682253251071, "grad_norm": 0.1024533212184906, "learning_rate": 0.002, "loss": 2.3232, "step": 150520 }, { "epoch": 0.5819068825284903, "grad_norm": 0.1734480857849121, "learning_rate": 0.002, "loss": 2.3375, "step": 150530 }, { "epoch": 0.5819455397318737, "grad_norm": 0.10755108296871185, "learning_rate": 0.002, "loss": 2.3524, "step": 150540 }, { "epoch": 0.5819841969352569, "grad_norm": 0.10732606053352356, "learning_rate": 0.002, "loss": 2.355, "step": 150550 }, { "epoch": 0.5820228541386402, "grad_norm": 0.10095525532960892, "learning_rate": 0.002, "loss": 2.3295, "step": 150560 }, { "epoch": 0.5820615113420234, "grad_norm": 0.0937095358967781, "learning_rate": 0.002, "loss": 2.323, "step": 150570 }, { "epoch": 0.5821001685454068, "grad_norm": 0.10207615792751312, "learning_rate": 0.002, "loss": 2.3394, "step": 150580 }, { "epoch": 0.5821388257487901, "grad_norm": 0.09388964623212814, "learning_rate": 0.002, "loss": 2.3399, "step": 150590 }, { "epoch": 0.5821774829521733, "grad_norm": 0.10827864706516266, "learning_rate": 0.002, "loss": 2.3363, "step": 150600 }, { "epoch": 0.5822161401555566, "grad_norm": 0.10733626037836075, "learning_rate": 0.002, "loss": 2.3391, "step": 150610 }, { "epoch": 0.5822547973589398, "grad_norm": 0.10135097801685333, "learning_rate": 0.002, "loss": 2.3557, "step": 150620 }, { "epoch": 0.5822934545623232, "grad_norm": 0.11220720410346985, "learning_rate": 0.002, "loss": 2.351, "step": 150630 }, { "epoch": 0.5823321117657064, "grad_norm": 0.1044202595949173, "learning_rate": 0.002, "loss": 2.3378, "step": 150640 }, { "epoch": 0.5823707689690897, "grad_norm": 0.10599269717931747, "learning_rate": 0.002, "loss": 2.3368, "step": 150650 }, { "epoch": 0.5824094261724729, "grad_norm": 0.1008022353053093, "learning_rate": 0.002, "loss": 2.3441, "step": 150660 }, { "epoch": 0.5824480833758563, "grad_norm": 0.11797475069761276, "learning_rate": 0.002, "loss": 2.3349, "step": 150670 }, { "epoch": 0.5824867405792395, "grad_norm": 0.10903535038232803, "learning_rate": 0.002, "loss": 2.3521, "step": 150680 }, { "epoch": 0.5825253977826228, "grad_norm": 0.14368629455566406, "learning_rate": 0.002, "loss": 2.3446, "step": 150690 }, { "epoch": 0.582564054986006, "grad_norm": 0.10055279731750488, "learning_rate": 0.002, "loss": 2.3307, "step": 150700 }, { "epoch": 0.5826027121893894, "grad_norm": 0.11629831790924072, "learning_rate": 0.002, "loss": 2.3377, "step": 150710 }, { "epoch": 0.5826413693927727, "grad_norm": 0.10161113739013672, "learning_rate": 0.002, "loss": 2.3224, "step": 150720 }, { "epoch": 0.5826800265961559, "grad_norm": 0.10987081378698349, "learning_rate": 0.002, "loss": 2.345, "step": 150730 }, { "epoch": 0.5827186837995392, "grad_norm": 0.18283596634864807, "learning_rate": 0.002, "loss": 2.3383, "step": 150740 }, { "epoch": 0.5827573410029225, "grad_norm": 0.09929069131612778, "learning_rate": 0.002, "loss": 2.3519, "step": 150750 }, { "epoch": 0.5827959982063058, "grad_norm": 0.10900171846151352, "learning_rate": 0.002, "loss": 2.3357, "step": 150760 }, { "epoch": 0.582834655409689, "grad_norm": 0.11824193596839905, "learning_rate": 0.002, "loss": 2.3413, "step": 150770 }, { "epoch": 0.5828733126130723, "grad_norm": 0.10111094266176224, "learning_rate": 0.002, "loss": 2.3464, "step": 150780 }, { "epoch": 0.5829119698164555, "grad_norm": 0.11064330488443375, "learning_rate": 0.002, "loss": 2.3372, "step": 150790 }, { "epoch": 0.5829506270198389, "grad_norm": 0.11837726086378098, "learning_rate": 0.002, "loss": 2.3472, "step": 150800 }, { "epoch": 0.5829892842232222, "grad_norm": 0.11166118830442429, "learning_rate": 0.002, "loss": 2.3685, "step": 150810 }, { "epoch": 0.5830279414266054, "grad_norm": 0.09941312670707703, "learning_rate": 0.002, "loss": 2.34, "step": 150820 }, { "epoch": 0.5830665986299887, "grad_norm": 0.12270691245794296, "learning_rate": 0.002, "loss": 2.3424, "step": 150830 }, { "epoch": 0.583105255833372, "grad_norm": 0.11068408936262131, "learning_rate": 0.002, "loss": 2.3443, "step": 150840 }, { "epoch": 0.5831439130367553, "grad_norm": 0.1082112044095993, "learning_rate": 0.002, "loss": 2.3495, "step": 150850 }, { "epoch": 0.5831825702401385, "grad_norm": 0.11955466866493225, "learning_rate": 0.002, "loss": 2.3479, "step": 150860 }, { "epoch": 0.5832212274435218, "grad_norm": 0.11465366184711456, "learning_rate": 0.002, "loss": 2.3394, "step": 150870 }, { "epoch": 0.5832598846469051, "grad_norm": 0.10802769660949707, "learning_rate": 0.002, "loss": 2.3468, "step": 150880 }, { "epoch": 0.5832985418502884, "grad_norm": 0.10408146679401398, "learning_rate": 0.002, "loss": 2.3424, "step": 150890 }, { "epoch": 0.5833371990536717, "grad_norm": 0.13040228188037872, "learning_rate": 0.002, "loss": 2.335, "step": 150900 }, { "epoch": 0.5833758562570549, "grad_norm": 0.11632059514522552, "learning_rate": 0.002, "loss": 2.3613, "step": 150910 }, { "epoch": 0.5834145134604383, "grad_norm": 0.10285551100969315, "learning_rate": 0.002, "loss": 2.344, "step": 150920 }, { "epoch": 0.5834531706638215, "grad_norm": 0.11648505181074142, "learning_rate": 0.002, "loss": 2.3503, "step": 150930 }, { "epoch": 0.5834918278672048, "grad_norm": 0.09681139141321182, "learning_rate": 0.002, "loss": 2.3472, "step": 150940 }, { "epoch": 0.583530485070588, "grad_norm": 0.11876310408115387, "learning_rate": 0.002, "loss": 2.3252, "step": 150950 }, { "epoch": 0.5835691422739714, "grad_norm": 0.09588060528039932, "learning_rate": 0.002, "loss": 2.3423, "step": 150960 }, { "epoch": 0.5836077994773546, "grad_norm": 0.11005954444408417, "learning_rate": 0.002, "loss": 2.3189, "step": 150970 }, { "epoch": 0.5836464566807379, "grad_norm": 0.09692657738924026, "learning_rate": 0.002, "loss": 2.3593, "step": 150980 }, { "epoch": 0.5836851138841211, "grad_norm": 0.12167064845561981, "learning_rate": 0.002, "loss": 2.3503, "step": 150990 }, { "epoch": 0.5837237710875044, "grad_norm": 0.12240065634250641, "learning_rate": 0.002, "loss": 2.3351, "step": 151000 }, { "epoch": 0.5837624282908878, "grad_norm": 0.09758885204792023, "learning_rate": 0.002, "loss": 2.3284, "step": 151010 }, { "epoch": 0.583801085494271, "grad_norm": 0.11564390361309052, "learning_rate": 0.002, "loss": 2.3607, "step": 151020 }, { "epoch": 0.5838397426976543, "grad_norm": 0.11047053337097168, "learning_rate": 0.002, "loss": 2.356, "step": 151030 }, { "epoch": 0.5838783999010375, "grad_norm": 0.11683172732591629, "learning_rate": 0.002, "loss": 2.3466, "step": 151040 }, { "epoch": 0.5839170571044209, "grad_norm": 0.09342314302921295, "learning_rate": 0.002, "loss": 2.3395, "step": 151050 }, { "epoch": 0.5839557143078041, "grad_norm": 0.10871961712837219, "learning_rate": 0.002, "loss": 2.3267, "step": 151060 }, { "epoch": 0.5839943715111874, "grad_norm": 0.09939336031675339, "learning_rate": 0.002, "loss": 2.3509, "step": 151070 }, { "epoch": 0.5840330287145706, "grad_norm": 0.0856621041893959, "learning_rate": 0.002, "loss": 2.3519, "step": 151080 }, { "epoch": 0.584071685917954, "grad_norm": 0.11879793554544449, "learning_rate": 0.002, "loss": 2.3402, "step": 151090 }, { "epoch": 0.5841103431213372, "grad_norm": 0.10782366245985031, "learning_rate": 0.002, "loss": 2.3277, "step": 151100 }, { "epoch": 0.5841490003247205, "grad_norm": 0.10675476491451263, "learning_rate": 0.002, "loss": 2.3401, "step": 151110 }, { "epoch": 0.5841876575281038, "grad_norm": 0.1117832288146019, "learning_rate": 0.002, "loss": 2.3535, "step": 151120 }, { "epoch": 0.5842263147314871, "grad_norm": 0.10932677239179611, "learning_rate": 0.002, "loss": 2.3264, "step": 151130 }, { "epoch": 0.5842649719348704, "grad_norm": 0.11666686087846756, "learning_rate": 0.002, "loss": 2.3491, "step": 151140 }, { "epoch": 0.5843036291382536, "grad_norm": 0.09197304397821426, "learning_rate": 0.002, "loss": 2.338, "step": 151150 }, { "epoch": 0.5843422863416369, "grad_norm": 0.11431872099637985, "learning_rate": 0.002, "loss": 2.3378, "step": 151160 }, { "epoch": 0.5843809435450201, "grad_norm": 0.1133253276348114, "learning_rate": 0.002, "loss": 2.3502, "step": 151170 }, { "epoch": 0.5844196007484035, "grad_norm": 0.10361841320991516, "learning_rate": 0.002, "loss": 2.3512, "step": 151180 }, { "epoch": 0.5844582579517867, "grad_norm": 0.10608696192502975, "learning_rate": 0.002, "loss": 2.3473, "step": 151190 }, { "epoch": 0.58449691515517, "grad_norm": 0.10624140501022339, "learning_rate": 0.002, "loss": 2.3529, "step": 151200 }, { "epoch": 0.5845355723585532, "grad_norm": 0.10633480548858643, "learning_rate": 0.002, "loss": 2.3399, "step": 151210 }, { "epoch": 0.5845742295619366, "grad_norm": 0.09925577044487, "learning_rate": 0.002, "loss": 2.352, "step": 151220 }, { "epoch": 0.5846128867653199, "grad_norm": 0.1234511286020279, "learning_rate": 0.002, "loss": 2.3386, "step": 151230 }, { "epoch": 0.5846515439687031, "grad_norm": 0.10758979618549347, "learning_rate": 0.002, "loss": 2.3308, "step": 151240 }, { "epoch": 0.5846902011720864, "grad_norm": 0.09353959560394287, "learning_rate": 0.002, "loss": 2.3415, "step": 151250 }, { "epoch": 0.5847288583754697, "grad_norm": 0.12607234716415405, "learning_rate": 0.002, "loss": 2.3482, "step": 151260 }, { "epoch": 0.584767515578853, "grad_norm": 0.13077405095100403, "learning_rate": 0.002, "loss": 2.349, "step": 151270 }, { "epoch": 0.5848061727822362, "grad_norm": 0.10392604023218155, "learning_rate": 0.002, "loss": 2.341, "step": 151280 }, { "epoch": 0.5848448299856195, "grad_norm": 0.13296332955360413, "learning_rate": 0.002, "loss": 2.3478, "step": 151290 }, { "epoch": 0.5848834871890028, "grad_norm": 0.09580894559621811, "learning_rate": 0.002, "loss": 2.3503, "step": 151300 }, { "epoch": 0.5849221443923861, "grad_norm": 0.1118881031870842, "learning_rate": 0.002, "loss": 2.3432, "step": 151310 }, { "epoch": 0.5849608015957694, "grad_norm": 0.10997623950242996, "learning_rate": 0.002, "loss": 2.355, "step": 151320 }, { "epoch": 0.5849994587991526, "grad_norm": 0.09829279035329819, "learning_rate": 0.002, "loss": 2.3336, "step": 151330 }, { "epoch": 0.5850381160025359, "grad_norm": 0.12762117385864258, "learning_rate": 0.002, "loss": 2.3548, "step": 151340 }, { "epoch": 0.5850767732059192, "grad_norm": 0.10648144781589508, "learning_rate": 0.002, "loss": 2.3384, "step": 151350 }, { "epoch": 0.5851154304093025, "grad_norm": 0.1168905720114708, "learning_rate": 0.002, "loss": 2.3386, "step": 151360 }, { "epoch": 0.5851540876126857, "grad_norm": 0.11103395372629166, "learning_rate": 0.002, "loss": 2.3411, "step": 151370 }, { "epoch": 0.585192744816069, "grad_norm": 0.09544747322797775, "learning_rate": 0.002, "loss": 2.3402, "step": 151380 }, { "epoch": 0.5852314020194523, "grad_norm": 0.1289357841014862, "learning_rate": 0.002, "loss": 2.3391, "step": 151390 }, { "epoch": 0.5852700592228356, "grad_norm": 0.10271959006786346, "learning_rate": 0.002, "loss": 2.344, "step": 151400 }, { "epoch": 0.5853087164262188, "grad_norm": 0.11833061277866364, "learning_rate": 0.002, "loss": 2.3325, "step": 151410 }, { "epoch": 0.5853473736296021, "grad_norm": 0.11826982349157333, "learning_rate": 0.002, "loss": 2.3504, "step": 151420 }, { "epoch": 0.5853860308329855, "grad_norm": 0.11418145149946213, "learning_rate": 0.002, "loss": 2.3322, "step": 151430 }, { "epoch": 0.5854246880363687, "grad_norm": 0.10659082233905792, "learning_rate": 0.002, "loss": 2.3346, "step": 151440 }, { "epoch": 0.585463345239752, "grad_norm": 0.09980858862400055, "learning_rate": 0.002, "loss": 2.3453, "step": 151450 }, { "epoch": 0.5855020024431352, "grad_norm": 0.10436010360717773, "learning_rate": 0.002, "loss": 2.3576, "step": 151460 }, { "epoch": 0.5855406596465186, "grad_norm": 0.10832711309194565, "learning_rate": 0.002, "loss": 2.358, "step": 151470 }, { "epoch": 0.5855793168499018, "grad_norm": 0.11657103151082993, "learning_rate": 0.002, "loss": 2.3329, "step": 151480 }, { "epoch": 0.5856179740532851, "grad_norm": 0.10700567066669464, "learning_rate": 0.002, "loss": 2.3494, "step": 151490 }, { "epoch": 0.5856566312566683, "grad_norm": 0.10693954676389694, "learning_rate": 0.002, "loss": 2.3523, "step": 151500 }, { "epoch": 0.5856952884600517, "grad_norm": 0.10214036703109741, "learning_rate": 0.002, "loss": 2.3336, "step": 151510 }, { "epoch": 0.585733945663435, "grad_norm": 0.10415514558553696, "learning_rate": 0.002, "loss": 2.3393, "step": 151520 }, { "epoch": 0.5857726028668182, "grad_norm": 0.11231443285942078, "learning_rate": 0.002, "loss": 2.3355, "step": 151530 }, { "epoch": 0.5858112600702015, "grad_norm": 0.10299165546894073, "learning_rate": 0.002, "loss": 2.3349, "step": 151540 }, { "epoch": 0.5858499172735847, "grad_norm": 0.08635806292295456, "learning_rate": 0.002, "loss": 2.3437, "step": 151550 }, { "epoch": 0.5858885744769681, "grad_norm": 0.10262977331876755, "learning_rate": 0.002, "loss": 2.3472, "step": 151560 }, { "epoch": 0.5859272316803513, "grad_norm": 0.10760878771543503, "learning_rate": 0.002, "loss": 2.3562, "step": 151570 }, { "epoch": 0.5859658888837346, "grad_norm": 0.1183459609746933, "learning_rate": 0.002, "loss": 2.3238, "step": 151580 }, { "epoch": 0.5860045460871178, "grad_norm": 0.1195802241563797, "learning_rate": 0.002, "loss": 2.3594, "step": 151590 }, { "epoch": 0.5860432032905012, "grad_norm": 0.09660229086875916, "learning_rate": 0.002, "loss": 2.3462, "step": 151600 }, { "epoch": 0.5860818604938844, "grad_norm": 0.11083091050386429, "learning_rate": 0.002, "loss": 2.3546, "step": 151610 }, { "epoch": 0.5861205176972677, "grad_norm": 0.09059544652700424, "learning_rate": 0.002, "loss": 2.3408, "step": 151620 }, { "epoch": 0.586159174900651, "grad_norm": 0.10028998553752899, "learning_rate": 0.002, "loss": 2.3422, "step": 151630 }, { "epoch": 0.5861978321040343, "grad_norm": 0.11900894343852997, "learning_rate": 0.002, "loss": 2.3623, "step": 151640 }, { "epoch": 0.5862364893074176, "grad_norm": 0.10602059215307236, "learning_rate": 0.002, "loss": 2.3551, "step": 151650 }, { "epoch": 0.5862751465108008, "grad_norm": 0.10835479944944382, "learning_rate": 0.002, "loss": 2.3352, "step": 151660 }, { "epoch": 0.5863138037141841, "grad_norm": 0.09900346398353577, "learning_rate": 0.002, "loss": 2.3473, "step": 151670 }, { "epoch": 0.5863524609175674, "grad_norm": 0.10289320349693298, "learning_rate": 0.002, "loss": 2.3592, "step": 151680 }, { "epoch": 0.5863911181209507, "grad_norm": 0.12360788136720657, "learning_rate": 0.002, "loss": 2.3435, "step": 151690 }, { "epoch": 0.5864297753243339, "grad_norm": 0.0973445475101471, "learning_rate": 0.002, "loss": 2.3482, "step": 151700 }, { "epoch": 0.5864684325277172, "grad_norm": 0.10253198444843292, "learning_rate": 0.002, "loss": 2.3221, "step": 151710 }, { "epoch": 0.5865070897311004, "grad_norm": 0.2577599585056305, "learning_rate": 0.002, "loss": 2.3443, "step": 151720 }, { "epoch": 0.5865457469344838, "grad_norm": 0.13467490673065186, "learning_rate": 0.002, "loss": 2.348, "step": 151730 }, { "epoch": 0.586584404137867, "grad_norm": 0.09774551540613174, "learning_rate": 0.002, "loss": 2.3493, "step": 151740 }, { "epoch": 0.5866230613412503, "grad_norm": 0.10804309695959091, "learning_rate": 0.002, "loss": 2.3476, "step": 151750 }, { "epoch": 0.5866617185446336, "grad_norm": 0.10399198532104492, "learning_rate": 0.002, "loss": 2.3343, "step": 151760 }, { "epoch": 0.5867003757480169, "grad_norm": 0.11586041003465652, "learning_rate": 0.002, "loss": 2.3531, "step": 151770 }, { "epoch": 0.5867390329514002, "grad_norm": 0.11312390863895416, "learning_rate": 0.002, "loss": 2.3645, "step": 151780 }, { "epoch": 0.5867776901547834, "grad_norm": 0.10324378311634064, "learning_rate": 0.002, "loss": 2.3402, "step": 151790 }, { "epoch": 0.5868163473581667, "grad_norm": 0.08976956456899643, "learning_rate": 0.002, "loss": 2.341, "step": 151800 }, { "epoch": 0.58685500456155, "grad_norm": 0.10378418862819672, "learning_rate": 0.002, "loss": 2.3408, "step": 151810 }, { "epoch": 0.5868936617649333, "grad_norm": 0.11894010752439499, "learning_rate": 0.002, "loss": 2.358, "step": 151820 }, { "epoch": 0.5869323189683165, "grad_norm": 0.11267915368080139, "learning_rate": 0.002, "loss": 2.3414, "step": 151830 }, { "epoch": 0.5869709761716998, "grad_norm": 0.1251029670238495, "learning_rate": 0.002, "loss": 2.35, "step": 151840 }, { "epoch": 0.5870096333750832, "grad_norm": 0.11871767044067383, "learning_rate": 0.002, "loss": 2.3276, "step": 151850 }, { "epoch": 0.5870482905784664, "grad_norm": 0.11793194711208344, "learning_rate": 0.002, "loss": 2.347, "step": 151860 }, { "epoch": 0.5870869477818497, "grad_norm": 0.10210611671209335, "learning_rate": 0.002, "loss": 2.3369, "step": 151870 }, { "epoch": 0.5871256049852329, "grad_norm": 0.09311977028846741, "learning_rate": 0.002, "loss": 2.3539, "step": 151880 }, { "epoch": 0.5871642621886163, "grad_norm": 0.09820962697267532, "learning_rate": 0.002, "loss": 2.3588, "step": 151890 }, { "epoch": 0.5872029193919995, "grad_norm": 0.11874396353960037, "learning_rate": 0.002, "loss": 2.3414, "step": 151900 }, { "epoch": 0.5872415765953828, "grad_norm": 0.1036624014377594, "learning_rate": 0.002, "loss": 2.3539, "step": 151910 }, { "epoch": 0.587280233798766, "grad_norm": 0.11919917911291122, "learning_rate": 0.002, "loss": 2.3405, "step": 151920 }, { "epoch": 0.5873188910021493, "grad_norm": 0.12413368374109268, "learning_rate": 0.002, "loss": 2.3415, "step": 151930 }, { "epoch": 0.5873575482055327, "grad_norm": 0.11070192605257034, "learning_rate": 0.002, "loss": 2.3372, "step": 151940 }, { "epoch": 0.5873962054089159, "grad_norm": 0.11603335291147232, "learning_rate": 0.002, "loss": 2.3503, "step": 151950 }, { "epoch": 0.5874348626122992, "grad_norm": 0.09753865003585815, "learning_rate": 0.002, "loss": 2.351, "step": 151960 }, { "epoch": 0.5874735198156824, "grad_norm": 0.13039851188659668, "learning_rate": 0.002, "loss": 2.3387, "step": 151970 }, { "epoch": 0.5875121770190658, "grad_norm": 0.09103492647409439, "learning_rate": 0.002, "loss": 2.3279, "step": 151980 }, { "epoch": 0.587550834222449, "grad_norm": 0.1167258694767952, "learning_rate": 0.002, "loss": 2.331, "step": 151990 }, { "epoch": 0.5875894914258323, "grad_norm": 0.11920642852783203, "learning_rate": 0.002, "loss": 2.3603, "step": 152000 }, { "epoch": 0.5876281486292155, "grad_norm": 0.10152848064899445, "learning_rate": 0.002, "loss": 2.3513, "step": 152010 }, { "epoch": 0.5876668058325989, "grad_norm": 0.10228315740823746, "learning_rate": 0.002, "loss": 2.3408, "step": 152020 }, { "epoch": 0.5877054630359821, "grad_norm": 0.1047983169555664, "learning_rate": 0.002, "loss": 2.355, "step": 152030 }, { "epoch": 0.5877441202393654, "grad_norm": 0.11148735135793686, "learning_rate": 0.002, "loss": 2.3442, "step": 152040 }, { "epoch": 0.5877827774427486, "grad_norm": 0.10150796920061111, "learning_rate": 0.002, "loss": 2.3489, "step": 152050 }, { "epoch": 0.587821434646132, "grad_norm": 0.11171367019414902, "learning_rate": 0.002, "loss": 2.3404, "step": 152060 }, { "epoch": 0.5878600918495153, "grad_norm": 0.09901855885982513, "learning_rate": 0.002, "loss": 2.3598, "step": 152070 }, { "epoch": 0.5878987490528985, "grad_norm": 0.10765265673398972, "learning_rate": 0.002, "loss": 2.359, "step": 152080 }, { "epoch": 0.5879374062562818, "grad_norm": 0.12273464351892471, "learning_rate": 0.002, "loss": 2.3458, "step": 152090 }, { "epoch": 0.587976063459665, "grad_norm": 0.1203690692782402, "learning_rate": 0.002, "loss": 2.3441, "step": 152100 }, { "epoch": 0.5880147206630484, "grad_norm": 0.12574422359466553, "learning_rate": 0.002, "loss": 2.3338, "step": 152110 }, { "epoch": 0.5880533778664316, "grad_norm": 0.09954094886779785, "learning_rate": 0.002, "loss": 2.3406, "step": 152120 }, { "epoch": 0.5880920350698149, "grad_norm": 0.09980536252260208, "learning_rate": 0.002, "loss": 2.34, "step": 152130 }, { "epoch": 0.5881306922731981, "grad_norm": 0.10849287360906601, "learning_rate": 0.002, "loss": 2.3379, "step": 152140 }, { "epoch": 0.5881693494765815, "grad_norm": 0.11069836467504501, "learning_rate": 0.002, "loss": 2.3577, "step": 152150 }, { "epoch": 0.5882080066799648, "grad_norm": 0.09529554843902588, "learning_rate": 0.002, "loss": 2.3484, "step": 152160 }, { "epoch": 0.588246663883348, "grad_norm": 0.10251989960670471, "learning_rate": 0.002, "loss": 2.327, "step": 152170 }, { "epoch": 0.5882853210867313, "grad_norm": 0.10750257223844528, "learning_rate": 0.002, "loss": 2.3348, "step": 152180 }, { "epoch": 0.5883239782901146, "grad_norm": 0.1048988401889801, "learning_rate": 0.002, "loss": 2.3404, "step": 152190 }, { "epoch": 0.5883626354934979, "grad_norm": 0.10716889053583145, "learning_rate": 0.002, "loss": 2.334, "step": 152200 }, { "epoch": 0.5884012926968811, "grad_norm": 0.09506648778915405, "learning_rate": 0.002, "loss": 2.3573, "step": 152210 }, { "epoch": 0.5884399499002644, "grad_norm": 0.12282132357358932, "learning_rate": 0.002, "loss": 2.3491, "step": 152220 }, { "epoch": 0.5884786071036477, "grad_norm": 0.1068616658449173, "learning_rate": 0.002, "loss": 2.3318, "step": 152230 }, { "epoch": 0.588517264307031, "grad_norm": 0.1003294587135315, "learning_rate": 0.002, "loss": 2.3584, "step": 152240 }, { "epoch": 0.5885559215104142, "grad_norm": 0.10827738791704178, "learning_rate": 0.002, "loss": 2.3514, "step": 152250 }, { "epoch": 0.5885945787137975, "grad_norm": 0.10909453779459, "learning_rate": 0.002, "loss": 2.3437, "step": 152260 }, { "epoch": 0.5886332359171808, "grad_norm": 0.1163821890950203, "learning_rate": 0.002, "loss": 2.3337, "step": 152270 }, { "epoch": 0.5886718931205641, "grad_norm": 0.09663323312997818, "learning_rate": 0.002, "loss": 2.3297, "step": 152280 }, { "epoch": 0.5887105503239474, "grad_norm": 0.0876787081360817, "learning_rate": 0.002, "loss": 2.3445, "step": 152290 }, { "epoch": 0.5887492075273306, "grad_norm": 0.09570583701133728, "learning_rate": 0.002, "loss": 2.3373, "step": 152300 }, { "epoch": 0.5887878647307139, "grad_norm": 0.3451126217842102, "learning_rate": 0.002, "loss": 2.3508, "step": 152310 }, { "epoch": 0.5888265219340972, "grad_norm": 0.11119550466537476, "learning_rate": 0.002, "loss": 2.3419, "step": 152320 }, { "epoch": 0.5888651791374805, "grad_norm": 0.10163173079490662, "learning_rate": 0.002, "loss": 2.3414, "step": 152330 }, { "epoch": 0.5889038363408637, "grad_norm": 0.10424763709306717, "learning_rate": 0.002, "loss": 2.3612, "step": 152340 }, { "epoch": 0.588942493544247, "grad_norm": 0.10825823992490768, "learning_rate": 0.002, "loss": 2.3388, "step": 152350 }, { "epoch": 0.5889811507476304, "grad_norm": 0.1267893761396408, "learning_rate": 0.002, "loss": 2.366, "step": 152360 }, { "epoch": 0.5890198079510136, "grad_norm": 0.10046001523733139, "learning_rate": 0.002, "loss": 2.3322, "step": 152370 }, { "epoch": 0.5890584651543969, "grad_norm": 0.09802465885877609, "learning_rate": 0.002, "loss": 2.345, "step": 152380 }, { "epoch": 0.5890971223577801, "grad_norm": 0.09976851940155029, "learning_rate": 0.002, "loss": 2.354, "step": 152390 }, { "epoch": 0.5891357795611635, "grad_norm": 0.11055140197277069, "learning_rate": 0.002, "loss": 2.3543, "step": 152400 }, { "epoch": 0.5891744367645467, "grad_norm": 0.10662383586168289, "learning_rate": 0.002, "loss": 2.3439, "step": 152410 }, { "epoch": 0.58921309396793, "grad_norm": 0.11698868870735168, "learning_rate": 0.002, "loss": 2.3469, "step": 152420 }, { "epoch": 0.5892517511713132, "grad_norm": 0.10271777957677841, "learning_rate": 0.002, "loss": 2.3584, "step": 152430 }, { "epoch": 0.5892904083746966, "grad_norm": 0.10462356358766556, "learning_rate": 0.002, "loss": 2.3535, "step": 152440 }, { "epoch": 0.5893290655780798, "grad_norm": 0.13692766427993774, "learning_rate": 0.002, "loss": 2.3385, "step": 152450 }, { "epoch": 0.5893677227814631, "grad_norm": 0.09228157997131348, "learning_rate": 0.002, "loss": 2.3485, "step": 152460 }, { "epoch": 0.5894063799848464, "grad_norm": 0.1338038593530655, "learning_rate": 0.002, "loss": 2.3436, "step": 152470 }, { "epoch": 0.5894450371882296, "grad_norm": 0.10304766148328781, "learning_rate": 0.002, "loss": 2.3331, "step": 152480 }, { "epoch": 0.589483694391613, "grad_norm": 0.12008009105920792, "learning_rate": 0.002, "loss": 2.3455, "step": 152490 }, { "epoch": 0.5895223515949962, "grad_norm": 0.11367745697498322, "learning_rate": 0.002, "loss": 2.3417, "step": 152500 }, { "epoch": 0.5895610087983795, "grad_norm": 0.10777866095304489, "learning_rate": 0.002, "loss": 2.3426, "step": 152510 }, { "epoch": 0.5895996660017627, "grad_norm": 0.09322947263717651, "learning_rate": 0.002, "loss": 2.3497, "step": 152520 }, { "epoch": 0.5896383232051461, "grad_norm": 0.10844715684652328, "learning_rate": 0.002, "loss": 2.33, "step": 152530 }, { "epoch": 0.5896769804085293, "grad_norm": 0.11750151962041855, "learning_rate": 0.002, "loss": 2.3285, "step": 152540 }, { "epoch": 0.5897156376119126, "grad_norm": 0.09563729166984558, "learning_rate": 0.002, "loss": 2.3426, "step": 152550 }, { "epoch": 0.5897542948152958, "grad_norm": 0.13296782970428467, "learning_rate": 0.002, "loss": 2.3369, "step": 152560 }, { "epoch": 0.5897929520186792, "grad_norm": 0.11598195135593414, "learning_rate": 0.002, "loss": 2.3365, "step": 152570 }, { "epoch": 0.5898316092220625, "grad_norm": 0.11028944700956345, "learning_rate": 0.002, "loss": 2.3429, "step": 152580 }, { "epoch": 0.5898702664254457, "grad_norm": 0.10078743100166321, "learning_rate": 0.002, "loss": 2.3354, "step": 152590 }, { "epoch": 0.589908923628829, "grad_norm": 0.11776949465274811, "learning_rate": 0.002, "loss": 2.3512, "step": 152600 }, { "epoch": 0.5899475808322123, "grad_norm": 0.10884489864110947, "learning_rate": 0.002, "loss": 2.3473, "step": 152610 }, { "epoch": 0.5899862380355956, "grad_norm": 0.10422182828187943, "learning_rate": 0.002, "loss": 2.3438, "step": 152620 }, { "epoch": 0.5900248952389788, "grad_norm": 0.10939929634332657, "learning_rate": 0.002, "loss": 2.34, "step": 152630 }, { "epoch": 0.5900635524423621, "grad_norm": 0.10515302419662476, "learning_rate": 0.002, "loss": 2.3488, "step": 152640 }, { "epoch": 0.5901022096457453, "grad_norm": 0.10827838629484177, "learning_rate": 0.002, "loss": 2.3385, "step": 152650 }, { "epoch": 0.5901408668491287, "grad_norm": 0.09868855774402618, "learning_rate": 0.002, "loss": 2.3359, "step": 152660 }, { "epoch": 0.590179524052512, "grad_norm": 0.11079439520835876, "learning_rate": 0.002, "loss": 2.3634, "step": 152670 }, { "epoch": 0.5902181812558952, "grad_norm": 0.12535911798477173, "learning_rate": 0.002, "loss": 2.3417, "step": 152680 }, { "epoch": 0.5902568384592785, "grad_norm": 0.24511437118053436, "learning_rate": 0.002, "loss": 2.3471, "step": 152690 }, { "epoch": 0.5902954956626618, "grad_norm": 0.10732803493738174, "learning_rate": 0.002, "loss": 2.3499, "step": 152700 }, { "epoch": 0.5903341528660451, "grad_norm": 0.113482765853405, "learning_rate": 0.002, "loss": 2.362, "step": 152710 }, { "epoch": 0.5903728100694283, "grad_norm": 0.0953284204006195, "learning_rate": 0.002, "loss": 2.3608, "step": 152720 }, { "epoch": 0.5904114672728116, "grad_norm": 0.12417623400688171, "learning_rate": 0.002, "loss": 2.3399, "step": 152730 }, { "epoch": 0.5904501244761949, "grad_norm": 0.10904087871313095, "learning_rate": 0.002, "loss": 2.3613, "step": 152740 }, { "epoch": 0.5904887816795782, "grad_norm": 0.11377087235450745, "learning_rate": 0.002, "loss": 2.3347, "step": 152750 }, { "epoch": 0.5905274388829614, "grad_norm": 0.09927655011415482, "learning_rate": 0.002, "loss": 2.3311, "step": 152760 }, { "epoch": 0.5905660960863447, "grad_norm": 0.12110207229852676, "learning_rate": 0.002, "loss": 2.3447, "step": 152770 }, { "epoch": 0.5906047532897281, "grad_norm": 0.1045595183968544, "learning_rate": 0.002, "loss": 2.3496, "step": 152780 }, { "epoch": 0.5906434104931113, "grad_norm": 0.09562791883945465, "learning_rate": 0.002, "loss": 2.3291, "step": 152790 }, { "epoch": 0.5906820676964946, "grad_norm": 0.09046997874975204, "learning_rate": 0.002, "loss": 2.349, "step": 152800 }, { "epoch": 0.5907207248998778, "grad_norm": 0.12062862515449524, "learning_rate": 0.002, "loss": 2.3351, "step": 152810 }, { "epoch": 0.5907593821032612, "grad_norm": 0.11804115027189255, "learning_rate": 0.002, "loss": 2.3377, "step": 152820 }, { "epoch": 0.5907980393066444, "grad_norm": 0.10457627475261688, "learning_rate": 0.002, "loss": 2.3303, "step": 152830 }, { "epoch": 0.5908366965100277, "grad_norm": 0.09671007841825485, "learning_rate": 0.002, "loss": 2.3524, "step": 152840 }, { "epoch": 0.5908753537134109, "grad_norm": 0.11309293657541275, "learning_rate": 0.002, "loss": 2.351, "step": 152850 }, { "epoch": 0.5909140109167942, "grad_norm": 0.11583933979272842, "learning_rate": 0.002, "loss": 2.3396, "step": 152860 }, { "epoch": 0.5909526681201775, "grad_norm": 0.10668080300092697, "learning_rate": 0.002, "loss": 2.3586, "step": 152870 }, { "epoch": 0.5909913253235608, "grad_norm": 0.09829465299844742, "learning_rate": 0.002, "loss": 2.3417, "step": 152880 }, { "epoch": 0.591029982526944, "grad_norm": 0.11730373650789261, "learning_rate": 0.002, "loss": 2.3428, "step": 152890 }, { "epoch": 0.5910686397303273, "grad_norm": 0.11244652420282364, "learning_rate": 0.002, "loss": 2.3502, "step": 152900 }, { "epoch": 0.5911072969337107, "grad_norm": 0.10793368518352509, "learning_rate": 0.002, "loss": 2.3359, "step": 152910 }, { "epoch": 0.5911459541370939, "grad_norm": 0.09984087944030762, "learning_rate": 0.002, "loss": 2.3403, "step": 152920 }, { "epoch": 0.5911846113404772, "grad_norm": 0.11790379136800766, "learning_rate": 0.002, "loss": 2.3447, "step": 152930 }, { "epoch": 0.5912232685438604, "grad_norm": 0.11779079586267471, "learning_rate": 0.002, "loss": 2.334, "step": 152940 }, { "epoch": 0.5912619257472438, "grad_norm": 0.11105469614267349, "learning_rate": 0.002, "loss": 2.3387, "step": 152950 }, { "epoch": 0.591300582950627, "grad_norm": 0.11927516013383865, "learning_rate": 0.002, "loss": 2.3412, "step": 152960 }, { "epoch": 0.5913392401540103, "grad_norm": 0.12462148070335388, "learning_rate": 0.002, "loss": 2.3514, "step": 152970 }, { "epoch": 0.5913778973573935, "grad_norm": 0.11625771969556808, "learning_rate": 0.002, "loss": 2.3503, "step": 152980 }, { "epoch": 0.5914165545607769, "grad_norm": 0.10062477737665176, "learning_rate": 0.002, "loss": 2.3519, "step": 152990 }, { "epoch": 0.5914552117641602, "grad_norm": 0.10655297338962555, "learning_rate": 0.002, "loss": 2.3354, "step": 153000 }, { "epoch": 0.5914938689675434, "grad_norm": 0.11452042311429977, "learning_rate": 0.002, "loss": 2.3603, "step": 153010 }, { "epoch": 0.5915325261709267, "grad_norm": 0.10042541474103928, "learning_rate": 0.002, "loss": 2.348, "step": 153020 }, { "epoch": 0.5915711833743099, "grad_norm": 0.09965793043375015, "learning_rate": 0.002, "loss": 2.3302, "step": 153030 }, { "epoch": 0.5916098405776933, "grad_norm": 0.10968238115310669, "learning_rate": 0.002, "loss": 2.3397, "step": 153040 }, { "epoch": 0.5916484977810765, "grad_norm": 0.12298446893692017, "learning_rate": 0.002, "loss": 2.3363, "step": 153050 }, { "epoch": 0.5916871549844598, "grad_norm": 0.12933231890201569, "learning_rate": 0.002, "loss": 2.3459, "step": 153060 }, { "epoch": 0.591725812187843, "grad_norm": 0.09767874330282211, "learning_rate": 0.002, "loss": 2.3282, "step": 153070 }, { "epoch": 0.5917644693912264, "grad_norm": 0.11389636248350143, "learning_rate": 0.002, "loss": 2.3352, "step": 153080 }, { "epoch": 0.5918031265946097, "grad_norm": 0.09770804643630981, "learning_rate": 0.002, "loss": 2.3472, "step": 153090 }, { "epoch": 0.5918417837979929, "grad_norm": 0.09838932752609253, "learning_rate": 0.002, "loss": 2.3447, "step": 153100 }, { "epoch": 0.5918804410013762, "grad_norm": 0.12737208604812622, "learning_rate": 0.002, "loss": 2.3585, "step": 153110 }, { "epoch": 0.5919190982047595, "grad_norm": 0.14221762120723724, "learning_rate": 0.002, "loss": 2.3463, "step": 153120 }, { "epoch": 0.5919577554081428, "grad_norm": 0.10190019011497498, "learning_rate": 0.002, "loss": 2.3298, "step": 153130 }, { "epoch": 0.591996412611526, "grad_norm": 0.12370285391807556, "learning_rate": 0.002, "loss": 2.3397, "step": 153140 }, { "epoch": 0.5920350698149093, "grad_norm": 0.0968901664018631, "learning_rate": 0.002, "loss": 2.3401, "step": 153150 }, { "epoch": 0.5920737270182926, "grad_norm": 0.10463910549879074, "learning_rate": 0.002, "loss": 2.3442, "step": 153160 }, { "epoch": 0.5921123842216759, "grad_norm": 0.11504076421260834, "learning_rate": 0.002, "loss": 2.3201, "step": 153170 }, { "epoch": 0.5921510414250591, "grad_norm": 0.110844686627388, "learning_rate": 0.002, "loss": 2.3467, "step": 153180 }, { "epoch": 0.5921896986284424, "grad_norm": 0.10349141061306, "learning_rate": 0.002, "loss": 2.3416, "step": 153190 }, { "epoch": 0.5922283558318256, "grad_norm": 0.1068180575966835, "learning_rate": 0.002, "loss": 2.3527, "step": 153200 }, { "epoch": 0.592267013035209, "grad_norm": 0.10640236735343933, "learning_rate": 0.002, "loss": 2.3452, "step": 153210 }, { "epoch": 0.5923056702385923, "grad_norm": 0.12542495131492615, "learning_rate": 0.002, "loss": 2.3422, "step": 153220 }, { "epoch": 0.5923443274419755, "grad_norm": 0.12914881110191345, "learning_rate": 0.002, "loss": 2.3365, "step": 153230 }, { "epoch": 0.5923829846453588, "grad_norm": 0.11109361052513123, "learning_rate": 0.002, "loss": 2.3546, "step": 153240 }, { "epoch": 0.5924216418487421, "grad_norm": 0.09706513583660126, "learning_rate": 0.002, "loss": 2.3574, "step": 153250 }, { "epoch": 0.5924602990521254, "grad_norm": 0.10857724398374557, "learning_rate": 0.002, "loss": 2.3067, "step": 153260 }, { "epoch": 0.5924989562555086, "grad_norm": 0.11418808996677399, "learning_rate": 0.002, "loss": 2.3474, "step": 153270 }, { "epoch": 0.5925376134588919, "grad_norm": 0.11678769439458847, "learning_rate": 0.002, "loss": 2.3554, "step": 153280 }, { "epoch": 0.5925762706622753, "grad_norm": 0.134332537651062, "learning_rate": 0.002, "loss": 2.338, "step": 153290 }, { "epoch": 0.5926149278656585, "grad_norm": 0.10128011554479599, "learning_rate": 0.002, "loss": 2.3403, "step": 153300 }, { "epoch": 0.5926535850690418, "grad_norm": 0.1074603945016861, "learning_rate": 0.002, "loss": 2.3333, "step": 153310 }, { "epoch": 0.592692242272425, "grad_norm": 0.1251911222934723, "learning_rate": 0.002, "loss": 2.353, "step": 153320 }, { "epoch": 0.5927308994758084, "grad_norm": 0.09642443805932999, "learning_rate": 0.002, "loss": 2.3428, "step": 153330 }, { "epoch": 0.5927695566791916, "grad_norm": 0.11492682993412018, "learning_rate": 0.002, "loss": 2.3492, "step": 153340 }, { "epoch": 0.5928082138825749, "grad_norm": 0.0965128168463707, "learning_rate": 0.002, "loss": 2.3447, "step": 153350 }, { "epoch": 0.5928468710859581, "grad_norm": 0.12206316739320755, "learning_rate": 0.002, "loss": 2.3242, "step": 153360 }, { "epoch": 0.5928855282893415, "grad_norm": 0.10395307838916779, "learning_rate": 0.002, "loss": 2.3395, "step": 153370 }, { "epoch": 0.5929241854927247, "grad_norm": 0.12461967021226883, "learning_rate": 0.002, "loss": 2.3513, "step": 153380 }, { "epoch": 0.592962842696108, "grad_norm": 0.11842044442892075, "learning_rate": 0.002, "loss": 2.3455, "step": 153390 }, { "epoch": 0.5930014998994912, "grad_norm": 0.1287367045879364, "learning_rate": 0.002, "loss": 2.33, "step": 153400 }, { "epoch": 0.5930401571028745, "grad_norm": 0.11194679886102676, "learning_rate": 0.002, "loss": 2.3407, "step": 153410 }, { "epoch": 0.5930788143062579, "grad_norm": 0.10873754322528839, "learning_rate": 0.002, "loss": 2.3406, "step": 153420 }, { "epoch": 0.5931174715096411, "grad_norm": 0.10627902299165726, "learning_rate": 0.002, "loss": 2.33, "step": 153430 }, { "epoch": 0.5931561287130244, "grad_norm": 0.123879075050354, "learning_rate": 0.002, "loss": 2.3433, "step": 153440 }, { "epoch": 0.5931947859164076, "grad_norm": 0.09307146072387695, "learning_rate": 0.002, "loss": 2.3416, "step": 153450 }, { "epoch": 0.593233443119791, "grad_norm": 0.09851375222206116, "learning_rate": 0.002, "loss": 2.3525, "step": 153460 }, { "epoch": 0.5932721003231742, "grad_norm": 0.11978261172771454, "learning_rate": 0.002, "loss": 2.3332, "step": 153470 }, { "epoch": 0.5933107575265575, "grad_norm": 0.09971176832914352, "learning_rate": 0.002, "loss": 2.3444, "step": 153480 }, { "epoch": 0.5933494147299407, "grad_norm": 0.11914601922035217, "learning_rate": 0.002, "loss": 2.3239, "step": 153490 }, { "epoch": 0.5933880719333241, "grad_norm": 0.09303689748048782, "learning_rate": 0.002, "loss": 2.3327, "step": 153500 }, { "epoch": 0.5934267291367074, "grad_norm": 0.10256947576999664, "learning_rate": 0.002, "loss": 2.341, "step": 153510 }, { "epoch": 0.5934653863400906, "grad_norm": 0.13114187121391296, "learning_rate": 0.002, "loss": 2.3302, "step": 153520 }, { "epoch": 0.5935040435434739, "grad_norm": 0.0972515270113945, "learning_rate": 0.002, "loss": 2.3457, "step": 153530 }, { "epoch": 0.5935427007468572, "grad_norm": 0.11419462412595749, "learning_rate": 0.002, "loss": 2.3408, "step": 153540 }, { "epoch": 0.5935813579502405, "grad_norm": 0.10986484587192535, "learning_rate": 0.002, "loss": 2.3449, "step": 153550 }, { "epoch": 0.5936200151536237, "grad_norm": 0.09035072475671768, "learning_rate": 0.002, "loss": 2.3312, "step": 153560 }, { "epoch": 0.593658672357007, "grad_norm": 0.11297523230314255, "learning_rate": 0.002, "loss": 2.3243, "step": 153570 }, { "epoch": 0.5936973295603902, "grad_norm": 0.11120027303695679, "learning_rate": 0.002, "loss": 2.3376, "step": 153580 }, { "epoch": 0.5937359867637736, "grad_norm": 0.08788348734378815, "learning_rate": 0.002, "loss": 2.3546, "step": 153590 }, { "epoch": 0.5937746439671568, "grad_norm": 0.10375424474477768, "learning_rate": 0.002, "loss": 2.3503, "step": 153600 }, { "epoch": 0.5938133011705401, "grad_norm": 0.1176786869764328, "learning_rate": 0.002, "loss": 2.3502, "step": 153610 }, { "epoch": 0.5938519583739233, "grad_norm": 0.09632980823516846, "learning_rate": 0.002, "loss": 2.3358, "step": 153620 }, { "epoch": 0.5938906155773067, "grad_norm": 0.09856001287698746, "learning_rate": 0.002, "loss": 2.3417, "step": 153630 }, { "epoch": 0.59392927278069, "grad_norm": 0.11624328047037125, "learning_rate": 0.002, "loss": 2.3361, "step": 153640 }, { "epoch": 0.5939679299840732, "grad_norm": 0.102755106985569, "learning_rate": 0.002, "loss": 2.3488, "step": 153650 }, { "epoch": 0.5940065871874565, "grad_norm": 0.10109134018421173, "learning_rate": 0.002, "loss": 2.3393, "step": 153660 }, { "epoch": 0.5940452443908398, "grad_norm": 0.10665129125118256, "learning_rate": 0.002, "loss": 2.3404, "step": 153670 }, { "epoch": 0.5940839015942231, "grad_norm": 0.1010684221982956, "learning_rate": 0.002, "loss": 2.3339, "step": 153680 }, { "epoch": 0.5941225587976063, "grad_norm": 0.10469356924295425, "learning_rate": 0.002, "loss": 2.3374, "step": 153690 }, { "epoch": 0.5941612160009896, "grad_norm": 0.12337398529052734, "learning_rate": 0.002, "loss": 2.3298, "step": 153700 }, { "epoch": 0.594199873204373, "grad_norm": 0.09571670740842819, "learning_rate": 0.002, "loss": 2.3575, "step": 153710 }, { "epoch": 0.5942385304077562, "grad_norm": 0.11873577535152435, "learning_rate": 0.002, "loss": 2.3409, "step": 153720 }, { "epoch": 0.5942771876111395, "grad_norm": 0.11242678761482239, "learning_rate": 0.002, "loss": 2.3548, "step": 153730 }, { "epoch": 0.5943158448145227, "grad_norm": 0.10509809851646423, "learning_rate": 0.002, "loss": 2.3444, "step": 153740 }, { "epoch": 0.594354502017906, "grad_norm": 0.10575684159994125, "learning_rate": 0.002, "loss": 2.3275, "step": 153750 }, { "epoch": 0.5943931592212893, "grad_norm": 0.12107384204864502, "learning_rate": 0.002, "loss": 2.3486, "step": 153760 }, { "epoch": 0.5944318164246726, "grad_norm": 0.10724588483572006, "learning_rate": 0.002, "loss": 2.3343, "step": 153770 }, { "epoch": 0.5944704736280558, "grad_norm": 0.10052936524152756, "learning_rate": 0.002, "loss": 2.3454, "step": 153780 }, { "epoch": 0.5945091308314391, "grad_norm": 0.10812316834926605, "learning_rate": 0.002, "loss": 2.3471, "step": 153790 }, { "epoch": 0.5945477880348224, "grad_norm": 0.09277022629976273, "learning_rate": 0.002, "loss": 2.3469, "step": 153800 }, { "epoch": 0.5945864452382057, "grad_norm": 0.09093903750181198, "learning_rate": 0.002, "loss": 2.3367, "step": 153810 }, { "epoch": 0.594625102441589, "grad_norm": 0.11882476508617401, "learning_rate": 0.002, "loss": 2.3445, "step": 153820 }, { "epoch": 0.5946637596449722, "grad_norm": 0.09744821488857269, "learning_rate": 0.002, "loss": 2.333, "step": 153830 }, { "epoch": 0.5947024168483556, "grad_norm": 0.1002984419465065, "learning_rate": 0.002, "loss": 2.3459, "step": 153840 }, { "epoch": 0.5947410740517388, "grad_norm": 0.10048675537109375, "learning_rate": 0.002, "loss": 2.3369, "step": 153850 }, { "epoch": 0.5947797312551221, "grad_norm": 0.12754768133163452, "learning_rate": 0.002, "loss": 2.3423, "step": 153860 }, { "epoch": 0.5948183884585053, "grad_norm": 0.13870207965373993, "learning_rate": 0.002, "loss": 2.3237, "step": 153870 }, { "epoch": 0.5948570456618887, "grad_norm": 0.0983053594827652, "learning_rate": 0.002, "loss": 2.3461, "step": 153880 }, { "epoch": 0.5948957028652719, "grad_norm": 0.10627438127994537, "learning_rate": 0.002, "loss": 2.342, "step": 153890 }, { "epoch": 0.5949343600686552, "grad_norm": 0.10147716104984283, "learning_rate": 0.002, "loss": 2.3417, "step": 153900 }, { "epoch": 0.5949730172720384, "grad_norm": 0.11695457249879837, "learning_rate": 0.002, "loss": 2.3434, "step": 153910 }, { "epoch": 0.5950116744754218, "grad_norm": 0.10442083328962326, "learning_rate": 0.002, "loss": 2.336, "step": 153920 }, { "epoch": 0.5950503316788051, "grad_norm": 0.10922045260667801, "learning_rate": 0.002, "loss": 2.3446, "step": 153930 }, { "epoch": 0.5950889888821883, "grad_norm": 0.09638384729623795, "learning_rate": 0.002, "loss": 2.3451, "step": 153940 }, { "epoch": 0.5951276460855716, "grad_norm": 0.09987162798643112, "learning_rate": 0.002, "loss": 2.3488, "step": 153950 }, { "epoch": 0.5951663032889548, "grad_norm": 0.11333297193050385, "learning_rate": 0.002, "loss": 2.3275, "step": 153960 }, { "epoch": 0.5952049604923382, "grad_norm": 0.11005302518606186, "learning_rate": 0.002, "loss": 2.3478, "step": 153970 }, { "epoch": 0.5952436176957214, "grad_norm": 0.09839659184217453, "learning_rate": 0.002, "loss": 2.3594, "step": 153980 }, { "epoch": 0.5952822748991047, "grad_norm": 0.1176500916481018, "learning_rate": 0.002, "loss": 2.3441, "step": 153990 }, { "epoch": 0.5953209321024879, "grad_norm": 0.1214519664645195, "learning_rate": 0.002, "loss": 2.3395, "step": 154000 }, { "epoch": 0.5953595893058713, "grad_norm": 0.10236170887947083, "learning_rate": 0.002, "loss": 2.352, "step": 154010 }, { "epoch": 0.5953982465092545, "grad_norm": 0.11794347316026688, "learning_rate": 0.002, "loss": 2.3385, "step": 154020 }, { "epoch": 0.5954369037126378, "grad_norm": 0.1112416610121727, "learning_rate": 0.002, "loss": 2.3329, "step": 154030 }, { "epoch": 0.595475560916021, "grad_norm": 0.09213864058256149, "learning_rate": 0.002, "loss": 2.3331, "step": 154040 }, { "epoch": 0.5955142181194044, "grad_norm": 0.11961939930915833, "learning_rate": 0.002, "loss": 2.341, "step": 154050 }, { "epoch": 0.5955528753227877, "grad_norm": 0.10888458788394928, "learning_rate": 0.002, "loss": 2.3432, "step": 154060 }, { "epoch": 0.5955915325261709, "grad_norm": 0.13337060809135437, "learning_rate": 0.002, "loss": 2.3447, "step": 154070 }, { "epoch": 0.5956301897295542, "grad_norm": 0.12271896749734879, "learning_rate": 0.002, "loss": 2.3504, "step": 154080 }, { "epoch": 0.5956688469329375, "grad_norm": 0.114785335958004, "learning_rate": 0.002, "loss": 2.3558, "step": 154090 }, { "epoch": 0.5957075041363208, "grad_norm": 0.11995737999677658, "learning_rate": 0.002, "loss": 2.3567, "step": 154100 }, { "epoch": 0.595746161339704, "grad_norm": 0.11258647590875626, "learning_rate": 0.002, "loss": 2.3527, "step": 154110 }, { "epoch": 0.5957848185430873, "grad_norm": 0.12628565728664398, "learning_rate": 0.002, "loss": 2.3577, "step": 154120 }, { "epoch": 0.5958234757464705, "grad_norm": 0.1209024041891098, "learning_rate": 0.002, "loss": 2.3401, "step": 154130 }, { "epoch": 0.5958621329498539, "grad_norm": 0.10436531901359558, "learning_rate": 0.002, "loss": 2.3294, "step": 154140 }, { "epoch": 0.5959007901532372, "grad_norm": 0.11716852337121964, "learning_rate": 0.002, "loss": 2.3494, "step": 154150 }, { "epoch": 0.5959394473566204, "grad_norm": 0.10218223929405212, "learning_rate": 0.002, "loss": 2.3504, "step": 154160 }, { "epoch": 0.5959781045600037, "grad_norm": 0.12400393187999725, "learning_rate": 0.002, "loss": 2.3403, "step": 154170 }, { "epoch": 0.596016761763387, "grad_norm": 0.1067901998758316, "learning_rate": 0.002, "loss": 2.3296, "step": 154180 }, { "epoch": 0.5960554189667703, "grad_norm": 0.10009181499481201, "learning_rate": 0.002, "loss": 2.3386, "step": 154190 }, { "epoch": 0.5960940761701535, "grad_norm": 0.1042267307639122, "learning_rate": 0.002, "loss": 2.3482, "step": 154200 }, { "epoch": 0.5961327333735368, "grad_norm": 0.1324894279241562, "learning_rate": 0.002, "loss": 2.3383, "step": 154210 }, { "epoch": 0.5961713905769201, "grad_norm": 0.11904510855674744, "learning_rate": 0.002, "loss": 2.346, "step": 154220 }, { "epoch": 0.5962100477803034, "grad_norm": 0.0989355519413948, "learning_rate": 0.002, "loss": 2.3468, "step": 154230 }, { "epoch": 0.5962487049836867, "grad_norm": 0.12672419846057892, "learning_rate": 0.002, "loss": 2.3374, "step": 154240 }, { "epoch": 0.5962873621870699, "grad_norm": 0.11545150727033615, "learning_rate": 0.002, "loss": 2.3428, "step": 154250 }, { "epoch": 0.5963260193904533, "grad_norm": 0.11972194910049438, "learning_rate": 0.002, "loss": 2.3421, "step": 154260 }, { "epoch": 0.5963646765938365, "grad_norm": 0.13377872109413147, "learning_rate": 0.002, "loss": 2.3399, "step": 154270 }, { "epoch": 0.5964033337972198, "grad_norm": 0.10976868122816086, "learning_rate": 0.002, "loss": 2.3483, "step": 154280 }, { "epoch": 0.596441991000603, "grad_norm": 0.1025083065032959, "learning_rate": 0.002, "loss": 2.3418, "step": 154290 }, { "epoch": 0.5964806482039864, "grad_norm": 0.09834298491477966, "learning_rate": 0.002, "loss": 2.3388, "step": 154300 }, { "epoch": 0.5965193054073696, "grad_norm": 0.12274927645921707, "learning_rate": 0.002, "loss": 2.3436, "step": 154310 }, { "epoch": 0.5965579626107529, "grad_norm": 0.09800335764884949, "learning_rate": 0.002, "loss": 2.3541, "step": 154320 }, { "epoch": 0.5965966198141361, "grad_norm": 0.09520439058542252, "learning_rate": 0.002, "loss": 2.3515, "step": 154330 }, { "epoch": 0.5966352770175194, "grad_norm": 0.10089229792356491, "learning_rate": 0.002, "loss": 2.3394, "step": 154340 }, { "epoch": 0.5966739342209028, "grad_norm": 0.11390987038612366, "learning_rate": 0.002, "loss": 2.3453, "step": 154350 }, { "epoch": 0.596712591424286, "grad_norm": 0.09597501158714294, "learning_rate": 0.002, "loss": 2.3572, "step": 154360 }, { "epoch": 0.5967512486276693, "grad_norm": 0.12074465304613113, "learning_rate": 0.002, "loss": 2.3383, "step": 154370 }, { "epoch": 0.5967899058310525, "grad_norm": 0.13768284022808075, "learning_rate": 0.002, "loss": 2.3487, "step": 154380 }, { "epoch": 0.5968285630344359, "grad_norm": 0.11336904764175415, "learning_rate": 0.002, "loss": 2.3437, "step": 154390 }, { "epoch": 0.5968672202378191, "grad_norm": 0.1107833981513977, "learning_rate": 0.002, "loss": 2.3529, "step": 154400 }, { "epoch": 0.5969058774412024, "grad_norm": 0.10331069678068161, "learning_rate": 0.002, "loss": 2.3394, "step": 154410 }, { "epoch": 0.5969445346445856, "grad_norm": 0.11662972718477249, "learning_rate": 0.002, "loss": 2.3454, "step": 154420 }, { "epoch": 0.596983191847969, "grad_norm": 0.10128601640462875, "learning_rate": 0.002, "loss": 2.3383, "step": 154430 }, { "epoch": 0.5970218490513522, "grad_norm": 0.09762250632047653, "learning_rate": 0.002, "loss": 2.3435, "step": 154440 }, { "epoch": 0.5970605062547355, "grad_norm": 0.10796613991260529, "learning_rate": 0.002, "loss": 2.3397, "step": 154450 }, { "epoch": 0.5970991634581188, "grad_norm": 0.13601264357566833, "learning_rate": 0.002, "loss": 2.3476, "step": 154460 }, { "epoch": 0.5971378206615021, "grad_norm": 0.10176949948072433, "learning_rate": 0.002, "loss": 2.3762, "step": 154470 }, { "epoch": 0.5971764778648854, "grad_norm": 0.10595223307609558, "learning_rate": 0.002, "loss": 2.3494, "step": 154480 }, { "epoch": 0.5972151350682686, "grad_norm": 0.10171402990818024, "learning_rate": 0.002, "loss": 2.3461, "step": 154490 }, { "epoch": 0.5972537922716519, "grad_norm": 0.11845794320106506, "learning_rate": 0.002, "loss": 2.3399, "step": 154500 }, { "epoch": 0.5972924494750351, "grad_norm": 0.09843221306800842, "learning_rate": 0.002, "loss": 2.3474, "step": 154510 }, { "epoch": 0.5973311066784185, "grad_norm": 0.09988819062709808, "learning_rate": 0.002, "loss": 2.3422, "step": 154520 }, { "epoch": 0.5973697638818017, "grad_norm": 0.11028525233268738, "learning_rate": 0.002, "loss": 2.3444, "step": 154530 }, { "epoch": 0.597408421085185, "grad_norm": 0.10710286349058151, "learning_rate": 0.002, "loss": 2.3441, "step": 154540 }, { "epoch": 0.5974470782885682, "grad_norm": 0.11657308787107468, "learning_rate": 0.002, "loss": 2.3321, "step": 154550 }, { "epoch": 0.5974857354919516, "grad_norm": 0.1102280244231224, "learning_rate": 0.002, "loss": 2.3543, "step": 154560 }, { "epoch": 0.5975243926953349, "grad_norm": 0.10331825911998749, "learning_rate": 0.002, "loss": 2.3404, "step": 154570 }, { "epoch": 0.5975630498987181, "grad_norm": 0.11934579163789749, "learning_rate": 0.002, "loss": 2.3459, "step": 154580 }, { "epoch": 0.5976017071021014, "grad_norm": 0.11135943233966827, "learning_rate": 0.002, "loss": 2.3486, "step": 154590 }, { "epoch": 0.5976403643054847, "grad_norm": 0.1203981563448906, "learning_rate": 0.002, "loss": 2.3529, "step": 154600 }, { "epoch": 0.597679021508868, "grad_norm": 0.0986235961318016, "learning_rate": 0.002, "loss": 2.3497, "step": 154610 }, { "epoch": 0.5977176787122512, "grad_norm": 0.09534791857004166, "learning_rate": 0.002, "loss": 2.3469, "step": 154620 }, { "epoch": 0.5977563359156345, "grad_norm": 0.09276114404201508, "learning_rate": 0.002, "loss": 2.3389, "step": 154630 }, { "epoch": 0.5977949931190178, "grad_norm": 0.11678782850503922, "learning_rate": 0.002, "loss": 2.3333, "step": 154640 }, { "epoch": 0.5978336503224011, "grad_norm": 0.4351901412010193, "learning_rate": 0.002, "loss": 2.3424, "step": 154650 }, { "epoch": 0.5978723075257844, "grad_norm": 0.1325816512107849, "learning_rate": 0.002, "loss": 2.3513, "step": 154660 }, { "epoch": 0.5979109647291676, "grad_norm": 0.11587052047252655, "learning_rate": 0.002, "loss": 2.342, "step": 154670 }, { "epoch": 0.5979496219325509, "grad_norm": 0.12498050928115845, "learning_rate": 0.002, "loss": 2.3306, "step": 154680 }, { "epoch": 0.5979882791359342, "grad_norm": 0.11049990355968475, "learning_rate": 0.002, "loss": 2.3541, "step": 154690 }, { "epoch": 0.5980269363393175, "grad_norm": 0.11567365378141403, "learning_rate": 0.002, "loss": 2.3447, "step": 154700 }, { "epoch": 0.5980655935427007, "grad_norm": 0.10405659675598145, "learning_rate": 0.002, "loss": 2.3333, "step": 154710 }, { "epoch": 0.598104250746084, "grad_norm": 0.11374452710151672, "learning_rate": 0.002, "loss": 2.352, "step": 154720 }, { "epoch": 0.5981429079494673, "grad_norm": 0.12257920950651169, "learning_rate": 0.002, "loss": 2.3471, "step": 154730 }, { "epoch": 0.5981815651528506, "grad_norm": 0.10406675189733505, "learning_rate": 0.002, "loss": 2.3374, "step": 154740 }, { "epoch": 0.5982202223562338, "grad_norm": 0.13149400055408478, "learning_rate": 0.002, "loss": 2.3479, "step": 154750 }, { "epoch": 0.5982588795596171, "grad_norm": 0.1053181141614914, "learning_rate": 0.002, "loss": 2.3528, "step": 154760 }, { "epoch": 0.5982975367630005, "grad_norm": 0.10634828358888626, "learning_rate": 0.002, "loss": 2.3415, "step": 154770 }, { "epoch": 0.5983361939663837, "grad_norm": 0.10084746778011322, "learning_rate": 0.002, "loss": 2.3455, "step": 154780 }, { "epoch": 0.598374851169767, "grad_norm": 0.09804457426071167, "learning_rate": 0.002, "loss": 2.353, "step": 154790 }, { "epoch": 0.5984135083731502, "grad_norm": 0.12280680984258652, "learning_rate": 0.002, "loss": 2.3362, "step": 154800 }, { "epoch": 0.5984521655765336, "grad_norm": 0.11038525402545929, "learning_rate": 0.002, "loss": 2.3342, "step": 154810 }, { "epoch": 0.5984908227799168, "grad_norm": 0.10350499302148819, "learning_rate": 0.002, "loss": 2.3543, "step": 154820 }, { "epoch": 0.5985294799833001, "grad_norm": 0.08930511027574539, "learning_rate": 0.002, "loss": 2.3435, "step": 154830 }, { "epoch": 0.5985681371866833, "grad_norm": 0.13034844398498535, "learning_rate": 0.002, "loss": 2.3426, "step": 154840 }, { "epoch": 0.5986067943900667, "grad_norm": 0.10703147202730179, "learning_rate": 0.002, "loss": 2.3484, "step": 154850 }, { "epoch": 0.59864545159345, "grad_norm": 0.10099921375513077, "learning_rate": 0.002, "loss": 2.3468, "step": 154860 }, { "epoch": 0.5986841087968332, "grad_norm": 0.1223965436220169, "learning_rate": 0.002, "loss": 2.3307, "step": 154870 }, { "epoch": 0.5987227660002165, "grad_norm": 0.13722404837608337, "learning_rate": 0.002, "loss": 2.3476, "step": 154880 }, { "epoch": 0.5987614232035997, "grad_norm": 0.13416677713394165, "learning_rate": 0.002, "loss": 2.3452, "step": 154890 }, { "epoch": 0.5988000804069831, "grad_norm": 0.11614516377449036, "learning_rate": 0.002, "loss": 2.34, "step": 154900 }, { "epoch": 0.5988387376103663, "grad_norm": 0.10839620977640152, "learning_rate": 0.002, "loss": 2.3231, "step": 154910 }, { "epoch": 0.5988773948137496, "grad_norm": 0.10474540293216705, "learning_rate": 0.002, "loss": 2.3334, "step": 154920 }, { "epoch": 0.5989160520171328, "grad_norm": 0.12745654582977295, "learning_rate": 0.002, "loss": 2.348, "step": 154930 }, { "epoch": 0.5989547092205162, "grad_norm": 0.09551647305488586, "learning_rate": 0.002, "loss": 2.3377, "step": 154940 }, { "epoch": 0.5989933664238994, "grad_norm": 0.1009441688656807, "learning_rate": 0.002, "loss": 2.3392, "step": 154950 }, { "epoch": 0.5990320236272827, "grad_norm": 0.10916905105113983, "learning_rate": 0.002, "loss": 2.3436, "step": 154960 }, { "epoch": 0.599070680830666, "grad_norm": 0.10929003357887268, "learning_rate": 0.002, "loss": 2.3368, "step": 154970 }, { "epoch": 0.5991093380340493, "grad_norm": 0.10747554153203964, "learning_rate": 0.002, "loss": 2.3432, "step": 154980 }, { "epoch": 0.5991479952374326, "grad_norm": 0.12056456506252289, "learning_rate": 0.002, "loss": 2.3485, "step": 154990 }, { "epoch": 0.5991866524408158, "grad_norm": 0.13263089954853058, "learning_rate": 0.002, "loss": 2.3348, "step": 155000 }, { "epoch": 0.5992253096441991, "grad_norm": 0.10803119838237762, "learning_rate": 0.002, "loss": 2.3505, "step": 155010 }, { "epoch": 0.5992639668475824, "grad_norm": 0.10046329349279404, "learning_rate": 0.002, "loss": 2.3452, "step": 155020 }, { "epoch": 0.5993026240509657, "grad_norm": 0.13293473422527313, "learning_rate": 0.002, "loss": 2.3399, "step": 155030 }, { "epoch": 0.5993412812543489, "grad_norm": 0.1141979917883873, "learning_rate": 0.002, "loss": 2.3345, "step": 155040 }, { "epoch": 0.5993799384577322, "grad_norm": 0.100059375166893, "learning_rate": 0.002, "loss": 2.3525, "step": 155050 }, { "epoch": 0.5994185956611154, "grad_norm": 0.11091131716966629, "learning_rate": 0.002, "loss": 2.3438, "step": 155060 }, { "epoch": 0.5994572528644988, "grad_norm": 0.1243298128247261, "learning_rate": 0.002, "loss": 2.3461, "step": 155070 }, { "epoch": 0.599495910067882, "grad_norm": 0.11147356033325195, "learning_rate": 0.002, "loss": 2.3473, "step": 155080 }, { "epoch": 0.5995345672712653, "grad_norm": 0.1063820868730545, "learning_rate": 0.002, "loss": 2.3486, "step": 155090 }, { "epoch": 0.5995732244746486, "grad_norm": 0.117628313601017, "learning_rate": 0.002, "loss": 2.3495, "step": 155100 }, { "epoch": 0.5996118816780319, "grad_norm": 0.1024029403924942, "learning_rate": 0.002, "loss": 2.3441, "step": 155110 }, { "epoch": 0.5996505388814152, "grad_norm": 0.10223310440778732, "learning_rate": 0.002, "loss": 2.3572, "step": 155120 }, { "epoch": 0.5996891960847984, "grad_norm": 0.12325169891119003, "learning_rate": 0.002, "loss": 2.3422, "step": 155130 }, { "epoch": 0.5997278532881817, "grad_norm": 0.11169067770242691, "learning_rate": 0.002, "loss": 2.3484, "step": 155140 }, { "epoch": 0.599766510491565, "grad_norm": 0.10746439546346664, "learning_rate": 0.002, "loss": 2.3715, "step": 155150 }, { "epoch": 0.5998051676949483, "grad_norm": 0.09545475989580154, "learning_rate": 0.002, "loss": 2.3495, "step": 155160 }, { "epoch": 0.5998438248983315, "grad_norm": 0.10252842307090759, "learning_rate": 0.002, "loss": 2.3398, "step": 155170 }, { "epoch": 0.5998824821017148, "grad_norm": 0.10515642911195755, "learning_rate": 0.002, "loss": 2.3469, "step": 155180 }, { "epoch": 0.5999211393050982, "grad_norm": 0.09995514154434204, "learning_rate": 0.002, "loss": 2.3287, "step": 155190 }, { "epoch": 0.5999597965084814, "grad_norm": 0.10166479647159576, "learning_rate": 0.002, "loss": 2.3386, "step": 155200 }, { "epoch": 0.5999984537118647, "grad_norm": 0.10672900080680847, "learning_rate": 0.002, "loss": 2.3449, "step": 155210 }, { "epoch": 0.6000371109152479, "grad_norm": 0.12125857174396515, "learning_rate": 0.002, "loss": 2.3586, "step": 155220 }, { "epoch": 0.6000757681186313, "grad_norm": 0.11408720165491104, "learning_rate": 0.002, "loss": 2.3609, "step": 155230 }, { "epoch": 0.6001144253220145, "grad_norm": 0.11315006017684937, "learning_rate": 0.002, "loss": 2.3376, "step": 155240 }, { "epoch": 0.6001530825253978, "grad_norm": 0.11115438491106033, "learning_rate": 0.002, "loss": 2.3427, "step": 155250 }, { "epoch": 0.600191739728781, "grad_norm": 0.11406593769788742, "learning_rate": 0.002, "loss": 2.3432, "step": 155260 }, { "epoch": 0.6002303969321643, "grad_norm": 0.11923495680093765, "learning_rate": 0.002, "loss": 2.3327, "step": 155270 }, { "epoch": 0.6002690541355477, "grad_norm": 0.10646390169858932, "learning_rate": 0.002, "loss": 2.3416, "step": 155280 }, { "epoch": 0.6003077113389309, "grad_norm": 0.11619962006807327, "learning_rate": 0.002, "loss": 2.3412, "step": 155290 }, { "epoch": 0.6003463685423142, "grad_norm": 0.09947273135185242, "learning_rate": 0.002, "loss": 2.3517, "step": 155300 }, { "epoch": 0.6003850257456974, "grad_norm": 0.10065237432718277, "learning_rate": 0.002, "loss": 2.3518, "step": 155310 }, { "epoch": 0.6004236829490808, "grad_norm": 0.10713927447795868, "learning_rate": 0.002, "loss": 2.3399, "step": 155320 }, { "epoch": 0.600462340152464, "grad_norm": 0.12866121530532837, "learning_rate": 0.002, "loss": 2.3298, "step": 155330 }, { "epoch": 0.6005009973558473, "grad_norm": 0.1103544682264328, "learning_rate": 0.002, "loss": 2.363, "step": 155340 }, { "epoch": 0.6005396545592305, "grad_norm": 0.1077926978468895, "learning_rate": 0.002, "loss": 2.3461, "step": 155350 }, { "epoch": 0.6005783117626139, "grad_norm": 0.11506489664316177, "learning_rate": 0.002, "loss": 2.3466, "step": 155360 }, { "epoch": 0.6006169689659971, "grad_norm": 0.09376713633537292, "learning_rate": 0.002, "loss": 2.3437, "step": 155370 }, { "epoch": 0.6006556261693804, "grad_norm": 0.12837748229503632, "learning_rate": 0.002, "loss": 2.3493, "step": 155380 }, { "epoch": 0.6006942833727636, "grad_norm": 0.1213526576757431, "learning_rate": 0.002, "loss": 2.3531, "step": 155390 }, { "epoch": 0.600732940576147, "grad_norm": 0.09528365731239319, "learning_rate": 0.002, "loss": 2.3396, "step": 155400 }, { "epoch": 0.6007715977795303, "grad_norm": 0.09534302353858948, "learning_rate": 0.002, "loss": 2.3334, "step": 155410 }, { "epoch": 0.6008102549829135, "grad_norm": 0.13553263247013092, "learning_rate": 0.002, "loss": 2.3496, "step": 155420 }, { "epoch": 0.6008489121862968, "grad_norm": 0.11701808869838715, "learning_rate": 0.002, "loss": 2.3358, "step": 155430 }, { "epoch": 0.60088756938968, "grad_norm": 0.10585474222898483, "learning_rate": 0.002, "loss": 2.3429, "step": 155440 }, { "epoch": 0.6009262265930634, "grad_norm": 0.1210872009396553, "learning_rate": 0.002, "loss": 2.3474, "step": 155450 }, { "epoch": 0.6009648837964466, "grad_norm": 0.11863139271736145, "learning_rate": 0.002, "loss": 2.3468, "step": 155460 }, { "epoch": 0.6010035409998299, "grad_norm": 0.12181145697832108, "learning_rate": 0.002, "loss": 2.3227, "step": 155470 }, { "epoch": 0.6010421982032131, "grad_norm": 0.09719795733690262, "learning_rate": 0.002, "loss": 2.3444, "step": 155480 }, { "epoch": 0.6010808554065965, "grad_norm": 0.09969165176153183, "learning_rate": 0.002, "loss": 2.3345, "step": 155490 }, { "epoch": 0.6011195126099798, "grad_norm": 0.12080059945583344, "learning_rate": 0.002, "loss": 2.3494, "step": 155500 }, { "epoch": 0.601158169813363, "grad_norm": 0.10750175267457962, "learning_rate": 0.002, "loss": 2.3383, "step": 155510 }, { "epoch": 0.6011968270167463, "grad_norm": 0.11188539862632751, "learning_rate": 0.002, "loss": 2.3493, "step": 155520 }, { "epoch": 0.6012354842201296, "grad_norm": 0.12191398441791534, "learning_rate": 0.002, "loss": 2.3511, "step": 155530 }, { "epoch": 0.6012741414235129, "grad_norm": 0.10276643931865692, "learning_rate": 0.002, "loss": 2.3678, "step": 155540 }, { "epoch": 0.6013127986268961, "grad_norm": 0.1120799109339714, "learning_rate": 0.002, "loss": 2.3343, "step": 155550 }, { "epoch": 0.6013514558302794, "grad_norm": 0.11369843035936356, "learning_rate": 0.002, "loss": 2.3521, "step": 155560 }, { "epoch": 0.6013901130336627, "grad_norm": 0.13047267496585846, "learning_rate": 0.002, "loss": 2.3347, "step": 155570 }, { "epoch": 0.601428770237046, "grad_norm": 0.10796220600605011, "learning_rate": 0.002, "loss": 2.3411, "step": 155580 }, { "epoch": 0.6014674274404292, "grad_norm": 0.09764180332422256, "learning_rate": 0.002, "loss": 2.3419, "step": 155590 }, { "epoch": 0.6015060846438125, "grad_norm": 0.11820457130670547, "learning_rate": 0.002, "loss": 2.3539, "step": 155600 }, { "epoch": 0.6015447418471958, "grad_norm": 0.11059782654047012, "learning_rate": 0.002, "loss": 2.3403, "step": 155610 }, { "epoch": 0.6015833990505791, "grad_norm": 0.0945165678858757, "learning_rate": 0.002, "loss": 2.3446, "step": 155620 }, { "epoch": 0.6016220562539624, "grad_norm": 0.1834547221660614, "learning_rate": 0.002, "loss": 2.3405, "step": 155630 }, { "epoch": 0.6016607134573456, "grad_norm": 0.12191552668809891, "learning_rate": 0.002, "loss": 2.3558, "step": 155640 }, { "epoch": 0.6016993706607289, "grad_norm": 0.1166863664984703, "learning_rate": 0.002, "loss": 2.356, "step": 155650 }, { "epoch": 0.6017380278641122, "grad_norm": 0.09542674571275711, "learning_rate": 0.002, "loss": 2.3506, "step": 155660 }, { "epoch": 0.6017766850674955, "grad_norm": 0.11937938630580902, "learning_rate": 0.002, "loss": 2.3545, "step": 155670 }, { "epoch": 0.6018153422708787, "grad_norm": 0.1050414890050888, "learning_rate": 0.002, "loss": 2.3496, "step": 155680 }, { "epoch": 0.601853999474262, "grad_norm": 0.10865213721990585, "learning_rate": 0.002, "loss": 2.3388, "step": 155690 }, { "epoch": 0.6018926566776454, "grad_norm": 0.11925183981657028, "learning_rate": 0.002, "loss": 2.3451, "step": 155700 }, { "epoch": 0.6019313138810286, "grad_norm": 0.10119999945163727, "learning_rate": 0.002, "loss": 2.3515, "step": 155710 }, { "epoch": 0.6019699710844119, "grad_norm": 0.10889890044927597, "learning_rate": 0.002, "loss": 2.3417, "step": 155720 }, { "epoch": 0.6020086282877951, "grad_norm": 0.10714955627918243, "learning_rate": 0.002, "loss": 2.3503, "step": 155730 }, { "epoch": 0.6020472854911785, "grad_norm": 0.12309153378009796, "learning_rate": 0.002, "loss": 2.361, "step": 155740 }, { "epoch": 0.6020859426945617, "grad_norm": 0.09973911941051483, "learning_rate": 0.002, "loss": 2.3518, "step": 155750 }, { "epoch": 0.602124599897945, "grad_norm": 0.13327579200267792, "learning_rate": 0.002, "loss": 2.3615, "step": 155760 }, { "epoch": 0.6021632571013282, "grad_norm": 0.10561807453632355, "learning_rate": 0.002, "loss": 2.3363, "step": 155770 }, { "epoch": 0.6022019143047116, "grad_norm": 0.10778845101594925, "learning_rate": 0.002, "loss": 2.348, "step": 155780 }, { "epoch": 0.6022405715080948, "grad_norm": 0.1271054595708847, "learning_rate": 0.002, "loss": 2.3416, "step": 155790 }, { "epoch": 0.6022792287114781, "grad_norm": 0.10283089429140091, "learning_rate": 0.002, "loss": 2.3403, "step": 155800 }, { "epoch": 0.6023178859148614, "grad_norm": 0.16596998274326324, "learning_rate": 0.002, "loss": 2.357, "step": 155810 }, { "epoch": 0.6023565431182446, "grad_norm": 0.1061653345823288, "learning_rate": 0.002, "loss": 2.3393, "step": 155820 }, { "epoch": 0.602395200321628, "grad_norm": 0.10739660263061523, "learning_rate": 0.002, "loss": 2.3241, "step": 155830 }, { "epoch": 0.6024338575250112, "grad_norm": 0.11349905282258987, "learning_rate": 0.002, "loss": 2.3303, "step": 155840 }, { "epoch": 0.6024725147283945, "grad_norm": 0.11594951897859573, "learning_rate": 0.002, "loss": 2.3518, "step": 155850 }, { "epoch": 0.6025111719317777, "grad_norm": 0.11781451106071472, "learning_rate": 0.002, "loss": 2.3247, "step": 155860 }, { "epoch": 0.6025498291351611, "grad_norm": 0.10783499479293823, "learning_rate": 0.002, "loss": 2.3546, "step": 155870 }, { "epoch": 0.6025884863385443, "grad_norm": 0.0937013030052185, "learning_rate": 0.002, "loss": 2.3484, "step": 155880 }, { "epoch": 0.6026271435419276, "grad_norm": 0.11056476086378098, "learning_rate": 0.002, "loss": 2.3349, "step": 155890 }, { "epoch": 0.6026658007453108, "grad_norm": 0.1398683488368988, "learning_rate": 0.002, "loss": 2.3466, "step": 155900 }, { "epoch": 0.6027044579486942, "grad_norm": 0.10671201348304749, "learning_rate": 0.002, "loss": 2.3545, "step": 155910 }, { "epoch": 0.6027431151520775, "grad_norm": 0.10434763878583908, "learning_rate": 0.002, "loss": 2.3198, "step": 155920 }, { "epoch": 0.6027817723554607, "grad_norm": 0.10570269823074341, "learning_rate": 0.002, "loss": 2.332, "step": 155930 }, { "epoch": 0.602820429558844, "grad_norm": 0.08993524312973022, "learning_rate": 0.002, "loss": 2.3299, "step": 155940 }, { "epoch": 0.6028590867622273, "grad_norm": 0.11433251202106476, "learning_rate": 0.002, "loss": 2.3528, "step": 155950 }, { "epoch": 0.6028977439656106, "grad_norm": 0.12297467142343521, "learning_rate": 0.002, "loss": 2.3367, "step": 155960 }, { "epoch": 0.6029364011689938, "grad_norm": 0.09785819798707962, "learning_rate": 0.002, "loss": 2.3472, "step": 155970 }, { "epoch": 0.6029750583723771, "grad_norm": 0.10338174551725388, "learning_rate": 0.002, "loss": 2.3332, "step": 155980 }, { "epoch": 0.6030137155757603, "grad_norm": 0.13314157724380493, "learning_rate": 0.002, "loss": 2.363, "step": 155990 }, { "epoch": 0.6030523727791437, "grad_norm": 0.1302737146615982, "learning_rate": 0.002, "loss": 2.3592, "step": 156000 }, { "epoch": 0.603091029982527, "grad_norm": 0.13459540903568268, "learning_rate": 0.002, "loss": 2.3303, "step": 156010 }, { "epoch": 0.6031296871859102, "grad_norm": 0.1265462189912796, "learning_rate": 0.002, "loss": 2.3466, "step": 156020 }, { "epoch": 0.6031683443892935, "grad_norm": 0.13730959594249725, "learning_rate": 0.002, "loss": 2.3541, "step": 156030 }, { "epoch": 0.6032070015926768, "grad_norm": 0.09791308641433716, "learning_rate": 0.002, "loss": 2.3371, "step": 156040 }, { "epoch": 0.6032456587960601, "grad_norm": 0.09639491885900497, "learning_rate": 0.002, "loss": 2.3466, "step": 156050 }, { "epoch": 0.6032843159994433, "grad_norm": 0.11070328950881958, "learning_rate": 0.002, "loss": 2.3538, "step": 156060 }, { "epoch": 0.6033229732028266, "grad_norm": 0.10137160122394562, "learning_rate": 0.002, "loss": 2.3343, "step": 156070 }, { "epoch": 0.6033616304062099, "grad_norm": 0.11534697562456131, "learning_rate": 0.002, "loss": 2.3414, "step": 156080 }, { "epoch": 0.6034002876095932, "grad_norm": 0.1556681990623474, "learning_rate": 0.002, "loss": 2.3503, "step": 156090 }, { "epoch": 0.6034389448129764, "grad_norm": 0.11398902535438538, "learning_rate": 0.002, "loss": 2.3471, "step": 156100 }, { "epoch": 0.6034776020163597, "grad_norm": 0.09806044399738312, "learning_rate": 0.002, "loss": 2.3422, "step": 156110 }, { "epoch": 0.6035162592197431, "grad_norm": 0.11352315545082092, "learning_rate": 0.002, "loss": 2.3478, "step": 156120 }, { "epoch": 0.6035549164231263, "grad_norm": 0.09236298501491547, "learning_rate": 0.002, "loss": 2.3562, "step": 156130 }, { "epoch": 0.6035935736265096, "grad_norm": 0.0992632508277893, "learning_rate": 0.002, "loss": 2.3421, "step": 156140 }, { "epoch": 0.6036322308298928, "grad_norm": 0.1060241162776947, "learning_rate": 0.002, "loss": 2.3441, "step": 156150 }, { "epoch": 0.6036708880332761, "grad_norm": 0.10750989615917206, "learning_rate": 0.002, "loss": 2.3399, "step": 156160 }, { "epoch": 0.6037095452366594, "grad_norm": 0.11849751323461533, "learning_rate": 0.002, "loss": 2.3316, "step": 156170 }, { "epoch": 0.6037482024400427, "grad_norm": 0.10138081014156342, "learning_rate": 0.002, "loss": 2.341, "step": 156180 }, { "epoch": 0.6037868596434259, "grad_norm": 0.10642571747303009, "learning_rate": 0.002, "loss": 2.3362, "step": 156190 }, { "epoch": 0.6038255168468092, "grad_norm": 0.1288752406835556, "learning_rate": 0.002, "loss": 2.3325, "step": 156200 }, { "epoch": 0.6038641740501925, "grad_norm": 0.09953559935092926, "learning_rate": 0.002, "loss": 2.3494, "step": 156210 }, { "epoch": 0.6039028312535758, "grad_norm": 0.10579574853181839, "learning_rate": 0.002, "loss": 2.3399, "step": 156220 }, { "epoch": 0.603941488456959, "grad_norm": 0.0933934897184372, "learning_rate": 0.002, "loss": 2.3389, "step": 156230 }, { "epoch": 0.6039801456603423, "grad_norm": 0.10033083707094193, "learning_rate": 0.002, "loss": 2.3367, "step": 156240 }, { "epoch": 0.6040188028637257, "grad_norm": 0.10382883250713348, "learning_rate": 0.002, "loss": 2.3577, "step": 156250 }, { "epoch": 0.6040574600671089, "grad_norm": 0.10549649596214294, "learning_rate": 0.002, "loss": 2.3349, "step": 156260 }, { "epoch": 0.6040961172704922, "grad_norm": 0.11817780882120132, "learning_rate": 0.002, "loss": 2.371, "step": 156270 }, { "epoch": 0.6041347744738754, "grad_norm": 0.12325211614370346, "learning_rate": 0.002, "loss": 2.3541, "step": 156280 }, { "epoch": 0.6041734316772588, "grad_norm": 0.1172434538602829, "learning_rate": 0.002, "loss": 2.3502, "step": 156290 }, { "epoch": 0.604212088880642, "grad_norm": 0.09819969534873962, "learning_rate": 0.002, "loss": 2.3386, "step": 156300 }, { "epoch": 0.6042507460840253, "grad_norm": 0.10299625247716904, "learning_rate": 0.002, "loss": 2.355, "step": 156310 }, { "epoch": 0.6042894032874085, "grad_norm": 0.09782897680997849, "learning_rate": 0.002, "loss": 2.3641, "step": 156320 }, { "epoch": 0.6043280604907919, "grad_norm": 0.11144860088825226, "learning_rate": 0.002, "loss": 2.349, "step": 156330 }, { "epoch": 0.6043667176941752, "grad_norm": 0.11498149484395981, "learning_rate": 0.002, "loss": 2.3301, "step": 156340 }, { "epoch": 0.6044053748975584, "grad_norm": 0.09959711879491806, "learning_rate": 0.002, "loss": 2.3555, "step": 156350 }, { "epoch": 0.6044440321009417, "grad_norm": 0.10747111588716507, "learning_rate": 0.002, "loss": 2.3476, "step": 156360 }, { "epoch": 0.6044826893043249, "grad_norm": 0.09978053718805313, "learning_rate": 0.002, "loss": 2.3474, "step": 156370 }, { "epoch": 0.6045213465077083, "grad_norm": 0.10574757307767868, "learning_rate": 0.002, "loss": 2.3335, "step": 156380 }, { "epoch": 0.6045600037110915, "grad_norm": 0.1058548167347908, "learning_rate": 0.002, "loss": 2.3339, "step": 156390 }, { "epoch": 0.6045986609144748, "grad_norm": 0.11262711137533188, "learning_rate": 0.002, "loss": 2.3428, "step": 156400 }, { "epoch": 0.604637318117858, "grad_norm": 0.10832460969686508, "learning_rate": 0.002, "loss": 2.3349, "step": 156410 }, { "epoch": 0.6046759753212414, "grad_norm": 0.10032720118761063, "learning_rate": 0.002, "loss": 2.3591, "step": 156420 }, { "epoch": 0.6047146325246247, "grad_norm": 0.1258467733860016, "learning_rate": 0.002, "loss": 2.337, "step": 156430 }, { "epoch": 0.6047532897280079, "grad_norm": 0.10651124268770218, "learning_rate": 0.002, "loss": 2.3386, "step": 156440 }, { "epoch": 0.6047919469313912, "grad_norm": 0.1075170561671257, "learning_rate": 0.002, "loss": 2.3358, "step": 156450 }, { "epoch": 0.6048306041347745, "grad_norm": 0.10806979238986969, "learning_rate": 0.002, "loss": 2.3516, "step": 156460 }, { "epoch": 0.6048692613381578, "grad_norm": 0.11116418987512589, "learning_rate": 0.002, "loss": 2.3474, "step": 156470 }, { "epoch": 0.604907918541541, "grad_norm": 0.10847268253564835, "learning_rate": 0.002, "loss": 2.3394, "step": 156480 }, { "epoch": 0.6049465757449243, "grad_norm": 0.12186453491449356, "learning_rate": 0.002, "loss": 2.3522, "step": 156490 }, { "epoch": 0.6049852329483076, "grad_norm": 0.09700154513120651, "learning_rate": 0.002, "loss": 2.3425, "step": 156500 }, { "epoch": 0.6050238901516909, "grad_norm": 0.09895280003547668, "learning_rate": 0.002, "loss": 2.3324, "step": 156510 }, { "epoch": 0.6050625473550741, "grad_norm": 0.10787780582904816, "learning_rate": 0.002, "loss": 2.3427, "step": 156520 }, { "epoch": 0.6051012045584574, "grad_norm": 0.08980587869882584, "learning_rate": 0.002, "loss": 2.347, "step": 156530 }, { "epoch": 0.6051398617618406, "grad_norm": 0.09867847710847855, "learning_rate": 0.002, "loss": 2.334, "step": 156540 }, { "epoch": 0.605178518965224, "grad_norm": 0.110805444419384, "learning_rate": 0.002, "loss": 2.3581, "step": 156550 }, { "epoch": 0.6052171761686073, "grad_norm": 0.10935091972351074, "learning_rate": 0.002, "loss": 2.3408, "step": 156560 }, { "epoch": 0.6052558333719905, "grad_norm": 0.1027233675122261, "learning_rate": 0.002, "loss": 2.3296, "step": 156570 }, { "epoch": 0.6052944905753738, "grad_norm": 0.11280585825443268, "learning_rate": 0.002, "loss": 2.3371, "step": 156580 }, { "epoch": 0.6053331477787571, "grad_norm": 0.09640748798847198, "learning_rate": 0.002, "loss": 2.3363, "step": 156590 }, { "epoch": 0.6053718049821404, "grad_norm": 0.09245883673429489, "learning_rate": 0.002, "loss": 2.3318, "step": 156600 }, { "epoch": 0.6054104621855236, "grad_norm": 0.16029566526412964, "learning_rate": 0.002, "loss": 2.3408, "step": 156610 }, { "epoch": 0.6054491193889069, "grad_norm": 0.10584169626235962, "learning_rate": 0.002, "loss": 2.3359, "step": 156620 }, { "epoch": 0.6054877765922902, "grad_norm": 0.10097134113311768, "learning_rate": 0.002, "loss": 2.3539, "step": 156630 }, { "epoch": 0.6055264337956735, "grad_norm": 0.10749845951795578, "learning_rate": 0.002, "loss": 2.3479, "step": 156640 }, { "epoch": 0.6055650909990568, "grad_norm": 0.10039525479078293, "learning_rate": 0.002, "loss": 2.3315, "step": 156650 }, { "epoch": 0.60560374820244, "grad_norm": 0.1171375960111618, "learning_rate": 0.002, "loss": 2.3654, "step": 156660 }, { "epoch": 0.6056424054058234, "grad_norm": 0.10131317377090454, "learning_rate": 0.002, "loss": 2.3358, "step": 156670 }, { "epoch": 0.6056810626092066, "grad_norm": 0.10574357956647873, "learning_rate": 0.002, "loss": 2.3439, "step": 156680 }, { "epoch": 0.6057197198125899, "grad_norm": 0.12090755254030228, "learning_rate": 0.002, "loss": 2.3498, "step": 156690 }, { "epoch": 0.6057583770159731, "grad_norm": 0.1064232811331749, "learning_rate": 0.002, "loss": 2.3226, "step": 156700 }, { "epoch": 0.6057970342193565, "grad_norm": 0.09819698333740234, "learning_rate": 0.002, "loss": 2.32, "step": 156710 }, { "epoch": 0.6058356914227397, "grad_norm": 0.12896254658699036, "learning_rate": 0.002, "loss": 2.3706, "step": 156720 }, { "epoch": 0.605874348626123, "grad_norm": 0.10714369267225266, "learning_rate": 0.002, "loss": 2.3552, "step": 156730 }, { "epoch": 0.6059130058295062, "grad_norm": 0.11655072122812271, "learning_rate": 0.002, "loss": 2.3545, "step": 156740 }, { "epoch": 0.6059516630328895, "grad_norm": 0.10611128062009811, "learning_rate": 0.002, "loss": 2.3411, "step": 156750 }, { "epoch": 0.6059903202362729, "grad_norm": 0.09566417336463928, "learning_rate": 0.002, "loss": 2.3322, "step": 156760 }, { "epoch": 0.6060289774396561, "grad_norm": 0.09997794032096863, "learning_rate": 0.002, "loss": 2.3529, "step": 156770 }, { "epoch": 0.6060676346430394, "grad_norm": 0.12219233810901642, "learning_rate": 0.002, "loss": 2.3385, "step": 156780 }, { "epoch": 0.6061062918464226, "grad_norm": 0.1029295101761818, "learning_rate": 0.002, "loss": 2.3436, "step": 156790 }, { "epoch": 0.606144949049806, "grad_norm": 0.09928140044212341, "learning_rate": 0.002, "loss": 2.3601, "step": 156800 }, { "epoch": 0.6061836062531892, "grad_norm": 0.11081463098526001, "learning_rate": 0.002, "loss": 2.3304, "step": 156810 }, { "epoch": 0.6062222634565725, "grad_norm": 0.11391083896160126, "learning_rate": 0.002, "loss": 2.3448, "step": 156820 }, { "epoch": 0.6062609206599557, "grad_norm": 0.12047885358333588, "learning_rate": 0.002, "loss": 2.3555, "step": 156830 }, { "epoch": 0.6062995778633391, "grad_norm": 0.1044929251074791, "learning_rate": 0.002, "loss": 2.3493, "step": 156840 }, { "epoch": 0.6063382350667224, "grad_norm": 0.1222505047917366, "learning_rate": 0.002, "loss": 2.3422, "step": 156850 }, { "epoch": 0.6063768922701056, "grad_norm": 0.10657400637865067, "learning_rate": 0.002, "loss": 2.3479, "step": 156860 }, { "epoch": 0.6064155494734889, "grad_norm": 0.10880149900913239, "learning_rate": 0.002, "loss": 2.3407, "step": 156870 }, { "epoch": 0.6064542066768722, "grad_norm": 0.10814648121595383, "learning_rate": 0.002, "loss": 2.3291, "step": 156880 }, { "epoch": 0.6064928638802555, "grad_norm": 0.09671936929225922, "learning_rate": 0.002, "loss": 2.3357, "step": 156890 }, { "epoch": 0.6065315210836387, "grad_norm": 0.10830371081829071, "learning_rate": 0.002, "loss": 2.352, "step": 156900 }, { "epoch": 0.606570178287022, "grad_norm": 0.1003207340836525, "learning_rate": 0.002, "loss": 2.3444, "step": 156910 }, { "epoch": 0.6066088354904052, "grad_norm": 0.12952585518360138, "learning_rate": 0.002, "loss": 2.341, "step": 156920 }, { "epoch": 0.6066474926937886, "grad_norm": 0.09259293973445892, "learning_rate": 0.002, "loss": 2.347, "step": 156930 }, { "epoch": 0.6066861498971718, "grad_norm": 0.1007172018289566, "learning_rate": 0.002, "loss": 2.3396, "step": 156940 }, { "epoch": 0.6067248071005551, "grad_norm": 0.13747857511043549, "learning_rate": 0.002, "loss": 2.3404, "step": 156950 }, { "epoch": 0.6067634643039383, "grad_norm": 0.11612991988658905, "learning_rate": 0.002, "loss": 2.3487, "step": 156960 }, { "epoch": 0.6068021215073217, "grad_norm": 0.11666108667850494, "learning_rate": 0.002, "loss": 2.3428, "step": 156970 }, { "epoch": 0.606840778710705, "grad_norm": 0.10534953325986862, "learning_rate": 0.002, "loss": 2.3301, "step": 156980 }, { "epoch": 0.6068794359140882, "grad_norm": 0.11357153207063675, "learning_rate": 0.002, "loss": 2.3388, "step": 156990 }, { "epoch": 0.6069180931174715, "grad_norm": 0.08578240126371384, "learning_rate": 0.002, "loss": 2.3395, "step": 157000 }, { "epoch": 0.6069567503208548, "grad_norm": 0.12134891003370285, "learning_rate": 0.002, "loss": 2.3288, "step": 157010 }, { "epoch": 0.6069954075242381, "grad_norm": 0.10584051162004471, "learning_rate": 0.002, "loss": 2.358, "step": 157020 }, { "epoch": 0.6070340647276213, "grad_norm": 0.11070246249437332, "learning_rate": 0.002, "loss": 2.3469, "step": 157030 }, { "epoch": 0.6070727219310046, "grad_norm": 0.11073992401361465, "learning_rate": 0.002, "loss": 2.3442, "step": 157040 }, { "epoch": 0.607111379134388, "grad_norm": 0.10716310143470764, "learning_rate": 0.002, "loss": 2.3507, "step": 157050 }, { "epoch": 0.6071500363377712, "grad_norm": 0.11177654564380646, "learning_rate": 0.002, "loss": 2.3475, "step": 157060 }, { "epoch": 0.6071886935411545, "grad_norm": 0.10450034588575363, "learning_rate": 0.002, "loss": 2.3424, "step": 157070 }, { "epoch": 0.6072273507445377, "grad_norm": 0.12878698110580444, "learning_rate": 0.002, "loss": 2.3406, "step": 157080 }, { "epoch": 0.607266007947921, "grad_norm": 0.10073508322238922, "learning_rate": 0.002, "loss": 2.353, "step": 157090 }, { "epoch": 0.6073046651513043, "grad_norm": 0.1201476901769638, "learning_rate": 0.002, "loss": 2.3715, "step": 157100 }, { "epoch": 0.6073433223546876, "grad_norm": 0.10388737916946411, "learning_rate": 0.002, "loss": 2.3462, "step": 157110 }, { "epoch": 0.6073819795580708, "grad_norm": 0.09778716415166855, "learning_rate": 0.002, "loss": 2.3424, "step": 157120 }, { "epoch": 0.6074206367614541, "grad_norm": 0.12194199860095978, "learning_rate": 0.002, "loss": 2.3439, "step": 157130 }, { "epoch": 0.6074592939648374, "grad_norm": 0.11156753450632095, "learning_rate": 0.002, "loss": 2.3408, "step": 157140 }, { "epoch": 0.6074979511682207, "grad_norm": 0.10753699392080307, "learning_rate": 0.002, "loss": 2.3297, "step": 157150 }, { "epoch": 0.607536608371604, "grad_norm": 0.10721046477556229, "learning_rate": 0.002, "loss": 2.3383, "step": 157160 }, { "epoch": 0.6075752655749872, "grad_norm": 0.10429911315441132, "learning_rate": 0.002, "loss": 2.3518, "step": 157170 }, { "epoch": 0.6076139227783706, "grad_norm": 0.12225025147199631, "learning_rate": 0.002, "loss": 2.3415, "step": 157180 }, { "epoch": 0.6076525799817538, "grad_norm": 0.08854345232248306, "learning_rate": 0.002, "loss": 2.3358, "step": 157190 }, { "epoch": 0.6076912371851371, "grad_norm": 0.09663832187652588, "learning_rate": 0.002, "loss": 2.3536, "step": 157200 }, { "epoch": 0.6077298943885203, "grad_norm": 0.09602214395999908, "learning_rate": 0.002, "loss": 2.3404, "step": 157210 }, { "epoch": 0.6077685515919037, "grad_norm": 0.11233019083738327, "learning_rate": 0.002, "loss": 2.34, "step": 157220 }, { "epoch": 0.6078072087952869, "grad_norm": 0.10633435100317001, "learning_rate": 0.002, "loss": 2.3272, "step": 157230 }, { "epoch": 0.6078458659986702, "grad_norm": 0.09058328717947006, "learning_rate": 0.002, "loss": 2.3408, "step": 157240 }, { "epoch": 0.6078845232020534, "grad_norm": 0.10827624797821045, "learning_rate": 0.002, "loss": 2.3411, "step": 157250 }, { "epoch": 0.6079231804054368, "grad_norm": 0.11266107112169266, "learning_rate": 0.002, "loss": 2.3545, "step": 157260 }, { "epoch": 0.60796183760882, "grad_norm": 0.10475073009729385, "learning_rate": 0.002, "loss": 2.3508, "step": 157270 }, { "epoch": 0.6080004948122033, "grad_norm": 0.09929769486188889, "learning_rate": 0.002, "loss": 2.3414, "step": 157280 }, { "epoch": 0.6080391520155866, "grad_norm": 0.11319760233163834, "learning_rate": 0.002, "loss": 2.3385, "step": 157290 }, { "epoch": 0.6080778092189698, "grad_norm": 0.10194125026464462, "learning_rate": 0.002, "loss": 2.3426, "step": 157300 }, { "epoch": 0.6081164664223532, "grad_norm": 0.12336062639951706, "learning_rate": 0.002, "loss": 2.353, "step": 157310 }, { "epoch": 0.6081551236257364, "grad_norm": 0.11531714349985123, "learning_rate": 0.002, "loss": 2.3447, "step": 157320 }, { "epoch": 0.6081937808291197, "grad_norm": 0.10148004442453384, "learning_rate": 0.002, "loss": 2.3318, "step": 157330 }, { "epoch": 0.6082324380325029, "grad_norm": 0.09875989705324173, "learning_rate": 0.002, "loss": 2.3411, "step": 157340 }, { "epoch": 0.6082710952358863, "grad_norm": 0.119626984000206, "learning_rate": 0.002, "loss": 2.3382, "step": 157350 }, { "epoch": 0.6083097524392695, "grad_norm": 0.11117689311504364, "learning_rate": 0.002, "loss": 2.3315, "step": 157360 }, { "epoch": 0.6083484096426528, "grad_norm": 0.12111735343933105, "learning_rate": 0.002, "loss": 2.3465, "step": 157370 }, { "epoch": 0.608387066846036, "grad_norm": 0.09148456901311874, "learning_rate": 0.002, "loss": 2.342, "step": 157380 }, { "epoch": 0.6084257240494194, "grad_norm": 0.11267891526222229, "learning_rate": 0.002, "loss": 2.3454, "step": 157390 }, { "epoch": 0.6084643812528027, "grad_norm": 0.1054447740316391, "learning_rate": 0.002, "loss": 2.3441, "step": 157400 }, { "epoch": 0.6085030384561859, "grad_norm": 0.10002760589122772, "learning_rate": 0.002, "loss": 2.3437, "step": 157410 }, { "epoch": 0.6085416956595692, "grad_norm": 0.10492801666259766, "learning_rate": 0.002, "loss": 2.3397, "step": 157420 }, { "epoch": 0.6085803528629525, "grad_norm": 0.12571987509727478, "learning_rate": 0.002, "loss": 2.3446, "step": 157430 }, { "epoch": 0.6086190100663358, "grad_norm": 0.11996311694383621, "learning_rate": 0.002, "loss": 2.3349, "step": 157440 }, { "epoch": 0.608657667269719, "grad_norm": 0.10160239785909653, "learning_rate": 0.002, "loss": 2.3413, "step": 157450 }, { "epoch": 0.6086963244731023, "grad_norm": 0.0983286201953888, "learning_rate": 0.002, "loss": 2.3314, "step": 157460 }, { "epoch": 0.6087349816764855, "grad_norm": 0.10145774483680725, "learning_rate": 0.002, "loss": 2.3412, "step": 157470 }, { "epoch": 0.6087736388798689, "grad_norm": 0.13767632842063904, "learning_rate": 0.002, "loss": 2.3413, "step": 157480 }, { "epoch": 0.6088122960832522, "grad_norm": 0.11958499997854233, "learning_rate": 0.002, "loss": 2.3469, "step": 157490 }, { "epoch": 0.6088509532866354, "grad_norm": 0.10636449605226517, "learning_rate": 0.002, "loss": 2.3272, "step": 157500 }, { "epoch": 0.6088896104900187, "grad_norm": 0.1382298618555069, "learning_rate": 0.002, "loss": 2.3564, "step": 157510 }, { "epoch": 0.608928267693402, "grad_norm": 0.11528230458498001, "learning_rate": 0.002, "loss": 2.3362, "step": 157520 }, { "epoch": 0.6089669248967853, "grad_norm": 0.11574528366327286, "learning_rate": 0.002, "loss": 2.3279, "step": 157530 }, { "epoch": 0.6090055821001685, "grad_norm": 0.10157489031553268, "learning_rate": 0.002, "loss": 2.3291, "step": 157540 }, { "epoch": 0.6090442393035518, "grad_norm": 0.09453226625919342, "learning_rate": 0.002, "loss": 2.3441, "step": 157550 }, { "epoch": 0.6090828965069351, "grad_norm": 0.10535252839326859, "learning_rate": 0.002, "loss": 2.3413, "step": 157560 }, { "epoch": 0.6091215537103184, "grad_norm": 0.09338109195232391, "learning_rate": 0.002, "loss": 2.323, "step": 157570 }, { "epoch": 0.6091602109137016, "grad_norm": 0.1233820989727974, "learning_rate": 0.002, "loss": 2.3202, "step": 157580 }, { "epoch": 0.6091988681170849, "grad_norm": 0.09621302038431168, "learning_rate": 0.002, "loss": 2.347, "step": 157590 }, { "epoch": 0.6092375253204683, "grad_norm": 0.11600583046674728, "learning_rate": 0.002, "loss": 2.35, "step": 157600 }, { "epoch": 0.6092761825238515, "grad_norm": 0.10407842695713043, "learning_rate": 0.002, "loss": 2.3447, "step": 157610 }, { "epoch": 0.6093148397272348, "grad_norm": 0.08800975233316422, "learning_rate": 0.002, "loss": 2.3451, "step": 157620 }, { "epoch": 0.609353496930618, "grad_norm": 0.1047428622841835, "learning_rate": 0.002, "loss": 2.3421, "step": 157630 }, { "epoch": 0.6093921541340014, "grad_norm": 0.1025058701634407, "learning_rate": 0.002, "loss": 2.3316, "step": 157640 }, { "epoch": 0.6094308113373846, "grad_norm": 0.10395365953445435, "learning_rate": 0.002, "loss": 2.3512, "step": 157650 }, { "epoch": 0.6094694685407679, "grad_norm": 0.13296107947826385, "learning_rate": 0.002, "loss": 2.3349, "step": 157660 }, { "epoch": 0.6095081257441511, "grad_norm": 0.11022595316171646, "learning_rate": 0.002, "loss": 2.3309, "step": 157670 }, { "epoch": 0.6095467829475344, "grad_norm": 0.1114407628774643, "learning_rate": 0.002, "loss": 2.3416, "step": 157680 }, { "epoch": 0.6095854401509178, "grad_norm": 0.11167177557945251, "learning_rate": 0.002, "loss": 2.341, "step": 157690 }, { "epoch": 0.609624097354301, "grad_norm": 0.1167573407292366, "learning_rate": 0.002, "loss": 2.3462, "step": 157700 }, { "epoch": 0.6096627545576843, "grad_norm": 0.10351765155792236, "learning_rate": 0.002, "loss": 2.3388, "step": 157710 }, { "epoch": 0.6097014117610675, "grad_norm": 0.11204945296049118, "learning_rate": 0.002, "loss": 2.3431, "step": 157720 }, { "epoch": 0.6097400689644509, "grad_norm": 0.12831169366836548, "learning_rate": 0.002, "loss": 2.342, "step": 157730 }, { "epoch": 0.6097787261678341, "grad_norm": 0.11609380692243576, "learning_rate": 0.002, "loss": 2.3312, "step": 157740 }, { "epoch": 0.6098173833712174, "grad_norm": 0.0960511639714241, "learning_rate": 0.002, "loss": 2.3594, "step": 157750 }, { "epoch": 0.6098560405746006, "grad_norm": 0.11027969419956207, "learning_rate": 0.002, "loss": 2.3294, "step": 157760 }, { "epoch": 0.609894697777984, "grad_norm": 0.1651405245065689, "learning_rate": 0.002, "loss": 2.3532, "step": 157770 }, { "epoch": 0.6099333549813672, "grad_norm": 0.12306205928325653, "learning_rate": 0.002, "loss": 2.3618, "step": 157780 }, { "epoch": 0.6099720121847505, "grad_norm": 0.10880153626203537, "learning_rate": 0.002, "loss": 2.33, "step": 157790 }, { "epoch": 0.6100106693881338, "grad_norm": 0.10684796422719955, "learning_rate": 0.002, "loss": 2.3464, "step": 157800 }, { "epoch": 0.6100493265915171, "grad_norm": 0.0998319610953331, "learning_rate": 0.002, "loss": 2.3625, "step": 157810 }, { "epoch": 0.6100879837949004, "grad_norm": 0.1425325572490692, "learning_rate": 0.002, "loss": 2.3286, "step": 157820 }, { "epoch": 0.6101266409982836, "grad_norm": 0.09976795315742493, "learning_rate": 0.002, "loss": 2.349, "step": 157830 }, { "epoch": 0.6101652982016669, "grad_norm": 0.10247869044542313, "learning_rate": 0.002, "loss": 2.321, "step": 157840 }, { "epoch": 0.6102039554050501, "grad_norm": 0.10698070377111435, "learning_rate": 0.002, "loss": 2.3557, "step": 157850 }, { "epoch": 0.6102426126084335, "grad_norm": 0.11454866081476212, "learning_rate": 0.002, "loss": 2.3372, "step": 157860 }, { "epoch": 0.6102812698118167, "grad_norm": 0.09456183016300201, "learning_rate": 0.002, "loss": 2.3411, "step": 157870 }, { "epoch": 0.6103199270152, "grad_norm": 0.10078510642051697, "learning_rate": 0.002, "loss": 2.3455, "step": 157880 }, { "epoch": 0.6103585842185832, "grad_norm": 0.14035581052303314, "learning_rate": 0.002, "loss": 2.3485, "step": 157890 }, { "epoch": 0.6103972414219666, "grad_norm": 0.09534963965415955, "learning_rate": 0.002, "loss": 2.3373, "step": 157900 }, { "epoch": 0.6104358986253499, "grad_norm": 0.09866109490394592, "learning_rate": 0.002, "loss": 2.3391, "step": 157910 }, { "epoch": 0.6104745558287331, "grad_norm": 0.10259261727333069, "learning_rate": 0.002, "loss": 2.3504, "step": 157920 }, { "epoch": 0.6105132130321164, "grad_norm": 0.1016649454832077, "learning_rate": 0.002, "loss": 2.337, "step": 157930 }, { "epoch": 0.6105518702354997, "grad_norm": 0.10683011263608932, "learning_rate": 0.002, "loss": 2.3264, "step": 157940 }, { "epoch": 0.610590527438883, "grad_norm": 0.11511304974555969, "learning_rate": 0.002, "loss": 2.338, "step": 157950 }, { "epoch": 0.6106291846422662, "grad_norm": 0.09644953161478043, "learning_rate": 0.002, "loss": 2.3321, "step": 157960 }, { "epoch": 0.6106678418456495, "grad_norm": 0.13170172274112701, "learning_rate": 0.002, "loss": 2.3426, "step": 157970 }, { "epoch": 0.6107064990490328, "grad_norm": 0.13730081915855408, "learning_rate": 0.002, "loss": 2.3571, "step": 157980 }, { "epoch": 0.6107451562524161, "grad_norm": 0.0939185619354248, "learning_rate": 0.002, "loss": 2.3357, "step": 157990 }, { "epoch": 0.6107838134557994, "grad_norm": 0.11765972524881363, "learning_rate": 0.002, "loss": 2.3315, "step": 158000 }, { "epoch": 0.6108224706591826, "grad_norm": 0.10731218010187149, "learning_rate": 0.002, "loss": 2.3458, "step": 158010 }, { "epoch": 0.6108611278625659, "grad_norm": 0.10578600317239761, "learning_rate": 0.002, "loss": 2.3439, "step": 158020 }, { "epoch": 0.6108997850659492, "grad_norm": 0.097139373421669, "learning_rate": 0.002, "loss": 2.357, "step": 158030 }, { "epoch": 0.6109384422693325, "grad_norm": 0.10506080090999603, "learning_rate": 0.002, "loss": 2.3334, "step": 158040 }, { "epoch": 0.6109770994727157, "grad_norm": 0.10108962655067444, "learning_rate": 0.002, "loss": 2.3498, "step": 158050 }, { "epoch": 0.611015756676099, "grad_norm": 0.10646820813417435, "learning_rate": 0.002, "loss": 2.344, "step": 158060 }, { "epoch": 0.6110544138794823, "grad_norm": 0.11607225984334946, "learning_rate": 0.002, "loss": 2.3384, "step": 158070 }, { "epoch": 0.6110930710828656, "grad_norm": 0.1082080751657486, "learning_rate": 0.002, "loss": 2.3479, "step": 158080 }, { "epoch": 0.6111317282862488, "grad_norm": 0.0966542437672615, "learning_rate": 0.002, "loss": 2.3387, "step": 158090 }, { "epoch": 0.6111703854896321, "grad_norm": 0.10189072787761688, "learning_rate": 0.002, "loss": 2.3329, "step": 158100 }, { "epoch": 0.6112090426930155, "grad_norm": 0.09674588590860367, "learning_rate": 0.002, "loss": 2.3334, "step": 158110 }, { "epoch": 0.6112476998963987, "grad_norm": 0.10019045323133469, "learning_rate": 0.002, "loss": 2.3395, "step": 158120 }, { "epoch": 0.611286357099782, "grad_norm": 0.09782741218805313, "learning_rate": 0.002, "loss": 2.3393, "step": 158130 }, { "epoch": 0.6113250143031652, "grad_norm": 0.1437297761440277, "learning_rate": 0.002, "loss": 2.334, "step": 158140 }, { "epoch": 0.6113636715065486, "grad_norm": 0.09866262972354889, "learning_rate": 0.002, "loss": 2.3443, "step": 158150 }, { "epoch": 0.6114023287099318, "grad_norm": 0.10425934195518494, "learning_rate": 0.002, "loss": 2.3424, "step": 158160 }, { "epoch": 0.6114409859133151, "grad_norm": 0.0972549319267273, "learning_rate": 0.002, "loss": 2.3488, "step": 158170 }, { "epoch": 0.6114796431166983, "grad_norm": 0.10543369501829147, "learning_rate": 0.002, "loss": 2.345, "step": 158180 }, { "epoch": 0.6115183003200817, "grad_norm": 0.1036418080329895, "learning_rate": 0.002, "loss": 2.3553, "step": 158190 }, { "epoch": 0.611556957523465, "grad_norm": 0.14457623660564423, "learning_rate": 0.002, "loss": 2.3386, "step": 158200 }, { "epoch": 0.6115956147268482, "grad_norm": 0.11930687725543976, "learning_rate": 0.002, "loss": 2.3433, "step": 158210 }, { "epoch": 0.6116342719302315, "grad_norm": 0.1089979037642479, "learning_rate": 0.002, "loss": 2.3458, "step": 158220 }, { "epoch": 0.6116729291336147, "grad_norm": 0.12308035790920258, "learning_rate": 0.002, "loss": 2.3364, "step": 158230 }, { "epoch": 0.6117115863369981, "grad_norm": 0.10787247121334076, "learning_rate": 0.002, "loss": 2.3402, "step": 158240 }, { "epoch": 0.6117502435403813, "grad_norm": 0.11869463324546814, "learning_rate": 0.002, "loss": 2.3518, "step": 158250 }, { "epoch": 0.6117889007437646, "grad_norm": 0.11029456555843353, "learning_rate": 0.002, "loss": 2.3411, "step": 158260 }, { "epoch": 0.6118275579471478, "grad_norm": 0.1282932013273239, "learning_rate": 0.002, "loss": 2.3491, "step": 158270 }, { "epoch": 0.6118662151505312, "grad_norm": 0.1101757362484932, "learning_rate": 0.002, "loss": 2.329, "step": 158280 }, { "epoch": 0.6119048723539144, "grad_norm": 0.11456190049648285, "learning_rate": 0.002, "loss": 2.3502, "step": 158290 }, { "epoch": 0.6119435295572977, "grad_norm": 0.13458718359470367, "learning_rate": 0.002, "loss": 2.3479, "step": 158300 }, { "epoch": 0.611982186760681, "grad_norm": 0.10693209618330002, "learning_rate": 0.002, "loss": 2.3552, "step": 158310 }, { "epoch": 0.6120208439640643, "grad_norm": 0.09521332383155823, "learning_rate": 0.002, "loss": 2.343, "step": 158320 }, { "epoch": 0.6120595011674476, "grad_norm": 0.09104505181312561, "learning_rate": 0.002, "loss": 2.3494, "step": 158330 }, { "epoch": 0.6120981583708308, "grad_norm": 0.1118229329586029, "learning_rate": 0.002, "loss": 2.3319, "step": 158340 }, { "epoch": 0.6121368155742141, "grad_norm": 0.09892457723617554, "learning_rate": 0.002, "loss": 2.3437, "step": 158350 }, { "epoch": 0.6121754727775974, "grad_norm": 0.10420117527246475, "learning_rate": 0.002, "loss": 2.36, "step": 158360 }, { "epoch": 0.6122141299809807, "grad_norm": 0.10320697724819183, "learning_rate": 0.002, "loss": 2.3414, "step": 158370 }, { "epoch": 0.6122527871843639, "grad_norm": 0.08881300687789917, "learning_rate": 0.002, "loss": 2.3407, "step": 158380 }, { "epoch": 0.6122914443877472, "grad_norm": 0.10302989929914474, "learning_rate": 0.002, "loss": 2.3343, "step": 158390 }, { "epoch": 0.6123301015911304, "grad_norm": 0.12415283173322678, "learning_rate": 0.002, "loss": 2.3321, "step": 158400 }, { "epoch": 0.6123687587945138, "grad_norm": 0.09723592549562454, "learning_rate": 0.002, "loss": 2.3164, "step": 158410 }, { "epoch": 0.612407415997897, "grad_norm": 0.10486268252134323, "learning_rate": 0.002, "loss": 2.3364, "step": 158420 }, { "epoch": 0.6124460732012803, "grad_norm": 0.10682319849729538, "learning_rate": 0.002, "loss": 2.3431, "step": 158430 }, { "epoch": 0.6124847304046636, "grad_norm": 0.09781024605035782, "learning_rate": 0.002, "loss": 2.3438, "step": 158440 }, { "epoch": 0.6125233876080469, "grad_norm": 0.11337437480688095, "learning_rate": 0.002, "loss": 2.3428, "step": 158450 }, { "epoch": 0.6125620448114302, "grad_norm": 0.10655515640974045, "learning_rate": 0.002, "loss": 2.341, "step": 158460 }, { "epoch": 0.6126007020148134, "grad_norm": 0.12020324915647507, "learning_rate": 0.002, "loss": 2.3511, "step": 158470 }, { "epoch": 0.6126393592181967, "grad_norm": 0.09384416043758392, "learning_rate": 0.002, "loss": 2.3514, "step": 158480 }, { "epoch": 0.61267801642158, "grad_norm": 0.10570283234119415, "learning_rate": 0.002, "loss": 2.3344, "step": 158490 }, { "epoch": 0.6127166736249633, "grad_norm": 0.13045048713684082, "learning_rate": 0.002, "loss": 2.3272, "step": 158500 }, { "epoch": 0.6127553308283465, "grad_norm": 0.10316865146160126, "learning_rate": 0.002, "loss": 2.3421, "step": 158510 }, { "epoch": 0.6127939880317298, "grad_norm": 0.10627957433462143, "learning_rate": 0.002, "loss": 2.3362, "step": 158520 }, { "epoch": 0.6128326452351132, "grad_norm": 0.12966708838939667, "learning_rate": 0.002, "loss": 2.3358, "step": 158530 }, { "epoch": 0.6128713024384964, "grad_norm": 0.11290135234594345, "learning_rate": 0.002, "loss": 2.3494, "step": 158540 }, { "epoch": 0.6129099596418797, "grad_norm": 0.09105128049850464, "learning_rate": 0.002, "loss": 2.3579, "step": 158550 }, { "epoch": 0.6129486168452629, "grad_norm": 0.10901861637830734, "learning_rate": 0.002, "loss": 2.3292, "step": 158560 }, { "epoch": 0.6129872740486463, "grad_norm": 0.10790381580591202, "learning_rate": 0.002, "loss": 2.3533, "step": 158570 }, { "epoch": 0.6130259312520295, "grad_norm": 0.10326814651489258, "learning_rate": 0.002, "loss": 2.3411, "step": 158580 }, { "epoch": 0.6130645884554128, "grad_norm": 0.11023823916912079, "learning_rate": 0.002, "loss": 2.3357, "step": 158590 }, { "epoch": 0.613103245658796, "grad_norm": 0.13924099504947662, "learning_rate": 0.002, "loss": 2.3476, "step": 158600 }, { "epoch": 0.6131419028621793, "grad_norm": 0.11610828340053558, "learning_rate": 0.002, "loss": 2.3611, "step": 158610 }, { "epoch": 0.6131805600655627, "grad_norm": 0.11992853134870529, "learning_rate": 0.002, "loss": 2.3338, "step": 158620 }, { "epoch": 0.6132192172689459, "grad_norm": 0.11580061167478561, "learning_rate": 0.002, "loss": 2.357, "step": 158630 }, { "epoch": 0.6132578744723292, "grad_norm": 0.11151894181966782, "learning_rate": 0.002, "loss": 2.3358, "step": 158640 }, { "epoch": 0.6132965316757124, "grad_norm": 0.10860782116651535, "learning_rate": 0.002, "loss": 2.3452, "step": 158650 }, { "epoch": 0.6133351888790958, "grad_norm": 0.09790189564228058, "learning_rate": 0.002, "loss": 2.3525, "step": 158660 }, { "epoch": 0.613373846082479, "grad_norm": 0.13056792318820953, "learning_rate": 0.002, "loss": 2.3435, "step": 158670 }, { "epoch": 0.6134125032858623, "grad_norm": 0.09393236041069031, "learning_rate": 0.002, "loss": 2.351, "step": 158680 }, { "epoch": 0.6134511604892455, "grad_norm": 0.10080570727586746, "learning_rate": 0.002, "loss": 2.3418, "step": 158690 }, { "epoch": 0.6134898176926289, "grad_norm": 0.11203580349683762, "learning_rate": 0.002, "loss": 2.336, "step": 158700 }, { "epoch": 0.6135284748960121, "grad_norm": 0.11879559606313705, "learning_rate": 0.002, "loss": 2.3427, "step": 158710 }, { "epoch": 0.6135671320993954, "grad_norm": 0.102769635617733, "learning_rate": 0.002, "loss": 2.3543, "step": 158720 }, { "epoch": 0.6136057893027786, "grad_norm": 0.12622100114822388, "learning_rate": 0.002, "loss": 2.3475, "step": 158730 }, { "epoch": 0.613644446506162, "grad_norm": 0.09503379464149475, "learning_rate": 0.002, "loss": 2.3366, "step": 158740 }, { "epoch": 0.6136831037095453, "grad_norm": 0.12669788300991058, "learning_rate": 0.002, "loss": 2.3529, "step": 158750 }, { "epoch": 0.6137217609129285, "grad_norm": 0.11435175687074661, "learning_rate": 0.002, "loss": 2.3472, "step": 158760 }, { "epoch": 0.6137604181163118, "grad_norm": 0.10128527134656906, "learning_rate": 0.002, "loss": 2.3353, "step": 158770 }, { "epoch": 0.613799075319695, "grad_norm": 0.10439524799585342, "learning_rate": 0.002, "loss": 2.3467, "step": 158780 }, { "epoch": 0.6138377325230784, "grad_norm": 0.09249827265739441, "learning_rate": 0.002, "loss": 2.3338, "step": 158790 }, { "epoch": 0.6138763897264616, "grad_norm": 0.1345771849155426, "learning_rate": 0.002, "loss": 2.3386, "step": 158800 }, { "epoch": 0.6139150469298449, "grad_norm": 0.09422943741083145, "learning_rate": 0.002, "loss": 2.3643, "step": 158810 }, { "epoch": 0.6139537041332281, "grad_norm": 0.10849691182374954, "learning_rate": 0.002, "loss": 2.3524, "step": 158820 }, { "epoch": 0.6139923613366115, "grad_norm": 0.11163496226072311, "learning_rate": 0.002, "loss": 2.3336, "step": 158830 }, { "epoch": 0.6140310185399948, "grad_norm": 0.10774195939302444, "learning_rate": 0.002, "loss": 2.3522, "step": 158840 }, { "epoch": 0.614069675743378, "grad_norm": 0.13317424058914185, "learning_rate": 0.002, "loss": 2.3478, "step": 158850 }, { "epoch": 0.6141083329467613, "grad_norm": 0.10984144359827042, "learning_rate": 0.002, "loss": 2.3548, "step": 158860 }, { "epoch": 0.6141469901501446, "grad_norm": 0.10041702538728714, "learning_rate": 0.002, "loss": 2.3471, "step": 158870 }, { "epoch": 0.6141856473535279, "grad_norm": 0.10012153536081314, "learning_rate": 0.002, "loss": 2.3443, "step": 158880 }, { "epoch": 0.6142243045569111, "grad_norm": 0.11682833731174469, "learning_rate": 0.002, "loss": 2.3662, "step": 158890 }, { "epoch": 0.6142629617602944, "grad_norm": 0.10764049738645554, "learning_rate": 0.002, "loss": 2.3234, "step": 158900 }, { "epoch": 0.6143016189636777, "grad_norm": 0.10517425090074539, "learning_rate": 0.002, "loss": 2.3319, "step": 158910 }, { "epoch": 0.614340276167061, "grad_norm": 0.10669191181659698, "learning_rate": 0.002, "loss": 2.3486, "step": 158920 }, { "epoch": 0.6143789333704442, "grad_norm": 0.10772227495908737, "learning_rate": 0.002, "loss": 2.3454, "step": 158930 }, { "epoch": 0.6144175905738275, "grad_norm": 0.0992896556854248, "learning_rate": 0.002, "loss": 2.3526, "step": 158940 }, { "epoch": 0.6144562477772108, "grad_norm": 0.09629623591899872, "learning_rate": 0.002, "loss": 2.3355, "step": 158950 }, { "epoch": 0.6144949049805941, "grad_norm": 0.09165239334106445, "learning_rate": 0.002, "loss": 2.3506, "step": 158960 }, { "epoch": 0.6145335621839774, "grad_norm": 0.12956255674362183, "learning_rate": 0.002, "loss": 2.3492, "step": 158970 }, { "epoch": 0.6145722193873606, "grad_norm": 0.1039843037724495, "learning_rate": 0.002, "loss": 2.3515, "step": 158980 }, { "epoch": 0.6146108765907439, "grad_norm": 0.11637865006923676, "learning_rate": 0.002, "loss": 2.3476, "step": 158990 }, { "epoch": 0.6146495337941272, "grad_norm": 0.114281065762043, "learning_rate": 0.002, "loss": 2.3382, "step": 159000 }, { "epoch": 0.6146881909975105, "grad_norm": 0.12161819636821747, "learning_rate": 0.002, "loss": 2.3311, "step": 159010 }, { "epoch": 0.6147268482008937, "grad_norm": 0.10355739295482635, "learning_rate": 0.002, "loss": 2.3429, "step": 159020 }, { "epoch": 0.614765505404277, "grad_norm": 0.1096806526184082, "learning_rate": 0.002, "loss": 2.3383, "step": 159030 }, { "epoch": 0.6148041626076604, "grad_norm": 0.11160244047641754, "learning_rate": 0.002, "loss": 2.3343, "step": 159040 }, { "epoch": 0.6148428198110436, "grad_norm": 0.11055063456296921, "learning_rate": 0.002, "loss": 2.3323, "step": 159050 }, { "epoch": 0.6148814770144269, "grad_norm": 0.10469084978103638, "learning_rate": 0.002, "loss": 2.3478, "step": 159060 }, { "epoch": 0.6149201342178101, "grad_norm": 0.121566042304039, "learning_rate": 0.002, "loss": 2.3372, "step": 159070 }, { "epoch": 0.6149587914211935, "grad_norm": 0.11239653825759888, "learning_rate": 0.002, "loss": 2.3329, "step": 159080 }, { "epoch": 0.6149974486245767, "grad_norm": 0.09685921669006348, "learning_rate": 0.002, "loss": 2.3363, "step": 159090 }, { "epoch": 0.61503610582796, "grad_norm": 0.13571828603744507, "learning_rate": 0.002, "loss": 2.3488, "step": 159100 }, { "epoch": 0.6150747630313432, "grad_norm": 0.10878980159759521, "learning_rate": 0.002, "loss": 2.3617, "step": 159110 }, { "epoch": 0.6151134202347266, "grad_norm": 0.09652600437402725, "learning_rate": 0.002, "loss": 2.3528, "step": 159120 }, { "epoch": 0.6151520774381098, "grad_norm": 0.103079654276371, "learning_rate": 0.002, "loss": 2.3344, "step": 159130 }, { "epoch": 0.6151907346414931, "grad_norm": 0.09848003089427948, "learning_rate": 0.002, "loss": 2.3445, "step": 159140 }, { "epoch": 0.6152293918448763, "grad_norm": 0.1262648105621338, "learning_rate": 0.002, "loss": 2.3533, "step": 159150 }, { "epoch": 0.6152680490482596, "grad_norm": 0.1000993624329567, "learning_rate": 0.002, "loss": 2.332, "step": 159160 }, { "epoch": 0.615306706251643, "grad_norm": 0.12465333938598633, "learning_rate": 0.002, "loss": 2.3425, "step": 159170 }, { "epoch": 0.6153453634550262, "grad_norm": 0.11284952610731125, "learning_rate": 0.002, "loss": 2.3647, "step": 159180 }, { "epoch": 0.6153840206584095, "grad_norm": 0.11351795494556427, "learning_rate": 0.002, "loss": 2.3483, "step": 159190 }, { "epoch": 0.6154226778617927, "grad_norm": 0.10991384088993073, "learning_rate": 0.002, "loss": 2.3364, "step": 159200 }, { "epoch": 0.6154613350651761, "grad_norm": 0.11290508508682251, "learning_rate": 0.002, "loss": 2.3461, "step": 159210 }, { "epoch": 0.6154999922685593, "grad_norm": 0.11408476531505585, "learning_rate": 0.002, "loss": 2.3536, "step": 159220 }, { "epoch": 0.6155386494719426, "grad_norm": 0.10141675174236298, "learning_rate": 0.002, "loss": 2.3396, "step": 159230 }, { "epoch": 0.6155773066753258, "grad_norm": 0.12357048690319061, "learning_rate": 0.002, "loss": 2.3343, "step": 159240 }, { "epoch": 0.6156159638787092, "grad_norm": 0.09966486692428589, "learning_rate": 0.002, "loss": 2.3344, "step": 159250 }, { "epoch": 0.6156546210820925, "grad_norm": 0.11067885905504227, "learning_rate": 0.002, "loss": 2.3631, "step": 159260 }, { "epoch": 0.6156932782854757, "grad_norm": 0.12197203934192657, "learning_rate": 0.002, "loss": 2.3415, "step": 159270 }, { "epoch": 0.615731935488859, "grad_norm": 0.10108727961778641, "learning_rate": 0.002, "loss": 2.3425, "step": 159280 }, { "epoch": 0.6157705926922423, "grad_norm": 0.09892028570175171, "learning_rate": 0.002, "loss": 2.3329, "step": 159290 }, { "epoch": 0.6158092498956256, "grad_norm": 0.1011892557144165, "learning_rate": 0.002, "loss": 2.3519, "step": 159300 }, { "epoch": 0.6158479070990088, "grad_norm": 0.1394670456647873, "learning_rate": 0.002, "loss": 2.3384, "step": 159310 }, { "epoch": 0.6158865643023921, "grad_norm": 0.11167936027050018, "learning_rate": 0.002, "loss": 2.3498, "step": 159320 }, { "epoch": 0.6159252215057753, "grad_norm": 0.10931015014648438, "learning_rate": 0.002, "loss": 2.3552, "step": 159330 }, { "epoch": 0.6159638787091587, "grad_norm": 0.10654870420694351, "learning_rate": 0.002, "loss": 2.3375, "step": 159340 }, { "epoch": 0.616002535912542, "grad_norm": 0.09276770800352097, "learning_rate": 0.002, "loss": 2.3377, "step": 159350 }, { "epoch": 0.6160411931159252, "grad_norm": 0.09629074484109879, "learning_rate": 0.002, "loss": 2.3427, "step": 159360 }, { "epoch": 0.6160798503193085, "grad_norm": 0.10154861956834793, "learning_rate": 0.002, "loss": 2.3409, "step": 159370 }, { "epoch": 0.6161185075226918, "grad_norm": 0.09723439067602158, "learning_rate": 0.002, "loss": 2.3304, "step": 159380 }, { "epoch": 0.6161571647260751, "grad_norm": 0.25782158970832825, "learning_rate": 0.002, "loss": 2.3261, "step": 159390 }, { "epoch": 0.6161958219294583, "grad_norm": 0.10921783745288849, "learning_rate": 0.002, "loss": 2.3366, "step": 159400 }, { "epoch": 0.6162344791328416, "grad_norm": 0.09872537851333618, "learning_rate": 0.002, "loss": 2.3398, "step": 159410 }, { "epoch": 0.6162731363362249, "grad_norm": 0.10222181677818298, "learning_rate": 0.002, "loss": 2.3428, "step": 159420 }, { "epoch": 0.6163117935396082, "grad_norm": 0.1002846285700798, "learning_rate": 0.002, "loss": 2.3427, "step": 159430 }, { "epoch": 0.6163504507429914, "grad_norm": 0.1227022334933281, "learning_rate": 0.002, "loss": 2.3329, "step": 159440 }, { "epoch": 0.6163891079463747, "grad_norm": 0.10258164256811142, "learning_rate": 0.002, "loss": 2.357, "step": 159450 }, { "epoch": 0.6164277651497581, "grad_norm": 0.13086551427841187, "learning_rate": 0.002, "loss": 2.3413, "step": 159460 }, { "epoch": 0.6164664223531413, "grad_norm": 0.1075073629617691, "learning_rate": 0.002, "loss": 2.3355, "step": 159470 }, { "epoch": 0.6165050795565246, "grad_norm": 0.09900263696908951, "learning_rate": 0.002, "loss": 2.3376, "step": 159480 }, { "epoch": 0.6165437367599078, "grad_norm": 0.09153147041797638, "learning_rate": 0.002, "loss": 2.3359, "step": 159490 }, { "epoch": 0.6165823939632911, "grad_norm": 0.11093005537986755, "learning_rate": 0.002, "loss": 2.3506, "step": 159500 }, { "epoch": 0.6166210511666744, "grad_norm": 0.11436476558446884, "learning_rate": 0.002, "loss": 2.3392, "step": 159510 }, { "epoch": 0.6166597083700577, "grad_norm": 0.10130810737609863, "learning_rate": 0.002, "loss": 2.3448, "step": 159520 }, { "epoch": 0.6166983655734409, "grad_norm": 0.16572950780391693, "learning_rate": 0.002, "loss": 2.3558, "step": 159530 }, { "epoch": 0.6167370227768242, "grad_norm": 0.1082151010632515, "learning_rate": 0.002, "loss": 2.3421, "step": 159540 }, { "epoch": 0.6167756799802075, "grad_norm": 0.10624217987060547, "learning_rate": 0.002, "loss": 2.3484, "step": 159550 }, { "epoch": 0.6168143371835908, "grad_norm": 0.1009274423122406, "learning_rate": 0.002, "loss": 2.3387, "step": 159560 }, { "epoch": 0.616852994386974, "grad_norm": 0.12295461446046829, "learning_rate": 0.002, "loss": 2.3426, "step": 159570 }, { "epoch": 0.6168916515903573, "grad_norm": 0.10738131403923035, "learning_rate": 0.002, "loss": 2.3432, "step": 159580 }, { "epoch": 0.6169303087937407, "grad_norm": 0.10055730491876602, "learning_rate": 0.002, "loss": 2.3634, "step": 159590 }, { "epoch": 0.6169689659971239, "grad_norm": 0.10193420946598053, "learning_rate": 0.002, "loss": 2.3297, "step": 159600 }, { "epoch": 0.6170076232005072, "grad_norm": 0.09353253990411758, "learning_rate": 0.002, "loss": 2.349, "step": 159610 }, { "epoch": 0.6170462804038904, "grad_norm": 0.10531756281852722, "learning_rate": 0.002, "loss": 2.3301, "step": 159620 }, { "epoch": 0.6170849376072738, "grad_norm": 0.11639406532049179, "learning_rate": 0.002, "loss": 2.3512, "step": 159630 }, { "epoch": 0.617123594810657, "grad_norm": 0.2310658097267151, "learning_rate": 0.002, "loss": 2.3434, "step": 159640 }, { "epoch": 0.6171622520140403, "grad_norm": 1.0232207775115967, "learning_rate": 0.002, "loss": 2.372, "step": 159650 }, { "epoch": 0.6172009092174235, "grad_norm": 0.2197166085243225, "learning_rate": 0.002, "loss": 2.3532, "step": 159660 }, { "epoch": 0.6172395664208069, "grad_norm": 0.17389705777168274, "learning_rate": 0.002, "loss": 2.3574, "step": 159670 }, { "epoch": 0.6172782236241902, "grad_norm": 0.10403891652822495, "learning_rate": 0.002, "loss": 2.3301, "step": 159680 }, { "epoch": 0.6173168808275734, "grad_norm": 0.10457657277584076, "learning_rate": 0.002, "loss": 2.348, "step": 159690 }, { "epoch": 0.6173555380309567, "grad_norm": 0.10556718707084656, "learning_rate": 0.002, "loss": 2.341, "step": 159700 }, { "epoch": 0.6173941952343399, "grad_norm": 0.11601948738098145, "learning_rate": 0.002, "loss": 2.3484, "step": 159710 }, { "epoch": 0.6174328524377233, "grad_norm": 0.1278046816587448, "learning_rate": 0.002, "loss": 2.3402, "step": 159720 }, { "epoch": 0.6174715096411065, "grad_norm": 0.10599952191114426, "learning_rate": 0.002, "loss": 2.3519, "step": 159730 }, { "epoch": 0.6175101668444898, "grad_norm": 0.10278228670358658, "learning_rate": 0.002, "loss": 2.3384, "step": 159740 }, { "epoch": 0.617548824047873, "grad_norm": 0.10085756331682205, "learning_rate": 0.002, "loss": 2.352, "step": 159750 }, { "epoch": 0.6175874812512564, "grad_norm": 0.13335645198822021, "learning_rate": 0.002, "loss": 2.3402, "step": 159760 }, { "epoch": 0.6176261384546397, "grad_norm": 0.11378223448991776, "learning_rate": 0.002, "loss": 2.3511, "step": 159770 }, { "epoch": 0.6176647956580229, "grad_norm": 0.10508736968040466, "learning_rate": 0.002, "loss": 2.3379, "step": 159780 }, { "epoch": 0.6177034528614062, "grad_norm": 0.10805348306894302, "learning_rate": 0.002, "loss": 2.3562, "step": 159790 }, { "epoch": 0.6177421100647895, "grad_norm": 0.10574381053447723, "learning_rate": 0.002, "loss": 2.3437, "step": 159800 }, { "epoch": 0.6177807672681728, "grad_norm": 0.11227120459079742, "learning_rate": 0.002, "loss": 2.3515, "step": 159810 }, { "epoch": 0.617819424471556, "grad_norm": 0.11124691367149353, "learning_rate": 0.002, "loss": 2.3377, "step": 159820 }, { "epoch": 0.6178580816749393, "grad_norm": 0.11466017365455627, "learning_rate": 0.002, "loss": 2.3371, "step": 159830 }, { "epoch": 0.6178967388783226, "grad_norm": 0.11591002345085144, "learning_rate": 0.002, "loss": 2.3457, "step": 159840 }, { "epoch": 0.6179353960817059, "grad_norm": 0.09957937896251678, "learning_rate": 0.002, "loss": 2.3463, "step": 159850 }, { "epoch": 0.6179740532850891, "grad_norm": 0.1121915876865387, "learning_rate": 0.002, "loss": 2.3509, "step": 159860 }, { "epoch": 0.6180127104884724, "grad_norm": 0.09734398871660233, "learning_rate": 0.002, "loss": 2.3532, "step": 159870 }, { "epoch": 0.6180513676918556, "grad_norm": 0.08913884311914444, "learning_rate": 0.002, "loss": 2.3375, "step": 159880 }, { "epoch": 0.618090024895239, "grad_norm": 0.12423180788755417, "learning_rate": 0.002, "loss": 2.3427, "step": 159890 }, { "epoch": 0.6181286820986223, "grad_norm": 0.1204834133386612, "learning_rate": 0.002, "loss": 2.3448, "step": 159900 }, { "epoch": 0.6181673393020055, "grad_norm": 0.09985199570655823, "learning_rate": 0.002, "loss": 2.3423, "step": 159910 }, { "epoch": 0.6182059965053888, "grad_norm": 0.1060582771897316, "learning_rate": 0.002, "loss": 2.3533, "step": 159920 }, { "epoch": 0.6182446537087721, "grad_norm": 0.11842060089111328, "learning_rate": 0.002, "loss": 2.3418, "step": 159930 }, { "epoch": 0.6182833109121554, "grad_norm": 0.09812531620264053, "learning_rate": 0.002, "loss": 2.3402, "step": 159940 }, { "epoch": 0.6183219681155386, "grad_norm": 0.10997223854064941, "learning_rate": 0.002, "loss": 2.3414, "step": 159950 }, { "epoch": 0.6183606253189219, "grad_norm": 0.1334807425737381, "learning_rate": 0.002, "loss": 2.3344, "step": 159960 }, { "epoch": 0.6183992825223052, "grad_norm": 0.1621674746274948, "learning_rate": 0.002, "loss": 2.351, "step": 159970 }, { "epoch": 0.6184379397256885, "grad_norm": 0.10352502018213272, "learning_rate": 0.002, "loss": 2.337, "step": 159980 }, { "epoch": 0.6184765969290718, "grad_norm": 0.10434258729219437, "learning_rate": 0.002, "loss": 2.3469, "step": 159990 }, { "epoch": 0.618515254132455, "grad_norm": 0.08978249132633209, "learning_rate": 0.002, "loss": 2.3367, "step": 160000 }, { "epoch": 0.6185539113358384, "grad_norm": 0.09855789691209793, "learning_rate": 0.002, "loss": 2.3437, "step": 160010 }, { "epoch": 0.6185925685392216, "grad_norm": 0.11199983209371567, "learning_rate": 0.002, "loss": 2.3645, "step": 160020 }, { "epoch": 0.6186312257426049, "grad_norm": 0.1278078407049179, "learning_rate": 0.002, "loss": 2.3475, "step": 160030 }, { "epoch": 0.6186698829459881, "grad_norm": 0.10843171924352646, "learning_rate": 0.002, "loss": 2.3452, "step": 160040 }, { "epoch": 0.6187085401493715, "grad_norm": 0.11941742151975632, "learning_rate": 0.002, "loss": 2.3365, "step": 160050 }, { "epoch": 0.6187471973527547, "grad_norm": 0.1163121685385704, "learning_rate": 0.002, "loss": 2.3335, "step": 160060 }, { "epoch": 0.618785854556138, "grad_norm": 0.09469784796237946, "learning_rate": 0.002, "loss": 2.3392, "step": 160070 }, { "epoch": 0.6188245117595212, "grad_norm": 0.1268109381198883, "learning_rate": 0.002, "loss": 2.3382, "step": 160080 }, { "epoch": 0.6188631689629045, "grad_norm": 0.09837017208337784, "learning_rate": 0.002, "loss": 2.3574, "step": 160090 }, { "epoch": 0.6189018261662879, "grad_norm": 0.15756139159202576, "learning_rate": 0.002, "loss": 2.3389, "step": 160100 }, { "epoch": 0.6189404833696711, "grad_norm": 0.11236690729856491, "learning_rate": 0.002, "loss": 2.3514, "step": 160110 }, { "epoch": 0.6189791405730544, "grad_norm": 0.10446345806121826, "learning_rate": 0.002, "loss": 2.3384, "step": 160120 }, { "epoch": 0.6190177977764376, "grad_norm": 0.11776310205459595, "learning_rate": 0.002, "loss": 2.3425, "step": 160130 }, { "epoch": 0.619056454979821, "grad_norm": 0.1054246798157692, "learning_rate": 0.002, "loss": 2.341, "step": 160140 }, { "epoch": 0.6190951121832042, "grad_norm": 0.10899261385202408, "learning_rate": 0.002, "loss": 2.3439, "step": 160150 }, { "epoch": 0.6191337693865875, "grad_norm": 0.1234220638871193, "learning_rate": 0.002, "loss": 2.3498, "step": 160160 }, { "epoch": 0.6191724265899707, "grad_norm": 0.09885939955711365, "learning_rate": 0.002, "loss": 2.3492, "step": 160170 }, { "epoch": 0.6192110837933541, "grad_norm": 0.10989391803741455, "learning_rate": 0.002, "loss": 2.3423, "step": 160180 }, { "epoch": 0.6192497409967374, "grad_norm": 0.1203991025686264, "learning_rate": 0.002, "loss": 2.3519, "step": 160190 }, { "epoch": 0.6192883982001206, "grad_norm": 0.1000213697552681, "learning_rate": 0.002, "loss": 2.3501, "step": 160200 }, { "epoch": 0.6193270554035039, "grad_norm": 0.11637286096811295, "learning_rate": 0.002, "loss": 2.3424, "step": 160210 }, { "epoch": 0.6193657126068872, "grad_norm": 0.11612696945667267, "learning_rate": 0.002, "loss": 2.3471, "step": 160220 }, { "epoch": 0.6194043698102705, "grad_norm": 0.10938242822885513, "learning_rate": 0.002, "loss": 2.3565, "step": 160230 }, { "epoch": 0.6194430270136537, "grad_norm": 0.10004662722349167, "learning_rate": 0.002, "loss": 2.336, "step": 160240 }, { "epoch": 0.619481684217037, "grad_norm": 0.09453094005584717, "learning_rate": 0.002, "loss": 2.3267, "step": 160250 }, { "epoch": 0.6195203414204202, "grad_norm": 0.11575914174318314, "learning_rate": 0.002, "loss": 2.3365, "step": 160260 }, { "epoch": 0.6195589986238036, "grad_norm": 0.1202038824558258, "learning_rate": 0.002, "loss": 2.3448, "step": 160270 }, { "epoch": 0.6195976558271868, "grad_norm": 0.11634848266839981, "learning_rate": 0.002, "loss": 2.349, "step": 160280 }, { "epoch": 0.6196363130305701, "grad_norm": 0.09766193479299545, "learning_rate": 0.002, "loss": 2.342, "step": 160290 }, { "epoch": 0.6196749702339533, "grad_norm": 0.13540469110012054, "learning_rate": 0.002, "loss": 2.3556, "step": 160300 }, { "epoch": 0.6197136274373367, "grad_norm": 0.11753330379724503, "learning_rate": 0.002, "loss": 2.3457, "step": 160310 }, { "epoch": 0.61975228464072, "grad_norm": 0.11685632914304733, "learning_rate": 0.002, "loss": 2.3506, "step": 160320 }, { "epoch": 0.6197909418441032, "grad_norm": 0.12577968835830688, "learning_rate": 0.002, "loss": 2.3526, "step": 160330 }, { "epoch": 0.6198295990474865, "grad_norm": 0.10916007310152054, "learning_rate": 0.002, "loss": 2.3516, "step": 160340 }, { "epoch": 0.6198682562508698, "grad_norm": 0.09644216299057007, "learning_rate": 0.002, "loss": 2.3309, "step": 160350 }, { "epoch": 0.6199069134542531, "grad_norm": 0.11642160266637802, "learning_rate": 0.002, "loss": 2.3443, "step": 160360 }, { "epoch": 0.6199455706576363, "grad_norm": 0.11780939996242523, "learning_rate": 0.002, "loss": 2.342, "step": 160370 }, { "epoch": 0.6199842278610196, "grad_norm": 0.10777207463979721, "learning_rate": 0.002, "loss": 2.3257, "step": 160380 }, { "epoch": 0.620022885064403, "grad_norm": 0.09815847128629684, "learning_rate": 0.002, "loss": 2.3343, "step": 160390 }, { "epoch": 0.6200615422677862, "grad_norm": 0.10464230924844742, "learning_rate": 0.002, "loss": 2.3329, "step": 160400 }, { "epoch": 0.6201001994711695, "grad_norm": 0.0949997529387474, "learning_rate": 0.002, "loss": 2.3574, "step": 160410 }, { "epoch": 0.6201388566745527, "grad_norm": 0.10524674504995346, "learning_rate": 0.002, "loss": 2.3402, "step": 160420 }, { "epoch": 0.620177513877936, "grad_norm": 0.09734108299016953, "learning_rate": 0.002, "loss": 2.3422, "step": 160430 }, { "epoch": 0.6202161710813193, "grad_norm": 0.11617741733789444, "learning_rate": 0.002, "loss": 2.3437, "step": 160440 }, { "epoch": 0.6202548282847026, "grad_norm": 0.10688306391239166, "learning_rate": 0.002, "loss": 2.3313, "step": 160450 }, { "epoch": 0.6202934854880858, "grad_norm": 0.10394486784934998, "learning_rate": 0.002, "loss": 2.337, "step": 160460 }, { "epoch": 0.6203321426914691, "grad_norm": 0.10238736122846603, "learning_rate": 0.002, "loss": 2.3356, "step": 160470 }, { "epoch": 0.6203707998948524, "grad_norm": 0.11343356966972351, "learning_rate": 0.002, "loss": 2.3465, "step": 160480 }, { "epoch": 0.6204094570982357, "grad_norm": 0.11415211856365204, "learning_rate": 0.002, "loss": 2.3496, "step": 160490 }, { "epoch": 0.620448114301619, "grad_norm": 0.12570852041244507, "learning_rate": 0.002, "loss": 2.3423, "step": 160500 }, { "epoch": 0.6204867715050022, "grad_norm": 0.11392834037542343, "learning_rate": 0.002, "loss": 2.3357, "step": 160510 }, { "epoch": 0.6205254287083856, "grad_norm": 0.10089188814163208, "learning_rate": 0.002, "loss": 2.361, "step": 160520 }, { "epoch": 0.6205640859117688, "grad_norm": 0.10702072083950043, "learning_rate": 0.002, "loss": 2.3578, "step": 160530 }, { "epoch": 0.6206027431151521, "grad_norm": 0.13690462708473206, "learning_rate": 0.002, "loss": 2.3469, "step": 160540 }, { "epoch": 0.6206414003185353, "grad_norm": 0.09837660938501358, "learning_rate": 0.002, "loss": 2.3427, "step": 160550 }, { "epoch": 0.6206800575219187, "grad_norm": 0.10660859942436218, "learning_rate": 0.002, "loss": 2.3366, "step": 160560 }, { "epoch": 0.6207187147253019, "grad_norm": 0.10659363120794296, "learning_rate": 0.002, "loss": 2.3422, "step": 160570 }, { "epoch": 0.6207573719286852, "grad_norm": 0.13043168187141418, "learning_rate": 0.002, "loss": 2.3348, "step": 160580 }, { "epoch": 0.6207960291320684, "grad_norm": 0.11595991253852844, "learning_rate": 0.002, "loss": 2.3386, "step": 160590 }, { "epoch": 0.6208346863354518, "grad_norm": 0.10297106951475143, "learning_rate": 0.002, "loss": 2.3532, "step": 160600 }, { "epoch": 0.620873343538835, "grad_norm": 0.10592272877693176, "learning_rate": 0.002, "loss": 2.3353, "step": 160610 }, { "epoch": 0.6209120007422183, "grad_norm": 0.12115828692913055, "learning_rate": 0.002, "loss": 2.344, "step": 160620 }, { "epoch": 0.6209506579456016, "grad_norm": 0.12772081792354584, "learning_rate": 0.002, "loss": 2.3337, "step": 160630 }, { "epoch": 0.6209893151489848, "grad_norm": 0.10656872391700745, "learning_rate": 0.002, "loss": 2.3328, "step": 160640 }, { "epoch": 0.6210279723523682, "grad_norm": 0.12006162106990814, "learning_rate": 0.002, "loss": 2.3406, "step": 160650 }, { "epoch": 0.6210666295557514, "grad_norm": 0.1134830042719841, "learning_rate": 0.002, "loss": 2.3284, "step": 160660 }, { "epoch": 0.6211052867591347, "grad_norm": 0.09791508316993713, "learning_rate": 0.002, "loss": 2.3432, "step": 160670 }, { "epoch": 0.6211439439625179, "grad_norm": 0.1255888044834137, "learning_rate": 0.002, "loss": 2.3412, "step": 160680 }, { "epoch": 0.6211826011659013, "grad_norm": 0.09975399821996689, "learning_rate": 0.002, "loss": 2.3381, "step": 160690 }, { "epoch": 0.6212212583692845, "grad_norm": 0.11866630613803864, "learning_rate": 0.002, "loss": 2.3322, "step": 160700 }, { "epoch": 0.6212599155726678, "grad_norm": 0.09884458780288696, "learning_rate": 0.002, "loss": 2.3266, "step": 160710 }, { "epoch": 0.621298572776051, "grad_norm": 0.0869886502623558, "learning_rate": 0.002, "loss": 2.3451, "step": 160720 }, { "epoch": 0.6213372299794344, "grad_norm": 0.11433932930231094, "learning_rate": 0.002, "loss": 2.3514, "step": 160730 }, { "epoch": 0.6213758871828177, "grad_norm": 0.09779713302850723, "learning_rate": 0.002, "loss": 2.336, "step": 160740 }, { "epoch": 0.6214145443862009, "grad_norm": 0.125816211104393, "learning_rate": 0.002, "loss": 2.3444, "step": 160750 }, { "epoch": 0.6214532015895842, "grad_norm": 0.10931852459907532, "learning_rate": 0.002, "loss": 2.358, "step": 160760 }, { "epoch": 0.6214918587929675, "grad_norm": 0.10296256095170975, "learning_rate": 0.002, "loss": 2.3491, "step": 160770 }, { "epoch": 0.6215305159963508, "grad_norm": 0.10576235502958298, "learning_rate": 0.002, "loss": 2.3529, "step": 160780 }, { "epoch": 0.621569173199734, "grad_norm": 0.1022237166762352, "learning_rate": 0.002, "loss": 2.3394, "step": 160790 }, { "epoch": 0.6216078304031173, "grad_norm": 0.11737734824419022, "learning_rate": 0.002, "loss": 2.3264, "step": 160800 }, { "epoch": 0.6216464876065005, "grad_norm": 0.10755621641874313, "learning_rate": 0.002, "loss": 2.3478, "step": 160810 }, { "epoch": 0.6216851448098839, "grad_norm": 0.12070505321025848, "learning_rate": 0.002, "loss": 2.3488, "step": 160820 }, { "epoch": 0.6217238020132672, "grad_norm": 0.0936044454574585, "learning_rate": 0.002, "loss": 2.3518, "step": 160830 }, { "epoch": 0.6217624592166504, "grad_norm": 0.09852291643619537, "learning_rate": 0.002, "loss": 2.3399, "step": 160840 }, { "epoch": 0.6218011164200337, "grad_norm": 0.09821783006191254, "learning_rate": 0.002, "loss": 2.3449, "step": 160850 }, { "epoch": 0.621839773623417, "grad_norm": 0.10328206419944763, "learning_rate": 0.002, "loss": 2.3458, "step": 160860 }, { "epoch": 0.6218784308268003, "grad_norm": 0.09731963276863098, "learning_rate": 0.002, "loss": 2.3326, "step": 160870 }, { "epoch": 0.6219170880301835, "grad_norm": 0.11714418232440948, "learning_rate": 0.002, "loss": 2.344, "step": 160880 }, { "epoch": 0.6219557452335668, "grad_norm": 0.10939300060272217, "learning_rate": 0.002, "loss": 2.3293, "step": 160890 }, { "epoch": 0.6219944024369501, "grad_norm": 0.11220026016235352, "learning_rate": 0.002, "loss": 2.3333, "step": 160900 }, { "epoch": 0.6220330596403334, "grad_norm": 0.11689543724060059, "learning_rate": 0.002, "loss": 2.3414, "step": 160910 }, { "epoch": 0.6220717168437166, "grad_norm": 0.12207639217376709, "learning_rate": 0.002, "loss": 2.3486, "step": 160920 }, { "epoch": 0.6221103740470999, "grad_norm": 0.1033955067396164, "learning_rate": 0.002, "loss": 2.3494, "step": 160930 }, { "epoch": 0.6221490312504833, "grad_norm": 0.09711343795061111, "learning_rate": 0.002, "loss": 2.3463, "step": 160940 }, { "epoch": 0.6221876884538665, "grad_norm": 0.10052355378866196, "learning_rate": 0.002, "loss": 2.3397, "step": 160950 }, { "epoch": 0.6222263456572498, "grad_norm": 0.12841641902923584, "learning_rate": 0.002, "loss": 2.3592, "step": 160960 }, { "epoch": 0.622265002860633, "grad_norm": 0.1063733622431755, "learning_rate": 0.002, "loss": 2.3441, "step": 160970 }, { "epoch": 0.6223036600640164, "grad_norm": 0.11179991811513901, "learning_rate": 0.002, "loss": 2.3486, "step": 160980 }, { "epoch": 0.6223423172673996, "grad_norm": 0.10117033869028091, "learning_rate": 0.002, "loss": 2.3456, "step": 160990 }, { "epoch": 0.6223809744707829, "grad_norm": 0.11976698786020279, "learning_rate": 0.002, "loss": 2.3587, "step": 161000 }, { "epoch": 0.6224196316741661, "grad_norm": 0.09096930921077728, "learning_rate": 0.002, "loss": 2.3478, "step": 161010 }, { "epoch": 0.6224582888775494, "grad_norm": 0.23469658195972443, "learning_rate": 0.002, "loss": 2.3479, "step": 161020 }, { "epoch": 0.6224969460809328, "grad_norm": 0.11037572473287582, "learning_rate": 0.002, "loss": 2.3399, "step": 161030 }, { "epoch": 0.622535603284316, "grad_norm": 0.10270857810974121, "learning_rate": 0.002, "loss": 2.3572, "step": 161040 }, { "epoch": 0.6225742604876993, "grad_norm": 0.1143588274717331, "learning_rate": 0.002, "loss": 2.3407, "step": 161050 }, { "epoch": 0.6226129176910825, "grad_norm": 0.11809398233890533, "learning_rate": 0.002, "loss": 2.3359, "step": 161060 }, { "epoch": 0.6226515748944659, "grad_norm": 0.10981971025466919, "learning_rate": 0.002, "loss": 2.3389, "step": 161070 }, { "epoch": 0.6226902320978491, "grad_norm": 0.09778551012277603, "learning_rate": 0.002, "loss": 2.3207, "step": 161080 }, { "epoch": 0.6227288893012324, "grad_norm": 0.10928311198949814, "learning_rate": 0.002, "loss": 2.3495, "step": 161090 }, { "epoch": 0.6227675465046156, "grad_norm": 0.1072680875658989, "learning_rate": 0.002, "loss": 2.3432, "step": 161100 }, { "epoch": 0.622806203707999, "grad_norm": 0.22394028306007385, "learning_rate": 0.002, "loss": 2.3451, "step": 161110 }, { "epoch": 0.6228448609113822, "grad_norm": 0.10629655420780182, "learning_rate": 0.002, "loss": 2.3379, "step": 161120 }, { "epoch": 0.6228835181147655, "grad_norm": 0.09425670653581619, "learning_rate": 0.002, "loss": 2.3394, "step": 161130 }, { "epoch": 0.6229221753181488, "grad_norm": 0.10633596777915955, "learning_rate": 0.002, "loss": 2.3428, "step": 161140 }, { "epoch": 0.6229608325215321, "grad_norm": 0.10779435187578201, "learning_rate": 0.002, "loss": 2.3344, "step": 161150 }, { "epoch": 0.6229994897249154, "grad_norm": 0.12639832496643066, "learning_rate": 0.002, "loss": 2.3544, "step": 161160 }, { "epoch": 0.6230381469282986, "grad_norm": 0.11051980406045914, "learning_rate": 0.002, "loss": 2.357, "step": 161170 }, { "epoch": 0.6230768041316819, "grad_norm": 0.09174934774637222, "learning_rate": 0.002, "loss": 2.3398, "step": 161180 }, { "epoch": 0.6231154613350651, "grad_norm": 0.11634857207536697, "learning_rate": 0.002, "loss": 2.3443, "step": 161190 }, { "epoch": 0.6231541185384485, "grad_norm": 0.10571297258138657, "learning_rate": 0.002, "loss": 2.3444, "step": 161200 }, { "epoch": 0.6231927757418317, "grad_norm": 0.09662492573261261, "learning_rate": 0.002, "loss": 2.3582, "step": 161210 }, { "epoch": 0.623231432945215, "grad_norm": 0.10626404732465744, "learning_rate": 0.002, "loss": 2.3313, "step": 161220 }, { "epoch": 0.6232700901485982, "grad_norm": 0.10699160397052765, "learning_rate": 0.002, "loss": 2.347, "step": 161230 }, { "epoch": 0.6233087473519816, "grad_norm": 0.10607574135065079, "learning_rate": 0.002, "loss": 2.3478, "step": 161240 }, { "epoch": 0.6233474045553649, "grad_norm": 0.10362876951694489, "learning_rate": 0.002, "loss": 2.3536, "step": 161250 }, { "epoch": 0.6233860617587481, "grad_norm": 0.10847746580839157, "learning_rate": 0.002, "loss": 2.3514, "step": 161260 }, { "epoch": 0.6234247189621314, "grad_norm": 0.13175007700920105, "learning_rate": 0.002, "loss": 2.3473, "step": 161270 }, { "epoch": 0.6234633761655147, "grad_norm": 0.10136456042528152, "learning_rate": 0.002, "loss": 2.3481, "step": 161280 }, { "epoch": 0.623502033368898, "grad_norm": 0.09823547303676605, "learning_rate": 0.002, "loss": 2.3317, "step": 161290 }, { "epoch": 0.6235406905722812, "grad_norm": 0.12443459033966064, "learning_rate": 0.002, "loss": 2.3477, "step": 161300 }, { "epoch": 0.6235793477756645, "grad_norm": 0.12469448894262314, "learning_rate": 0.002, "loss": 2.3362, "step": 161310 }, { "epoch": 0.6236180049790478, "grad_norm": 0.11748787015676498, "learning_rate": 0.002, "loss": 2.3667, "step": 161320 }, { "epoch": 0.6236566621824311, "grad_norm": 0.12585628032684326, "learning_rate": 0.002, "loss": 2.3515, "step": 161330 }, { "epoch": 0.6236953193858144, "grad_norm": 0.2920219302177429, "learning_rate": 0.002, "loss": 2.3396, "step": 161340 }, { "epoch": 0.6237339765891976, "grad_norm": 0.09079938381910324, "learning_rate": 0.002, "loss": 2.3374, "step": 161350 }, { "epoch": 0.6237726337925809, "grad_norm": 0.12725763022899628, "learning_rate": 0.002, "loss": 2.3418, "step": 161360 }, { "epoch": 0.6238112909959642, "grad_norm": 0.1162029504776001, "learning_rate": 0.002, "loss": 2.3582, "step": 161370 }, { "epoch": 0.6238499481993475, "grad_norm": 0.10406900942325592, "learning_rate": 0.002, "loss": 2.3468, "step": 161380 }, { "epoch": 0.6238886054027307, "grad_norm": 0.10797575861215591, "learning_rate": 0.002, "loss": 2.3462, "step": 161390 }, { "epoch": 0.623927262606114, "grad_norm": 0.1059408187866211, "learning_rate": 0.002, "loss": 2.3399, "step": 161400 }, { "epoch": 0.6239659198094973, "grad_norm": 0.09478353708982468, "learning_rate": 0.002, "loss": 2.3511, "step": 161410 }, { "epoch": 0.6240045770128806, "grad_norm": 0.1414909064769745, "learning_rate": 0.002, "loss": 2.345, "step": 161420 }, { "epoch": 0.6240432342162638, "grad_norm": 0.10855178534984589, "learning_rate": 0.002, "loss": 2.3406, "step": 161430 }, { "epoch": 0.6240818914196471, "grad_norm": 0.09712377935647964, "learning_rate": 0.002, "loss": 2.3563, "step": 161440 }, { "epoch": 0.6241205486230305, "grad_norm": 0.09870415180921555, "learning_rate": 0.002, "loss": 2.3371, "step": 161450 }, { "epoch": 0.6241592058264137, "grad_norm": 0.1139429435133934, "learning_rate": 0.002, "loss": 2.3479, "step": 161460 }, { "epoch": 0.624197863029797, "grad_norm": 0.10842197388410568, "learning_rate": 0.002, "loss": 2.3351, "step": 161470 }, { "epoch": 0.6242365202331802, "grad_norm": 0.12628144025802612, "learning_rate": 0.002, "loss": 2.3475, "step": 161480 }, { "epoch": 0.6242751774365636, "grad_norm": 0.11068233102560043, "learning_rate": 0.002, "loss": 2.3339, "step": 161490 }, { "epoch": 0.6243138346399468, "grad_norm": 0.12281452119350433, "learning_rate": 0.002, "loss": 2.3346, "step": 161500 }, { "epoch": 0.6243524918433301, "grad_norm": 0.10054417699575424, "learning_rate": 0.002, "loss": 2.3487, "step": 161510 }, { "epoch": 0.6243911490467133, "grad_norm": 0.10512567311525345, "learning_rate": 0.002, "loss": 2.3409, "step": 161520 }, { "epoch": 0.6244298062500967, "grad_norm": 0.10423226654529572, "learning_rate": 0.002, "loss": 2.3532, "step": 161530 }, { "epoch": 0.62446846345348, "grad_norm": 0.11877862364053726, "learning_rate": 0.002, "loss": 2.3579, "step": 161540 }, { "epoch": 0.6245071206568632, "grad_norm": 0.10388844460248947, "learning_rate": 0.002, "loss": 2.3427, "step": 161550 }, { "epoch": 0.6245457778602465, "grad_norm": 0.094859778881073, "learning_rate": 0.002, "loss": 2.3277, "step": 161560 }, { "epoch": 0.6245844350636297, "grad_norm": 0.12273389846086502, "learning_rate": 0.002, "loss": 2.3356, "step": 161570 }, { "epoch": 0.6246230922670131, "grad_norm": 0.142424076795578, "learning_rate": 0.002, "loss": 2.3543, "step": 161580 }, { "epoch": 0.6246617494703963, "grad_norm": 0.1083383709192276, "learning_rate": 0.002, "loss": 2.3527, "step": 161590 }, { "epoch": 0.6247004066737796, "grad_norm": 0.10823900252580643, "learning_rate": 0.002, "loss": 2.3525, "step": 161600 }, { "epoch": 0.6247390638771628, "grad_norm": 0.10325956344604492, "learning_rate": 0.002, "loss": 2.3532, "step": 161610 }, { "epoch": 0.6247777210805462, "grad_norm": 0.1117747575044632, "learning_rate": 0.002, "loss": 2.3353, "step": 161620 }, { "epoch": 0.6248163782839294, "grad_norm": 0.10606677085161209, "learning_rate": 0.002, "loss": 2.3448, "step": 161630 }, { "epoch": 0.6248550354873127, "grad_norm": 0.11204840987920761, "learning_rate": 0.002, "loss": 2.354, "step": 161640 }, { "epoch": 0.624893692690696, "grad_norm": 0.1343681663274765, "learning_rate": 0.002, "loss": 2.354, "step": 161650 }, { "epoch": 0.6249323498940793, "grad_norm": 0.11574757844209671, "learning_rate": 0.002, "loss": 2.3491, "step": 161660 }, { "epoch": 0.6249710070974626, "grad_norm": 0.11187739670276642, "learning_rate": 0.002, "loss": 2.3543, "step": 161670 }, { "epoch": 0.6250096643008458, "grad_norm": 0.11739975214004517, "learning_rate": 0.002, "loss": 2.3432, "step": 161680 }, { "epoch": 0.6250483215042291, "grad_norm": 0.1051754280924797, "learning_rate": 0.002, "loss": 2.3442, "step": 161690 }, { "epoch": 0.6250869787076124, "grad_norm": 0.09944341331720352, "learning_rate": 0.002, "loss": 2.3497, "step": 161700 }, { "epoch": 0.6251256359109957, "grad_norm": 0.11645132303237915, "learning_rate": 0.002, "loss": 2.356, "step": 161710 }, { "epoch": 0.6251642931143789, "grad_norm": 0.10068979114294052, "learning_rate": 0.002, "loss": 2.3229, "step": 161720 }, { "epoch": 0.6252029503177622, "grad_norm": 0.10064230114221573, "learning_rate": 0.002, "loss": 2.3427, "step": 161730 }, { "epoch": 0.6252416075211454, "grad_norm": 0.12956196069717407, "learning_rate": 0.002, "loss": 2.3283, "step": 161740 }, { "epoch": 0.6252802647245288, "grad_norm": 0.10924475640058517, "learning_rate": 0.002, "loss": 2.3404, "step": 161750 }, { "epoch": 0.625318921927912, "grad_norm": 0.10532253235578537, "learning_rate": 0.002, "loss": 2.3335, "step": 161760 }, { "epoch": 0.6253575791312953, "grad_norm": 0.09834955632686615, "learning_rate": 0.002, "loss": 2.3361, "step": 161770 }, { "epoch": 0.6253962363346786, "grad_norm": 0.10091876238584518, "learning_rate": 0.002, "loss": 2.3582, "step": 161780 }, { "epoch": 0.6254348935380619, "grad_norm": 0.10023108124732971, "learning_rate": 0.002, "loss": 2.3446, "step": 161790 }, { "epoch": 0.6254735507414452, "grad_norm": 0.14204120635986328, "learning_rate": 0.002, "loss": 2.345, "step": 161800 }, { "epoch": 0.6255122079448284, "grad_norm": 0.12205145508050919, "learning_rate": 0.002, "loss": 2.342, "step": 161810 }, { "epoch": 0.6255508651482117, "grad_norm": 0.10635034739971161, "learning_rate": 0.002, "loss": 2.3363, "step": 161820 }, { "epoch": 0.625589522351595, "grad_norm": 0.11630839854478836, "learning_rate": 0.002, "loss": 2.353, "step": 161830 }, { "epoch": 0.6256281795549783, "grad_norm": 0.10341896116733551, "learning_rate": 0.002, "loss": 2.3632, "step": 161840 }, { "epoch": 0.6256668367583615, "grad_norm": 0.1114698201417923, "learning_rate": 0.002, "loss": 2.3363, "step": 161850 }, { "epoch": 0.6257054939617448, "grad_norm": 0.10270850360393524, "learning_rate": 0.002, "loss": 2.3391, "step": 161860 }, { "epoch": 0.6257441511651282, "grad_norm": 0.12893211841583252, "learning_rate": 0.002, "loss": 2.3402, "step": 161870 }, { "epoch": 0.6257828083685114, "grad_norm": 0.11013567447662354, "learning_rate": 0.002, "loss": 2.3319, "step": 161880 }, { "epoch": 0.6258214655718947, "grad_norm": 0.09540493786334991, "learning_rate": 0.002, "loss": 2.3353, "step": 161890 }, { "epoch": 0.6258601227752779, "grad_norm": 0.08586122840642929, "learning_rate": 0.002, "loss": 2.3442, "step": 161900 }, { "epoch": 0.6258987799786612, "grad_norm": 0.10693126171827316, "learning_rate": 0.002, "loss": 2.3541, "step": 161910 }, { "epoch": 0.6259374371820445, "grad_norm": 0.1023019403219223, "learning_rate": 0.002, "loss": 2.3445, "step": 161920 }, { "epoch": 0.6259760943854278, "grad_norm": 0.14287428557872772, "learning_rate": 0.002, "loss": 2.3461, "step": 161930 }, { "epoch": 0.626014751588811, "grad_norm": 0.10729516297578812, "learning_rate": 0.002, "loss": 2.3531, "step": 161940 }, { "epoch": 0.6260534087921943, "grad_norm": 0.11237427592277527, "learning_rate": 0.002, "loss": 2.3427, "step": 161950 }, { "epoch": 0.6260920659955777, "grad_norm": 0.09579712897539139, "learning_rate": 0.002, "loss": 2.3447, "step": 161960 }, { "epoch": 0.6261307231989609, "grad_norm": 0.12017855048179626, "learning_rate": 0.002, "loss": 2.3466, "step": 161970 }, { "epoch": 0.6261693804023442, "grad_norm": 0.09329648315906525, "learning_rate": 0.002, "loss": 2.3505, "step": 161980 }, { "epoch": 0.6262080376057274, "grad_norm": 0.1064738929271698, "learning_rate": 0.002, "loss": 2.3327, "step": 161990 }, { "epoch": 0.6262466948091108, "grad_norm": 0.10892489552497864, "learning_rate": 0.002, "loss": 2.3444, "step": 162000 }, { "epoch": 0.626285352012494, "grad_norm": 0.08925356715917587, "learning_rate": 0.002, "loss": 2.3408, "step": 162010 }, { "epoch": 0.6263240092158773, "grad_norm": 0.10236651450395584, "learning_rate": 0.002, "loss": 2.3453, "step": 162020 }, { "epoch": 0.6263626664192605, "grad_norm": 0.10449860244989395, "learning_rate": 0.002, "loss": 2.3479, "step": 162030 }, { "epoch": 0.6264013236226439, "grad_norm": 0.10375801473855972, "learning_rate": 0.002, "loss": 2.3483, "step": 162040 }, { "epoch": 0.6264399808260271, "grad_norm": 0.1058206558227539, "learning_rate": 0.002, "loss": 2.3595, "step": 162050 }, { "epoch": 0.6264786380294104, "grad_norm": 0.12303949892520905, "learning_rate": 0.002, "loss": 2.3507, "step": 162060 }, { "epoch": 0.6265172952327936, "grad_norm": 0.09614730626344681, "learning_rate": 0.002, "loss": 2.3428, "step": 162070 }, { "epoch": 0.626555952436177, "grad_norm": 0.10775220394134521, "learning_rate": 0.002, "loss": 2.3326, "step": 162080 }, { "epoch": 0.6265946096395603, "grad_norm": 0.10548651218414307, "learning_rate": 0.002, "loss": 2.3379, "step": 162090 }, { "epoch": 0.6266332668429435, "grad_norm": 0.133774533867836, "learning_rate": 0.002, "loss": 2.3389, "step": 162100 }, { "epoch": 0.6266719240463268, "grad_norm": 0.10769365727901459, "learning_rate": 0.002, "loss": 2.333, "step": 162110 }, { "epoch": 0.62671058124971, "grad_norm": 0.11196187883615494, "learning_rate": 0.002, "loss": 2.3469, "step": 162120 }, { "epoch": 0.6267492384530934, "grad_norm": 0.09751760959625244, "learning_rate": 0.002, "loss": 2.35, "step": 162130 }, { "epoch": 0.6267878956564766, "grad_norm": 0.10065893828868866, "learning_rate": 0.002, "loss": 2.3473, "step": 162140 }, { "epoch": 0.6268265528598599, "grad_norm": 0.11453534662723541, "learning_rate": 0.002, "loss": 2.352, "step": 162150 }, { "epoch": 0.6268652100632431, "grad_norm": 0.11050843447446823, "learning_rate": 0.002, "loss": 2.3365, "step": 162160 }, { "epoch": 0.6269038672666265, "grad_norm": 0.09605526179075241, "learning_rate": 0.002, "loss": 2.3264, "step": 162170 }, { "epoch": 0.6269425244700098, "grad_norm": 0.11402832716703415, "learning_rate": 0.002, "loss": 2.3362, "step": 162180 }, { "epoch": 0.626981181673393, "grad_norm": 0.10346776247024536, "learning_rate": 0.002, "loss": 2.3609, "step": 162190 }, { "epoch": 0.6270198388767763, "grad_norm": 0.11273309588432312, "learning_rate": 0.002, "loss": 2.3511, "step": 162200 }, { "epoch": 0.6270584960801596, "grad_norm": 0.11330553144216537, "learning_rate": 0.002, "loss": 2.3363, "step": 162210 }, { "epoch": 0.6270971532835429, "grad_norm": 0.11245008558034897, "learning_rate": 0.002, "loss": 2.3427, "step": 162220 }, { "epoch": 0.6271358104869261, "grad_norm": 0.0939001739025116, "learning_rate": 0.002, "loss": 2.3399, "step": 162230 }, { "epoch": 0.6271744676903094, "grad_norm": 0.11301296949386597, "learning_rate": 0.002, "loss": 2.3526, "step": 162240 }, { "epoch": 0.6272131248936927, "grad_norm": 0.10336881130933762, "learning_rate": 0.002, "loss": 2.3386, "step": 162250 }, { "epoch": 0.627251782097076, "grad_norm": 0.12161832302808762, "learning_rate": 0.002, "loss": 2.3498, "step": 162260 }, { "epoch": 0.6272904393004592, "grad_norm": 0.0986507460474968, "learning_rate": 0.002, "loss": 2.3426, "step": 162270 }, { "epoch": 0.6273290965038425, "grad_norm": 0.09472258388996124, "learning_rate": 0.002, "loss": 2.3397, "step": 162280 }, { "epoch": 0.6273677537072258, "grad_norm": 0.10754972696304321, "learning_rate": 0.002, "loss": 2.3504, "step": 162290 }, { "epoch": 0.6274064109106091, "grad_norm": 0.11803829669952393, "learning_rate": 0.002, "loss": 2.3444, "step": 162300 }, { "epoch": 0.6274450681139924, "grad_norm": 0.0967666506767273, "learning_rate": 0.002, "loss": 2.3428, "step": 162310 }, { "epoch": 0.6274837253173756, "grad_norm": 0.11755374073982239, "learning_rate": 0.002, "loss": 2.3424, "step": 162320 }, { "epoch": 0.6275223825207589, "grad_norm": 0.09747770428657532, "learning_rate": 0.002, "loss": 2.3427, "step": 162330 }, { "epoch": 0.6275610397241422, "grad_norm": 0.10866741091012955, "learning_rate": 0.002, "loss": 2.3541, "step": 162340 }, { "epoch": 0.6275996969275255, "grad_norm": 0.11812674254179001, "learning_rate": 0.002, "loss": 2.3386, "step": 162350 }, { "epoch": 0.6276383541309087, "grad_norm": 0.09401613473892212, "learning_rate": 0.002, "loss": 2.3273, "step": 162360 }, { "epoch": 0.627677011334292, "grad_norm": 0.11415007710456848, "learning_rate": 0.002, "loss": 2.3426, "step": 162370 }, { "epoch": 0.6277156685376754, "grad_norm": 0.10338752716779709, "learning_rate": 0.002, "loss": 2.3432, "step": 162380 }, { "epoch": 0.6277543257410586, "grad_norm": 0.13331128656864166, "learning_rate": 0.002, "loss": 2.3367, "step": 162390 }, { "epoch": 0.6277929829444419, "grad_norm": 0.12205469608306885, "learning_rate": 0.002, "loss": 2.3363, "step": 162400 }, { "epoch": 0.6278316401478251, "grad_norm": 0.11239419132471085, "learning_rate": 0.002, "loss": 2.3533, "step": 162410 }, { "epoch": 0.6278702973512085, "grad_norm": 0.10969708114862442, "learning_rate": 0.002, "loss": 2.3389, "step": 162420 }, { "epoch": 0.6279089545545917, "grad_norm": 0.1146257221698761, "learning_rate": 0.002, "loss": 2.3522, "step": 162430 }, { "epoch": 0.627947611757975, "grad_norm": 0.1019849181175232, "learning_rate": 0.002, "loss": 2.3345, "step": 162440 }, { "epoch": 0.6279862689613582, "grad_norm": 0.09386495500802994, "learning_rate": 0.002, "loss": 2.3345, "step": 162450 }, { "epoch": 0.6280249261647416, "grad_norm": 0.10513816773891449, "learning_rate": 0.002, "loss": 2.3394, "step": 162460 }, { "epoch": 0.6280635833681248, "grad_norm": 0.11198500543832779, "learning_rate": 0.002, "loss": 2.343, "step": 162470 }, { "epoch": 0.6281022405715081, "grad_norm": 0.09839422255754471, "learning_rate": 0.002, "loss": 2.3438, "step": 162480 }, { "epoch": 0.6281408977748913, "grad_norm": 0.09457303583621979, "learning_rate": 0.002, "loss": 2.3436, "step": 162490 }, { "epoch": 0.6281795549782746, "grad_norm": 0.0936691090464592, "learning_rate": 0.002, "loss": 2.3436, "step": 162500 }, { "epoch": 0.628218212181658, "grad_norm": 0.1124194860458374, "learning_rate": 0.002, "loss": 2.3461, "step": 162510 }, { "epoch": 0.6282568693850412, "grad_norm": 0.12158659845590591, "learning_rate": 0.002, "loss": 2.3357, "step": 162520 }, { "epoch": 0.6282955265884245, "grad_norm": 0.09474817663431168, "learning_rate": 0.002, "loss": 2.3377, "step": 162530 }, { "epoch": 0.6283341837918077, "grad_norm": 0.09250221401453018, "learning_rate": 0.002, "loss": 2.3263, "step": 162540 }, { "epoch": 0.6283728409951911, "grad_norm": 0.09708566963672638, "learning_rate": 0.002, "loss": 2.3508, "step": 162550 }, { "epoch": 0.6284114981985743, "grad_norm": 0.09655487537384033, "learning_rate": 0.002, "loss": 2.3407, "step": 162560 }, { "epoch": 0.6284501554019576, "grad_norm": 0.0928114727139473, "learning_rate": 0.002, "loss": 2.3368, "step": 162570 }, { "epoch": 0.6284888126053408, "grad_norm": 0.10528218001127243, "learning_rate": 0.002, "loss": 2.3424, "step": 162580 }, { "epoch": 0.6285274698087242, "grad_norm": 0.11332085728645325, "learning_rate": 0.002, "loss": 2.3527, "step": 162590 }, { "epoch": 0.6285661270121075, "grad_norm": 0.11296750605106354, "learning_rate": 0.002, "loss": 2.3372, "step": 162600 }, { "epoch": 0.6286047842154907, "grad_norm": 0.09918206930160522, "learning_rate": 0.002, "loss": 2.3292, "step": 162610 }, { "epoch": 0.628643441418874, "grad_norm": 0.11857914924621582, "learning_rate": 0.002, "loss": 2.3469, "step": 162620 }, { "epoch": 0.6286820986222573, "grad_norm": 0.11675361543893814, "learning_rate": 0.002, "loss": 2.341, "step": 162630 }, { "epoch": 0.6287207558256406, "grad_norm": 0.13692381978034973, "learning_rate": 0.002, "loss": 2.3497, "step": 162640 }, { "epoch": 0.6287594130290238, "grad_norm": 0.08462470024824142, "learning_rate": 0.002, "loss": 2.3301, "step": 162650 }, { "epoch": 0.6287980702324071, "grad_norm": 0.11189759522676468, "learning_rate": 0.002, "loss": 2.3498, "step": 162660 }, { "epoch": 0.6288367274357903, "grad_norm": 0.12786071002483368, "learning_rate": 0.002, "loss": 2.3262, "step": 162670 }, { "epoch": 0.6288753846391737, "grad_norm": 0.09691540151834488, "learning_rate": 0.002, "loss": 2.3512, "step": 162680 }, { "epoch": 0.628914041842557, "grad_norm": 0.10311124473810196, "learning_rate": 0.002, "loss": 2.3487, "step": 162690 }, { "epoch": 0.6289526990459402, "grad_norm": 0.10573244094848633, "learning_rate": 0.002, "loss": 2.3498, "step": 162700 }, { "epoch": 0.6289913562493235, "grad_norm": 0.10980737209320068, "learning_rate": 0.002, "loss": 2.3432, "step": 162710 }, { "epoch": 0.6290300134527068, "grad_norm": 0.1420067548751831, "learning_rate": 0.002, "loss": 2.3416, "step": 162720 }, { "epoch": 0.6290686706560901, "grad_norm": 0.10817544907331467, "learning_rate": 0.002, "loss": 2.3391, "step": 162730 }, { "epoch": 0.6291073278594733, "grad_norm": 0.11977480351924896, "learning_rate": 0.002, "loss": 2.3266, "step": 162740 }, { "epoch": 0.6291459850628566, "grad_norm": 0.10848201811313629, "learning_rate": 0.002, "loss": 2.3324, "step": 162750 }, { "epoch": 0.6291846422662399, "grad_norm": 0.12340037524700165, "learning_rate": 0.002, "loss": 2.3478, "step": 162760 }, { "epoch": 0.6292232994696232, "grad_norm": 0.10151165723800659, "learning_rate": 0.002, "loss": 2.343, "step": 162770 }, { "epoch": 0.6292619566730064, "grad_norm": 0.094622902572155, "learning_rate": 0.002, "loss": 2.3346, "step": 162780 }, { "epoch": 0.6293006138763897, "grad_norm": 0.10345442593097687, "learning_rate": 0.002, "loss": 2.3373, "step": 162790 }, { "epoch": 0.629339271079773, "grad_norm": 0.11360262334346771, "learning_rate": 0.002, "loss": 2.3515, "step": 162800 }, { "epoch": 0.6293779282831563, "grad_norm": 0.11832479387521744, "learning_rate": 0.002, "loss": 2.3388, "step": 162810 }, { "epoch": 0.6294165854865396, "grad_norm": 0.11140234768390656, "learning_rate": 0.002, "loss": 2.3403, "step": 162820 }, { "epoch": 0.6294552426899228, "grad_norm": 0.16921010613441467, "learning_rate": 0.002, "loss": 2.3283, "step": 162830 }, { "epoch": 0.6294938998933061, "grad_norm": 0.1355878859758377, "learning_rate": 0.002, "loss": 2.35, "step": 162840 }, { "epoch": 0.6295325570966894, "grad_norm": 0.10031003504991531, "learning_rate": 0.002, "loss": 2.3328, "step": 162850 }, { "epoch": 0.6295712143000727, "grad_norm": 0.09619584679603577, "learning_rate": 0.002, "loss": 2.3483, "step": 162860 }, { "epoch": 0.6296098715034559, "grad_norm": 0.11223511397838593, "learning_rate": 0.002, "loss": 2.3443, "step": 162870 }, { "epoch": 0.6296485287068392, "grad_norm": 0.11276748776435852, "learning_rate": 0.002, "loss": 2.335, "step": 162880 }, { "epoch": 0.6296871859102225, "grad_norm": 0.10060160607099533, "learning_rate": 0.002, "loss": 2.347, "step": 162890 }, { "epoch": 0.6297258431136058, "grad_norm": 0.09993769228458405, "learning_rate": 0.002, "loss": 2.3337, "step": 162900 }, { "epoch": 0.629764500316989, "grad_norm": 0.09578732401132584, "learning_rate": 0.002, "loss": 2.3406, "step": 162910 }, { "epoch": 0.6298031575203723, "grad_norm": 0.16651290655136108, "learning_rate": 0.002, "loss": 2.3569, "step": 162920 }, { "epoch": 0.6298418147237557, "grad_norm": 0.10034544765949249, "learning_rate": 0.002, "loss": 2.3499, "step": 162930 }, { "epoch": 0.6298804719271389, "grad_norm": 0.11658070236444473, "learning_rate": 0.002, "loss": 2.3507, "step": 162940 }, { "epoch": 0.6299191291305222, "grad_norm": 0.08569231629371643, "learning_rate": 0.002, "loss": 2.3444, "step": 162950 }, { "epoch": 0.6299577863339054, "grad_norm": 0.760219395160675, "learning_rate": 0.002, "loss": 2.3537, "step": 162960 }, { "epoch": 0.6299964435372888, "grad_norm": 0.11948889493942261, "learning_rate": 0.002, "loss": 2.3501, "step": 162970 }, { "epoch": 0.630035100740672, "grad_norm": 0.09244943410158157, "learning_rate": 0.002, "loss": 2.3449, "step": 162980 }, { "epoch": 0.6300737579440553, "grad_norm": 0.08750931918621063, "learning_rate": 0.002, "loss": 2.3426, "step": 162990 }, { "epoch": 0.6301124151474385, "grad_norm": 0.09580099582672119, "learning_rate": 0.002, "loss": 2.3439, "step": 163000 }, { "epoch": 0.6301510723508219, "grad_norm": 0.1356991082429886, "learning_rate": 0.002, "loss": 2.3472, "step": 163010 }, { "epoch": 0.6301897295542052, "grad_norm": 0.10146520286798477, "learning_rate": 0.002, "loss": 2.3412, "step": 163020 }, { "epoch": 0.6302283867575884, "grad_norm": 0.11479398608207703, "learning_rate": 0.002, "loss": 2.3331, "step": 163030 }, { "epoch": 0.6302670439609717, "grad_norm": 0.0942661389708519, "learning_rate": 0.002, "loss": 2.3581, "step": 163040 }, { "epoch": 0.6303057011643549, "grad_norm": 0.10510151088237762, "learning_rate": 0.002, "loss": 2.3393, "step": 163050 }, { "epoch": 0.6303443583677383, "grad_norm": 0.12045959383249283, "learning_rate": 0.002, "loss": 2.3478, "step": 163060 }, { "epoch": 0.6303830155711215, "grad_norm": 0.10102223604917526, "learning_rate": 0.002, "loss": 2.3524, "step": 163070 }, { "epoch": 0.6304216727745048, "grad_norm": 0.10106303542852402, "learning_rate": 0.002, "loss": 2.3266, "step": 163080 }, { "epoch": 0.630460329977888, "grad_norm": 0.09535668045282364, "learning_rate": 0.002, "loss": 2.3361, "step": 163090 }, { "epoch": 0.6304989871812714, "grad_norm": 0.09568269550800323, "learning_rate": 0.002, "loss": 2.3391, "step": 163100 }, { "epoch": 0.6305376443846546, "grad_norm": 0.10704615712165833, "learning_rate": 0.002, "loss": 2.3462, "step": 163110 }, { "epoch": 0.6305763015880379, "grad_norm": 0.1080525666475296, "learning_rate": 0.002, "loss": 2.3438, "step": 163120 }, { "epoch": 0.6306149587914212, "grad_norm": 0.11370661854743958, "learning_rate": 0.002, "loss": 2.3459, "step": 163130 }, { "epoch": 0.6306536159948045, "grad_norm": 0.11887659877538681, "learning_rate": 0.002, "loss": 2.3387, "step": 163140 }, { "epoch": 0.6306922731981878, "grad_norm": 0.09894140809774399, "learning_rate": 0.002, "loss": 2.3467, "step": 163150 }, { "epoch": 0.630730930401571, "grad_norm": 0.0998791828751564, "learning_rate": 0.002, "loss": 2.3643, "step": 163160 }, { "epoch": 0.6307695876049543, "grad_norm": 0.11118055880069733, "learning_rate": 0.002, "loss": 2.3374, "step": 163170 }, { "epoch": 0.6308082448083376, "grad_norm": 0.10285613685846329, "learning_rate": 0.002, "loss": 2.3355, "step": 163180 }, { "epoch": 0.6308469020117209, "grad_norm": 0.11359003186225891, "learning_rate": 0.002, "loss": 2.3369, "step": 163190 }, { "epoch": 0.6308855592151041, "grad_norm": 0.1001129075884819, "learning_rate": 0.002, "loss": 2.3396, "step": 163200 }, { "epoch": 0.6309242164184874, "grad_norm": 0.09381992369890213, "learning_rate": 0.002, "loss": 2.3472, "step": 163210 }, { "epoch": 0.6309628736218706, "grad_norm": 0.1008177399635315, "learning_rate": 0.002, "loss": 2.3408, "step": 163220 }, { "epoch": 0.631001530825254, "grad_norm": 0.09887038916349411, "learning_rate": 0.002, "loss": 2.3525, "step": 163230 }, { "epoch": 0.6310401880286373, "grad_norm": 0.10974892228841782, "learning_rate": 0.002, "loss": 2.3437, "step": 163240 }, { "epoch": 0.6310788452320205, "grad_norm": 0.12041390687227249, "learning_rate": 0.002, "loss": 2.3514, "step": 163250 }, { "epoch": 0.6311175024354038, "grad_norm": 0.0948445051908493, "learning_rate": 0.002, "loss": 2.3462, "step": 163260 }, { "epoch": 0.6311561596387871, "grad_norm": 0.132850781083107, "learning_rate": 0.002, "loss": 2.36, "step": 163270 }, { "epoch": 0.6311948168421704, "grad_norm": 0.11287672817707062, "learning_rate": 0.002, "loss": 2.3321, "step": 163280 }, { "epoch": 0.6312334740455536, "grad_norm": 0.10684788227081299, "learning_rate": 0.002, "loss": 2.3329, "step": 163290 }, { "epoch": 0.6312721312489369, "grad_norm": 0.10461094975471497, "learning_rate": 0.002, "loss": 2.3497, "step": 163300 }, { "epoch": 0.6313107884523202, "grad_norm": 0.09377182275056839, "learning_rate": 0.002, "loss": 2.3319, "step": 163310 }, { "epoch": 0.6313494456557035, "grad_norm": 0.09944086521863937, "learning_rate": 0.002, "loss": 2.3393, "step": 163320 }, { "epoch": 0.6313881028590868, "grad_norm": 0.10178162902593613, "learning_rate": 0.002, "loss": 2.3379, "step": 163330 }, { "epoch": 0.63142676006247, "grad_norm": 0.1384466588497162, "learning_rate": 0.002, "loss": 2.356, "step": 163340 }, { "epoch": 0.6314654172658534, "grad_norm": 0.10060542821884155, "learning_rate": 0.002, "loss": 2.3475, "step": 163350 }, { "epoch": 0.6315040744692366, "grad_norm": 0.1142500638961792, "learning_rate": 0.002, "loss": 2.346, "step": 163360 }, { "epoch": 0.6315427316726199, "grad_norm": 0.11058689653873444, "learning_rate": 0.002, "loss": 2.3503, "step": 163370 }, { "epoch": 0.6315813888760031, "grad_norm": 0.10664571076631546, "learning_rate": 0.002, "loss": 2.3352, "step": 163380 }, { "epoch": 0.6316200460793865, "grad_norm": 0.1178840771317482, "learning_rate": 0.002, "loss": 2.3437, "step": 163390 }, { "epoch": 0.6316587032827697, "grad_norm": 0.09117285162210464, "learning_rate": 0.002, "loss": 2.3555, "step": 163400 }, { "epoch": 0.631697360486153, "grad_norm": 0.11692478507757187, "learning_rate": 0.002, "loss": 2.3532, "step": 163410 }, { "epoch": 0.6317360176895362, "grad_norm": 0.1247502863407135, "learning_rate": 0.002, "loss": 2.335, "step": 163420 }, { "epoch": 0.6317746748929195, "grad_norm": 0.11090654134750366, "learning_rate": 0.002, "loss": 2.3494, "step": 163430 }, { "epoch": 0.6318133320963029, "grad_norm": 0.11316248774528503, "learning_rate": 0.002, "loss": 2.3392, "step": 163440 }, { "epoch": 0.6318519892996861, "grad_norm": 0.101323701441288, "learning_rate": 0.002, "loss": 2.3534, "step": 163450 }, { "epoch": 0.6318906465030694, "grad_norm": 0.10291837900876999, "learning_rate": 0.002, "loss": 2.3422, "step": 163460 }, { "epoch": 0.6319293037064526, "grad_norm": 0.1195756047964096, "learning_rate": 0.002, "loss": 2.3413, "step": 163470 }, { "epoch": 0.631967960909836, "grad_norm": 0.10091858357191086, "learning_rate": 0.002, "loss": 2.3493, "step": 163480 }, { "epoch": 0.6320066181132192, "grad_norm": 0.10238576680421829, "learning_rate": 0.002, "loss": 2.334, "step": 163490 }, { "epoch": 0.6320452753166025, "grad_norm": 0.11800894141197205, "learning_rate": 0.002, "loss": 2.3413, "step": 163500 }, { "epoch": 0.6320839325199857, "grad_norm": 0.10410700738430023, "learning_rate": 0.002, "loss": 2.3413, "step": 163510 }, { "epoch": 0.6321225897233691, "grad_norm": 0.11911267787218094, "learning_rate": 0.002, "loss": 2.35, "step": 163520 }, { "epoch": 0.6321612469267524, "grad_norm": 0.10908859968185425, "learning_rate": 0.002, "loss": 2.3531, "step": 163530 }, { "epoch": 0.6321999041301356, "grad_norm": 0.10330608487129211, "learning_rate": 0.002, "loss": 2.3404, "step": 163540 }, { "epoch": 0.6322385613335189, "grad_norm": 0.11335937678813934, "learning_rate": 0.002, "loss": 2.353, "step": 163550 }, { "epoch": 0.6322772185369022, "grad_norm": 0.10461365431547165, "learning_rate": 0.002, "loss": 2.3366, "step": 163560 }, { "epoch": 0.6323158757402855, "grad_norm": 0.0950535461306572, "learning_rate": 0.002, "loss": 2.3566, "step": 163570 }, { "epoch": 0.6323545329436687, "grad_norm": 0.09518333524465561, "learning_rate": 0.002, "loss": 2.3248, "step": 163580 }, { "epoch": 0.632393190147052, "grad_norm": 0.11334793269634247, "learning_rate": 0.002, "loss": 2.3352, "step": 163590 }, { "epoch": 0.6324318473504352, "grad_norm": 0.13851404190063477, "learning_rate": 0.002, "loss": 2.3187, "step": 163600 }, { "epoch": 0.6324705045538186, "grad_norm": 0.09821225702762604, "learning_rate": 0.002, "loss": 2.3428, "step": 163610 }, { "epoch": 0.6325091617572018, "grad_norm": 0.11290562897920609, "learning_rate": 0.002, "loss": 2.3533, "step": 163620 }, { "epoch": 0.6325478189605851, "grad_norm": 0.09985516965389252, "learning_rate": 0.002, "loss": 2.3514, "step": 163630 }, { "epoch": 0.6325864761639683, "grad_norm": 0.11000210791826248, "learning_rate": 0.002, "loss": 2.3428, "step": 163640 }, { "epoch": 0.6326251333673517, "grad_norm": 0.11401853710412979, "learning_rate": 0.002, "loss": 2.3317, "step": 163650 }, { "epoch": 0.632663790570735, "grad_norm": 0.1564873307943344, "learning_rate": 0.002, "loss": 2.346, "step": 163660 }, { "epoch": 0.6327024477741182, "grad_norm": 0.10557052493095398, "learning_rate": 0.002, "loss": 2.3389, "step": 163670 }, { "epoch": 0.6327411049775015, "grad_norm": 0.09930504113435745, "learning_rate": 0.002, "loss": 2.3565, "step": 163680 }, { "epoch": 0.6327797621808848, "grad_norm": 0.10658255964517593, "learning_rate": 0.002, "loss": 2.3396, "step": 163690 }, { "epoch": 0.6328184193842681, "grad_norm": 0.09009906649589539, "learning_rate": 0.002, "loss": 2.3488, "step": 163700 }, { "epoch": 0.6328570765876513, "grad_norm": 0.09801620244979858, "learning_rate": 0.002, "loss": 2.3394, "step": 163710 }, { "epoch": 0.6328957337910346, "grad_norm": 0.09540320932865143, "learning_rate": 0.002, "loss": 2.3448, "step": 163720 }, { "epoch": 0.632934390994418, "grad_norm": 0.11449936777353287, "learning_rate": 0.002, "loss": 2.3418, "step": 163730 }, { "epoch": 0.6329730481978012, "grad_norm": 0.10777530819177628, "learning_rate": 0.002, "loss": 2.3446, "step": 163740 }, { "epoch": 0.6330117054011845, "grad_norm": 0.12651008367538452, "learning_rate": 0.002, "loss": 2.342, "step": 163750 }, { "epoch": 0.6330503626045677, "grad_norm": 0.10534662008285522, "learning_rate": 0.002, "loss": 2.3366, "step": 163760 }, { "epoch": 0.633089019807951, "grad_norm": 0.10595740377902985, "learning_rate": 0.002, "loss": 2.3417, "step": 163770 }, { "epoch": 0.6331276770113343, "grad_norm": 0.11781991273164749, "learning_rate": 0.002, "loss": 2.3409, "step": 163780 }, { "epoch": 0.6331663342147176, "grad_norm": 0.10847979784011841, "learning_rate": 0.002, "loss": 2.3402, "step": 163790 }, { "epoch": 0.6332049914181008, "grad_norm": 0.11100934445858002, "learning_rate": 0.002, "loss": 2.3413, "step": 163800 }, { "epoch": 0.6332436486214841, "grad_norm": 0.09589594602584839, "learning_rate": 0.002, "loss": 2.3392, "step": 163810 }, { "epoch": 0.6332823058248674, "grad_norm": 0.08635708689689636, "learning_rate": 0.002, "loss": 2.3409, "step": 163820 }, { "epoch": 0.6333209630282507, "grad_norm": 0.09627924114465714, "learning_rate": 0.002, "loss": 2.3366, "step": 163830 }, { "epoch": 0.633359620231634, "grad_norm": 0.10940419882535934, "learning_rate": 0.002, "loss": 2.3468, "step": 163840 }, { "epoch": 0.6333982774350172, "grad_norm": 0.10659973323345184, "learning_rate": 0.002, "loss": 2.3351, "step": 163850 }, { "epoch": 0.6334369346384006, "grad_norm": 0.10410846024751663, "learning_rate": 0.002, "loss": 2.3625, "step": 163860 }, { "epoch": 0.6334755918417838, "grad_norm": 0.08976782858371735, "learning_rate": 0.002, "loss": 2.3592, "step": 163870 }, { "epoch": 0.6335142490451671, "grad_norm": 0.11145444214344025, "learning_rate": 0.002, "loss": 2.3293, "step": 163880 }, { "epoch": 0.6335529062485503, "grad_norm": 0.10460666567087173, "learning_rate": 0.002, "loss": 2.3298, "step": 163890 }, { "epoch": 0.6335915634519337, "grad_norm": 0.09775000810623169, "learning_rate": 0.002, "loss": 2.3547, "step": 163900 }, { "epoch": 0.6336302206553169, "grad_norm": 0.10219895094633102, "learning_rate": 0.002, "loss": 2.3354, "step": 163910 }, { "epoch": 0.6336688778587002, "grad_norm": 0.13126534223556519, "learning_rate": 0.002, "loss": 2.3305, "step": 163920 }, { "epoch": 0.6337075350620834, "grad_norm": 0.15083256363868713, "learning_rate": 0.002, "loss": 2.3479, "step": 163930 }, { "epoch": 0.6337461922654668, "grad_norm": 0.13783420622348785, "learning_rate": 0.002, "loss": 2.3491, "step": 163940 }, { "epoch": 0.63378484946885, "grad_norm": 0.105369433760643, "learning_rate": 0.002, "loss": 2.3489, "step": 163950 }, { "epoch": 0.6338235066722333, "grad_norm": 0.11018861085176468, "learning_rate": 0.002, "loss": 2.3483, "step": 163960 }, { "epoch": 0.6338621638756166, "grad_norm": 0.11167337745428085, "learning_rate": 0.002, "loss": 2.3496, "step": 163970 }, { "epoch": 0.6339008210789998, "grad_norm": 0.09401069581508636, "learning_rate": 0.002, "loss": 2.3392, "step": 163980 }, { "epoch": 0.6339394782823832, "grad_norm": 0.10706432163715363, "learning_rate": 0.002, "loss": 2.3441, "step": 163990 }, { "epoch": 0.6339781354857664, "grad_norm": 0.10177291929721832, "learning_rate": 0.002, "loss": 2.3381, "step": 164000 }, { "epoch": 0.6340167926891497, "grad_norm": 0.10011303424835205, "learning_rate": 0.002, "loss": 2.335, "step": 164010 }, { "epoch": 0.6340554498925329, "grad_norm": 0.13315925002098083, "learning_rate": 0.002, "loss": 2.3382, "step": 164020 }, { "epoch": 0.6340941070959163, "grad_norm": 0.1010168194770813, "learning_rate": 0.002, "loss": 2.3555, "step": 164030 }, { "epoch": 0.6341327642992995, "grad_norm": 0.11664897203445435, "learning_rate": 0.002, "loss": 2.3524, "step": 164040 }, { "epoch": 0.6341714215026828, "grad_norm": 0.10335059463977814, "learning_rate": 0.002, "loss": 2.3342, "step": 164050 }, { "epoch": 0.634210078706066, "grad_norm": 0.11125491559505463, "learning_rate": 0.002, "loss": 2.3508, "step": 164060 }, { "epoch": 0.6342487359094494, "grad_norm": 0.10294213891029358, "learning_rate": 0.002, "loss": 2.3409, "step": 164070 }, { "epoch": 0.6342873931128327, "grad_norm": 0.10268981754779816, "learning_rate": 0.002, "loss": 2.3465, "step": 164080 }, { "epoch": 0.6343260503162159, "grad_norm": 0.11555498093366623, "learning_rate": 0.002, "loss": 2.3459, "step": 164090 }, { "epoch": 0.6343647075195992, "grad_norm": 0.09391045570373535, "learning_rate": 0.002, "loss": 2.3577, "step": 164100 }, { "epoch": 0.6344033647229825, "grad_norm": 0.08841327577829361, "learning_rate": 0.002, "loss": 2.3355, "step": 164110 }, { "epoch": 0.6344420219263658, "grad_norm": 0.11089881509542465, "learning_rate": 0.002, "loss": 2.3512, "step": 164120 }, { "epoch": 0.634480679129749, "grad_norm": 0.10194240510463715, "learning_rate": 0.002, "loss": 2.3381, "step": 164130 }, { "epoch": 0.6345193363331323, "grad_norm": 0.10533329099416733, "learning_rate": 0.002, "loss": 2.3502, "step": 164140 }, { "epoch": 0.6345579935365155, "grad_norm": 0.10327567905187607, "learning_rate": 0.002, "loss": 2.3606, "step": 164150 }, { "epoch": 0.6345966507398989, "grad_norm": 0.12457548081874847, "learning_rate": 0.002, "loss": 2.3292, "step": 164160 }, { "epoch": 0.6346353079432822, "grad_norm": 0.14414572715759277, "learning_rate": 0.002, "loss": 2.3405, "step": 164170 }, { "epoch": 0.6346739651466654, "grad_norm": 0.11866335570812225, "learning_rate": 0.002, "loss": 2.3508, "step": 164180 }, { "epoch": 0.6347126223500487, "grad_norm": 0.09757451713085175, "learning_rate": 0.002, "loss": 2.3387, "step": 164190 }, { "epoch": 0.634751279553432, "grad_norm": 0.1049044206738472, "learning_rate": 0.002, "loss": 2.3427, "step": 164200 }, { "epoch": 0.6347899367568153, "grad_norm": 0.08824124187231064, "learning_rate": 0.002, "loss": 2.3219, "step": 164210 }, { "epoch": 0.6348285939601985, "grad_norm": 0.10676911473274231, "learning_rate": 0.002, "loss": 2.3326, "step": 164220 }, { "epoch": 0.6348672511635818, "grad_norm": 0.11019159853458405, "learning_rate": 0.002, "loss": 2.3332, "step": 164230 }, { "epoch": 0.6349059083669651, "grad_norm": 0.13544712960720062, "learning_rate": 0.002, "loss": 2.3394, "step": 164240 }, { "epoch": 0.6349445655703484, "grad_norm": 0.10107675194740295, "learning_rate": 0.002, "loss": 2.337, "step": 164250 }, { "epoch": 0.6349832227737316, "grad_norm": 0.10352107882499695, "learning_rate": 0.002, "loss": 2.3445, "step": 164260 }, { "epoch": 0.6350218799771149, "grad_norm": 0.14161401987075806, "learning_rate": 0.002, "loss": 2.3409, "step": 164270 }, { "epoch": 0.6350605371804983, "grad_norm": 0.11117195338010788, "learning_rate": 0.002, "loss": 2.3362, "step": 164280 }, { "epoch": 0.6350991943838815, "grad_norm": 0.0933212861418724, "learning_rate": 0.002, "loss": 2.3464, "step": 164290 }, { "epoch": 0.6351378515872648, "grad_norm": 0.10913147032260895, "learning_rate": 0.002, "loss": 2.3299, "step": 164300 }, { "epoch": 0.635176508790648, "grad_norm": 0.08877578377723694, "learning_rate": 0.002, "loss": 2.3307, "step": 164310 }, { "epoch": 0.6352151659940314, "grad_norm": 0.11947460472583771, "learning_rate": 0.002, "loss": 2.3362, "step": 164320 }, { "epoch": 0.6352538231974146, "grad_norm": 0.09575257450342178, "learning_rate": 0.002, "loss": 2.3387, "step": 164330 }, { "epoch": 0.6352924804007979, "grad_norm": 0.09766148775815964, "learning_rate": 0.002, "loss": 2.3403, "step": 164340 }, { "epoch": 0.6353311376041811, "grad_norm": 0.12030192464590073, "learning_rate": 0.002, "loss": 2.3439, "step": 164350 }, { "epoch": 0.6353697948075644, "grad_norm": 0.10689179599285126, "learning_rate": 0.002, "loss": 2.3462, "step": 164360 }, { "epoch": 0.6354084520109478, "grad_norm": 0.10224936157464981, "learning_rate": 0.002, "loss": 2.3491, "step": 164370 }, { "epoch": 0.635447109214331, "grad_norm": 0.094263955950737, "learning_rate": 0.002, "loss": 2.3576, "step": 164380 }, { "epoch": 0.6354857664177143, "grad_norm": 0.10004042834043503, "learning_rate": 0.002, "loss": 2.3498, "step": 164390 }, { "epoch": 0.6355244236210975, "grad_norm": 0.10117456316947937, "learning_rate": 0.002, "loss": 2.35, "step": 164400 }, { "epoch": 0.6355630808244809, "grad_norm": 0.10049059987068176, "learning_rate": 0.002, "loss": 2.3444, "step": 164410 }, { "epoch": 0.6356017380278641, "grad_norm": 0.11376563459634781, "learning_rate": 0.002, "loss": 2.3539, "step": 164420 }, { "epoch": 0.6356403952312474, "grad_norm": 0.10315565019845963, "learning_rate": 0.002, "loss": 2.3331, "step": 164430 }, { "epoch": 0.6356790524346306, "grad_norm": 0.10755404829978943, "learning_rate": 0.002, "loss": 2.3507, "step": 164440 }, { "epoch": 0.635717709638014, "grad_norm": 0.0966995358467102, "learning_rate": 0.002, "loss": 2.3244, "step": 164450 }, { "epoch": 0.6357563668413972, "grad_norm": 0.11110610514879227, "learning_rate": 0.002, "loss": 2.351, "step": 164460 }, { "epoch": 0.6357950240447805, "grad_norm": 0.11405228823423386, "learning_rate": 0.002, "loss": 2.3305, "step": 164470 }, { "epoch": 0.6358336812481638, "grad_norm": 0.12278889864683151, "learning_rate": 0.002, "loss": 2.3297, "step": 164480 }, { "epoch": 0.6358723384515471, "grad_norm": 0.10765815526247025, "learning_rate": 0.002, "loss": 2.3423, "step": 164490 }, { "epoch": 0.6359109956549304, "grad_norm": 0.1147020012140274, "learning_rate": 0.002, "loss": 2.3495, "step": 164500 }, { "epoch": 0.6359496528583136, "grad_norm": 0.09279278665781021, "learning_rate": 0.002, "loss": 2.3394, "step": 164510 }, { "epoch": 0.6359883100616969, "grad_norm": 0.09570721536874771, "learning_rate": 0.002, "loss": 2.3397, "step": 164520 }, { "epoch": 0.6360269672650801, "grad_norm": 0.11539546400308609, "learning_rate": 0.002, "loss": 2.3536, "step": 164530 }, { "epoch": 0.6360656244684635, "grad_norm": 0.09704439342021942, "learning_rate": 0.002, "loss": 2.3611, "step": 164540 }, { "epoch": 0.6361042816718467, "grad_norm": 0.11590418964624405, "learning_rate": 0.002, "loss": 2.3506, "step": 164550 }, { "epoch": 0.63614293887523, "grad_norm": 0.10154294222593307, "learning_rate": 0.002, "loss": 2.3473, "step": 164560 }, { "epoch": 0.6361815960786132, "grad_norm": 0.09837421029806137, "learning_rate": 0.002, "loss": 2.3625, "step": 164570 }, { "epoch": 0.6362202532819966, "grad_norm": 0.10751167684793472, "learning_rate": 0.002, "loss": 2.3456, "step": 164580 }, { "epoch": 0.6362589104853799, "grad_norm": 0.09590762853622437, "learning_rate": 0.002, "loss": 2.3408, "step": 164590 }, { "epoch": 0.6362975676887631, "grad_norm": 0.11032095551490784, "learning_rate": 0.002, "loss": 2.3423, "step": 164600 }, { "epoch": 0.6363362248921464, "grad_norm": 0.09885178506374359, "learning_rate": 0.002, "loss": 2.3384, "step": 164610 }, { "epoch": 0.6363748820955297, "grad_norm": 0.12746669352054596, "learning_rate": 0.002, "loss": 2.3251, "step": 164620 }, { "epoch": 0.636413539298913, "grad_norm": 0.09944802522659302, "learning_rate": 0.002, "loss": 2.3359, "step": 164630 }, { "epoch": 0.6364521965022962, "grad_norm": 0.11385858803987503, "learning_rate": 0.002, "loss": 2.3335, "step": 164640 }, { "epoch": 0.6364908537056795, "grad_norm": 0.1174749806523323, "learning_rate": 0.002, "loss": 2.3451, "step": 164650 }, { "epoch": 0.6365295109090628, "grad_norm": 0.10102608799934387, "learning_rate": 0.002, "loss": 2.3412, "step": 164660 }, { "epoch": 0.6365681681124461, "grad_norm": 0.1062927097082138, "learning_rate": 0.002, "loss": 2.3395, "step": 164670 }, { "epoch": 0.6366068253158293, "grad_norm": 0.10197556763887405, "learning_rate": 0.002, "loss": 2.3445, "step": 164680 }, { "epoch": 0.6366454825192126, "grad_norm": 0.11311152577400208, "learning_rate": 0.002, "loss": 2.342, "step": 164690 }, { "epoch": 0.6366841397225959, "grad_norm": 0.10863665491342545, "learning_rate": 0.002, "loss": 2.3368, "step": 164700 }, { "epoch": 0.6367227969259792, "grad_norm": 0.09699777513742447, "learning_rate": 0.002, "loss": 2.3502, "step": 164710 }, { "epoch": 0.6367614541293625, "grad_norm": 0.11128968000411987, "learning_rate": 0.002, "loss": 2.3404, "step": 164720 }, { "epoch": 0.6368001113327457, "grad_norm": 0.10367298126220703, "learning_rate": 0.002, "loss": 2.3362, "step": 164730 }, { "epoch": 0.636838768536129, "grad_norm": 0.111191026866436, "learning_rate": 0.002, "loss": 2.345, "step": 164740 }, { "epoch": 0.6368774257395123, "grad_norm": 0.10926955193281174, "learning_rate": 0.002, "loss": 2.33, "step": 164750 }, { "epoch": 0.6369160829428956, "grad_norm": 0.12507140636444092, "learning_rate": 0.002, "loss": 2.3427, "step": 164760 }, { "epoch": 0.6369547401462788, "grad_norm": 0.09888520836830139, "learning_rate": 0.002, "loss": 2.3435, "step": 164770 }, { "epoch": 0.6369933973496621, "grad_norm": 0.11516008526086807, "learning_rate": 0.002, "loss": 2.3398, "step": 164780 }, { "epoch": 0.6370320545530455, "grad_norm": 0.10190610587596893, "learning_rate": 0.002, "loss": 2.3524, "step": 164790 }, { "epoch": 0.6370707117564287, "grad_norm": 0.08981244266033173, "learning_rate": 0.002, "loss": 2.3404, "step": 164800 }, { "epoch": 0.637109368959812, "grad_norm": 0.1259339600801468, "learning_rate": 0.002, "loss": 2.3674, "step": 164810 }, { "epoch": 0.6371480261631952, "grad_norm": 0.11346227675676346, "learning_rate": 0.002, "loss": 2.3331, "step": 164820 }, { "epoch": 0.6371866833665786, "grad_norm": 0.10800295323133469, "learning_rate": 0.002, "loss": 2.3331, "step": 164830 }, { "epoch": 0.6372253405699618, "grad_norm": 0.1250246912240982, "learning_rate": 0.002, "loss": 2.353, "step": 164840 }, { "epoch": 0.6372639977733451, "grad_norm": 0.10640424489974976, "learning_rate": 0.002, "loss": 2.3265, "step": 164850 }, { "epoch": 0.6373026549767283, "grad_norm": 0.12365873903036118, "learning_rate": 0.002, "loss": 2.3505, "step": 164860 }, { "epoch": 0.6373413121801117, "grad_norm": 0.113923080265522, "learning_rate": 0.002, "loss": 2.3351, "step": 164870 }, { "epoch": 0.637379969383495, "grad_norm": 0.13655415177345276, "learning_rate": 0.002, "loss": 2.3609, "step": 164880 }, { "epoch": 0.6374186265868782, "grad_norm": 0.10525278747081757, "learning_rate": 0.002, "loss": 2.3294, "step": 164890 }, { "epoch": 0.6374572837902615, "grad_norm": 0.09903251379728317, "learning_rate": 0.002, "loss": 2.3496, "step": 164900 }, { "epoch": 0.6374959409936447, "grad_norm": 0.09784550219774246, "learning_rate": 0.002, "loss": 2.3467, "step": 164910 }, { "epoch": 0.6375345981970281, "grad_norm": 0.121519073843956, "learning_rate": 0.002, "loss": 2.3741, "step": 164920 }, { "epoch": 0.6375732554004113, "grad_norm": 0.14645813405513763, "learning_rate": 0.002, "loss": 2.3467, "step": 164930 }, { "epoch": 0.6376119126037946, "grad_norm": 0.09632168710231781, "learning_rate": 0.002, "loss": 2.3564, "step": 164940 }, { "epoch": 0.6376505698071778, "grad_norm": 0.09478174895048141, "learning_rate": 0.002, "loss": 2.3256, "step": 164950 }, { "epoch": 0.6376892270105612, "grad_norm": 0.11825351417064667, "learning_rate": 0.002, "loss": 2.3363, "step": 164960 }, { "epoch": 0.6377278842139444, "grad_norm": 0.1066097617149353, "learning_rate": 0.002, "loss": 2.3487, "step": 164970 }, { "epoch": 0.6377665414173277, "grad_norm": 0.10353917628526688, "learning_rate": 0.002, "loss": 2.3519, "step": 164980 }, { "epoch": 0.637805198620711, "grad_norm": 0.11960267275571823, "learning_rate": 0.002, "loss": 2.3399, "step": 164990 }, { "epoch": 0.6378438558240943, "grad_norm": 0.10730654746294022, "learning_rate": 0.002, "loss": 2.338, "step": 165000 }, { "epoch": 0.6378825130274776, "grad_norm": 0.10325822979211807, "learning_rate": 0.002, "loss": 2.3351, "step": 165010 }, { "epoch": 0.6379211702308608, "grad_norm": 0.09659511595964432, "learning_rate": 0.002, "loss": 2.3329, "step": 165020 }, { "epoch": 0.6379598274342441, "grad_norm": 0.11972470581531525, "learning_rate": 0.002, "loss": 2.3572, "step": 165030 }, { "epoch": 0.6379984846376274, "grad_norm": 0.13168546557426453, "learning_rate": 0.002, "loss": 2.3573, "step": 165040 }, { "epoch": 0.6380371418410107, "grad_norm": 0.09514293074607849, "learning_rate": 0.002, "loss": 2.3397, "step": 165050 }, { "epoch": 0.6380757990443939, "grad_norm": 0.09249638766050339, "learning_rate": 0.002, "loss": 2.3263, "step": 165060 }, { "epoch": 0.6381144562477772, "grad_norm": 0.11831273138523102, "learning_rate": 0.002, "loss": 2.3371, "step": 165070 }, { "epoch": 0.6381531134511604, "grad_norm": 0.10477405041456223, "learning_rate": 0.002, "loss": 2.343, "step": 165080 }, { "epoch": 0.6381917706545438, "grad_norm": 0.13984504342079163, "learning_rate": 0.002, "loss": 2.3339, "step": 165090 }, { "epoch": 0.638230427857927, "grad_norm": 0.09976739436388016, "learning_rate": 0.002, "loss": 2.3429, "step": 165100 }, { "epoch": 0.6382690850613103, "grad_norm": 0.10496357828378677, "learning_rate": 0.002, "loss": 2.3442, "step": 165110 }, { "epoch": 0.6383077422646936, "grad_norm": 0.11283092200756073, "learning_rate": 0.002, "loss": 2.3445, "step": 165120 }, { "epoch": 0.6383463994680769, "grad_norm": 0.09938319772481918, "learning_rate": 0.002, "loss": 2.3457, "step": 165130 }, { "epoch": 0.6383850566714602, "grad_norm": 0.0918104350566864, "learning_rate": 0.002, "loss": 2.3402, "step": 165140 }, { "epoch": 0.6384237138748434, "grad_norm": 0.12190679460763931, "learning_rate": 0.002, "loss": 2.3293, "step": 165150 }, { "epoch": 0.6384623710782267, "grad_norm": 0.09865977615118027, "learning_rate": 0.002, "loss": 2.3235, "step": 165160 }, { "epoch": 0.63850102828161, "grad_norm": 0.10292075574398041, "learning_rate": 0.002, "loss": 2.3488, "step": 165170 }, { "epoch": 0.6385396854849933, "grad_norm": 0.11105905473232269, "learning_rate": 0.002, "loss": 2.3489, "step": 165180 }, { "epoch": 0.6385783426883765, "grad_norm": 0.09727694094181061, "learning_rate": 0.002, "loss": 2.3449, "step": 165190 }, { "epoch": 0.6386169998917598, "grad_norm": 0.09970822930335999, "learning_rate": 0.002, "loss": 2.329, "step": 165200 }, { "epoch": 0.6386556570951432, "grad_norm": 0.09458530694246292, "learning_rate": 0.002, "loss": 2.3477, "step": 165210 }, { "epoch": 0.6386943142985264, "grad_norm": 0.09014523029327393, "learning_rate": 0.002, "loss": 2.3528, "step": 165220 }, { "epoch": 0.6387329715019097, "grad_norm": 0.10275470465421677, "learning_rate": 0.002, "loss": 2.3462, "step": 165230 }, { "epoch": 0.6387716287052929, "grad_norm": 0.09910184890031815, "learning_rate": 0.002, "loss": 2.3474, "step": 165240 }, { "epoch": 0.6388102859086762, "grad_norm": 0.10784036666154861, "learning_rate": 0.002, "loss": 2.3484, "step": 165250 }, { "epoch": 0.6388489431120595, "grad_norm": 0.18909576535224915, "learning_rate": 0.002, "loss": 2.3415, "step": 165260 }, { "epoch": 0.6388876003154428, "grad_norm": 0.10532589256763458, "learning_rate": 0.002, "loss": 2.3413, "step": 165270 }, { "epoch": 0.638926257518826, "grad_norm": 0.11066906899213791, "learning_rate": 0.002, "loss": 2.3554, "step": 165280 }, { "epoch": 0.6389649147222093, "grad_norm": 0.1000770553946495, "learning_rate": 0.002, "loss": 2.3531, "step": 165290 }, { "epoch": 0.6390035719255927, "grad_norm": 0.1069023460149765, "learning_rate": 0.002, "loss": 2.3477, "step": 165300 }, { "epoch": 0.6390422291289759, "grad_norm": 0.08948520570993423, "learning_rate": 0.002, "loss": 2.3315, "step": 165310 }, { "epoch": 0.6390808863323592, "grad_norm": 0.11200417578220367, "learning_rate": 0.002, "loss": 2.3417, "step": 165320 }, { "epoch": 0.6391195435357424, "grad_norm": 0.09061744809150696, "learning_rate": 0.002, "loss": 2.342, "step": 165330 }, { "epoch": 0.6391582007391258, "grad_norm": 0.11405932158231735, "learning_rate": 0.002, "loss": 2.3464, "step": 165340 }, { "epoch": 0.639196857942509, "grad_norm": 0.2837398648262024, "learning_rate": 0.002, "loss": 2.3405, "step": 165350 }, { "epoch": 0.6392355151458923, "grad_norm": 0.12070345133543015, "learning_rate": 0.002, "loss": 2.3566, "step": 165360 }, { "epoch": 0.6392741723492755, "grad_norm": 0.11351531744003296, "learning_rate": 0.002, "loss": 2.3331, "step": 165370 }, { "epoch": 0.6393128295526589, "grad_norm": 0.11273518204689026, "learning_rate": 0.002, "loss": 2.3437, "step": 165380 }, { "epoch": 0.6393514867560421, "grad_norm": 0.12190520763397217, "learning_rate": 0.002, "loss": 2.342, "step": 165390 }, { "epoch": 0.6393901439594254, "grad_norm": 0.11630602180957794, "learning_rate": 0.002, "loss": 2.3426, "step": 165400 }, { "epoch": 0.6394288011628086, "grad_norm": 0.10664429515600204, "learning_rate": 0.002, "loss": 2.3283, "step": 165410 }, { "epoch": 0.639467458366192, "grad_norm": 0.10132356733083725, "learning_rate": 0.002, "loss": 2.3207, "step": 165420 }, { "epoch": 0.6395061155695753, "grad_norm": 0.10533542931079865, "learning_rate": 0.002, "loss": 2.3416, "step": 165430 }, { "epoch": 0.6395447727729585, "grad_norm": 0.09856123477220535, "learning_rate": 0.002, "loss": 2.3576, "step": 165440 }, { "epoch": 0.6395834299763418, "grad_norm": 0.2605516314506531, "learning_rate": 0.002, "loss": 2.3512, "step": 165450 }, { "epoch": 0.639622087179725, "grad_norm": 0.11958436667919159, "learning_rate": 0.002, "loss": 2.349, "step": 165460 }, { "epoch": 0.6396607443831084, "grad_norm": 0.11529088765382767, "learning_rate": 0.002, "loss": 2.3308, "step": 165470 }, { "epoch": 0.6396994015864916, "grad_norm": 0.10149786621332169, "learning_rate": 0.002, "loss": 2.338, "step": 165480 }, { "epoch": 0.6397380587898749, "grad_norm": 0.11168461292982101, "learning_rate": 0.002, "loss": 2.3387, "step": 165490 }, { "epoch": 0.6397767159932581, "grad_norm": 0.1082560122013092, "learning_rate": 0.002, "loss": 2.3509, "step": 165500 }, { "epoch": 0.6398153731966415, "grad_norm": 0.12643194198608398, "learning_rate": 0.002, "loss": 2.3478, "step": 165510 }, { "epoch": 0.6398540304000248, "grad_norm": 0.1188395693898201, "learning_rate": 0.002, "loss": 2.3597, "step": 165520 }, { "epoch": 0.639892687603408, "grad_norm": 0.11413159221410751, "learning_rate": 0.002, "loss": 2.3567, "step": 165530 }, { "epoch": 0.6399313448067913, "grad_norm": 0.10644764453172684, "learning_rate": 0.002, "loss": 2.3513, "step": 165540 }, { "epoch": 0.6399700020101746, "grad_norm": 0.1004197746515274, "learning_rate": 0.002, "loss": 2.3504, "step": 165550 }, { "epoch": 0.6400086592135579, "grad_norm": 0.14543674886226654, "learning_rate": 0.002, "loss": 2.3412, "step": 165560 }, { "epoch": 0.6400473164169411, "grad_norm": 0.09863635152578354, "learning_rate": 0.002, "loss": 2.3394, "step": 165570 }, { "epoch": 0.6400859736203244, "grad_norm": 0.1266559362411499, "learning_rate": 0.002, "loss": 2.3422, "step": 165580 }, { "epoch": 0.6401246308237077, "grad_norm": 0.11178171634674072, "learning_rate": 0.002, "loss": 2.3438, "step": 165590 }, { "epoch": 0.640163288027091, "grad_norm": 0.10830844193696976, "learning_rate": 0.002, "loss": 2.3368, "step": 165600 }, { "epoch": 0.6402019452304742, "grad_norm": 0.10719065368175507, "learning_rate": 0.002, "loss": 2.3481, "step": 165610 }, { "epoch": 0.6402406024338575, "grad_norm": 0.16013024747371674, "learning_rate": 0.002, "loss": 2.3356, "step": 165620 }, { "epoch": 0.6402792596372407, "grad_norm": 0.11934558302164078, "learning_rate": 0.002, "loss": 2.3579, "step": 165630 }, { "epoch": 0.6403179168406241, "grad_norm": 0.09629912674427032, "learning_rate": 0.002, "loss": 2.3325, "step": 165640 }, { "epoch": 0.6403565740440074, "grad_norm": 0.11514745652675629, "learning_rate": 0.002, "loss": 2.3445, "step": 165650 }, { "epoch": 0.6403952312473906, "grad_norm": 0.09105557948350906, "learning_rate": 0.002, "loss": 2.3322, "step": 165660 }, { "epoch": 0.6404338884507739, "grad_norm": 0.16520722210407257, "learning_rate": 0.002, "loss": 2.3369, "step": 165670 }, { "epoch": 0.6404725456541572, "grad_norm": 0.09158007055521011, "learning_rate": 0.002, "loss": 2.3362, "step": 165680 }, { "epoch": 0.6405112028575405, "grad_norm": 0.1115182489156723, "learning_rate": 0.002, "loss": 2.3492, "step": 165690 }, { "epoch": 0.6405498600609237, "grad_norm": 0.13768655061721802, "learning_rate": 0.002, "loss": 2.3363, "step": 165700 }, { "epoch": 0.640588517264307, "grad_norm": 0.2795492112636566, "learning_rate": 0.002, "loss": 2.3241, "step": 165710 }, { "epoch": 0.6406271744676904, "grad_norm": 0.11476332694292068, "learning_rate": 0.002, "loss": 2.3352, "step": 165720 }, { "epoch": 0.6406658316710736, "grad_norm": 0.13985703885555267, "learning_rate": 0.002, "loss": 2.3405, "step": 165730 }, { "epoch": 0.6407044888744569, "grad_norm": 0.10406840592622757, "learning_rate": 0.002, "loss": 2.3427, "step": 165740 }, { "epoch": 0.6407431460778401, "grad_norm": 0.10520946234464645, "learning_rate": 0.002, "loss": 2.3529, "step": 165750 }, { "epoch": 0.6407818032812235, "grad_norm": 0.10424433648586273, "learning_rate": 0.002, "loss": 2.3501, "step": 165760 }, { "epoch": 0.6408204604846067, "grad_norm": 0.11177675426006317, "learning_rate": 0.002, "loss": 2.3487, "step": 165770 }, { "epoch": 0.64085911768799, "grad_norm": 0.09933862835168839, "learning_rate": 0.002, "loss": 2.3386, "step": 165780 }, { "epoch": 0.6408977748913732, "grad_norm": 0.09627087414264679, "learning_rate": 0.002, "loss": 2.3523, "step": 165790 }, { "epoch": 0.6409364320947566, "grad_norm": 0.11053643375635147, "learning_rate": 0.002, "loss": 2.3507, "step": 165800 }, { "epoch": 0.6409750892981398, "grad_norm": 0.10794644802808762, "learning_rate": 0.002, "loss": 2.3361, "step": 165810 }, { "epoch": 0.6410137465015231, "grad_norm": 0.10086505860090256, "learning_rate": 0.002, "loss": 2.3401, "step": 165820 }, { "epoch": 0.6410524037049063, "grad_norm": 0.11320238560438156, "learning_rate": 0.002, "loss": 2.3286, "step": 165830 }, { "epoch": 0.6410910609082896, "grad_norm": 0.09917449206113815, "learning_rate": 0.002, "loss": 2.352, "step": 165840 }, { "epoch": 0.641129718111673, "grad_norm": 0.12727132439613342, "learning_rate": 0.002, "loss": 2.3298, "step": 165850 }, { "epoch": 0.6411683753150562, "grad_norm": 0.10197830945253372, "learning_rate": 0.002, "loss": 2.3489, "step": 165860 }, { "epoch": 0.6412070325184395, "grad_norm": 0.089323990046978, "learning_rate": 0.002, "loss": 2.3475, "step": 165870 }, { "epoch": 0.6412456897218227, "grad_norm": 0.10101732611656189, "learning_rate": 0.002, "loss": 2.3387, "step": 165880 }, { "epoch": 0.6412843469252061, "grad_norm": 0.08730916678905487, "learning_rate": 0.002, "loss": 2.3398, "step": 165890 }, { "epoch": 0.6413230041285893, "grad_norm": 0.10433012247085571, "learning_rate": 0.002, "loss": 2.3362, "step": 165900 }, { "epoch": 0.6413616613319726, "grad_norm": 0.15657569468021393, "learning_rate": 0.002, "loss": 2.3309, "step": 165910 }, { "epoch": 0.6414003185353558, "grad_norm": 0.11098662763834, "learning_rate": 0.002, "loss": 2.3344, "step": 165920 }, { "epoch": 0.6414389757387392, "grad_norm": 0.09086654335260391, "learning_rate": 0.002, "loss": 2.3387, "step": 165930 }, { "epoch": 0.6414776329421225, "grad_norm": 0.10442627221345901, "learning_rate": 0.002, "loss": 2.3515, "step": 165940 }, { "epoch": 0.6415162901455057, "grad_norm": 0.09077071398496628, "learning_rate": 0.002, "loss": 2.3435, "step": 165950 }, { "epoch": 0.641554947348889, "grad_norm": 0.11129163950681686, "learning_rate": 0.002, "loss": 2.3323, "step": 165960 }, { "epoch": 0.6415936045522723, "grad_norm": 0.12598635256290436, "learning_rate": 0.002, "loss": 2.3515, "step": 165970 }, { "epoch": 0.6416322617556556, "grad_norm": 0.13409005105495453, "learning_rate": 0.002, "loss": 2.3421, "step": 165980 }, { "epoch": 0.6416709189590388, "grad_norm": 0.17931459844112396, "learning_rate": 0.002, "loss": 2.3254, "step": 165990 }, { "epoch": 0.6417095761624221, "grad_norm": 0.10173191130161285, "learning_rate": 0.002, "loss": 2.3467, "step": 166000 }, { "epoch": 0.6417482333658053, "grad_norm": 0.10050991922616959, "learning_rate": 0.002, "loss": 2.3395, "step": 166010 }, { "epoch": 0.6417868905691887, "grad_norm": 0.11818138509988785, "learning_rate": 0.002, "loss": 2.3515, "step": 166020 }, { "epoch": 0.641825547772572, "grad_norm": 0.10185743868350983, "learning_rate": 0.002, "loss": 2.3471, "step": 166030 }, { "epoch": 0.6418642049759552, "grad_norm": 0.11149696260690689, "learning_rate": 0.002, "loss": 2.333, "step": 166040 }, { "epoch": 0.6419028621793385, "grad_norm": 0.13266867399215698, "learning_rate": 0.002, "loss": 2.3365, "step": 166050 }, { "epoch": 0.6419415193827218, "grad_norm": 0.12067516148090363, "learning_rate": 0.002, "loss": 2.3392, "step": 166060 }, { "epoch": 0.6419801765861051, "grad_norm": 0.09225700795650482, "learning_rate": 0.002, "loss": 2.3499, "step": 166070 }, { "epoch": 0.6420188337894883, "grad_norm": 0.13785415887832642, "learning_rate": 0.002, "loss": 2.3412, "step": 166080 }, { "epoch": 0.6420574909928716, "grad_norm": 0.09486342966556549, "learning_rate": 0.002, "loss": 2.3534, "step": 166090 }, { "epoch": 0.6420961481962549, "grad_norm": 0.1279897838830948, "learning_rate": 0.002, "loss": 2.3338, "step": 166100 }, { "epoch": 0.6421348053996382, "grad_norm": 0.11228784918785095, "learning_rate": 0.002, "loss": 2.3365, "step": 166110 }, { "epoch": 0.6421734626030214, "grad_norm": 0.11330057680606842, "learning_rate": 0.002, "loss": 2.3559, "step": 166120 }, { "epoch": 0.6422121198064047, "grad_norm": 0.11201409995555878, "learning_rate": 0.002, "loss": 2.3553, "step": 166130 }, { "epoch": 0.642250777009788, "grad_norm": 0.10510092228651047, "learning_rate": 0.002, "loss": 2.3481, "step": 166140 }, { "epoch": 0.6422894342131713, "grad_norm": 0.09827280789613724, "learning_rate": 0.002, "loss": 2.3413, "step": 166150 }, { "epoch": 0.6423280914165546, "grad_norm": 0.11704771965742111, "learning_rate": 0.002, "loss": 2.3268, "step": 166160 }, { "epoch": 0.6423667486199378, "grad_norm": 0.12008139491081238, "learning_rate": 0.002, "loss": 2.3431, "step": 166170 }, { "epoch": 0.6424054058233211, "grad_norm": 0.11050605028867722, "learning_rate": 0.002, "loss": 2.3346, "step": 166180 }, { "epoch": 0.6424440630267044, "grad_norm": 0.09580128639936447, "learning_rate": 0.002, "loss": 2.3541, "step": 166190 }, { "epoch": 0.6424827202300877, "grad_norm": 0.1491931825876236, "learning_rate": 0.002, "loss": 2.3427, "step": 166200 }, { "epoch": 0.6425213774334709, "grad_norm": 0.09168663620948792, "learning_rate": 0.002, "loss": 2.3494, "step": 166210 }, { "epoch": 0.6425600346368542, "grad_norm": 0.09992379695177078, "learning_rate": 0.002, "loss": 2.3328, "step": 166220 }, { "epoch": 0.6425986918402375, "grad_norm": 0.10679803043603897, "learning_rate": 0.002, "loss": 2.3353, "step": 166230 }, { "epoch": 0.6426373490436208, "grad_norm": 0.10552909970283508, "learning_rate": 0.002, "loss": 2.347, "step": 166240 }, { "epoch": 0.642676006247004, "grad_norm": 0.20289833843708038, "learning_rate": 0.002, "loss": 2.3405, "step": 166250 }, { "epoch": 0.6427146634503873, "grad_norm": 0.13337108492851257, "learning_rate": 0.002, "loss": 2.3387, "step": 166260 }, { "epoch": 0.6427533206537707, "grad_norm": 0.10712134838104248, "learning_rate": 0.002, "loss": 2.3366, "step": 166270 }, { "epoch": 0.6427919778571539, "grad_norm": 0.11020371317863464, "learning_rate": 0.002, "loss": 2.3362, "step": 166280 }, { "epoch": 0.6428306350605372, "grad_norm": 0.11662837862968445, "learning_rate": 0.002, "loss": 2.3292, "step": 166290 }, { "epoch": 0.6428692922639204, "grad_norm": 0.10071861743927002, "learning_rate": 0.002, "loss": 2.34, "step": 166300 }, { "epoch": 0.6429079494673038, "grad_norm": 0.11239639669656754, "learning_rate": 0.002, "loss": 2.335, "step": 166310 }, { "epoch": 0.642946606670687, "grad_norm": 0.1093963086605072, "learning_rate": 0.002, "loss": 2.3264, "step": 166320 }, { "epoch": 0.6429852638740703, "grad_norm": 0.1184404194355011, "learning_rate": 0.002, "loss": 2.3466, "step": 166330 }, { "epoch": 0.6430239210774535, "grad_norm": 0.11267964541912079, "learning_rate": 0.002, "loss": 2.334, "step": 166340 }, { "epoch": 0.6430625782808369, "grad_norm": 0.10850871354341507, "learning_rate": 0.002, "loss": 2.3393, "step": 166350 }, { "epoch": 0.6431012354842202, "grad_norm": 0.10646646469831467, "learning_rate": 0.002, "loss": 2.3515, "step": 166360 }, { "epoch": 0.6431398926876034, "grad_norm": 0.10959392786026001, "learning_rate": 0.002, "loss": 2.3325, "step": 166370 }, { "epoch": 0.6431785498909867, "grad_norm": 0.10171080380678177, "learning_rate": 0.002, "loss": 2.3334, "step": 166380 }, { "epoch": 0.6432172070943699, "grad_norm": 0.12380674481391907, "learning_rate": 0.002, "loss": 2.3286, "step": 166390 }, { "epoch": 0.6432558642977533, "grad_norm": 0.11397700756788254, "learning_rate": 0.002, "loss": 2.3396, "step": 166400 }, { "epoch": 0.6432945215011365, "grad_norm": 0.12687024474143982, "learning_rate": 0.002, "loss": 2.3314, "step": 166410 }, { "epoch": 0.6433331787045198, "grad_norm": 0.11587760597467422, "learning_rate": 0.002, "loss": 2.3423, "step": 166420 }, { "epoch": 0.643371835907903, "grad_norm": 0.09550909698009491, "learning_rate": 0.002, "loss": 2.3332, "step": 166430 }, { "epoch": 0.6434104931112864, "grad_norm": 0.10184428095817566, "learning_rate": 0.002, "loss": 2.3429, "step": 166440 }, { "epoch": 0.6434491503146696, "grad_norm": 0.09556592255830765, "learning_rate": 0.002, "loss": 2.3502, "step": 166450 }, { "epoch": 0.6434878075180529, "grad_norm": 0.10573387145996094, "learning_rate": 0.002, "loss": 2.3453, "step": 166460 }, { "epoch": 0.6435264647214362, "grad_norm": 0.10674691945314407, "learning_rate": 0.002, "loss": 2.3358, "step": 166470 }, { "epoch": 0.6435651219248195, "grad_norm": 0.12763813138008118, "learning_rate": 0.002, "loss": 2.3495, "step": 166480 }, { "epoch": 0.6436037791282028, "grad_norm": 0.10722203552722931, "learning_rate": 0.002, "loss": 2.3544, "step": 166490 }, { "epoch": 0.643642436331586, "grad_norm": 0.10920672863721848, "learning_rate": 0.002, "loss": 2.3457, "step": 166500 }, { "epoch": 0.6436810935349693, "grad_norm": 0.09678395837545395, "learning_rate": 0.002, "loss": 2.3458, "step": 166510 }, { "epoch": 0.6437197507383526, "grad_norm": 0.10371027141809464, "learning_rate": 0.002, "loss": 2.3503, "step": 166520 }, { "epoch": 0.6437584079417359, "grad_norm": 0.09537974745035172, "learning_rate": 0.002, "loss": 2.3499, "step": 166530 }, { "epoch": 0.6437970651451191, "grad_norm": 0.12189342826604843, "learning_rate": 0.002, "loss": 2.3345, "step": 166540 }, { "epoch": 0.6438357223485024, "grad_norm": 0.0994722917675972, "learning_rate": 0.002, "loss": 2.3417, "step": 166550 }, { "epoch": 0.6438743795518856, "grad_norm": 0.1094537302851677, "learning_rate": 0.002, "loss": 2.3592, "step": 166560 }, { "epoch": 0.643913036755269, "grad_norm": 0.1033482551574707, "learning_rate": 0.002, "loss": 2.349, "step": 166570 }, { "epoch": 0.6439516939586523, "grad_norm": 0.08849822729825974, "learning_rate": 0.002, "loss": 2.3384, "step": 166580 }, { "epoch": 0.6439903511620355, "grad_norm": 0.11044814437627792, "learning_rate": 0.002, "loss": 2.3518, "step": 166590 }, { "epoch": 0.6440290083654188, "grad_norm": 0.11067377775907516, "learning_rate": 0.002, "loss": 2.333, "step": 166600 }, { "epoch": 0.6440676655688021, "grad_norm": 0.10998781770467758, "learning_rate": 0.002, "loss": 2.3406, "step": 166610 }, { "epoch": 0.6441063227721854, "grad_norm": 0.10315155982971191, "learning_rate": 0.002, "loss": 2.3326, "step": 166620 }, { "epoch": 0.6441449799755686, "grad_norm": 0.09741552919149399, "learning_rate": 0.002, "loss": 2.3451, "step": 166630 }, { "epoch": 0.6441836371789519, "grad_norm": 0.14576038718223572, "learning_rate": 0.002, "loss": 2.3488, "step": 166640 }, { "epoch": 0.6442222943823352, "grad_norm": 0.09207435697317123, "learning_rate": 0.002, "loss": 2.3473, "step": 166650 }, { "epoch": 0.6442609515857185, "grad_norm": 0.10076536238193512, "learning_rate": 0.002, "loss": 2.336, "step": 166660 }, { "epoch": 0.6442996087891018, "grad_norm": 0.11621806025505066, "learning_rate": 0.002, "loss": 2.3398, "step": 166670 }, { "epoch": 0.644338265992485, "grad_norm": 0.11521206051111221, "learning_rate": 0.002, "loss": 2.3632, "step": 166680 }, { "epoch": 0.6443769231958684, "grad_norm": 0.10904458165168762, "learning_rate": 0.002, "loss": 2.3447, "step": 166690 }, { "epoch": 0.6444155803992516, "grad_norm": 0.14390254020690918, "learning_rate": 0.002, "loss": 2.3502, "step": 166700 }, { "epoch": 0.6444542376026349, "grad_norm": 0.1041063517332077, "learning_rate": 0.002, "loss": 2.3469, "step": 166710 }, { "epoch": 0.6444928948060181, "grad_norm": 0.10243967920541763, "learning_rate": 0.002, "loss": 2.3517, "step": 166720 }, { "epoch": 0.6445315520094015, "grad_norm": 0.10894256830215454, "learning_rate": 0.002, "loss": 2.3499, "step": 166730 }, { "epoch": 0.6445702092127847, "grad_norm": 0.14673057198524475, "learning_rate": 0.002, "loss": 2.3496, "step": 166740 }, { "epoch": 0.644608866416168, "grad_norm": 0.09796229004859924, "learning_rate": 0.002, "loss": 2.3406, "step": 166750 }, { "epoch": 0.6446475236195512, "grad_norm": 0.09875568747520447, "learning_rate": 0.002, "loss": 2.3461, "step": 166760 }, { "epoch": 0.6446861808229345, "grad_norm": 0.09696167707443237, "learning_rate": 0.002, "loss": 2.3398, "step": 166770 }, { "epoch": 0.6447248380263179, "grad_norm": 0.12166699767112732, "learning_rate": 0.002, "loss": 2.3525, "step": 166780 }, { "epoch": 0.6447634952297011, "grad_norm": 0.12227968871593475, "learning_rate": 0.002, "loss": 2.3291, "step": 166790 }, { "epoch": 0.6448021524330844, "grad_norm": 0.11464644968509674, "learning_rate": 0.002, "loss": 2.3556, "step": 166800 }, { "epoch": 0.6448408096364676, "grad_norm": 0.14891226589679718, "learning_rate": 0.002, "loss": 2.3471, "step": 166810 }, { "epoch": 0.644879466839851, "grad_norm": 0.1151174008846283, "learning_rate": 0.002, "loss": 2.3513, "step": 166820 }, { "epoch": 0.6449181240432342, "grad_norm": 0.09968895465135574, "learning_rate": 0.002, "loss": 2.3463, "step": 166830 }, { "epoch": 0.6449567812466175, "grad_norm": 0.12719690799713135, "learning_rate": 0.002, "loss": 2.34, "step": 166840 }, { "epoch": 0.6449954384500007, "grad_norm": 0.1118369996547699, "learning_rate": 0.002, "loss": 2.3508, "step": 166850 }, { "epoch": 0.6450340956533841, "grad_norm": 0.15037807822227478, "learning_rate": 0.002, "loss": 2.3547, "step": 166860 }, { "epoch": 0.6450727528567674, "grad_norm": 0.09736327826976776, "learning_rate": 0.002, "loss": 2.345, "step": 166870 }, { "epoch": 0.6451114100601506, "grad_norm": 0.14185789227485657, "learning_rate": 0.002, "loss": 2.3471, "step": 166880 }, { "epoch": 0.6451500672635339, "grad_norm": 0.10070722550153732, "learning_rate": 0.002, "loss": 2.341, "step": 166890 }, { "epoch": 0.6451887244669172, "grad_norm": 0.09284228831529617, "learning_rate": 0.002, "loss": 2.3541, "step": 166900 }, { "epoch": 0.6452273816703005, "grad_norm": 0.12558911740779877, "learning_rate": 0.002, "loss": 2.37, "step": 166910 }, { "epoch": 0.6452660388736837, "grad_norm": 0.10748089849948883, "learning_rate": 0.002, "loss": 2.3508, "step": 166920 }, { "epoch": 0.645304696077067, "grad_norm": 0.09128913283348083, "learning_rate": 0.002, "loss": 2.343, "step": 166930 }, { "epoch": 0.6453433532804502, "grad_norm": 0.09370754659175873, "learning_rate": 0.002, "loss": 2.338, "step": 166940 }, { "epoch": 0.6453820104838336, "grad_norm": 0.10552221536636353, "learning_rate": 0.002, "loss": 2.3393, "step": 166950 }, { "epoch": 0.6454206676872168, "grad_norm": 0.10560479760169983, "learning_rate": 0.002, "loss": 2.3415, "step": 166960 }, { "epoch": 0.6454593248906001, "grad_norm": 0.12228506058454514, "learning_rate": 0.002, "loss": 2.343, "step": 166970 }, { "epoch": 0.6454979820939833, "grad_norm": 0.10752666741609573, "learning_rate": 0.002, "loss": 2.3456, "step": 166980 }, { "epoch": 0.6455366392973667, "grad_norm": 0.13183359801769257, "learning_rate": 0.002, "loss": 2.3538, "step": 166990 }, { "epoch": 0.64557529650075, "grad_norm": 0.09106297045946121, "learning_rate": 0.002, "loss": 2.3479, "step": 167000 }, { "epoch": 0.6456139537041332, "grad_norm": 0.09969896823167801, "learning_rate": 0.002, "loss": 2.3386, "step": 167010 }, { "epoch": 0.6456526109075165, "grad_norm": 0.10386525094509125, "learning_rate": 0.002, "loss": 2.3498, "step": 167020 }, { "epoch": 0.6456912681108998, "grad_norm": 0.1330537647008896, "learning_rate": 0.002, "loss": 2.3598, "step": 167030 }, { "epoch": 0.6457299253142831, "grad_norm": 0.1207583099603653, "learning_rate": 0.002, "loss": 2.3302, "step": 167040 }, { "epoch": 0.6457685825176663, "grad_norm": 0.11657993495464325, "learning_rate": 0.002, "loss": 2.3423, "step": 167050 }, { "epoch": 0.6458072397210496, "grad_norm": 0.1179221123456955, "learning_rate": 0.002, "loss": 2.3447, "step": 167060 }, { "epoch": 0.645845896924433, "grad_norm": 0.11940892040729523, "learning_rate": 0.002, "loss": 2.3293, "step": 167070 }, { "epoch": 0.6458845541278162, "grad_norm": 0.10305957496166229, "learning_rate": 0.002, "loss": 2.3489, "step": 167080 }, { "epoch": 0.6459232113311995, "grad_norm": 0.09387367963790894, "learning_rate": 0.002, "loss": 2.3434, "step": 167090 }, { "epoch": 0.6459618685345827, "grad_norm": 0.09949074685573578, "learning_rate": 0.002, "loss": 2.3571, "step": 167100 }, { "epoch": 0.646000525737966, "grad_norm": 0.11801401525735855, "learning_rate": 0.002, "loss": 2.3231, "step": 167110 }, { "epoch": 0.6460391829413493, "grad_norm": 0.11283881217241287, "learning_rate": 0.002, "loss": 2.3373, "step": 167120 }, { "epoch": 0.6460778401447326, "grad_norm": 0.10136590152978897, "learning_rate": 0.002, "loss": 2.3384, "step": 167130 }, { "epoch": 0.6461164973481158, "grad_norm": 0.09200512617826462, "learning_rate": 0.002, "loss": 2.3528, "step": 167140 }, { "epoch": 0.6461551545514991, "grad_norm": 0.10584022849798203, "learning_rate": 0.002, "loss": 2.3445, "step": 167150 }, { "epoch": 0.6461938117548824, "grad_norm": 0.11500915884971619, "learning_rate": 0.002, "loss": 2.3668, "step": 167160 }, { "epoch": 0.6462324689582657, "grad_norm": 0.11622696369886398, "learning_rate": 0.002, "loss": 2.3387, "step": 167170 }, { "epoch": 0.646271126161649, "grad_norm": 0.1187836229801178, "learning_rate": 0.002, "loss": 2.3422, "step": 167180 }, { "epoch": 0.6463097833650322, "grad_norm": 0.12957388162612915, "learning_rate": 0.002, "loss": 2.3471, "step": 167190 }, { "epoch": 0.6463484405684156, "grad_norm": 0.10623416304588318, "learning_rate": 0.002, "loss": 2.3547, "step": 167200 }, { "epoch": 0.6463870977717988, "grad_norm": 0.09525856375694275, "learning_rate": 0.002, "loss": 2.341, "step": 167210 }, { "epoch": 0.6464257549751821, "grad_norm": 0.09404592216014862, "learning_rate": 0.002, "loss": 2.3422, "step": 167220 }, { "epoch": 0.6464644121785653, "grad_norm": 0.11730389297008514, "learning_rate": 0.002, "loss": 2.342, "step": 167230 }, { "epoch": 0.6465030693819487, "grad_norm": 0.1096835732460022, "learning_rate": 0.002, "loss": 2.3445, "step": 167240 }, { "epoch": 0.6465417265853319, "grad_norm": 0.11142761260271072, "learning_rate": 0.002, "loss": 2.347, "step": 167250 }, { "epoch": 0.6465803837887152, "grad_norm": 0.09524443000555038, "learning_rate": 0.002, "loss": 2.3424, "step": 167260 }, { "epoch": 0.6466190409920984, "grad_norm": 0.09275458008050919, "learning_rate": 0.002, "loss": 2.3356, "step": 167270 }, { "epoch": 0.6466576981954818, "grad_norm": 0.11118104308843613, "learning_rate": 0.002, "loss": 2.3353, "step": 167280 }, { "epoch": 0.646696355398865, "grad_norm": 0.10477935522794724, "learning_rate": 0.002, "loss": 2.3565, "step": 167290 }, { "epoch": 0.6467350126022483, "grad_norm": 0.11114238947629929, "learning_rate": 0.002, "loss": 2.3344, "step": 167300 }, { "epoch": 0.6467736698056316, "grad_norm": 0.09935715049505234, "learning_rate": 0.002, "loss": 2.3436, "step": 167310 }, { "epoch": 0.6468123270090148, "grad_norm": 0.09258892387151718, "learning_rate": 0.002, "loss": 2.3454, "step": 167320 }, { "epoch": 0.6468509842123982, "grad_norm": 0.10509154200553894, "learning_rate": 0.002, "loss": 2.3425, "step": 167330 }, { "epoch": 0.6468896414157814, "grad_norm": 0.11609136313199997, "learning_rate": 0.002, "loss": 2.3367, "step": 167340 }, { "epoch": 0.6469282986191647, "grad_norm": 0.10558400303125381, "learning_rate": 0.002, "loss": 2.3471, "step": 167350 }, { "epoch": 0.6469669558225479, "grad_norm": 0.0904054194688797, "learning_rate": 0.002, "loss": 2.3338, "step": 167360 }, { "epoch": 0.6470056130259313, "grad_norm": 0.1114594042301178, "learning_rate": 0.002, "loss": 2.3319, "step": 167370 }, { "epoch": 0.6470442702293145, "grad_norm": 0.09686373919248581, "learning_rate": 0.002, "loss": 2.3537, "step": 167380 }, { "epoch": 0.6470829274326978, "grad_norm": 0.1125197783112526, "learning_rate": 0.002, "loss": 2.3564, "step": 167390 }, { "epoch": 0.647121584636081, "grad_norm": 0.09414125978946686, "learning_rate": 0.002, "loss": 2.3669, "step": 167400 }, { "epoch": 0.6471602418394644, "grad_norm": 0.10086499899625778, "learning_rate": 0.002, "loss": 2.3295, "step": 167410 }, { "epoch": 0.6471988990428477, "grad_norm": 0.1413799226284027, "learning_rate": 0.002, "loss": 2.3446, "step": 167420 }, { "epoch": 0.6472375562462309, "grad_norm": 0.11475475877523422, "learning_rate": 0.002, "loss": 2.3527, "step": 167430 }, { "epoch": 0.6472762134496142, "grad_norm": 0.10616303235292435, "learning_rate": 0.002, "loss": 2.3441, "step": 167440 }, { "epoch": 0.6473148706529975, "grad_norm": 0.10366956889629364, "learning_rate": 0.002, "loss": 2.3367, "step": 167450 }, { "epoch": 0.6473535278563808, "grad_norm": 0.10474330931901932, "learning_rate": 0.002, "loss": 2.3348, "step": 167460 }, { "epoch": 0.647392185059764, "grad_norm": 0.10938055068254471, "learning_rate": 0.002, "loss": 2.3418, "step": 167470 }, { "epoch": 0.6474308422631473, "grad_norm": 0.0967249721288681, "learning_rate": 0.002, "loss": 2.3516, "step": 167480 }, { "epoch": 0.6474694994665305, "grad_norm": 0.11261042207479477, "learning_rate": 0.002, "loss": 2.3455, "step": 167490 }, { "epoch": 0.6475081566699139, "grad_norm": 0.10691956430673599, "learning_rate": 0.002, "loss": 2.3321, "step": 167500 }, { "epoch": 0.6475468138732972, "grad_norm": 0.11203692853450775, "learning_rate": 0.002, "loss": 2.362, "step": 167510 }, { "epoch": 0.6475854710766804, "grad_norm": 0.09567520767450333, "learning_rate": 0.002, "loss": 2.3518, "step": 167520 }, { "epoch": 0.6476241282800637, "grad_norm": 0.12334605306386948, "learning_rate": 0.002, "loss": 2.3445, "step": 167530 }, { "epoch": 0.647662785483447, "grad_norm": 0.11762816458940506, "learning_rate": 0.002, "loss": 2.3607, "step": 167540 }, { "epoch": 0.6477014426868303, "grad_norm": 0.09287743270397186, "learning_rate": 0.002, "loss": 2.3572, "step": 167550 }, { "epoch": 0.6477400998902135, "grad_norm": 0.1160036027431488, "learning_rate": 0.002, "loss": 2.3479, "step": 167560 }, { "epoch": 0.6477787570935968, "grad_norm": 0.1122221052646637, "learning_rate": 0.002, "loss": 2.3455, "step": 167570 }, { "epoch": 0.6478174142969801, "grad_norm": 0.11325201392173767, "learning_rate": 0.002, "loss": 2.3351, "step": 167580 }, { "epoch": 0.6478560715003634, "grad_norm": 0.09645060449838638, "learning_rate": 0.002, "loss": 2.3323, "step": 167590 }, { "epoch": 0.6478947287037466, "grad_norm": 0.10355105996131897, "learning_rate": 0.002, "loss": 2.3532, "step": 167600 }, { "epoch": 0.6479333859071299, "grad_norm": 0.09704997390508652, "learning_rate": 0.002, "loss": 2.3322, "step": 167610 }, { "epoch": 0.6479720431105133, "grad_norm": 0.10785232484340668, "learning_rate": 0.002, "loss": 2.3238, "step": 167620 }, { "epoch": 0.6480107003138965, "grad_norm": 0.1302575320005417, "learning_rate": 0.002, "loss": 2.3644, "step": 167630 }, { "epoch": 0.6480493575172798, "grad_norm": 0.09805729240179062, "learning_rate": 0.002, "loss": 2.3188, "step": 167640 }, { "epoch": 0.648088014720663, "grad_norm": 0.10799106955528259, "learning_rate": 0.002, "loss": 2.3398, "step": 167650 }, { "epoch": 0.6481266719240463, "grad_norm": 0.11254876106977463, "learning_rate": 0.002, "loss": 2.3297, "step": 167660 }, { "epoch": 0.6481653291274296, "grad_norm": 0.10297144204378128, "learning_rate": 0.002, "loss": 2.3565, "step": 167670 }, { "epoch": 0.6482039863308129, "grad_norm": 0.09445956349372864, "learning_rate": 0.002, "loss": 2.3487, "step": 167680 }, { "epoch": 0.6482426435341961, "grad_norm": 0.11523937433958054, "learning_rate": 0.002, "loss": 2.3632, "step": 167690 }, { "epoch": 0.6482813007375794, "grad_norm": 0.12582455575466156, "learning_rate": 0.002, "loss": 2.3498, "step": 167700 }, { "epoch": 0.6483199579409628, "grad_norm": 0.11468572169542313, "learning_rate": 0.002, "loss": 2.3307, "step": 167710 }, { "epoch": 0.648358615144346, "grad_norm": 0.09803872555494308, "learning_rate": 0.002, "loss": 2.3361, "step": 167720 }, { "epoch": 0.6483972723477293, "grad_norm": 0.09609941393136978, "learning_rate": 0.002, "loss": 2.3441, "step": 167730 }, { "epoch": 0.6484359295511125, "grad_norm": 0.10531718283891678, "learning_rate": 0.002, "loss": 2.3444, "step": 167740 }, { "epoch": 0.6484745867544959, "grad_norm": 0.1036820337176323, "learning_rate": 0.002, "loss": 2.3462, "step": 167750 }, { "epoch": 0.6485132439578791, "grad_norm": 0.09775065630674362, "learning_rate": 0.002, "loss": 2.3422, "step": 167760 }, { "epoch": 0.6485519011612624, "grad_norm": 0.10095041245222092, "learning_rate": 0.002, "loss": 2.3538, "step": 167770 }, { "epoch": 0.6485905583646456, "grad_norm": 0.10327846556901932, "learning_rate": 0.002, "loss": 2.3394, "step": 167780 }, { "epoch": 0.648629215568029, "grad_norm": 0.2384766787290573, "learning_rate": 0.002, "loss": 2.3333, "step": 167790 }, { "epoch": 0.6486678727714122, "grad_norm": 0.09711385518312454, "learning_rate": 0.002, "loss": 2.3446, "step": 167800 }, { "epoch": 0.6487065299747955, "grad_norm": 0.1092362031340599, "learning_rate": 0.002, "loss": 2.3391, "step": 167810 }, { "epoch": 0.6487451871781788, "grad_norm": 0.11628155410289764, "learning_rate": 0.002, "loss": 2.3432, "step": 167820 }, { "epoch": 0.6487838443815621, "grad_norm": 0.10958966612815857, "learning_rate": 0.002, "loss": 2.3332, "step": 167830 }, { "epoch": 0.6488225015849454, "grad_norm": 0.11430975794792175, "learning_rate": 0.002, "loss": 2.3339, "step": 167840 }, { "epoch": 0.6488611587883286, "grad_norm": 0.08860432356595993, "learning_rate": 0.002, "loss": 2.3409, "step": 167850 }, { "epoch": 0.6488998159917119, "grad_norm": 0.10471168160438538, "learning_rate": 0.002, "loss": 2.3368, "step": 167860 }, { "epoch": 0.6489384731950951, "grad_norm": 0.10139449685811996, "learning_rate": 0.002, "loss": 2.3377, "step": 167870 }, { "epoch": 0.6489771303984785, "grad_norm": 0.10843594372272491, "learning_rate": 0.002, "loss": 2.3393, "step": 167880 }, { "epoch": 0.6490157876018617, "grad_norm": 0.10248102247714996, "learning_rate": 0.002, "loss": 2.3468, "step": 167890 }, { "epoch": 0.649054444805245, "grad_norm": 0.08800366520881653, "learning_rate": 0.002, "loss": 2.3405, "step": 167900 }, { "epoch": 0.6490931020086282, "grad_norm": 0.1202792376279831, "learning_rate": 0.002, "loss": 2.3475, "step": 167910 }, { "epoch": 0.6491317592120116, "grad_norm": 0.11822064220905304, "learning_rate": 0.002, "loss": 2.3349, "step": 167920 }, { "epoch": 0.6491704164153949, "grad_norm": 0.09871813654899597, "learning_rate": 0.002, "loss": 2.3326, "step": 167930 }, { "epoch": 0.6492090736187781, "grad_norm": 0.09461401402950287, "learning_rate": 0.002, "loss": 2.344, "step": 167940 }, { "epoch": 0.6492477308221614, "grad_norm": 0.1045580804347992, "learning_rate": 0.002, "loss": 2.3425, "step": 167950 }, { "epoch": 0.6492863880255447, "grad_norm": 0.14083321392536163, "learning_rate": 0.002, "loss": 2.3456, "step": 167960 }, { "epoch": 0.649325045228928, "grad_norm": 0.1170283779501915, "learning_rate": 0.002, "loss": 2.3436, "step": 167970 }, { "epoch": 0.6493637024323112, "grad_norm": 0.1271386593580246, "learning_rate": 0.002, "loss": 2.3291, "step": 167980 }, { "epoch": 0.6494023596356945, "grad_norm": 0.12174142152070999, "learning_rate": 0.002, "loss": 2.349, "step": 167990 }, { "epoch": 0.6494410168390778, "grad_norm": 0.09934654831886292, "learning_rate": 0.002, "loss": 2.3446, "step": 168000 }, { "epoch": 0.6494796740424611, "grad_norm": 0.08649374544620514, "learning_rate": 0.002, "loss": 2.353, "step": 168010 }, { "epoch": 0.6495183312458443, "grad_norm": 0.1037168875336647, "learning_rate": 0.002, "loss": 2.3519, "step": 168020 }, { "epoch": 0.6495569884492276, "grad_norm": 0.10774770379066467, "learning_rate": 0.002, "loss": 2.3376, "step": 168030 }, { "epoch": 0.6495956456526109, "grad_norm": 0.10287889838218689, "learning_rate": 0.002, "loss": 2.3517, "step": 168040 }, { "epoch": 0.6496343028559942, "grad_norm": 0.09221872687339783, "learning_rate": 0.002, "loss": 2.3496, "step": 168050 }, { "epoch": 0.6496729600593775, "grad_norm": 0.11805696040391922, "learning_rate": 0.002, "loss": 2.336, "step": 168060 }, { "epoch": 0.6497116172627607, "grad_norm": 0.10697631537914276, "learning_rate": 0.002, "loss": 2.3313, "step": 168070 }, { "epoch": 0.649750274466144, "grad_norm": 0.11242430657148361, "learning_rate": 0.002, "loss": 2.3471, "step": 168080 }, { "epoch": 0.6497889316695273, "grad_norm": 0.11488054692745209, "learning_rate": 0.002, "loss": 2.3381, "step": 168090 }, { "epoch": 0.6498275888729106, "grad_norm": 0.11183663457632065, "learning_rate": 0.002, "loss": 2.3477, "step": 168100 }, { "epoch": 0.6498662460762938, "grad_norm": 0.10771051794290543, "learning_rate": 0.002, "loss": 2.3475, "step": 168110 }, { "epoch": 0.6499049032796771, "grad_norm": 0.1694340854883194, "learning_rate": 0.002, "loss": 2.3371, "step": 168120 }, { "epoch": 0.6499435604830605, "grad_norm": 0.10719171166419983, "learning_rate": 0.002, "loss": 2.3403, "step": 168130 }, { "epoch": 0.6499822176864437, "grad_norm": 0.09480391442775726, "learning_rate": 0.002, "loss": 2.3316, "step": 168140 }, { "epoch": 0.650020874889827, "grad_norm": 0.14159810543060303, "learning_rate": 0.002, "loss": 2.348, "step": 168150 }, { "epoch": 0.6500595320932102, "grad_norm": 0.10143940150737762, "learning_rate": 0.002, "loss": 2.3297, "step": 168160 }, { "epoch": 0.6500981892965936, "grad_norm": 0.11819176375865936, "learning_rate": 0.002, "loss": 2.3304, "step": 168170 }, { "epoch": 0.6501368464999768, "grad_norm": 0.11603454500436783, "learning_rate": 0.002, "loss": 2.346, "step": 168180 }, { "epoch": 0.6501755037033601, "grad_norm": 0.12313628941774368, "learning_rate": 0.002, "loss": 2.3427, "step": 168190 }, { "epoch": 0.6502141609067433, "grad_norm": 0.09967542439699173, "learning_rate": 0.002, "loss": 2.3279, "step": 168200 }, { "epoch": 0.6502528181101267, "grad_norm": 0.10996179282665253, "learning_rate": 0.002, "loss": 2.3523, "step": 168210 }, { "epoch": 0.65029147531351, "grad_norm": 0.099667027592659, "learning_rate": 0.002, "loss": 2.3517, "step": 168220 }, { "epoch": 0.6503301325168932, "grad_norm": 0.14640873670578003, "learning_rate": 0.002, "loss": 2.3611, "step": 168230 }, { "epoch": 0.6503687897202765, "grad_norm": 0.10246577858924866, "learning_rate": 0.002, "loss": 2.3318, "step": 168240 }, { "epoch": 0.6504074469236597, "grad_norm": 0.09082327038049698, "learning_rate": 0.002, "loss": 2.3493, "step": 168250 }, { "epoch": 0.6504461041270431, "grad_norm": 0.1145891472697258, "learning_rate": 0.002, "loss": 2.3484, "step": 168260 }, { "epoch": 0.6504847613304263, "grad_norm": 0.10838694870471954, "learning_rate": 0.002, "loss": 2.3284, "step": 168270 }, { "epoch": 0.6505234185338096, "grad_norm": 0.12250230461359024, "learning_rate": 0.002, "loss": 2.3344, "step": 168280 }, { "epoch": 0.6505620757371928, "grad_norm": 0.09471901506185532, "learning_rate": 0.002, "loss": 2.3435, "step": 168290 }, { "epoch": 0.6506007329405762, "grad_norm": 0.1212347075343132, "learning_rate": 0.002, "loss": 2.3364, "step": 168300 }, { "epoch": 0.6506393901439594, "grad_norm": 0.09806258231401443, "learning_rate": 0.002, "loss": 2.3305, "step": 168310 }, { "epoch": 0.6506780473473427, "grad_norm": 0.10392429679632187, "learning_rate": 0.002, "loss": 2.3424, "step": 168320 }, { "epoch": 0.6507167045507259, "grad_norm": 0.10079951584339142, "learning_rate": 0.002, "loss": 2.3315, "step": 168330 }, { "epoch": 0.6507553617541093, "grad_norm": 0.10576821863651276, "learning_rate": 0.002, "loss": 2.3421, "step": 168340 }, { "epoch": 0.6507940189574926, "grad_norm": 0.09604281932115555, "learning_rate": 0.002, "loss": 2.3481, "step": 168350 }, { "epoch": 0.6508326761608758, "grad_norm": 0.12223909050226212, "learning_rate": 0.002, "loss": 2.3419, "step": 168360 }, { "epoch": 0.6508713333642591, "grad_norm": 0.1080576702952385, "learning_rate": 0.002, "loss": 2.345, "step": 168370 }, { "epoch": 0.6509099905676424, "grad_norm": 0.10412755608558655, "learning_rate": 0.002, "loss": 2.3406, "step": 168380 }, { "epoch": 0.6509486477710257, "grad_norm": 0.09832601994276047, "learning_rate": 0.002, "loss": 2.3446, "step": 168390 }, { "epoch": 0.6509873049744089, "grad_norm": 0.09842384606599808, "learning_rate": 0.002, "loss": 2.3516, "step": 168400 }, { "epoch": 0.6510259621777922, "grad_norm": 0.11502683907747269, "learning_rate": 0.002, "loss": 2.3499, "step": 168410 }, { "epoch": 0.6510646193811754, "grad_norm": 0.11557365208864212, "learning_rate": 0.002, "loss": 2.3485, "step": 168420 }, { "epoch": 0.6511032765845588, "grad_norm": 0.124544657766819, "learning_rate": 0.002, "loss": 2.3509, "step": 168430 }, { "epoch": 0.651141933787942, "grad_norm": 0.10851182788610458, "learning_rate": 0.002, "loss": 2.3537, "step": 168440 }, { "epoch": 0.6511805909913253, "grad_norm": 0.10926005244255066, "learning_rate": 0.002, "loss": 2.341, "step": 168450 }, { "epoch": 0.6512192481947086, "grad_norm": 0.11285897344350815, "learning_rate": 0.002, "loss": 2.3494, "step": 168460 }, { "epoch": 0.6512579053980919, "grad_norm": 0.1205969750881195, "learning_rate": 0.002, "loss": 2.332, "step": 168470 }, { "epoch": 0.6512965626014752, "grad_norm": 0.09737059473991394, "learning_rate": 0.002, "loss": 2.3429, "step": 168480 }, { "epoch": 0.6513352198048584, "grad_norm": 0.10460642725229263, "learning_rate": 0.002, "loss": 2.3525, "step": 168490 }, { "epoch": 0.6513738770082417, "grad_norm": 0.10728586465120316, "learning_rate": 0.002, "loss": 2.3503, "step": 168500 }, { "epoch": 0.651412534211625, "grad_norm": 0.11106391996145248, "learning_rate": 0.002, "loss": 2.3675, "step": 168510 }, { "epoch": 0.6514511914150083, "grad_norm": 0.0917786955833435, "learning_rate": 0.002, "loss": 2.3461, "step": 168520 }, { "epoch": 0.6514898486183915, "grad_norm": 0.10599620640277863, "learning_rate": 0.002, "loss": 2.3328, "step": 168530 }, { "epoch": 0.6515285058217748, "grad_norm": 0.10357921570539474, "learning_rate": 0.002, "loss": 2.3453, "step": 168540 }, { "epoch": 0.6515671630251582, "grad_norm": 0.13063235580921173, "learning_rate": 0.002, "loss": 2.3429, "step": 168550 }, { "epoch": 0.6516058202285414, "grad_norm": 0.23453116416931152, "learning_rate": 0.002, "loss": 2.3373, "step": 168560 }, { "epoch": 0.6516444774319247, "grad_norm": 0.11451299488544464, "learning_rate": 0.002, "loss": 2.3438, "step": 168570 }, { "epoch": 0.6516831346353079, "grad_norm": 0.10291838645935059, "learning_rate": 0.002, "loss": 2.3457, "step": 168580 }, { "epoch": 0.6517217918386912, "grad_norm": 0.11585838347673416, "learning_rate": 0.002, "loss": 2.3604, "step": 168590 }, { "epoch": 0.6517604490420745, "grad_norm": 0.0969030112028122, "learning_rate": 0.002, "loss": 2.3401, "step": 168600 }, { "epoch": 0.6517991062454578, "grad_norm": 0.15430507063865662, "learning_rate": 0.002, "loss": 2.3305, "step": 168610 }, { "epoch": 0.651837763448841, "grad_norm": 0.12934225797653198, "learning_rate": 0.002, "loss": 2.3465, "step": 168620 }, { "epoch": 0.6518764206522243, "grad_norm": 0.1302899420261383, "learning_rate": 0.002, "loss": 2.3366, "step": 168630 }, { "epoch": 0.6519150778556076, "grad_norm": 0.0999523252248764, "learning_rate": 0.002, "loss": 2.3399, "step": 168640 }, { "epoch": 0.6519537350589909, "grad_norm": 0.1182340458035469, "learning_rate": 0.002, "loss": 2.3449, "step": 168650 }, { "epoch": 0.6519923922623742, "grad_norm": 0.11040201783180237, "learning_rate": 0.002, "loss": 2.3482, "step": 168660 }, { "epoch": 0.6520310494657574, "grad_norm": 0.11541494727134705, "learning_rate": 0.002, "loss": 2.3411, "step": 168670 }, { "epoch": 0.6520697066691408, "grad_norm": 0.12044411152601242, "learning_rate": 0.002, "loss": 2.343, "step": 168680 }, { "epoch": 0.652108363872524, "grad_norm": 0.09319040924310684, "learning_rate": 0.002, "loss": 2.3221, "step": 168690 }, { "epoch": 0.6521470210759073, "grad_norm": 0.10696112364530563, "learning_rate": 0.002, "loss": 2.3202, "step": 168700 }, { "epoch": 0.6521856782792905, "grad_norm": 0.09727698564529419, "learning_rate": 0.002, "loss": 2.324, "step": 168710 }, { "epoch": 0.6522243354826739, "grad_norm": 0.11124209314584732, "learning_rate": 0.002, "loss": 2.358, "step": 168720 }, { "epoch": 0.6522629926860571, "grad_norm": 0.10262446105480194, "learning_rate": 0.002, "loss": 2.3354, "step": 168730 }, { "epoch": 0.6523016498894404, "grad_norm": 0.11032116413116455, "learning_rate": 0.002, "loss": 2.3442, "step": 168740 }, { "epoch": 0.6523403070928236, "grad_norm": 0.125161811709404, "learning_rate": 0.002, "loss": 2.3383, "step": 168750 }, { "epoch": 0.652378964296207, "grad_norm": 0.09789597988128662, "learning_rate": 0.002, "loss": 2.3335, "step": 168760 }, { "epoch": 0.6524176214995903, "grad_norm": 0.09521053731441498, "learning_rate": 0.002, "loss": 2.3445, "step": 168770 }, { "epoch": 0.6524562787029735, "grad_norm": 0.13877107203006744, "learning_rate": 0.002, "loss": 2.3612, "step": 168780 }, { "epoch": 0.6524949359063568, "grad_norm": 0.1110723614692688, "learning_rate": 0.002, "loss": 2.3357, "step": 168790 }, { "epoch": 0.65253359310974, "grad_norm": 0.11743714660406113, "learning_rate": 0.002, "loss": 2.3444, "step": 168800 }, { "epoch": 0.6525722503131234, "grad_norm": 0.1045524850487709, "learning_rate": 0.002, "loss": 2.3424, "step": 168810 }, { "epoch": 0.6526109075165066, "grad_norm": 0.10068535804748535, "learning_rate": 0.002, "loss": 2.3404, "step": 168820 }, { "epoch": 0.6526495647198899, "grad_norm": 0.08899284899234772, "learning_rate": 0.002, "loss": 2.3365, "step": 168830 }, { "epoch": 0.6526882219232731, "grad_norm": 0.140655517578125, "learning_rate": 0.002, "loss": 2.3372, "step": 168840 }, { "epoch": 0.6527268791266565, "grad_norm": 0.10260647535324097, "learning_rate": 0.002, "loss": 2.3402, "step": 168850 }, { "epoch": 0.6527655363300398, "grad_norm": 0.10964271426200867, "learning_rate": 0.002, "loss": 2.344, "step": 168860 }, { "epoch": 0.652804193533423, "grad_norm": 0.11483105272054672, "learning_rate": 0.002, "loss": 2.3427, "step": 168870 }, { "epoch": 0.6528428507368063, "grad_norm": 0.11260164529085159, "learning_rate": 0.002, "loss": 2.3387, "step": 168880 }, { "epoch": 0.6528815079401896, "grad_norm": 0.11363021284341812, "learning_rate": 0.002, "loss": 2.3449, "step": 168890 }, { "epoch": 0.6529201651435729, "grad_norm": 0.10534073412418365, "learning_rate": 0.002, "loss": 2.3429, "step": 168900 }, { "epoch": 0.6529588223469561, "grad_norm": 0.12031086534261703, "learning_rate": 0.002, "loss": 2.3331, "step": 168910 }, { "epoch": 0.6529974795503394, "grad_norm": 0.10896068066358566, "learning_rate": 0.002, "loss": 2.3255, "step": 168920 }, { "epoch": 0.6530361367537227, "grad_norm": 0.1259516328573227, "learning_rate": 0.002, "loss": 2.3357, "step": 168930 }, { "epoch": 0.653074793957106, "grad_norm": 0.110020712018013, "learning_rate": 0.002, "loss": 2.3471, "step": 168940 }, { "epoch": 0.6531134511604892, "grad_norm": 0.14240184426307678, "learning_rate": 0.002, "loss": 2.3614, "step": 168950 }, { "epoch": 0.6531521083638725, "grad_norm": 0.0922495424747467, "learning_rate": 0.002, "loss": 2.3558, "step": 168960 }, { "epoch": 0.6531907655672557, "grad_norm": 0.08859805017709732, "learning_rate": 0.002, "loss": 2.3435, "step": 168970 }, { "epoch": 0.6532294227706391, "grad_norm": 0.09676842391490936, "learning_rate": 0.002, "loss": 2.3518, "step": 168980 }, { "epoch": 0.6532680799740224, "grad_norm": 0.11511459946632385, "learning_rate": 0.002, "loss": 2.3287, "step": 168990 }, { "epoch": 0.6533067371774056, "grad_norm": 0.09584780037403107, "learning_rate": 0.002, "loss": 2.3505, "step": 169000 }, { "epoch": 0.6533453943807889, "grad_norm": 0.1151767149567604, "learning_rate": 0.002, "loss": 2.333, "step": 169010 }, { "epoch": 0.6533840515841722, "grad_norm": 0.09524775296449661, "learning_rate": 0.002, "loss": 2.3439, "step": 169020 }, { "epoch": 0.6534227087875555, "grad_norm": 0.10495606064796448, "learning_rate": 0.002, "loss": 2.331, "step": 169030 }, { "epoch": 0.6534613659909387, "grad_norm": 0.09548933058977127, "learning_rate": 0.002, "loss": 2.3426, "step": 169040 }, { "epoch": 0.653500023194322, "grad_norm": 0.1085919663310051, "learning_rate": 0.002, "loss": 2.3415, "step": 169050 }, { "epoch": 0.6535386803977054, "grad_norm": 0.09393031895160675, "learning_rate": 0.002, "loss": 2.3302, "step": 169060 }, { "epoch": 0.6535773376010886, "grad_norm": 0.11786603927612305, "learning_rate": 0.002, "loss": 2.3384, "step": 169070 }, { "epoch": 0.6536159948044719, "grad_norm": 0.09170527011156082, "learning_rate": 0.002, "loss": 2.3544, "step": 169080 }, { "epoch": 0.6536546520078551, "grad_norm": 0.11640550196170807, "learning_rate": 0.002, "loss": 2.329, "step": 169090 }, { "epoch": 0.6536933092112385, "grad_norm": 0.10119421035051346, "learning_rate": 0.002, "loss": 2.3358, "step": 169100 }, { "epoch": 0.6537319664146217, "grad_norm": 0.1151321604847908, "learning_rate": 0.002, "loss": 2.342, "step": 169110 }, { "epoch": 0.653770623618005, "grad_norm": 0.11418864130973816, "learning_rate": 0.002, "loss": 2.3498, "step": 169120 }, { "epoch": 0.6538092808213882, "grad_norm": 0.10545522719621658, "learning_rate": 0.002, "loss": 2.3399, "step": 169130 }, { "epoch": 0.6538479380247716, "grad_norm": 0.09957364946603775, "learning_rate": 0.002, "loss": 2.341, "step": 169140 }, { "epoch": 0.6538865952281548, "grad_norm": 0.12598615884780884, "learning_rate": 0.002, "loss": 2.3401, "step": 169150 }, { "epoch": 0.6539252524315381, "grad_norm": 0.1004445031285286, "learning_rate": 0.002, "loss": 2.3412, "step": 169160 }, { "epoch": 0.6539639096349213, "grad_norm": 0.09190630912780762, "learning_rate": 0.002, "loss": 2.3565, "step": 169170 }, { "epoch": 0.6540025668383046, "grad_norm": 0.12551818788051605, "learning_rate": 0.002, "loss": 2.3507, "step": 169180 }, { "epoch": 0.654041224041688, "grad_norm": 0.09470973163843155, "learning_rate": 0.002, "loss": 2.3423, "step": 169190 }, { "epoch": 0.6540798812450712, "grad_norm": 0.11399231106042862, "learning_rate": 0.002, "loss": 2.3419, "step": 169200 }, { "epoch": 0.6541185384484545, "grad_norm": 0.09847266227006912, "learning_rate": 0.002, "loss": 2.3363, "step": 169210 }, { "epoch": 0.6541571956518377, "grad_norm": 0.1182786300778389, "learning_rate": 0.002, "loss": 2.3521, "step": 169220 }, { "epoch": 0.6541958528552211, "grad_norm": 0.11805164813995361, "learning_rate": 0.002, "loss": 2.3393, "step": 169230 }, { "epoch": 0.6542345100586043, "grad_norm": 0.11220698058605194, "learning_rate": 0.002, "loss": 2.3517, "step": 169240 }, { "epoch": 0.6542731672619876, "grad_norm": 0.11356706917285919, "learning_rate": 0.002, "loss": 2.3371, "step": 169250 }, { "epoch": 0.6543118244653708, "grad_norm": 0.09796137362718582, "learning_rate": 0.002, "loss": 2.3394, "step": 169260 }, { "epoch": 0.6543504816687542, "grad_norm": 0.11808452755212784, "learning_rate": 0.002, "loss": 2.3407, "step": 169270 }, { "epoch": 0.6543891388721375, "grad_norm": 0.17121228575706482, "learning_rate": 0.002, "loss": 2.3494, "step": 169280 }, { "epoch": 0.6544277960755207, "grad_norm": 0.1005435660481453, "learning_rate": 0.002, "loss": 2.3464, "step": 169290 }, { "epoch": 0.654466453278904, "grad_norm": 0.10762804001569748, "learning_rate": 0.002, "loss": 2.3488, "step": 169300 }, { "epoch": 0.6545051104822873, "grad_norm": 0.10820998251438141, "learning_rate": 0.002, "loss": 2.3517, "step": 169310 }, { "epoch": 0.6545437676856706, "grad_norm": 0.10625746846199036, "learning_rate": 0.002, "loss": 2.354, "step": 169320 }, { "epoch": 0.6545824248890538, "grad_norm": 0.09775619208812714, "learning_rate": 0.002, "loss": 2.3435, "step": 169330 }, { "epoch": 0.6546210820924371, "grad_norm": 0.09884735196828842, "learning_rate": 0.002, "loss": 2.3517, "step": 169340 }, { "epoch": 0.6546597392958203, "grad_norm": 0.11361068487167358, "learning_rate": 0.002, "loss": 2.359, "step": 169350 }, { "epoch": 0.6546983964992037, "grad_norm": 0.09546619653701782, "learning_rate": 0.002, "loss": 2.3183, "step": 169360 }, { "epoch": 0.654737053702587, "grad_norm": 0.503311038017273, "learning_rate": 0.002, "loss": 2.3435, "step": 169370 }, { "epoch": 0.6547757109059702, "grad_norm": 0.1377723515033722, "learning_rate": 0.002, "loss": 2.3442, "step": 169380 }, { "epoch": 0.6548143681093535, "grad_norm": 0.10469796508550644, "learning_rate": 0.002, "loss": 2.3474, "step": 169390 }, { "epoch": 0.6548530253127368, "grad_norm": 0.09249204397201538, "learning_rate": 0.002, "loss": 2.342, "step": 169400 }, { "epoch": 0.6548916825161201, "grad_norm": 0.11732906848192215, "learning_rate": 0.002, "loss": 2.3531, "step": 169410 }, { "epoch": 0.6549303397195033, "grad_norm": 0.1086328774690628, "learning_rate": 0.002, "loss": 2.3317, "step": 169420 }, { "epoch": 0.6549689969228866, "grad_norm": 0.10182340443134308, "learning_rate": 0.002, "loss": 2.3353, "step": 169430 }, { "epoch": 0.6550076541262699, "grad_norm": 0.12364275008440018, "learning_rate": 0.002, "loss": 2.352, "step": 169440 }, { "epoch": 0.6550463113296532, "grad_norm": 0.1127183586359024, "learning_rate": 0.002, "loss": 2.349, "step": 169450 }, { "epoch": 0.6550849685330364, "grad_norm": 0.08764702081680298, "learning_rate": 0.002, "loss": 2.3434, "step": 169460 }, { "epoch": 0.6551236257364197, "grad_norm": 0.11021017283201218, "learning_rate": 0.002, "loss": 2.3332, "step": 169470 }, { "epoch": 0.655162282939803, "grad_norm": 0.1042870506644249, "learning_rate": 0.002, "loss": 2.3299, "step": 169480 }, { "epoch": 0.6552009401431863, "grad_norm": 0.10292533040046692, "learning_rate": 0.002, "loss": 2.3479, "step": 169490 }, { "epoch": 0.6552395973465696, "grad_norm": 0.12277467548847198, "learning_rate": 0.002, "loss": 2.3432, "step": 169500 }, { "epoch": 0.6552782545499528, "grad_norm": 0.09520641714334488, "learning_rate": 0.002, "loss": 2.3442, "step": 169510 }, { "epoch": 0.6553169117533361, "grad_norm": 0.11056701838970184, "learning_rate": 0.002, "loss": 2.3373, "step": 169520 }, { "epoch": 0.6553555689567194, "grad_norm": 0.09635456651449203, "learning_rate": 0.002, "loss": 2.3488, "step": 169530 }, { "epoch": 0.6553942261601027, "grad_norm": 0.10798116773366928, "learning_rate": 0.002, "loss": 2.3406, "step": 169540 }, { "epoch": 0.6554328833634859, "grad_norm": 0.09781666100025177, "learning_rate": 0.002, "loss": 2.3558, "step": 169550 }, { "epoch": 0.6554715405668692, "grad_norm": 0.0961836725473404, "learning_rate": 0.002, "loss": 2.3402, "step": 169560 }, { "epoch": 0.6555101977702525, "grad_norm": 0.1179947480559349, "learning_rate": 0.002, "loss": 2.3418, "step": 169570 }, { "epoch": 0.6555488549736358, "grad_norm": 0.11416078358888626, "learning_rate": 0.002, "loss": 2.3503, "step": 169580 }, { "epoch": 0.655587512177019, "grad_norm": 0.10990830510854721, "learning_rate": 0.002, "loss": 2.3462, "step": 169590 }, { "epoch": 0.6556261693804023, "grad_norm": 0.12073973566293716, "learning_rate": 0.002, "loss": 2.3464, "step": 169600 }, { "epoch": 0.6556648265837857, "grad_norm": 0.1154635101556778, "learning_rate": 0.002, "loss": 2.3481, "step": 169610 }, { "epoch": 0.6557034837871689, "grad_norm": 0.10131113976240158, "learning_rate": 0.002, "loss": 2.3291, "step": 169620 }, { "epoch": 0.6557421409905522, "grad_norm": 0.09605452418327332, "learning_rate": 0.002, "loss": 2.3229, "step": 169630 }, { "epoch": 0.6557807981939354, "grad_norm": 0.12022049725055695, "learning_rate": 0.002, "loss": 2.345, "step": 169640 }, { "epoch": 0.6558194553973188, "grad_norm": 0.11077900975942612, "learning_rate": 0.002, "loss": 2.325, "step": 169650 }, { "epoch": 0.655858112600702, "grad_norm": 0.10479994863271713, "learning_rate": 0.002, "loss": 2.3293, "step": 169660 }, { "epoch": 0.6558967698040853, "grad_norm": 0.10238562524318695, "learning_rate": 0.002, "loss": 2.3395, "step": 169670 }, { "epoch": 0.6559354270074685, "grad_norm": 0.09943419694900513, "learning_rate": 0.002, "loss": 2.3595, "step": 169680 }, { "epoch": 0.6559740842108519, "grad_norm": 0.10164353251457214, "learning_rate": 0.002, "loss": 2.3353, "step": 169690 }, { "epoch": 0.6560127414142352, "grad_norm": 0.10022822767496109, "learning_rate": 0.002, "loss": 2.3257, "step": 169700 }, { "epoch": 0.6560513986176184, "grad_norm": 0.10680403560400009, "learning_rate": 0.002, "loss": 2.3484, "step": 169710 }, { "epoch": 0.6560900558210017, "grad_norm": 0.10432382673025131, "learning_rate": 0.002, "loss": 2.3549, "step": 169720 }, { "epoch": 0.6561287130243849, "grad_norm": 0.11037509143352509, "learning_rate": 0.002, "loss": 2.3483, "step": 169730 }, { "epoch": 0.6561673702277683, "grad_norm": 0.10722117871046066, "learning_rate": 0.002, "loss": 2.3406, "step": 169740 }, { "epoch": 0.6562060274311515, "grad_norm": 0.12687820196151733, "learning_rate": 0.002, "loss": 2.3547, "step": 169750 }, { "epoch": 0.6562446846345348, "grad_norm": 0.10140392184257507, "learning_rate": 0.002, "loss": 2.3611, "step": 169760 }, { "epoch": 0.656283341837918, "grad_norm": 0.1151595190167427, "learning_rate": 0.002, "loss": 2.3466, "step": 169770 }, { "epoch": 0.6563219990413014, "grad_norm": 0.0952373743057251, "learning_rate": 0.002, "loss": 2.3401, "step": 169780 }, { "epoch": 0.6563606562446846, "grad_norm": 0.11280129104852676, "learning_rate": 0.002, "loss": 2.351, "step": 169790 }, { "epoch": 0.6563993134480679, "grad_norm": 0.11494222283363342, "learning_rate": 0.002, "loss": 2.334, "step": 169800 }, { "epoch": 0.6564379706514512, "grad_norm": 0.10257001966238022, "learning_rate": 0.002, "loss": 2.3468, "step": 169810 }, { "epoch": 0.6564766278548345, "grad_norm": 0.08688849210739136, "learning_rate": 0.002, "loss": 2.3297, "step": 169820 }, { "epoch": 0.6565152850582178, "grad_norm": 0.10008734464645386, "learning_rate": 0.002, "loss": 2.3425, "step": 169830 }, { "epoch": 0.656553942261601, "grad_norm": 0.12060262262821198, "learning_rate": 0.002, "loss": 2.3324, "step": 169840 }, { "epoch": 0.6565925994649843, "grad_norm": 0.10228583961725235, "learning_rate": 0.002, "loss": 2.3348, "step": 169850 }, { "epoch": 0.6566312566683676, "grad_norm": 0.11394084990024567, "learning_rate": 0.002, "loss": 2.357, "step": 169860 }, { "epoch": 0.6566699138717509, "grad_norm": 0.10401419550180435, "learning_rate": 0.002, "loss": 2.3429, "step": 169870 }, { "epoch": 0.6567085710751341, "grad_norm": 0.10975929349660873, "learning_rate": 0.002, "loss": 2.3352, "step": 169880 }, { "epoch": 0.6567472282785174, "grad_norm": 0.10642538219690323, "learning_rate": 0.002, "loss": 2.3299, "step": 169890 }, { "epoch": 0.6567858854819006, "grad_norm": 0.1057378351688385, "learning_rate": 0.002, "loss": 2.3273, "step": 169900 }, { "epoch": 0.656824542685284, "grad_norm": 0.10456452518701553, "learning_rate": 0.002, "loss": 2.3444, "step": 169910 }, { "epoch": 0.6568631998886673, "grad_norm": 0.09889863431453705, "learning_rate": 0.002, "loss": 2.3432, "step": 169920 }, { "epoch": 0.6569018570920505, "grad_norm": 0.09749070554971695, "learning_rate": 0.002, "loss": 2.3363, "step": 169930 }, { "epoch": 0.6569405142954338, "grad_norm": 0.10418283939361572, "learning_rate": 0.002, "loss": 2.3334, "step": 169940 }, { "epoch": 0.6569791714988171, "grad_norm": 0.12010916322469711, "learning_rate": 0.002, "loss": 2.354, "step": 169950 }, { "epoch": 0.6570178287022004, "grad_norm": 0.09374318271875381, "learning_rate": 0.002, "loss": 2.344, "step": 169960 }, { "epoch": 0.6570564859055836, "grad_norm": 0.11701393872499466, "learning_rate": 0.002, "loss": 2.3488, "step": 169970 }, { "epoch": 0.6570951431089669, "grad_norm": 0.11446920782327652, "learning_rate": 0.002, "loss": 2.3365, "step": 169980 }, { "epoch": 0.6571338003123502, "grad_norm": 0.10390263050794601, "learning_rate": 0.002, "loss": 2.3432, "step": 169990 }, { "epoch": 0.6571724575157335, "grad_norm": 0.10065855830907822, "learning_rate": 0.002, "loss": 2.3316, "step": 170000 }, { "epoch": 0.6572111147191168, "grad_norm": 0.10264160484075546, "learning_rate": 0.002, "loss": 2.3412, "step": 170010 }, { "epoch": 0.6572497719225, "grad_norm": 0.10007256269454956, "learning_rate": 0.002, "loss": 2.3359, "step": 170020 }, { "epoch": 0.6572884291258834, "grad_norm": 0.11538249254226685, "learning_rate": 0.002, "loss": 2.344, "step": 170030 }, { "epoch": 0.6573270863292666, "grad_norm": 0.12416301667690277, "learning_rate": 0.002, "loss": 2.3606, "step": 170040 }, { "epoch": 0.6573657435326499, "grad_norm": 0.10510220378637314, "learning_rate": 0.002, "loss": 2.3332, "step": 170050 }, { "epoch": 0.6574044007360331, "grad_norm": 0.10308302938938141, "learning_rate": 0.002, "loss": 2.3507, "step": 170060 }, { "epoch": 0.6574430579394164, "grad_norm": 0.11111033707857132, "learning_rate": 0.002, "loss": 2.3469, "step": 170070 }, { "epoch": 0.6574817151427997, "grad_norm": 0.11000286042690277, "learning_rate": 0.002, "loss": 2.3409, "step": 170080 }, { "epoch": 0.657520372346183, "grad_norm": 0.1113034263253212, "learning_rate": 0.002, "loss": 2.3418, "step": 170090 }, { "epoch": 0.6575590295495662, "grad_norm": 0.10262025892734528, "learning_rate": 0.002, "loss": 2.3339, "step": 170100 }, { "epoch": 0.6575976867529495, "grad_norm": 0.09241412580013275, "learning_rate": 0.002, "loss": 2.347, "step": 170110 }, { "epoch": 0.6576363439563329, "grad_norm": 0.10361220687627792, "learning_rate": 0.002, "loss": 2.3519, "step": 170120 }, { "epoch": 0.6576750011597161, "grad_norm": 0.0940166786313057, "learning_rate": 0.002, "loss": 2.343, "step": 170130 }, { "epoch": 0.6577136583630994, "grad_norm": 0.10688845813274384, "learning_rate": 0.002, "loss": 2.3302, "step": 170140 }, { "epoch": 0.6577523155664826, "grad_norm": 0.0985884964466095, "learning_rate": 0.002, "loss": 2.3285, "step": 170150 }, { "epoch": 0.657790972769866, "grad_norm": 0.11543738096952438, "learning_rate": 0.002, "loss": 2.3675, "step": 170160 }, { "epoch": 0.6578296299732492, "grad_norm": 0.11746984720230103, "learning_rate": 0.002, "loss": 2.3433, "step": 170170 }, { "epoch": 0.6578682871766325, "grad_norm": 0.08940320461988449, "learning_rate": 0.002, "loss": 2.3426, "step": 170180 }, { "epoch": 0.6579069443800157, "grad_norm": 0.13141517341136932, "learning_rate": 0.002, "loss": 2.3273, "step": 170190 }, { "epoch": 0.6579456015833991, "grad_norm": 0.1139146015048027, "learning_rate": 0.002, "loss": 2.3502, "step": 170200 }, { "epoch": 0.6579842587867824, "grad_norm": 0.09153608232736588, "learning_rate": 0.002, "loss": 2.3486, "step": 170210 }, { "epoch": 0.6580229159901656, "grad_norm": 0.08851674199104309, "learning_rate": 0.002, "loss": 2.3475, "step": 170220 }, { "epoch": 0.6580615731935489, "grad_norm": 0.09489694982767105, "learning_rate": 0.002, "loss": 2.3475, "step": 170230 }, { "epoch": 0.6581002303969322, "grad_norm": 0.0882200300693512, "learning_rate": 0.002, "loss": 2.3369, "step": 170240 }, { "epoch": 0.6581388876003155, "grad_norm": 0.12556059658527374, "learning_rate": 0.002, "loss": 2.3517, "step": 170250 }, { "epoch": 0.6581775448036987, "grad_norm": 0.11310429126024246, "learning_rate": 0.002, "loss": 2.3414, "step": 170260 }, { "epoch": 0.658216202007082, "grad_norm": 0.10799125581979752, "learning_rate": 0.002, "loss": 2.3335, "step": 170270 }, { "epoch": 0.6582548592104652, "grad_norm": 0.10793901234865189, "learning_rate": 0.002, "loss": 2.3427, "step": 170280 }, { "epoch": 0.6582935164138486, "grad_norm": 0.11244485527276993, "learning_rate": 0.002, "loss": 2.3432, "step": 170290 }, { "epoch": 0.6583321736172318, "grad_norm": 0.08430363982915878, "learning_rate": 0.002, "loss": 2.331, "step": 170300 }, { "epoch": 0.6583708308206151, "grad_norm": 0.10967878997325897, "learning_rate": 0.002, "loss": 2.348, "step": 170310 }, { "epoch": 0.6584094880239983, "grad_norm": 0.10456710308790207, "learning_rate": 0.002, "loss": 2.3376, "step": 170320 }, { "epoch": 0.6584481452273817, "grad_norm": 0.1308537870645523, "learning_rate": 0.002, "loss": 2.3249, "step": 170330 }, { "epoch": 0.658486802430765, "grad_norm": 0.09279914945363998, "learning_rate": 0.002, "loss": 2.3364, "step": 170340 }, { "epoch": 0.6585254596341482, "grad_norm": 0.10395380854606628, "learning_rate": 0.002, "loss": 2.3357, "step": 170350 }, { "epoch": 0.6585641168375315, "grad_norm": 0.09720392525196075, "learning_rate": 0.002, "loss": 2.3492, "step": 170360 }, { "epoch": 0.6586027740409148, "grad_norm": 0.12921123206615448, "learning_rate": 0.002, "loss": 2.3425, "step": 170370 }, { "epoch": 0.6586414312442981, "grad_norm": 0.11742226034402847, "learning_rate": 0.002, "loss": 2.362, "step": 170380 }, { "epoch": 0.6586800884476813, "grad_norm": 0.0947229340672493, "learning_rate": 0.002, "loss": 2.3375, "step": 170390 }, { "epoch": 0.6587187456510646, "grad_norm": 0.1013537347316742, "learning_rate": 0.002, "loss": 2.3343, "step": 170400 }, { "epoch": 0.658757402854448, "grad_norm": 0.11041922867298126, "learning_rate": 0.002, "loss": 2.3447, "step": 170410 }, { "epoch": 0.6587960600578312, "grad_norm": 0.12972447276115417, "learning_rate": 0.002, "loss": 2.3435, "step": 170420 }, { "epoch": 0.6588347172612145, "grad_norm": 0.10491415858268738, "learning_rate": 0.002, "loss": 2.3336, "step": 170430 }, { "epoch": 0.6588733744645977, "grad_norm": 0.11179272085428238, "learning_rate": 0.002, "loss": 2.3442, "step": 170440 }, { "epoch": 0.658912031667981, "grad_norm": 0.09766574203968048, "learning_rate": 0.002, "loss": 2.3317, "step": 170450 }, { "epoch": 0.6589506888713643, "grad_norm": 0.09233935177326202, "learning_rate": 0.002, "loss": 2.3475, "step": 170460 }, { "epoch": 0.6589893460747476, "grad_norm": 0.10109974443912506, "learning_rate": 0.002, "loss": 2.3435, "step": 170470 }, { "epoch": 0.6590280032781308, "grad_norm": 0.10457310825586319, "learning_rate": 0.002, "loss": 2.3516, "step": 170480 }, { "epoch": 0.6590666604815141, "grad_norm": 0.13295739889144897, "learning_rate": 0.002, "loss": 2.3508, "step": 170490 }, { "epoch": 0.6591053176848974, "grad_norm": 0.09190351516008377, "learning_rate": 0.002, "loss": 2.3435, "step": 170500 }, { "epoch": 0.6591439748882807, "grad_norm": 0.1040644571185112, "learning_rate": 0.002, "loss": 2.3538, "step": 170510 }, { "epoch": 0.659182632091664, "grad_norm": 0.10038863122463226, "learning_rate": 0.002, "loss": 2.3367, "step": 170520 }, { "epoch": 0.6592212892950472, "grad_norm": 0.1172012984752655, "learning_rate": 0.002, "loss": 2.3319, "step": 170530 }, { "epoch": 0.6592599464984306, "grad_norm": 0.10850688070058823, "learning_rate": 0.002, "loss": 2.3406, "step": 170540 }, { "epoch": 0.6592986037018138, "grad_norm": 0.09408782422542572, "learning_rate": 0.002, "loss": 2.3466, "step": 170550 }, { "epoch": 0.6593372609051971, "grad_norm": 0.12638218700885773, "learning_rate": 0.002, "loss": 2.3333, "step": 170560 }, { "epoch": 0.6593759181085803, "grad_norm": 0.10627342760562897, "learning_rate": 0.002, "loss": 2.3374, "step": 170570 }, { "epoch": 0.6594145753119637, "grad_norm": 0.16851374506950378, "learning_rate": 0.002, "loss": 2.3407, "step": 170580 }, { "epoch": 0.6594532325153469, "grad_norm": 0.08803575485944748, "learning_rate": 0.002, "loss": 2.3374, "step": 170590 }, { "epoch": 0.6594918897187302, "grad_norm": 0.11330971121788025, "learning_rate": 0.002, "loss": 2.3275, "step": 170600 }, { "epoch": 0.6595305469221134, "grad_norm": 0.09639140963554382, "learning_rate": 0.002, "loss": 2.3516, "step": 170610 }, { "epoch": 0.6595692041254968, "grad_norm": 0.10504119098186493, "learning_rate": 0.002, "loss": 2.335, "step": 170620 }, { "epoch": 0.65960786132888, "grad_norm": 0.2254355549812317, "learning_rate": 0.002, "loss": 2.3451, "step": 170630 }, { "epoch": 0.6596465185322633, "grad_norm": 0.09327276796102524, "learning_rate": 0.002, "loss": 2.3452, "step": 170640 }, { "epoch": 0.6596851757356466, "grad_norm": 0.09641090780496597, "learning_rate": 0.002, "loss": 2.3539, "step": 170650 }, { "epoch": 0.6597238329390298, "grad_norm": 0.1110328882932663, "learning_rate": 0.002, "loss": 2.364, "step": 170660 }, { "epoch": 0.6597624901424132, "grad_norm": 0.10514163970947266, "learning_rate": 0.002, "loss": 2.3392, "step": 170670 }, { "epoch": 0.6598011473457964, "grad_norm": 0.1033729761838913, "learning_rate": 0.002, "loss": 2.3457, "step": 170680 }, { "epoch": 0.6598398045491797, "grad_norm": 0.09916379302740097, "learning_rate": 0.002, "loss": 2.3354, "step": 170690 }, { "epoch": 0.6598784617525629, "grad_norm": 0.09037516266107559, "learning_rate": 0.002, "loss": 2.3411, "step": 170700 }, { "epoch": 0.6599171189559463, "grad_norm": 0.0995062068104744, "learning_rate": 0.002, "loss": 2.3446, "step": 170710 }, { "epoch": 0.6599557761593295, "grad_norm": 0.1171247586607933, "learning_rate": 0.002, "loss": 2.3414, "step": 170720 }, { "epoch": 0.6599944333627128, "grad_norm": 0.09908228367567062, "learning_rate": 0.002, "loss": 2.3419, "step": 170730 }, { "epoch": 0.660033090566096, "grad_norm": 0.10033340752124786, "learning_rate": 0.002, "loss": 2.3366, "step": 170740 }, { "epoch": 0.6600717477694794, "grad_norm": 0.10300584137439728, "learning_rate": 0.002, "loss": 2.3455, "step": 170750 }, { "epoch": 0.6601104049728627, "grad_norm": 0.09993764758110046, "learning_rate": 0.002, "loss": 2.3391, "step": 170760 }, { "epoch": 0.6601490621762459, "grad_norm": 0.1555805802345276, "learning_rate": 0.002, "loss": 2.3513, "step": 170770 }, { "epoch": 0.6601877193796292, "grad_norm": 0.12053221464157104, "learning_rate": 0.002, "loss": 2.3485, "step": 170780 }, { "epoch": 0.6602263765830125, "grad_norm": 0.10223250091075897, "learning_rate": 0.002, "loss": 2.3265, "step": 170790 }, { "epoch": 0.6602650337863958, "grad_norm": 0.10473880916833878, "learning_rate": 0.002, "loss": 2.3584, "step": 170800 }, { "epoch": 0.660303690989779, "grad_norm": 0.11674695461988449, "learning_rate": 0.002, "loss": 2.3556, "step": 170810 }, { "epoch": 0.6603423481931623, "grad_norm": 0.10591929405927658, "learning_rate": 0.002, "loss": 2.344, "step": 170820 }, { "epoch": 0.6603810053965455, "grad_norm": 0.10028078407049179, "learning_rate": 0.002, "loss": 2.3419, "step": 170830 }, { "epoch": 0.6604196625999289, "grad_norm": 0.08663002401590347, "learning_rate": 0.002, "loss": 2.3449, "step": 170840 }, { "epoch": 0.6604583198033122, "grad_norm": 0.11164186894893646, "learning_rate": 0.002, "loss": 2.3404, "step": 170850 }, { "epoch": 0.6604969770066954, "grad_norm": 0.12362227588891983, "learning_rate": 0.002, "loss": 2.3455, "step": 170860 }, { "epoch": 0.6605356342100787, "grad_norm": 0.09875123202800751, "learning_rate": 0.002, "loss": 2.3366, "step": 170870 }, { "epoch": 0.660574291413462, "grad_norm": 0.10698625445365906, "learning_rate": 0.002, "loss": 2.327, "step": 170880 }, { "epoch": 0.6606129486168453, "grad_norm": 0.0939648225903511, "learning_rate": 0.002, "loss": 2.3438, "step": 170890 }, { "epoch": 0.6606516058202285, "grad_norm": 0.1279655396938324, "learning_rate": 0.002, "loss": 2.3352, "step": 170900 }, { "epoch": 0.6606902630236118, "grad_norm": 0.11866834759712219, "learning_rate": 0.002, "loss": 2.3472, "step": 170910 }, { "epoch": 0.6607289202269951, "grad_norm": 0.09348573535680771, "learning_rate": 0.002, "loss": 2.3442, "step": 170920 }, { "epoch": 0.6607675774303784, "grad_norm": 0.1124269962310791, "learning_rate": 0.002, "loss": 2.3384, "step": 170930 }, { "epoch": 0.6608062346337616, "grad_norm": 0.12030279636383057, "learning_rate": 0.002, "loss": 2.3456, "step": 170940 }, { "epoch": 0.6608448918371449, "grad_norm": 0.10367617756128311, "learning_rate": 0.002, "loss": 2.321, "step": 170950 }, { "epoch": 0.6608835490405283, "grad_norm": 0.09243983775377274, "learning_rate": 0.002, "loss": 2.3399, "step": 170960 }, { "epoch": 0.6609222062439115, "grad_norm": 0.09867145121097565, "learning_rate": 0.002, "loss": 2.3483, "step": 170970 }, { "epoch": 0.6609608634472948, "grad_norm": 0.09240375459194183, "learning_rate": 0.002, "loss": 2.3434, "step": 170980 }, { "epoch": 0.660999520650678, "grad_norm": 0.0999855175614357, "learning_rate": 0.002, "loss": 2.3482, "step": 170990 }, { "epoch": 0.6610381778540613, "grad_norm": 0.14586104452610016, "learning_rate": 0.002, "loss": 2.3534, "step": 171000 }, { "epoch": 0.6610768350574446, "grad_norm": 0.11599107831716537, "learning_rate": 0.002, "loss": 2.3442, "step": 171010 }, { "epoch": 0.6611154922608279, "grad_norm": 0.099213145673275, "learning_rate": 0.002, "loss": 2.3448, "step": 171020 }, { "epoch": 0.6611541494642111, "grad_norm": 0.10795638710260391, "learning_rate": 0.002, "loss": 2.3203, "step": 171030 }, { "epoch": 0.6611928066675944, "grad_norm": 0.19326281547546387, "learning_rate": 0.002, "loss": 2.3276, "step": 171040 }, { "epoch": 0.6612314638709778, "grad_norm": 0.09383906424045563, "learning_rate": 0.002, "loss": 2.3511, "step": 171050 }, { "epoch": 0.661270121074361, "grad_norm": 0.11155370622873306, "learning_rate": 0.002, "loss": 2.3658, "step": 171060 }, { "epoch": 0.6613087782777443, "grad_norm": 0.09720378369092941, "learning_rate": 0.002, "loss": 2.3466, "step": 171070 }, { "epoch": 0.6613474354811275, "grad_norm": 0.10207916796207428, "learning_rate": 0.002, "loss": 2.3554, "step": 171080 }, { "epoch": 0.6613860926845109, "grad_norm": 0.09510542452335358, "learning_rate": 0.002, "loss": 2.3332, "step": 171090 }, { "epoch": 0.6614247498878941, "grad_norm": 0.10385362058877945, "learning_rate": 0.002, "loss": 2.3493, "step": 171100 }, { "epoch": 0.6614634070912774, "grad_norm": 0.09923926740884781, "learning_rate": 0.002, "loss": 2.3468, "step": 171110 }, { "epoch": 0.6615020642946606, "grad_norm": 0.10726913809776306, "learning_rate": 0.002, "loss": 2.3463, "step": 171120 }, { "epoch": 0.661540721498044, "grad_norm": 0.13127969205379486, "learning_rate": 0.002, "loss": 2.3491, "step": 171130 }, { "epoch": 0.6615793787014272, "grad_norm": 0.10140059143304825, "learning_rate": 0.002, "loss": 2.3374, "step": 171140 }, { "epoch": 0.6616180359048105, "grad_norm": 0.10477566719055176, "learning_rate": 0.002, "loss": 2.3434, "step": 171150 }, { "epoch": 0.6616566931081937, "grad_norm": 0.13156050443649292, "learning_rate": 0.002, "loss": 2.3347, "step": 171160 }, { "epoch": 0.6616953503115771, "grad_norm": 0.09503421187400818, "learning_rate": 0.002, "loss": 2.3359, "step": 171170 }, { "epoch": 0.6617340075149604, "grad_norm": 0.11637887358665466, "learning_rate": 0.002, "loss": 2.3371, "step": 171180 }, { "epoch": 0.6617726647183436, "grad_norm": 0.0924702063202858, "learning_rate": 0.002, "loss": 2.3561, "step": 171190 }, { "epoch": 0.6618113219217269, "grad_norm": 0.11030198633670807, "learning_rate": 0.002, "loss": 2.3352, "step": 171200 }, { "epoch": 0.6618499791251101, "grad_norm": 0.11644049733877182, "learning_rate": 0.002, "loss": 2.3516, "step": 171210 }, { "epoch": 0.6618886363284935, "grad_norm": 0.10431084781885147, "learning_rate": 0.002, "loss": 2.3431, "step": 171220 }, { "epoch": 0.6619272935318767, "grad_norm": 0.12342498451471329, "learning_rate": 0.002, "loss": 2.3477, "step": 171230 }, { "epoch": 0.66196595073526, "grad_norm": 0.08883868902921677, "learning_rate": 0.002, "loss": 2.3361, "step": 171240 }, { "epoch": 0.6620046079386432, "grad_norm": 0.10376245528459549, "learning_rate": 0.002, "loss": 2.352, "step": 171250 }, { "epoch": 0.6620432651420266, "grad_norm": 0.12346050143241882, "learning_rate": 0.002, "loss": 2.3527, "step": 171260 }, { "epoch": 0.6620819223454099, "grad_norm": 0.09900625050067902, "learning_rate": 0.002, "loss": 2.3329, "step": 171270 }, { "epoch": 0.6621205795487931, "grad_norm": 0.11180734634399414, "learning_rate": 0.002, "loss": 2.3509, "step": 171280 }, { "epoch": 0.6621592367521764, "grad_norm": 0.11599894613027573, "learning_rate": 0.002, "loss": 2.3343, "step": 171290 }, { "epoch": 0.6621978939555597, "grad_norm": 0.09575050324201584, "learning_rate": 0.002, "loss": 2.3528, "step": 171300 }, { "epoch": 0.662236551158943, "grad_norm": 0.10637818276882172, "learning_rate": 0.002, "loss": 2.3415, "step": 171310 }, { "epoch": 0.6622752083623262, "grad_norm": 0.10875699669122696, "learning_rate": 0.002, "loss": 2.3483, "step": 171320 }, { "epoch": 0.6623138655657095, "grad_norm": 0.09395532310009003, "learning_rate": 0.002, "loss": 2.3423, "step": 171330 }, { "epoch": 0.6623525227690928, "grad_norm": 0.08983360975980759, "learning_rate": 0.002, "loss": 2.3369, "step": 171340 }, { "epoch": 0.6623911799724761, "grad_norm": 0.08850152045488358, "learning_rate": 0.002, "loss": 2.343, "step": 171350 }, { "epoch": 0.6624298371758593, "grad_norm": 0.10465314239263535, "learning_rate": 0.002, "loss": 2.3373, "step": 171360 }, { "epoch": 0.6624684943792426, "grad_norm": 0.1074366495013237, "learning_rate": 0.002, "loss": 2.3598, "step": 171370 }, { "epoch": 0.6625071515826259, "grad_norm": 0.11158204078674316, "learning_rate": 0.002, "loss": 2.3506, "step": 171380 }, { "epoch": 0.6625458087860092, "grad_norm": 0.3130476772785187, "learning_rate": 0.002, "loss": 2.3382, "step": 171390 }, { "epoch": 0.6625844659893925, "grad_norm": 0.10336320102214813, "learning_rate": 0.002, "loss": 2.3445, "step": 171400 }, { "epoch": 0.6626231231927757, "grad_norm": 0.09524131566286087, "learning_rate": 0.002, "loss": 2.3539, "step": 171410 }, { "epoch": 0.662661780396159, "grad_norm": 0.0908019170165062, "learning_rate": 0.002, "loss": 2.3543, "step": 171420 }, { "epoch": 0.6627004375995423, "grad_norm": 0.1123889610171318, "learning_rate": 0.002, "loss": 2.3266, "step": 171430 }, { "epoch": 0.6627390948029256, "grad_norm": 0.10293354839086533, "learning_rate": 0.002, "loss": 2.3411, "step": 171440 }, { "epoch": 0.6627777520063088, "grad_norm": 0.12804663181304932, "learning_rate": 0.002, "loss": 2.3418, "step": 171450 }, { "epoch": 0.6628164092096921, "grad_norm": 0.12000278383493423, "learning_rate": 0.002, "loss": 2.3465, "step": 171460 }, { "epoch": 0.6628550664130755, "grad_norm": 0.1008700430393219, "learning_rate": 0.002, "loss": 2.3255, "step": 171470 }, { "epoch": 0.6628937236164587, "grad_norm": 0.11298047751188278, "learning_rate": 0.002, "loss": 2.3406, "step": 171480 }, { "epoch": 0.662932380819842, "grad_norm": 0.09088566899299622, "learning_rate": 0.002, "loss": 2.333, "step": 171490 }, { "epoch": 0.6629710380232252, "grad_norm": 0.10908161848783493, "learning_rate": 0.002, "loss": 2.3431, "step": 171500 }, { "epoch": 0.6630096952266086, "grad_norm": 0.10217082500457764, "learning_rate": 0.002, "loss": 2.3328, "step": 171510 }, { "epoch": 0.6630483524299918, "grad_norm": 0.10757473111152649, "learning_rate": 0.002, "loss": 2.3496, "step": 171520 }, { "epoch": 0.6630870096333751, "grad_norm": 0.11873800307512283, "learning_rate": 0.002, "loss": 2.336, "step": 171530 }, { "epoch": 0.6631256668367583, "grad_norm": 0.10032789409160614, "learning_rate": 0.002, "loss": 2.345, "step": 171540 }, { "epoch": 0.6631643240401417, "grad_norm": 0.1108940839767456, "learning_rate": 0.002, "loss": 2.3525, "step": 171550 }, { "epoch": 0.663202981243525, "grad_norm": 0.11462477594614029, "learning_rate": 0.002, "loss": 2.3369, "step": 171560 }, { "epoch": 0.6632416384469082, "grad_norm": 0.11187411844730377, "learning_rate": 0.002, "loss": 2.3425, "step": 171570 }, { "epoch": 0.6632802956502915, "grad_norm": 0.10882826149463654, "learning_rate": 0.002, "loss": 2.3525, "step": 171580 }, { "epoch": 0.6633189528536747, "grad_norm": 0.11319388449192047, "learning_rate": 0.002, "loss": 2.3289, "step": 171590 }, { "epoch": 0.6633576100570581, "grad_norm": 0.1004195362329483, "learning_rate": 0.002, "loss": 2.3448, "step": 171600 }, { "epoch": 0.6633962672604413, "grad_norm": 0.09757034480571747, "learning_rate": 0.002, "loss": 2.3459, "step": 171610 }, { "epoch": 0.6634349244638246, "grad_norm": 0.10731105506420135, "learning_rate": 0.002, "loss": 2.332, "step": 171620 }, { "epoch": 0.6634735816672078, "grad_norm": 0.1167788878083229, "learning_rate": 0.002, "loss": 2.3498, "step": 171630 }, { "epoch": 0.6635122388705912, "grad_norm": 0.11580292135477066, "learning_rate": 0.002, "loss": 2.3474, "step": 171640 }, { "epoch": 0.6635508960739744, "grad_norm": 0.09653709828853607, "learning_rate": 0.002, "loss": 2.3395, "step": 171650 }, { "epoch": 0.6635895532773577, "grad_norm": 0.09271356463432312, "learning_rate": 0.002, "loss": 2.3496, "step": 171660 }, { "epoch": 0.6636282104807409, "grad_norm": 0.1072278544306755, "learning_rate": 0.002, "loss": 2.3504, "step": 171670 }, { "epoch": 0.6636668676841243, "grad_norm": 0.11917044967412949, "learning_rate": 0.002, "loss": 2.3488, "step": 171680 }, { "epoch": 0.6637055248875076, "grad_norm": 0.10324475914239883, "learning_rate": 0.002, "loss": 2.3549, "step": 171690 }, { "epoch": 0.6637441820908908, "grad_norm": 0.11788350343704224, "learning_rate": 0.002, "loss": 2.3285, "step": 171700 }, { "epoch": 0.6637828392942741, "grad_norm": 0.10315144807100296, "learning_rate": 0.002, "loss": 2.3456, "step": 171710 }, { "epoch": 0.6638214964976574, "grad_norm": 0.12169231474399567, "learning_rate": 0.002, "loss": 2.3308, "step": 171720 }, { "epoch": 0.6638601537010407, "grad_norm": 0.08901426941156387, "learning_rate": 0.002, "loss": 2.3331, "step": 171730 }, { "epoch": 0.6638988109044239, "grad_norm": 0.11607123911380768, "learning_rate": 0.002, "loss": 2.3273, "step": 171740 }, { "epoch": 0.6639374681078072, "grad_norm": 0.111125148832798, "learning_rate": 0.002, "loss": 2.3494, "step": 171750 }, { "epoch": 0.6639761253111904, "grad_norm": 0.10658658295869827, "learning_rate": 0.002, "loss": 2.3508, "step": 171760 }, { "epoch": 0.6640147825145738, "grad_norm": 0.09600212424993515, "learning_rate": 0.002, "loss": 2.3397, "step": 171770 }, { "epoch": 0.664053439717957, "grad_norm": 0.09580216556787491, "learning_rate": 0.002, "loss": 2.3428, "step": 171780 }, { "epoch": 0.6640920969213403, "grad_norm": 0.11963184922933578, "learning_rate": 0.002, "loss": 2.3408, "step": 171790 }, { "epoch": 0.6641307541247236, "grad_norm": 0.09414113312959671, "learning_rate": 0.002, "loss": 2.3292, "step": 171800 }, { "epoch": 0.6641694113281069, "grad_norm": 0.12084329128265381, "learning_rate": 0.002, "loss": 2.3276, "step": 171810 }, { "epoch": 0.6642080685314902, "grad_norm": 0.09752669930458069, "learning_rate": 0.002, "loss": 2.3419, "step": 171820 }, { "epoch": 0.6642467257348734, "grad_norm": 0.11089248955249786, "learning_rate": 0.002, "loss": 2.3413, "step": 171830 }, { "epoch": 0.6642853829382567, "grad_norm": 0.12359671294689178, "learning_rate": 0.002, "loss": 2.3411, "step": 171840 }, { "epoch": 0.66432404014164, "grad_norm": 0.09718482196331024, "learning_rate": 0.002, "loss": 2.333, "step": 171850 }, { "epoch": 0.6643626973450233, "grad_norm": 0.09817170351743698, "learning_rate": 0.002, "loss": 2.3536, "step": 171860 }, { "epoch": 0.6644013545484065, "grad_norm": 0.09323927760124207, "learning_rate": 0.002, "loss": 2.3175, "step": 171870 }, { "epoch": 0.6644400117517898, "grad_norm": 0.1100674495100975, "learning_rate": 0.002, "loss": 2.3411, "step": 171880 }, { "epoch": 0.6644786689551732, "grad_norm": 0.10748188197612762, "learning_rate": 0.002, "loss": 2.3398, "step": 171890 }, { "epoch": 0.6645173261585564, "grad_norm": 0.12465234100818634, "learning_rate": 0.002, "loss": 2.3401, "step": 171900 }, { "epoch": 0.6645559833619397, "grad_norm": 0.09937615692615509, "learning_rate": 0.002, "loss": 2.3574, "step": 171910 }, { "epoch": 0.6645946405653229, "grad_norm": 0.09921646118164062, "learning_rate": 0.002, "loss": 2.343, "step": 171920 }, { "epoch": 0.6646332977687062, "grad_norm": 0.11704915761947632, "learning_rate": 0.002, "loss": 2.3381, "step": 171930 }, { "epoch": 0.6646719549720895, "grad_norm": 0.10025927424430847, "learning_rate": 0.002, "loss": 2.3452, "step": 171940 }, { "epoch": 0.6647106121754728, "grad_norm": 0.134184330701828, "learning_rate": 0.002, "loss": 2.3404, "step": 171950 }, { "epoch": 0.664749269378856, "grad_norm": 0.09934668242931366, "learning_rate": 0.002, "loss": 2.3537, "step": 171960 }, { "epoch": 0.6647879265822393, "grad_norm": 0.12052752077579498, "learning_rate": 0.002, "loss": 2.3366, "step": 171970 }, { "epoch": 0.6648265837856226, "grad_norm": 0.12109331041574478, "learning_rate": 0.002, "loss": 2.3406, "step": 171980 }, { "epoch": 0.6648652409890059, "grad_norm": 0.10054156184196472, "learning_rate": 0.002, "loss": 2.3432, "step": 171990 }, { "epoch": 0.6649038981923892, "grad_norm": 0.09870258718729019, "learning_rate": 0.002, "loss": 2.345, "step": 172000 }, { "epoch": 0.6649425553957724, "grad_norm": 0.11486545205116272, "learning_rate": 0.002, "loss": 2.3439, "step": 172010 }, { "epoch": 0.6649812125991558, "grad_norm": 0.11269167810678482, "learning_rate": 0.002, "loss": 2.3446, "step": 172020 }, { "epoch": 0.665019869802539, "grad_norm": 0.09960546344518661, "learning_rate": 0.002, "loss": 2.352, "step": 172030 }, { "epoch": 0.6650585270059223, "grad_norm": 0.09361325949430466, "learning_rate": 0.002, "loss": 2.3413, "step": 172040 }, { "epoch": 0.6650971842093055, "grad_norm": 0.10352955013513565, "learning_rate": 0.002, "loss": 2.3642, "step": 172050 }, { "epoch": 0.6651358414126889, "grad_norm": 0.1031767800450325, "learning_rate": 0.002, "loss": 2.3434, "step": 172060 }, { "epoch": 0.6651744986160721, "grad_norm": 0.09600525349378586, "learning_rate": 0.002, "loss": 2.3293, "step": 172070 }, { "epoch": 0.6652131558194554, "grad_norm": 0.10943388938903809, "learning_rate": 0.002, "loss": 2.3436, "step": 172080 }, { "epoch": 0.6652518130228386, "grad_norm": 0.11022301763296127, "learning_rate": 0.002, "loss": 2.3436, "step": 172090 }, { "epoch": 0.665290470226222, "grad_norm": 0.09631247818470001, "learning_rate": 0.002, "loss": 2.3348, "step": 172100 }, { "epoch": 0.6653291274296053, "grad_norm": 0.23882737755775452, "learning_rate": 0.002, "loss": 2.3442, "step": 172110 }, { "epoch": 0.6653677846329885, "grad_norm": 0.100875124335289, "learning_rate": 0.002, "loss": 2.352, "step": 172120 }, { "epoch": 0.6654064418363718, "grad_norm": 0.11418143659830093, "learning_rate": 0.002, "loss": 2.3472, "step": 172130 }, { "epoch": 0.665445099039755, "grad_norm": 0.10449163615703583, "learning_rate": 0.002, "loss": 2.3275, "step": 172140 }, { "epoch": 0.6654837562431384, "grad_norm": 0.1039036363363266, "learning_rate": 0.002, "loss": 2.3489, "step": 172150 }, { "epoch": 0.6655224134465216, "grad_norm": 0.09626703709363937, "learning_rate": 0.002, "loss": 2.3209, "step": 172160 }, { "epoch": 0.6655610706499049, "grad_norm": 0.10677660256624222, "learning_rate": 0.002, "loss": 2.3398, "step": 172170 }, { "epoch": 0.6655997278532881, "grad_norm": 0.09619175642728806, "learning_rate": 0.002, "loss": 2.3351, "step": 172180 }, { "epoch": 0.6656383850566715, "grad_norm": 0.12057211995124817, "learning_rate": 0.002, "loss": 2.3432, "step": 172190 }, { "epoch": 0.6656770422600548, "grad_norm": 0.10427337139844894, "learning_rate": 0.002, "loss": 2.3369, "step": 172200 }, { "epoch": 0.665715699463438, "grad_norm": 0.10566114634275436, "learning_rate": 0.002, "loss": 2.3459, "step": 172210 }, { "epoch": 0.6657543566668213, "grad_norm": 0.10993968695402145, "learning_rate": 0.002, "loss": 2.3467, "step": 172220 }, { "epoch": 0.6657930138702046, "grad_norm": 0.09533580392599106, "learning_rate": 0.002, "loss": 2.34, "step": 172230 }, { "epoch": 0.6658316710735879, "grad_norm": 0.09474946558475494, "learning_rate": 0.002, "loss": 2.32, "step": 172240 }, { "epoch": 0.6658703282769711, "grad_norm": 0.12730206549167633, "learning_rate": 0.002, "loss": 2.3417, "step": 172250 }, { "epoch": 0.6659089854803544, "grad_norm": 0.10522949695587158, "learning_rate": 0.002, "loss": 2.3324, "step": 172260 }, { "epoch": 0.6659476426837377, "grad_norm": 0.12083995342254639, "learning_rate": 0.002, "loss": 2.3427, "step": 172270 }, { "epoch": 0.665986299887121, "grad_norm": 0.11327656358480453, "learning_rate": 0.002, "loss": 2.3478, "step": 172280 }, { "epoch": 0.6660249570905042, "grad_norm": 0.1076776310801506, "learning_rate": 0.002, "loss": 2.3222, "step": 172290 }, { "epoch": 0.6660636142938875, "grad_norm": 0.11642111092805862, "learning_rate": 0.002, "loss": 2.326, "step": 172300 }, { "epoch": 0.6661022714972707, "grad_norm": 0.09679548442363739, "learning_rate": 0.002, "loss": 2.3392, "step": 172310 }, { "epoch": 0.6661409287006541, "grad_norm": 0.13206057250499725, "learning_rate": 0.002, "loss": 2.3491, "step": 172320 }, { "epoch": 0.6661795859040374, "grad_norm": 0.09222513437271118, "learning_rate": 0.002, "loss": 2.3491, "step": 172330 }, { "epoch": 0.6662182431074206, "grad_norm": 0.09885145723819733, "learning_rate": 0.002, "loss": 2.3396, "step": 172340 }, { "epoch": 0.6662569003108039, "grad_norm": 0.09937558323144913, "learning_rate": 0.002, "loss": 2.3356, "step": 172350 }, { "epoch": 0.6662955575141872, "grad_norm": 0.11098925769329071, "learning_rate": 0.002, "loss": 2.3271, "step": 172360 }, { "epoch": 0.6663342147175705, "grad_norm": 0.11017122119665146, "learning_rate": 0.002, "loss": 2.3496, "step": 172370 }, { "epoch": 0.6663728719209537, "grad_norm": 0.09626687318086624, "learning_rate": 0.002, "loss": 2.3401, "step": 172380 }, { "epoch": 0.666411529124337, "grad_norm": 0.10198944061994553, "learning_rate": 0.002, "loss": 2.3349, "step": 172390 }, { "epoch": 0.6664501863277204, "grad_norm": 0.10744412243366241, "learning_rate": 0.002, "loss": 2.3385, "step": 172400 }, { "epoch": 0.6664888435311036, "grad_norm": 0.09577515721321106, "learning_rate": 0.002, "loss": 2.3315, "step": 172410 }, { "epoch": 0.6665275007344869, "grad_norm": 0.1001209020614624, "learning_rate": 0.002, "loss": 2.3494, "step": 172420 }, { "epoch": 0.6665661579378701, "grad_norm": 0.09737569838762283, "learning_rate": 0.002, "loss": 2.357, "step": 172430 }, { "epoch": 0.6666048151412535, "grad_norm": 0.1063157245516777, "learning_rate": 0.002, "loss": 2.3293, "step": 172440 }, { "epoch": 0.6666434723446367, "grad_norm": 0.1438492089509964, "learning_rate": 0.002, "loss": 2.3282, "step": 172450 }, { "epoch": 0.66668212954802, "grad_norm": 0.1074848547577858, "learning_rate": 0.002, "loss": 2.3423, "step": 172460 }, { "epoch": 0.6667207867514032, "grad_norm": 0.10906050354242325, "learning_rate": 0.002, "loss": 2.352, "step": 172470 }, { "epoch": 0.6667594439547866, "grad_norm": 0.10571140795946121, "learning_rate": 0.002, "loss": 2.3558, "step": 172480 }, { "epoch": 0.6667981011581698, "grad_norm": 0.10705535113811493, "learning_rate": 0.002, "loss": 2.3541, "step": 172490 }, { "epoch": 0.6668367583615531, "grad_norm": 0.1198195368051529, "learning_rate": 0.002, "loss": 2.3422, "step": 172500 }, { "epoch": 0.6668754155649363, "grad_norm": 0.10089991241693497, "learning_rate": 0.002, "loss": 2.3596, "step": 172510 }, { "epoch": 0.6669140727683196, "grad_norm": 0.10191459208726883, "learning_rate": 0.002, "loss": 2.354, "step": 172520 }, { "epoch": 0.666952729971703, "grad_norm": 0.09125278890132904, "learning_rate": 0.002, "loss": 2.3416, "step": 172530 }, { "epoch": 0.6669913871750862, "grad_norm": 0.12424398213624954, "learning_rate": 0.002, "loss": 2.354, "step": 172540 }, { "epoch": 0.6670300443784695, "grad_norm": 0.09108825773000717, "learning_rate": 0.002, "loss": 2.3576, "step": 172550 }, { "epoch": 0.6670687015818527, "grad_norm": 0.10158373415470123, "learning_rate": 0.002, "loss": 2.3421, "step": 172560 }, { "epoch": 0.6671073587852361, "grad_norm": 0.126356303691864, "learning_rate": 0.002, "loss": 2.3466, "step": 172570 }, { "epoch": 0.6671460159886193, "grad_norm": 0.11853097379207611, "learning_rate": 0.002, "loss": 2.3453, "step": 172580 }, { "epoch": 0.6671846731920026, "grad_norm": 0.09849544614553452, "learning_rate": 0.002, "loss": 2.3377, "step": 172590 }, { "epoch": 0.6672233303953858, "grad_norm": 0.09688036143779755, "learning_rate": 0.002, "loss": 2.3199, "step": 172600 }, { "epoch": 0.6672619875987692, "grad_norm": 0.10107052326202393, "learning_rate": 0.002, "loss": 2.3459, "step": 172610 }, { "epoch": 0.6673006448021525, "grad_norm": 0.10965628921985626, "learning_rate": 0.002, "loss": 2.3427, "step": 172620 }, { "epoch": 0.6673393020055357, "grad_norm": 0.10272455960512161, "learning_rate": 0.002, "loss": 2.3301, "step": 172630 }, { "epoch": 0.667377959208919, "grad_norm": 0.10162177681922913, "learning_rate": 0.002, "loss": 2.3384, "step": 172640 }, { "epoch": 0.6674166164123023, "grad_norm": 0.10011301189661026, "learning_rate": 0.002, "loss": 2.3271, "step": 172650 }, { "epoch": 0.6674552736156856, "grad_norm": 0.1002359688282013, "learning_rate": 0.002, "loss": 2.3372, "step": 172660 }, { "epoch": 0.6674939308190688, "grad_norm": 0.2552264928817749, "learning_rate": 0.002, "loss": 2.3503, "step": 172670 }, { "epoch": 0.6675325880224521, "grad_norm": 0.09233082830905914, "learning_rate": 0.002, "loss": 2.3643, "step": 172680 }, { "epoch": 0.6675712452258353, "grad_norm": 0.09360641241073608, "learning_rate": 0.002, "loss": 2.3383, "step": 172690 }, { "epoch": 0.6676099024292187, "grad_norm": 0.10834074020385742, "learning_rate": 0.002, "loss": 2.3398, "step": 172700 }, { "epoch": 0.667648559632602, "grad_norm": 0.10256896913051605, "learning_rate": 0.002, "loss": 2.3265, "step": 172710 }, { "epoch": 0.6676872168359852, "grad_norm": 0.10912622511386871, "learning_rate": 0.002, "loss": 2.3477, "step": 172720 }, { "epoch": 0.6677258740393684, "grad_norm": 0.10098609328269958, "learning_rate": 0.002, "loss": 2.3315, "step": 172730 }, { "epoch": 0.6677645312427518, "grad_norm": 0.10524442046880722, "learning_rate": 0.002, "loss": 2.3464, "step": 172740 }, { "epoch": 0.6678031884461351, "grad_norm": 0.10921610891819, "learning_rate": 0.002, "loss": 2.3257, "step": 172750 }, { "epoch": 0.6678418456495183, "grad_norm": 0.15663886070251465, "learning_rate": 0.002, "loss": 2.3387, "step": 172760 }, { "epoch": 0.6678805028529016, "grad_norm": 0.09868650138378143, "learning_rate": 0.002, "loss": 2.3411, "step": 172770 }, { "epoch": 0.6679191600562849, "grad_norm": 0.13061143457889557, "learning_rate": 0.002, "loss": 2.3332, "step": 172780 }, { "epoch": 0.6679578172596682, "grad_norm": 0.10689136385917664, "learning_rate": 0.002, "loss": 2.3538, "step": 172790 }, { "epoch": 0.6679964744630514, "grad_norm": 0.10942581295967102, "learning_rate": 0.002, "loss": 2.3307, "step": 172800 }, { "epoch": 0.6680351316664347, "grad_norm": 0.09561222046613693, "learning_rate": 0.002, "loss": 2.3351, "step": 172810 }, { "epoch": 0.668073788869818, "grad_norm": 0.10102529078722, "learning_rate": 0.002, "loss": 2.3413, "step": 172820 }, { "epoch": 0.6681124460732013, "grad_norm": 0.10421686619520187, "learning_rate": 0.002, "loss": 2.3377, "step": 172830 }, { "epoch": 0.6681511032765846, "grad_norm": 0.11196110397577286, "learning_rate": 0.002, "loss": 2.337, "step": 172840 }, { "epoch": 0.6681897604799678, "grad_norm": 0.10805104672908783, "learning_rate": 0.002, "loss": 2.3235, "step": 172850 }, { "epoch": 0.6682284176833511, "grad_norm": 0.10558205097913742, "learning_rate": 0.002, "loss": 2.3397, "step": 172860 }, { "epoch": 0.6682670748867344, "grad_norm": 0.10156062245368958, "learning_rate": 0.002, "loss": 2.3231, "step": 172870 }, { "epoch": 0.6683057320901177, "grad_norm": 0.09688648581504822, "learning_rate": 0.002, "loss": 2.3437, "step": 172880 }, { "epoch": 0.6683443892935009, "grad_norm": 0.10593561083078384, "learning_rate": 0.002, "loss": 2.3431, "step": 172890 }, { "epoch": 0.6683830464968842, "grad_norm": 0.10871266573667526, "learning_rate": 0.002, "loss": 2.3351, "step": 172900 }, { "epoch": 0.6684217037002675, "grad_norm": 0.0923452377319336, "learning_rate": 0.002, "loss": 2.3564, "step": 172910 }, { "epoch": 0.6684603609036508, "grad_norm": 0.12133041024208069, "learning_rate": 0.002, "loss": 2.3444, "step": 172920 }, { "epoch": 0.668499018107034, "grad_norm": 0.11136896908283234, "learning_rate": 0.002, "loss": 2.3381, "step": 172930 }, { "epoch": 0.6685376753104173, "grad_norm": 0.140518918633461, "learning_rate": 0.002, "loss": 2.3355, "step": 172940 }, { "epoch": 0.6685763325138007, "grad_norm": 0.10117638111114502, "learning_rate": 0.002, "loss": 2.3483, "step": 172950 }, { "epoch": 0.6686149897171839, "grad_norm": 0.09949972480535507, "learning_rate": 0.002, "loss": 2.3387, "step": 172960 }, { "epoch": 0.6686536469205672, "grad_norm": 0.10655559599399567, "learning_rate": 0.002, "loss": 2.343, "step": 172970 }, { "epoch": 0.6686923041239504, "grad_norm": 0.12371232360601425, "learning_rate": 0.002, "loss": 2.3424, "step": 172980 }, { "epoch": 0.6687309613273338, "grad_norm": 0.12231665849685669, "learning_rate": 0.002, "loss": 2.3415, "step": 172990 }, { "epoch": 0.668769618530717, "grad_norm": 0.09841171652078629, "learning_rate": 0.002, "loss": 2.3522, "step": 173000 }, { "epoch": 0.6688082757341003, "grad_norm": 0.11069756746292114, "learning_rate": 0.002, "loss": 2.346, "step": 173010 }, { "epoch": 0.6688469329374835, "grad_norm": 0.10788854956626892, "learning_rate": 0.002, "loss": 2.3386, "step": 173020 }, { "epoch": 0.6688855901408669, "grad_norm": 0.10781175643205643, "learning_rate": 0.002, "loss": 2.3444, "step": 173030 }, { "epoch": 0.6689242473442502, "grad_norm": 0.11309197545051575, "learning_rate": 0.002, "loss": 2.3463, "step": 173040 }, { "epoch": 0.6689629045476334, "grad_norm": 0.09210246801376343, "learning_rate": 0.002, "loss": 2.3469, "step": 173050 }, { "epoch": 0.6690015617510167, "grad_norm": 0.105115607380867, "learning_rate": 0.002, "loss": 2.3483, "step": 173060 }, { "epoch": 0.6690402189543999, "grad_norm": 0.1119607463479042, "learning_rate": 0.002, "loss": 2.3454, "step": 173070 }, { "epoch": 0.6690788761577833, "grad_norm": 0.10662411153316498, "learning_rate": 0.002, "loss": 2.3463, "step": 173080 }, { "epoch": 0.6691175333611665, "grad_norm": 0.0970088467001915, "learning_rate": 0.002, "loss": 2.3369, "step": 173090 }, { "epoch": 0.6691561905645498, "grad_norm": 0.1164088249206543, "learning_rate": 0.002, "loss": 2.3408, "step": 173100 }, { "epoch": 0.669194847767933, "grad_norm": 0.14752018451690674, "learning_rate": 0.002, "loss": 2.3574, "step": 173110 }, { "epoch": 0.6692335049713164, "grad_norm": 0.11193527281284332, "learning_rate": 0.002, "loss": 2.3354, "step": 173120 }, { "epoch": 0.6692721621746996, "grad_norm": 0.21462298929691315, "learning_rate": 0.002, "loss": 2.3478, "step": 173130 }, { "epoch": 0.6693108193780829, "grad_norm": 0.09348655492067337, "learning_rate": 0.002, "loss": 2.3675, "step": 173140 }, { "epoch": 0.6693494765814662, "grad_norm": 0.10245306044816971, "learning_rate": 0.002, "loss": 2.3457, "step": 173150 }, { "epoch": 0.6693881337848495, "grad_norm": 0.10220210999250412, "learning_rate": 0.002, "loss": 2.3492, "step": 173160 }, { "epoch": 0.6694267909882328, "grad_norm": 0.1089777946472168, "learning_rate": 0.002, "loss": 2.3615, "step": 173170 }, { "epoch": 0.669465448191616, "grad_norm": 0.09728941321372986, "learning_rate": 0.002, "loss": 2.333, "step": 173180 }, { "epoch": 0.6695041053949993, "grad_norm": 0.10754229873418808, "learning_rate": 0.002, "loss": 2.3443, "step": 173190 }, { "epoch": 0.6695427625983826, "grad_norm": 0.11171595752239227, "learning_rate": 0.002, "loss": 2.3391, "step": 173200 }, { "epoch": 0.6695814198017659, "grad_norm": 0.0972415879368782, "learning_rate": 0.002, "loss": 2.3364, "step": 173210 }, { "epoch": 0.6696200770051491, "grad_norm": 0.09847305715084076, "learning_rate": 0.002, "loss": 2.3279, "step": 173220 }, { "epoch": 0.6696587342085324, "grad_norm": 0.10625992715358734, "learning_rate": 0.002, "loss": 2.3396, "step": 173230 }, { "epoch": 0.6696973914119156, "grad_norm": 0.09553809463977814, "learning_rate": 0.002, "loss": 2.3386, "step": 173240 }, { "epoch": 0.669736048615299, "grad_norm": 0.10626339167356491, "learning_rate": 0.002, "loss": 2.3337, "step": 173250 }, { "epoch": 0.6697747058186823, "grad_norm": 0.10143054276704788, "learning_rate": 0.002, "loss": 2.3398, "step": 173260 }, { "epoch": 0.6698133630220655, "grad_norm": 0.10228617489337921, "learning_rate": 0.002, "loss": 2.3338, "step": 173270 }, { "epoch": 0.6698520202254488, "grad_norm": 0.12096145004034042, "learning_rate": 0.002, "loss": 2.3408, "step": 173280 }, { "epoch": 0.6698906774288321, "grad_norm": 0.09476984292268753, "learning_rate": 0.002, "loss": 2.3323, "step": 173290 }, { "epoch": 0.6699293346322154, "grad_norm": 0.11676125973463058, "learning_rate": 0.002, "loss": 2.3396, "step": 173300 }, { "epoch": 0.6699679918355986, "grad_norm": 0.09415993094444275, "learning_rate": 0.002, "loss": 2.3351, "step": 173310 }, { "epoch": 0.6700066490389819, "grad_norm": 0.09608684480190277, "learning_rate": 0.002, "loss": 2.3397, "step": 173320 }, { "epoch": 0.6700453062423652, "grad_norm": 0.11301315575838089, "learning_rate": 0.002, "loss": 2.3541, "step": 173330 }, { "epoch": 0.6700839634457485, "grad_norm": 0.10460449755191803, "learning_rate": 0.002, "loss": 2.3352, "step": 173340 }, { "epoch": 0.6701226206491318, "grad_norm": 0.09276396781206131, "learning_rate": 0.002, "loss": 2.3362, "step": 173350 }, { "epoch": 0.670161277852515, "grad_norm": 0.12138626724481583, "learning_rate": 0.002, "loss": 2.3411, "step": 173360 }, { "epoch": 0.6701999350558984, "grad_norm": 0.10992854088544846, "learning_rate": 0.002, "loss": 2.3478, "step": 173370 }, { "epoch": 0.6702385922592816, "grad_norm": 0.0991344228386879, "learning_rate": 0.002, "loss": 2.3302, "step": 173380 }, { "epoch": 0.6702772494626649, "grad_norm": 0.10722635686397552, "learning_rate": 0.002, "loss": 2.3417, "step": 173390 }, { "epoch": 0.6703159066660481, "grad_norm": 0.09343632310628891, "learning_rate": 0.002, "loss": 2.3426, "step": 173400 }, { "epoch": 0.6703545638694314, "grad_norm": 0.12453175336122513, "learning_rate": 0.002, "loss": 2.3263, "step": 173410 }, { "epoch": 0.6703932210728147, "grad_norm": 0.11105192452669144, "learning_rate": 0.002, "loss": 2.3293, "step": 173420 }, { "epoch": 0.670431878276198, "grad_norm": 0.10980962961912155, "learning_rate": 0.002, "loss": 2.3439, "step": 173430 }, { "epoch": 0.6704705354795812, "grad_norm": 0.09669952839612961, "learning_rate": 0.002, "loss": 2.3484, "step": 173440 }, { "epoch": 0.6705091926829645, "grad_norm": 0.11269879341125488, "learning_rate": 0.002, "loss": 2.3303, "step": 173450 }, { "epoch": 0.6705478498863479, "grad_norm": 0.09396583586931229, "learning_rate": 0.002, "loss": 2.3331, "step": 173460 }, { "epoch": 0.6705865070897311, "grad_norm": 0.09842613339424133, "learning_rate": 0.002, "loss": 2.3503, "step": 173470 }, { "epoch": 0.6706251642931144, "grad_norm": 0.09631490707397461, "learning_rate": 0.002, "loss": 2.3605, "step": 173480 }, { "epoch": 0.6706638214964976, "grad_norm": 0.11753473430871964, "learning_rate": 0.002, "loss": 2.3397, "step": 173490 }, { "epoch": 0.670702478699881, "grad_norm": 0.1194978803396225, "learning_rate": 0.002, "loss": 2.355, "step": 173500 }, { "epoch": 0.6707411359032642, "grad_norm": 0.10833332687616348, "learning_rate": 0.002, "loss": 2.3407, "step": 173510 }, { "epoch": 0.6707797931066475, "grad_norm": 0.09782851487398148, "learning_rate": 0.002, "loss": 2.3571, "step": 173520 }, { "epoch": 0.6708184503100307, "grad_norm": 0.10231298208236694, "learning_rate": 0.002, "loss": 2.3434, "step": 173530 }, { "epoch": 0.6708571075134141, "grad_norm": 0.09388585388660431, "learning_rate": 0.002, "loss": 2.3486, "step": 173540 }, { "epoch": 0.6708957647167973, "grad_norm": 0.10473788529634476, "learning_rate": 0.002, "loss": 2.3496, "step": 173550 }, { "epoch": 0.6709344219201806, "grad_norm": 0.09349475800991058, "learning_rate": 0.002, "loss": 2.3535, "step": 173560 }, { "epoch": 0.6709730791235639, "grad_norm": 0.12404922395944595, "learning_rate": 0.002, "loss": 2.336, "step": 173570 }, { "epoch": 0.6710117363269472, "grad_norm": 0.12190348654985428, "learning_rate": 0.002, "loss": 2.3628, "step": 173580 }, { "epoch": 0.6710503935303305, "grad_norm": 0.10820824652910233, "learning_rate": 0.002, "loss": 2.3516, "step": 173590 }, { "epoch": 0.6710890507337137, "grad_norm": 0.11660218983888626, "learning_rate": 0.002, "loss": 2.3314, "step": 173600 }, { "epoch": 0.671127707937097, "grad_norm": 0.09696482867002487, "learning_rate": 0.002, "loss": 2.3416, "step": 173610 }, { "epoch": 0.6711663651404802, "grad_norm": 0.12469976395368576, "learning_rate": 0.002, "loss": 2.3592, "step": 173620 }, { "epoch": 0.6712050223438636, "grad_norm": 0.09914630651473999, "learning_rate": 0.002, "loss": 2.3557, "step": 173630 }, { "epoch": 0.6712436795472468, "grad_norm": 0.11698149144649506, "learning_rate": 0.002, "loss": 2.3429, "step": 173640 }, { "epoch": 0.6712823367506301, "grad_norm": 0.10479546338319778, "learning_rate": 0.002, "loss": 2.3293, "step": 173650 }, { "epoch": 0.6713209939540133, "grad_norm": 0.09075053781270981, "learning_rate": 0.002, "loss": 2.3289, "step": 173660 }, { "epoch": 0.6713596511573967, "grad_norm": 0.1083453819155693, "learning_rate": 0.002, "loss": 2.3525, "step": 173670 }, { "epoch": 0.67139830836078, "grad_norm": 0.24330176413059235, "learning_rate": 0.002, "loss": 2.3278, "step": 173680 }, { "epoch": 0.6714369655641632, "grad_norm": 0.10410798341035843, "learning_rate": 0.002, "loss": 2.3375, "step": 173690 }, { "epoch": 0.6714756227675465, "grad_norm": 0.1088162288069725, "learning_rate": 0.002, "loss": 2.3366, "step": 173700 }, { "epoch": 0.6715142799709298, "grad_norm": 0.09707775712013245, "learning_rate": 0.002, "loss": 2.3378, "step": 173710 }, { "epoch": 0.6715529371743131, "grad_norm": 0.10446558892726898, "learning_rate": 0.002, "loss": 2.3368, "step": 173720 }, { "epoch": 0.6715915943776963, "grad_norm": 0.10093081742525101, "learning_rate": 0.002, "loss": 2.3425, "step": 173730 }, { "epoch": 0.6716302515810796, "grad_norm": 0.08895033597946167, "learning_rate": 0.002, "loss": 2.3488, "step": 173740 }, { "epoch": 0.671668908784463, "grad_norm": 0.11400709301233292, "learning_rate": 0.002, "loss": 2.3452, "step": 173750 }, { "epoch": 0.6717075659878462, "grad_norm": 0.10750409215688705, "learning_rate": 0.002, "loss": 2.3553, "step": 173760 }, { "epoch": 0.6717462231912295, "grad_norm": 0.09366723150014877, "learning_rate": 0.002, "loss": 2.3505, "step": 173770 }, { "epoch": 0.6717848803946127, "grad_norm": 0.11019507795572281, "learning_rate": 0.002, "loss": 2.3447, "step": 173780 }, { "epoch": 0.671823537597996, "grad_norm": 0.10667601972818375, "learning_rate": 0.002, "loss": 2.3306, "step": 173790 }, { "epoch": 0.6718621948013793, "grad_norm": 0.11151214689016342, "learning_rate": 0.002, "loss": 2.3418, "step": 173800 }, { "epoch": 0.6719008520047626, "grad_norm": 0.11602430790662766, "learning_rate": 0.002, "loss": 2.3411, "step": 173810 }, { "epoch": 0.6719395092081458, "grad_norm": 0.08965712040662766, "learning_rate": 0.002, "loss": 2.3348, "step": 173820 }, { "epoch": 0.6719781664115291, "grad_norm": 0.09411187469959259, "learning_rate": 0.002, "loss": 2.3362, "step": 173830 }, { "epoch": 0.6720168236149124, "grad_norm": 0.10649342834949493, "learning_rate": 0.002, "loss": 2.3473, "step": 173840 }, { "epoch": 0.6720554808182957, "grad_norm": 0.10887311398983002, "learning_rate": 0.002, "loss": 2.3369, "step": 173850 }, { "epoch": 0.6720941380216789, "grad_norm": 0.09790990501642227, "learning_rate": 0.002, "loss": 2.333, "step": 173860 }, { "epoch": 0.6721327952250622, "grad_norm": 0.11011838912963867, "learning_rate": 0.002, "loss": 2.342, "step": 173870 }, { "epoch": 0.6721714524284456, "grad_norm": 0.11605843901634216, "learning_rate": 0.002, "loss": 2.3372, "step": 173880 }, { "epoch": 0.6722101096318288, "grad_norm": 0.09449160844087601, "learning_rate": 0.002, "loss": 2.3193, "step": 173890 }, { "epoch": 0.6722487668352121, "grad_norm": 0.10037367790937424, "learning_rate": 0.002, "loss": 2.3235, "step": 173900 }, { "epoch": 0.6722874240385953, "grad_norm": 0.09971857815980911, "learning_rate": 0.002, "loss": 2.3225, "step": 173910 }, { "epoch": 0.6723260812419787, "grad_norm": 0.12663498520851135, "learning_rate": 0.002, "loss": 2.3572, "step": 173920 }, { "epoch": 0.6723647384453619, "grad_norm": 0.10218092054128647, "learning_rate": 0.002, "loss": 2.3512, "step": 173930 }, { "epoch": 0.6724033956487452, "grad_norm": 0.09552987664937973, "learning_rate": 0.002, "loss": 2.3331, "step": 173940 }, { "epoch": 0.6724420528521284, "grad_norm": 0.1135910302400589, "learning_rate": 0.002, "loss": 2.3477, "step": 173950 }, { "epoch": 0.6724807100555118, "grad_norm": 0.09681475907564163, "learning_rate": 0.002, "loss": 2.3297, "step": 173960 }, { "epoch": 0.672519367258895, "grad_norm": 0.10417526960372925, "learning_rate": 0.002, "loss": 2.3486, "step": 173970 }, { "epoch": 0.6725580244622783, "grad_norm": 0.09279265254735947, "learning_rate": 0.002, "loss": 2.3385, "step": 173980 }, { "epoch": 0.6725966816656616, "grad_norm": 0.09781613945960999, "learning_rate": 0.002, "loss": 2.3345, "step": 173990 }, { "epoch": 0.6726353388690448, "grad_norm": 0.11745069175958633, "learning_rate": 0.002, "loss": 2.3603, "step": 174000 }, { "epoch": 0.6726739960724282, "grad_norm": 0.09635234624147415, "learning_rate": 0.002, "loss": 2.3463, "step": 174010 }, { "epoch": 0.6727126532758114, "grad_norm": 0.11234599351882935, "learning_rate": 0.002, "loss": 2.3509, "step": 174020 }, { "epoch": 0.6727513104791947, "grad_norm": 0.09256234019994736, "learning_rate": 0.002, "loss": 2.3566, "step": 174030 }, { "epoch": 0.6727899676825779, "grad_norm": 0.1221209242939949, "learning_rate": 0.002, "loss": 2.345, "step": 174040 }, { "epoch": 0.6728286248859613, "grad_norm": 0.1162743866443634, "learning_rate": 0.002, "loss": 2.3305, "step": 174050 }, { "epoch": 0.6728672820893445, "grad_norm": 0.09238558262586594, "learning_rate": 0.002, "loss": 2.3412, "step": 174060 }, { "epoch": 0.6729059392927278, "grad_norm": 0.1003466248512268, "learning_rate": 0.002, "loss": 2.3534, "step": 174070 }, { "epoch": 0.672944596496111, "grad_norm": 0.11341916769742966, "learning_rate": 0.002, "loss": 2.3428, "step": 174080 }, { "epoch": 0.6729832536994944, "grad_norm": 0.11505437642335892, "learning_rate": 0.002, "loss": 2.3438, "step": 174090 }, { "epoch": 0.6730219109028777, "grad_norm": 0.13397817313671112, "learning_rate": 0.002, "loss": 2.343, "step": 174100 }, { "epoch": 0.6730605681062609, "grad_norm": 0.1059814989566803, "learning_rate": 0.002, "loss": 2.3342, "step": 174110 }, { "epoch": 0.6730992253096442, "grad_norm": 0.11259469389915466, "learning_rate": 0.002, "loss": 2.3513, "step": 174120 }, { "epoch": 0.6731378825130275, "grad_norm": 0.10442819446325302, "learning_rate": 0.002, "loss": 2.3548, "step": 174130 }, { "epoch": 0.6731765397164108, "grad_norm": 0.09504423290491104, "learning_rate": 0.002, "loss": 2.344, "step": 174140 }, { "epoch": 0.673215196919794, "grad_norm": 0.12814587354660034, "learning_rate": 0.002, "loss": 2.3494, "step": 174150 }, { "epoch": 0.6732538541231773, "grad_norm": 0.09581288695335388, "learning_rate": 0.002, "loss": 2.3341, "step": 174160 }, { "epoch": 0.6732925113265605, "grad_norm": 0.09748073667287827, "learning_rate": 0.002, "loss": 2.3558, "step": 174170 }, { "epoch": 0.6733311685299439, "grad_norm": 0.10420890897512436, "learning_rate": 0.002, "loss": 2.3392, "step": 174180 }, { "epoch": 0.6733698257333272, "grad_norm": 0.10162430256605148, "learning_rate": 0.002, "loss": 2.3415, "step": 174190 }, { "epoch": 0.6734084829367104, "grad_norm": 0.10959216952323914, "learning_rate": 0.002, "loss": 2.3283, "step": 174200 }, { "epoch": 0.6734471401400937, "grad_norm": 0.10551372915506363, "learning_rate": 0.002, "loss": 2.3412, "step": 174210 }, { "epoch": 0.673485797343477, "grad_norm": 0.09912195056676865, "learning_rate": 0.002, "loss": 2.3462, "step": 174220 }, { "epoch": 0.6735244545468603, "grad_norm": 0.09719519317150116, "learning_rate": 0.002, "loss": 2.3299, "step": 174230 }, { "epoch": 0.6735631117502435, "grad_norm": 0.09635747969150543, "learning_rate": 0.002, "loss": 2.3334, "step": 174240 }, { "epoch": 0.6736017689536268, "grad_norm": 0.09930815547704697, "learning_rate": 0.002, "loss": 2.3346, "step": 174250 }, { "epoch": 0.6736404261570101, "grad_norm": 0.10854223370552063, "learning_rate": 0.002, "loss": 2.3225, "step": 174260 }, { "epoch": 0.6736790833603934, "grad_norm": 0.141081765294075, "learning_rate": 0.002, "loss": 2.3335, "step": 174270 }, { "epoch": 0.6737177405637766, "grad_norm": 0.09559612721204758, "learning_rate": 0.002, "loss": 2.3405, "step": 174280 }, { "epoch": 0.6737563977671599, "grad_norm": 0.11618802696466446, "learning_rate": 0.002, "loss": 2.3352, "step": 174290 }, { "epoch": 0.6737950549705433, "grad_norm": 0.10047265142202377, "learning_rate": 0.002, "loss": 2.3425, "step": 174300 }, { "epoch": 0.6738337121739265, "grad_norm": 0.11988501995801926, "learning_rate": 0.002, "loss": 2.3326, "step": 174310 }, { "epoch": 0.6738723693773098, "grad_norm": 0.11306623369455338, "learning_rate": 0.002, "loss": 2.3304, "step": 174320 }, { "epoch": 0.673911026580693, "grad_norm": 0.09925418347120285, "learning_rate": 0.002, "loss": 2.3541, "step": 174330 }, { "epoch": 0.6739496837840763, "grad_norm": 0.12130644172430038, "learning_rate": 0.002, "loss": 2.3445, "step": 174340 }, { "epoch": 0.6739883409874596, "grad_norm": 0.14711587131023407, "learning_rate": 0.002, "loss": 2.3288, "step": 174350 }, { "epoch": 0.6740269981908429, "grad_norm": 0.1304912120103836, "learning_rate": 0.002, "loss": 2.3467, "step": 174360 }, { "epoch": 0.6740656553942261, "grad_norm": 0.10425528883934021, "learning_rate": 0.002, "loss": 2.3445, "step": 174370 }, { "epoch": 0.6741043125976094, "grad_norm": 0.10731105506420135, "learning_rate": 0.002, "loss": 2.353, "step": 174380 }, { "epoch": 0.6741429698009928, "grad_norm": 0.09718817472457886, "learning_rate": 0.002, "loss": 2.3427, "step": 174390 }, { "epoch": 0.674181627004376, "grad_norm": 0.14790160953998566, "learning_rate": 0.002, "loss": 2.3332, "step": 174400 }, { "epoch": 0.6742202842077593, "grad_norm": 0.1125716120004654, "learning_rate": 0.002, "loss": 2.3265, "step": 174410 }, { "epoch": 0.6742589414111425, "grad_norm": 0.0974559560418129, "learning_rate": 0.002, "loss": 2.351, "step": 174420 }, { "epoch": 0.6742975986145259, "grad_norm": 0.11191441118717194, "learning_rate": 0.002, "loss": 2.3427, "step": 174430 }, { "epoch": 0.6743362558179091, "grad_norm": 0.11979559808969498, "learning_rate": 0.002, "loss": 2.3434, "step": 174440 }, { "epoch": 0.6743749130212924, "grad_norm": 0.13155923783779144, "learning_rate": 0.002, "loss": 2.3467, "step": 174450 }, { "epoch": 0.6744135702246756, "grad_norm": 0.10687007009983063, "learning_rate": 0.002, "loss": 2.3318, "step": 174460 }, { "epoch": 0.674452227428059, "grad_norm": 0.08892308175563812, "learning_rate": 0.002, "loss": 2.3438, "step": 174470 }, { "epoch": 0.6744908846314422, "grad_norm": 0.10634169727563858, "learning_rate": 0.002, "loss": 2.3551, "step": 174480 }, { "epoch": 0.6745295418348255, "grad_norm": 0.9842195510864258, "learning_rate": 0.002, "loss": 2.3407, "step": 174490 }, { "epoch": 0.6745681990382087, "grad_norm": 0.10620087385177612, "learning_rate": 0.002, "loss": 2.3376, "step": 174500 }, { "epoch": 0.6746068562415921, "grad_norm": 0.10536065697669983, "learning_rate": 0.002, "loss": 2.3566, "step": 174510 }, { "epoch": 0.6746455134449754, "grad_norm": 0.09353658556938171, "learning_rate": 0.002, "loss": 2.3494, "step": 174520 }, { "epoch": 0.6746841706483586, "grad_norm": 0.10854317247867584, "learning_rate": 0.002, "loss": 2.3535, "step": 174530 }, { "epoch": 0.6747228278517419, "grad_norm": 0.11007989197969437, "learning_rate": 0.002, "loss": 2.3309, "step": 174540 }, { "epoch": 0.6747614850551251, "grad_norm": 0.10677378624677658, "learning_rate": 0.002, "loss": 2.351, "step": 174550 }, { "epoch": 0.6748001422585085, "grad_norm": 0.10250411182641983, "learning_rate": 0.002, "loss": 2.3537, "step": 174560 }, { "epoch": 0.6748387994618917, "grad_norm": 0.12104775756597519, "learning_rate": 0.002, "loss": 2.3418, "step": 174570 }, { "epoch": 0.674877456665275, "grad_norm": 0.10298202931880951, "learning_rate": 0.002, "loss": 2.3562, "step": 174580 }, { "epoch": 0.6749161138686582, "grad_norm": 0.15690723061561584, "learning_rate": 0.002, "loss": 2.3456, "step": 174590 }, { "epoch": 0.6749547710720416, "grad_norm": 0.11443834751844406, "learning_rate": 0.002, "loss": 2.3438, "step": 174600 }, { "epoch": 0.6749934282754249, "grad_norm": 0.0889236256480217, "learning_rate": 0.002, "loss": 2.3279, "step": 174610 }, { "epoch": 0.6750320854788081, "grad_norm": 0.10543763637542725, "learning_rate": 0.002, "loss": 2.3428, "step": 174620 }, { "epoch": 0.6750707426821914, "grad_norm": 0.12079984694719315, "learning_rate": 0.002, "loss": 2.3358, "step": 174630 }, { "epoch": 0.6751093998855747, "grad_norm": 0.08959334343671799, "learning_rate": 0.002, "loss": 2.3261, "step": 174640 }, { "epoch": 0.675148057088958, "grad_norm": 0.09968244284391403, "learning_rate": 0.002, "loss": 2.3478, "step": 174650 }, { "epoch": 0.6751867142923412, "grad_norm": 0.09839235991239548, "learning_rate": 0.002, "loss": 2.3487, "step": 174660 }, { "epoch": 0.6752253714957245, "grad_norm": 0.12109307199716568, "learning_rate": 0.002, "loss": 2.341, "step": 174670 }, { "epoch": 0.6752640286991078, "grad_norm": 0.09745091944932938, "learning_rate": 0.002, "loss": 2.3523, "step": 174680 }, { "epoch": 0.6753026859024911, "grad_norm": 0.10645421594381332, "learning_rate": 0.002, "loss": 2.3331, "step": 174690 }, { "epoch": 0.6753413431058743, "grad_norm": 0.10345783829689026, "learning_rate": 0.002, "loss": 2.3423, "step": 174700 }, { "epoch": 0.6753800003092576, "grad_norm": 0.10724533349275589, "learning_rate": 0.002, "loss": 2.3498, "step": 174710 }, { "epoch": 0.6754186575126409, "grad_norm": 0.11676304042339325, "learning_rate": 0.002, "loss": 2.3449, "step": 174720 }, { "epoch": 0.6754573147160242, "grad_norm": 0.09646974503993988, "learning_rate": 0.002, "loss": 2.3373, "step": 174730 }, { "epoch": 0.6754959719194075, "grad_norm": 0.10687783360481262, "learning_rate": 0.002, "loss": 2.3465, "step": 174740 }, { "epoch": 0.6755346291227907, "grad_norm": 0.10278775542974472, "learning_rate": 0.002, "loss": 2.3496, "step": 174750 }, { "epoch": 0.675573286326174, "grad_norm": 0.10462471842765808, "learning_rate": 0.002, "loss": 2.3437, "step": 174760 }, { "epoch": 0.6756119435295573, "grad_norm": 0.11497380584478378, "learning_rate": 0.002, "loss": 2.3514, "step": 174770 }, { "epoch": 0.6756506007329406, "grad_norm": 0.11669895797967911, "learning_rate": 0.002, "loss": 2.346, "step": 174780 }, { "epoch": 0.6756892579363238, "grad_norm": 0.094562828540802, "learning_rate": 0.002, "loss": 2.377, "step": 174790 }, { "epoch": 0.6757279151397071, "grad_norm": 0.1032317727804184, "learning_rate": 0.002, "loss": 2.3329, "step": 174800 }, { "epoch": 0.6757665723430905, "grad_norm": 0.12714195251464844, "learning_rate": 0.002, "loss": 2.3213, "step": 174810 }, { "epoch": 0.6758052295464737, "grad_norm": 0.10985013097524643, "learning_rate": 0.002, "loss": 2.3284, "step": 174820 }, { "epoch": 0.675843886749857, "grad_norm": 0.10180466622114182, "learning_rate": 0.002, "loss": 2.3522, "step": 174830 }, { "epoch": 0.6758825439532402, "grad_norm": 0.1006990447640419, "learning_rate": 0.002, "loss": 2.3414, "step": 174840 }, { "epoch": 0.6759212011566236, "grad_norm": 0.10727022588253021, "learning_rate": 0.002, "loss": 2.3498, "step": 174850 }, { "epoch": 0.6759598583600068, "grad_norm": 0.12462523579597473, "learning_rate": 0.002, "loss": 2.3385, "step": 174860 }, { "epoch": 0.6759985155633901, "grad_norm": 0.09988413751125336, "learning_rate": 0.002, "loss": 2.3552, "step": 174870 }, { "epoch": 0.6760371727667733, "grad_norm": 0.10924515873193741, "learning_rate": 0.002, "loss": 2.3396, "step": 174880 }, { "epoch": 0.6760758299701567, "grad_norm": 0.10243270546197891, "learning_rate": 0.002, "loss": 2.3422, "step": 174890 }, { "epoch": 0.67611448717354, "grad_norm": 0.10781146585941315, "learning_rate": 0.002, "loss": 2.3471, "step": 174900 }, { "epoch": 0.6761531443769232, "grad_norm": 0.10424323379993439, "learning_rate": 0.002, "loss": 2.3145, "step": 174910 }, { "epoch": 0.6761918015803065, "grad_norm": 0.10621855407953262, "learning_rate": 0.002, "loss": 2.3513, "step": 174920 }, { "epoch": 0.6762304587836897, "grad_norm": 0.09816805273294449, "learning_rate": 0.002, "loss": 2.354, "step": 174930 }, { "epoch": 0.6762691159870731, "grad_norm": 0.13953115046024323, "learning_rate": 0.002, "loss": 2.3473, "step": 174940 }, { "epoch": 0.6763077731904563, "grad_norm": 0.14118227362632751, "learning_rate": 0.002, "loss": 2.3314, "step": 174950 }, { "epoch": 0.6763464303938396, "grad_norm": 0.10070734471082687, "learning_rate": 0.002, "loss": 2.3385, "step": 174960 }, { "epoch": 0.6763850875972228, "grad_norm": 0.11258470267057419, "learning_rate": 0.002, "loss": 2.346, "step": 174970 }, { "epoch": 0.6764237448006062, "grad_norm": 0.10367239266633987, "learning_rate": 0.002, "loss": 2.3529, "step": 174980 }, { "epoch": 0.6764624020039894, "grad_norm": 0.09598944336175919, "learning_rate": 0.002, "loss": 2.3324, "step": 174990 }, { "epoch": 0.6765010592073727, "grad_norm": 0.09504383057355881, "learning_rate": 0.002, "loss": 2.3329, "step": 175000 }, { "epoch": 0.6765397164107559, "grad_norm": 0.13171350955963135, "learning_rate": 0.002, "loss": 2.3368, "step": 175010 }, { "epoch": 0.6765783736141393, "grad_norm": 0.09265443682670593, "learning_rate": 0.002, "loss": 2.3482, "step": 175020 }, { "epoch": 0.6766170308175226, "grad_norm": 0.11585959047079086, "learning_rate": 0.002, "loss": 2.3487, "step": 175030 }, { "epoch": 0.6766556880209058, "grad_norm": 0.10820352286100388, "learning_rate": 0.002, "loss": 2.3357, "step": 175040 }, { "epoch": 0.6766943452242891, "grad_norm": 0.09318529069423676, "learning_rate": 0.002, "loss": 2.3547, "step": 175050 }, { "epoch": 0.6767330024276724, "grad_norm": 0.09986617416143417, "learning_rate": 0.002, "loss": 2.3306, "step": 175060 }, { "epoch": 0.6767716596310557, "grad_norm": 0.08849131315946579, "learning_rate": 0.002, "loss": 2.3444, "step": 175070 }, { "epoch": 0.6768103168344389, "grad_norm": 0.12096413224935532, "learning_rate": 0.002, "loss": 2.3481, "step": 175080 }, { "epoch": 0.6768489740378222, "grad_norm": 0.09429614245891571, "learning_rate": 0.002, "loss": 2.3503, "step": 175090 }, { "epoch": 0.6768876312412054, "grad_norm": 0.11105930060148239, "learning_rate": 0.002, "loss": 2.337, "step": 175100 }, { "epoch": 0.6769262884445888, "grad_norm": 0.10252535343170166, "learning_rate": 0.002, "loss": 2.3516, "step": 175110 }, { "epoch": 0.676964945647972, "grad_norm": 0.10260120779275894, "learning_rate": 0.002, "loss": 2.3373, "step": 175120 }, { "epoch": 0.6770036028513553, "grad_norm": 0.11789651960134506, "learning_rate": 0.002, "loss": 2.3534, "step": 175130 }, { "epoch": 0.6770422600547386, "grad_norm": 0.11125901341438293, "learning_rate": 0.002, "loss": 2.3503, "step": 175140 }, { "epoch": 0.6770809172581219, "grad_norm": 0.10644970089197159, "learning_rate": 0.002, "loss": 2.3487, "step": 175150 }, { "epoch": 0.6771195744615052, "grad_norm": 0.12210649251937866, "learning_rate": 0.002, "loss": 2.3522, "step": 175160 }, { "epoch": 0.6771582316648884, "grad_norm": 0.12430441379547119, "learning_rate": 0.002, "loss": 2.3242, "step": 175170 }, { "epoch": 0.6771968888682717, "grad_norm": 0.11322494596242905, "learning_rate": 0.002, "loss": 2.3391, "step": 175180 }, { "epoch": 0.677235546071655, "grad_norm": 0.11685037612915039, "learning_rate": 0.002, "loss": 2.3431, "step": 175190 }, { "epoch": 0.6772742032750383, "grad_norm": 0.09667099267244339, "learning_rate": 0.002, "loss": 2.3484, "step": 175200 }, { "epoch": 0.6773128604784215, "grad_norm": 0.09514366090297699, "learning_rate": 0.002, "loss": 2.3469, "step": 175210 }, { "epoch": 0.6773515176818048, "grad_norm": 0.09986500442028046, "learning_rate": 0.002, "loss": 2.339, "step": 175220 }, { "epoch": 0.6773901748851882, "grad_norm": 0.09903539717197418, "learning_rate": 0.002, "loss": 2.3371, "step": 175230 }, { "epoch": 0.6774288320885714, "grad_norm": 0.11586198955774307, "learning_rate": 0.002, "loss": 2.3381, "step": 175240 }, { "epoch": 0.6774674892919547, "grad_norm": 0.12105626612901688, "learning_rate": 0.002, "loss": 2.3286, "step": 175250 }, { "epoch": 0.6775061464953379, "grad_norm": 0.10017690062522888, "learning_rate": 0.002, "loss": 2.3294, "step": 175260 }, { "epoch": 0.6775448036987212, "grad_norm": 0.10963975638151169, "learning_rate": 0.002, "loss": 2.3139, "step": 175270 }, { "epoch": 0.6775834609021045, "grad_norm": 0.12276821583509445, "learning_rate": 0.002, "loss": 2.3598, "step": 175280 }, { "epoch": 0.6776221181054878, "grad_norm": 0.11214709281921387, "learning_rate": 0.002, "loss": 2.3324, "step": 175290 }, { "epoch": 0.677660775308871, "grad_norm": 0.11168606579303741, "learning_rate": 0.002, "loss": 2.3313, "step": 175300 }, { "epoch": 0.6776994325122543, "grad_norm": 0.10621609538793564, "learning_rate": 0.002, "loss": 2.3527, "step": 175310 }, { "epoch": 0.6777380897156376, "grad_norm": 0.12428240478038788, "learning_rate": 0.002, "loss": 2.3367, "step": 175320 }, { "epoch": 0.6777767469190209, "grad_norm": 0.1008407399058342, "learning_rate": 0.002, "loss": 2.3368, "step": 175330 }, { "epoch": 0.6778154041224042, "grad_norm": 0.10509267449378967, "learning_rate": 0.002, "loss": 2.3389, "step": 175340 }, { "epoch": 0.6778540613257874, "grad_norm": 0.11543798446655273, "learning_rate": 0.002, "loss": 2.3592, "step": 175350 }, { "epoch": 0.6778927185291708, "grad_norm": 0.10814554244279861, "learning_rate": 0.002, "loss": 2.3427, "step": 175360 }, { "epoch": 0.677931375732554, "grad_norm": 0.09828812628984451, "learning_rate": 0.002, "loss": 2.3346, "step": 175370 }, { "epoch": 0.6779700329359373, "grad_norm": 0.09091383218765259, "learning_rate": 0.002, "loss": 2.3478, "step": 175380 }, { "epoch": 0.6780086901393205, "grad_norm": 0.10134921967983246, "learning_rate": 0.002, "loss": 2.3539, "step": 175390 }, { "epoch": 0.6780473473427039, "grad_norm": 0.11470029503107071, "learning_rate": 0.002, "loss": 2.34, "step": 175400 }, { "epoch": 0.6780860045460871, "grad_norm": 0.0986943319439888, "learning_rate": 0.002, "loss": 2.3345, "step": 175410 }, { "epoch": 0.6781246617494704, "grad_norm": 0.11578582972288132, "learning_rate": 0.002, "loss": 2.341, "step": 175420 }, { "epoch": 0.6781633189528536, "grad_norm": 0.11289183795452118, "learning_rate": 0.002, "loss": 2.3351, "step": 175430 }, { "epoch": 0.678201976156237, "grad_norm": 0.10435447096824646, "learning_rate": 0.002, "loss": 2.3463, "step": 175440 }, { "epoch": 0.6782406333596203, "grad_norm": 0.10721666365861893, "learning_rate": 0.002, "loss": 2.339, "step": 175450 }, { "epoch": 0.6782792905630035, "grad_norm": 0.10489135980606079, "learning_rate": 0.002, "loss": 2.3501, "step": 175460 }, { "epoch": 0.6783179477663868, "grad_norm": 0.08868864178657532, "learning_rate": 0.002, "loss": 2.3238, "step": 175470 }, { "epoch": 0.67835660496977, "grad_norm": 0.11689954251050949, "learning_rate": 0.002, "loss": 2.3492, "step": 175480 }, { "epoch": 0.6783952621731534, "grad_norm": 0.11360076814889908, "learning_rate": 0.002, "loss": 2.3485, "step": 175490 }, { "epoch": 0.6784339193765366, "grad_norm": 0.10669480264186859, "learning_rate": 0.002, "loss": 2.3402, "step": 175500 }, { "epoch": 0.6784725765799199, "grad_norm": 0.11861662566661835, "learning_rate": 0.002, "loss": 2.3485, "step": 175510 }, { "epoch": 0.6785112337833031, "grad_norm": 0.10718594491481781, "learning_rate": 0.002, "loss": 2.35, "step": 175520 }, { "epoch": 0.6785498909866865, "grad_norm": 0.0939483791589737, "learning_rate": 0.002, "loss": 2.352, "step": 175530 }, { "epoch": 0.6785885481900698, "grad_norm": 0.12440059334039688, "learning_rate": 0.002, "loss": 2.3476, "step": 175540 }, { "epoch": 0.678627205393453, "grad_norm": 0.09499397873878479, "learning_rate": 0.002, "loss": 2.3354, "step": 175550 }, { "epoch": 0.6786658625968363, "grad_norm": 0.11958306282758713, "learning_rate": 0.002, "loss": 2.3364, "step": 175560 }, { "epoch": 0.6787045198002196, "grad_norm": 0.11625002324581146, "learning_rate": 0.002, "loss": 2.3301, "step": 175570 }, { "epoch": 0.6787431770036029, "grad_norm": 0.10071554034948349, "learning_rate": 0.002, "loss": 2.3444, "step": 175580 }, { "epoch": 0.6787818342069861, "grad_norm": 0.11125433444976807, "learning_rate": 0.002, "loss": 2.3396, "step": 175590 }, { "epoch": 0.6788204914103694, "grad_norm": 0.12130042165517807, "learning_rate": 0.002, "loss": 2.335, "step": 175600 }, { "epoch": 0.6788591486137527, "grad_norm": 0.10506013035774231, "learning_rate": 0.002, "loss": 2.3516, "step": 175610 }, { "epoch": 0.678897805817136, "grad_norm": 0.11317040771245956, "learning_rate": 0.002, "loss": 2.3406, "step": 175620 }, { "epoch": 0.6789364630205192, "grad_norm": 0.09023216366767883, "learning_rate": 0.002, "loss": 2.3436, "step": 175630 }, { "epoch": 0.6789751202239025, "grad_norm": 0.09898082911968231, "learning_rate": 0.002, "loss": 2.3501, "step": 175640 }, { "epoch": 0.6790137774272857, "grad_norm": 0.09514901787042618, "learning_rate": 0.002, "loss": 2.3521, "step": 175650 }, { "epoch": 0.6790524346306691, "grad_norm": 0.10889595001935959, "learning_rate": 0.002, "loss": 2.3432, "step": 175660 }, { "epoch": 0.6790910918340524, "grad_norm": 0.09953156113624573, "learning_rate": 0.002, "loss": 2.3511, "step": 175670 }, { "epoch": 0.6791297490374356, "grad_norm": 0.09541979432106018, "learning_rate": 0.002, "loss": 2.3341, "step": 175680 }, { "epoch": 0.6791684062408189, "grad_norm": 0.09662420302629471, "learning_rate": 0.002, "loss": 2.3373, "step": 175690 }, { "epoch": 0.6792070634442022, "grad_norm": 0.1041659340262413, "learning_rate": 0.002, "loss": 2.3388, "step": 175700 }, { "epoch": 0.6792457206475855, "grad_norm": 0.09401014447212219, "learning_rate": 0.002, "loss": 2.3321, "step": 175710 }, { "epoch": 0.6792843778509687, "grad_norm": 0.11179909110069275, "learning_rate": 0.002, "loss": 2.3347, "step": 175720 }, { "epoch": 0.679323035054352, "grad_norm": 0.11274517327547073, "learning_rate": 0.002, "loss": 2.3446, "step": 175730 }, { "epoch": 0.6793616922577354, "grad_norm": 0.10251244157552719, "learning_rate": 0.002, "loss": 2.3364, "step": 175740 }, { "epoch": 0.6794003494611186, "grad_norm": 0.11762916296720505, "learning_rate": 0.002, "loss": 2.3487, "step": 175750 }, { "epoch": 0.6794390066645019, "grad_norm": 0.10066024959087372, "learning_rate": 0.002, "loss": 2.3398, "step": 175760 }, { "epoch": 0.6794776638678851, "grad_norm": 0.10446237772703171, "learning_rate": 0.002, "loss": 2.3614, "step": 175770 }, { "epoch": 0.6795163210712685, "grad_norm": 0.10105162113904953, "learning_rate": 0.002, "loss": 2.3476, "step": 175780 }, { "epoch": 0.6795549782746517, "grad_norm": 0.11937082558870316, "learning_rate": 0.002, "loss": 2.3451, "step": 175790 }, { "epoch": 0.679593635478035, "grad_norm": 0.1018209308385849, "learning_rate": 0.002, "loss": 2.3463, "step": 175800 }, { "epoch": 0.6796322926814182, "grad_norm": 0.10392194241285324, "learning_rate": 0.002, "loss": 2.3359, "step": 175810 }, { "epoch": 0.6796709498848015, "grad_norm": 0.09500529617071152, "learning_rate": 0.002, "loss": 2.3623, "step": 175820 }, { "epoch": 0.6797096070881848, "grad_norm": 0.10185185074806213, "learning_rate": 0.002, "loss": 2.338, "step": 175830 }, { "epoch": 0.6797482642915681, "grad_norm": 0.10656815022230148, "learning_rate": 0.002, "loss": 2.3354, "step": 175840 }, { "epoch": 0.6797869214949513, "grad_norm": 0.1034289002418518, "learning_rate": 0.002, "loss": 2.3339, "step": 175850 }, { "epoch": 0.6798255786983346, "grad_norm": 0.10846489667892456, "learning_rate": 0.002, "loss": 2.3523, "step": 175860 }, { "epoch": 0.679864235901718, "grad_norm": 0.09887100011110306, "learning_rate": 0.002, "loss": 2.348, "step": 175870 }, { "epoch": 0.6799028931051012, "grad_norm": 0.11460306495428085, "learning_rate": 0.002, "loss": 2.3369, "step": 175880 }, { "epoch": 0.6799415503084845, "grad_norm": 0.12321964651346207, "learning_rate": 0.002, "loss": 2.3378, "step": 175890 }, { "epoch": 0.6799802075118677, "grad_norm": 0.13607902824878693, "learning_rate": 0.002, "loss": 2.3409, "step": 175900 }, { "epoch": 0.6800188647152511, "grad_norm": 0.09586074203252792, "learning_rate": 0.002, "loss": 2.3416, "step": 175910 }, { "epoch": 0.6800575219186343, "grad_norm": 0.10172473639249802, "learning_rate": 0.002, "loss": 2.3508, "step": 175920 }, { "epoch": 0.6800961791220176, "grad_norm": 0.1413239687681198, "learning_rate": 0.002, "loss": 2.3324, "step": 175930 }, { "epoch": 0.6801348363254008, "grad_norm": 0.11910225450992584, "learning_rate": 0.002, "loss": 2.3403, "step": 175940 }, { "epoch": 0.6801734935287842, "grad_norm": 0.09417007863521576, "learning_rate": 0.002, "loss": 2.3387, "step": 175950 }, { "epoch": 0.6802121507321675, "grad_norm": 0.08883190155029297, "learning_rate": 0.002, "loss": 2.3255, "step": 175960 }, { "epoch": 0.6802508079355507, "grad_norm": 0.0998324379324913, "learning_rate": 0.002, "loss": 2.3451, "step": 175970 }, { "epoch": 0.680289465138934, "grad_norm": 0.10993354022502899, "learning_rate": 0.002, "loss": 2.3402, "step": 175980 }, { "epoch": 0.6803281223423173, "grad_norm": 0.09957070648670197, "learning_rate": 0.002, "loss": 2.3293, "step": 175990 }, { "epoch": 0.6803667795457006, "grad_norm": 0.09717411547899246, "learning_rate": 0.002, "loss": 2.3278, "step": 176000 }, { "epoch": 0.6804054367490838, "grad_norm": 0.11479727178812027, "learning_rate": 0.002, "loss": 2.3232, "step": 176010 }, { "epoch": 0.6804440939524671, "grad_norm": 0.11380119621753693, "learning_rate": 0.002, "loss": 2.3298, "step": 176020 }, { "epoch": 0.6804827511558503, "grad_norm": 0.09961897879838943, "learning_rate": 0.002, "loss": 2.3382, "step": 176030 }, { "epoch": 0.6805214083592337, "grad_norm": 0.10932581126689911, "learning_rate": 0.002, "loss": 2.343, "step": 176040 }, { "epoch": 0.680560065562617, "grad_norm": 0.09730560332536697, "learning_rate": 0.002, "loss": 2.3246, "step": 176050 }, { "epoch": 0.6805987227660002, "grad_norm": 0.1072082445025444, "learning_rate": 0.002, "loss": 2.354, "step": 176060 }, { "epoch": 0.6806373799693834, "grad_norm": 0.10722892731428146, "learning_rate": 0.002, "loss": 2.3307, "step": 176070 }, { "epoch": 0.6806760371727668, "grad_norm": 0.11320706456899643, "learning_rate": 0.002, "loss": 2.3448, "step": 176080 }, { "epoch": 0.6807146943761501, "grad_norm": 0.11437489092350006, "learning_rate": 0.002, "loss": 2.3446, "step": 176090 }, { "epoch": 0.6807533515795333, "grad_norm": 0.08987102657556534, "learning_rate": 0.002, "loss": 2.3426, "step": 176100 }, { "epoch": 0.6807920087829166, "grad_norm": 0.12513278424739838, "learning_rate": 0.002, "loss": 2.3526, "step": 176110 }, { "epoch": 0.6808306659862999, "grad_norm": 0.10193182528018951, "learning_rate": 0.002, "loss": 2.351, "step": 176120 }, { "epoch": 0.6808693231896832, "grad_norm": 0.09458781033754349, "learning_rate": 0.002, "loss": 2.3446, "step": 176130 }, { "epoch": 0.6809079803930664, "grad_norm": 0.09140831977128983, "learning_rate": 0.002, "loss": 2.3431, "step": 176140 }, { "epoch": 0.6809466375964497, "grad_norm": 0.09771974384784698, "learning_rate": 0.002, "loss": 2.3516, "step": 176150 }, { "epoch": 0.680985294799833, "grad_norm": 0.09318406134843826, "learning_rate": 0.002, "loss": 2.3374, "step": 176160 }, { "epoch": 0.6810239520032163, "grad_norm": 0.15654128789901733, "learning_rate": 0.002, "loss": 2.3456, "step": 176170 }, { "epoch": 0.6810626092065996, "grad_norm": 0.09128965437412262, "learning_rate": 0.002, "loss": 2.3394, "step": 176180 }, { "epoch": 0.6811012664099828, "grad_norm": 0.11416096240282059, "learning_rate": 0.002, "loss": 2.3473, "step": 176190 }, { "epoch": 0.6811399236133661, "grad_norm": 0.09656243771314621, "learning_rate": 0.002, "loss": 2.3473, "step": 176200 }, { "epoch": 0.6811785808167494, "grad_norm": 0.10527592897415161, "learning_rate": 0.002, "loss": 2.3349, "step": 176210 }, { "epoch": 0.6812172380201327, "grad_norm": 0.11535614728927612, "learning_rate": 0.002, "loss": 2.3599, "step": 176220 }, { "epoch": 0.6812558952235159, "grad_norm": 0.0986952930688858, "learning_rate": 0.002, "loss": 2.3537, "step": 176230 }, { "epoch": 0.6812945524268992, "grad_norm": 0.10911183804273605, "learning_rate": 0.002, "loss": 2.3399, "step": 176240 }, { "epoch": 0.6813332096302825, "grad_norm": 0.10323513299226761, "learning_rate": 0.002, "loss": 2.3433, "step": 176250 }, { "epoch": 0.6813718668336658, "grad_norm": 0.13658122718334198, "learning_rate": 0.002, "loss": 2.3456, "step": 176260 }, { "epoch": 0.681410524037049, "grad_norm": 0.11002452671527863, "learning_rate": 0.002, "loss": 2.3461, "step": 176270 }, { "epoch": 0.6814491812404323, "grad_norm": 0.10110387951135635, "learning_rate": 0.002, "loss": 2.3364, "step": 176280 }, { "epoch": 0.6814878384438157, "grad_norm": 0.11138048022985458, "learning_rate": 0.002, "loss": 2.3547, "step": 176290 }, { "epoch": 0.6815264956471989, "grad_norm": 0.17693695425987244, "learning_rate": 0.002, "loss": 2.35, "step": 176300 }, { "epoch": 0.6815651528505822, "grad_norm": 0.11095073819160461, "learning_rate": 0.002, "loss": 2.3386, "step": 176310 }, { "epoch": 0.6816038100539654, "grad_norm": 1.074438452720642, "learning_rate": 0.002, "loss": 2.3327, "step": 176320 }, { "epoch": 0.6816424672573488, "grad_norm": 0.10732623934745789, "learning_rate": 0.002, "loss": 2.3458, "step": 176330 }, { "epoch": 0.681681124460732, "grad_norm": 0.1113322302699089, "learning_rate": 0.002, "loss": 2.3427, "step": 176340 }, { "epoch": 0.6817197816641153, "grad_norm": 0.11108031868934631, "learning_rate": 0.002, "loss": 2.3344, "step": 176350 }, { "epoch": 0.6817584388674985, "grad_norm": 0.11360624432563782, "learning_rate": 0.002, "loss": 2.3528, "step": 176360 }, { "epoch": 0.6817970960708819, "grad_norm": 0.10377056896686554, "learning_rate": 0.002, "loss": 2.3566, "step": 176370 }, { "epoch": 0.6818357532742652, "grad_norm": 0.15662111341953278, "learning_rate": 0.002, "loss": 2.3361, "step": 176380 }, { "epoch": 0.6818744104776484, "grad_norm": 0.1019727885723114, "learning_rate": 0.002, "loss": 2.3486, "step": 176390 }, { "epoch": 0.6819130676810317, "grad_norm": 0.10392680764198303, "learning_rate": 0.002, "loss": 2.3442, "step": 176400 }, { "epoch": 0.6819517248844149, "grad_norm": 0.13927051424980164, "learning_rate": 0.002, "loss": 2.3384, "step": 176410 }, { "epoch": 0.6819903820877983, "grad_norm": 0.10128787159919739, "learning_rate": 0.002, "loss": 2.3486, "step": 176420 }, { "epoch": 0.6820290392911815, "grad_norm": 0.1053689643740654, "learning_rate": 0.002, "loss": 2.3331, "step": 176430 }, { "epoch": 0.6820676964945648, "grad_norm": 0.10823885351419449, "learning_rate": 0.002, "loss": 2.3377, "step": 176440 }, { "epoch": 0.682106353697948, "grad_norm": 0.11570017784833908, "learning_rate": 0.002, "loss": 2.3419, "step": 176450 }, { "epoch": 0.6821450109013314, "grad_norm": 0.11610689759254456, "learning_rate": 0.002, "loss": 2.3466, "step": 176460 }, { "epoch": 0.6821836681047146, "grad_norm": 0.11022695899009705, "learning_rate": 0.002, "loss": 2.3433, "step": 176470 }, { "epoch": 0.6822223253080979, "grad_norm": 0.1143764927983284, "learning_rate": 0.002, "loss": 2.3387, "step": 176480 }, { "epoch": 0.6822609825114812, "grad_norm": 0.10477987676858902, "learning_rate": 0.002, "loss": 2.3448, "step": 176490 }, { "epoch": 0.6822996397148645, "grad_norm": 0.11330216377973557, "learning_rate": 0.002, "loss": 2.3468, "step": 176500 }, { "epoch": 0.6823382969182478, "grad_norm": 0.09663552045822144, "learning_rate": 0.002, "loss": 2.3395, "step": 176510 }, { "epoch": 0.682376954121631, "grad_norm": 0.1135273277759552, "learning_rate": 0.002, "loss": 2.3485, "step": 176520 }, { "epoch": 0.6824156113250143, "grad_norm": 0.10124694555997849, "learning_rate": 0.002, "loss": 2.3412, "step": 176530 }, { "epoch": 0.6824542685283976, "grad_norm": 0.1054057776927948, "learning_rate": 0.002, "loss": 2.3536, "step": 176540 }, { "epoch": 0.6824929257317809, "grad_norm": 0.12605276703834534, "learning_rate": 0.002, "loss": 2.3347, "step": 176550 }, { "epoch": 0.6825315829351641, "grad_norm": 0.10874178260564804, "learning_rate": 0.002, "loss": 2.3318, "step": 176560 }, { "epoch": 0.6825702401385474, "grad_norm": 0.10447292774915695, "learning_rate": 0.002, "loss": 2.3357, "step": 176570 }, { "epoch": 0.6826088973419306, "grad_norm": 0.11027207970619202, "learning_rate": 0.002, "loss": 2.3409, "step": 176580 }, { "epoch": 0.682647554545314, "grad_norm": 0.10599247366189957, "learning_rate": 0.002, "loss": 2.3558, "step": 176590 }, { "epoch": 0.6826862117486973, "grad_norm": 0.09568461775779724, "learning_rate": 0.002, "loss": 2.338, "step": 176600 }, { "epoch": 0.6827248689520805, "grad_norm": 0.11142941564321518, "learning_rate": 0.002, "loss": 2.3405, "step": 176610 }, { "epoch": 0.6827635261554638, "grad_norm": 0.09197314828634262, "learning_rate": 0.002, "loss": 2.3316, "step": 176620 }, { "epoch": 0.6828021833588471, "grad_norm": 0.09502045065164566, "learning_rate": 0.002, "loss": 2.3546, "step": 176630 }, { "epoch": 0.6828408405622304, "grad_norm": 0.1063617616891861, "learning_rate": 0.002, "loss": 2.351, "step": 176640 }, { "epoch": 0.6828794977656136, "grad_norm": 0.11514590680599213, "learning_rate": 0.002, "loss": 2.3412, "step": 176650 }, { "epoch": 0.6829181549689969, "grad_norm": 0.0956290066242218, "learning_rate": 0.002, "loss": 2.3253, "step": 176660 }, { "epoch": 0.6829568121723802, "grad_norm": 0.10817734152078629, "learning_rate": 0.002, "loss": 2.3354, "step": 176670 }, { "epoch": 0.6829954693757635, "grad_norm": 0.11415576934814453, "learning_rate": 0.002, "loss": 2.3451, "step": 176680 }, { "epoch": 0.6830341265791467, "grad_norm": 0.08466695249080658, "learning_rate": 0.002, "loss": 2.3393, "step": 176690 }, { "epoch": 0.68307278378253, "grad_norm": 0.11186059564352036, "learning_rate": 0.002, "loss": 2.3356, "step": 176700 }, { "epoch": 0.6831114409859134, "grad_norm": 0.08967024832963943, "learning_rate": 0.002, "loss": 2.3258, "step": 176710 }, { "epoch": 0.6831500981892966, "grad_norm": 0.12295638769865036, "learning_rate": 0.002, "loss": 2.3385, "step": 176720 }, { "epoch": 0.6831887553926799, "grad_norm": 0.09820482134819031, "learning_rate": 0.002, "loss": 2.3495, "step": 176730 }, { "epoch": 0.6832274125960631, "grad_norm": 0.10533328354358673, "learning_rate": 0.002, "loss": 2.3432, "step": 176740 }, { "epoch": 0.6832660697994464, "grad_norm": 0.10046335309743881, "learning_rate": 0.002, "loss": 2.3302, "step": 176750 }, { "epoch": 0.6833047270028297, "grad_norm": 0.14013177156448364, "learning_rate": 0.002, "loss": 2.3376, "step": 176760 }, { "epoch": 0.683343384206213, "grad_norm": 0.10282900184392929, "learning_rate": 0.002, "loss": 2.3334, "step": 176770 }, { "epoch": 0.6833820414095962, "grad_norm": 0.11350609362125397, "learning_rate": 0.002, "loss": 2.3376, "step": 176780 }, { "epoch": 0.6834206986129795, "grad_norm": 0.0963103398680687, "learning_rate": 0.002, "loss": 2.3401, "step": 176790 }, { "epoch": 0.6834593558163629, "grad_norm": 0.12058515101671219, "learning_rate": 0.002, "loss": 2.3426, "step": 176800 }, { "epoch": 0.6834980130197461, "grad_norm": 0.12234441190958023, "learning_rate": 0.002, "loss": 2.3348, "step": 176810 }, { "epoch": 0.6835366702231294, "grad_norm": 0.0914405956864357, "learning_rate": 0.002, "loss": 2.3355, "step": 176820 }, { "epoch": 0.6835753274265126, "grad_norm": 0.09852568060159683, "learning_rate": 0.002, "loss": 2.3463, "step": 176830 }, { "epoch": 0.683613984629896, "grad_norm": 0.1033724918961525, "learning_rate": 0.002, "loss": 2.3461, "step": 176840 }, { "epoch": 0.6836526418332792, "grad_norm": 0.10538680106401443, "learning_rate": 0.002, "loss": 2.3294, "step": 176850 }, { "epoch": 0.6836912990366625, "grad_norm": 0.08989083766937256, "learning_rate": 0.002, "loss": 2.347, "step": 176860 }, { "epoch": 0.6837299562400457, "grad_norm": 0.09663797169923782, "learning_rate": 0.002, "loss": 2.3272, "step": 176870 }, { "epoch": 0.6837686134434291, "grad_norm": 0.10179861634969711, "learning_rate": 0.002, "loss": 2.3536, "step": 176880 }, { "epoch": 0.6838072706468123, "grad_norm": 0.09698754549026489, "learning_rate": 0.002, "loss": 2.3425, "step": 176890 }, { "epoch": 0.6838459278501956, "grad_norm": 0.11067965626716614, "learning_rate": 0.002, "loss": 2.3311, "step": 176900 }, { "epoch": 0.6838845850535789, "grad_norm": 0.10127593576908112, "learning_rate": 0.002, "loss": 2.3358, "step": 176910 }, { "epoch": 0.6839232422569622, "grad_norm": 0.10865731537342072, "learning_rate": 0.002, "loss": 2.3459, "step": 176920 }, { "epoch": 0.6839618994603455, "grad_norm": 0.12426352500915527, "learning_rate": 0.002, "loss": 2.338, "step": 176930 }, { "epoch": 0.6840005566637287, "grad_norm": 0.09453696757555008, "learning_rate": 0.002, "loss": 2.3559, "step": 176940 }, { "epoch": 0.684039213867112, "grad_norm": 0.10218081623315811, "learning_rate": 0.002, "loss": 2.3445, "step": 176950 }, { "epoch": 0.6840778710704952, "grad_norm": 0.10880590230226517, "learning_rate": 0.002, "loss": 2.324, "step": 176960 }, { "epoch": 0.6841165282738786, "grad_norm": 0.1122904047369957, "learning_rate": 0.002, "loss": 2.3402, "step": 176970 }, { "epoch": 0.6841551854772618, "grad_norm": 0.11233898252248764, "learning_rate": 0.002, "loss": 2.3387, "step": 176980 }, { "epoch": 0.6841938426806451, "grad_norm": 0.12309885770082474, "learning_rate": 0.002, "loss": 2.3397, "step": 176990 }, { "epoch": 0.6842324998840283, "grad_norm": 0.11137855798006058, "learning_rate": 0.002, "loss": 2.3334, "step": 177000 }, { "epoch": 0.6842711570874117, "grad_norm": 0.10196123272180557, "learning_rate": 0.002, "loss": 2.3247, "step": 177010 }, { "epoch": 0.684309814290795, "grad_norm": 0.1031576544046402, "learning_rate": 0.002, "loss": 2.3474, "step": 177020 }, { "epoch": 0.6843484714941782, "grad_norm": 0.10486750304698944, "learning_rate": 0.002, "loss": 2.3372, "step": 177030 }, { "epoch": 0.6843871286975615, "grad_norm": 0.10431206971406937, "learning_rate": 0.002, "loss": 2.3412, "step": 177040 }, { "epoch": 0.6844257859009448, "grad_norm": 0.10427211970090866, "learning_rate": 0.002, "loss": 2.3368, "step": 177050 }, { "epoch": 0.6844644431043281, "grad_norm": 0.10033883899450302, "learning_rate": 0.002, "loss": 2.3483, "step": 177060 }, { "epoch": 0.6845031003077113, "grad_norm": 0.11335708945989609, "learning_rate": 0.002, "loss": 2.3499, "step": 177070 }, { "epoch": 0.6845417575110946, "grad_norm": 0.10321256518363953, "learning_rate": 0.002, "loss": 2.3543, "step": 177080 }, { "epoch": 0.684580414714478, "grad_norm": 0.11628992855548859, "learning_rate": 0.002, "loss": 2.3429, "step": 177090 }, { "epoch": 0.6846190719178612, "grad_norm": 0.10992211103439331, "learning_rate": 0.002, "loss": 2.347, "step": 177100 }, { "epoch": 0.6846577291212445, "grad_norm": 0.0979648232460022, "learning_rate": 0.002, "loss": 2.3295, "step": 177110 }, { "epoch": 0.6846963863246277, "grad_norm": 0.11584831774234772, "learning_rate": 0.002, "loss": 2.3425, "step": 177120 }, { "epoch": 0.684735043528011, "grad_norm": 0.09908290207386017, "learning_rate": 0.002, "loss": 2.3419, "step": 177130 }, { "epoch": 0.6847737007313943, "grad_norm": 0.1265435367822647, "learning_rate": 0.002, "loss": 2.3607, "step": 177140 }, { "epoch": 0.6848123579347776, "grad_norm": 0.1148119643330574, "learning_rate": 0.002, "loss": 2.3384, "step": 177150 }, { "epoch": 0.6848510151381608, "grad_norm": 0.09815677255392075, "learning_rate": 0.002, "loss": 2.3394, "step": 177160 }, { "epoch": 0.6848896723415441, "grad_norm": 0.09669837355613708, "learning_rate": 0.002, "loss": 2.3394, "step": 177170 }, { "epoch": 0.6849283295449274, "grad_norm": 0.11474636197090149, "learning_rate": 0.002, "loss": 2.3282, "step": 177180 }, { "epoch": 0.6849669867483107, "grad_norm": 0.09963209182024002, "learning_rate": 0.002, "loss": 2.347, "step": 177190 }, { "epoch": 0.6850056439516939, "grad_norm": 0.1148679256439209, "learning_rate": 0.002, "loss": 2.3395, "step": 177200 }, { "epoch": 0.6850443011550772, "grad_norm": 0.09433385729789734, "learning_rate": 0.002, "loss": 2.335, "step": 177210 }, { "epoch": 0.6850829583584606, "grad_norm": 0.13668915629386902, "learning_rate": 0.002, "loss": 2.3602, "step": 177220 }, { "epoch": 0.6851216155618438, "grad_norm": 0.10129014402627945, "learning_rate": 0.002, "loss": 2.3378, "step": 177230 }, { "epoch": 0.6851602727652271, "grad_norm": 0.11268419027328491, "learning_rate": 0.002, "loss": 2.3367, "step": 177240 }, { "epoch": 0.6851989299686103, "grad_norm": 0.11311889439821243, "learning_rate": 0.002, "loss": 2.342, "step": 177250 }, { "epoch": 0.6852375871719937, "grad_norm": 0.09963889420032501, "learning_rate": 0.002, "loss": 2.3308, "step": 177260 }, { "epoch": 0.6852762443753769, "grad_norm": 0.10477506369352341, "learning_rate": 0.002, "loss": 2.3406, "step": 177270 }, { "epoch": 0.6853149015787602, "grad_norm": 0.10674279183149338, "learning_rate": 0.002, "loss": 2.3286, "step": 177280 }, { "epoch": 0.6853535587821434, "grad_norm": 0.11757767200469971, "learning_rate": 0.002, "loss": 2.3488, "step": 177290 }, { "epoch": 0.6853922159855268, "grad_norm": 0.11279809474945068, "learning_rate": 0.002, "loss": 2.3645, "step": 177300 }, { "epoch": 0.68543087318891, "grad_norm": 0.09075847268104553, "learning_rate": 0.002, "loss": 2.3483, "step": 177310 }, { "epoch": 0.6854695303922933, "grad_norm": 0.11798780411481857, "learning_rate": 0.002, "loss": 2.3497, "step": 177320 }, { "epoch": 0.6855081875956766, "grad_norm": 0.1009967178106308, "learning_rate": 0.002, "loss": 2.3433, "step": 177330 }, { "epoch": 0.6855468447990598, "grad_norm": 0.0941571444272995, "learning_rate": 0.002, "loss": 2.3479, "step": 177340 }, { "epoch": 0.6855855020024432, "grad_norm": 0.09473618865013123, "learning_rate": 0.002, "loss": 2.3428, "step": 177350 }, { "epoch": 0.6856241592058264, "grad_norm": 0.10038759559392929, "learning_rate": 0.002, "loss": 2.3468, "step": 177360 }, { "epoch": 0.6856628164092097, "grad_norm": 0.1028994470834732, "learning_rate": 0.002, "loss": 2.3478, "step": 177370 }, { "epoch": 0.6857014736125929, "grad_norm": 0.09498606622219086, "learning_rate": 0.002, "loss": 2.3475, "step": 177380 }, { "epoch": 0.6857401308159763, "grad_norm": 0.0928439348936081, "learning_rate": 0.002, "loss": 2.3261, "step": 177390 }, { "epoch": 0.6857787880193595, "grad_norm": 0.09801946580410004, "learning_rate": 0.002, "loss": 2.3475, "step": 177400 }, { "epoch": 0.6858174452227428, "grad_norm": 0.10568686574697495, "learning_rate": 0.002, "loss": 2.3313, "step": 177410 }, { "epoch": 0.685856102426126, "grad_norm": 0.10233966261148453, "learning_rate": 0.002, "loss": 2.3421, "step": 177420 }, { "epoch": 0.6858947596295094, "grad_norm": 0.142459899187088, "learning_rate": 0.002, "loss": 2.3431, "step": 177430 }, { "epoch": 0.6859334168328927, "grad_norm": 0.1762257218360901, "learning_rate": 0.002, "loss": 2.3466, "step": 177440 }, { "epoch": 0.6859720740362759, "grad_norm": 0.09656956791877747, "learning_rate": 0.002, "loss": 2.339, "step": 177450 }, { "epoch": 0.6860107312396592, "grad_norm": 0.10861390084028244, "learning_rate": 0.002, "loss": 2.3502, "step": 177460 }, { "epoch": 0.6860493884430425, "grad_norm": 0.10502377152442932, "learning_rate": 0.002, "loss": 2.3471, "step": 177470 }, { "epoch": 0.6860880456464258, "grad_norm": 0.09659518301486969, "learning_rate": 0.002, "loss": 2.3458, "step": 177480 }, { "epoch": 0.686126702849809, "grad_norm": 0.11638438701629639, "learning_rate": 0.002, "loss": 2.342, "step": 177490 }, { "epoch": 0.6861653600531923, "grad_norm": 0.1016044095158577, "learning_rate": 0.002, "loss": 2.3567, "step": 177500 }, { "epoch": 0.6862040172565755, "grad_norm": 0.10398321598768234, "learning_rate": 0.002, "loss": 2.3374, "step": 177510 }, { "epoch": 0.6862426744599589, "grad_norm": 0.11729230731725693, "learning_rate": 0.002, "loss": 2.3495, "step": 177520 }, { "epoch": 0.6862813316633422, "grad_norm": 0.10508260130882263, "learning_rate": 0.002, "loss": 2.3389, "step": 177530 }, { "epoch": 0.6863199888667254, "grad_norm": 0.12430533766746521, "learning_rate": 0.002, "loss": 2.3478, "step": 177540 }, { "epoch": 0.6863586460701087, "grad_norm": 0.10194669663906097, "learning_rate": 0.002, "loss": 2.3521, "step": 177550 }, { "epoch": 0.686397303273492, "grad_norm": 0.10797617584466934, "learning_rate": 0.002, "loss": 2.336, "step": 177560 }, { "epoch": 0.6864359604768753, "grad_norm": 0.11274404078722, "learning_rate": 0.002, "loss": 2.3392, "step": 177570 }, { "epoch": 0.6864746176802585, "grad_norm": 0.09633027017116547, "learning_rate": 0.002, "loss": 2.3318, "step": 177580 }, { "epoch": 0.6865132748836418, "grad_norm": 0.11169687658548355, "learning_rate": 0.002, "loss": 2.3361, "step": 177590 }, { "epoch": 0.6865519320870251, "grad_norm": 0.09493947774171829, "learning_rate": 0.002, "loss": 2.3368, "step": 177600 }, { "epoch": 0.6865905892904084, "grad_norm": 0.1339365690946579, "learning_rate": 0.002, "loss": 2.3384, "step": 177610 }, { "epoch": 0.6866292464937916, "grad_norm": 0.09708207100629807, "learning_rate": 0.002, "loss": 2.3619, "step": 177620 }, { "epoch": 0.6866679036971749, "grad_norm": 0.12348034232854843, "learning_rate": 0.002, "loss": 2.3323, "step": 177630 }, { "epoch": 0.6867065609005583, "grad_norm": 0.10543914884328842, "learning_rate": 0.002, "loss": 2.3459, "step": 177640 }, { "epoch": 0.6867452181039415, "grad_norm": 0.09651929885149002, "learning_rate": 0.002, "loss": 2.3409, "step": 177650 }, { "epoch": 0.6867838753073248, "grad_norm": 0.125656396150589, "learning_rate": 0.002, "loss": 2.3345, "step": 177660 }, { "epoch": 0.686822532510708, "grad_norm": 0.11345012485980988, "learning_rate": 0.002, "loss": 2.3336, "step": 177670 }, { "epoch": 0.6868611897140913, "grad_norm": 0.0984870120882988, "learning_rate": 0.002, "loss": 2.3382, "step": 177680 }, { "epoch": 0.6868998469174746, "grad_norm": 0.11127850413322449, "learning_rate": 0.002, "loss": 2.3433, "step": 177690 }, { "epoch": 0.6869385041208579, "grad_norm": 0.1167495921254158, "learning_rate": 0.002, "loss": 2.3529, "step": 177700 }, { "epoch": 0.6869771613242411, "grad_norm": 0.10851190239191055, "learning_rate": 0.002, "loss": 2.3472, "step": 177710 }, { "epoch": 0.6870158185276244, "grad_norm": 0.09147991240024567, "learning_rate": 0.002, "loss": 2.3253, "step": 177720 }, { "epoch": 0.6870544757310078, "grad_norm": 0.10341312736272812, "learning_rate": 0.002, "loss": 2.3561, "step": 177730 }, { "epoch": 0.687093132934391, "grad_norm": 0.09881774336099625, "learning_rate": 0.002, "loss": 2.3379, "step": 177740 }, { "epoch": 0.6871317901377743, "grad_norm": 0.11159349232912064, "learning_rate": 0.002, "loss": 2.3434, "step": 177750 }, { "epoch": 0.6871704473411575, "grad_norm": 0.09615936875343323, "learning_rate": 0.002, "loss": 2.3461, "step": 177760 }, { "epoch": 0.6872091045445409, "grad_norm": 0.1058683916926384, "learning_rate": 0.002, "loss": 2.3192, "step": 177770 }, { "epoch": 0.6872477617479241, "grad_norm": 0.1070360466837883, "learning_rate": 0.002, "loss": 2.3397, "step": 177780 }, { "epoch": 0.6872864189513074, "grad_norm": 0.10910263657569885, "learning_rate": 0.002, "loss": 2.3393, "step": 177790 }, { "epoch": 0.6873250761546906, "grad_norm": 0.10926370322704315, "learning_rate": 0.002, "loss": 2.3438, "step": 177800 }, { "epoch": 0.687363733358074, "grad_norm": 0.09721733629703522, "learning_rate": 0.002, "loss": 2.3329, "step": 177810 }, { "epoch": 0.6874023905614572, "grad_norm": 0.09801986813545227, "learning_rate": 0.002, "loss": 2.3451, "step": 177820 }, { "epoch": 0.6874410477648405, "grad_norm": 0.1177133172750473, "learning_rate": 0.002, "loss": 2.3238, "step": 177830 }, { "epoch": 0.6874797049682237, "grad_norm": 0.09813974052667618, "learning_rate": 0.002, "loss": 2.3411, "step": 177840 }, { "epoch": 0.6875183621716071, "grad_norm": 0.11016160249710083, "learning_rate": 0.002, "loss": 2.3461, "step": 177850 }, { "epoch": 0.6875570193749904, "grad_norm": 0.11785726249217987, "learning_rate": 0.002, "loss": 2.3357, "step": 177860 }, { "epoch": 0.6875956765783736, "grad_norm": 0.10382388532161713, "learning_rate": 0.002, "loss": 2.3346, "step": 177870 }, { "epoch": 0.6876343337817569, "grad_norm": 0.10329584032297134, "learning_rate": 0.002, "loss": 2.338, "step": 177880 }, { "epoch": 0.6876729909851401, "grad_norm": 0.12139497697353363, "learning_rate": 0.002, "loss": 2.3407, "step": 177890 }, { "epoch": 0.6877116481885235, "grad_norm": 0.12352616339921951, "learning_rate": 0.002, "loss": 2.3385, "step": 177900 }, { "epoch": 0.6877503053919067, "grad_norm": 0.0942232683300972, "learning_rate": 0.002, "loss": 2.3277, "step": 177910 }, { "epoch": 0.68778896259529, "grad_norm": 0.10492647439241409, "learning_rate": 0.002, "loss": 2.3483, "step": 177920 }, { "epoch": 0.6878276197986732, "grad_norm": 0.11326676607131958, "learning_rate": 0.002, "loss": 2.3381, "step": 177930 }, { "epoch": 0.6878662770020566, "grad_norm": 0.10235963016748428, "learning_rate": 0.002, "loss": 2.3525, "step": 177940 }, { "epoch": 0.6879049342054399, "grad_norm": 0.1606038212776184, "learning_rate": 0.002, "loss": 2.3422, "step": 177950 }, { "epoch": 0.6879435914088231, "grad_norm": 0.09358200430870056, "learning_rate": 0.002, "loss": 2.3319, "step": 177960 }, { "epoch": 0.6879822486122064, "grad_norm": 0.096051886677742, "learning_rate": 0.002, "loss": 2.3643, "step": 177970 }, { "epoch": 0.6880209058155897, "grad_norm": 0.10265396535396576, "learning_rate": 0.002, "loss": 2.3416, "step": 177980 }, { "epoch": 0.688059563018973, "grad_norm": 0.11899688094854355, "learning_rate": 0.002, "loss": 2.3486, "step": 177990 }, { "epoch": 0.6880982202223562, "grad_norm": 0.10440745949745178, "learning_rate": 0.002, "loss": 2.3372, "step": 178000 }, { "epoch": 0.6881368774257395, "grad_norm": 0.11681189388036728, "learning_rate": 0.002, "loss": 2.3481, "step": 178010 }, { "epoch": 0.6881755346291228, "grad_norm": 0.0977269634604454, "learning_rate": 0.002, "loss": 2.344, "step": 178020 }, { "epoch": 0.6882141918325061, "grad_norm": 0.09904341399669647, "learning_rate": 0.002, "loss": 2.3412, "step": 178030 }, { "epoch": 0.6882528490358893, "grad_norm": 0.12160401046276093, "learning_rate": 0.002, "loss": 2.3347, "step": 178040 }, { "epoch": 0.6882915062392726, "grad_norm": 0.11060669273138046, "learning_rate": 0.002, "loss": 2.3271, "step": 178050 }, { "epoch": 0.6883301634426559, "grad_norm": 0.10494236648082733, "learning_rate": 0.002, "loss": 2.3436, "step": 178060 }, { "epoch": 0.6883688206460392, "grad_norm": 0.11479184031486511, "learning_rate": 0.002, "loss": 2.3315, "step": 178070 }, { "epoch": 0.6884074778494225, "grad_norm": 0.10022313892841339, "learning_rate": 0.002, "loss": 2.3505, "step": 178080 }, { "epoch": 0.6884461350528057, "grad_norm": 0.10921555757522583, "learning_rate": 0.002, "loss": 2.3441, "step": 178090 }, { "epoch": 0.688484792256189, "grad_norm": 0.14623263478279114, "learning_rate": 0.002, "loss": 2.3297, "step": 178100 }, { "epoch": 0.6885234494595723, "grad_norm": 0.10130447149276733, "learning_rate": 0.002, "loss": 2.3415, "step": 178110 }, { "epoch": 0.6885621066629556, "grad_norm": 0.10313340276479721, "learning_rate": 0.002, "loss": 2.318, "step": 178120 }, { "epoch": 0.6886007638663388, "grad_norm": 0.10620785504579544, "learning_rate": 0.002, "loss": 2.355, "step": 178130 }, { "epoch": 0.6886394210697221, "grad_norm": 0.11439868062734604, "learning_rate": 0.002, "loss": 2.3387, "step": 178140 }, { "epoch": 0.6886780782731055, "grad_norm": 0.1117575466632843, "learning_rate": 0.002, "loss": 2.3428, "step": 178150 }, { "epoch": 0.6887167354764887, "grad_norm": 0.1372932493686676, "learning_rate": 0.002, "loss": 2.3258, "step": 178160 }, { "epoch": 0.688755392679872, "grad_norm": 0.11603402346372604, "learning_rate": 0.002, "loss": 2.3281, "step": 178170 }, { "epoch": 0.6887940498832552, "grad_norm": 0.09763218462467194, "learning_rate": 0.002, "loss": 2.3412, "step": 178180 }, { "epoch": 0.6888327070866386, "grad_norm": 0.09390419721603394, "learning_rate": 0.002, "loss": 2.3546, "step": 178190 }, { "epoch": 0.6888713642900218, "grad_norm": 0.09340616315603256, "learning_rate": 0.002, "loss": 2.3494, "step": 178200 }, { "epoch": 0.6889100214934051, "grad_norm": 0.11203593015670776, "learning_rate": 0.002, "loss": 2.3309, "step": 178210 }, { "epoch": 0.6889486786967883, "grad_norm": 0.12373124063014984, "learning_rate": 0.002, "loss": 2.3453, "step": 178220 }, { "epoch": 0.6889873359001717, "grad_norm": 0.09314067661762238, "learning_rate": 0.002, "loss": 2.3502, "step": 178230 }, { "epoch": 0.689025993103555, "grad_norm": 0.10530916601419449, "learning_rate": 0.002, "loss": 2.3312, "step": 178240 }, { "epoch": 0.6890646503069382, "grad_norm": 0.12179020792245865, "learning_rate": 0.002, "loss": 2.3391, "step": 178250 }, { "epoch": 0.6891033075103215, "grad_norm": 0.09535452723503113, "learning_rate": 0.002, "loss": 2.3548, "step": 178260 }, { "epoch": 0.6891419647137047, "grad_norm": 0.09405100345611572, "learning_rate": 0.002, "loss": 2.3466, "step": 178270 }, { "epoch": 0.6891806219170881, "grad_norm": 0.12436164915561676, "learning_rate": 0.002, "loss": 2.3401, "step": 178280 }, { "epoch": 0.6892192791204713, "grad_norm": 0.12237463146448135, "learning_rate": 0.002, "loss": 2.342, "step": 178290 }, { "epoch": 0.6892579363238546, "grad_norm": 0.10916969180107117, "learning_rate": 0.002, "loss": 2.3416, "step": 178300 }, { "epoch": 0.6892965935272378, "grad_norm": 0.11725562810897827, "learning_rate": 0.002, "loss": 2.3502, "step": 178310 }, { "epoch": 0.6893352507306212, "grad_norm": 0.09735278785228729, "learning_rate": 0.002, "loss": 2.3366, "step": 178320 }, { "epoch": 0.6893739079340044, "grad_norm": 0.104241743683815, "learning_rate": 0.002, "loss": 2.3423, "step": 178330 }, { "epoch": 0.6894125651373877, "grad_norm": 0.11518935114145279, "learning_rate": 0.002, "loss": 2.3457, "step": 178340 }, { "epoch": 0.6894512223407709, "grad_norm": 0.09700454771518707, "learning_rate": 0.002, "loss": 2.3431, "step": 178350 }, { "epoch": 0.6894898795441543, "grad_norm": 0.1426231861114502, "learning_rate": 0.002, "loss": 2.3355, "step": 178360 }, { "epoch": 0.6895285367475376, "grad_norm": 0.11000684648752213, "learning_rate": 0.002, "loss": 2.3408, "step": 178370 }, { "epoch": 0.6895671939509208, "grad_norm": 0.12923002243041992, "learning_rate": 0.002, "loss": 2.3323, "step": 178380 }, { "epoch": 0.6896058511543041, "grad_norm": 0.11459232121706009, "learning_rate": 0.002, "loss": 2.3466, "step": 178390 }, { "epoch": 0.6896445083576874, "grad_norm": 0.10787385702133179, "learning_rate": 0.002, "loss": 2.3465, "step": 178400 }, { "epoch": 0.6896831655610707, "grad_norm": 0.10136546939611435, "learning_rate": 0.002, "loss": 2.3436, "step": 178410 }, { "epoch": 0.6897218227644539, "grad_norm": 0.12130826711654663, "learning_rate": 0.002, "loss": 2.3499, "step": 178420 }, { "epoch": 0.6897604799678372, "grad_norm": 0.11021111905574799, "learning_rate": 0.002, "loss": 2.3284, "step": 178430 }, { "epoch": 0.6897991371712204, "grad_norm": 0.11486952006816864, "learning_rate": 0.002, "loss": 2.3478, "step": 178440 }, { "epoch": 0.6898377943746038, "grad_norm": 0.11372072994709015, "learning_rate": 0.002, "loss": 2.3511, "step": 178450 }, { "epoch": 0.689876451577987, "grad_norm": 0.11879274249076843, "learning_rate": 0.002, "loss": 2.3521, "step": 178460 }, { "epoch": 0.6899151087813703, "grad_norm": 0.09907825291156769, "learning_rate": 0.002, "loss": 2.3323, "step": 178470 }, { "epoch": 0.6899537659847536, "grad_norm": 0.11506243795156479, "learning_rate": 0.002, "loss": 2.3421, "step": 178480 }, { "epoch": 0.6899924231881369, "grad_norm": 0.09706560522317886, "learning_rate": 0.002, "loss": 2.3365, "step": 178490 }, { "epoch": 0.6900310803915202, "grad_norm": 0.09016738831996918, "learning_rate": 0.002, "loss": 2.3331, "step": 178500 }, { "epoch": 0.6900697375949034, "grad_norm": 0.14885951578617096, "learning_rate": 0.002, "loss": 2.3405, "step": 178510 }, { "epoch": 0.6901083947982867, "grad_norm": 0.1034003421664238, "learning_rate": 0.002, "loss": 2.3504, "step": 178520 }, { "epoch": 0.69014705200167, "grad_norm": 0.09764369577169418, "learning_rate": 0.002, "loss": 2.3436, "step": 178530 }, { "epoch": 0.6901857092050533, "grad_norm": 0.09645101428031921, "learning_rate": 0.002, "loss": 2.3509, "step": 178540 }, { "epoch": 0.6902243664084365, "grad_norm": 0.11894281208515167, "learning_rate": 0.002, "loss": 2.3482, "step": 178550 }, { "epoch": 0.6902630236118198, "grad_norm": 0.11207878589630127, "learning_rate": 0.002, "loss": 2.3353, "step": 178560 }, { "epoch": 0.6903016808152032, "grad_norm": 0.09871028363704681, "learning_rate": 0.002, "loss": 2.3482, "step": 178570 }, { "epoch": 0.6903403380185864, "grad_norm": 0.1066136583685875, "learning_rate": 0.002, "loss": 2.3431, "step": 178580 }, { "epoch": 0.6903789952219697, "grad_norm": 0.12492348998785019, "learning_rate": 0.002, "loss": 2.3441, "step": 178590 }, { "epoch": 0.6904176524253529, "grad_norm": 0.11499127000570297, "learning_rate": 0.002, "loss": 2.3343, "step": 178600 }, { "epoch": 0.6904563096287362, "grad_norm": 0.11852894723415375, "learning_rate": 0.002, "loss": 2.3425, "step": 178610 }, { "epoch": 0.6904949668321195, "grad_norm": 0.13737890124320984, "learning_rate": 0.002, "loss": 2.3377, "step": 178620 }, { "epoch": 0.6905336240355028, "grad_norm": 0.10862556099891663, "learning_rate": 0.002, "loss": 2.3422, "step": 178630 }, { "epoch": 0.690572281238886, "grad_norm": 0.1020117849111557, "learning_rate": 0.002, "loss": 2.3452, "step": 178640 }, { "epoch": 0.6906109384422693, "grad_norm": 0.12550663948059082, "learning_rate": 0.002, "loss": 2.3397, "step": 178650 }, { "epoch": 0.6906495956456526, "grad_norm": 0.09471151977777481, "learning_rate": 0.002, "loss": 2.3453, "step": 178660 }, { "epoch": 0.6906882528490359, "grad_norm": 0.09872092306613922, "learning_rate": 0.002, "loss": 2.333, "step": 178670 }, { "epoch": 0.6907269100524192, "grad_norm": 0.10408961027860641, "learning_rate": 0.002, "loss": 2.3365, "step": 178680 }, { "epoch": 0.6907655672558024, "grad_norm": 0.1041586622595787, "learning_rate": 0.002, "loss": 2.3505, "step": 178690 }, { "epoch": 0.6908042244591858, "grad_norm": 0.11138633638620377, "learning_rate": 0.002, "loss": 2.3295, "step": 178700 }, { "epoch": 0.690842881662569, "grad_norm": 0.09857725352048874, "learning_rate": 0.002, "loss": 2.3275, "step": 178710 }, { "epoch": 0.6908815388659523, "grad_norm": 0.10884217172861099, "learning_rate": 0.002, "loss": 2.3501, "step": 178720 }, { "epoch": 0.6909201960693355, "grad_norm": 0.1148877963423729, "learning_rate": 0.002, "loss": 2.3404, "step": 178730 }, { "epoch": 0.6909588532727189, "grad_norm": 0.10008355230093002, "learning_rate": 0.002, "loss": 2.334, "step": 178740 }, { "epoch": 0.6909975104761021, "grad_norm": 0.09689811617136002, "learning_rate": 0.002, "loss": 2.3538, "step": 178750 }, { "epoch": 0.6910361676794854, "grad_norm": 0.11208871006965637, "learning_rate": 0.002, "loss": 2.3255, "step": 178760 }, { "epoch": 0.6910748248828686, "grad_norm": 0.11783494800329208, "learning_rate": 0.002, "loss": 2.3382, "step": 178770 }, { "epoch": 0.691113482086252, "grad_norm": 0.10670506209135056, "learning_rate": 0.002, "loss": 2.3312, "step": 178780 }, { "epoch": 0.6911521392896353, "grad_norm": 0.10943397879600525, "learning_rate": 0.002, "loss": 2.3463, "step": 178790 }, { "epoch": 0.6911907964930185, "grad_norm": 0.10499881953001022, "learning_rate": 0.002, "loss": 2.3457, "step": 178800 }, { "epoch": 0.6912294536964018, "grad_norm": 0.10083287954330444, "learning_rate": 0.002, "loss": 2.3441, "step": 178810 }, { "epoch": 0.691268110899785, "grad_norm": 0.10064470022916794, "learning_rate": 0.002, "loss": 2.3344, "step": 178820 }, { "epoch": 0.6913067681031684, "grad_norm": 0.09685681015253067, "learning_rate": 0.002, "loss": 2.341, "step": 178830 }, { "epoch": 0.6913454253065516, "grad_norm": 0.12001106888055801, "learning_rate": 0.002, "loss": 2.3165, "step": 178840 }, { "epoch": 0.6913840825099349, "grad_norm": 0.09279981255531311, "learning_rate": 0.002, "loss": 2.3415, "step": 178850 }, { "epoch": 0.6914227397133181, "grad_norm": 0.09641458094120026, "learning_rate": 0.002, "loss": 2.3543, "step": 178860 }, { "epoch": 0.6914613969167015, "grad_norm": 0.10775553435087204, "learning_rate": 0.002, "loss": 2.3542, "step": 178870 }, { "epoch": 0.6915000541200848, "grad_norm": 0.09094482660293579, "learning_rate": 0.002, "loss": 2.3398, "step": 178880 }, { "epoch": 0.691538711323468, "grad_norm": 0.11673973500728607, "learning_rate": 0.002, "loss": 2.3417, "step": 178890 }, { "epoch": 0.6915773685268513, "grad_norm": 0.12628406286239624, "learning_rate": 0.002, "loss": 2.3409, "step": 178900 }, { "epoch": 0.6916160257302346, "grad_norm": 0.11211872845888138, "learning_rate": 0.002, "loss": 2.345, "step": 178910 }, { "epoch": 0.6916546829336179, "grad_norm": 0.0927414521574974, "learning_rate": 0.002, "loss": 2.3294, "step": 178920 }, { "epoch": 0.6916933401370011, "grad_norm": 0.1253931224346161, "learning_rate": 0.002, "loss": 2.3566, "step": 178930 }, { "epoch": 0.6917319973403844, "grad_norm": 0.09514501690864563, "learning_rate": 0.002, "loss": 2.3407, "step": 178940 }, { "epoch": 0.6917706545437677, "grad_norm": 0.12133972346782684, "learning_rate": 0.002, "loss": 2.36, "step": 178950 }, { "epoch": 0.691809311747151, "grad_norm": 0.10072533041238785, "learning_rate": 0.002, "loss": 2.3441, "step": 178960 }, { "epoch": 0.6918479689505342, "grad_norm": 0.1324368715286255, "learning_rate": 0.002, "loss": 2.3519, "step": 178970 }, { "epoch": 0.6918866261539175, "grad_norm": 0.10226622968912125, "learning_rate": 0.002, "loss": 2.3378, "step": 178980 }, { "epoch": 0.6919252833573007, "grad_norm": 0.09472037851810455, "learning_rate": 0.002, "loss": 2.3507, "step": 178990 }, { "epoch": 0.6919639405606841, "grad_norm": 0.10809981822967529, "learning_rate": 0.002, "loss": 2.3474, "step": 179000 }, { "epoch": 0.6920025977640674, "grad_norm": 0.097027488052845, "learning_rate": 0.002, "loss": 2.3421, "step": 179010 }, { "epoch": 0.6920412549674506, "grad_norm": 0.12362071126699448, "learning_rate": 0.002, "loss": 2.3361, "step": 179020 }, { "epoch": 0.6920799121708339, "grad_norm": 0.0996069461107254, "learning_rate": 0.002, "loss": 2.3258, "step": 179030 }, { "epoch": 0.6921185693742172, "grad_norm": 0.0949220284819603, "learning_rate": 0.002, "loss": 2.3372, "step": 179040 }, { "epoch": 0.6921572265776005, "grad_norm": 0.13637186586856842, "learning_rate": 0.002, "loss": 2.3602, "step": 179050 }, { "epoch": 0.6921958837809837, "grad_norm": 0.10567469894886017, "learning_rate": 0.002, "loss": 2.3533, "step": 179060 }, { "epoch": 0.692234540984367, "grad_norm": 0.10000057518482208, "learning_rate": 0.002, "loss": 2.348, "step": 179070 }, { "epoch": 0.6922731981877503, "grad_norm": 0.13319118320941925, "learning_rate": 0.002, "loss": 2.3466, "step": 179080 }, { "epoch": 0.6923118553911336, "grad_norm": 0.1000233143568039, "learning_rate": 0.002, "loss": 2.3363, "step": 179090 }, { "epoch": 0.6923505125945169, "grad_norm": 0.1102403849363327, "learning_rate": 0.002, "loss": 2.3467, "step": 179100 }, { "epoch": 0.6923891697979001, "grad_norm": 0.10236970335245132, "learning_rate": 0.002, "loss": 2.3285, "step": 179110 }, { "epoch": 0.6924278270012835, "grad_norm": 0.10387825965881348, "learning_rate": 0.002, "loss": 2.3389, "step": 179120 }, { "epoch": 0.6924664842046667, "grad_norm": 0.09711598604917526, "learning_rate": 0.002, "loss": 2.3353, "step": 179130 }, { "epoch": 0.69250514140805, "grad_norm": 0.10135672986507416, "learning_rate": 0.002, "loss": 2.3501, "step": 179140 }, { "epoch": 0.6925437986114332, "grad_norm": 0.10131373256444931, "learning_rate": 0.002, "loss": 2.3357, "step": 179150 }, { "epoch": 0.6925824558148165, "grad_norm": 0.10842801630496979, "learning_rate": 0.002, "loss": 2.3543, "step": 179160 }, { "epoch": 0.6926211130181998, "grad_norm": 0.13098689913749695, "learning_rate": 0.002, "loss": 2.3679, "step": 179170 }, { "epoch": 0.6926597702215831, "grad_norm": 0.09229902923107147, "learning_rate": 0.002, "loss": 2.3226, "step": 179180 }, { "epoch": 0.6926984274249663, "grad_norm": 0.09704066067934036, "learning_rate": 0.002, "loss": 2.3252, "step": 179190 }, { "epoch": 0.6927370846283496, "grad_norm": 0.10814200341701508, "learning_rate": 0.002, "loss": 2.3155, "step": 179200 }, { "epoch": 0.692775741831733, "grad_norm": 0.11031364649534225, "learning_rate": 0.002, "loss": 2.358, "step": 179210 }, { "epoch": 0.6928143990351162, "grad_norm": 0.10160470753908157, "learning_rate": 0.002, "loss": 2.3455, "step": 179220 }, { "epoch": 0.6928530562384995, "grad_norm": 0.1103799045085907, "learning_rate": 0.002, "loss": 2.3339, "step": 179230 }, { "epoch": 0.6928917134418827, "grad_norm": 0.11409421265125275, "learning_rate": 0.002, "loss": 2.3543, "step": 179240 }, { "epoch": 0.6929303706452661, "grad_norm": 0.12143899500370026, "learning_rate": 0.002, "loss": 2.3438, "step": 179250 }, { "epoch": 0.6929690278486493, "grad_norm": 0.10524741560220718, "learning_rate": 0.002, "loss": 2.3272, "step": 179260 }, { "epoch": 0.6930076850520326, "grad_norm": 0.10471028834581375, "learning_rate": 0.002, "loss": 2.3419, "step": 179270 }, { "epoch": 0.6930463422554158, "grad_norm": 0.10988292843103409, "learning_rate": 0.002, "loss": 2.3515, "step": 179280 }, { "epoch": 0.6930849994587992, "grad_norm": 0.0952146053314209, "learning_rate": 0.002, "loss": 2.3367, "step": 179290 }, { "epoch": 0.6931236566621825, "grad_norm": 0.11299467831850052, "learning_rate": 0.002, "loss": 2.3538, "step": 179300 }, { "epoch": 0.6931623138655657, "grad_norm": 0.09987322986125946, "learning_rate": 0.002, "loss": 2.3528, "step": 179310 }, { "epoch": 0.693200971068949, "grad_norm": 0.09584300220012665, "learning_rate": 0.002, "loss": 2.339, "step": 179320 }, { "epoch": 0.6932396282723323, "grad_norm": 0.1155521422624588, "learning_rate": 0.002, "loss": 2.35, "step": 179330 }, { "epoch": 0.6932782854757156, "grad_norm": 0.10797114670276642, "learning_rate": 0.002, "loss": 2.3409, "step": 179340 }, { "epoch": 0.6933169426790988, "grad_norm": 0.12133597582578659, "learning_rate": 0.002, "loss": 2.3372, "step": 179350 }, { "epoch": 0.6933555998824821, "grad_norm": 0.09763203561306, "learning_rate": 0.002, "loss": 2.3479, "step": 179360 }, { "epoch": 0.6933942570858653, "grad_norm": 0.10273556411266327, "learning_rate": 0.002, "loss": 2.3269, "step": 179370 }, { "epoch": 0.6934329142892487, "grad_norm": 0.13102386891841888, "learning_rate": 0.002, "loss": 2.3358, "step": 179380 }, { "epoch": 0.6934715714926319, "grad_norm": 0.10223275423049927, "learning_rate": 0.002, "loss": 2.3457, "step": 179390 }, { "epoch": 0.6935102286960152, "grad_norm": 0.09479638189077377, "learning_rate": 0.002, "loss": 2.3371, "step": 179400 }, { "epoch": 0.6935488858993984, "grad_norm": 0.10030090063810349, "learning_rate": 0.002, "loss": 2.3376, "step": 179410 }, { "epoch": 0.6935875431027818, "grad_norm": 0.10234583169221878, "learning_rate": 0.002, "loss": 2.3565, "step": 179420 }, { "epoch": 0.6936262003061651, "grad_norm": 0.12980692088603973, "learning_rate": 0.002, "loss": 2.343, "step": 179430 }, { "epoch": 0.6936648575095483, "grad_norm": 0.08630619943141937, "learning_rate": 0.002, "loss": 2.3512, "step": 179440 }, { "epoch": 0.6937035147129316, "grad_norm": 0.12653999030590057, "learning_rate": 0.002, "loss": 2.3369, "step": 179450 }, { "epoch": 0.6937421719163149, "grad_norm": 0.0999857485294342, "learning_rate": 0.002, "loss": 2.3424, "step": 179460 }, { "epoch": 0.6937808291196982, "grad_norm": 0.09825851023197174, "learning_rate": 0.002, "loss": 2.3252, "step": 179470 }, { "epoch": 0.6938194863230814, "grad_norm": 0.1375643014907837, "learning_rate": 0.002, "loss": 2.3333, "step": 179480 }, { "epoch": 0.6938581435264647, "grad_norm": 0.11085304617881775, "learning_rate": 0.002, "loss": 2.3459, "step": 179490 }, { "epoch": 0.693896800729848, "grad_norm": 0.09401252865791321, "learning_rate": 0.002, "loss": 2.3482, "step": 179500 }, { "epoch": 0.6939354579332313, "grad_norm": 0.10243132710456848, "learning_rate": 0.002, "loss": 2.3389, "step": 179510 }, { "epoch": 0.6939741151366146, "grad_norm": 0.10913652926683426, "learning_rate": 0.002, "loss": 2.3309, "step": 179520 }, { "epoch": 0.6940127723399978, "grad_norm": 0.14689086377620697, "learning_rate": 0.002, "loss": 2.3392, "step": 179530 }, { "epoch": 0.6940514295433811, "grad_norm": 0.1280713528394699, "learning_rate": 0.002, "loss": 2.3277, "step": 179540 }, { "epoch": 0.6940900867467644, "grad_norm": 0.10417623072862625, "learning_rate": 0.002, "loss": 2.3417, "step": 179550 }, { "epoch": 0.6941287439501477, "grad_norm": 0.09852035343647003, "learning_rate": 0.002, "loss": 2.3449, "step": 179560 }, { "epoch": 0.6941674011535309, "grad_norm": 0.11538185924291611, "learning_rate": 0.002, "loss": 2.316, "step": 179570 }, { "epoch": 0.6942060583569142, "grad_norm": 0.11690109223127365, "learning_rate": 0.002, "loss": 2.3432, "step": 179580 }, { "epoch": 0.6942447155602975, "grad_norm": 0.10348938405513763, "learning_rate": 0.002, "loss": 2.3372, "step": 179590 }, { "epoch": 0.6942833727636808, "grad_norm": 0.1061256155371666, "learning_rate": 0.002, "loss": 2.3422, "step": 179600 }, { "epoch": 0.694322029967064, "grad_norm": 0.09781806915998459, "learning_rate": 0.002, "loss": 2.3313, "step": 179610 }, { "epoch": 0.6943606871704473, "grad_norm": 0.09417387843132019, "learning_rate": 0.002, "loss": 2.3308, "step": 179620 }, { "epoch": 0.6943993443738307, "grad_norm": 0.09854403138160706, "learning_rate": 0.002, "loss": 2.3482, "step": 179630 }, { "epoch": 0.6944380015772139, "grad_norm": 0.10567111521959305, "learning_rate": 0.002, "loss": 2.34, "step": 179640 }, { "epoch": 0.6944766587805972, "grad_norm": 0.10450496524572372, "learning_rate": 0.002, "loss": 2.3321, "step": 179650 }, { "epoch": 0.6945153159839804, "grad_norm": 0.08845242857933044, "learning_rate": 0.002, "loss": 2.3489, "step": 179660 }, { "epoch": 0.6945539731873638, "grad_norm": 0.09574896097183228, "learning_rate": 0.002, "loss": 2.3407, "step": 179670 }, { "epoch": 0.694592630390747, "grad_norm": 0.09121603518724442, "learning_rate": 0.002, "loss": 2.3416, "step": 179680 }, { "epoch": 0.6946312875941303, "grad_norm": 0.10315220803022385, "learning_rate": 0.002, "loss": 2.347, "step": 179690 }, { "epoch": 0.6946699447975135, "grad_norm": 0.10953135043382645, "learning_rate": 0.002, "loss": 2.3438, "step": 179700 }, { "epoch": 0.6947086020008969, "grad_norm": 0.10534404963254929, "learning_rate": 0.002, "loss": 2.3344, "step": 179710 }, { "epoch": 0.6947472592042802, "grad_norm": 0.10050157457590103, "learning_rate": 0.002, "loss": 2.3427, "step": 179720 }, { "epoch": 0.6947859164076634, "grad_norm": 0.0944216325879097, "learning_rate": 0.002, "loss": 2.3399, "step": 179730 }, { "epoch": 0.6948245736110467, "grad_norm": 0.13836099207401276, "learning_rate": 0.002, "loss": 2.3487, "step": 179740 }, { "epoch": 0.6948632308144299, "grad_norm": 0.10789356380701065, "learning_rate": 0.002, "loss": 2.3427, "step": 179750 }, { "epoch": 0.6949018880178133, "grad_norm": 0.11295302212238312, "learning_rate": 0.002, "loss": 2.3294, "step": 179760 }, { "epoch": 0.6949405452211965, "grad_norm": 0.11282958835363388, "learning_rate": 0.002, "loss": 2.355, "step": 179770 }, { "epoch": 0.6949792024245798, "grad_norm": 0.09731011837720871, "learning_rate": 0.002, "loss": 2.3404, "step": 179780 }, { "epoch": 0.695017859627963, "grad_norm": 0.10323044657707214, "learning_rate": 0.002, "loss": 2.3425, "step": 179790 }, { "epoch": 0.6950565168313464, "grad_norm": 0.1089772880077362, "learning_rate": 0.002, "loss": 2.3367, "step": 179800 }, { "epoch": 0.6950951740347296, "grad_norm": 0.11274880915880203, "learning_rate": 0.002, "loss": 2.3388, "step": 179810 }, { "epoch": 0.6951338312381129, "grad_norm": 0.1387518048286438, "learning_rate": 0.002, "loss": 2.3573, "step": 179820 }, { "epoch": 0.6951724884414962, "grad_norm": 0.10620445013046265, "learning_rate": 0.002, "loss": 2.3327, "step": 179830 }, { "epoch": 0.6952111456448795, "grad_norm": 0.10442688316106796, "learning_rate": 0.002, "loss": 2.3344, "step": 179840 }, { "epoch": 0.6952498028482628, "grad_norm": 0.09788144379854202, "learning_rate": 0.002, "loss": 2.3468, "step": 179850 }, { "epoch": 0.695288460051646, "grad_norm": 0.09841413050889969, "learning_rate": 0.002, "loss": 2.3341, "step": 179860 }, { "epoch": 0.6953271172550293, "grad_norm": 0.12703649699687958, "learning_rate": 0.002, "loss": 2.3209, "step": 179870 }, { "epoch": 0.6953657744584126, "grad_norm": 0.1050918772816658, "learning_rate": 0.002, "loss": 2.331, "step": 179880 }, { "epoch": 0.6954044316617959, "grad_norm": 0.10114361345767975, "learning_rate": 0.002, "loss": 2.3434, "step": 179890 }, { "epoch": 0.6954430888651791, "grad_norm": 0.10773694515228271, "learning_rate": 0.002, "loss": 2.3562, "step": 179900 }, { "epoch": 0.6954817460685624, "grad_norm": 0.10803357511758804, "learning_rate": 0.002, "loss": 2.3389, "step": 179910 }, { "epoch": 0.6955204032719456, "grad_norm": 0.0990663468837738, "learning_rate": 0.002, "loss": 2.3349, "step": 179920 }, { "epoch": 0.695559060475329, "grad_norm": 0.08901944756507874, "learning_rate": 0.002, "loss": 2.3426, "step": 179930 }, { "epoch": 0.6955977176787123, "grad_norm": 0.11902262270450592, "learning_rate": 0.002, "loss": 2.3505, "step": 179940 }, { "epoch": 0.6956363748820955, "grad_norm": 0.0996413454413414, "learning_rate": 0.002, "loss": 2.3415, "step": 179950 }, { "epoch": 0.6956750320854788, "grad_norm": 0.11540281772613525, "learning_rate": 0.002, "loss": 2.3353, "step": 179960 }, { "epoch": 0.6957136892888621, "grad_norm": 0.11333900690078735, "learning_rate": 0.002, "loss": 2.328, "step": 179970 }, { "epoch": 0.6957523464922454, "grad_norm": 0.12207765132188797, "learning_rate": 0.002, "loss": 2.3244, "step": 179980 }, { "epoch": 0.6957910036956286, "grad_norm": 0.10058306157588959, "learning_rate": 0.002, "loss": 2.3415, "step": 179990 }, { "epoch": 0.6958296608990119, "grad_norm": 0.10925924777984619, "learning_rate": 0.002, "loss": 2.3543, "step": 180000 }, { "epoch": 0.6958683181023952, "grad_norm": 0.0989830419421196, "learning_rate": 0.002, "loss": 2.3362, "step": 180010 }, { "epoch": 0.6959069753057785, "grad_norm": 0.1015496775507927, "learning_rate": 0.002, "loss": 2.3567, "step": 180020 }, { "epoch": 0.6959456325091617, "grad_norm": 0.2994261085987091, "learning_rate": 0.002, "loss": 2.3378, "step": 180030 }, { "epoch": 0.695984289712545, "grad_norm": 0.11924055218696594, "learning_rate": 0.002, "loss": 2.3488, "step": 180040 }, { "epoch": 0.6960229469159284, "grad_norm": 0.10244481265544891, "learning_rate": 0.002, "loss": 2.3466, "step": 180050 }, { "epoch": 0.6960616041193116, "grad_norm": 0.09961318224668503, "learning_rate": 0.002, "loss": 2.335, "step": 180060 }, { "epoch": 0.6961002613226949, "grad_norm": 0.11061107367277145, "learning_rate": 0.002, "loss": 2.3326, "step": 180070 }, { "epoch": 0.6961389185260781, "grad_norm": 0.10321884602308273, "learning_rate": 0.002, "loss": 2.3464, "step": 180080 }, { "epoch": 0.6961775757294614, "grad_norm": 0.11215357482433319, "learning_rate": 0.002, "loss": 2.342, "step": 180090 }, { "epoch": 0.6962162329328447, "grad_norm": 0.11976996064186096, "learning_rate": 0.002, "loss": 2.3453, "step": 180100 }, { "epoch": 0.696254890136228, "grad_norm": 0.10721524804830551, "learning_rate": 0.002, "loss": 2.3523, "step": 180110 }, { "epoch": 0.6962935473396112, "grad_norm": 0.1116723120212555, "learning_rate": 0.002, "loss": 2.3459, "step": 180120 }, { "epoch": 0.6963322045429945, "grad_norm": 0.22823664546012878, "learning_rate": 0.002, "loss": 2.3345, "step": 180130 }, { "epoch": 0.6963708617463779, "grad_norm": 0.10076408833265305, "learning_rate": 0.002, "loss": 2.345, "step": 180140 }, { "epoch": 0.6964095189497611, "grad_norm": 0.09941043704748154, "learning_rate": 0.002, "loss": 2.3299, "step": 180150 }, { "epoch": 0.6964481761531444, "grad_norm": 0.13223543763160706, "learning_rate": 0.002, "loss": 2.3475, "step": 180160 }, { "epoch": 0.6964868333565276, "grad_norm": 0.10886907577514648, "learning_rate": 0.002, "loss": 2.3363, "step": 180170 }, { "epoch": 0.696525490559911, "grad_norm": 0.1024252250790596, "learning_rate": 0.002, "loss": 2.3462, "step": 180180 }, { "epoch": 0.6965641477632942, "grad_norm": 0.10686971247196198, "learning_rate": 0.002, "loss": 2.3486, "step": 180190 }, { "epoch": 0.6966028049666775, "grad_norm": 0.10215549170970917, "learning_rate": 0.002, "loss": 2.3396, "step": 180200 }, { "epoch": 0.6966414621700607, "grad_norm": 0.10930994153022766, "learning_rate": 0.002, "loss": 2.3392, "step": 180210 }, { "epoch": 0.6966801193734441, "grad_norm": 0.09673038125038147, "learning_rate": 0.002, "loss": 2.3469, "step": 180220 }, { "epoch": 0.6967187765768273, "grad_norm": 0.11928710341453552, "learning_rate": 0.002, "loss": 2.3501, "step": 180230 }, { "epoch": 0.6967574337802106, "grad_norm": 0.1123550608754158, "learning_rate": 0.002, "loss": 2.342, "step": 180240 }, { "epoch": 0.6967960909835939, "grad_norm": 0.08652473986148834, "learning_rate": 0.002, "loss": 2.3448, "step": 180250 }, { "epoch": 0.6968347481869772, "grad_norm": 0.09731192886829376, "learning_rate": 0.002, "loss": 2.3348, "step": 180260 }, { "epoch": 0.6968734053903605, "grad_norm": 0.10636240988969803, "learning_rate": 0.002, "loss": 2.3515, "step": 180270 }, { "epoch": 0.6969120625937437, "grad_norm": 0.10812932252883911, "learning_rate": 0.002, "loss": 2.3452, "step": 180280 }, { "epoch": 0.696950719797127, "grad_norm": 0.10161454230546951, "learning_rate": 0.002, "loss": 2.3646, "step": 180290 }, { "epoch": 0.6969893770005102, "grad_norm": 0.10729075968265533, "learning_rate": 0.002, "loss": 2.3277, "step": 180300 }, { "epoch": 0.6970280342038936, "grad_norm": 0.11432657390832901, "learning_rate": 0.002, "loss": 2.3439, "step": 180310 }, { "epoch": 0.6970666914072768, "grad_norm": 0.09904944151639938, "learning_rate": 0.002, "loss": 2.3511, "step": 180320 }, { "epoch": 0.6971053486106601, "grad_norm": 0.09000122547149658, "learning_rate": 0.002, "loss": 2.351, "step": 180330 }, { "epoch": 0.6971440058140433, "grad_norm": 0.10641942173242569, "learning_rate": 0.002, "loss": 2.3313, "step": 180340 }, { "epoch": 0.6971826630174267, "grad_norm": 0.11361725628376007, "learning_rate": 0.002, "loss": 2.3394, "step": 180350 }, { "epoch": 0.69722132022081, "grad_norm": 0.11069035530090332, "learning_rate": 0.002, "loss": 2.3368, "step": 180360 }, { "epoch": 0.6972599774241932, "grad_norm": 0.10378896445035934, "learning_rate": 0.002, "loss": 2.3366, "step": 180370 }, { "epoch": 0.6972986346275765, "grad_norm": 0.1104269027709961, "learning_rate": 0.002, "loss": 2.3585, "step": 180380 }, { "epoch": 0.6973372918309598, "grad_norm": 0.09437578916549683, "learning_rate": 0.002, "loss": 2.3431, "step": 180390 }, { "epoch": 0.6973759490343431, "grad_norm": 0.11597743630409241, "learning_rate": 0.002, "loss": 2.3394, "step": 180400 }, { "epoch": 0.6974146062377263, "grad_norm": 0.10915557295084, "learning_rate": 0.002, "loss": 2.3528, "step": 180410 }, { "epoch": 0.6974532634411096, "grad_norm": 0.09487058222293854, "learning_rate": 0.002, "loss": 2.3452, "step": 180420 }, { "epoch": 0.697491920644493, "grad_norm": 0.11591614037752151, "learning_rate": 0.002, "loss": 2.3432, "step": 180430 }, { "epoch": 0.6975305778478762, "grad_norm": 0.12642371654510498, "learning_rate": 0.002, "loss": 2.3436, "step": 180440 }, { "epoch": 0.6975692350512595, "grad_norm": 0.10184739530086517, "learning_rate": 0.002, "loss": 2.3509, "step": 180450 }, { "epoch": 0.6976078922546427, "grad_norm": 0.09792407602071762, "learning_rate": 0.002, "loss": 2.3489, "step": 180460 }, { "epoch": 0.697646549458026, "grad_norm": 0.14495137333869934, "learning_rate": 0.002, "loss": 2.3512, "step": 180470 }, { "epoch": 0.6976852066614093, "grad_norm": 0.10353922843933105, "learning_rate": 0.002, "loss": 2.3271, "step": 180480 }, { "epoch": 0.6977238638647926, "grad_norm": 0.09864120930433273, "learning_rate": 0.002, "loss": 2.3349, "step": 180490 }, { "epoch": 0.6977625210681758, "grad_norm": 0.10658890753984451, "learning_rate": 0.002, "loss": 2.359, "step": 180500 }, { "epoch": 0.6978011782715591, "grad_norm": 0.1569095402956009, "learning_rate": 0.002, "loss": 2.35, "step": 180510 }, { "epoch": 0.6978398354749424, "grad_norm": 0.11538206785917282, "learning_rate": 0.002, "loss": 2.348, "step": 180520 }, { "epoch": 0.6978784926783257, "grad_norm": 0.09712937474250793, "learning_rate": 0.002, "loss": 2.3435, "step": 180530 }, { "epoch": 0.6979171498817089, "grad_norm": 0.11353567987680435, "learning_rate": 0.002, "loss": 2.3272, "step": 180540 }, { "epoch": 0.6979558070850922, "grad_norm": 0.09400054067373276, "learning_rate": 0.002, "loss": 2.3394, "step": 180550 }, { "epoch": 0.6979944642884756, "grad_norm": 0.11250081658363342, "learning_rate": 0.002, "loss": 2.3439, "step": 180560 }, { "epoch": 0.6980331214918588, "grad_norm": 0.10020963102579117, "learning_rate": 0.002, "loss": 2.3397, "step": 180570 }, { "epoch": 0.6980717786952421, "grad_norm": 0.21803762018680573, "learning_rate": 0.002, "loss": 2.348, "step": 180580 }, { "epoch": 0.6981104358986253, "grad_norm": 0.11189590394496918, "learning_rate": 0.002, "loss": 2.3279, "step": 180590 }, { "epoch": 0.6981490931020087, "grad_norm": 0.10142678022384644, "learning_rate": 0.002, "loss": 2.3452, "step": 180600 }, { "epoch": 0.6981877503053919, "grad_norm": 0.09760603308677673, "learning_rate": 0.002, "loss": 2.3426, "step": 180610 }, { "epoch": 0.6982264075087752, "grad_norm": 0.10050047934055328, "learning_rate": 0.002, "loss": 2.3501, "step": 180620 }, { "epoch": 0.6982650647121584, "grad_norm": 0.11918167769908905, "learning_rate": 0.002, "loss": 2.3436, "step": 180630 }, { "epoch": 0.6983037219155418, "grad_norm": 0.11384829878807068, "learning_rate": 0.002, "loss": 2.3415, "step": 180640 }, { "epoch": 0.698342379118925, "grad_norm": 0.10176576673984528, "learning_rate": 0.002, "loss": 2.3373, "step": 180650 }, { "epoch": 0.6983810363223083, "grad_norm": 0.10738183557987213, "learning_rate": 0.002, "loss": 2.3396, "step": 180660 }, { "epoch": 0.6984196935256916, "grad_norm": 0.10513759404420853, "learning_rate": 0.002, "loss": 2.3376, "step": 180670 }, { "epoch": 0.6984583507290748, "grad_norm": 0.09060550481081009, "learning_rate": 0.002, "loss": 2.3509, "step": 180680 }, { "epoch": 0.6984970079324582, "grad_norm": 0.09310439974069595, "learning_rate": 0.002, "loss": 2.3405, "step": 180690 }, { "epoch": 0.6985356651358414, "grad_norm": 0.10888612270355225, "learning_rate": 0.002, "loss": 2.3586, "step": 180700 }, { "epoch": 0.6985743223392247, "grad_norm": 0.116920605301857, "learning_rate": 0.002, "loss": 2.3447, "step": 180710 }, { "epoch": 0.6986129795426079, "grad_norm": 0.12050127238035202, "learning_rate": 0.002, "loss": 2.3474, "step": 180720 }, { "epoch": 0.6986516367459913, "grad_norm": 0.08785030990839005, "learning_rate": 0.002, "loss": 2.3383, "step": 180730 }, { "epoch": 0.6986902939493745, "grad_norm": 0.11219271272420883, "learning_rate": 0.002, "loss": 2.3359, "step": 180740 }, { "epoch": 0.6987289511527578, "grad_norm": 0.10325204581022263, "learning_rate": 0.002, "loss": 2.3511, "step": 180750 }, { "epoch": 0.698767608356141, "grad_norm": 0.10277462750673294, "learning_rate": 0.002, "loss": 2.3436, "step": 180760 }, { "epoch": 0.6988062655595244, "grad_norm": 0.11527806520462036, "learning_rate": 0.002, "loss": 2.3347, "step": 180770 }, { "epoch": 0.6988449227629077, "grad_norm": 0.10413837432861328, "learning_rate": 0.002, "loss": 2.3305, "step": 180780 }, { "epoch": 0.6988835799662909, "grad_norm": 0.13020369410514832, "learning_rate": 0.002, "loss": 2.3253, "step": 180790 }, { "epoch": 0.6989222371696742, "grad_norm": 0.11335482448339462, "learning_rate": 0.002, "loss": 2.3411, "step": 180800 }, { "epoch": 0.6989608943730575, "grad_norm": 0.11713720858097076, "learning_rate": 0.002, "loss": 2.3276, "step": 180810 }, { "epoch": 0.6989995515764408, "grad_norm": 0.095161572098732, "learning_rate": 0.002, "loss": 2.3439, "step": 180820 }, { "epoch": 0.699038208779824, "grad_norm": 0.10159959644079208, "learning_rate": 0.002, "loss": 2.3361, "step": 180830 }, { "epoch": 0.6990768659832073, "grad_norm": 0.10313326120376587, "learning_rate": 0.002, "loss": 2.3312, "step": 180840 }, { "epoch": 0.6991155231865905, "grad_norm": 0.10190504044294357, "learning_rate": 0.002, "loss": 2.3414, "step": 180850 }, { "epoch": 0.6991541803899739, "grad_norm": 0.11342480778694153, "learning_rate": 0.002, "loss": 2.3388, "step": 180860 }, { "epoch": 0.6991928375933572, "grad_norm": 0.10530469566583633, "learning_rate": 0.002, "loss": 2.3439, "step": 180870 }, { "epoch": 0.6992314947967404, "grad_norm": 0.10882703214883804, "learning_rate": 0.002, "loss": 2.3297, "step": 180880 }, { "epoch": 0.6992701520001237, "grad_norm": 0.10685546696186066, "learning_rate": 0.002, "loss": 2.3452, "step": 180890 }, { "epoch": 0.699308809203507, "grad_norm": 0.10505948960781097, "learning_rate": 0.002, "loss": 2.3296, "step": 180900 }, { "epoch": 0.6993474664068903, "grad_norm": 0.1125546246767044, "learning_rate": 0.002, "loss": 2.3435, "step": 180910 }, { "epoch": 0.6993861236102735, "grad_norm": 0.0941130518913269, "learning_rate": 0.002, "loss": 2.3452, "step": 180920 }, { "epoch": 0.6994247808136568, "grad_norm": 0.10644858330488205, "learning_rate": 0.002, "loss": 2.3287, "step": 180930 }, { "epoch": 0.6994634380170401, "grad_norm": 0.10426277667284012, "learning_rate": 0.002, "loss": 2.3478, "step": 180940 }, { "epoch": 0.6995020952204234, "grad_norm": 0.09964601695537567, "learning_rate": 0.002, "loss": 2.3384, "step": 180950 }, { "epoch": 0.6995407524238066, "grad_norm": 0.09501111507415771, "learning_rate": 0.002, "loss": 2.3317, "step": 180960 }, { "epoch": 0.6995794096271899, "grad_norm": 0.1172516793012619, "learning_rate": 0.002, "loss": 2.3403, "step": 180970 }, { "epoch": 0.6996180668305733, "grad_norm": 0.11361086368560791, "learning_rate": 0.002, "loss": 2.3409, "step": 180980 }, { "epoch": 0.6996567240339565, "grad_norm": 0.11219489574432373, "learning_rate": 0.002, "loss": 2.3384, "step": 180990 }, { "epoch": 0.6996953812373398, "grad_norm": 0.10707321017980576, "learning_rate": 0.002, "loss": 2.3485, "step": 181000 }, { "epoch": 0.699734038440723, "grad_norm": 0.09972088038921356, "learning_rate": 0.002, "loss": 2.3409, "step": 181010 }, { "epoch": 0.6997726956441063, "grad_norm": 0.13037872314453125, "learning_rate": 0.002, "loss": 2.3375, "step": 181020 }, { "epoch": 0.6998113528474896, "grad_norm": 0.0949321910738945, "learning_rate": 0.002, "loss": 2.324, "step": 181030 }, { "epoch": 0.6998500100508729, "grad_norm": 0.09620226174592972, "learning_rate": 0.002, "loss": 2.3339, "step": 181040 }, { "epoch": 0.6998886672542561, "grad_norm": 0.11894714832305908, "learning_rate": 0.002, "loss": 2.3388, "step": 181050 }, { "epoch": 0.6999273244576394, "grad_norm": 0.10436682403087616, "learning_rate": 0.002, "loss": 2.3389, "step": 181060 }, { "epoch": 0.6999659816610228, "grad_norm": 0.08825159817934036, "learning_rate": 0.002, "loss": 2.3306, "step": 181070 }, { "epoch": 0.700004638864406, "grad_norm": 0.11684508621692657, "learning_rate": 0.002, "loss": 2.3328, "step": 181080 }, { "epoch": 0.7000432960677893, "grad_norm": 0.08650648593902588, "learning_rate": 0.002, "loss": 2.3491, "step": 181090 }, { "epoch": 0.7000819532711725, "grad_norm": 0.10749147087335587, "learning_rate": 0.002, "loss": 2.3378, "step": 181100 }, { "epoch": 0.7001206104745559, "grad_norm": 0.10667852312326431, "learning_rate": 0.002, "loss": 2.3532, "step": 181110 }, { "epoch": 0.7001592676779391, "grad_norm": 0.1255321353673935, "learning_rate": 0.002, "loss": 2.3398, "step": 181120 }, { "epoch": 0.7001979248813224, "grad_norm": 0.09984520822763443, "learning_rate": 0.002, "loss": 2.3482, "step": 181130 }, { "epoch": 0.7002365820847056, "grad_norm": 0.12421584129333496, "learning_rate": 0.002, "loss": 2.3317, "step": 181140 }, { "epoch": 0.700275239288089, "grad_norm": 0.11375346034765244, "learning_rate": 0.002, "loss": 2.3371, "step": 181150 }, { "epoch": 0.7003138964914722, "grad_norm": 0.09819741547107697, "learning_rate": 0.002, "loss": 2.3459, "step": 181160 }, { "epoch": 0.7003525536948555, "grad_norm": 0.10152056068181992, "learning_rate": 0.002, "loss": 2.3477, "step": 181170 }, { "epoch": 0.7003912108982387, "grad_norm": 0.100888691842556, "learning_rate": 0.002, "loss": 2.3573, "step": 181180 }, { "epoch": 0.7004298681016221, "grad_norm": 0.1108546033501625, "learning_rate": 0.002, "loss": 2.3403, "step": 181190 }, { "epoch": 0.7004685253050054, "grad_norm": 0.10671539604663849, "learning_rate": 0.002, "loss": 2.3378, "step": 181200 }, { "epoch": 0.7005071825083886, "grad_norm": 0.09765645116567612, "learning_rate": 0.002, "loss": 2.3504, "step": 181210 }, { "epoch": 0.7005458397117719, "grad_norm": 0.11254343390464783, "learning_rate": 0.002, "loss": 2.3295, "step": 181220 }, { "epoch": 0.7005844969151551, "grad_norm": 0.1040448248386383, "learning_rate": 0.002, "loss": 2.3557, "step": 181230 }, { "epoch": 0.7006231541185385, "grad_norm": 0.11068741232156754, "learning_rate": 0.002, "loss": 2.3311, "step": 181240 }, { "epoch": 0.7006618113219217, "grad_norm": 0.12011968344449997, "learning_rate": 0.002, "loss": 2.3475, "step": 181250 }, { "epoch": 0.700700468525305, "grad_norm": 0.09599734842777252, "learning_rate": 0.002, "loss": 2.3351, "step": 181260 }, { "epoch": 0.7007391257286882, "grad_norm": 0.11694858968257904, "learning_rate": 0.002, "loss": 2.3509, "step": 181270 }, { "epoch": 0.7007777829320716, "grad_norm": 0.09414356201887131, "learning_rate": 0.002, "loss": 2.3523, "step": 181280 }, { "epoch": 0.7008164401354549, "grad_norm": 0.12005293369293213, "learning_rate": 0.002, "loss": 2.3564, "step": 181290 }, { "epoch": 0.7008550973388381, "grad_norm": 0.10294341295957565, "learning_rate": 0.002, "loss": 2.3479, "step": 181300 }, { "epoch": 0.7008937545422214, "grad_norm": 0.10715653002262115, "learning_rate": 0.002, "loss": 2.3377, "step": 181310 }, { "epoch": 0.7009324117456047, "grad_norm": 0.11320353299379349, "learning_rate": 0.002, "loss": 2.3324, "step": 181320 }, { "epoch": 0.700971068948988, "grad_norm": 0.13430100679397583, "learning_rate": 0.002, "loss": 2.3496, "step": 181330 }, { "epoch": 0.7010097261523712, "grad_norm": 0.125883087515831, "learning_rate": 0.002, "loss": 2.3326, "step": 181340 }, { "epoch": 0.7010483833557545, "grad_norm": 0.10331200808286667, "learning_rate": 0.002, "loss": 2.3438, "step": 181350 }, { "epoch": 0.7010870405591378, "grad_norm": 0.11117926239967346, "learning_rate": 0.002, "loss": 2.3366, "step": 181360 }, { "epoch": 0.7011256977625211, "grad_norm": 0.1090509444475174, "learning_rate": 0.002, "loss": 2.3336, "step": 181370 }, { "epoch": 0.7011643549659043, "grad_norm": 0.12386742979288101, "learning_rate": 0.002, "loss": 2.3268, "step": 181380 }, { "epoch": 0.7012030121692876, "grad_norm": 0.10557752847671509, "learning_rate": 0.002, "loss": 2.3423, "step": 181390 }, { "epoch": 0.7012416693726709, "grad_norm": 0.10935811698436737, "learning_rate": 0.002, "loss": 2.3391, "step": 181400 }, { "epoch": 0.7012803265760542, "grad_norm": 0.1020449548959732, "learning_rate": 0.002, "loss": 2.3487, "step": 181410 }, { "epoch": 0.7013189837794375, "grad_norm": 0.09770620614290237, "learning_rate": 0.002, "loss": 2.3294, "step": 181420 }, { "epoch": 0.7013576409828207, "grad_norm": 0.13287243247032166, "learning_rate": 0.002, "loss": 2.349, "step": 181430 }, { "epoch": 0.701396298186204, "grad_norm": 0.10809620469808578, "learning_rate": 0.002, "loss": 2.3366, "step": 181440 }, { "epoch": 0.7014349553895873, "grad_norm": 0.09670908004045486, "learning_rate": 0.002, "loss": 2.3405, "step": 181450 }, { "epoch": 0.7014736125929706, "grad_norm": 0.10117173194885254, "learning_rate": 0.002, "loss": 2.3363, "step": 181460 }, { "epoch": 0.7015122697963538, "grad_norm": 0.10399699956178665, "learning_rate": 0.002, "loss": 2.3396, "step": 181470 }, { "epoch": 0.7015509269997371, "grad_norm": 0.09978353977203369, "learning_rate": 0.002, "loss": 2.3284, "step": 181480 }, { "epoch": 0.7015895842031205, "grad_norm": 0.10881523787975311, "learning_rate": 0.002, "loss": 2.3472, "step": 181490 }, { "epoch": 0.7016282414065037, "grad_norm": 0.10354939848184586, "learning_rate": 0.002, "loss": 2.3435, "step": 181500 }, { "epoch": 0.701666898609887, "grad_norm": 0.11343428492546082, "learning_rate": 0.002, "loss": 2.3329, "step": 181510 }, { "epoch": 0.7017055558132702, "grad_norm": 0.10757460445165634, "learning_rate": 0.002, "loss": 2.3102, "step": 181520 }, { "epoch": 0.7017442130166536, "grad_norm": 0.11501331627368927, "learning_rate": 0.002, "loss": 2.3492, "step": 181530 }, { "epoch": 0.7017828702200368, "grad_norm": 0.09403284639120102, "learning_rate": 0.002, "loss": 2.3278, "step": 181540 }, { "epoch": 0.7018215274234201, "grad_norm": 0.11164639890193939, "learning_rate": 0.002, "loss": 2.3376, "step": 181550 }, { "epoch": 0.7018601846268033, "grad_norm": 0.09477049857378006, "learning_rate": 0.002, "loss": 2.338, "step": 181560 }, { "epoch": 0.7018988418301866, "grad_norm": 0.1289137452840805, "learning_rate": 0.002, "loss": 2.3314, "step": 181570 }, { "epoch": 0.70193749903357, "grad_norm": 0.1001296266913414, "learning_rate": 0.002, "loss": 2.3376, "step": 181580 }, { "epoch": 0.7019761562369532, "grad_norm": 0.10789035260677338, "learning_rate": 0.002, "loss": 2.3304, "step": 181590 }, { "epoch": 0.7020148134403364, "grad_norm": 0.11146128922700882, "learning_rate": 0.002, "loss": 2.351, "step": 181600 }, { "epoch": 0.7020534706437197, "grad_norm": 0.11408261954784393, "learning_rate": 0.002, "loss": 2.347, "step": 181610 }, { "epoch": 0.7020921278471031, "grad_norm": 0.13551244139671326, "learning_rate": 0.002, "loss": 2.3419, "step": 181620 }, { "epoch": 0.7021307850504863, "grad_norm": 0.0898752361536026, "learning_rate": 0.002, "loss": 2.3275, "step": 181630 }, { "epoch": 0.7021694422538696, "grad_norm": 0.09439538419246674, "learning_rate": 0.002, "loss": 2.3608, "step": 181640 }, { "epoch": 0.7022080994572528, "grad_norm": 0.11113714426755905, "learning_rate": 0.002, "loss": 2.3501, "step": 181650 }, { "epoch": 0.7022467566606362, "grad_norm": 0.09541790932416916, "learning_rate": 0.002, "loss": 2.3406, "step": 181660 }, { "epoch": 0.7022854138640194, "grad_norm": 0.10134763270616531, "learning_rate": 0.002, "loss": 2.3473, "step": 181670 }, { "epoch": 0.7023240710674027, "grad_norm": 0.1164916381239891, "learning_rate": 0.002, "loss": 2.3436, "step": 181680 }, { "epoch": 0.7023627282707859, "grad_norm": 0.12147685140371323, "learning_rate": 0.002, "loss": 2.3474, "step": 181690 }, { "epoch": 0.7024013854741693, "grad_norm": 0.11087074130773544, "learning_rate": 0.002, "loss": 2.3399, "step": 181700 }, { "epoch": 0.7024400426775526, "grad_norm": 0.09606213122606277, "learning_rate": 0.002, "loss": 2.3357, "step": 181710 }, { "epoch": 0.7024786998809358, "grad_norm": 0.11690132319927216, "learning_rate": 0.002, "loss": 2.341, "step": 181720 }, { "epoch": 0.7025173570843191, "grad_norm": 0.11286009103059769, "learning_rate": 0.002, "loss": 2.3529, "step": 181730 }, { "epoch": 0.7025560142877024, "grad_norm": 0.10286867618560791, "learning_rate": 0.002, "loss": 2.342, "step": 181740 }, { "epoch": 0.7025946714910857, "grad_norm": 0.11863042414188385, "learning_rate": 0.002, "loss": 2.3594, "step": 181750 }, { "epoch": 0.7026333286944689, "grad_norm": 0.10015347599983215, "learning_rate": 0.002, "loss": 2.3382, "step": 181760 }, { "epoch": 0.7026719858978522, "grad_norm": 0.11568449437618256, "learning_rate": 0.002, "loss": 2.3559, "step": 181770 }, { "epoch": 0.7027106431012354, "grad_norm": 0.10303530097007751, "learning_rate": 0.002, "loss": 2.3479, "step": 181780 }, { "epoch": 0.7027493003046188, "grad_norm": 0.09969471395015717, "learning_rate": 0.002, "loss": 2.3217, "step": 181790 }, { "epoch": 0.702787957508002, "grad_norm": 0.09912727028131485, "learning_rate": 0.002, "loss": 2.3352, "step": 181800 }, { "epoch": 0.7028266147113853, "grad_norm": 0.10416741669178009, "learning_rate": 0.002, "loss": 2.3471, "step": 181810 }, { "epoch": 0.7028652719147686, "grad_norm": 0.10566399991512299, "learning_rate": 0.002, "loss": 2.3422, "step": 181820 }, { "epoch": 0.7029039291181519, "grad_norm": 0.10490906238555908, "learning_rate": 0.002, "loss": 2.3413, "step": 181830 }, { "epoch": 0.7029425863215352, "grad_norm": 0.08886059373617172, "learning_rate": 0.002, "loss": 2.347, "step": 181840 }, { "epoch": 0.7029812435249184, "grad_norm": 0.09844937920570374, "learning_rate": 0.002, "loss": 2.3301, "step": 181850 }, { "epoch": 0.7030199007283017, "grad_norm": 0.12183602154254913, "learning_rate": 0.002, "loss": 2.3506, "step": 181860 }, { "epoch": 0.703058557931685, "grad_norm": 0.10586797446012497, "learning_rate": 0.002, "loss": 2.3499, "step": 181870 }, { "epoch": 0.7030972151350683, "grad_norm": 0.10901835560798645, "learning_rate": 0.002, "loss": 2.3277, "step": 181880 }, { "epoch": 0.7031358723384515, "grad_norm": 0.09768293797969818, "learning_rate": 0.002, "loss": 2.3333, "step": 181890 }, { "epoch": 0.7031745295418348, "grad_norm": 0.10768637806177139, "learning_rate": 0.002, "loss": 2.3509, "step": 181900 }, { "epoch": 0.7032131867452182, "grad_norm": 0.13276785612106323, "learning_rate": 0.002, "loss": 2.3644, "step": 181910 }, { "epoch": 0.7032518439486014, "grad_norm": 0.11340388655662537, "learning_rate": 0.002, "loss": 2.3591, "step": 181920 }, { "epoch": 0.7032905011519847, "grad_norm": 0.11737782508134842, "learning_rate": 0.002, "loss": 2.3177, "step": 181930 }, { "epoch": 0.7033291583553679, "grad_norm": 0.11821569502353668, "learning_rate": 0.002, "loss": 2.3338, "step": 181940 }, { "epoch": 0.7033678155587512, "grad_norm": 0.09863719344139099, "learning_rate": 0.002, "loss": 2.3415, "step": 181950 }, { "epoch": 0.7034064727621345, "grad_norm": 0.10446591675281525, "learning_rate": 0.002, "loss": 2.3561, "step": 181960 }, { "epoch": 0.7034451299655178, "grad_norm": 0.11456377059221268, "learning_rate": 0.002, "loss": 2.3373, "step": 181970 }, { "epoch": 0.703483787168901, "grad_norm": 0.11112993210554123, "learning_rate": 0.002, "loss": 2.3292, "step": 181980 }, { "epoch": 0.7035224443722843, "grad_norm": 0.10735056549310684, "learning_rate": 0.002, "loss": 2.3378, "step": 181990 }, { "epoch": 0.7035611015756676, "grad_norm": 0.0971650630235672, "learning_rate": 0.002, "loss": 2.3406, "step": 182000 }, { "epoch": 0.7035997587790509, "grad_norm": 0.10977531224489212, "learning_rate": 0.002, "loss": 2.3369, "step": 182010 }, { "epoch": 0.7036384159824342, "grad_norm": 0.09874717891216278, "learning_rate": 0.002, "loss": 2.3348, "step": 182020 }, { "epoch": 0.7036770731858174, "grad_norm": 0.11938779801130295, "learning_rate": 0.002, "loss": 2.34, "step": 182030 }, { "epoch": 0.7037157303892008, "grad_norm": 0.09552514553070068, "learning_rate": 0.002, "loss": 2.3543, "step": 182040 }, { "epoch": 0.703754387592584, "grad_norm": 0.1094004213809967, "learning_rate": 0.002, "loss": 2.3367, "step": 182050 }, { "epoch": 0.7037930447959673, "grad_norm": 0.11232999712228775, "learning_rate": 0.002, "loss": 2.345, "step": 182060 }, { "epoch": 0.7038317019993505, "grad_norm": 0.1030832976102829, "learning_rate": 0.002, "loss": 2.3349, "step": 182070 }, { "epoch": 0.7038703592027339, "grad_norm": 0.09489600360393524, "learning_rate": 0.002, "loss": 2.3404, "step": 182080 }, { "epoch": 0.7039090164061171, "grad_norm": 0.10776722431182861, "learning_rate": 0.002, "loss": 2.3518, "step": 182090 }, { "epoch": 0.7039476736095004, "grad_norm": 0.11068689823150635, "learning_rate": 0.002, "loss": 2.3286, "step": 182100 }, { "epoch": 0.7039863308128836, "grad_norm": 0.10417316108942032, "learning_rate": 0.002, "loss": 2.3517, "step": 182110 }, { "epoch": 0.704024988016267, "grad_norm": 0.10432534664869308, "learning_rate": 0.002, "loss": 2.328, "step": 182120 }, { "epoch": 0.7040636452196503, "grad_norm": 0.11190181225538254, "learning_rate": 0.002, "loss": 2.3413, "step": 182130 }, { "epoch": 0.7041023024230335, "grad_norm": 0.10109800100326538, "learning_rate": 0.002, "loss": 2.339, "step": 182140 }, { "epoch": 0.7041409596264168, "grad_norm": 0.0988345518708229, "learning_rate": 0.002, "loss": 2.3306, "step": 182150 }, { "epoch": 0.7041796168298, "grad_norm": 0.11042675375938416, "learning_rate": 0.002, "loss": 2.3343, "step": 182160 }, { "epoch": 0.7042182740331834, "grad_norm": 0.10132566094398499, "learning_rate": 0.002, "loss": 2.3545, "step": 182170 }, { "epoch": 0.7042569312365666, "grad_norm": 0.10684694349765778, "learning_rate": 0.002, "loss": 2.3412, "step": 182180 }, { "epoch": 0.7042955884399499, "grad_norm": 0.1098591685295105, "learning_rate": 0.002, "loss": 2.3434, "step": 182190 }, { "epoch": 0.7043342456433331, "grad_norm": 0.09078842401504517, "learning_rate": 0.002, "loss": 2.3363, "step": 182200 }, { "epoch": 0.7043729028467165, "grad_norm": 0.09298959374427795, "learning_rate": 0.002, "loss": 2.3403, "step": 182210 }, { "epoch": 0.7044115600500997, "grad_norm": 0.12122335284948349, "learning_rate": 0.002, "loss": 2.3518, "step": 182220 }, { "epoch": 0.704450217253483, "grad_norm": 0.11889586597681046, "learning_rate": 0.002, "loss": 2.3531, "step": 182230 }, { "epoch": 0.7044888744568663, "grad_norm": 0.13856007158756256, "learning_rate": 0.002, "loss": 2.3555, "step": 182240 }, { "epoch": 0.7045275316602496, "grad_norm": 0.09847620874643326, "learning_rate": 0.002, "loss": 2.3524, "step": 182250 }, { "epoch": 0.7045661888636329, "grad_norm": 0.10424961149692535, "learning_rate": 0.002, "loss": 2.3401, "step": 182260 }, { "epoch": 0.7046048460670161, "grad_norm": 0.09979801625013351, "learning_rate": 0.002, "loss": 2.3499, "step": 182270 }, { "epoch": 0.7046435032703994, "grad_norm": 0.09866438060998917, "learning_rate": 0.002, "loss": 2.3447, "step": 182280 }, { "epoch": 0.7046821604737827, "grad_norm": 0.09678113460540771, "learning_rate": 0.002, "loss": 2.3476, "step": 182290 }, { "epoch": 0.704720817677166, "grad_norm": 0.12021298706531525, "learning_rate": 0.002, "loss": 2.3301, "step": 182300 }, { "epoch": 0.7047594748805492, "grad_norm": 0.12732869386672974, "learning_rate": 0.002, "loss": 2.3376, "step": 182310 }, { "epoch": 0.7047981320839325, "grad_norm": 0.10386567562818527, "learning_rate": 0.002, "loss": 2.3533, "step": 182320 }, { "epoch": 0.7048367892873157, "grad_norm": 0.09794675558805466, "learning_rate": 0.002, "loss": 2.3509, "step": 182330 }, { "epoch": 0.7048754464906991, "grad_norm": 0.09235762059688568, "learning_rate": 0.002, "loss": 2.3368, "step": 182340 }, { "epoch": 0.7049141036940824, "grad_norm": 0.11255183070898056, "learning_rate": 0.002, "loss": 2.3536, "step": 182350 }, { "epoch": 0.7049527608974656, "grad_norm": 0.09853653609752655, "learning_rate": 0.002, "loss": 2.33, "step": 182360 }, { "epoch": 0.7049914181008489, "grad_norm": 0.09108425676822662, "learning_rate": 0.002, "loss": 2.3531, "step": 182370 }, { "epoch": 0.7050300753042322, "grad_norm": 0.10841450095176697, "learning_rate": 0.002, "loss": 2.3384, "step": 182380 }, { "epoch": 0.7050687325076155, "grad_norm": 0.13346466422080994, "learning_rate": 0.002, "loss": 2.3287, "step": 182390 }, { "epoch": 0.7051073897109987, "grad_norm": 0.1046786680817604, "learning_rate": 0.002, "loss": 2.3324, "step": 182400 }, { "epoch": 0.705146046914382, "grad_norm": 0.0977960154414177, "learning_rate": 0.002, "loss": 2.3515, "step": 182410 }, { "epoch": 0.7051847041177653, "grad_norm": 0.09477897733449936, "learning_rate": 0.002, "loss": 2.3495, "step": 182420 }, { "epoch": 0.7052233613211486, "grad_norm": 0.10036720335483551, "learning_rate": 0.002, "loss": 2.3524, "step": 182430 }, { "epoch": 0.7052620185245319, "grad_norm": 0.09965860843658447, "learning_rate": 0.002, "loss": 2.3475, "step": 182440 }, { "epoch": 0.7053006757279151, "grad_norm": 0.09179232269525528, "learning_rate": 0.002, "loss": 2.3337, "step": 182450 }, { "epoch": 0.7053393329312985, "grad_norm": 0.11638399958610535, "learning_rate": 0.002, "loss": 2.342, "step": 182460 }, { "epoch": 0.7053779901346817, "grad_norm": 0.10395177453756332, "learning_rate": 0.002, "loss": 2.3435, "step": 182470 }, { "epoch": 0.705416647338065, "grad_norm": 0.10976772755384445, "learning_rate": 0.002, "loss": 2.3361, "step": 182480 }, { "epoch": 0.7054553045414482, "grad_norm": 0.10409149527549744, "learning_rate": 0.002, "loss": 2.34, "step": 182490 }, { "epoch": 0.7054939617448315, "grad_norm": 0.09813567996025085, "learning_rate": 0.002, "loss": 2.3513, "step": 182500 }, { "epoch": 0.7055326189482148, "grad_norm": 0.1130143404006958, "learning_rate": 0.002, "loss": 2.3502, "step": 182510 }, { "epoch": 0.7055712761515981, "grad_norm": 0.12172242999076843, "learning_rate": 0.002, "loss": 2.3425, "step": 182520 }, { "epoch": 0.7056099333549813, "grad_norm": 0.11206705868244171, "learning_rate": 0.002, "loss": 2.3397, "step": 182530 }, { "epoch": 0.7056485905583646, "grad_norm": 0.09689457714557648, "learning_rate": 0.002, "loss": 2.3528, "step": 182540 }, { "epoch": 0.705687247761748, "grad_norm": 0.10289297997951508, "learning_rate": 0.002, "loss": 2.3468, "step": 182550 }, { "epoch": 0.7057259049651312, "grad_norm": 0.1013554036617279, "learning_rate": 0.002, "loss": 2.3253, "step": 182560 }, { "epoch": 0.7057645621685145, "grad_norm": 0.11464133858680725, "learning_rate": 0.002, "loss": 2.3375, "step": 182570 }, { "epoch": 0.7058032193718977, "grad_norm": 0.10789859294891357, "learning_rate": 0.002, "loss": 2.3419, "step": 182580 }, { "epoch": 0.7058418765752811, "grad_norm": 0.10919328778982162, "learning_rate": 0.002, "loss": 2.3443, "step": 182590 }, { "epoch": 0.7058805337786643, "grad_norm": 0.09283306449651718, "learning_rate": 0.002, "loss": 2.3554, "step": 182600 }, { "epoch": 0.7059191909820476, "grad_norm": 0.10101396590471268, "learning_rate": 0.002, "loss": 2.3207, "step": 182610 }, { "epoch": 0.7059578481854308, "grad_norm": 0.11432278901338577, "learning_rate": 0.002, "loss": 2.3494, "step": 182620 }, { "epoch": 0.7059965053888142, "grad_norm": 0.09779641777276993, "learning_rate": 0.002, "loss": 2.3475, "step": 182630 }, { "epoch": 0.7060351625921975, "grad_norm": 0.10827631503343582, "learning_rate": 0.002, "loss": 2.3542, "step": 182640 }, { "epoch": 0.7060738197955807, "grad_norm": 0.11150870472192764, "learning_rate": 0.002, "loss": 2.3392, "step": 182650 }, { "epoch": 0.706112476998964, "grad_norm": 0.08931907266378403, "learning_rate": 0.002, "loss": 2.3254, "step": 182660 }, { "epoch": 0.7061511342023473, "grad_norm": 0.14123156666755676, "learning_rate": 0.002, "loss": 2.3361, "step": 182670 }, { "epoch": 0.7061897914057306, "grad_norm": 0.10059284418821335, "learning_rate": 0.002, "loss": 2.3318, "step": 182680 }, { "epoch": 0.7062284486091138, "grad_norm": 0.10053300112485886, "learning_rate": 0.002, "loss": 2.3399, "step": 182690 }, { "epoch": 0.7062671058124971, "grad_norm": 0.11859703063964844, "learning_rate": 0.002, "loss": 2.3388, "step": 182700 }, { "epoch": 0.7063057630158803, "grad_norm": 0.11177325248718262, "learning_rate": 0.002, "loss": 2.3406, "step": 182710 }, { "epoch": 0.7063444202192637, "grad_norm": 0.10214826464653015, "learning_rate": 0.002, "loss": 2.3486, "step": 182720 }, { "epoch": 0.7063830774226469, "grad_norm": 0.09897692501544952, "learning_rate": 0.002, "loss": 2.3337, "step": 182730 }, { "epoch": 0.7064217346260302, "grad_norm": 0.10023060441017151, "learning_rate": 0.002, "loss": 2.3216, "step": 182740 }, { "epoch": 0.7064603918294134, "grad_norm": 0.10941066592931747, "learning_rate": 0.002, "loss": 2.3258, "step": 182750 }, { "epoch": 0.7064990490327968, "grad_norm": 0.10759110748767853, "learning_rate": 0.002, "loss": 2.3387, "step": 182760 }, { "epoch": 0.7065377062361801, "grad_norm": 0.09983283281326294, "learning_rate": 0.002, "loss": 2.3445, "step": 182770 }, { "epoch": 0.7065763634395633, "grad_norm": 0.09219998121261597, "learning_rate": 0.002, "loss": 2.3421, "step": 182780 }, { "epoch": 0.7066150206429466, "grad_norm": 0.10839740186929703, "learning_rate": 0.002, "loss": 2.3276, "step": 182790 }, { "epoch": 0.7066536778463299, "grad_norm": 0.10706480592489243, "learning_rate": 0.002, "loss": 2.3443, "step": 182800 }, { "epoch": 0.7066923350497132, "grad_norm": 0.1283886581659317, "learning_rate": 0.002, "loss": 2.3459, "step": 182810 }, { "epoch": 0.7067309922530964, "grad_norm": 0.09891193360090256, "learning_rate": 0.002, "loss": 2.3395, "step": 182820 }, { "epoch": 0.7067696494564797, "grad_norm": 0.10873159766197205, "learning_rate": 0.002, "loss": 2.3481, "step": 182830 }, { "epoch": 0.706808306659863, "grad_norm": 0.09865976125001907, "learning_rate": 0.002, "loss": 2.3376, "step": 182840 }, { "epoch": 0.7068469638632463, "grad_norm": 0.1111924797296524, "learning_rate": 0.002, "loss": 2.3523, "step": 182850 }, { "epoch": 0.7068856210666296, "grad_norm": 0.09795933216810226, "learning_rate": 0.002, "loss": 2.3494, "step": 182860 }, { "epoch": 0.7069242782700128, "grad_norm": 0.10794724524021149, "learning_rate": 0.002, "loss": 2.3361, "step": 182870 }, { "epoch": 0.7069629354733961, "grad_norm": 0.11744233220815659, "learning_rate": 0.002, "loss": 2.3428, "step": 182880 }, { "epoch": 0.7070015926767794, "grad_norm": 0.0983758494257927, "learning_rate": 0.002, "loss": 2.3434, "step": 182890 }, { "epoch": 0.7070402498801627, "grad_norm": 0.1338445246219635, "learning_rate": 0.002, "loss": 2.3396, "step": 182900 }, { "epoch": 0.7070789070835459, "grad_norm": 0.10137440264225006, "learning_rate": 0.002, "loss": 2.3415, "step": 182910 }, { "epoch": 0.7071175642869292, "grad_norm": 0.10454361140727997, "learning_rate": 0.002, "loss": 2.3228, "step": 182920 }, { "epoch": 0.7071562214903125, "grad_norm": 0.10040812939405441, "learning_rate": 0.002, "loss": 2.3411, "step": 182930 }, { "epoch": 0.7071948786936958, "grad_norm": 0.12701022624969482, "learning_rate": 0.002, "loss": 2.3407, "step": 182940 }, { "epoch": 0.707233535897079, "grad_norm": 0.0989936962723732, "learning_rate": 0.002, "loss": 2.3544, "step": 182950 }, { "epoch": 0.7072721931004623, "grad_norm": 0.10258396714925766, "learning_rate": 0.002, "loss": 2.3282, "step": 182960 }, { "epoch": 0.7073108503038457, "grad_norm": 0.10500797629356384, "learning_rate": 0.002, "loss": 2.3629, "step": 182970 }, { "epoch": 0.7073495075072289, "grad_norm": 0.10724397003650665, "learning_rate": 0.002, "loss": 2.3276, "step": 182980 }, { "epoch": 0.7073881647106122, "grad_norm": 0.11122574657201767, "learning_rate": 0.002, "loss": 2.3418, "step": 182990 }, { "epoch": 0.7074268219139954, "grad_norm": 0.11353199928998947, "learning_rate": 0.002, "loss": 2.3545, "step": 183000 }, { "epoch": 0.7074654791173788, "grad_norm": 0.09575947374105453, "learning_rate": 0.002, "loss": 2.3369, "step": 183010 }, { "epoch": 0.707504136320762, "grad_norm": 0.10303820669651031, "learning_rate": 0.002, "loss": 2.3516, "step": 183020 }, { "epoch": 0.7075427935241453, "grad_norm": 0.12164933234453201, "learning_rate": 0.002, "loss": 2.3463, "step": 183030 }, { "epoch": 0.7075814507275285, "grad_norm": 0.10083574801683426, "learning_rate": 0.002, "loss": 2.3431, "step": 183040 }, { "epoch": 0.7076201079309119, "grad_norm": 0.10467392951250076, "learning_rate": 0.002, "loss": 2.3397, "step": 183050 }, { "epoch": 0.7076587651342952, "grad_norm": 0.10307256877422333, "learning_rate": 0.002, "loss": 2.3425, "step": 183060 }, { "epoch": 0.7076974223376784, "grad_norm": 0.0985746756196022, "learning_rate": 0.002, "loss": 2.3425, "step": 183070 }, { "epoch": 0.7077360795410617, "grad_norm": 0.10553467273712158, "learning_rate": 0.002, "loss": 2.3364, "step": 183080 }, { "epoch": 0.7077747367444449, "grad_norm": 0.12108207494020462, "learning_rate": 0.002, "loss": 2.3573, "step": 183090 }, { "epoch": 0.7078133939478283, "grad_norm": 0.10229145735502243, "learning_rate": 0.002, "loss": 2.3354, "step": 183100 }, { "epoch": 0.7078520511512115, "grad_norm": 0.10687445104122162, "learning_rate": 0.002, "loss": 2.3376, "step": 183110 }, { "epoch": 0.7078907083545948, "grad_norm": 0.1538669615983963, "learning_rate": 0.002, "loss": 2.3379, "step": 183120 }, { "epoch": 0.707929365557978, "grad_norm": 0.09935051947832108, "learning_rate": 0.002, "loss": 2.3316, "step": 183130 }, { "epoch": 0.7079680227613614, "grad_norm": 0.1021176427602768, "learning_rate": 0.002, "loss": 2.3434, "step": 183140 }, { "epoch": 0.7080066799647446, "grad_norm": 0.09273340553045273, "learning_rate": 0.002, "loss": 2.3472, "step": 183150 }, { "epoch": 0.7080453371681279, "grad_norm": 0.10077358782291412, "learning_rate": 0.002, "loss": 2.3464, "step": 183160 }, { "epoch": 0.7080839943715111, "grad_norm": 0.10942483693361282, "learning_rate": 0.002, "loss": 2.344, "step": 183170 }, { "epoch": 0.7081226515748945, "grad_norm": 0.10991012305021286, "learning_rate": 0.002, "loss": 2.3275, "step": 183180 }, { "epoch": 0.7081613087782778, "grad_norm": 0.0981731116771698, "learning_rate": 0.002, "loss": 2.3344, "step": 183190 }, { "epoch": 0.708199965981661, "grad_norm": 0.09662474691867828, "learning_rate": 0.002, "loss": 2.3466, "step": 183200 }, { "epoch": 0.7082386231850443, "grad_norm": 0.10109969228506088, "learning_rate": 0.002, "loss": 2.3258, "step": 183210 }, { "epoch": 0.7082772803884276, "grad_norm": 0.10239209979772568, "learning_rate": 0.002, "loss": 2.3449, "step": 183220 }, { "epoch": 0.7083159375918109, "grad_norm": 0.11773066222667694, "learning_rate": 0.002, "loss": 2.3345, "step": 183230 }, { "epoch": 0.7083545947951941, "grad_norm": 0.1125943660736084, "learning_rate": 0.002, "loss": 2.3483, "step": 183240 }, { "epoch": 0.7083932519985774, "grad_norm": 0.12445010244846344, "learning_rate": 0.002, "loss": 2.3383, "step": 183250 }, { "epoch": 0.7084319092019606, "grad_norm": 0.10312087833881378, "learning_rate": 0.002, "loss": 2.338, "step": 183260 }, { "epoch": 0.708470566405344, "grad_norm": 0.10788668692111969, "learning_rate": 0.002, "loss": 2.3518, "step": 183270 }, { "epoch": 0.7085092236087273, "grad_norm": 0.11200711131095886, "learning_rate": 0.002, "loss": 2.3393, "step": 183280 }, { "epoch": 0.7085478808121105, "grad_norm": 0.12259982526302338, "learning_rate": 0.002, "loss": 2.3344, "step": 183290 }, { "epoch": 0.7085865380154938, "grad_norm": 0.12555958330631256, "learning_rate": 0.002, "loss": 2.3461, "step": 183300 }, { "epoch": 0.7086251952188771, "grad_norm": 0.12538139522075653, "learning_rate": 0.002, "loss": 2.3406, "step": 183310 }, { "epoch": 0.7086638524222604, "grad_norm": 0.12616588175296783, "learning_rate": 0.002, "loss": 2.3337, "step": 183320 }, { "epoch": 0.7087025096256436, "grad_norm": 0.1130484938621521, "learning_rate": 0.002, "loss": 2.3423, "step": 183330 }, { "epoch": 0.7087411668290269, "grad_norm": 0.1064705178141594, "learning_rate": 0.002, "loss": 2.3395, "step": 183340 }, { "epoch": 0.7087798240324102, "grad_norm": 0.10898733139038086, "learning_rate": 0.002, "loss": 2.3395, "step": 183350 }, { "epoch": 0.7088184812357935, "grad_norm": 0.10790088027715683, "learning_rate": 0.002, "loss": 2.3533, "step": 183360 }, { "epoch": 0.7088571384391767, "grad_norm": 0.11947724968194962, "learning_rate": 0.002, "loss": 2.3325, "step": 183370 }, { "epoch": 0.70889579564256, "grad_norm": 0.10596276074647903, "learning_rate": 0.002, "loss": 2.3354, "step": 183380 }, { "epoch": 0.7089344528459434, "grad_norm": 0.09440622478723526, "learning_rate": 0.002, "loss": 2.3485, "step": 183390 }, { "epoch": 0.7089731100493266, "grad_norm": 0.11583665013313293, "learning_rate": 0.002, "loss": 2.3442, "step": 183400 }, { "epoch": 0.7090117672527099, "grad_norm": 0.10019931942224503, "learning_rate": 0.002, "loss": 2.34, "step": 183410 }, { "epoch": 0.7090504244560931, "grad_norm": 0.12555435299873352, "learning_rate": 0.002, "loss": 2.3394, "step": 183420 }, { "epoch": 0.7090890816594764, "grad_norm": 0.12102832645177841, "learning_rate": 0.002, "loss": 2.3467, "step": 183430 }, { "epoch": 0.7091277388628597, "grad_norm": 0.09992365539073944, "learning_rate": 0.002, "loss": 2.3424, "step": 183440 }, { "epoch": 0.709166396066243, "grad_norm": 0.10638532042503357, "learning_rate": 0.002, "loss": 2.3609, "step": 183450 }, { "epoch": 0.7092050532696262, "grad_norm": 0.10480239987373352, "learning_rate": 0.002, "loss": 2.3208, "step": 183460 }, { "epoch": 0.7092437104730095, "grad_norm": 0.11703069508075714, "learning_rate": 0.002, "loss": 2.3406, "step": 183470 }, { "epoch": 0.7092823676763929, "grad_norm": 0.09900394082069397, "learning_rate": 0.002, "loss": 2.3392, "step": 183480 }, { "epoch": 0.7093210248797761, "grad_norm": 0.09972035139799118, "learning_rate": 0.002, "loss": 2.3483, "step": 183490 }, { "epoch": 0.7093596820831594, "grad_norm": 0.09546131640672684, "learning_rate": 0.002, "loss": 2.3364, "step": 183500 }, { "epoch": 0.7093983392865426, "grad_norm": 0.1132010668516159, "learning_rate": 0.002, "loss": 2.3342, "step": 183510 }, { "epoch": 0.709436996489926, "grad_norm": 0.10164470970630646, "learning_rate": 0.002, "loss": 2.3561, "step": 183520 }, { "epoch": 0.7094756536933092, "grad_norm": 0.1110953763127327, "learning_rate": 0.002, "loss": 2.3504, "step": 183530 }, { "epoch": 0.7095143108966925, "grad_norm": 0.08955393731594086, "learning_rate": 0.002, "loss": 2.3362, "step": 183540 }, { "epoch": 0.7095529681000757, "grad_norm": 0.11253860592842102, "learning_rate": 0.002, "loss": 2.3407, "step": 183550 }, { "epoch": 0.7095916253034591, "grad_norm": 0.11447730660438538, "learning_rate": 0.002, "loss": 2.3528, "step": 183560 }, { "epoch": 0.7096302825068423, "grad_norm": 0.09639697521924973, "learning_rate": 0.002, "loss": 2.3584, "step": 183570 }, { "epoch": 0.7096689397102256, "grad_norm": 0.11987259984016418, "learning_rate": 0.002, "loss": 2.318, "step": 183580 }, { "epoch": 0.7097075969136089, "grad_norm": 0.10117160528898239, "learning_rate": 0.002, "loss": 2.3409, "step": 183590 }, { "epoch": 0.7097462541169922, "grad_norm": 0.11126291751861572, "learning_rate": 0.002, "loss": 2.3357, "step": 183600 }, { "epoch": 0.7097849113203755, "grad_norm": 0.10469876229763031, "learning_rate": 0.002, "loss": 2.33, "step": 183610 }, { "epoch": 0.7098235685237587, "grad_norm": 0.09360670298337936, "learning_rate": 0.002, "loss": 2.3645, "step": 183620 }, { "epoch": 0.709862225727142, "grad_norm": 0.11895138025283813, "learning_rate": 0.002, "loss": 2.3361, "step": 183630 }, { "epoch": 0.7099008829305252, "grad_norm": 0.0990893617272377, "learning_rate": 0.002, "loss": 2.3454, "step": 183640 }, { "epoch": 0.7099395401339086, "grad_norm": 0.09940275549888611, "learning_rate": 0.002, "loss": 2.3224, "step": 183650 }, { "epoch": 0.7099781973372918, "grad_norm": 0.11033112555742264, "learning_rate": 0.002, "loss": 2.3351, "step": 183660 }, { "epoch": 0.7100168545406751, "grad_norm": 0.102576345205307, "learning_rate": 0.002, "loss": 2.3355, "step": 183670 }, { "epoch": 0.7100555117440583, "grad_norm": 0.10215635597705841, "learning_rate": 0.002, "loss": 2.3418, "step": 183680 }, { "epoch": 0.7100941689474417, "grad_norm": 0.10245046019554138, "learning_rate": 0.002, "loss": 2.3363, "step": 183690 }, { "epoch": 0.710132826150825, "grad_norm": 0.11830546706914902, "learning_rate": 0.002, "loss": 2.3408, "step": 183700 }, { "epoch": 0.7101714833542082, "grad_norm": 0.0982842668890953, "learning_rate": 0.002, "loss": 2.3331, "step": 183710 }, { "epoch": 0.7102101405575915, "grad_norm": 0.1330679953098297, "learning_rate": 0.002, "loss": 2.3326, "step": 183720 }, { "epoch": 0.7102487977609748, "grad_norm": 0.105428546667099, "learning_rate": 0.002, "loss": 2.3524, "step": 183730 }, { "epoch": 0.7102874549643581, "grad_norm": 0.0875903069972992, "learning_rate": 0.002, "loss": 2.3515, "step": 183740 }, { "epoch": 0.7103261121677413, "grad_norm": 0.09843344241380692, "learning_rate": 0.002, "loss": 2.3507, "step": 183750 }, { "epoch": 0.7103647693711246, "grad_norm": 0.12091364711523056, "learning_rate": 0.002, "loss": 2.3451, "step": 183760 }, { "epoch": 0.710403426574508, "grad_norm": 0.10571946203708649, "learning_rate": 0.002, "loss": 2.3503, "step": 183770 }, { "epoch": 0.7104420837778912, "grad_norm": 0.09138306975364685, "learning_rate": 0.002, "loss": 2.3455, "step": 183780 }, { "epoch": 0.7104807409812745, "grad_norm": 0.10767629742622375, "learning_rate": 0.002, "loss": 2.332, "step": 183790 }, { "epoch": 0.7105193981846577, "grad_norm": 0.10088904201984406, "learning_rate": 0.002, "loss": 2.3405, "step": 183800 }, { "epoch": 0.710558055388041, "grad_norm": 0.11030571907758713, "learning_rate": 0.002, "loss": 2.3413, "step": 183810 }, { "epoch": 0.7105967125914243, "grad_norm": 0.11730167269706726, "learning_rate": 0.002, "loss": 2.3459, "step": 183820 }, { "epoch": 0.7106353697948076, "grad_norm": 0.10828235000371933, "learning_rate": 0.002, "loss": 2.3307, "step": 183830 }, { "epoch": 0.7106740269981908, "grad_norm": 0.09817315638065338, "learning_rate": 0.002, "loss": 2.3451, "step": 183840 }, { "epoch": 0.7107126842015741, "grad_norm": 0.1154472678899765, "learning_rate": 0.002, "loss": 2.3491, "step": 183850 }, { "epoch": 0.7107513414049574, "grad_norm": 0.11157126724720001, "learning_rate": 0.002, "loss": 2.3434, "step": 183860 }, { "epoch": 0.7107899986083407, "grad_norm": 0.10947154462337494, "learning_rate": 0.002, "loss": 2.335, "step": 183870 }, { "epoch": 0.7108286558117239, "grad_norm": 0.09724964201450348, "learning_rate": 0.002, "loss": 2.3271, "step": 183880 }, { "epoch": 0.7108673130151072, "grad_norm": 0.0903862863779068, "learning_rate": 0.002, "loss": 2.3586, "step": 183890 }, { "epoch": 0.7109059702184906, "grad_norm": 0.104709193110466, "learning_rate": 0.002, "loss": 2.3394, "step": 183900 }, { "epoch": 0.7109446274218738, "grad_norm": 0.0999518409371376, "learning_rate": 0.002, "loss": 2.3569, "step": 183910 }, { "epoch": 0.7109832846252571, "grad_norm": 0.11664184182882309, "learning_rate": 0.002, "loss": 2.3399, "step": 183920 }, { "epoch": 0.7110219418286403, "grad_norm": 0.11321470886468887, "learning_rate": 0.002, "loss": 2.3399, "step": 183930 }, { "epoch": 0.7110605990320237, "grad_norm": 0.09373850375413895, "learning_rate": 0.002, "loss": 2.3269, "step": 183940 }, { "epoch": 0.7110992562354069, "grad_norm": 0.11596566438674927, "learning_rate": 0.002, "loss": 2.3433, "step": 183950 }, { "epoch": 0.7111379134387902, "grad_norm": 0.10600019246339798, "learning_rate": 0.002, "loss": 2.3291, "step": 183960 }, { "epoch": 0.7111765706421734, "grad_norm": 0.09841473400592804, "learning_rate": 0.002, "loss": 2.3302, "step": 183970 }, { "epoch": 0.7112152278455567, "grad_norm": 0.10479195415973663, "learning_rate": 0.002, "loss": 2.329, "step": 183980 }, { "epoch": 0.71125388504894, "grad_norm": 0.09592556208372116, "learning_rate": 0.002, "loss": 2.3439, "step": 183990 }, { "epoch": 0.7112925422523233, "grad_norm": 0.12123008072376251, "learning_rate": 0.002, "loss": 2.336, "step": 184000 }, { "epoch": 0.7113311994557066, "grad_norm": 0.09279517084360123, "learning_rate": 0.002, "loss": 2.3433, "step": 184010 }, { "epoch": 0.7113698566590898, "grad_norm": 0.09922918677330017, "learning_rate": 0.002, "loss": 2.3456, "step": 184020 }, { "epoch": 0.7114085138624732, "grad_norm": 0.11793326586484909, "learning_rate": 0.002, "loss": 2.333, "step": 184030 }, { "epoch": 0.7114471710658564, "grad_norm": 0.10107146948575974, "learning_rate": 0.002, "loss": 2.3315, "step": 184040 }, { "epoch": 0.7114858282692397, "grad_norm": 0.1058441773056984, "learning_rate": 0.002, "loss": 2.343, "step": 184050 }, { "epoch": 0.7115244854726229, "grad_norm": 0.11791115254163742, "learning_rate": 0.002, "loss": 2.3465, "step": 184060 }, { "epoch": 0.7115631426760063, "grad_norm": 0.09689020365476608, "learning_rate": 0.002, "loss": 2.346, "step": 184070 }, { "epoch": 0.7116017998793895, "grad_norm": 0.09788362681865692, "learning_rate": 0.002, "loss": 2.3472, "step": 184080 }, { "epoch": 0.7116404570827728, "grad_norm": 0.10828198492527008, "learning_rate": 0.002, "loss": 2.343, "step": 184090 }, { "epoch": 0.711679114286156, "grad_norm": 0.10432813316583633, "learning_rate": 0.002, "loss": 2.3312, "step": 184100 }, { "epoch": 0.7117177714895394, "grad_norm": 0.09556274116039276, "learning_rate": 0.002, "loss": 2.3323, "step": 184110 }, { "epoch": 0.7117564286929227, "grad_norm": 0.09298153221607208, "learning_rate": 0.002, "loss": 2.3273, "step": 184120 }, { "epoch": 0.7117950858963059, "grad_norm": 0.21416905522346497, "learning_rate": 0.002, "loss": 2.3353, "step": 184130 }, { "epoch": 0.7118337430996892, "grad_norm": 0.10230828821659088, "learning_rate": 0.002, "loss": 2.3372, "step": 184140 }, { "epoch": 0.7118724003030725, "grad_norm": 0.10380011051893234, "learning_rate": 0.002, "loss": 2.3423, "step": 184150 }, { "epoch": 0.7119110575064558, "grad_norm": 0.10810267925262451, "learning_rate": 0.002, "loss": 2.3406, "step": 184160 }, { "epoch": 0.711949714709839, "grad_norm": 0.09327961504459381, "learning_rate": 0.002, "loss": 2.3433, "step": 184170 }, { "epoch": 0.7119883719132223, "grad_norm": 0.11230014264583588, "learning_rate": 0.002, "loss": 2.341, "step": 184180 }, { "epoch": 0.7120270291166055, "grad_norm": 0.10888976603746414, "learning_rate": 0.002, "loss": 2.3394, "step": 184190 }, { "epoch": 0.7120656863199889, "grad_norm": 0.10669904947280884, "learning_rate": 0.002, "loss": 2.3419, "step": 184200 }, { "epoch": 0.7121043435233722, "grad_norm": 0.10787302255630493, "learning_rate": 0.002, "loss": 2.3366, "step": 184210 }, { "epoch": 0.7121430007267554, "grad_norm": 0.10550106316804886, "learning_rate": 0.002, "loss": 2.3365, "step": 184220 }, { "epoch": 0.7121816579301387, "grad_norm": 0.09902001172304153, "learning_rate": 0.002, "loss": 2.34, "step": 184230 }, { "epoch": 0.712220315133522, "grad_norm": 0.09852997213602066, "learning_rate": 0.002, "loss": 2.3493, "step": 184240 }, { "epoch": 0.7122589723369053, "grad_norm": 0.11110862344503403, "learning_rate": 0.002, "loss": 2.3287, "step": 184250 }, { "epoch": 0.7122976295402885, "grad_norm": 0.1000390499830246, "learning_rate": 0.002, "loss": 2.3343, "step": 184260 }, { "epoch": 0.7123362867436718, "grad_norm": 0.0991111770272255, "learning_rate": 0.002, "loss": 2.3372, "step": 184270 }, { "epoch": 0.7123749439470551, "grad_norm": 0.11547648161649704, "learning_rate": 0.002, "loss": 2.3355, "step": 184280 }, { "epoch": 0.7124136011504384, "grad_norm": 0.1573108732700348, "learning_rate": 0.002, "loss": 2.3505, "step": 184290 }, { "epoch": 0.7124522583538216, "grad_norm": 0.10133729130029678, "learning_rate": 0.002, "loss": 2.3404, "step": 184300 }, { "epoch": 0.7124909155572049, "grad_norm": 0.10251521319150925, "learning_rate": 0.002, "loss": 2.3495, "step": 184310 }, { "epoch": 0.7125295727605883, "grad_norm": 0.08018740266561508, "learning_rate": 0.002, "loss": 2.3414, "step": 184320 }, { "epoch": 0.7125682299639715, "grad_norm": 0.12203532457351685, "learning_rate": 0.002, "loss": 2.3506, "step": 184330 }, { "epoch": 0.7126068871673548, "grad_norm": 0.23121006786823273, "learning_rate": 0.002, "loss": 2.3473, "step": 184340 }, { "epoch": 0.712645544370738, "grad_norm": 0.11483977735042572, "learning_rate": 0.002, "loss": 2.3568, "step": 184350 }, { "epoch": 0.7126842015741213, "grad_norm": 0.11053165048360825, "learning_rate": 0.002, "loss": 2.345, "step": 184360 }, { "epoch": 0.7127228587775046, "grad_norm": 0.14932291209697723, "learning_rate": 0.002, "loss": 2.3439, "step": 184370 }, { "epoch": 0.7127615159808879, "grad_norm": 0.09915328025817871, "learning_rate": 0.002, "loss": 2.3129, "step": 184380 }, { "epoch": 0.7128001731842711, "grad_norm": 0.10788097977638245, "learning_rate": 0.002, "loss": 2.3334, "step": 184390 }, { "epoch": 0.7128388303876544, "grad_norm": 0.11338665336370468, "learning_rate": 0.002, "loss": 2.3341, "step": 184400 }, { "epoch": 0.7128774875910378, "grad_norm": 0.11638886481523514, "learning_rate": 0.002, "loss": 2.35, "step": 184410 }, { "epoch": 0.712916144794421, "grad_norm": 0.10881996154785156, "learning_rate": 0.002, "loss": 2.3407, "step": 184420 }, { "epoch": 0.7129548019978043, "grad_norm": 0.1067526638507843, "learning_rate": 0.002, "loss": 2.333, "step": 184430 }, { "epoch": 0.7129934592011875, "grad_norm": 0.09515440464019775, "learning_rate": 0.002, "loss": 2.3625, "step": 184440 }, { "epoch": 0.7130321164045709, "grad_norm": 0.09346569329500198, "learning_rate": 0.002, "loss": 2.3428, "step": 184450 }, { "epoch": 0.7130707736079541, "grad_norm": 0.10614271461963654, "learning_rate": 0.002, "loss": 2.3458, "step": 184460 }, { "epoch": 0.7131094308113374, "grad_norm": 0.10147388279438019, "learning_rate": 0.002, "loss": 2.3388, "step": 184470 }, { "epoch": 0.7131480880147206, "grad_norm": 0.1059550866484642, "learning_rate": 0.002, "loss": 2.3374, "step": 184480 }, { "epoch": 0.713186745218104, "grad_norm": 0.11526099592447281, "learning_rate": 0.002, "loss": 2.3525, "step": 184490 }, { "epoch": 0.7132254024214872, "grad_norm": 0.10168834030628204, "learning_rate": 0.002, "loss": 2.3327, "step": 184500 }, { "epoch": 0.7132640596248705, "grad_norm": 0.10906929522752762, "learning_rate": 0.002, "loss": 2.3431, "step": 184510 }, { "epoch": 0.7133027168282537, "grad_norm": 0.10011370480060577, "learning_rate": 0.002, "loss": 2.3535, "step": 184520 }, { "epoch": 0.7133413740316371, "grad_norm": 0.1059103012084961, "learning_rate": 0.002, "loss": 2.3513, "step": 184530 }, { "epoch": 0.7133800312350204, "grad_norm": 0.09475652873516083, "learning_rate": 0.002, "loss": 2.3266, "step": 184540 }, { "epoch": 0.7134186884384036, "grad_norm": 0.10671989619731903, "learning_rate": 0.002, "loss": 2.3513, "step": 184550 }, { "epoch": 0.7134573456417869, "grad_norm": 0.12009799480438232, "learning_rate": 0.002, "loss": 2.3434, "step": 184560 }, { "epoch": 0.7134960028451701, "grad_norm": 0.0910855382680893, "learning_rate": 0.002, "loss": 2.3505, "step": 184570 }, { "epoch": 0.7135346600485535, "grad_norm": 0.11664850264787674, "learning_rate": 0.002, "loss": 2.3333, "step": 184580 }, { "epoch": 0.7135733172519367, "grad_norm": 0.10083947330713272, "learning_rate": 0.002, "loss": 2.337, "step": 184590 }, { "epoch": 0.71361197445532, "grad_norm": 0.11706222593784332, "learning_rate": 0.002, "loss": 2.3201, "step": 184600 }, { "epoch": 0.7136506316587032, "grad_norm": 0.11299701035022736, "learning_rate": 0.002, "loss": 2.3533, "step": 184610 }, { "epoch": 0.7136892888620866, "grad_norm": 0.09368452429771423, "learning_rate": 0.002, "loss": 2.348, "step": 184620 }, { "epoch": 0.7137279460654699, "grad_norm": 0.11548180878162384, "learning_rate": 0.002, "loss": 2.3478, "step": 184630 }, { "epoch": 0.7137666032688531, "grad_norm": 0.09980639070272446, "learning_rate": 0.002, "loss": 2.3451, "step": 184640 }, { "epoch": 0.7138052604722364, "grad_norm": 0.1116255670785904, "learning_rate": 0.002, "loss": 2.3502, "step": 184650 }, { "epoch": 0.7138439176756197, "grad_norm": 0.08737179636955261, "learning_rate": 0.002, "loss": 2.3394, "step": 184660 }, { "epoch": 0.713882574879003, "grad_norm": 0.12267066538333893, "learning_rate": 0.002, "loss": 2.3308, "step": 184670 }, { "epoch": 0.7139212320823862, "grad_norm": 0.10829506069421768, "learning_rate": 0.002, "loss": 2.3424, "step": 184680 }, { "epoch": 0.7139598892857695, "grad_norm": 0.09659047424793243, "learning_rate": 0.002, "loss": 2.3482, "step": 184690 }, { "epoch": 0.7139985464891528, "grad_norm": 0.09008140116930008, "learning_rate": 0.002, "loss": 2.3585, "step": 184700 }, { "epoch": 0.7140372036925361, "grad_norm": 0.14132778346538544, "learning_rate": 0.002, "loss": 2.3498, "step": 184710 }, { "epoch": 0.7140758608959193, "grad_norm": 0.10938479751348495, "learning_rate": 0.002, "loss": 2.349, "step": 184720 }, { "epoch": 0.7141145180993026, "grad_norm": 0.10237931460142136, "learning_rate": 0.002, "loss": 2.3333, "step": 184730 }, { "epoch": 0.7141531753026858, "grad_norm": 0.09910931438207626, "learning_rate": 0.002, "loss": 2.3272, "step": 184740 }, { "epoch": 0.7141918325060692, "grad_norm": 0.09198702871799469, "learning_rate": 0.002, "loss": 2.341, "step": 184750 }, { "epoch": 0.7142304897094525, "grad_norm": 0.11978321522474289, "learning_rate": 0.002, "loss": 2.3328, "step": 184760 }, { "epoch": 0.7142691469128357, "grad_norm": 0.09299996495246887, "learning_rate": 0.002, "loss": 2.3233, "step": 184770 }, { "epoch": 0.714307804116219, "grad_norm": 0.09913870692253113, "learning_rate": 0.002, "loss": 2.3393, "step": 184780 }, { "epoch": 0.7143464613196023, "grad_norm": 0.11584877967834473, "learning_rate": 0.002, "loss": 2.3653, "step": 184790 }, { "epoch": 0.7143851185229856, "grad_norm": 0.12831422686576843, "learning_rate": 0.002, "loss": 2.3358, "step": 184800 }, { "epoch": 0.7144237757263688, "grad_norm": 0.10814099758863449, "learning_rate": 0.002, "loss": 2.3324, "step": 184810 }, { "epoch": 0.7144624329297521, "grad_norm": 0.10544437915086746, "learning_rate": 0.002, "loss": 2.3537, "step": 184820 }, { "epoch": 0.7145010901331355, "grad_norm": 0.09678446501493454, "learning_rate": 0.002, "loss": 2.3387, "step": 184830 }, { "epoch": 0.7145397473365187, "grad_norm": 0.10445859283208847, "learning_rate": 0.002, "loss": 2.3627, "step": 184840 }, { "epoch": 0.714578404539902, "grad_norm": 0.10959655791521072, "learning_rate": 0.002, "loss": 2.3373, "step": 184850 }, { "epoch": 0.7146170617432852, "grad_norm": 0.09721376746892929, "learning_rate": 0.002, "loss": 2.3546, "step": 184860 }, { "epoch": 0.7146557189466686, "grad_norm": 0.10366586595773697, "learning_rate": 0.002, "loss": 2.3447, "step": 184870 }, { "epoch": 0.7146943761500518, "grad_norm": 0.10293979197740555, "learning_rate": 0.002, "loss": 2.3298, "step": 184880 }, { "epoch": 0.7147330333534351, "grad_norm": 0.12619462609291077, "learning_rate": 0.002, "loss": 2.3469, "step": 184890 }, { "epoch": 0.7147716905568183, "grad_norm": 0.08829128742218018, "learning_rate": 0.002, "loss": 2.3334, "step": 184900 }, { "epoch": 0.7148103477602016, "grad_norm": 0.10005389899015427, "learning_rate": 0.002, "loss": 2.3441, "step": 184910 }, { "epoch": 0.7148490049635849, "grad_norm": 0.14041490852832794, "learning_rate": 0.002, "loss": 2.3568, "step": 184920 }, { "epoch": 0.7148876621669682, "grad_norm": 0.10810524225234985, "learning_rate": 0.002, "loss": 2.3575, "step": 184930 }, { "epoch": 0.7149263193703514, "grad_norm": 0.0931611880660057, "learning_rate": 0.002, "loss": 2.3568, "step": 184940 }, { "epoch": 0.7149649765737347, "grad_norm": 0.11990271508693695, "learning_rate": 0.002, "loss": 2.328, "step": 184950 }, { "epoch": 0.7150036337771181, "grad_norm": 0.10736245661973953, "learning_rate": 0.002, "loss": 2.3428, "step": 184960 }, { "epoch": 0.7150422909805013, "grad_norm": 0.10061245411634445, "learning_rate": 0.002, "loss": 2.3325, "step": 184970 }, { "epoch": 0.7150809481838846, "grad_norm": 0.10759148746728897, "learning_rate": 0.002, "loss": 2.332, "step": 184980 }, { "epoch": 0.7151196053872678, "grad_norm": 0.11481016129255295, "learning_rate": 0.002, "loss": 2.3432, "step": 184990 }, { "epoch": 0.7151582625906512, "grad_norm": 0.09845632314682007, "learning_rate": 0.002, "loss": 2.3405, "step": 185000 }, { "epoch": 0.7151969197940344, "grad_norm": 0.11303407698869705, "learning_rate": 0.002, "loss": 2.3417, "step": 185010 }, { "epoch": 0.7152355769974177, "grad_norm": 0.09801331162452698, "learning_rate": 0.002, "loss": 2.3477, "step": 185020 }, { "epoch": 0.7152742342008009, "grad_norm": 0.10794106125831604, "learning_rate": 0.002, "loss": 2.3506, "step": 185030 }, { "epoch": 0.7153128914041843, "grad_norm": 0.10937917232513428, "learning_rate": 0.002, "loss": 2.3451, "step": 185040 }, { "epoch": 0.7153515486075676, "grad_norm": 0.1305137276649475, "learning_rate": 0.002, "loss": 2.3441, "step": 185050 }, { "epoch": 0.7153902058109508, "grad_norm": 0.11963261663913727, "learning_rate": 0.002, "loss": 2.3472, "step": 185060 }, { "epoch": 0.7154288630143341, "grad_norm": 0.4527278244495392, "learning_rate": 0.002, "loss": 2.371, "step": 185070 }, { "epoch": 0.7154675202177174, "grad_norm": 0.107776939868927, "learning_rate": 0.002, "loss": 2.3419, "step": 185080 }, { "epoch": 0.7155061774211007, "grad_norm": 0.1005530133843422, "learning_rate": 0.002, "loss": 2.3308, "step": 185090 }, { "epoch": 0.7155448346244839, "grad_norm": 0.11894986033439636, "learning_rate": 0.002, "loss": 2.3409, "step": 185100 }, { "epoch": 0.7155834918278672, "grad_norm": 0.09806102514266968, "learning_rate": 0.002, "loss": 2.3372, "step": 185110 }, { "epoch": 0.7156221490312504, "grad_norm": 0.09273909777402878, "learning_rate": 0.002, "loss": 2.3512, "step": 185120 }, { "epoch": 0.7156608062346338, "grad_norm": 0.14330172538757324, "learning_rate": 0.002, "loss": 2.3356, "step": 185130 }, { "epoch": 0.715699463438017, "grad_norm": 0.0993657335639, "learning_rate": 0.002, "loss": 2.3472, "step": 185140 }, { "epoch": 0.7157381206414003, "grad_norm": 0.08665373176336288, "learning_rate": 0.002, "loss": 2.3325, "step": 185150 }, { "epoch": 0.7157767778447836, "grad_norm": 0.10238996893167496, "learning_rate": 0.002, "loss": 2.3369, "step": 185160 }, { "epoch": 0.7158154350481669, "grad_norm": 0.11059210449457169, "learning_rate": 0.002, "loss": 2.3488, "step": 185170 }, { "epoch": 0.7158540922515502, "grad_norm": 0.1170814260840416, "learning_rate": 0.002, "loss": 2.3388, "step": 185180 }, { "epoch": 0.7158927494549334, "grad_norm": 0.0992651954293251, "learning_rate": 0.002, "loss": 2.3258, "step": 185190 }, { "epoch": 0.7159314066583167, "grad_norm": 0.10449914634227753, "learning_rate": 0.002, "loss": 2.3514, "step": 185200 }, { "epoch": 0.7159700638617, "grad_norm": 0.10232613235712051, "learning_rate": 0.002, "loss": 2.3538, "step": 185210 }, { "epoch": 0.7160087210650833, "grad_norm": 0.09571287781000137, "learning_rate": 0.002, "loss": 2.3437, "step": 185220 }, { "epoch": 0.7160473782684665, "grad_norm": 0.0938621312379837, "learning_rate": 0.002, "loss": 2.3372, "step": 185230 }, { "epoch": 0.7160860354718498, "grad_norm": 0.10125449299812317, "learning_rate": 0.002, "loss": 2.3429, "step": 185240 }, { "epoch": 0.7161246926752332, "grad_norm": 0.12445887178182602, "learning_rate": 0.002, "loss": 2.3401, "step": 185250 }, { "epoch": 0.7161633498786164, "grad_norm": 0.11819303780794144, "learning_rate": 0.002, "loss": 2.3447, "step": 185260 }, { "epoch": 0.7162020070819997, "grad_norm": 0.11657149344682693, "learning_rate": 0.002, "loss": 2.3417, "step": 185270 }, { "epoch": 0.7162406642853829, "grad_norm": 0.09366388618946075, "learning_rate": 0.002, "loss": 2.338, "step": 185280 }, { "epoch": 0.7162793214887662, "grad_norm": 0.0998849868774414, "learning_rate": 0.002, "loss": 2.3359, "step": 185290 }, { "epoch": 0.7163179786921495, "grad_norm": 0.1090717613697052, "learning_rate": 0.002, "loss": 2.3478, "step": 185300 }, { "epoch": 0.7163566358955328, "grad_norm": 0.10590784251689911, "learning_rate": 0.002, "loss": 2.3411, "step": 185310 }, { "epoch": 0.716395293098916, "grad_norm": 0.10902975499629974, "learning_rate": 0.002, "loss": 2.3478, "step": 185320 }, { "epoch": 0.7164339503022993, "grad_norm": 0.11425049602985382, "learning_rate": 0.002, "loss": 2.3382, "step": 185330 }, { "epoch": 0.7164726075056826, "grad_norm": 0.104640431702137, "learning_rate": 0.002, "loss": 2.3431, "step": 185340 }, { "epoch": 0.7165112647090659, "grad_norm": 0.10949306190013885, "learning_rate": 0.002, "loss": 2.3383, "step": 185350 }, { "epoch": 0.7165499219124492, "grad_norm": 0.09471194446086884, "learning_rate": 0.002, "loss": 2.3422, "step": 185360 }, { "epoch": 0.7165885791158324, "grad_norm": 0.10628344863653183, "learning_rate": 0.002, "loss": 2.3414, "step": 185370 }, { "epoch": 0.7166272363192158, "grad_norm": 0.10102435946464539, "learning_rate": 0.002, "loss": 2.3559, "step": 185380 }, { "epoch": 0.716665893522599, "grad_norm": 0.09791155159473419, "learning_rate": 0.002, "loss": 2.3384, "step": 185390 }, { "epoch": 0.7167045507259823, "grad_norm": 0.0973888710141182, "learning_rate": 0.002, "loss": 2.3304, "step": 185400 }, { "epoch": 0.7167432079293655, "grad_norm": 0.1219591274857521, "learning_rate": 0.002, "loss": 2.3561, "step": 185410 }, { "epoch": 0.7167818651327489, "grad_norm": 0.09689916670322418, "learning_rate": 0.002, "loss": 2.326, "step": 185420 }, { "epoch": 0.7168205223361321, "grad_norm": 0.10497520864009857, "learning_rate": 0.002, "loss": 2.325, "step": 185430 }, { "epoch": 0.7168591795395154, "grad_norm": 0.10271918773651123, "learning_rate": 0.002, "loss": 2.3336, "step": 185440 }, { "epoch": 0.7168978367428986, "grad_norm": 0.11452703922986984, "learning_rate": 0.002, "loss": 2.3454, "step": 185450 }, { "epoch": 0.716936493946282, "grad_norm": 0.10196271538734436, "learning_rate": 0.002, "loss": 2.3416, "step": 185460 }, { "epoch": 0.7169751511496653, "grad_norm": 0.1002076268196106, "learning_rate": 0.002, "loss": 2.3412, "step": 185470 }, { "epoch": 0.7170138083530485, "grad_norm": 0.11561955511569977, "learning_rate": 0.002, "loss": 2.3318, "step": 185480 }, { "epoch": 0.7170524655564318, "grad_norm": 0.09349963814020157, "learning_rate": 0.002, "loss": 2.3494, "step": 185490 }, { "epoch": 0.717091122759815, "grad_norm": 0.11077411472797394, "learning_rate": 0.002, "loss": 2.3432, "step": 185500 }, { "epoch": 0.7171297799631984, "grad_norm": 0.10149725526571274, "learning_rate": 0.002, "loss": 2.3322, "step": 185510 }, { "epoch": 0.7171684371665816, "grad_norm": 0.09096281230449677, "learning_rate": 0.002, "loss": 2.3425, "step": 185520 }, { "epoch": 0.7172070943699649, "grad_norm": 0.1042969599366188, "learning_rate": 0.002, "loss": 2.3384, "step": 185530 }, { "epoch": 0.7172457515733481, "grad_norm": 0.13028311729431152, "learning_rate": 0.002, "loss": 2.3487, "step": 185540 }, { "epoch": 0.7172844087767315, "grad_norm": 0.10507809370756149, "learning_rate": 0.002, "loss": 2.3408, "step": 185550 }, { "epoch": 0.7173230659801147, "grad_norm": 0.09843814373016357, "learning_rate": 0.002, "loss": 2.3445, "step": 185560 }, { "epoch": 0.717361723183498, "grad_norm": 0.0921286940574646, "learning_rate": 0.002, "loss": 2.3265, "step": 185570 }, { "epoch": 0.7174003803868813, "grad_norm": 0.09608946740627289, "learning_rate": 0.002, "loss": 2.3467, "step": 185580 }, { "epoch": 0.7174390375902646, "grad_norm": 0.12892523407936096, "learning_rate": 0.002, "loss": 2.3357, "step": 185590 }, { "epoch": 0.7174776947936479, "grad_norm": 0.09652720391750336, "learning_rate": 0.002, "loss": 2.3281, "step": 185600 }, { "epoch": 0.7175163519970311, "grad_norm": 0.09985087811946869, "learning_rate": 0.002, "loss": 2.3603, "step": 185610 }, { "epoch": 0.7175550092004144, "grad_norm": 0.12451973557472229, "learning_rate": 0.002, "loss": 2.3555, "step": 185620 }, { "epoch": 0.7175936664037977, "grad_norm": 0.09739361703395844, "learning_rate": 0.002, "loss": 2.3411, "step": 185630 }, { "epoch": 0.717632323607181, "grad_norm": 0.10199489444494247, "learning_rate": 0.002, "loss": 2.3228, "step": 185640 }, { "epoch": 0.7176709808105642, "grad_norm": 0.10813910514116287, "learning_rate": 0.002, "loss": 2.347, "step": 185650 }, { "epoch": 0.7177096380139475, "grad_norm": 0.11378483474254608, "learning_rate": 0.002, "loss": 2.3339, "step": 185660 }, { "epoch": 0.7177482952173307, "grad_norm": 0.09850470721721649, "learning_rate": 0.002, "loss": 2.3408, "step": 185670 }, { "epoch": 0.7177869524207141, "grad_norm": 0.10502240061759949, "learning_rate": 0.002, "loss": 2.3506, "step": 185680 }, { "epoch": 0.7178256096240974, "grad_norm": 0.12115588039159775, "learning_rate": 0.002, "loss": 2.337, "step": 185690 }, { "epoch": 0.7178642668274806, "grad_norm": 0.11206604540348053, "learning_rate": 0.002, "loss": 2.3327, "step": 185700 }, { "epoch": 0.7179029240308639, "grad_norm": 0.09093813598155975, "learning_rate": 0.002, "loss": 2.3385, "step": 185710 }, { "epoch": 0.7179415812342472, "grad_norm": 0.10363386571407318, "learning_rate": 0.002, "loss": 2.3438, "step": 185720 }, { "epoch": 0.7179802384376305, "grad_norm": 0.10542728006839752, "learning_rate": 0.002, "loss": 2.349, "step": 185730 }, { "epoch": 0.7180188956410137, "grad_norm": 0.10961460322141647, "learning_rate": 0.002, "loss": 2.3458, "step": 185740 }, { "epoch": 0.718057552844397, "grad_norm": 0.14820969104766846, "learning_rate": 0.002, "loss": 2.3546, "step": 185750 }, { "epoch": 0.7180962100477803, "grad_norm": 0.1017719954252243, "learning_rate": 0.002, "loss": 2.3447, "step": 185760 }, { "epoch": 0.7181348672511636, "grad_norm": 0.10929939150810242, "learning_rate": 0.002, "loss": 2.3505, "step": 185770 }, { "epoch": 0.7181735244545469, "grad_norm": 0.10564550757408142, "learning_rate": 0.002, "loss": 2.3444, "step": 185780 }, { "epoch": 0.7182121816579301, "grad_norm": 0.08694026619195938, "learning_rate": 0.002, "loss": 2.3409, "step": 185790 }, { "epoch": 0.7182508388613135, "grad_norm": 0.10610771179199219, "learning_rate": 0.002, "loss": 2.3475, "step": 185800 }, { "epoch": 0.7182894960646967, "grad_norm": 0.10886865109205246, "learning_rate": 0.002, "loss": 2.3323, "step": 185810 }, { "epoch": 0.71832815326808, "grad_norm": 0.11758004128932953, "learning_rate": 0.002, "loss": 2.3614, "step": 185820 }, { "epoch": 0.7183668104714632, "grad_norm": 0.10590004920959473, "learning_rate": 0.002, "loss": 2.3435, "step": 185830 }, { "epoch": 0.7184054676748465, "grad_norm": 0.0975596159696579, "learning_rate": 0.002, "loss": 2.3287, "step": 185840 }, { "epoch": 0.7184441248782298, "grad_norm": 0.09157725423574448, "learning_rate": 0.002, "loss": 2.3408, "step": 185850 }, { "epoch": 0.7184827820816131, "grad_norm": 0.126144677400589, "learning_rate": 0.002, "loss": 2.3286, "step": 185860 }, { "epoch": 0.7185214392849963, "grad_norm": 0.10521572083234787, "learning_rate": 0.002, "loss": 2.3364, "step": 185870 }, { "epoch": 0.7185600964883796, "grad_norm": 0.10988739877939224, "learning_rate": 0.002, "loss": 2.3401, "step": 185880 }, { "epoch": 0.718598753691763, "grad_norm": 0.10514355450868607, "learning_rate": 0.002, "loss": 2.3385, "step": 185890 }, { "epoch": 0.7186374108951462, "grad_norm": 0.11994270235300064, "learning_rate": 0.002, "loss": 2.3514, "step": 185900 }, { "epoch": 0.7186760680985295, "grad_norm": 0.11317486315965652, "learning_rate": 0.002, "loss": 2.3516, "step": 185910 }, { "epoch": 0.7187147253019127, "grad_norm": 0.11995750665664673, "learning_rate": 0.002, "loss": 2.3305, "step": 185920 }, { "epoch": 0.7187533825052961, "grad_norm": 0.09820850938558578, "learning_rate": 0.002, "loss": 2.3416, "step": 185930 }, { "epoch": 0.7187920397086793, "grad_norm": 0.10075823217630386, "learning_rate": 0.002, "loss": 2.3265, "step": 185940 }, { "epoch": 0.7188306969120626, "grad_norm": 0.10743725299835205, "learning_rate": 0.002, "loss": 2.3388, "step": 185950 }, { "epoch": 0.7188693541154458, "grad_norm": 0.10513687133789062, "learning_rate": 0.002, "loss": 2.3304, "step": 185960 }, { "epoch": 0.7189080113188292, "grad_norm": 0.138917937874794, "learning_rate": 0.002, "loss": 2.3296, "step": 185970 }, { "epoch": 0.7189466685222125, "grad_norm": 0.11696998029947281, "learning_rate": 0.002, "loss": 2.3432, "step": 185980 }, { "epoch": 0.7189853257255957, "grad_norm": 0.09162256121635437, "learning_rate": 0.002, "loss": 2.3266, "step": 185990 }, { "epoch": 0.719023982928979, "grad_norm": 0.11939941346645355, "learning_rate": 0.002, "loss": 2.3438, "step": 186000 }, { "epoch": 0.7190626401323623, "grad_norm": 0.0931636318564415, "learning_rate": 0.002, "loss": 2.349, "step": 186010 }, { "epoch": 0.7191012973357456, "grad_norm": 0.11141252517700195, "learning_rate": 0.002, "loss": 2.3524, "step": 186020 }, { "epoch": 0.7191399545391288, "grad_norm": 0.10563148558139801, "learning_rate": 0.002, "loss": 2.3442, "step": 186030 }, { "epoch": 0.7191786117425121, "grad_norm": 0.1159094050526619, "learning_rate": 0.002, "loss": 2.3431, "step": 186040 }, { "epoch": 0.7192172689458953, "grad_norm": 0.11379499733448029, "learning_rate": 0.002, "loss": 2.3362, "step": 186050 }, { "epoch": 0.7192559261492787, "grad_norm": 0.0985812321305275, "learning_rate": 0.002, "loss": 2.349, "step": 186060 }, { "epoch": 0.7192945833526619, "grad_norm": 0.12417516112327576, "learning_rate": 0.002, "loss": 2.323, "step": 186070 }, { "epoch": 0.7193332405560452, "grad_norm": 0.10407594591379166, "learning_rate": 0.002, "loss": 2.3552, "step": 186080 }, { "epoch": 0.7193718977594284, "grad_norm": 0.09413602948188782, "learning_rate": 0.002, "loss": 2.337, "step": 186090 }, { "epoch": 0.7194105549628118, "grad_norm": 0.10107819736003876, "learning_rate": 0.002, "loss": 2.34, "step": 186100 }, { "epoch": 0.7194492121661951, "grad_norm": 0.11445695906877518, "learning_rate": 0.002, "loss": 2.3462, "step": 186110 }, { "epoch": 0.7194878693695783, "grad_norm": 0.09609300643205643, "learning_rate": 0.002, "loss": 2.3286, "step": 186120 }, { "epoch": 0.7195265265729616, "grad_norm": 0.10091094672679901, "learning_rate": 0.002, "loss": 2.3386, "step": 186130 }, { "epoch": 0.7195651837763449, "grad_norm": 0.09473997354507446, "learning_rate": 0.002, "loss": 2.341, "step": 186140 }, { "epoch": 0.7196038409797282, "grad_norm": 0.1108432486653328, "learning_rate": 0.002, "loss": 2.347, "step": 186150 }, { "epoch": 0.7196424981831114, "grad_norm": 0.10067521035671234, "learning_rate": 0.002, "loss": 2.3368, "step": 186160 }, { "epoch": 0.7196811553864947, "grad_norm": 0.09316106885671616, "learning_rate": 0.002, "loss": 2.3363, "step": 186170 }, { "epoch": 0.719719812589878, "grad_norm": 0.10489480942487717, "learning_rate": 0.002, "loss": 2.3457, "step": 186180 }, { "epoch": 0.7197584697932613, "grad_norm": 0.08913571387529373, "learning_rate": 0.002, "loss": 2.3405, "step": 186190 }, { "epoch": 0.7197971269966446, "grad_norm": 0.10339523106813431, "learning_rate": 0.002, "loss": 2.3456, "step": 186200 }, { "epoch": 0.7198357842000278, "grad_norm": 0.11942090839147568, "learning_rate": 0.002, "loss": 2.3553, "step": 186210 }, { "epoch": 0.7198744414034111, "grad_norm": 0.10357536375522614, "learning_rate": 0.002, "loss": 2.3435, "step": 186220 }, { "epoch": 0.7199130986067944, "grad_norm": 0.09975355863571167, "learning_rate": 0.002, "loss": 2.3322, "step": 186230 }, { "epoch": 0.7199517558101777, "grad_norm": 0.09567605704069138, "learning_rate": 0.002, "loss": 2.3294, "step": 186240 }, { "epoch": 0.7199904130135609, "grad_norm": 0.09296274930238724, "learning_rate": 0.002, "loss": 2.3342, "step": 186250 }, { "epoch": 0.7200290702169442, "grad_norm": 0.0937802791595459, "learning_rate": 0.002, "loss": 2.3371, "step": 186260 }, { "epoch": 0.7200677274203275, "grad_norm": 0.08700099587440491, "learning_rate": 0.002, "loss": 2.346, "step": 186270 }, { "epoch": 0.7201063846237108, "grad_norm": 0.10534818470478058, "learning_rate": 0.002, "loss": 2.3349, "step": 186280 }, { "epoch": 0.720145041827094, "grad_norm": 0.09784476459026337, "learning_rate": 0.002, "loss": 2.3572, "step": 186290 }, { "epoch": 0.7201836990304773, "grad_norm": 0.14407220482826233, "learning_rate": 0.002, "loss": 2.3436, "step": 186300 }, { "epoch": 0.7202223562338607, "grad_norm": 0.10064949095249176, "learning_rate": 0.002, "loss": 2.3281, "step": 186310 }, { "epoch": 0.7202610134372439, "grad_norm": 0.10450860857963562, "learning_rate": 0.002, "loss": 2.3617, "step": 186320 }, { "epoch": 0.7202996706406272, "grad_norm": 0.10671655088663101, "learning_rate": 0.002, "loss": 2.3338, "step": 186330 }, { "epoch": 0.7203383278440104, "grad_norm": 0.10465175658464432, "learning_rate": 0.002, "loss": 2.3373, "step": 186340 }, { "epoch": 0.7203769850473938, "grad_norm": 0.10027614235877991, "learning_rate": 0.002, "loss": 2.3422, "step": 186350 }, { "epoch": 0.720415642250777, "grad_norm": 0.11972982436418533, "learning_rate": 0.002, "loss": 2.3332, "step": 186360 }, { "epoch": 0.7204542994541603, "grad_norm": 0.10320338606834412, "learning_rate": 0.002, "loss": 2.3427, "step": 186370 }, { "epoch": 0.7204929566575435, "grad_norm": 0.10482943803071976, "learning_rate": 0.002, "loss": 2.3364, "step": 186380 }, { "epoch": 0.7205316138609269, "grad_norm": 0.10837873071432114, "learning_rate": 0.002, "loss": 2.3358, "step": 186390 }, { "epoch": 0.7205702710643102, "grad_norm": 0.15739254653453827, "learning_rate": 0.002, "loss": 2.3398, "step": 186400 }, { "epoch": 0.7206089282676934, "grad_norm": 0.11209744960069656, "learning_rate": 0.002, "loss": 2.3451, "step": 186410 }, { "epoch": 0.7206475854710767, "grad_norm": 0.08895806223154068, "learning_rate": 0.002, "loss": 2.3552, "step": 186420 }, { "epoch": 0.7206862426744599, "grad_norm": 0.09714076668024063, "learning_rate": 0.002, "loss": 2.3472, "step": 186430 }, { "epoch": 0.7207248998778433, "grad_norm": 0.10694416612386703, "learning_rate": 0.002, "loss": 2.336, "step": 186440 }, { "epoch": 0.7207635570812265, "grad_norm": 0.103749580681324, "learning_rate": 0.002, "loss": 2.3297, "step": 186450 }, { "epoch": 0.7208022142846098, "grad_norm": 0.11582319438457489, "learning_rate": 0.002, "loss": 2.346, "step": 186460 }, { "epoch": 0.720840871487993, "grad_norm": 0.09324432909488678, "learning_rate": 0.002, "loss": 2.3299, "step": 186470 }, { "epoch": 0.7208795286913764, "grad_norm": 0.1003182902932167, "learning_rate": 0.002, "loss": 2.352, "step": 186480 }, { "epoch": 0.7209181858947596, "grad_norm": 0.1040387973189354, "learning_rate": 0.002, "loss": 2.3457, "step": 186490 }, { "epoch": 0.7209568430981429, "grad_norm": 0.10195592045783997, "learning_rate": 0.002, "loss": 2.3328, "step": 186500 }, { "epoch": 0.7209955003015261, "grad_norm": 0.09478186070919037, "learning_rate": 0.002, "loss": 2.3318, "step": 186510 }, { "epoch": 0.7210341575049095, "grad_norm": 0.1099415048956871, "learning_rate": 0.002, "loss": 2.3551, "step": 186520 }, { "epoch": 0.7210728147082928, "grad_norm": 0.09218371659517288, "learning_rate": 0.002, "loss": 2.3381, "step": 186530 }, { "epoch": 0.721111471911676, "grad_norm": 0.09654217213392258, "learning_rate": 0.002, "loss": 2.3465, "step": 186540 }, { "epoch": 0.7211501291150593, "grad_norm": 0.10185280442237854, "learning_rate": 0.002, "loss": 2.3604, "step": 186550 }, { "epoch": 0.7211887863184426, "grad_norm": 0.09472206234931946, "learning_rate": 0.002, "loss": 2.3384, "step": 186560 }, { "epoch": 0.7212274435218259, "grad_norm": 0.11460986733436584, "learning_rate": 0.002, "loss": 2.3515, "step": 186570 }, { "epoch": 0.7212661007252091, "grad_norm": 0.09128446131944656, "learning_rate": 0.002, "loss": 2.3405, "step": 186580 }, { "epoch": 0.7213047579285924, "grad_norm": 0.10730069130659103, "learning_rate": 0.002, "loss": 2.3271, "step": 186590 }, { "epoch": 0.7213434151319756, "grad_norm": 0.10237306356430054, "learning_rate": 0.002, "loss": 2.3341, "step": 186600 }, { "epoch": 0.721382072335359, "grad_norm": 0.09643815457820892, "learning_rate": 0.002, "loss": 2.3342, "step": 186610 }, { "epoch": 0.7214207295387423, "grad_norm": 0.11439349502325058, "learning_rate": 0.002, "loss": 2.3416, "step": 186620 }, { "epoch": 0.7214593867421255, "grad_norm": 0.1082044318318367, "learning_rate": 0.002, "loss": 2.343, "step": 186630 }, { "epoch": 0.7214980439455088, "grad_norm": 0.10950101912021637, "learning_rate": 0.002, "loss": 2.35, "step": 186640 }, { "epoch": 0.7215367011488921, "grad_norm": 0.10193609446287155, "learning_rate": 0.002, "loss": 2.342, "step": 186650 }, { "epoch": 0.7215753583522754, "grad_norm": 0.11126836389303207, "learning_rate": 0.002, "loss": 2.3295, "step": 186660 }, { "epoch": 0.7216140155556586, "grad_norm": 0.12311969697475433, "learning_rate": 0.002, "loss": 2.3298, "step": 186670 }, { "epoch": 0.7216526727590419, "grad_norm": 0.10100258886814117, "learning_rate": 0.002, "loss": 2.3427, "step": 186680 }, { "epoch": 0.7216913299624252, "grad_norm": 0.10313405841588974, "learning_rate": 0.002, "loss": 2.3457, "step": 186690 }, { "epoch": 0.7217299871658085, "grad_norm": 0.10727465897798538, "learning_rate": 0.002, "loss": 2.3343, "step": 186700 }, { "epoch": 0.7217686443691917, "grad_norm": 0.09539248794317245, "learning_rate": 0.002, "loss": 2.3274, "step": 186710 }, { "epoch": 0.721807301572575, "grad_norm": 0.11575020104646683, "learning_rate": 0.002, "loss": 2.3364, "step": 186720 }, { "epoch": 0.7218459587759584, "grad_norm": 0.10815134644508362, "learning_rate": 0.002, "loss": 2.3425, "step": 186730 }, { "epoch": 0.7218846159793416, "grad_norm": 0.11231502145528793, "learning_rate": 0.002, "loss": 2.3485, "step": 186740 }, { "epoch": 0.7219232731827249, "grad_norm": 0.12856830656528473, "learning_rate": 0.002, "loss": 2.3422, "step": 186750 }, { "epoch": 0.7219619303861081, "grad_norm": 0.10370416939258575, "learning_rate": 0.002, "loss": 2.3393, "step": 186760 }, { "epoch": 0.7220005875894914, "grad_norm": 0.11301801353693008, "learning_rate": 0.002, "loss": 2.3376, "step": 186770 }, { "epoch": 0.7220392447928747, "grad_norm": 0.10336112976074219, "learning_rate": 0.002, "loss": 2.3416, "step": 186780 }, { "epoch": 0.722077901996258, "grad_norm": 0.09517168253660202, "learning_rate": 0.002, "loss": 2.3438, "step": 186790 }, { "epoch": 0.7221165591996412, "grad_norm": 0.09284123033285141, "learning_rate": 0.002, "loss": 2.3414, "step": 186800 }, { "epoch": 0.7221552164030245, "grad_norm": 0.10720662027597427, "learning_rate": 0.002, "loss": 2.3451, "step": 186810 }, { "epoch": 0.7221938736064079, "grad_norm": 0.09715081751346588, "learning_rate": 0.002, "loss": 2.3416, "step": 186820 }, { "epoch": 0.7222325308097911, "grad_norm": 0.09416729211807251, "learning_rate": 0.002, "loss": 2.3343, "step": 186830 }, { "epoch": 0.7222711880131744, "grad_norm": 0.12686994671821594, "learning_rate": 0.002, "loss": 2.3313, "step": 186840 }, { "epoch": 0.7223098452165576, "grad_norm": 0.08926520496606827, "learning_rate": 0.002, "loss": 2.3477, "step": 186850 }, { "epoch": 0.722348502419941, "grad_norm": 0.11334452778100967, "learning_rate": 0.002, "loss": 2.3412, "step": 186860 }, { "epoch": 0.7223871596233242, "grad_norm": 0.11346606910228729, "learning_rate": 0.002, "loss": 2.3539, "step": 186870 }, { "epoch": 0.7224258168267075, "grad_norm": 0.11881070584058762, "learning_rate": 0.002, "loss": 2.3451, "step": 186880 }, { "epoch": 0.7224644740300907, "grad_norm": 0.11092095822095871, "learning_rate": 0.002, "loss": 2.333, "step": 186890 }, { "epoch": 0.7225031312334741, "grad_norm": 0.09544173628091812, "learning_rate": 0.002, "loss": 2.348, "step": 186900 }, { "epoch": 0.7225417884368573, "grad_norm": 0.12076514959335327, "learning_rate": 0.002, "loss": 2.3451, "step": 186910 }, { "epoch": 0.7225804456402406, "grad_norm": 0.10290089249610901, "learning_rate": 0.002, "loss": 2.3415, "step": 186920 }, { "epoch": 0.7226191028436239, "grad_norm": 0.12027185410261154, "learning_rate": 0.002, "loss": 2.3348, "step": 186930 }, { "epoch": 0.7226577600470072, "grad_norm": 0.09199009835720062, "learning_rate": 0.002, "loss": 2.3492, "step": 186940 }, { "epoch": 0.7226964172503905, "grad_norm": 0.10237187892198563, "learning_rate": 0.002, "loss": 2.3383, "step": 186950 }, { "epoch": 0.7227350744537737, "grad_norm": 0.09242359548807144, "learning_rate": 0.002, "loss": 2.3491, "step": 186960 }, { "epoch": 0.722773731657157, "grad_norm": 0.09228871017694473, "learning_rate": 0.002, "loss": 2.3302, "step": 186970 }, { "epoch": 0.7228123888605402, "grad_norm": 0.10070665925741196, "learning_rate": 0.002, "loss": 2.3314, "step": 186980 }, { "epoch": 0.7228510460639236, "grad_norm": 0.12258857488632202, "learning_rate": 0.002, "loss": 2.3467, "step": 186990 }, { "epoch": 0.7228897032673068, "grad_norm": 0.11036261171102524, "learning_rate": 0.002, "loss": 2.3383, "step": 187000 }, { "epoch": 0.7229283604706901, "grad_norm": 0.10454872995615005, "learning_rate": 0.002, "loss": 2.3367, "step": 187010 }, { "epoch": 0.7229670176740733, "grad_norm": 0.1144997626543045, "learning_rate": 0.002, "loss": 2.3486, "step": 187020 }, { "epoch": 0.7230056748774567, "grad_norm": 0.11421671509742737, "learning_rate": 0.002, "loss": 2.3327, "step": 187030 }, { "epoch": 0.72304433208084, "grad_norm": 0.11393462866544724, "learning_rate": 0.002, "loss": 2.341, "step": 187040 }, { "epoch": 0.7230829892842232, "grad_norm": 0.0887039452791214, "learning_rate": 0.002, "loss": 2.3399, "step": 187050 }, { "epoch": 0.7231216464876065, "grad_norm": 0.13317154347896576, "learning_rate": 0.002, "loss": 2.3327, "step": 187060 }, { "epoch": 0.7231603036909898, "grad_norm": 0.09806982427835464, "learning_rate": 0.002, "loss": 2.3504, "step": 187070 }, { "epoch": 0.7231989608943731, "grad_norm": 0.1163717657327652, "learning_rate": 0.002, "loss": 2.3538, "step": 187080 }, { "epoch": 0.7232376180977563, "grad_norm": 0.101486437022686, "learning_rate": 0.002, "loss": 2.3498, "step": 187090 }, { "epoch": 0.7232762753011396, "grad_norm": 0.12469431012868881, "learning_rate": 0.002, "loss": 2.3482, "step": 187100 }, { "epoch": 0.723314932504523, "grad_norm": 0.10116591304540634, "learning_rate": 0.002, "loss": 2.3305, "step": 187110 }, { "epoch": 0.7233535897079062, "grad_norm": 0.10730016231536865, "learning_rate": 0.002, "loss": 2.3558, "step": 187120 }, { "epoch": 0.7233922469112894, "grad_norm": 0.10383022576570511, "learning_rate": 0.002, "loss": 2.3417, "step": 187130 }, { "epoch": 0.7234309041146727, "grad_norm": 0.09397551417350769, "learning_rate": 0.002, "loss": 2.3376, "step": 187140 }, { "epoch": 0.723469561318056, "grad_norm": 0.10737968981266022, "learning_rate": 0.002, "loss": 2.3487, "step": 187150 }, { "epoch": 0.7235082185214393, "grad_norm": 0.10347533971071243, "learning_rate": 0.002, "loss": 2.3651, "step": 187160 }, { "epoch": 0.7235468757248226, "grad_norm": 0.13405457139015198, "learning_rate": 0.002, "loss": 2.3434, "step": 187170 }, { "epoch": 0.7235855329282058, "grad_norm": 0.09673825651407242, "learning_rate": 0.002, "loss": 2.3236, "step": 187180 }, { "epoch": 0.7236241901315891, "grad_norm": 0.12408943474292755, "learning_rate": 0.002, "loss": 2.3468, "step": 187190 }, { "epoch": 0.7236628473349724, "grad_norm": 0.12734411656856537, "learning_rate": 0.002, "loss": 2.3424, "step": 187200 }, { "epoch": 0.7237015045383557, "grad_norm": 0.09845370799303055, "learning_rate": 0.002, "loss": 2.3462, "step": 187210 }, { "epoch": 0.7237401617417389, "grad_norm": 0.1021999716758728, "learning_rate": 0.002, "loss": 2.3331, "step": 187220 }, { "epoch": 0.7237788189451222, "grad_norm": 0.10909496247768402, "learning_rate": 0.002, "loss": 2.3454, "step": 187230 }, { "epoch": 0.7238174761485056, "grad_norm": 0.10415089130401611, "learning_rate": 0.002, "loss": 2.3251, "step": 187240 }, { "epoch": 0.7238561333518888, "grad_norm": 0.10948038846254349, "learning_rate": 0.002, "loss": 2.3456, "step": 187250 }, { "epoch": 0.7238947905552721, "grad_norm": 0.10909941792488098, "learning_rate": 0.002, "loss": 2.333, "step": 187260 }, { "epoch": 0.7239334477586553, "grad_norm": 0.10298583656549454, "learning_rate": 0.002, "loss": 2.346, "step": 187270 }, { "epoch": 0.7239721049620387, "grad_norm": 0.09963478147983551, "learning_rate": 0.002, "loss": 2.3363, "step": 187280 }, { "epoch": 0.7240107621654219, "grad_norm": 0.09870238602161407, "learning_rate": 0.002, "loss": 2.3422, "step": 187290 }, { "epoch": 0.7240494193688052, "grad_norm": 0.11888501793146133, "learning_rate": 0.002, "loss": 2.3339, "step": 187300 }, { "epoch": 0.7240880765721884, "grad_norm": 0.10366848856210709, "learning_rate": 0.002, "loss": 2.3509, "step": 187310 }, { "epoch": 0.7241267337755717, "grad_norm": 0.10529571026563644, "learning_rate": 0.002, "loss": 2.3463, "step": 187320 }, { "epoch": 0.724165390978955, "grad_norm": 0.145135298371315, "learning_rate": 0.002, "loss": 2.3336, "step": 187330 }, { "epoch": 0.7242040481823383, "grad_norm": 0.09621170163154602, "learning_rate": 0.002, "loss": 2.3347, "step": 187340 }, { "epoch": 0.7242427053857216, "grad_norm": 0.10496283322572708, "learning_rate": 0.002, "loss": 2.3575, "step": 187350 }, { "epoch": 0.7242813625891048, "grad_norm": 0.0986151173710823, "learning_rate": 0.002, "loss": 2.3371, "step": 187360 }, { "epoch": 0.7243200197924882, "grad_norm": 0.09692153334617615, "learning_rate": 0.002, "loss": 2.3323, "step": 187370 }, { "epoch": 0.7243586769958714, "grad_norm": 0.11592217534780502, "learning_rate": 0.002, "loss": 2.3488, "step": 187380 }, { "epoch": 0.7243973341992547, "grad_norm": 0.09770040214061737, "learning_rate": 0.002, "loss": 2.3239, "step": 187390 }, { "epoch": 0.7244359914026379, "grad_norm": 0.0982050821185112, "learning_rate": 0.002, "loss": 2.344, "step": 187400 }, { "epoch": 0.7244746486060213, "grad_norm": 0.0895521268248558, "learning_rate": 0.002, "loss": 2.3376, "step": 187410 }, { "epoch": 0.7245133058094045, "grad_norm": 0.11532026529312134, "learning_rate": 0.002, "loss": 2.3388, "step": 187420 }, { "epoch": 0.7245519630127878, "grad_norm": 0.10454227775335312, "learning_rate": 0.002, "loss": 2.3285, "step": 187430 }, { "epoch": 0.724590620216171, "grad_norm": 0.10045175999403, "learning_rate": 0.002, "loss": 2.3441, "step": 187440 }, { "epoch": 0.7246292774195544, "grad_norm": 0.09500908106565475, "learning_rate": 0.002, "loss": 2.3401, "step": 187450 }, { "epoch": 0.7246679346229377, "grad_norm": 0.10398609936237335, "learning_rate": 0.002, "loss": 2.3403, "step": 187460 }, { "epoch": 0.7247065918263209, "grad_norm": 0.10558445006608963, "learning_rate": 0.002, "loss": 2.3281, "step": 187470 }, { "epoch": 0.7247452490297042, "grad_norm": 0.10564364492893219, "learning_rate": 0.002, "loss": 2.3544, "step": 187480 }, { "epoch": 0.7247839062330875, "grad_norm": 0.10846003144979477, "learning_rate": 0.002, "loss": 2.3386, "step": 187490 }, { "epoch": 0.7248225634364708, "grad_norm": 0.13258378207683563, "learning_rate": 0.002, "loss": 2.3417, "step": 187500 }, { "epoch": 0.724861220639854, "grad_norm": 0.11121944338083267, "learning_rate": 0.002, "loss": 2.3401, "step": 187510 }, { "epoch": 0.7248998778432373, "grad_norm": 0.09945479035377502, "learning_rate": 0.002, "loss": 2.3454, "step": 187520 }, { "epoch": 0.7249385350466205, "grad_norm": 0.11722530424594879, "learning_rate": 0.002, "loss": 2.3458, "step": 187530 }, { "epoch": 0.7249771922500039, "grad_norm": 0.10056839138269424, "learning_rate": 0.002, "loss": 2.3401, "step": 187540 }, { "epoch": 0.7250158494533872, "grad_norm": 0.09518096596002579, "learning_rate": 0.002, "loss": 2.3396, "step": 187550 }, { "epoch": 0.7250545066567704, "grad_norm": 0.10595232993364334, "learning_rate": 0.002, "loss": 2.3409, "step": 187560 }, { "epoch": 0.7250931638601537, "grad_norm": 0.09073364734649658, "learning_rate": 0.002, "loss": 2.3556, "step": 187570 }, { "epoch": 0.725131821063537, "grad_norm": 0.11355112493038177, "learning_rate": 0.002, "loss": 2.3411, "step": 187580 }, { "epoch": 0.7251704782669203, "grad_norm": 0.09228280931711197, "learning_rate": 0.002, "loss": 2.3427, "step": 187590 }, { "epoch": 0.7252091354703035, "grad_norm": 0.11262501776218414, "learning_rate": 0.002, "loss": 2.3356, "step": 187600 }, { "epoch": 0.7252477926736868, "grad_norm": 0.10293961316347122, "learning_rate": 0.002, "loss": 2.3463, "step": 187610 }, { "epoch": 0.7252864498770701, "grad_norm": 0.12528426945209503, "learning_rate": 0.002, "loss": 2.326, "step": 187620 }, { "epoch": 0.7253251070804534, "grad_norm": 0.13686411082744598, "learning_rate": 0.002, "loss": 2.3435, "step": 187630 }, { "epoch": 0.7253637642838366, "grad_norm": 0.09556593745946884, "learning_rate": 0.002, "loss": 2.338, "step": 187640 }, { "epoch": 0.7254024214872199, "grad_norm": 0.08956517279148102, "learning_rate": 0.002, "loss": 2.3573, "step": 187650 }, { "epoch": 0.7254410786906033, "grad_norm": 0.09875398874282837, "learning_rate": 0.002, "loss": 2.3374, "step": 187660 }, { "epoch": 0.7254797358939865, "grad_norm": 0.10570773482322693, "learning_rate": 0.002, "loss": 2.3296, "step": 187670 }, { "epoch": 0.7255183930973698, "grad_norm": 0.10656732320785522, "learning_rate": 0.002, "loss": 2.3357, "step": 187680 }, { "epoch": 0.725557050300753, "grad_norm": 0.0977579653263092, "learning_rate": 0.002, "loss": 2.3406, "step": 187690 }, { "epoch": 0.7255957075041363, "grad_norm": 0.1082698181271553, "learning_rate": 0.002, "loss": 2.3471, "step": 187700 }, { "epoch": 0.7256343647075196, "grad_norm": 0.10016846656799316, "learning_rate": 0.002, "loss": 2.3478, "step": 187710 }, { "epoch": 0.7256730219109029, "grad_norm": 0.09752330929040909, "learning_rate": 0.002, "loss": 2.3235, "step": 187720 }, { "epoch": 0.7257116791142861, "grad_norm": 0.09331176429986954, "learning_rate": 0.002, "loss": 2.3368, "step": 187730 }, { "epoch": 0.7257503363176694, "grad_norm": 0.10335078835487366, "learning_rate": 0.002, "loss": 2.342, "step": 187740 }, { "epoch": 0.7257889935210527, "grad_norm": 0.13783538341522217, "learning_rate": 0.002, "loss": 2.3478, "step": 187750 }, { "epoch": 0.725827650724436, "grad_norm": 0.10869824141263962, "learning_rate": 0.002, "loss": 2.3471, "step": 187760 }, { "epoch": 0.7258663079278193, "grad_norm": 0.1278955191373825, "learning_rate": 0.002, "loss": 2.338, "step": 187770 }, { "epoch": 0.7259049651312025, "grad_norm": 0.1017908975481987, "learning_rate": 0.002, "loss": 2.3537, "step": 187780 }, { "epoch": 0.7259436223345859, "grad_norm": 0.09236116707324982, "learning_rate": 0.002, "loss": 2.325, "step": 187790 }, { "epoch": 0.7259822795379691, "grad_norm": 0.11204037815332413, "learning_rate": 0.002, "loss": 2.3521, "step": 187800 }, { "epoch": 0.7260209367413524, "grad_norm": 0.09957336634397507, "learning_rate": 0.002, "loss": 2.3514, "step": 187810 }, { "epoch": 0.7260595939447356, "grad_norm": 0.45305535197257996, "learning_rate": 0.002, "loss": 2.3507, "step": 187820 }, { "epoch": 0.726098251148119, "grad_norm": 0.10775720328092575, "learning_rate": 0.002, "loss": 2.3566, "step": 187830 }, { "epoch": 0.7261369083515022, "grad_norm": 0.09511881321668625, "learning_rate": 0.002, "loss": 2.3462, "step": 187840 }, { "epoch": 0.7261755655548855, "grad_norm": 0.1379610151052475, "learning_rate": 0.002, "loss": 2.3472, "step": 187850 }, { "epoch": 0.7262142227582687, "grad_norm": 0.10464418679475784, "learning_rate": 0.002, "loss": 2.3286, "step": 187860 }, { "epoch": 0.7262528799616521, "grad_norm": 0.09923885762691498, "learning_rate": 0.002, "loss": 2.331, "step": 187870 }, { "epoch": 0.7262915371650354, "grad_norm": 0.10207533836364746, "learning_rate": 0.002, "loss": 2.344, "step": 187880 }, { "epoch": 0.7263301943684186, "grad_norm": 0.09654777497053146, "learning_rate": 0.002, "loss": 2.3221, "step": 187890 }, { "epoch": 0.7263688515718019, "grad_norm": 0.09469664841890335, "learning_rate": 0.002, "loss": 2.3399, "step": 187900 }, { "epoch": 0.7264075087751851, "grad_norm": 0.08925016224384308, "learning_rate": 0.002, "loss": 2.3464, "step": 187910 }, { "epoch": 0.7264461659785685, "grad_norm": 0.10459795594215393, "learning_rate": 0.002, "loss": 2.3357, "step": 187920 }, { "epoch": 0.7264848231819517, "grad_norm": 0.08447825908660889, "learning_rate": 0.002, "loss": 2.3271, "step": 187930 }, { "epoch": 0.726523480385335, "grad_norm": 0.11422178894281387, "learning_rate": 0.002, "loss": 2.3485, "step": 187940 }, { "epoch": 0.7265621375887182, "grad_norm": 0.11474387347698212, "learning_rate": 0.002, "loss": 2.3354, "step": 187950 }, { "epoch": 0.7266007947921016, "grad_norm": 0.09868604689836502, "learning_rate": 0.002, "loss": 2.3354, "step": 187960 }, { "epoch": 0.7266394519954849, "grad_norm": 0.11762841045856476, "learning_rate": 0.002, "loss": 2.353, "step": 187970 }, { "epoch": 0.7266781091988681, "grad_norm": 0.10311324149370193, "learning_rate": 0.002, "loss": 2.3473, "step": 187980 }, { "epoch": 0.7267167664022514, "grad_norm": 0.10104495286941528, "learning_rate": 0.002, "loss": 2.3384, "step": 187990 }, { "epoch": 0.7267554236056347, "grad_norm": 0.09831292927265167, "learning_rate": 0.002, "loss": 2.3406, "step": 188000 }, { "epoch": 0.726794080809018, "grad_norm": 0.11889403313398361, "learning_rate": 0.002, "loss": 2.3356, "step": 188010 }, { "epoch": 0.7268327380124012, "grad_norm": 0.21537570655345917, "learning_rate": 0.002, "loss": 2.3426, "step": 188020 }, { "epoch": 0.7268713952157845, "grad_norm": 0.10098782926797867, "learning_rate": 0.002, "loss": 2.3432, "step": 188030 }, { "epoch": 0.7269100524191678, "grad_norm": 0.1068992167711258, "learning_rate": 0.002, "loss": 2.3454, "step": 188040 }, { "epoch": 0.7269487096225511, "grad_norm": 0.1335403174161911, "learning_rate": 0.002, "loss": 2.3385, "step": 188050 }, { "epoch": 0.7269873668259343, "grad_norm": 0.11194917559623718, "learning_rate": 0.002, "loss": 2.3418, "step": 188060 }, { "epoch": 0.7270260240293176, "grad_norm": 0.13839438557624817, "learning_rate": 0.002, "loss": 2.318, "step": 188070 }, { "epoch": 0.7270646812327008, "grad_norm": 0.10070030391216278, "learning_rate": 0.002, "loss": 2.3388, "step": 188080 }, { "epoch": 0.7271033384360842, "grad_norm": 0.09300320595502853, "learning_rate": 0.002, "loss": 2.3452, "step": 188090 }, { "epoch": 0.7271419956394675, "grad_norm": 0.1040036752820015, "learning_rate": 0.002, "loss": 2.3315, "step": 188100 }, { "epoch": 0.7271806528428507, "grad_norm": 0.13322311639785767, "learning_rate": 0.002, "loss": 2.3358, "step": 188110 }, { "epoch": 0.727219310046234, "grad_norm": 0.10786911100149155, "learning_rate": 0.002, "loss": 2.3389, "step": 188120 }, { "epoch": 0.7272579672496173, "grad_norm": 0.11439685523509979, "learning_rate": 0.002, "loss": 2.3472, "step": 188130 }, { "epoch": 0.7272966244530006, "grad_norm": 0.11760654300451279, "learning_rate": 0.002, "loss": 2.3393, "step": 188140 }, { "epoch": 0.7273352816563838, "grad_norm": 0.10219842195510864, "learning_rate": 0.002, "loss": 2.3368, "step": 188150 }, { "epoch": 0.7273739388597671, "grad_norm": 0.12190880626440048, "learning_rate": 0.002, "loss": 2.3415, "step": 188160 }, { "epoch": 0.7274125960631505, "grad_norm": 0.10149639844894409, "learning_rate": 0.002, "loss": 2.3388, "step": 188170 }, { "epoch": 0.7274512532665337, "grad_norm": 0.10100218653678894, "learning_rate": 0.002, "loss": 2.3349, "step": 188180 }, { "epoch": 0.727489910469917, "grad_norm": 0.1045944094657898, "learning_rate": 0.002, "loss": 2.3506, "step": 188190 }, { "epoch": 0.7275285676733002, "grad_norm": 0.09726139903068542, "learning_rate": 0.002, "loss": 2.3287, "step": 188200 }, { "epoch": 0.7275672248766836, "grad_norm": 0.1029781699180603, "learning_rate": 0.002, "loss": 2.3374, "step": 188210 }, { "epoch": 0.7276058820800668, "grad_norm": 0.1261553019285202, "learning_rate": 0.002, "loss": 2.339, "step": 188220 }, { "epoch": 0.7276445392834501, "grad_norm": 0.11521897464990616, "learning_rate": 0.002, "loss": 2.333, "step": 188230 }, { "epoch": 0.7276831964868333, "grad_norm": 0.08909417688846588, "learning_rate": 0.002, "loss": 2.3443, "step": 188240 }, { "epoch": 0.7277218536902166, "grad_norm": 0.0991474986076355, "learning_rate": 0.002, "loss": 2.3317, "step": 188250 }, { "epoch": 0.7277605108935999, "grad_norm": 0.11477166414260864, "learning_rate": 0.002, "loss": 2.3329, "step": 188260 }, { "epoch": 0.7277991680969832, "grad_norm": 0.15573208034038544, "learning_rate": 0.002, "loss": 2.3387, "step": 188270 }, { "epoch": 0.7278378253003664, "grad_norm": 0.11599601060152054, "learning_rate": 0.002, "loss": 2.3469, "step": 188280 }, { "epoch": 0.7278764825037497, "grad_norm": 0.11741472035646439, "learning_rate": 0.002, "loss": 2.3446, "step": 188290 }, { "epoch": 0.7279151397071331, "grad_norm": 0.1139383614063263, "learning_rate": 0.002, "loss": 2.3433, "step": 188300 }, { "epoch": 0.7279537969105163, "grad_norm": 0.09910546988248825, "learning_rate": 0.002, "loss": 2.3434, "step": 188310 }, { "epoch": 0.7279924541138996, "grad_norm": 0.09799962490797043, "learning_rate": 0.002, "loss": 2.3357, "step": 188320 }, { "epoch": 0.7280311113172828, "grad_norm": 0.09024277329444885, "learning_rate": 0.002, "loss": 2.3224, "step": 188330 }, { "epoch": 0.7280697685206662, "grad_norm": 0.10700663924217224, "learning_rate": 0.002, "loss": 2.3411, "step": 188340 }, { "epoch": 0.7281084257240494, "grad_norm": 0.09063316136598587, "learning_rate": 0.002, "loss": 2.3365, "step": 188350 }, { "epoch": 0.7281470829274327, "grad_norm": 0.10456562787294388, "learning_rate": 0.002, "loss": 2.3371, "step": 188360 }, { "epoch": 0.7281857401308159, "grad_norm": 0.1041400134563446, "learning_rate": 0.002, "loss": 2.3461, "step": 188370 }, { "epoch": 0.7282243973341993, "grad_norm": 0.11024066060781479, "learning_rate": 0.002, "loss": 2.3328, "step": 188380 }, { "epoch": 0.7282630545375826, "grad_norm": 0.09569244086742401, "learning_rate": 0.002, "loss": 2.333, "step": 188390 }, { "epoch": 0.7283017117409658, "grad_norm": 0.11875152587890625, "learning_rate": 0.002, "loss": 2.3429, "step": 188400 }, { "epoch": 0.7283403689443491, "grad_norm": 0.12720754742622375, "learning_rate": 0.002, "loss": 2.3359, "step": 188410 }, { "epoch": 0.7283790261477324, "grad_norm": 0.11065363883972168, "learning_rate": 0.002, "loss": 2.3265, "step": 188420 }, { "epoch": 0.7284176833511157, "grad_norm": 0.10257042944431305, "learning_rate": 0.002, "loss": 2.3511, "step": 188430 }, { "epoch": 0.7284563405544989, "grad_norm": 0.09267813712358475, "learning_rate": 0.002, "loss": 2.3453, "step": 188440 }, { "epoch": 0.7284949977578822, "grad_norm": 0.11144296079874039, "learning_rate": 0.002, "loss": 2.3475, "step": 188450 }, { "epoch": 0.7285336549612654, "grad_norm": 0.10019582509994507, "learning_rate": 0.002, "loss": 2.3346, "step": 188460 }, { "epoch": 0.7285723121646488, "grad_norm": 0.10946372896432877, "learning_rate": 0.002, "loss": 2.3493, "step": 188470 }, { "epoch": 0.728610969368032, "grad_norm": 0.1142217218875885, "learning_rate": 0.002, "loss": 2.3451, "step": 188480 }, { "epoch": 0.7286496265714153, "grad_norm": 0.09371957927942276, "learning_rate": 0.002, "loss": 2.3615, "step": 188490 }, { "epoch": 0.7286882837747986, "grad_norm": 0.12522457540035248, "learning_rate": 0.002, "loss": 2.3417, "step": 188500 }, { "epoch": 0.7287269409781819, "grad_norm": 0.11884385347366333, "learning_rate": 0.002, "loss": 2.3561, "step": 188510 }, { "epoch": 0.7287655981815652, "grad_norm": 0.0966656431555748, "learning_rate": 0.002, "loss": 2.3208, "step": 188520 }, { "epoch": 0.7288042553849484, "grad_norm": 0.10101541131734848, "learning_rate": 0.002, "loss": 2.3496, "step": 188530 }, { "epoch": 0.7288429125883317, "grad_norm": 0.09151628613471985, "learning_rate": 0.002, "loss": 2.3504, "step": 188540 }, { "epoch": 0.728881569791715, "grad_norm": 0.10385442525148392, "learning_rate": 0.002, "loss": 2.3574, "step": 188550 }, { "epoch": 0.7289202269950983, "grad_norm": 0.10391923040151596, "learning_rate": 0.002, "loss": 2.3329, "step": 188560 }, { "epoch": 0.7289588841984815, "grad_norm": 0.10009454935789108, "learning_rate": 0.002, "loss": 2.3499, "step": 188570 }, { "epoch": 0.7289975414018648, "grad_norm": 0.1031494289636612, "learning_rate": 0.002, "loss": 2.355, "step": 188580 }, { "epoch": 0.7290361986052482, "grad_norm": 0.12123165279626846, "learning_rate": 0.002, "loss": 2.3363, "step": 188590 }, { "epoch": 0.7290748558086314, "grad_norm": 0.09856395423412323, "learning_rate": 0.002, "loss": 2.355, "step": 188600 }, { "epoch": 0.7291135130120147, "grad_norm": 0.1068253144621849, "learning_rate": 0.002, "loss": 2.3525, "step": 188610 }, { "epoch": 0.7291521702153979, "grad_norm": 0.14589570462703705, "learning_rate": 0.002, "loss": 2.3461, "step": 188620 }, { "epoch": 0.7291908274187812, "grad_norm": 0.09643730521202087, "learning_rate": 0.002, "loss": 2.3315, "step": 188630 }, { "epoch": 0.7292294846221645, "grad_norm": 0.12230101972818375, "learning_rate": 0.002, "loss": 2.3296, "step": 188640 }, { "epoch": 0.7292681418255478, "grad_norm": 0.096110999584198, "learning_rate": 0.002, "loss": 2.3314, "step": 188650 }, { "epoch": 0.729306799028931, "grad_norm": 0.10471905767917633, "learning_rate": 0.002, "loss": 2.341, "step": 188660 }, { "epoch": 0.7293454562323143, "grad_norm": 0.11107119917869568, "learning_rate": 0.002, "loss": 2.3503, "step": 188670 }, { "epoch": 0.7293841134356976, "grad_norm": 0.10437849909067154, "learning_rate": 0.002, "loss": 2.3493, "step": 188680 }, { "epoch": 0.7294227706390809, "grad_norm": 0.09436061233282089, "learning_rate": 0.002, "loss": 2.3664, "step": 188690 }, { "epoch": 0.7294614278424641, "grad_norm": 0.09973449259996414, "learning_rate": 0.002, "loss": 2.3345, "step": 188700 }, { "epoch": 0.7295000850458474, "grad_norm": 0.12083771079778671, "learning_rate": 0.002, "loss": 2.3331, "step": 188710 }, { "epoch": 0.7295387422492308, "grad_norm": 0.10498762875795364, "learning_rate": 0.002, "loss": 2.3433, "step": 188720 }, { "epoch": 0.729577399452614, "grad_norm": 0.10566692054271698, "learning_rate": 0.002, "loss": 2.333, "step": 188730 }, { "epoch": 0.7296160566559973, "grad_norm": 0.097226083278656, "learning_rate": 0.002, "loss": 2.3428, "step": 188740 }, { "epoch": 0.7296547138593805, "grad_norm": 0.12635381519794464, "learning_rate": 0.002, "loss": 2.3361, "step": 188750 }, { "epoch": 0.7296933710627639, "grad_norm": 0.09882877767086029, "learning_rate": 0.002, "loss": 2.3352, "step": 188760 }, { "epoch": 0.7297320282661471, "grad_norm": 0.1020134687423706, "learning_rate": 0.002, "loss": 2.3547, "step": 188770 }, { "epoch": 0.7297706854695304, "grad_norm": 0.09664612263441086, "learning_rate": 0.002, "loss": 2.3466, "step": 188780 }, { "epoch": 0.7298093426729136, "grad_norm": 0.11408518254756927, "learning_rate": 0.002, "loss": 2.3566, "step": 188790 }, { "epoch": 0.729847999876297, "grad_norm": 0.1196506917476654, "learning_rate": 0.002, "loss": 2.332, "step": 188800 }, { "epoch": 0.7298866570796803, "grad_norm": 0.10588350147008896, "learning_rate": 0.002, "loss": 2.3367, "step": 188810 }, { "epoch": 0.7299253142830635, "grad_norm": 0.10277391225099564, "learning_rate": 0.002, "loss": 2.3509, "step": 188820 }, { "epoch": 0.7299639714864468, "grad_norm": 0.09160996973514557, "learning_rate": 0.002, "loss": 2.3567, "step": 188830 }, { "epoch": 0.73000262868983, "grad_norm": 0.10029343515634537, "learning_rate": 0.002, "loss": 2.3515, "step": 188840 }, { "epoch": 0.7300412858932134, "grad_norm": 0.09637118130922318, "learning_rate": 0.002, "loss": 2.3442, "step": 188850 }, { "epoch": 0.7300799430965966, "grad_norm": 0.12911668419837952, "learning_rate": 0.002, "loss": 2.312, "step": 188860 }, { "epoch": 0.7301186002999799, "grad_norm": 0.0958036333322525, "learning_rate": 0.002, "loss": 2.3213, "step": 188870 }, { "epoch": 0.7301572575033631, "grad_norm": 0.10825416445732117, "learning_rate": 0.002, "loss": 2.3511, "step": 188880 }, { "epoch": 0.7301959147067465, "grad_norm": 0.1088646650314331, "learning_rate": 0.002, "loss": 2.3295, "step": 188890 }, { "epoch": 0.7302345719101297, "grad_norm": 0.10222936421632767, "learning_rate": 0.002, "loss": 2.3487, "step": 188900 }, { "epoch": 0.730273229113513, "grad_norm": 0.10391924530267715, "learning_rate": 0.002, "loss": 2.3507, "step": 188910 }, { "epoch": 0.7303118863168963, "grad_norm": 0.0968007892370224, "learning_rate": 0.002, "loss": 2.3418, "step": 188920 }, { "epoch": 0.7303505435202796, "grad_norm": 0.4386405944824219, "learning_rate": 0.002, "loss": 2.347, "step": 188930 }, { "epoch": 0.7303892007236629, "grad_norm": 0.12861132621765137, "learning_rate": 0.002, "loss": 2.354, "step": 188940 }, { "epoch": 0.7304278579270461, "grad_norm": 0.10842836648225784, "learning_rate": 0.002, "loss": 2.3369, "step": 188950 }, { "epoch": 0.7304665151304294, "grad_norm": 0.09640350192785263, "learning_rate": 0.002, "loss": 2.3456, "step": 188960 }, { "epoch": 0.7305051723338127, "grad_norm": 0.10740409791469574, "learning_rate": 0.002, "loss": 2.3284, "step": 188970 }, { "epoch": 0.730543829537196, "grad_norm": 0.09772370010614395, "learning_rate": 0.002, "loss": 2.337, "step": 188980 }, { "epoch": 0.7305824867405792, "grad_norm": 0.09943395853042603, "learning_rate": 0.002, "loss": 2.3477, "step": 188990 }, { "epoch": 0.7306211439439625, "grad_norm": 0.10708174854516983, "learning_rate": 0.002, "loss": 2.3474, "step": 189000 }, { "epoch": 0.7306598011473457, "grad_norm": 0.10401121526956558, "learning_rate": 0.002, "loss": 2.3586, "step": 189010 }, { "epoch": 0.7306984583507291, "grad_norm": 0.09652461111545563, "learning_rate": 0.002, "loss": 2.3399, "step": 189020 }, { "epoch": 0.7307371155541124, "grad_norm": 0.10575590282678604, "learning_rate": 0.002, "loss": 2.3324, "step": 189030 }, { "epoch": 0.7307757727574956, "grad_norm": 0.08972377330064774, "learning_rate": 0.002, "loss": 2.3398, "step": 189040 }, { "epoch": 0.7308144299608789, "grad_norm": 0.11111640930175781, "learning_rate": 0.002, "loss": 2.3482, "step": 189050 }, { "epoch": 0.7308530871642622, "grad_norm": 0.11301799863576889, "learning_rate": 0.002, "loss": 2.3442, "step": 189060 }, { "epoch": 0.7308917443676455, "grad_norm": 0.09813731163740158, "learning_rate": 0.002, "loss": 2.3504, "step": 189070 }, { "epoch": 0.7309304015710287, "grad_norm": 0.11718396842479706, "learning_rate": 0.002, "loss": 2.3373, "step": 189080 }, { "epoch": 0.730969058774412, "grad_norm": 0.10873469710350037, "learning_rate": 0.002, "loss": 2.3243, "step": 189090 }, { "epoch": 0.7310077159777953, "grad_norm": 0.0982939749956131, "learning_rate": 0.002, "loss": 2.3396, "step": 189100 }, { "epoch": 0.7310463731811786, "grad_norm": 0.09334085136651993, "learning_rate": 0.002, "loss": 2.3421, "step": 189110 }, { "epoch": 0.7310850303845619, "grad_norm": 0.10179479420185089, "learning_rate": 0.002, "loss": 2.3547, "step": 189120 }, { "epoch": 0.7311236875879451, "grad_norm": 0.0983622670173645, "learning_rate": 0.002, "loss": 2.3337, "step": 189130 }, { "epoch": 0.7311623447913285, "grad_norm": 0.1025642529129982, "learning_rate": 0.002, "loss": 2.3309, "step": 189140 }, { "epoch": 0.7312010019947117, "grad_norm": 0.10435685515403748, "learning_rate": 0.002, "loss": 2.3365, "step": 189150 }, { "epoch": 0.731239659198095, "grad_norm": 0.11407685279846191, "learning_rate": 0.002, "loss": 2.3557, "step": 189160 }, { "epoch": 0.7312783164014782, "grad_norm": 0.3479335606098175, "learning_rate": 0.002, "loss": 2.3413, "step": 189170 }, { "epoch": 0.7313169736048615, "grad_norm": 0.09884588420391083, "learning_rate": 0.002, "loss": 2.3578, "step": 189180 }, { "epoch": 0.7313556308082448, "grad_norm": 0.10729101300239563, "learning_rate": 0.002, "loss": 2.3466, "step": 189190 }, { "epoch": 0.7313942880116281, "grad_norm": 0.12231425940990448, "learning_rate": 0.002, "loss": 2.3479, "step": 189200 }, { "epoch": 0.7314329452150113, "grad_norm": 0.09892288595438004, "learning_rate": 0.002, "loss": 2.3315, "step": 189210 }, { "epoch": 0.7314716024183946, "grad_norm": 0.12404607981443405, "learning_rate": 0.002, "loss": 2.3417, "step": 189220 }, { "epoch": 0.731510259621778, "grad_norm": 0.09801378846168518, "learning_rate": 0.002, "loss": 2.3341, "step": 189230 }, { "epoch": 0.7315489168251612, "grad_norm": 0.10780314356088638, "learning_rate": 0.002, "loss": 2.3321, "step": 189240 }, { "epoch": 0.7315875740285445, "grad_norm": 0.1150708794593811, "learning_rate": 0.002, "loss": 2.3478, "step": 189250 }, { "epoch": 0.7316262312319277, "grad_norm": 0.100004181265831, "learning_rate": 0.002, "loss": 2.3497, "step": 189260 }, { "epoch": 0.7316648884353111, "grad_norm": 0.10322702676057816, "learning_rate": 0.002, "loss": 2.3551, "step": 189270 }, { "epoch": 0.7317035456386943, "grad_norm": 0.09638111293315887, "learning_rate": 0.002, "loss": 2.3538, "step": 189280 }, { "epoch": 0.7317422028420776, "grad_norm": 0.10771961510181427, "learning_rate": 0.002, "loss": 2.3514, "step": 189290 }, { "epoch": 0.7317808600454608, "grad_norm": 0.09627382457256317, "learning_rate": 0.002, "loss": 2.3393, "step": 189300 }, { "epoch": 0.7318195172488442, "grad_norm": 0.12090234458446503, "learning_rate": 0.002, "loss": 2.3408, "step": 189310 }, { "epoch": 0.7318581744522275, "grad_norm": 0.09437627345323563, "learning_rate": 0.002, "loss": 2.3515, "step": 189320 }, { "epoch": 0.7318968316556107, "grad_norm": 0.11243630945682526, "learning_rate": 0.002, "loss": 2.3448, "step": 189330 }, { "epoch": 0.731935488858994, "grad_norm": 0.11899004131555557, "learning_rate": 0.002, "loss": 2.3407, "step": 189340 }, { "epoch": 0.7319741460623773, "grad_norm": 0.10806789994239807, "learning_rate": 0.002, "loss": 2.3428, "step": 189350 }, { "epoch": 0.7320128032657606, "grad_norm": 0.11363356560468674, "learning_rate": 0.002, "loss": 2.3212, "step": 189360 }, { "epoch": 0.7320514604691438, "grad_norm": 0.1054205521941185, "learning_rate": 0.002, "loss": 2.3516, "step": 189370 }, { "epoch": 0.7320901176725271, "grad_norm": 0.12295114248991013, "learning_rate": 0.002, "loss": 2.3354, "step": 189380 }, { "epoch": 0.7321287748759103, "grad_norm": 0.0959784984588623, "learning_rate": 0.002, "loss": 2.3355, "step": 189390 }, { "epoch": 0.7321674320792937, "grad_norm": 0.09075594693422318, "learning_rate": 0.002, "loss": 2.3419, "step": 189400 }, { "epoch": 0.7322060892826769, "grad_norm": 0.10640782117843628, "learning_rate": 0.002, "loss": 2.3492, "step": 189410 }, { "epoch": 0.7322447464860602, "grad_norm": 0.11442548781633377, "learning_rate": 0.002, "loss": 2.3502, "step": 189420 }, { "epoch": 0.7322834036894434, "grad_norm": 0.0960836112499237, "learning_rate": 0.002, "loss": 2.3424, "step": 189430 }, { "epoch": 0.7323220608928268, "grad_norm": 0.1013663038611412, "learning_rate": 0.002, "loss": 2.3412, "step": 189440 }, { "epoch": 0.7323607180962101, "grad_norm": 0.1008801981806755, "learning_rate": 0.002, "loss": 2.3407, "step": 189450 }, { "epoch": 0.7323993752995933, "grad_norm": 0.10865923017263412, "learning_rate": 0.002, "loss": 2.3488, "step": 189460 }, { "epoch": 0.7324380325029766, "grad_norm": 0.10891637206077576, "learning_rate": 0.002, "loss": 2.3298, "step": 189470 }, { "epoch": 0.7324766897063599, "grad_norm": 0.12851367890834808, "learning_rate": 0.002, "loss": 2.3393, "step": 189480 }, { "epoch": 0.7325153469097432, "grad_norm": 0.09730476886034012, "learning_rate": 0.002, "loss": 2.3233, "step": 189490 }, { "epoch": 0.7325540041131264, "grad_norm": 0.11946532875299454, "learning_rate": 0.002, "loss": 2.3327, "step": 189500 }, { "epoch": 0.7325926613165097, "grad_norm": 0.10447601974010468, "learning_rate": 0.002, "loss": 2.3299, "step": 189510 }, { "epoch": 0.732631318519893, "grad_norm": 0.10364936292171478, "learning_rate": 0.002, "loss": 2.3465, "step": 189520 }, { "epoch": 0.7326699757232763, "grad_norm": 0.10739912837743759, "learning_rate": 0.002, "loss": 2.3576, "step": 189530 }, { "epoch": 0.7327086329266596, "grad_norm": 0.10711197555065155, "learning_rate": 0.002, "loss": 2.3275, "step": 189540 }, { "epoch": 0.7327472901300428, "grad_norm": 0.11326030641794205, "learning_rate": 0.002, "loss": 2.3353, "step": 189550 }, { "epoch": 0.7327859473334261, "grad_norm": 0.11343449354171753, "learning_rate": 0.002, "loss": 2.3357, "step": 189560 }, { "epoch": 0.7328246045368094, "grad_norm": 0.10510224103927612, "learning_rate": 0.002, "loss": 2.3569, "step": 189570 }, { "epoch": 0.7328632617401927, "grad_norm": 0.10465297102928162, "learning_rate": 0.002, "loss": 2.3418, "step": 189580 }, { "epoch": 0.7329019189435759, "grad_norm": 0.112489253282547, "learning_rate": 0.002, "loss": 2.3532, "step": 189590 }, { "epoch": 0.7329405761469592, "grad_norm": 0.09400103986263275, "learning_rate": 0.002, "loss": 2.3373, "step": 189600 }, { "epoch": 0.7329792333503425, "grad_norm": 0.10640868544578552, "learning_rate": 0.002, "loss": 2.3393, "step": 189610 }, { "epoch": 0.7330178905537258, "grad_norm": 0.11750825494527817, "learning_rate": 0.002, "loss": 2.3432, "step": 189620 }, { "epoch": 0.733056547757109, "grad_norm": 0.09786515682935715, "learning_rate": 0.002, "loss": 2.3357, "step": 189630 }, { "epoch": 0.7330952049604923, "grad_norm": 0.11627877503633499, "learning_rate": 0.002, "loss": 2.3342, "step": 189640 }, { "epoch": 0.7331338621638757, "grad_norm": 0.1656639128923416, "learning_rate": 0.002, "loss": 2.352, "step": 189650 }, { "epoch": 0.7331725193672589, "grad_norm": 0.10381890088319778, "learning_rate": 0.002, "loss": 2.3394, "step": 189660 }, { "epoch": 0.7332111765706422, "grad_norm": 0.11755353212356567, "learning_rate": 0.002, "loss": 2.3399, "step": 189670 }, { "epoch": 0.7332498337740254, "grad_norm": 0.09677081555128098, "learning_rate": 0.002, "loss": 2.359, "step": 189680 }, { "epoch": 0.7332884909774088, "grad_norm": 0.09234776347875595, "learning_rate": 0.002, "loss": 2.3364, "step": 189690 }, { "epoch": 0.733327148180792, "grad_norm": 0.10879413783550262, "learning_rate": 0.002, "loss": 2.343, "step": 189700 }, { "epoch": 0.7333658053841753, "grad_norm": 0.10317614674568176, "learning_rate": 0.002, "loss": 2.3297, "step": 189710 }, { "epoch": 0.7334044625875585, "grad_norm": 0.09623871743679047, "learning_rate": 0.002, "loss": 2.3454, "step": 189720 }, { "epoch": 0.7334431197909418, "grad_norm": 0.10473791509866714, "learning_rate": 0.002, "loss": 2.3497, "step": 189730 }, { "epoch": 0.7334817769943252, "grad_norm": 0.09343335032463074, "learning_rate": 0.002, "loss": 2.3354, "step": 189740 }, { "epoch": 0.7335204341977084, "grad_norm": 0.1115652471780777, "learning_rate": 0.002, "loss": 2.3491, "step": 189750 }, { "epoch": 0.7335590914010917, "grad_norm": 0.14288023114204407, "learning_rate": 0.002, "loss": 2.3409, "step": 189760 }, { "epoch": 0.7335977486044749, "grad_norm": 0.1096411868929863, "learning_rate": 0.002, "loss": 2.3391, "step": 189770 }, { "epoch": 0.7336364058078583, "grad_norm": 0.1012328639626503, "learning_rate": 0.002, "loss": 2.3435, "step": 189780 }, { "epoch": 0.7336750630112415, "grad_norm": 0.09999390691518784, "learning_rate": 0.002, "loss": 2.3307, "step": 189790 }, { "epoch": 0.7337137202146248, "grad_norm": 0.11927925050258636, "learning_rate": 0.002, "loss": 2.3494, "step": 189800 }, { "epoch": 0.733752377418008, "grad_norm": 0.11045157164335251, "learning_rate": 0.002, "loss": 2.3383, "step": 189810 }, { "epoch": 0.7337910346213914, "grad_norm": 0.10713623464107513, "learning_rate": 0.002, "loss": 2.3407, "step": 189820 }, { "epoch": 0.7338296918247746, "grad_norm": 0.0943923145532608, "learning_rate": 0.002, "loss": 2.3268, "step": 189830 }, { "epoch": 0.7338683490281579, "grad_norm": 0.09983383119106293, "learning_rate": 0.002, "loss": 2.334, "step": 189840 }, { "epoch": 0.7339070062315411, "grad_norm": 0.11707711219787598, "learning_rate": 0.002, "loss": 2.3474, "step": 189850 }, { "epoch": 0.7339456634349245, "grad_norm": 0.12364879250526428, "learning_rate": 0.002, "loss": 2.3319, "step": 189860 }, { "epoch": 0.7339843206383078, "grad_norm": 0.10379421710968018, "learning_rate": 0.002, "loss": 2.3357, "step": 189870 }, { "epoch": 0.734022977841691, "grad_norm": 0.0928567424416542, "learning_rate": 0.002, "loss": 2.3249, "step": 189880 }, { "epoch": 0.7340616350450743, "grad_norm": 0.1087023988366127, "learning_rate": 0.002, "loss": 2.3459, "step": 189890 }, { "epoch": 0.7341002922484576, "grad_norm": 0.0999482050538063, "learning_rate": 0.002, "loss": 2.3578, "step": 189900 }, { "epoch": 0.7341389494518409, "grad_norm": 0.0944395586848259, "learning_rate": 0.002, "loss": 2.3487, "step": 189910 }, { "epoch": 0.7341776066552241, "grad_norm": 0.09572398662567139, "learning_rate": 0.002, "loss": 2.3359, "step": 189920 }, { "epoch": 0.7342162638586074, "grad_norm": 0.09270896762609482, "learning_rate": 0.002, "loss": 2.3307, "step": 189930 }, { "epoch": 0.7342549210619906, "grad_norm": 0.09984765946865082, "learning_rate": 0.002, "loss": 2.3473, "step": 189940 }, { "epoch": 0.734293578265374, "grad_norm": 0.10290472954511642, "learning_rate": 0.002, "loss": 2.3411, "step": 189950 }, { "epoch": 0.7343322354687573, "grad_norm": 0.1093931496143341, "learning_rate": 0.002, "loss": 2.3521, "step": 189960 }, { "epoch": 0.7343708926721405, "grad_norm": 0.11136391758918762, "learning_rate": 0.002, "loss": 2.3476, "step": 189970 }, { "epoch": 0.7344095498755238, "grad_norm": 0.09859353303909302, "learning_rate": 0.002, "loss": 2.3312, "step": 189980 }, { "epoch": 0.7344482070789071, "grad_norm": 0.10707998275756836, "learning_rate": 0.002, "loss": 2.3376, "step": 189990 }, { "epoch": 0.7344868642822904, "grad_norm": 0.09826818108558655, "learning_rate": 0.002, "loss": 2.3446, "step": 190000 }, { "epoch": 0.7345255214856736, "grad_norm": 0.1676829308271408, "learning_rate": 0.002, "loss": 2.3479, "step": 190010 }, { "epoch": 0.7345641786890569, "grad_norm": 0.1222255527973175, "learning_rate": 0.002, "loss": 2.3377, "step": 190020 }, { "epoch": 0.7346028358924402, "grad_norm": 0.08716561645269394, "learning_rate": 0.002, "loss": 2.339, "step": 190030 }, { "epoch": 0.7346414930958235, "grad_norm": 0.10340414196252823, "learning_rate": 0.002, "loss": 2.351, "step": 190040 }, { "epoch": 0.7346801502992067, "grad_norm": 0.1073414757847786, "learning_rate": 0.002, "loss": 2.3486, "step": 190050 }, { "epoch": 0.73471880750259, "grad_norm": 0.11026755720376968, "learning_rate": 0.002, "loss": 2.3492, "step": 190060 }, { "epoch": 0.7347574647059734, "grad_norm": 0.12019442766904831, "learning_rate": 0.002, "loss": 2.3415, "step": 190070 }, { "epoch": 0.7347961219093566, "grad_norm": 0.10721049457788467, "learning_rate": 0.002, "loss": 2.3408, "step": 190080 }, { "epoch": 0.7348347791127399, "grad_norm": 0.10024671256542206, "learning_rate": 0.002, "loss": 2.3378, "step": 190090 }, { "epoch": 0.7348734363161231, "grad_norm": 0.09239985793828964, "learning_rate": 0.002, "loss": 2.3374, "step": 190100 }, { "epoch": 0.7349120935195064, "grad_norm": 0.09348446875810623, "learning_rate": 0.002, "loss": 2.3462, "step": 190110 }, { "epoch": 0.7349507507228897, "grad_norm": 0.11527527123689651, "learning_rate": 0.002, "loss": 2.3453, "step": 190120 }, { "epoch": 0.734989407926273, "grad_norm": 0.10688439756631851, "learning_rate": 0.002, "loss": 2.3477, "step": 190130 }, { "epoch": 0.7350280651296562, "grad_norm": 0.10173143446445465, "learning_rate": 0.002, "loss": 2.3415, "step": 190140 }, { "epoch": 0.7350667223330395, "grad_norm": 0.09939683228731155, "learning_rate": 0.002, "loss": 2.3432, "step": 190150 }, { "epoch": 0.7351053795364229, "grad_norm": 0.10313131660223007, "learning_rate": 0.002, "loss": 2.3382, "step": 190160 }, { "epoch": 0.7351440367398061, "grad_norm": 0.09282536804676056, "learning_rate": 0.002, "loss": 2.3142, "step": 190170 }, { "epoch": 0.7351826939431894, "grad_norm": 0.09631568938493729, "learning_rate": 0.002, "loss": 2.3468, "step": 190180 }, { "epoch": 0.7352213511465726, "grad_norm": 0.09160216152667999, "learning_rate": 0.002, "loss": 2.3125, "step": 190190 }, { "epoch": 0.735260008349956, "grad_norm": 0.10035625845193863, "learning_rate": 0.002, "loss": 2.3475, "step": 190200 }, { "epoch": 0.7352986655533392, "grad_norm": 0.10442013293504715, "learning_rate": 0.002, "loss": 2.3372, "step": 190210 }, { "epoch": 0.7353373227567225, "grad_norm": 0.10213469713926315, "learning_rate": 0.002, "loss": 2.3488, "step": 190220 }, { "epoch": 0.7353759799601057, "grad_norm": 0.11790081113576889, "learning_rate": 0.002, "loss": 2.3361, "step": 190230 }, { "epoch": 0.7354146371634891, "grad_norm": 0.10916659981012344, "learning_rate": 0.002, "loss": 2.3406, "step": 190240 }, { "epoch": 0.7354532943668723, "grad_norm": 0.11207661777734756, "learning_rate": 0.002, "loss": 2.3434, "step": 190250 }, { "epoch": 0.7354919515702556, "grad_norm": 0.10120797902345657, "learning_rate": 0.002, "loss": 2.3423, "step": 190260 }, { "epoch": 0.7355306087736388, "grad_norm": 0.11318664997816086, "learning_rate": 0.002, "loss": 2.341, "step": 190270 }, { "epoch": 0.7355692659770222, "grad_norm": 0.09600941836833954, "learning_rate": 0.002, "loss": 2.3369, "step": 190280 }, { "epoch": 0.7356079231804055, "grad_norm": 0.1251683384180069, "learning_rate": 0.002, "loss": 2.3451, "step": 190290 }, { "epoch": 0.7356465803837887, "grad_norm": 0.09991093724966049, "learning_rate": 0.002, "loss": 2.344, "step": 190300 }, { "epoch": 0.735685237587172, "grad_norm": 0.0951126366853714, "learning_rate": 0.002, "loss": 2.3364, "step": 190310 }, { "epoch": 0.7357238947905552, "grad_norm": 0.11469744145870209, "learning_rate": 0.002, "loss": 2.3458, "step": 190320 }, { "epoch": 0.7357625519939386, "grad_norm": 0.10002505034208298, "learning_rate": 0.002, "loss": 2.3499, "step": 190330 }, { "epoch": 0.7358012091973218, "grad_norm": 0.10277873277664185, "learning_rate": 0.002, "loss": 2.3557, "step": 190340 }, { "epoch": 0.7358398664007051, "grad_norm": 0.11609012633562088, "learning_rate": 0.002, "loss": 2.3303, "step": 190350 }, { "epoch": 0.7358785236040883, "grad_norm": 0.2405710369348526, "learning_rate": 0.002, "loss": 2.3399, "step": 190360 }, { "epoch": 0.7359171808074717, "grad_norm": 0.1104801669716835, "learning_rate": 0.002, "loss": 2.344, "step": 190370 }, { "epoch": 0.735955838010855, "grad_norm": 0.11027061939239502, "learning_rate": 0.002, "loss": 2.355, "step": 190380 }, { "epoch": 0.7359944952142382, "grad_norm": 0.09764520823955536, "learning_rate": 0.002, "loss": 2.3288, "step": 190390 }, { "epoch": 0.7360331524176215, "grad_norm": 0.10245082527399063, "learning_rate": 0.002, "loss": 2.3502, "step": 190400 }, { "epoch": 0.7360718096210048, "grad_norm": 0.09403353184461594, "learning_rate": 0.002, "loss": 2.3374, "step": 190410 }, { "epoch": 0.7361104668243881, "grad_norm": 0.11879895627498627, "learning_rate": 0.002, "loss": 2.3326, "step": 190420 }, { "epoch": 0.7361491240277713, "grad_norm": 0.12122533470392227, "learning_rate": 0.002, "loss": 2.3582, "step": 190430 }, { "epoch": 0.7361877812311546, "grad_norm": 0.1289253979921341, "learning_rate": 0.002, "loss": 2.3407, "step": 190440 }, { "epoch": 0.7362264384345379, "grad_norm": 0.12489252537488937, "learning_rate": 0.002, "loss": 2.3455, "step": 190450 }, { "epoch": 0.7362650956379212, "grad_norm": 0.11175944656133652, "learning_rate": 0.002, "loss": 2.3554, "step": 190460 }, { "epoch": 0.7363037528413044, "grad_norm": 0.09459386765956879, "learning_rate": 0.002, "loss": 2.3429, "step": 190470 }, { "epoch": 0.7363424100446877, "grad_norm": 0.1060258150100708, "learning_rate": 0.002, "loss": 2.3426, "step": 190480 }, { "epoch": 0.736381067248071, "grad_norm": 0.10711876302957535, "learning_rate": 0.002, "loss": 2.3361, "step": 190490 }, { "epoch": 0.7364197244514543, "grad_norm": 0.10872458666563034, "learning_rate": 0.002, "loss": 2.352, "step": 190500 }, { "epoch": 0.7364583816548376, "grad_norm": 0.10507549345493317, "learning_rate": 0.002, "loss": 2.3164, "step": 190510 }, { "epoch": 0.7364970388582208, "grad_norm": 0.10001684725284576, "learning_rate": 0.002, "loss": 2.3409, "step": 190520 }, { "epoch": 0.7365356960616041, "grad_norm": 0.13420626521110535, "learning_rate": 0.002, "loss": 2.3391, "step": 190530 }, { "epoch": 0.7365743532649874, "grad_norm": 0.11179409176111221, "learning_rate": 0.002, "loss": 2.3311, "step": 190540 }, { "epoch": 0.7366130104683707, "grad_norm": 0.2960387170314789, "learning_rate": 0.002, "loss": 2.3203, "step": 190550 }, { "epoch": 0.7366516676717539, "grad_norm": 0.09838057309389114, "learning_rate": 0.002, "loss": 2.3375, "step": 190560 }, { "epoch": 0.7366903248751372, "grad_norm": 0.11178871989250183, "learning_rate": 0.002, "loss": 2.3353, "step": 190570 }, { "epoch": 0.7367289820785206, "grad_norm": 0.10592333227396011, "learning_rate": 0.002, "loss": 2.3306, "step": 190580 }, { "epoch": 0.7367676392819038, "grad_norm": 0.09014479070901871, "learning_rate": 0.002, "loss": 2.3428, "step": 190590 }, { "epoch": 0.7368062964852871, "grad_norm": 0.10933726280927658, "learning_rate": 0.002, "loss": 2.3428, "step": 190600 }, { "epoch": 0.7368449536886703, "grad_norm": 0.09692629426717758, "learning_rate": 0.002, "loss": 2.3472, "step": 190610 }, { "epoch": 0.7368836108920537, "grad_norm": 0.11744661629199982, "learning_rate": 0.002, "loss": 2.3236, "step": 190620 }, { "epoch": 0.7369222680954369, "grad_norm": 0.10347847640514374, "learning_rate": 0.002, "loss": 2.3451, "step": 190630 }, { "epoch": 0.7369609252988202, "grad_norm": 0.11454054713249207, "learning_rate": 0.002, "loss": 2.3503, "step": 190640 }, { "epoch": 0.7369995825022034, "grad_norm": 0.11765061318874359, "learning_rate": 0.002, "loss": 2.3444, "step": 190650 }, { "epoch": 0.7370382397055867, "grad_norm": 0.10797842592000961, "learning_rate": 0.002, "loss": 2.3331, "step": 190660 }, { "epoch": 0.73707689690897, "grad_norm": 0.10087337344884872, "learning_rate": 0.002, "loss": 2.3539, "step": 190670 }, { "epoch": 0.7371155541123533, "grad_norm": 0.12900900840759277, "learning_rate": 0.002, "loss": 2.3407, "step": 190680 }, { "epoch": 0.7371542113157366, "grad_norm": 0.11788572371006012, "learning_rate": 0.002, "loss": 2.3365, "step": 190690 }, { "epoch": 0.7371928685191198, "grad_norm": 0.09947472810745239, "learning_rate": 0.002, "loss": 2.3271, "step": 190700 }, { "epoch": 0.7372315257225032, "grad_norm": 0.13462482392787933, "learning_rate": 0.002, "loss": 2.3472, "step": 190710 }, { "epoch": 0.7372701829258864, "grad_norm": 0.0978013277053833, "learning_rate": 0.002, "loss": 2.3433, "step": 190720 }, { "epoch": 0.7373088401292697, "grad_norm": 0.11396888643503189, "learning_rate": 0.002, "loss": 2.3536, "step": 190730 }, { "epoch": 0.7373474973326529, "grad_norm": 0.11444772034883499, "learning_rate": 0.002, "loss": 2.342, "step": 190740 }, { "epoch": 0.7373861545360363, "grad_norm": 0.09565328806638718, "learning_rate": 0.002, "loss": 2.3317, "step": 190750 }, { "epoch": 0.7374248117394195, "grad_norm": 0.09409163147211075, "learning_rate": 0.002, "loss": 2.3563, "step": 190760 }, { "epoch": 0.7374634689428028, "grad_norm": 0.09765222668647766, "learning_rate": 0.002, "loss": 2.3413, "step": 190770 }, { "epoch": 0.737502126146186, "grad_norm": 0.11007392406463623, "learning_rate": 0.002, "loss": 2.3352, "step": 190780 }, { "epoch": 0.7375407833495694, "grad_norm": 0.10825943946838379, "learning_rate": 0.002, "loss": 2.3433, "step": 190790 }, { "epoch": 0.7375794405529527, "grad_norm": 0.255696177482605, "learning_rate": 0.002, "loss": 2.3495, "step": 190800 }, { "epoch": 0.7376180977563359, "grad_norm": 0.10646931082010269, "learning_rate": 0.002, "loss": 2.3537, "step": 190810 }, { "epoch": 0.7376567549597192, "grad_norm": 0.09007599949836731, "learning_rate": 0.002, "loss": 2.3387, "step": 190820 }, { "epoch": 0.7376954121631025, "grad_norm": 0.10881149023771286, "learning_rate": 0.002, "loss": 2.352, "step": 190830 }, { "epoch": 0.7377340693664858, "grad_norm": 0.08729346096515656, "learning_rate": 0.002, "loss": 2.3521, "step": 190840 }, { "epoch": 0.737772726569869, "grad_norm": 0.10455001890659332, "learning_rate": 0.002, "loss": 2.3449, "step": 190850 }, { "epoch": 0.7378113837732523, "grad_norm": 0.11487884074449539, "learning_rate": 0.002, "loss": 2.3443, "step": 190860 }, { "epoch": 0.7378500409766355, "grad_norm": 0.10313017666339874, "learning_rate": 0.002, "loss": 2.3325, "step": 190870 }, { "epoch": 0.7378886981800189, "grad_norm": 0.10160215198993683, "learning_rate": 0.002, "loss": 2.3307, "step": 190880 }, { "epoch": 0.7379273553834022, "grad_norm": 0.0965922549366951, "learning_rate": 0.002, "loss": 2.3324, "step": 190890 }, { "epoch": 0.7379660125867854, "grad_norm": 0.10829756408929825, "learning_rate": 0.002, "loss": 2.3462, "step": 190900 }, { "epoch": 0.7380046697901687, "grad_norm": 0.10519444942474365, "learning_rate": 0.002, "loss": 2.3427, "step": 190910 }, { "epoch": 0.738043326993552, "grad_norm": 0.1222805604338646, "learning_rate": 0.002, "loss": 2.3566, "step": 190920 }, { "epoch": 0.7380819841969353, "grad_norm": 0.11346898972988129, "learning_rate": 0.002, "loss": 2.3394, "step": 190930 }, { "epoch": 0.7381206414003185, "grad_norm": 0.10387032479047775, "learning_rate": 0.002, "loss": 2.3481, "step": 190940 }, { "epoch": 0.7381592986037018, "grad_norm": 0.10021762549877167, "learning_rate": 0.002, "loss": 2.3582, "step": 190950 }, { "epoch": 0.7381979558070851, "grad_norm": 0.10510313510894775, "learning_rate": 0.002, "loss": 2.3352, "step": 190960 }, { "epoch": 0.7382366130104684, "grad_norm": 0.09277694672346115, "learning_rate": 0.002, "loss": 2.3465, "step": 190970 }, { "epoch": 0.7382752702138516, "grad_norm": 0.11668671667575836, "learning_rate": 0.002, "loss": 2.3448, "step": 190980 }, { "epoch": 0.7383139274172349, "grad_norm": 0.11169299483299255, "learning_rate": 0.002, "loss": 2.3383, "step": 190990 }, { "epoch": 0.7383525846206183, "grad_norm": 0.09932884573936462, "learning_rate": 0.002, "loss": 2.3356, "step": 191000 }, { "epoch": 0.7383912418240015, "grad_norm": 0.09709254652261734, "learning_rate": 0.002, "loss": 2.3432, "step": 191010 }, { "epoch": 0.7384298990273848, "grad_norm": 0.08417420834302902, "learning_rate": 0.002, "loss": 2.3346, "step": 191020 }, { "epoch": 0.738468556230768, "grad_norm": 0.10885326564311981, "learning_rate": 0.002, "loss": 2.3323, "step": 191030 }, { "epoch": 0.7385072134341513, "grad_norm": 0.09797409176826477, "learning_rate": 0.002, "loss": 2.3401, "step": 191040 }, { "epoch": 0.7385458706375346, "grad_norm": 0.09689269214868546, "learning_rate": 0.002, "loss": 2.3458, "step": 191050 }, { "epoch": 0.7385845278409179, "grad_norm": 0.11540959030389786, "learning_rate": 0.002, "loss": 2.3251, "step": 191060 }, { "epoch": 0.7386231850443011, "grad_norm": 0.10804573446512222, "learning_rate": 0.002, "loss": 2.348, "step": 191070 }, { "epoch": 0.7386618422476844, "grad_norm": 0.11262696981430054, "learning_rate": 0.002, "loss": 2.3403, "step": 191080 }, { "epoch": 0.7387004994510677, "grad_norm": 0.09762831032276154, "learning_rate": 0.002, "loss": 2.3328, "step": 191090 }, { "epoch": 0.738739156654451, "grad_norm": 0.10504738986492157, "learning_rate": 0.002, "loss": 2.3562, "step": 191100 }, { "epoch": 0.7387778138578343, "grad_norm": 0.0954015776515007, "learning_rate": 0.002, "loss": 2.3367, "step": 191110 }, { "epoch": 0.7388164710612175, "grad_norm": 0.10296234488487244, "learning_rate": 0.002, "loss": 2.3326, "step": 191120 }, { "epoch": 0.7388551282646009, "grad_norm": 0.12148667871952057, "learning_rate": 0.002, "loss": 2.3365, "step": 191130 }, { "epoch": 0.7388937854679841, "grad_norm": 0.09568410366773605, "learning_rate": 0.002, "loss": 2.3559, "step": 191140 }, { "epoch": 0.7389324426713674, "grad_norm": 0.09313368797302246, "learning_rate": 0.002, "loss": 2.3451, "step": 191150 }, { "epoch": 0.7389710998747506, "grad_norm": 0.09592664986848831, "learning_rate": 0.002, "loss": 2.3352, "step": 191160 }, { "epoch": 0.739009757078134, "grad_norm": 0.0917651429772377, "learning_rate": 0.002, "loss": 2.3531, "step": 191170 }, { "epoch": 0.7390484142815172, "grad_norm": 0.09378854930400848, "learning_rate": 0.002, "loss": 2.3376, "step": 191180 }, { "epoch": 0.7390870714849005, "grad_norm": 0.1044289618730545, "learning_rate": 0.002, "loss": 2.334, "step": 191190 }, { "epoch": 0.7391257286882837, "grad_norm": 0.10949849337339401, "learning_rate": 0.002, "loss": 2.3449, "step": 191200 }, { "epoch": 0.7391643858916671, "grad_norm": 0.12043566256761551, "learning_rate": 0.002, "loss": 2.3285, "step": 191210 }, { "epoch": 0.7392030430950504, "grad_norm": 0.09803483635187149, "learning_rate": 0.002, "loss": 2.3379, "step": 191220 }, { "epoch": 0.7392417002984336, "grad_norm": 0.09884842485189438, "learning_rate": 0.002, "loss": 2.3476, "step": 191230 }, { "epoch": 0.7392803575018169, "grad_norm": 0.10830123722553253, "learning_rate": 0.002, "loss": 2.3284, "step": 191240 }, { "epoch": 0.7393190147052001, "grad_norm": 0.10305802524089813, "learning_rate": 0.002, "loss": 2.3331, "step": 191250 }, { "epoch": 0.7393576719085835, "grad_norm": 0.09698376804590225, "learning_rate": 0.002, "loss": 2.3403, "step": 191260 }, { "epoch": 0.7393963291119667, "grad_norm": 0.11778568476438522, "learning_rate": 0.002, "loss": 2.34, "step": 191270 }, { "epoch": 0.73943498631535, "grad_norm": 0.10067816823720932, "learning_rate": 0.002, "loss": 2.3377, "step": 191280 }, { "epoch": 0.7394736435187332, "grad_norm": 0.0829603299498558, "learning_rate": 0.002, "loss": 2.334, "step": 191290 }, { "epoch": 0.7395123007221166, "grad_norm": 0.1029440388083458, "learning_rate": 0.002, "loss": 2.3506, "step": 191300 }, { "epoch": 0.7395509579254999, "grad_norm": 0.1000521332025528, "learning_rate": 0.002, "loss": 2.3469, "step": 191310 }, { "epoch": 0.7395896151288831, "grad_norm": 0.09940711408853531, "learning_rate": 0.002, "loss": 2.3367, "step": 191320 }, { "epoch": 0.7396282723322664, "grad_norm": 0.10901033133268356, "learning_rate": 0.002, "loss": 2.3338, "step": 191330 }, { "epoch": 0.7396669295356497, "grad_norm": 0.10165905952453613, "learning_rate": 0.002, "loss": 2.3284, "step": 191340 }, { "epoch": 0.739705586739033, "grad_norm": 0.09632701426744461, "learning_rate": 0.002, "loss": 2.3332, "step": 191350 }, { "epoch": 0.7397442439424162, "grad_norm": 0.0995948389172554, "learning_rate": 0.002, "loss": 2.3486, "step": 191360 }, { "epoch": 0.7397829011457995, "grad_norm": 0.11200392991304398, "learning_rate": 0.002, "loss": 2.3438, "step": 191370 }, { "epoch": 0.7398215583491828, "grad_norm": 0.10719288140535355, "learning_rate": 0.002, "loss": 2.3273, "step": 191380 }, { "epoch": 0.7398602155525661, "grad_norm": 0.11133550107479095, "learning_rate": 0.002, "loss": 2.3374, "step": 191390 }, { "epoch": 0.7398988727559493, "grad_norm": 0.10408426076173782, "learning_rate": 0.002, "loss": 2.3382, "step": 191400 }, { "epoch": 0.7399375299593326, "grad_norm": 0.09851880371570587, "learning_rate": 0.002, "loss": 2.3444, "step": 191410 }, { "epoch": 0.7399761871627158, "grad_norm": 0.11316296458244324, "learning_rate": 0.002, "loss": 2.3429, "step": 191420 }, { "epoch": 0.7400148443660992, "grad_norm": 0.11002680659294128, "learning_rate": 0.002, "loss": 2.3372, "step": 191430 }, { "epoch": 0.7400535015694825, "grad_norm": 0.11452383548021317, "learning_rate": 0.002, "loss": 2.3356, "step": 191440 }, { "epoch": 0.7400921587728657, "grad_norm": 0.10038605332374573, "learning_rate": 0.002, "loss": 2.3426, "step": 191450 }, { "epoch": 0.740130815976249, "grad_norm": 0.10871243476867676, "learning_rate": 0.002, "loss": 2.3476, "step": 191460 }, { "epoch": 0.7401694731796323, "grad_norm": 0.10097894817590714, "learning_rate": 0.002, "loss": 2.3505, "step": 191470 }, { "epoch": 0.7402081303830156, "grad_norm": 0.09067437052726746, "learning_rate": 0.002, "loss": 2.3304, "step": 191480 }, { "epoch": 0.7402467875863988, "grad_norm": 0.10413364320993423, "learning_rate": 0.002, "loss": 2.346, "step": 191490 }, { "epoch": 0.7402854447897821, "grad_norm": 0.09894074499607086, "learning_rate": 0.002, "loss": 2.3401, "step": 191500 }, { "epoch": 0.7403241019931655, "grad_norm": 0.10216416418552399, "learning_rate": 0.002, "loss": 2.3362, "step": 191510 }, { "epoch": 0.7403627591965487, "grad_norm": 0.1033996120095253, "learning_rate": 0.002, "loss": 2.3447, "step": 191520 }, { "epoch": 0.740401416399932, "grad_norm": 0.09722767025232315, "learning_rate": 0.002, "loss": 2.3367, "step": 191530 }, { "epoch": 0.7404400736033152, "grad_norm": 0.11290927231311798, "learning_rate": 0.002, "loss": 2.3437, "step": 191540 }, { "epoch": 0.7404787308066986, "grad_norm": 0.09460388869047165, "learning_rate": 0.002, "loss": 2.3492, "step": 191550 }, { "epoch": 0.7405173880100818, "grad_norm": 0.11431378871202469, "learning_rate": 0.002, "loss": 2.3386, "step": 191560 }, { "epoch": 0.7405560452134651, "grad_norm": 0.10955523699522018, "learning_rate": 0.002, "loss": 2.3415, "step": 191570 }, { "epoch": 0.7405947024168483, "grad_norm": 0.1201447993516922, "learning_rate": 0.002, "loss": 2.3353, "step": 191580 }, { "epoch": 0.7406333596202316, "grad_norm": 0.10241043567657471, "learning_rate": 0.002, "loss": 2.3508, "step": 191590 }, { "epoch": 0.7406720168236149, "grad_norm": 0.10200318694114685, "learning_rate": 0.002, "loss": 2.3293, "step": 191600 }, { "epoch": 0.7407106740269982, "grad_norm": 0.09642831981182098, "learning_rate": 0.002, "loss": 2.3478, "step": 191610 }, { "epoch": 0.7407493312303814, "grad_norm": 0.09920842945575714, "learning_rate": 0.002, "loss": 2.3369, "step": 191620 }, { "epoch": 0.7407879884337647, "grad_norm": 0.13008153438568115, "learning_rate": 0.002, "loss": 2.3504, "step": 191630 }, { "epoch": 0.7408266456371481, "grad_norm": 0.10366988182067871, "learning_rate": 0.002, "loss": 2.3463, "step": 191640 }, { "epoch": 0.7408653028405313, "grad_norm": 0.1006745919585228, "learning_rate": 0.002, "loss": 2.3294, "step": 191650 }, { "epoch": 0.7409039600439146, "grad_norm": 0.10098261386156082, "learning_rate": 0.002, "loss": 2.3351, "step": 191660 }, { "epoch": 0.7409426172472978, "grad_norm": 0.09893878549337387, "learning_rate": 0.002, "loss": 2.3495, "step": 191670 }, { "epoch": 0.7409812744506812, "grad_norm": 0.10766490548849106, "learning_rate": 0.002, "loss": 2.3493, "step": 191680 }, { "epoch": 0.7410199316540644, "grad_norm": 0.10770270973443985, "learning_rate": 0.002, "loss": 2.3267, "step": 191690 }, { "epoch": 0.7410585888574477, "grad_norm": 0.10260528326034546, "learning_rate": 0.002, "loss": 2.3526, "step": 191700 }, { "epoch": 0.7410972460608309, "grad_norm": 0.10132882744073868, "learning_rate": 0.002, "loss": 2.3413, "step": 191710 }, { "epoch": 0.7411359032642143, "grad_norm": 0.11207374930381775, "learning_rate": 0.002, "loss": 2.3476, "step": 191720 }, { "epoch": 0.7411745604675976, "grad_norm": 0.11558990180492401, "learning_rate": 0.002, "loss": 2.3393, "step": 191730 }, { "epoch": 0.7412132176709808, "grad_norm": 0.09880539029836655, "learning_rate": 0.002, "loss": 2.3438, "step": 191740 }, { "epoch": 0.7412518748743641, "grad_norm": 0.09084846824407578, "learning_rate": 0.002, "loss": 2.3245, "step": 191750 }, { "epoch": 0.7412905320777474, "grad_norm": 0.10979164391756058, "learning_rate": 0.002, "loss": 2.348, "step": 191760 }, { "epoch": 0.7413291892811307, "grad_norm": 0.10676354914903641, "learning_rate": 0.002, "loss": 2.3488, "step": 191770 }, { "epoch": 0.7413678464845139, "grad_norm": 0.12359759211540222, "learning_rate": 0.002, "loss": 2.3304, "step": 191780 }, { "epoch": 0.7414065036878972, "grad_norm": 0.08710779994726181, "learning_rate": 0.002, "loss": 2.3439, "step": 191790 }, { "epoch": 0.7414451608912804, "grad_norm": 0.09951417148113251, "learning_rate": 0.002, "loss": 2.3416, "step": 191800 }, { "epoch": 0.7414838180946638, "grad_norm": 0.09909050911664963, "learning_rate": 0.002, "loss": 2.3339, "step": 191810 }, { "epoch": 0.741522475298047, "grad_norm": 0.09301384538412094, "learning_rate": 0.002, "loss": 2.3387, "step": 191820 }, { "epoch": 0.7415611325014303, "grad_norm": 0.10332369804382324, "learning_rate": 0.002, "loss": 2.3373, "step": 191830 }, { "epoch": 0.7415997897048136, "grad_norm": 0.11144154518842697, "learning_rate": 0.002, "loss": 2.3375, "step": 191840 }, { "epoch": 0.7416384469081969, "grad_norm": 0.11473096907138824, "learning_rate": 0.002, "loss": 2.33, "step": 191850 }, { "epoch": 0.7416771041115802, "grad_norm": 0.09674740582704544, "learning_rate": 0.002, "loss": 2.3385, "step": 191860 }, { "epoch": 0.7417157613149634, "grad_norm": 0.10228494554758072, "learning_rate": 0.002, "loss": 2.3437, "step": 191870 }, { "epoch": 0.7417544185183467, "grad_norm": 0.08817991614341736, "learning_rate": 0.002, "loss": 2.3353, "step": 191880 }, { "epoch": 0.74179307572173, "grad_norm": 0.10097159445285797, "learning_rate": 0.002, "loss": 2.3349, "step": 191890 }, { "epoch": 0.7418317329251133, "grad_norm": 0.09437946230173111, "learning_rate": 0.002, "loss": 2.3387, "step": 191900 }, { "epoch": 0.7418703901284965, "grad_norm": 0.38441652059555054, "learning_rate": 0.002, "loss": 2.3451, "step": 191910 }, { "epoch": 0.7419090473318798, "grad_norm": 0.112345851957798, "learning_rate": 0.002, "loss": 2.3332, "step": 191920 }, { "epoch": 0.7419477045352632, "grad_norm": 0.09058287739753723, "learning_rate": 0.002, "loss": 2.3417, "step": 191930 }, { "epoch": 0.7419863617386464, "grad_norm": 0.11062506586313248, "learning_rate": 0.002, "loss": 2.3517, "step": 191940 }, { "epoch": 0.7420250189420297, "grad_norm": 0.10590583086013794, "learning_rate": 0.002, "loss": 2.3513, "step": 191950 }, { "epoch": 0.7420636761454129, "grad_norm": 0.11020754277706146, "learning_rate": 0.002, "loss": 2.3319, "step": 191960 }, { "epoch": 0.7421023333487962, "grad_norm": 0.09555596858263016, "learning_rate": 0.002, "loss": 2.3223, "step": 191970 }, { "epoch": 0.7421409905521795, "grad_norm": 0.08776164799928665, "learning_rate": 0.002, "loss": 2.3389, "step": 191980 }, { "epoch": 0.7421796477555628, "grad_norm": 0.10907012969255447, "learning_rate": 0.002, "loss": 2.334, "step": 191990 }, { "epoch": 0.742218304958946, "grad_norm": 0.11223597079515457, "learning_rate": 0.002, "loss": 2.3269, "step": 192000 }, { "epoch": 0.7422569621623293, "grad_norm": 0.10648026317358017, "learning_rate": 0.002, "loss": 2.3394, "step": 192010 }, { "epoch": 0.7422956193657126, "grad_norm": 0.09623145312070847, "learning_rate": 0.002, "loss": 2.3417, "step": 192020 }, { "epoch": 0.7423342765690959, "grad_norm": 0.09438787400722504, "learning_rate": 0.002, "loss": 2.3441, "step": 192030 }, { "epoch": 0.7423729337724791, "grad_norm": 0.1130327582359314, "learning_rate": 0.002, "loss": 2.3481, "step": 192040 }, { "epoch": 0.7424115909758624, "grad_norm": 0.10181952267885208, "learning_rate": 0.002, "loss": 2.3563, "step": 192050 }, { "epoch": 0.7424502481792458, "grad_norm": 0.09615079313516617, "learning_rate": 0.002, "loss": 2.341, "step": 192060 }, { "epoch": 0.742488905382629, "grad_norm": 0.122190922498703, "learning_rate": 0.002, "loss": 2.3366, "step": 192070 }, { "epoch": 0.7425275625860123, "grad_norm": 0.09345017373561859, "learning_rate": 0.002, "loss": 2.3338, "step": 192080 }, { "epoch": 0.7425662197893955, "grad_norm": 0.11099596321582794, "learning_rate": 0.002, "loss": 2.3579, "step": 192090 }, { "epoch": 0.7426048769927789, "grad_norm": 0.10582207888364792, "learning_rate": 0.002, "loss": 2.3356, "step": 192100 }, { "epoch": 0.7426435341961621, "grad_norm": 0.1213928833603859, "learning_rate": 0.002, "loss": 2.3361, "step": 192110 }, { "epoch": 0.7426821913995454, "grad_norm": 0.11554766446352005, "learning_rate": 0.002, "loss": 2.3403, "step": 192120 }, { "epoch": 0.7427208486029286, "grad_norm": 0.10213926434516907, "learning_rate": 0.002, "loss": 2.3347, "step": 192130 }, { "epoch": 0.7427595058063119, "grad_norm": 0.09706184267997742, "learning_rate": 0.002, "loss": 2.3383, "step": 192140 }, { "epoch": 0.7427981630096953, "grad_norm": 0.10466006398200989, "learning_rate": 0.002, "loss": 2.3426, "step": 192150 }, { "epoch": 0.7428368202130785, "grad_norm": 0.1048019751906395, "learning_rate": 0.002, "loss": 2.3383, "step": 192160 }, { "epoch": 0.7428754774164618, "grad_norm": 0.10146649181842804, "learning_rate": 0.002, "loss": 2.3189, "step": 192170 }, { "epoch": 0.742914134619845, "grad_norm": 0.09698516130447388, "learning_rate": 0.002, "loss": 2.3572, "step": 192180 }, { "epoch": 0.7429527918232284, "grad_norm": 0.10559068620204926, "learning_rate": 0.002, "loss": 2.3398, "step": 192190 }, { "epoch": 0.7429914490266116, "grad_norm": 0.10536423325538635, "learning_rate": 0.002, "loss": 2.3438, "step": 192200 }, { "epoch": 0.7430301062299949, "grad_norm": 0.09452679008245468, "learning_rate": 0.002, "loss": 2.3381, "step": 192210 }, { "epoch": 0.7430687634333781, "grad_norm": 0.10646776854991913, "learning_rate": 0.002, "loss": 2.3383, "step": 192220 }, { "epoch": 0.7431074206367615, "grad_norm": 0.10824066400527954, "learning_rate": 0.002, "loss": 2.3463, "step": 192230 }, { "epoch": 0.7431460778401447, "grad_norm": 0.11754116415977478, "learning_rate": 0.002, "loss": 2.3378, "step": 192240 }, { "epoch": 0.743184735043528, "grad_norm": 0.11462371796369553, "learning_rate": 0.002, "loss": 2.3451, "step": 192250 }, { "epoch": 0.7432233922469113, "grad_norm": 0.10152816027402878, "learning_rate": 0.002, "loss": 2.3326, "step": 192260 }, { "epoch": 0.7432620494502946, "grad_norm": 0.1248365193605423, "learning_rate": 0.002, "loss": 2.3369, "step": 192270 }, { "epoch": 0.7433007066536779, "grad_norm": 0.10560888797044754, "learning_rate": 0.002, "loss": 2.335, "step": 192280 }, { "epoch": 0.7433393638570611, "grad_norm": 0.10040289908647537, "learning_rate": 0.002, "loss": 2.3288, "step": 192290 }, { "epoch": 0.7433780210604444, "grad_norm": 0.1166427955031395, "learning_rate": 0.002, "loss": 2.3426, "step": 192300 }, { "epoch": 0.7434166782638277, "grad_norm": 0.11556585133075714, "learning_rate": 0.002, "loss": 2.3506, "step": 192310 }, { "epoch": 0.743455335467211, "grad_norm": 0.09876317530870438, "learning_rate": 0.002, "loss": 2.3441, "step": 192320 }, { "epoch": 0.7434939926705942, "grad_norm": 0.11803120374679565, "learning_rate": 0.002, "loss": 2.3369, "step": 192330 }, { "epoch": 0.7435326498739775, "grad_norm": 0.10722057521343231, "learning_rate": 0.002, "loss": 2.3497, "step": 192340 }, { "epoch": 0.7435713070773607, "grad_norm": 0.10615711659193039, "learning_rate": 0.002, "loss": 2.3305, "step": 192350 }, { "epoch": 0.7436099642807441, "grad_norm": 0.09986952692270279, "learning_rate": 0.002, "loss": 2.3406, "step": 192360 }, { "epoch": 0.7436486214841274, "grad_norm": 0.17439299821853638, "learning_rate": 0.002, "loss": 2.3542, "step": 192370 }, { "epoch": 0.7436872786875106, "grad_norm": 0.11447092890739441, "learning_rate": 0.002, "loss": 2.3497, "step": 192380 }, { "epoch": 0.7437259358908939, "grad_norm": 0.10483738780021667, "learning_rate": 0.002, "loss": 2.3562, "step": 192390 }, { "epoch": 0.7437645930942772, "grad_norm": 0.10391459614038467, "learning_rate": 0.002, "loss": 2.3507, "step": 192400 }, { "epoch": 0.7438032502976605, "grad_norm": 0.10389243811368942, "learning_rate": 0.002, "loss": 2.3398, "step": 192410 }, { "epoch": 0.7438419075010437, "grad_norm": 0.10303031653165817, "learning_rate": 0.002, "loss": 2.3376, "step": 192420 }, { "epoch": 0.743880564704427, "grad_norm": 0.10993155837059021, "learning_rate": 0.002, "loss": 2.3335, "step": 192430 }, { "epoch": 0.7439192219078103, "grad_norm": 0.11139877885580063, "learning_rate": 0.002, "loss": 2.3438, "step": 192440 }, { "epoch": 0.7439578791111936, "grad_norm": 0.11654575914144516, "learning_rate": 0.002, "loss": 2.3486, "step": 192450 }, { "epoch": 0.7439965363145769, "grad_norm": 0.11384663730859756, "learning_rate": 0.002, "loss": 2.347, "step": 192460 }, { "epoch": 0.7440351935179601, "grad_norm": 0.10386136919260025, "learning_rate": 0.002, "loss": 2.3344, "step": 192470 }, { "epoch": 0.7440738507213435, "grad_norm": 0.11686296761035919, "learning_rate": 0.002, "loss": 2.3401, "step": 192480 }, { "epoch": 0.7441125079247267, "grad_norm": 0.08788301795721054, "learning_rate": 0.002, "loss": 2.3307, "step": 192490 }, { "epoch": 0.74415116512811, "grad_norm": 0.13229641318321228, "learning_rate": 0.002, "loss": 2.3407, "step": 192500 }, { "epoch": 0.7441898223314932, "grad_norm": 0.11530508100986481, "learning_rate": 0.002, "loss": 2.3275, "step": 192510 }, { "epoch": 0.7442284795348765, "grad_norm": 0.10118814557790756, "learning_rate": 0.002, "loss": 2.3295, "step": 192520 }, { "epoch": 0.7442671367382598, "grad_norm": 0.10761450976133347, "learning_rate": 0.002, "loss": 2.3338, "step": 192530 }, { "epoch": 0.7443057939416431, "grad_norm": 0.09663943201303482, "learning_rate": 0.002, "loss": 2.3289, "step": 192540 }, { "epoch": 0.7443444511450263, "grad_norm": 0.14217162132263184, "learning_rate": 0.002, "loss": 2.3418, "step": 192550 }, { "epoch": 0.7443831083484096, "grad_norm": 0.1184287816286087, "learning_rate": 0.002, "loss": 2.3386, "step": 192560 }, { "epoch": 0.744421765551793, "grad_norm": 0.1200072392821312, "learning_rate": 0.002, "loss": 2.3483, "step": 192570 }, { "epoch": 0.7444604227551762, "grad_norm": 0.10290877521038055, "learning_rate": 0.002, "loss": 2.344, "step": 192580 }, { "epoch": 0.7444990799585595, "grad_norm": 0.11370282620191574, "learning_rate": 0.002, "loss": 2.3298, "step": 192590 }, { "epoch": 0.7445377371619427, "grad_norm": 0.10389462858438492, "learning_rate": 0.002, "loss": 2.3509, "step": 192600 }, { "epoch": 0.7445763943653261, "grad_norm": 0.10813149809837341, "learning_rate": 0.002, "loss": 2.3456, "step": 192610 }, { "epoch": 0.7446150515687093, "grad_norm": 0.11805222928524017, "learning_rate": 0.002, "loss": 2.3258, "step": 192620 }, { "epoch": 0.7446537087720926, "grad_norm": 0.10988860577344894, "learning_rate": 0.002, "loss": 2.3449, "step": 192630 }, { "epoch": 0.7446923659754758, "grad_norm": 0.12260608375072479, "learning_rate": 0.002, "loss": 2.3458, "step": 192640 }, { "epoch": 0.7447310231788592, "grad_norm": 0.12242330610752106, "learning_rate": 0.002, "loss": 2.3289, "step": 192650 }, { "epoch": 0.7447696803822424, "grad_norm": 0.09700918197631836, "learning_rate": 0.002, "loss": 2.3469, "step": 192660 }, { "epoch": 0.7448083375856257, "grad_norm": 0.12920258939266205, "learning_rate": 0.002, "loss": 2.3562, "step": 192670 }, { "epoch": 0.744846994789009, "grad_norm": 0.10415103286504745, "learning_rate": 0.002, "loss": 2.3341, "step": 192680 }, { "epoch": 0.7448856519923923, "grad_norm": 0.10821574181318283, "learning_rate": 0.002, "loss": 2.3476, "step": 192690 }, { "epoch": 0.7449243091957756, "grad_norm": 0.11690433323383331, "learning_rate": 0.002, "loss": 2.3495, "step": 192700 }, { "epoch": 0.7449629663991588, "grad_norm": 0.11213494837284088, "learning_rate": 0.002, "loss": 2.3507, "step": 192710 }, { "epoch": 0.7450016236025421, "grad_norm": 0.10390239208936691, "learning_rate": 0.002, "loss": 2.347, "step": 192720 }, { "epoch": 0.7450402808059253, "grad_norm": 0.10177457332611084, "learning_rate": 0.002, "loss": 2.3448, "step": 192730 }, { "epoch": 0.7450789380093087, "grad_norm": 0.10255581140518188, "learning_rate": 0.002, "loss": 2.3482, "step": 192740 }, { "epoch": 0.7451175952126919, "grad_norm": 0.09474656730890274, "learning_rate": 0.002, "loss": 2.3425, "step": 192750 }, { "epoch": 0.7451562524160752, "grad_norm": 0.12057298421859741, "learning_rate": 0.002, "loss": 2.3386, "step": 192760 }, { "epoch": 0.7451949096194584, "grad_norm": 0.1214093342423439, "learning_rate": 0.002, "loss": 2.3366, "step": 192770 }, { "epoch": 0.7452335668228418, "grad_norm": 0.10260576009750366, "learning_rate": 0.002, "loss": 2.3318, "step": 192780 }, { "epoch": 0.7452722240262251, "grad_norm": 0.11322708427906036, "learning_rate": 0.002, "loss": 2.3549, "step": 192790 }, { "epoch": 0.7453108812296083, "grad_norm": 0.11825566738843918, "learning_rate": 0.002, "loss": 2.3459, "step": 192800 }, { "epoch": 0.7453495384329916, "grad_norm": 0.09943481534719467, "learning_rate": 0.002, "loss": 2.3412, "step": 192810 }, { "epoch": 0.7453881956363749, "grad_norm": 0.09891356527805328, "learning_rate": 0.002, "loss": 2.3425, "step": 192820 }, { "epoch": 0.7454268528397582, "grad_norm": 0.10330082476139069, "learning_rate": 0.002, "loss": 2.3473, "step": 192830 }, { "epoch": 0.7454655100431414, "grad_norm": 0.09493868798017502, "learning_rate": 0.002, "loss": 2.3547, "step": 192840 }, { "epoch": 0.7455041672465247, "grad_norm": 0.12381740659475327, "learning_rate": 0.002, "loss": 2.3201, "step": 192850 }, { "epoch": 0.745542824449908, "grad_norm": 0.10590598732233047, "learning_rate": 0.002, "loss": 2.3374, "step": 192860 }, { "epoch": 0.7455814816532913, "grad_norm": 0.09802375733852386, "learning_rate": 0.002, "loss": 2.3493, "step": 192870 }, { "epoch": 0.7456201388566746, "grad_norm": 0.10863767564296722, "learning_rate": 0.002, "loss": 2.3485, "step": 192880 }, { "epoch": 0.7456587960600578, "grad_norm": 0.13834026455879211, "learning_rate": 0.002, "loss": 2.3476, "step": 192890 }, { "epoch": 0.7456974532634411, "grad_norm": 0.10244042426347733, "learning_rate": 0.002, "loss": 2.3459, "step": 192900 }, { "epoch": 0.7457361104668244, "grad_norm": 0.09882020950317383, "learning_rate": 0.002, "loss": 2.3413, "step": 192910 }, { "epoch": 0.7457747676702077, "grad_norm": 0.10287920385599136, "learning_rate": 0.002, "loss": 2.3343, "step": 192920 }, { "epoch": 0.7458134248735909, "grad_norm": 0.12767957150936127, "learning_rate": 0.002, "loss": 2.3371, "step": 192930 }, { "epoch": 0.7458520820769742, "grad_norm": 0.10538261383771896, "learning_rate": 0.002, "loss": 2.3566, "step": 192940 }, { "epoch": 0.7458907392803575, "grad_norm": 0.11202721297740936, "learning_rate": 0.002, "loss": 2.3466, "step": 192950 }, { "epoch": 0.7459293964837408, "grad_norm": 0.12798555195331573, "learning_rate": 0.002, "loss": 2.331, "step": 192960 }, { "epoch": 0.745968053687124, "grad_norm": 0.09712910652160645, "learning_rate": 0.002, "loss": 2.3533, "step": 192970 }, { "epoch": 0.7460067108905073, "grad_norm": 0.0980161651968956, "learning_rate": 0.002, "loss": 2.3358, "step": 192980 }, { "epoch": 0.7460453680938907, "grad_norm": 0.11752115935087204, "learning_rate": 0.002, "loss": 2.3224, "step": 192990 }, { "epoch": 0.7460840252972739, "grad_norm": 0.10274143517017365, "learning_rate": 0.002, "loss": 2.343, "step": 193000 }, { "epoch": 0.7461226825006572, "grad_norm": 0.10485357791185379, "learning_rate": 0.002, "loss": 2.3557, "step": 193010 }, { "epoch": 0.7461613397040404, "grad_norm": 0.09393350034952164, "learning_rate": 0.002, "loss": 2.3431, "step": 193020 }, { "epoch": 0.7461999969074238, "grad_norm": 0.10289428383111954, "learning_rate": 0.002, "loss": 2.3368, "step": 193030 }, { "epoch": 0.746238654110807, "grad_norm": 0.10881324112415314, "learning_rate": 0.002, "loss": 2.3506, "step": 193040 }, { "epoch": 0.7462773113141903, "grad_norm": 0.11512856185436249, "learning_rate": 0.002, "loss": 2.3467, "step": 193050 }, { "epoch": 0.7463159685175735, "grad_norm": 0.12571898102760315, "learning_rate": 0.002, "loss": 2.3458, "step": 193060 }, { "epoch": 0.7463546257209568, "grad_norm": 0.08950827270746231, "learning_rate": 0.002, "loss": 2.3459, "step": 193070 }, { "epoch": 0.7463932829243402, "grad_norm": 0.09514682739973068, "learning_rate": 0.002, "loss": 2.3452, "step": 193080 }, { "epoch": 0.7464319401277234, "grad_norm": 0.10203292220830917, "learning_rate": 0.002, "loss": 2.3506, "step": 193090 }, { "epoch": 0.7464705973311067, "grad_norm": 0.11616890877485275, "learning_rate": 0.002, "loss": 2.3457, "step": 193100 }, { "epoch": 0.7465092545344899, "grad_norm": 0.1304377019405365, "learning_rate": 0.002, "loss": 2.3311, "step": 193110 }, { "epoch": 0.7465479117378733, "grad_norm": 0.11245515197515488, "learning_rate": 0.002, "loss": 2.3315, "step": 193120 }, { "epoch": 0.7465865689412565, "grad_norm": 0.10679817199707031, "learning_rate": 0.002, "loss": 2.3395, "step": 193130 }, { "epoch": 0.7466252261446398, "grad_norm": 0.20088240504264832, "learning_rate": 0.002, "loss": 2.3539, "step": 193140 }, { "epoch": 0.746663883348023, "grad_norm": 0.10575690120458603, "learning_rate": 0.002, "loss": 2.3383, "step": 193150 }, { "epoch": 0.7467025405514064, "grad_norm": 0.11533773690462112, "learning_rate": 0.002, "loss": 2.3417, "step": 193160 }, { "epoch": 0.7467411977547896, "grad_norm": 0.09326361119747162, "learning_rate": 0.002, "loss": 2.3492, "step": 193170 }, { "epoch": 0.7467798549581729, "grad_norm": 0.09732891619205475, "learning_rate": 0.002, "loss": 2.3446, "step": 193180 }, { "epoch": 0.7468185121615561, "grad_norm": 0.11433549970388412, "learning_rate": 0.002, "loss": 2.3348, "step": 193190 }, { "epoch": 0.7468571693649395, "grad_norm": 0.12189090996980667, "learning_rate": 0.002, "loss": 2.3449, "step": 193200 }, { "epoch": 0.7468958265683228, "grad_norm": 0.1078023612499237, "learning_rate": 0.002, "loss": 2.3549, "step": 193210 }, { "epoch": 0.746934483771706, "grad_norm": 0.10564499348402023, "learning_rate": 0.002, "loss": 2.352, "step": 193220 }, { "epoch": 0.7469731409750893, "grad_norm": 0.1329101026058197, "learning_rate": 0.002, "loss": 2.3515, "step": 193230 }, { "epoch": 0.7470117981784726, "grad_norm": 0.10954732447862625, "learning_rate": 0.002, "loss": 2.3521, "step": 193240 }, { "epoch": 0.7470504553818559, "grad_norm": 0.11114715039730072, "learning_rate": 0.002, "loss": 2.3485, "step": 193250 }, { "epoch": 0.7470891125852391, "grad_norm": 0.10692232847213745, "learning_rate": 0.002, "loss": 2.3438, "step": 193260 }, { "epoch": 0.7471277697886224, "grad_norm": 0.10856243222951889, "learning_rate": 0.002, "loss": 2.3446, "step": 193270 }, { "epoch": 0.7471664269920056, "grad_norm": 0.1030806303024292, "learning_rate": 0.002, "loss": 2.3433, "step": 193280 }, { "epoch": 0.747205084195389, "grad_norm": 0.10333049297332764, "learning_rate": 0.002, "loss": 2.3394, "step": 193290 }, { "epoch": 0.7472437413987723, "grad_norm": 0.10423696041107178, "learning_rate": 0.002, "loss": 2.3419, "step": 193300 }, { "epoch": 0.7472823986021555, "grad_norm": 0.09971468150615692, "learning_rate": 0.002, "loss": 2.3365, "step": 193310 }, { "epoch": 0.7473210558055388, "grad_norm": 0.09328296780586243, "learning_rate": 0.002, "loss": 2.3379, "step": 193320 }, { "epoch": 0.7473597130089221, "grad_norm": 0.09091471135616302, "learning_rate": 0.002, "loss": 2.3366, "step": 193330 }, { "epoch": 0.7473983702123054, "grad_norm": 0.09623685479164124, "learning_rate": 0.002, "loss": 2.3376, "step": 193340 }, { "epoch": 0.7474370274156886, "grad_norm": 0.11122284084558487, "learning_rate": 0.002, "loss": 2.3287, "step": 193350 }, { "epoch": 0.7474756846190719, "grad_norm": 0.1184777170419693, "learning_rate": 0.002, "loss": 2.3327, "step": 193360 }, { "epoch": 0.7475143418224552, "grad_norm": 0.0959344357252121, "learning_rate": 0.002, "loss": 2.3396, "step": 193370 }, { "epoch": 0.7475529990258385, "grad_norm": 0.11243680119514465, "learning_rate": 0.002, "loss": 2.3558, "step": 193380 }, { "epoch": 0.7475916562292217, "grad_norm": 0.10417129844427109, "learning_rate": 0.002, "loss": 2.3471, "step": 193390 }, { "epoch": 0.747630313432605, "grad_norm": 0.14244239032268524, "learning_rate": 0.002, "loss": 2.3398, "step": 193400 }, { "epoch": 0.7476689706359884, "grad_norm": 0.10252571105957031, "learning_rate": 0.002, "loss": 2.3393, "step": 193410 }, { "epoch": 0.7477076278393716, "grad_norm": 0.10040729492902756, "learning_rate": 0.002, "loss": 2.349, "step": 193420 }, { "epoch": 0.7477462850427549, "grad_norm": 0.10703305900096893, "learning_rate": 0.002, "loss": 2.3367, "step": 193430 }, { "epoch": 0.7477849422461381, "grad_norm": 0.10389512032270432, "learning_rate": 0.002, "loss": 2.3407, "step": 193440 }, { "epoch": 0.7478235994495214, "grad_norm": 0.10328540951013565, "learning_rate": 0.002, "loss": 2.3369, "step": 193450 }, { "epoch": 0.7478622566529047, "grad_norm": 0.09823735803365707, "learning_rate": 0.002, "loss": 2.3426, "step": 193460 }, { "epoch": 0.747900913856288, "grad_norm": 0.1279013454914093, "learning_rate": 0.002, "loss": 2.3443, "step": 193470 }, { "epoch": 0.7479395710596712, "grad_norm": 0.10907725989818573, "learning_rate": 0.002, "loss": 2.3411, "step": 193480 }, { "epoch": 0.7479782282630545, "grad_norm": 0.09639472514390945, "learning_rate": 0.002, "loss": 2.3463, "step": 193490 }, { "epoch": 0.7480168854664379, "grad_norm": 0.10604395717382431, "learning_rate": 0.002, "loss": 2.361, "step": 193500 }, { "epoch": 0.7480555426698211, "grad_norm": 0.1024792343378067, "learning_rate": 0.002, "loss": 2.3443, "step": 193510 }, { "epoch": 0.7480941998732044, "grad_norm": 0.1102176234126091, "learning_rate": 0.002, "loss": 2.3485, "step": 193520 }, { "epoch": 0.7481328570765876, "grad_norm": 0.10424952208995819, "learning_rate": 0.002, "loss": 2.3408, "step": 193530 }, { "epoch": 0.748171514279971, "grad_norm": 0.11627110838890076, "learning_rate": 0.002, "loss": 2.325, "step": 193540 }, { "epoch": 0.7482101714833542, "grad_norm": 0.10998349636793137, "learning_rate": 0.002, "loss": 2.343, "step": 193550 }, { "epoch": 0.7482488286867375, "grad_norm": 0.11724622547626495, "learning_rate": 0.002, "loss": 2.3611, "step": 193560 }, { "epoch": 0.7482874858901207, "grad_norm": 0.09914011508226395, "learning_rate": 0.002, "loss": 2.3371, "step": 193570 }, { "epoch": 0.7483261430935041, "grad_norm": 0.10280796885490417, "learning_rate": 0.002, "loss": 2.3395, "step": 193580 }, { "epoch": 0.7483648002968873, "grad_norm": 0.11854589730501175, "learning_rate": 0.002, "loss": 2.3398, "step": 193590 }, { "epoch": 0.7484034575002706, "grad_norm": 0.12063703685998917, "learning_rate": 0.002, "loss": 2.3574, "step": 193600 }, { "epoch": 0.7484421147036538, "grad_norm": 0.10675029456615448, "learning_rate": 0.002, "loss": 2.3412, "step": 193610 }, { "epoch": 0.7484807719070372, "grad_norm": 0.08650118857622147, "learning_rate": 0.002, "loss": 2.3428, "step": 193620 }, { "epoch": 0.7485194291104205, "grad_norm": 0.10270006954669952, "learning_rate": 0.002, "loss": 2.3262, "step": 193630 }, { "epoch": 0.7485580863138037, "grad_norm": 0.08983159065246582, "learning_rate": 0.002, "loss": 2.3611, "step": 193640 }, { "epoch": 0.748596743517187, "grad_norm": 0.10007721185684204, "learning_rate": 0.002, "loss": 2.3505, "step": 193650 }, { "epoch": 0.7486354007205702, "grad_norm": 0.12380728870630264, "learning_rate": 0.002, "loss": 2.3291, "step": 193660 }, { "epoch": 0.7486740579239536, "grad_norm": 0.15238088369369507, "learning_rate": 0.002, "loss": 2.3397, "step": 193670 }, { "epoch": 0.7487127151273368, "grad_norm": 0.1207793727517128, "learning_rate": 0.002, "loss": 2.343, "step": 193680 }, { "epoch": 0.7487513723307201, "grad_norm": 0.1177428811788559, "learning_rate": 0.002, "loss": 2.3573, "step": 193690 }, { "epoch": 0.7487900295341033, "grad_norm": 0.1081174910068512, "learning_rate": 0.002, "loss": 2.3442, "step": 193700 }, { "epoch": 0.7488286867374867, "grad_norm": 0.10161493718624115, "learning_rate": 0.002, "loss": 2.3346, "step": 193710 }, { "epoch": 0.74886734394087, "grad_norm": 0.1058894693851471, "learning_rate": 0.002, "loss": 2.3452, "step": 193720 }, { "epoch": 0.7489060011442532, "grad_norm": 0.10565893352031708, "learning_rate": 0.002, "loss": 2.3478, "step": 193730 }, { "epoch": 0.7489446583476365, "grad_norm": 0.10452398657798767, "learning_rate": 0.002, "loss": 2.3435, "step": 193740 }, { "epoch": 0.7489833155510198, "grad_norm": 0.10174624621868134, "learning_rate": 0.002, "loss": 2.3526, "step": 193750 }, { "epoch": 0.7490219727544031, "grad_norm": 0.09607743471860886, "learning_rate": 0.002, "loss": 2.3445, "step": 193760 }, { "epoch": 0.7490606299577863, "grad_norm": 0.12087777256965637, "learning_rate": 0.002, "loss": 2.3433, "step": 193770 }, { "epoch": 0.7490992871611696, "grad_norm": 0.10284367203712463, "learning_rate": 0.002, "loss": 2.3383, "step": 193780 }, { "epoch": 0.7491379443645529, "grad_norm": 0.11069675534963608, "learning_rate": 0.002, "loss": 2.3351, "step": 193790 }, { "epoch": 0.7491766015679362, "grad_norm": 0.1019555851817131, "learning_rate": 0.002, "loss": 2.3319, "step": 193800 }, { "epoch": 0.7492152587713194, "grad_norm": 0.10605629533529282, "learning_rate": 0.002, "loss": 2.3484, "step": 193810 }, { "epoch": 0.7492539159747027, "grad_norm": 0.09732295572757721, "learning_rate": 0.002, "loss": 2.3482, "step": 193820 }, { "epoch": 0.749292573178086, "grad_norm": 0.09522780776023865, "learning_rate": 0.002, "loss": 2.3314, "step": 193830 }, { "epoch": 0.7493312303814693, "grad_norm": 0.14510689675807953, "learning_rate": 0.002, "loss": 2.3437, "step": 193840 }, { "epoch": 0.7493698875848526, "grad_norm": 0.10406015813350677, "learning_rate": 0.002, "loss": 2.3527, "step": 193850 }, { "epoch": 0.7494085447882358, "grad_norm": 0.09954486787319183, "learning_rate": 0.002, "loss": 2.3566, "step": 193860 }, { "epoch": 0.7494472019916191, "grad_norm": 0.10951186716556549, "learning_rate": 0.002, "loss": 2.3457, "step": 193870 }, { "epoch": 0.7494858591950024, "grad_norm": 0.09955327212810516, "learning_rate": 0.002, "loss": 2.3359, "step": 193880 }, { "epoch": 0.7495245163983857, "grad_norm": 0.0938873291015625, "learning_rate": 0.002, "loss": 2.3541, "step": 193890 }, { "epoch": 0.7495631736017689, "grad_norm": 0.11965670436620712, "learning_rate": 0.002, "loss": 2.3415, "step": 193900 }, { "epoch": 0.7496018308051522, "grad_norm": 0.11990394443273544, "learning_rate": 0.002, "loss": 2.3428, "step": 193910 }, { "epoch": 0.7496404880085356, "grad_norm": 0.09880927950143814, "learning_rate": 0.002, "loss": 2.338, "step": 193920 }, { "epoch": 0.7496791452119188, "grad_norm": 0.11151470243930817, "learning_rate": 0.002, "loss": 2.3401, "step": 193930 }, { "epoch": 0.7497178024153021, "grad_norm": 0.10723958164453506, "learning_rate": 0.002, "loss": 2.3436, "step": 193940 }, { "epoch": 0.7497564596186853, "grad_norm": 0.11936169117689133, "learning_rate": 0.002, "loss": 2.3277, "step": 193950 }, { "epoch": 0.7497951168220687, "grad_norm": 0.09781043231487274, "learning_rate": 0.002, "loss": 2.3474, "step": 193960 }, { "epoch": 0.7498337740254519, "grad_norm": 0.08709392696619034, "learning_rate": 0.002, "loss": 2.3394, "step": 193970 }, { "epoch": 0.7498724312288352, "grad_norm": 0.10388471931219101, "learning_rate": 0.002, "loss": 2.3356, "step": 193980 }, { "epoch": 0.7499110884322184, "grad_norm": 0.18327024579048157, "learning_rate": 0.002, "loss": 2.3532, "step": 193990 }, { "epoch": 0.7499497456356017, "grad_norm": 0.11415718495845795, "learning_rate": 0.002, "loss": 2.3376, "step": 194000 }, { "epoch": 0.749988402838985, "grad_norm": 0.1244000792503357, "learning_rate": 0.002, "loss": 2.3556, "step": 194010 }, { "epoch": 0.7500270600423683, "grad_norm": 0.10619892179965973, "learning_rate": 0.002, "loss": 2.3382, "step": 194020 }, { "epoch": 0.7500657172457516, "grad_norm": 0.11513705551624298, "learning_rate": 0.002, "loss": 2.3345, "step": 194030 }, { "epoch": 0.7501043744491348, "grad_norm": 0.09795688092708588, "learning_rate": 0.002, "loss": 2.3375, "step": 194040 }, { "epoch": 0.7501430316525182, "grad_norm": 0.10906984657049179, "learning_rate": 0.002, "loss": 2.3315, "step": 194050 }, { "epoch": 0.7501816888559014, "grad_norm": 0.10620440542697906, "learning_rate": 0.002, "loss": 2.3401, "step": 194060 }, { "epoch": 0.7502203460592847, "grad_norm": 0.10489355772733688, "learning_rate": 0.002, "loss": 2.3323, "step": 194070 }, { "epoch": 0.7502590032626679, "grad_norm": 0.09925409406423569, "learning_rate": 0.002, "loss": 2.3503, "step": 194080 }, { "epoch": 0.7502976604660513, "grad_norm": 0.11829238384962082, "learning_rate": 0.002, "loss": 2.3395, "step": 194090 }, { "epoch": 0.7503363176694345, "grad_norm": 0.10316840559244156, "learning_rate": 0.002, "loss": 2.3372, "step": 194100 }, { "epoch": 0.7503749748728178, "grad_norm": 0.09597914665937424, "learning_rate": 0.002, "loss": 2.339, "step": 194110 }, { "epoch": 0.750413632076201, "grad_norm": 0.13279080390930176, "learning_rate": 0.002, "loss": 2.3414, "step": 194120 }, { "epoch": 0.7504522892795844, "grad_norm": 0.11907597631216049, "learning_rate": 0.002, "loss": 2.3325, "step": 194130 }, { "epoch": 0.7504909464829677, "grad_norm": 0.1021730899810791, "learning_rate": 0.002, "loss": 2.3456, "step": 194140 }, { "epoch": 0.7505296036863509, "grad_norm": 0.11217319220304489, "learning_rate": 0.002, "loss": 2.3443, "step": 194150 }, { "epoch": 0.7505682608897342, "grad_norm": 0.09784113615751266, "learning_rate": 0.002, "loss": 2.3542, "step": 194160 }, { "epoch": 0.7506069180931175, "grad_norm": 0.11889325827360153, "learning_rate": 0.002, "loss": 2.3341, "step": 194170 }, { "epoch": 0.7506455752965008, "grad_norm": 0.08993558585643768, "learning_rate": 0.002, "loss": 2.3239, "step": 194180 }, { "epoch": 0.750684232499884, "grad_norm": 0.09891916066408157, "learning_rate": 0.002, "loss": 2.3325, "step": 194190 }, { "epoch": 0.7507228897032673, "grad_norm": 0.09812989085912704, "learning_rate": 0.002, "loss": 2.3302, "step": 194200 }, { "epoch": 0.7507615469066505, "grad_norm": 0.12857575714588165, "learning_rate": 0.002, "loss": 2.3464, "step": 194210 }, { "epoch": 0.7508002041100339, "grad_norm": 0.10943218320608139, "learning_rate": 0.002, "loss": 2.3349, "step": 194220 }, { "epoch": 0.7508388613134171, "grad_norm": 0.10419569164514542, "learning_rate": 0.002, "loss": 2.3493, "step": 194230 }, { "epoch": 0.7508775185168004, "grad_norm": 0.0994962528347969, "learning_rate": 0.002, "loss": 2.3474, "step": 194240 }, { "epoch": 0.7509161757201837, "grad_norm": 0.10264037549495697, "learning_rate": 0.002, "loss": 2.3407, "step": 194250 }, { "epoch": 0.750954832923567, "grad_norm": 0.1005292534828186, "learning_rate": 0.002, "loss": 2.3288, "step": 194260 }, { "epoch": 0.7509934901269503, "grad_norm": 0.10990706831216812, "learning_rate": 0.002, "loss": 2.3377, "step": 194270 }, { "epoch": 0.7510321473303335, "grad_norm": 0.12250801920890808, "learning_rate": 0.002, "loss": 2.3449, "step": 194280 }, { "epoch": 0.7510708045337168, "grad_norm": 0.10842498391866684, "learning_rate": 0.002, "loss": 2.3416, "step": 194290 }, { "epoch": 0.7511094617371001, "grad_norm": 0.11050568521022797, "learning_rate": 0.002, "loss": 2.3467, "step": 194300 }, { "epoch": 0.7511481189404834, "grad_norm": 0.11261072754859924, "learning_rate": 0.002, "loss": 2.3098, "step": 194310 }, { "epoch": 0.7511867761438666, "grad_norm": 0.10377780348062515, "learning_rate": 0.002, "loss": 2.3301, "step": 194320 }, { "epoch": 0.7512254333472499, "grad_norm": 0.09198221564292908, "learning_rate": 0.002, "loss": 2.3325, "step": 194330 }, { "epoch": 0.7512640905506333, "grad_norm": 0.09806704521179199, "learning_rate": 0.002, "loss": 2.3487, "step": 194340 }, { "epoch": 0.7513027477540165, "grad_norm": 0.13210226595401764, "learning_rate": 0.002, "loss": 2.3444, "step": 194350 }, { "epoch": 0.7513414049573998, "grad_norm": 0.11560340225696564, "learning_rate": 0.002, "loss": 2.3473, "step": 194360 }, { "epoch": 0.751380062160783, "grad_norm": 0.10489094257354736, "learning_rate": 0.002, "loss": 2.3472, "step": 194370 }, { "epoch": 0.7514187193641663, "grad_norm": 0.10843883454799652, "learning_rate": 0.002, "loss": 2.3446, "step": 194380 }, { "epoch": 0.7514573765675496, "grad_norm": 0.10712260752916336, "learning_rate": 0.002, "loss": 2.346, "step": 194390 }, { "epoch": 0.7514960337709329, "grad_norm": 0.0975417047739029, "learning_rate": 0.002, "loss": 2.3467, "step": 194400 }, { "epoch": 0.7515346909743161, "grad_norm": 0.0889492928981781, "learning_rate": 0.002, "loss": 2.3392, "step": 194410 }, { "epoch": 0.7515733481776994, "grad_norm": 0.1247059628367424, "learning_rate": 0.002, "loss": 2.3434, "step": 194420 }, { "epoch": 0.7516120053810827, "grad_norm": 0.11323944479227066, "learning_rate": 0.002, "loss": 2.3484, "step": 194430 }, { "epoch": 0.751650662584466, "grad_norm": 0.09631729871034622, "learning_rate": 0.002, "loss": 2.3567, "step": 194440 }, { "epoch": 0.7516893197878493, "grad_norm": 0.09453834593296051, "learning_rate": 0.002, "loss": 2.3273, "step": 194450 }, { "epoch": 0.7517279769912325, "grad_norm": 0.1044001653790474, "learning_rate": 0.002, "loss": 2.3317, "step": 194460 }, { "epoch": 0.7517666341946159, "grad_norm": 0.10327418148517609, "learning_rate": 0.002, "loss": 2.3326, "step": 194470 }, { "epoch": 0.7518052913979991, "grad_norm": 0.10132991522550583, "learning_rate": 0.002, "loss": 2.3482, "step": 194480 }, { "epoch": 0.7518439486013824, "grad_norm": 0.1620233654975891, "learning_rate": 0.002, "loss": 2.3376, "step": 194490 }, { "epoch": 0.7518826058047656, "grad_norm": 0.10502295196056366, "learning_rate": 0.002, "loss": 2.3513, "step": 194500 }, { "epoch": 0.751921263008149, "grad_norm": 0.10931259393692017, "learning_rate": 0.002, "loss": 2.3412, "step": 194510 }, { "epoch": 0.7519599202115322, "grad_norm": 0.11826196312904358, "learning_rate": 0.002, "loss": 2.3383, "step": 194520 }, { "epoch": 0.7519985774149155, "grad_norm": 0.11824844777584076, "learning_rate": 0.002, "loss": 2.3422, "step": 194530 }, { "epoch": 0.7520372346182987, "grad_norm": 0.10922195017337799, "learning_rate": 0.002, "loss": 2.3476, "step": 194540 }, { "epoch": 0.7520758918216821, "grad_norm": 0.13291779160499573, "learning_rate": 0.002, "loss": 2.3477, "step": 194550 }, { "epoch": 0.7521145490250654, "grad_norm": 0.10089297592639923, "learning_rate": 0.002, "loss": 2.3522, "step": 194560 }, { "epoch": 0.7521532062284486, "grad_norm": 0.11099760979413986, "learning_rate": 0.002, "loss": 2.3368, "step": 194570 }, { "epoch": 0.7521918634318319, "grad_norm": 0.10743943601846695, "learning_rate": 0.002, "loss": 2.3378, "step": 194580 }, { "epoch": 0.7522305206352151, "grad_norm": 0.11014439910650253, "learning_rate": 0.002, "loss": 2.3486, "step": 194590 }, { "epoch": 0.7522691778385985, "grad_norm": 0.09815085679292679, "learning_rate": 0.002, "loss": 2.331, "step": 194600 }, { "epoch": 0.7523078350419817, "grad_norm": 0.10134302824735641, "learning_rate": 0.002, "loss": 2.3505, "step": 194610 }, { "epoch": 0.752346492245365, "grad_norm": 0.11710251122713089, "learning_rate": 0.002, "loss": 2.3525, "step": 194620 }, { "epoch": 0.7523851494487482, "grad_norm": 0.10482754558324814, "learning_rate": 0.002, "loss": 2.3574, "step": 194630 }, { "epoch": 0.7524238066521316, "grad_norm": 0.10847599804401398, "learning_rate": 0.002, "loss": 2.3337, "step": 194640 }, { "epoch": 0.7524624638555149, "grad_norm": 0.08916240185499191, "learning_rate": 0.002, "loss": 2.3404, "step": 194650 }, { "epoch": 0.7525011210588981, "grad_norm": 0.11111991107463837, "learning_rate": 0.002, "loss": 2.3359, "step": 194660 }, { "epoch": 0.7525397782622814, "grad_norm": 0.12952084839344025, "learning_rate": 0.002, "loss": 2.342, "step": 194670 }, { "epoch": 0.7525784354656647, "grad_norm": 0.10298001766204834, "learning_rate": 0.002, "loss": 2.3562, "step": 194680 }, { "epoch": 0.752617092669048, "grad_norm": 0.11402563750743866, "learning_rate": 0.002, "loss": 2.3365, "step": 194690 }, { "epoch": 0.7526557498724312, "grad_norm": 0.13321805000305176, "learning_rate": 0.002, "loss": 2.3347, "step": 194700 }, { "epoch": 0.7526944070758145, "grad_norm": 0.10599828511476517, "learning_rate": 0.002, "loss": 2.3428, "step": 194710 }, { "epoch": 0.7527330642791978, "grad_norm": 0.10890563577413559, "learning_rate": 0.002, "loss": 2.3561, "step": 194720 }, { "epoch": 0.7527717214825811, "grad_norm": 0.10188310593366623, "learning_rate": 0.002, "loss": 2.3633, "step": 194730 }, { "epoch": 0.7528103786859643, "grad_norm": 0.10961693525314331, "learning_rate": 0.002, "loss": 2.3319, "step": 194740 }, { "epoch": 0.7528490358893476, "grad_norm": 0.08141937106847763, "learning_rate": 0.002, "loss": 2.3415, "step": 194750 }, { "epoch": 0.7528876930927308, "grad_norm": 0.11148255318403244, "learning_rate": 0.002, "loss": 2.3447, "step": 194760 }, { "epoch": 0.7529263502961142, "grad_norm": 0.0978025570511818, "learning_rate": 0.002, "loss": 2.3432, "step": 194770 }, { "epoch": 0.7529650074994975, "grad_norm": 0.09731242060661316, "learning_rate": 0.002, "loss": 2.348, "step": 194780 }, { "epoch": 0.7530036647028807, "grad_norm": 0.09884215146303177, "learning_rate": 0.002, "loss": 2.3409, "step": 194790 }, { "epoch": 0.753042321906264, "grad_norm": 0.09421688318252563, "learning_rate": 0.002, "loss": 2.3243, "step": 194800 }, { "epoch": 0.7530809791096473, "grad_norm": 0.11129975318908691, "learning_rate": 0.002, "loss": 2.3483, "step": 194810 }, { "epoch": 0.7531196363130306, "grad_norm": 0.11319313943386078, "learning_rate": 0.002, "loss": 2.3435, "step": 194820 }, { "epoch": 0.7531582935164138, "grad_norm": 0.12444295734167099, "learning_rate": 0.002, "loss": 2.342, "step": 194830 }, { "epoch": 0.7531969507197971, "grad_norm": 0.09347015619277954, "learning_rate": 0.002, "loss": 2.3551, "step": 194840 }, { "epoch": 0.7532356079231805, "grad_norm": 0.10135524719953537, "learning_rate": 0.002, "loss": 2.3395, "step": 194850 }, { "epoch": 0.7532742651265637, "grad_norm": 0.10133438557386398, "learning_rate": 0.002, "loss": 2.3466, "step": 194860 }, { "epoch": 0.753312922329947, "grad_norm": 0.10349283367395401, "learning_rate": 0.002, "loss": 2.34, "step": 194870 }, { "epoch": 0.7533515795333302, "grad_norm": 0.0932273268699646, "learning_rate": 0.002, "loss": 2.3342, "step": 194880 }, { "epoch": 0.7533902367367136, "grad_norm": 0.10109259188175201, "learning_rate": 0.002, "loss": 2.3394, "step": 194890 }, { "epoch": 0.7534288939400968, "grad_norm": 0.11513467133045197, "learning_rate": 0.002, "loss": 2.3422, "step": 194900 }, { "epoch": 0.7534675511434801, "grad_norm": 0.09564170241355896, "learning_rate": 0.002, "loss": 2.3545, "step": 194910 }, { "epoch": 0.7535062083468633, "grad_norm": 0.08503452688455582, "learning_rate": 0.002, "loss": 2.334, "step": 194920 }, { "epoch": 0.7535448655502466, "grad_norm": 0.11041513830423355, "learning_rate": 0.002, "loss": 2.3281, "step": 194930 }, { "epoch": 0.7535835227536299, "grad_norm": 0.11314481496810913, "learning_rate": 0.002, "loss": 2.3421, "step": 194940 }, { "epoch": 0.7536221799570132, "grad_norm": 0.13088074326515198, "learning_rate": 0.002, "loss": 2.3388, "step": 194950 }, { "epoch": 0.7536608371603964, "grad_norm": 0.09050699323415756, "learning_rate": 0.002, "loss": 2.3228, "step": 194960 }, { "epoch": 0.7536994943637797, "grad_norm": 0.11696422845125198, "learning_rate": 0.002, "loss": 2.3439, "step": 194970 }, { "epoch": 0.7537381515671631, "grad_norm": 0.1043548732995987, "learning_rate": 0.002, "loss": 2.3345, "step": 194980 }, { "epoch": 0.7537768087705463, "grad_norm": 0.10563669353723526, "learning_rate": 0.002, "loss": 2.3401, "step": 194990 }, { "epoch": 0.7538154659739296, "grad_norm": 0.13230913877487183, "learning_rate": 0.002, "loss": 2.326, "step": 195000 }, { "epoch": 0.7538541231773128, "grad_norm": 0.0942579135298729, "learning_rate": 0.002, "loss": 2.3477, "step": 195010 }, { "epoch": 0.7538927803806962, "grad_norm": 0.10348600894212723, "learning_rate": 0.002, "loss": 2.3294, "step": 195020 }, { "epoch": 0.7539314375840794, "grad_norm": 0.09573236107826233, "learning_rate": 0.002, "loss": 2.3511, "step": 195030 }, { "epoch": 0.7539700947874627, "grad_norm": 0.10151621699333191, "learning_rate": 0.002, "loss": 2.3479, "step": 195040 }, { "epoch": 0.7540087519908459, "grad_norm": 0.10730200260877609, "learning_rate": 0.002, "loss": 2.3415, "step": 195050 }, { "epoch": 0.7540474091942293, "grad_norm": 0.09535379707813263, "learning_rate": 0.002, "loss": 2.3586, "step": 195060 }, { "epoch": 0.7540860663976126, "grad_norm": 0.1367979794740677, "learning_rate": 0.002, "loss": 2.3304, "step": 195070 }, { "epoch": 0.7541247236009958, "grad_norm": 0.1024353876709938, "learning_rate": 0.002, "loss": 2.3367, "step": 195080 }, { "epoch": 0.7541633808043791, "grad_norm": 0.11906842142343521, "learning_rate": 0.002, "loss": 2.3298, "step": 195090 }, { "epoch": 0.7542020380077624, "grad_norm": 0.10515178740024567, "learning_rate": 0.002, "loss": 2.3369, "step": 195100 }, { "epoch": 0.7542406952111457, "grad_norm": 0.23441074788570404, "learning_rate": 0.002, "loss": 2.3459, "step": 195110 }, { "epoch": 0.7542793524145289, "grad_norm": 0.11064566671848297, "learning_rate": 0.002, "loss": 2.3379, "step": 195120 }, { "epoch": 0.7543180096179122, "grad_norm": 0.09228164702653885, "learning_rate": 0.002, "loss": 2.3465, "step": 195130 }, { "epoch": 0.7543566668212954, "grad_norm": 0.10810859501361847, "learning_rate": 0.002, "loss": 2.3385, "step": 195140 }, { "epoch": 0.7543953240246788, "grad_norm": 0.10493254661560059, "learning_rate": 0.002, "loss": 2.3674, "step": 195150 }, { "epoch": 0.754433981228062, "grad_norm": 0.11579349637031555, "learning_rate": 0.002, "loss": 2.3448, "step": 195160 }, { "epoch": 0.7544726384314453, "grad_norm": 0.08747828751802444, "learning_rate": 0.002, "loss": 2.3349, "step": 195170 }, { "epoch": 0.7545112956348285, "grad_norm": 0.11919780820608139, "learning_rate": 0.002, "loss": 2.3428, "step": 195180 }, { "epoch": 0.7545499528382119, "grad_norm": 0.10446533560752869, "learning_rate": 0.002, "loss": 2.3459, "step": 195190 }, { "epoch": 0.7545886100415952, "grad_norm": 0.09462517499923706, "learning_rate": 0.002, "loss": 2.3469, "step": 195200 }, { "epoch": 0.7546272672449784, "grad_norm": 0.12835873663425446, "learning_rate": 0.002, "loss": 2.3352, "step": 195210 }, { "epoch": 0.7546659244483617, "grad_norm": 0.11104355752468109, "learning_rate": 0.002, "loss": 2.3408, "step": 195220 }, { "epoch": 0.754704581651745, "grad_norm": 0.10860821604728699, "learning_rate": 0.002, "loss": 2.3412, "step": 195230 }, { "epoch": 0.7547432388551283, "grad_norm": 0.10522500425577164, "learning_rate": 0.002, "loss": 2.3415, "step": 195240 }, { "epoch": 0.7547818960585115, "grad_norm": 0.0963943675160408, "learning_rate": 0.002, "loss": 2.337, "step": 195250 }, { "epoch": 0.7548205532618948, "grad_norm": 0.10599056631326675, "learning_rate": 0.002, "loss": 2.3393, "step": 195260 }, { "epoch": 0.7548592104652782, "grad_norm": 0.0898580327630043, "learning_rate": 0.002, "loss": 2.3506, "step": 195270 }, { "epoch": 0.7548978676686614, "grad_norm": 0.1415392905473709, "learning_rate": 0.002, "loss": 2.3617, "step": 195280 }, { "epoch": 0.7549365248720447, "grad_norm": 0.10526993870735168, "learning_rate": 0.002, "loss": 2.3523, "step": 195290 }, { "epoch": 0.7549751820754279, "grad_norm": 0.10201417654752731, "learning_rate": 0.002, "loss": 2.3294, "step": 195300 }, { "epoch": 0.7550138392788112, "grad_norm": 0.10746268182992935, "learning_rate": 0.002, "loss": 2.334, "step": 195310 }, { "epoch": 0.7550524964821945, "grad_norm": 0.09328329563140869, "learning_rate": 0.002, "loss": 2.3426, "step": 195320 }, { "epoch": 0.7550911536855778, "grad_norm": 0.1050257459282875, "learning_rate": 0.002, "loss": 2.3403, "step": 195330 }, { "epoch": 0.755129810888961, "grad_norm": 0.11118963360786438, "learning_rate": 0.002, "loss": 2.3364, "step": 195340 }, { "epoch": 0.7551684680923443, "grad_norm": 0.11248045414686203, "learning_rate": 0.002, "loss": 2.3366, "step": 195350 }, { "epoch": 0.7552071252957276, "grad_norm": 0.10005900263786316, "learning_rate": 0.002, "loss": 2.35, "step": 195360 }, { "epoch": 0.7552457824991109, "grad_norm": 0.10226564854383469, "learning_rate": 0.002, "loss": 2.3491, "step": 195370 }, { "epoch": 0.7552844397024941, "grad_norm": 0.1185409426689148, "learning_rate": 0.002, "loss": 2.354, "step": 195380 }, { "epoch": 0.7553230969058774, "grad_norm": 0.10499685257673264, "learning_rate": 0.002, "loss": 2.3518, "step": 195390 }, { "epoch": 0.7553617541092608, "grad_norm": 0.09735177457332611, "learning_rate": 0.002, "loss": 2.3341, "step": 195400 }, { "epoch": 0.755400411312644, "grad_norm": 0.09485459327697754, "learning_rate": 0.002, "loss": 2.3421, "step": 195410 }, { "epoch": 0.7554390685160273, "grad_norm": 0.10462170094251633, "learning_rate": 0.002, "loss": 2.3307, "step": 195420 }, { "epoch": 0.7554777257194105, "grad_norm": 0.10008876770734787, "learning_rate": 0.002, "loss": 2.3466, "step": 195430 }, { "epoch": 0.7555163829227939, "grad_norm": 0.27592208981513977, "learning_rate": 0.002, "loss": 2.3412, "step": 195440 }, { "epoch": 0.7555550401261771, "grad_norm": 0.1063537746667862, "learning_rate": 0.002, "loss": 2.3329, "step": 195450 }, { "epoch": 0.7555936973295604, "grad_norm": 0.11505308747291565, "learning_rate": 0.002, "loss": 2.3273, "step": 195460 }, { "epoch": 0.7556323545329436, "grad_norm": 0.11546620726585388, "learning_rate": 0.002, "loss": 2.3503, "step": 195470 }, { "epoch": 0.7556710117363269, "grad_norm": 0.10170117765665054, "learning_rate": 0.002, "loss": 2.318, "step": 195480 }, { "epoch": 0.7557096689397103, "grad_norm": 0.09899432212114334, "learning_rate": 0.002, "loss": 2.3325, "step": 195490 }, { "epoch": 0.7557483261430935, "grad_norm": 0.10428054630756378, "learning_rate": 0.002, "loss": 2.3407, "step": 195500 }, { "epoch": 0.7557869833464768, "grad_norm": 0.13404028117656708, "learning_rate": 0.002, "loss": 2.3411, "step": 195510 }, { "epoch": 0.75582564054986, "grad_norm": 0.11861197650432587, "learning_rate": 0.002, "loss": 2.3336, "step": 195520 }, { "epoch": 0.7558642977532434, "grad_norm": 0.09653604030609131, "learning_rate": 0.002, "loss": 2.35, "step": 195530 }, { "epoch": 0.7559029549566266, "grad_norm": 0.10192802548408508, "learning_rate": 0.002, "loss": 2.3321, "step": 195540 }, { "epoch": 0.7559416121600099, "grad_norm": 0.09712671488523483, "learning_rate": 0.002, "loss": 2.3358, "step": 195550 }, { "epoch": 0.7559802693633931, "grad_norm": 0.13238525390625, "learning_rate": 0.002, "loss": 2.3412, "step": 195560 }, { "epoch": 0.7560189265667765, "grad_norm": 0.10193489491939545, "learning_rate": 0.002, "loss": 2.3466, "step": 195570 }, { "epoch": 0.7560575837701597, "grad_norm": 0.1082800030708313, "learning_rate": 0.002, "loss": 2.3351, "step": 195580 }, { "epoch": 0.756096240973543, "grad_norm": 0.11322148889303207, "learning_rate": 0.002, "loss": 2.3363, "step": 195590 }, { "epoch": 0.7561348981769263, "grad_norm": 0.10525218397378922, "learning_rate": 0.002, "loss": 2.3519, "step": 195600 }, { "epoch": 0.7561735553803096, "grad_norm": 0.10929600894451141, "learning_rate": 0.002, "loss": 2.3388, "step": 195610 }, { "epoch": 0.7562122125836929, "grad_norm": 0.10341084003448486, "learning_rate": 0.002, "loss": 2.339, "step": 195620 }, { "epoch": 0.7562508697870761, "grad_norm": 0.09806688874959946, "learning_rate": 0.002, "loss": 2.3362, "step": 195630 }, { "epoch": 0.7562895269904594, "grad_norm": 0.12035530060529709, "learning_rate": 0.002, "loss": 2.3324, "step": 195640 }, { "epoch": 0.7563281841938427, "grad_norm": 0.11518548429012299, "learning_rate": 0.002, "loss": 2.349, "step": 195650 }, { "epoch": 0.756366841397226, "grad_norm": 0.10069727897644043, "learning_rate": 0.002, "loss": 2.3343, "step": 195660 }, { "epoch": 0.7564054986006092, "grad_norm": 0.11515224725008011, "learning_rate": 0.002, "loss": 2.3392, "step": 195670 }, { "epoch": 0.7564441558039925, "grad_norm": 0.10596141964197159, "learning_rate": 0.002, "loss": 2.3343, "step": 195680 }, { "epoch": 0.7564828130073757, "grad_norm": 0.10848668217658997, "learning_rate": 0.002, "loss": 2.3416, "step": 195690 }, { "epoch": 0.7565214702107591, "grad_norm": 0.10180913656949997, "learning_rate": 0.002, "loss": 2.3372, "step": 195700 }, { "epoch": 0.7565601274141424, "grad_norm": 0.0984707847237587, "learning_rate": 0.002, "loss": 2.3518, "step": 195710 }, { "epoch": 0.7565987846175256, "grad_norm": 0.10491156578063965, "learning_rate": 0.002, "loss": 2.3234, "step": 195720 }, { "epoch": 0.7566374418209089, "grad_norm": 0.10026510804891586, "learning_rate": 0.002, "loss": 2.3265, "step": 195730 }, { "epoch": 0.7566760990242922, "grad_norm": 0.10417931526899338, "learning_rate": 0.002, "loss": 2.357, "step": 195740 }, { "epoch": 0.7567147562276755, "grad_norm": 0.08421199023723602, "learning_rate": 0.002, "loss": 2.3448, "step": 195750 }, { "epoch": 0.7567534134310587, "grad_norm": 0.11094094067811966, "learning_rate": 0.002, "loss": 2.3622, "step": 195760 }, { "epoch": 0.756792070634442, "grad_norm": 0.09546635299921036, "learning_rate": 0.002, "loss": 2.3456, "step": 195770 }, { "epoch": 0.7568307278378253, "grad_norm": 0.13321033120155334, "learning_rate": 0.002, "loss": 2.3355, "step": 195780 }, { "epoch": 0.7568693850412086, "grad_norm": 0.11739350110292435, "learning_rate": 0.002, "loss": 2.3579, "step": 195790 }, { "epoch": 0.7569080422445918, "grad_norm": 0.11135275661945343, "learning_rate": 0.002, "loss": 2.3406, "step": 195800 }, { "epoch": 0.7569466994479751, "grad_norm": 0.10918545722961426, "learning_rate": 0.002, "loss": 2.3327, "step": 195810 }, { "epoch": 0.7569853566513585, "grad_norm": 0.09732171148061752, "learning_rate": 0.002, "loss": 2.3502, "step": 195820 }, { "epoch": 0.7570240138547417, "grad_norm": 0.10396996885538101, "learning_rate": 0.002, "loss": 2.3363, "step": 195830 }, { "epoch": 0.757062671058125, "grad_norm": 0.09502806514501572, "learning_rate": 0.002, "loss": 2.3388, "step": 195840 }, { "epoch": 0.7571013282615082, "grad_norm": 0.09656667709350586, "learning_rate": 0.002, "loss": 2.3567, "step": 195850 }, { "epoch": 0.7571399854648915, "grad_norm": 0.10245371609926224, "learning_rate": 0.002, "loss": 2.3359, "step": 195860 }, { "epoch": 0.7571786426682748, "grad_norm": 0.1009119525551796, "learning_rate": 0.002, "loss": 2.3322, "step": 195870 }, { "epoch": 0.7572172998716581, "grad_norm": 0.10753369331359863, "learning_rate": 0.002, "loss": 2.333, "step": 195880 }, { "epoch": 0.7572559570750413, "grad_norm": 0.11064845323562622, "learning_rate": 0.002, "loss": 2.3454, "step": 195890 }, { "epoch": 0.7572946142784246, "grad_norm": 0.1408444494009018, "learning_rate": 0.002, "loss": 2.3523, "step": 195900 }, { "epoch": 0.757333271481808, "grad_norm": 0.10942525416612625, "learning_rate": 0.002, "loss": 2.3539, "step": 195910 }, { "epoch": 0.7573719286851912, "grad_norm": 0.08875524997711182, "learning_rate": 0.002, "loss": 2.3401, "step": 195920 }, { "epoch": 0.7574105858885745, "grad_norm": 0.1011844277381897, "learning_rate": 0.002, "loss": 2.3496, "step": 195930 }, { "epoch": 0.7574492430919577, "grad_norm": 0.1290528029203415, "learning_rate": 0.002, "loss": 2.346, "step": 195940 }, { "epoch": 0.7574879002953411, "grad_norm": 0.09972189366817474, "learning_rate": 0.002, "loss": 2.3452, "step": 195950 }, { "epoch": 0.7575265574987243, "grad_norm": 0.09701741486787796, "learning_rate": 0.002, "loss": 2.3486, "step": 195960 }, { "epoch": 0.7575652147021076, "grad_norm": 0.10647466033697128, "learning_rate": 0.002, "loss": 2.3434, "step": 195970 }, { "epoch": 0.7576038719054908, "grad_norm": 0.08993512392044067, "learning_rate": 0.002, "loss": 2.3392, "step": 195980 }, { "epoch": 0.7576425291088742, "grad_norm": 1.3735077381134033, "learning_rate": 0.002, "loss": 2.3435, "step": 195990 }, { "epoch": 0.7576811863122574, "grad_norm": 0.10575959831476212, "learning_rate": 0.002, "loss": 2.3388, "step": 196000 }, { "epoch": 0.7577198435156407, "grad_norm": 0.09548277407884598, "learning_rate": 0.002, "loss": 2.3346, "step": 196010 }, { "epoch": 0.757758500719024, "grad_norm": 0.09989648312330246, "learning_rate": 0.002, "loss": 2.3396, "step": 196020 }, { "epoch": 0.7577971579224073, "grad_norm": 0.10474807769060135, "learning_rate": 0.002, "loss": 2.3499, "step": 196030 }, { "epoch": 0.7578358151257906, "grad_norm": 0.09582081437110901, "learning_rate": 0.002, "loss": 2.3408, "step": 196040 }, { "epoch": 0.7578744723291738, "grad_norm": 0.10808078199625015, "learning_rate": 0.002, "loss": 2.3505, "step": 196050 }, { "epoch": 0.7579131295325571, "grad_norm": 0.11338485777378082, "learning_rate": 0.002, "loss": 2.3513, "step": 196060 }, { "epoch": 0.7579517867359403, "grad_norm": 0.11452560871839523, "learning_rate": 0.002, "loss": 2.357, "step": 196070 }, { "epoch": 0.7579904439393237, "grad_norm": 0.10131363570690155, "learning_rate": 0.002, "loss": 2.3582, "step": 196080 }, { "epoch": 0.7580291011427069, "grad_norm": 0.09486867487430573, "learning_rate": 0.002, "loss": 2.3395, "step": 196090 }, { "epoch": 0.7580677583460902, "grad_norm": 0.1085309162735939, "learning_rate": 0.002, "loss": 2.3427, "step": 196100 }, { "epoch": 0.7581064155494734, "grad_norm": 0.09008552134037018, "learning_rate": 0.002, "loss": 2.3315, "step": 196110 }, { "epoch": 0.7581450727528568, "grad_norm": 0.0982944443821907, "learning_rate": 0.002, "loss": 2.3457, "step": 196120 }, { "epoch": 0.7581837299562401, "grad_norm": 0.11804826557636261, "learning_rate": 0.002, "loss": 2.3477, "step": 196130 }, { "epoch": 0.7582223871596233, "grad_norm": 0.09671904146671295, "learning_rate": 0.002, "loss": 2.3496, "step": 196140 }, { "epoch": 0.7582610443630066, "grad_norm": 0.11080244183540344, "learning_rate": 0.002, "loss": 2.3389, "step": 196150 }, { "epoch": 0.7582997015663899, "grad_norm": 0.12381796538829803, "learning_rate": 0.002, "loss": 2.3154, "step": 196160 }, { "epoch": 0.7583383587697732, "grad_norm": 0.11330335587263107, "learning_rate": 0.002, "loss": 2.3448, "step": 196170 }, { "epoch": 0.7583770159731564, "grad_norm": 0.10313785076141357, "learning_rate": 0.002, "loss": 2.3473, "step": 196180 }, { "epoch": 0.7584156731765397, "grad_norm": 0.09532544761896133, "learning_rate": 0.002, "loss": 2.3394, "step": 196190 }, { "epoch": 0.758454330379923, "grad_norm": 0.10902924090623856, "learning_rate": 0.002, "loss": 2.3218, "step": 196200 }, { "epoch": 0.7584929875833063, "grad_norm": 0.09729165583848953, "learning_rate": 0.002, "loss": 2.3353, "step": 196210 }, { "epoch": 0.7585316447866896, "grad_norm": 0.12448497116565704, "learning_rate": 0.002, "loss": 2.3287, "step": 196220 }, { "epoch": 0.7585703019900728, "grad_norm": 0.09325090795755386, "learning_rate": 0.002, "loss": 2.3241, "step": 196230 }, { "epoch": 0.7586089591934561, "grad_norm": 0.1104232668876648, "learning_rate": 0.002, "loss": 2.3453, "step": 196240 }, { "epoch": 0.7586476163968394, "grad_norm": 0.10128456354141235, "learning_rate": 0.002, "loss": 2.3288, "step": 196250 }, { "epoch": 0.7586862736002227, "grad_norm": 0.10114264488220215, "learning_rate": 0.002, "loss": 2.3335, "step": 196260 }, { "epoch": 0.7587249308036059, "grad_norm": 0.1039217859506607, "learning_rate": 0.002, "loss": 2.3334, "step": 196270 }, { "epoch": 0.7587635880069892, "grad_norm": 0.09960024803876877, "learning_rate": 0.002, "loss": 2.3522, "step": 196280 }, { "epoch": 0.7588022452103725, "grad_norm": 0.109873928129673, "learning_rate": 0.002, "loss": 2.3367, "step": 196290 }, { "epoch": 0.7588409024137558, "grad_norm": 0.09769205749034882, "learning_rate": 0.002, "loss": 2.3366, "step": 196300 }, { "epoch": 0.758879559617139, "grad_norm": 0.1883150339126587, "learning_rate": 0.002, "loss": 2.3348, "step": 196310 }, { "epoch": 0.7589182168205223, "grad_norm": 0.1155470684170723, "learning_rate": 0.002, "loss": 2.3304, "step": 196320 }, { "epoch": 0.7589568740239057, "grad_norm": 0.09333515912294388, "learning_rate": 0.002, "loss": 2.3279, "step": 196330 }, { "epoch": 0.7589955312272889, "grad_norm": 0.10450160503387451, "learning_rate": 0.002, "loss": 2.3264, "step": 196340 }, { "epoch": 0.7590341884306722, "grad_norm": 0.13585828244686127, "learning_rate": 0.002, "loss": 2.3402, "step": 196350 }, { "epoch": 0.7590728456340554, "grad_norm": 0.10029277950525284, "learning_rate": 0.002, "loss": 2.3481, "step": 196360 }, { "epoch": 0.7591115028374388, "grad_norm": 0.09286853671073914, "learning_rate": 0.002, "loss": 2.3351, "step": 196370 }, { "epoch": 0.759150160040822, "grad_norm": 0.12459070980548859, "learning_rate": 0.002, "loss": 2.3415, "step": 196380 }, { "epoch": 0.7591888172442053, "grad_norm": 0.11947477608919144, "learning_rate": 0.002, "loss": 2.3299, "step": 196390 }, { "epoch": 0.7592274744475885, "grad_norm": 0.1244572103023529, "learning_rate": 0.002, "loss": 2.3434, "step": 196400 }, { "epoch": 0.7592661316509718, "grad_norm": 0.11129307746887207, "learning_rate": 0.002, "loss": 2.3485, "step": 196410 }, { "epoch": 0.7593047888543552, "grad_norm": 0.10143723338842392, "learning_rate": 0.002, "loss": 2.3275, "step": 196420 }, { "epoch": 0.7593434460577384, "grad_norm": 0.11217688024044037, "learning_rate": 0.002, "loss": 2.3459, "step": 196430 }, { "epoch": 0.7593821032611217, "grad_norm": 0.10073287785053253, "learning_rate": 0.002, "loss": 2.3545, "step": 196440 }, { "epoch": 0.7594207604645049, "grad_norm": 0.1112406775355339, "learning_rate": 0.002, "loss": 2.3395, "step": 196450 }, { "epoch": 0.7594594176678883, "grad_norm": 0.11295268684625626, "learning_rate": 0.002, "loss": 2.3391, "step": 196460 }, { "epoch": 0.7594980748712715, "grad_norm": 0.10510469228029251, "learning_rate": 0.002, "loss": 2.3315, "step": 196470 }, { "epoch": 0.7595367320746548, "grad_norm": 0.10133729130029678, "learning_rate": 0.002, "loss": 2.3505, "step": 196480 }, { "epoch": 0.759575389278038, "grad_norm": 0.10159589350223541, "learning_rate": 0.002, "loss": 2.3288, "step": 196490 }, { "epoch": 0.7596140464814214, "grad_norm": 0.1075550764799118, "learning_rate": 0.002, "loss": 2.3421, "step": 196500 }, { "epoch": 0.7596527036848046, "grad_norm": 0.11190401762723923, "learning_rate": 0.002, "loss": 2.345, "step": 196510 }, { "epoch": 0.7596913608881879, "grad_norm": 0.10415247827768326, "learning_rate": 0.002, "loss": 2.3393, "step": 196520 }, { "epoch": 0.7597300180915711, "grad_norm": 0.10985806584358215, "learning_rate": 0.002, "loss": 2.3465, "step": 196530 }, { "epoch": 0.7597686752949545, "grad_norm": 0.09883502870798111, "learning_rate": 0.002, "loss": 2.347, "step": 196540 }, { "epoch": 0.7598073324983378, "grad_norm": 0.09660974889993668, "learning_rate": 0.002, "loss": 2.3352, "step": 196550 }, { "epoch": 0.759845989701721, "grad_norm": 0.11280705779790878, "learning_rate": 0.002, "loss": 2.3524, "step": 196560 }, { "epoch": 0.7598846469051043, "grad_norm": 0.10233590006828308, "learning_rate": 0.002, "loss": 2.3392, "step": 196570 }, { "epoch": 0.7599233041084876, "grad_norm": 0.09991362690925598, "learning_rate": 0.002, "loss": 2.3408, "step": 196580 }, { "epoch": 0.7599619613118709, "grad_norm": 0.09371718019247055, "learning_rate": 0.002, "loss": 2.3325, "step": 196590 }, { "epoch": 0.7600006185152541, "grad_norm": 0.10569316893815994, "learning_rate": 0.002, "loss": 2.3454, "step": 196600 }, { "epoch": 0.7600392757186374, "grad_norm": 0.1058524027466774, "learning_rate": 0.002, "loss": 2.3501, "step": 196610 }, { "epoch": 0.7600779329220206, "grad_norm": 0.10566135495901108, "learning_rate": 0.002, "loss": 2.3427, "step": 196620 }, { "epoch": 0.760116590125404, "grad_norm": 0.09536425024271011, "learning_rate": 0.002, "loss": 2.3445, "step": 196630 }, { "epoch": 0.7601552473287873, "grad_norm": 0.10409044474363327, "learning_rate": 0.002, "loss": 2.3392, "step": 196640 }, { "epoch": 0.7601939045321705, "grad_norm": 0.09831932932138443, "learning_rate": 0.002, "loss": 2.3302, "step": 196650 }, { "epoch": 0.7602325617355538, "grad_norm": 0.10347718745470047, "learning_rate": 0.002, "loss": 2.329, "step": 196660 }, { "epoch": 0.7602712189389371, "grad_norm": 0.1087682694196701, "learning_rate": 0.002, "loss": 2.3534, "step": 196670 }, { "epoch": 0.7603098761423204, "grad_norm": 0.09737783670425415, "learning_rate": 0.002, "loss": 2.3346, "step": 196680 }, { "epoch": 0.7603485333457036, "grad_norm": 0.09639918804168701, "learning_rate": 0.002, "loss": 2.3537, "step": 196690 }, { "epoch": 0.7603871905490869, "grad_norm": 0.13335011899471283, "learning_rate": 0.002, "loss": 2.3434, "step": 196700 }, { "epoch": 0.7604258477524702, "grad_norm": 0.09955698251724243, "learning_rate": 0.002, "loss": 2.3374, "step": 196710 }, { "epoch": 0.7604645049558535, "grad_norm": 0.09763280302286148, "learning_rate": 0.002, "loss": 2.3418, "step": 196720 }, { "epoch": 0.7605031621592367, "grad_norm": 0.09992306679487228, "learning_rate": 0.002, "loss": 2.3598, "step": 196730 }, { "epoch": 0.76054181936262, "grad_norm": 0.14371579885482788, "learning_rate": 0.002, "loss": 2.3515, "step": 196740 }, { "epoch": 0.7605804765660034, "grad_norm": 0.12052550166845322, "learning_rate": 0.002, "loss": 2.3323, "step": 196750 }, { "epoch": 0.7606191337693866, "grad_norm": 0.11212249100208282, "learning_rate": 0.002, "loss": 2.3385, "step": 196760 }, { "epoch": 0.7606577909727699, "grad_norm": 0.09525085240602493, "learning_rate": 0.002, "loss": 2.3552, "step": 196770 }, { "epoch": 0.7606964481761531, "grad_norm": 0.11087365448474884, "learning_rate": 0.002, "loss": 2.34, "step": 196780 }, { "epoch": 0.7607351053795364, "grad_norm": 0.09551934152841568, "learning_rate": 0.002, "loss": 2.3454, "step": 196790 }, { "epoch": 0.7607737625829197, "grad_norm": 0.11185195297002792, "learning_rate": 0.002, "loss": 2.3319, "step": 196800 }, { "epoch": 0.760812419786303, "grad_norm": 0.13765457272529602, "learning_rate": 0.002, "loss": 2.3446, "step": 196810 }, { "epoch": 0.7608510769896862, "grad_norm": 0.10338455438613892, "learning_rate": 0.002, "loss": 2.3414, "step": 196820 }, { "epoch": 0.7608897341930695, "grad_norm": 0.10170518606901169, "learning_rate": 0.002, "loss": 2.3453, "step": 196830 }, { "epoch": 0.7609283913964529, "grad_norm": 0.10933112353086472, "learning_rate": 0.002, "loss": 2.3391, "step": 196840 }, { "epoch": 0.7609670485998361, "grad_norm": 0.1284535825252533, "learning_rate": 0.002, "loss": 2.3423, "step": 196850 }, { "epoch": 0.7610057058032194, "grad_norm": 0.08637114614248276, "learning_rate": 0.002, "loss": 2.3409, "step": 196860 }, { "epoch": 0.7610443630066026, "grad_norm": 0.09608016163110733, "learning_rate": 0.002, "loss": 2.3414, "step": 196870 }, { "epoch": 0.761083020209986, "grad_norm": 0.09457556158304214, "learning_rate": 0.002, "loss": 2.3282, "step": 196880 }, { "epoch": 0.7611216774133692, "grad_norm": 0.09683346748352051, "learning_rate": 0.002, "loss": 2.3461, "step": 196890 }, { "epoch": 0.7611603346167525, "grad_norm": 0.09376256912946701, "learning_rate": 0.002, "loss": 2.3484, "step": 196900 }, { "epoch": 0.7611989918201357, "grad_norm": 0.12256647646427155, "learning_rate": 0.002, "loss": 2.3492, "step": 196910 }, { "epoch": 0.7612376490235191, "grad_norm": 0.1191987469792366, "learning_rate": 0.002, "loss": 2.3446, "step": 196920 }, { "epoch": 0.7612763062269023, "grad_norm": 0.09384308755397797, "learning_rate": 0.002, "loss": 2.3419, "step": 196930 }, { "epoch": 0.7613149634302856, "grad_norm": 0.09424065798521042, "learning_rate": 0.002, "loss": 2.3271, "step": 196940 }, { "epoch": 0.7613536206336688, "grad_norm": 0.09528655558824539, "learning_rate": 0.002, "loss": 2.3361, "step": 196950 }, { "epoch": 0.7613922778370522, "grad_norm": 0.11627255380153656, "learning_rate": 0.002, "loss": 2.3373, "step": 196960 }, { "epoch": 0.7614309350404355, "grad_norm": 0.11006615310907364, "learning_rate": 0.002, "loss": 2.3442, "step": 196970 }, { "epoch": 0.7614695922438187, "grad_norm": 0.10915512591600418, "learning_rate": 0.002, "loss": 2.3352, "step": 196980 }, { "epoch": 0.761508249447202, "grad_norm": 0.10492531210184097, "learning_rate": 0.002, "loss": 2.3371, "step": 196990 }, { "epoch": 0.7615469066505852, "grad_norm": 0.12003140896558762, "learning_rate": 0.002, "loss": 2.3479, "step": 197000 }, { "epoch": 0.7615855638539686, "grad_norm": 0.09126259386539459, "learning_rate": 0.002, "loss": 2.3572, "step": 197010 }, { "epoch": 0.7616242210573518, "grad_norm": 0.09396257996559143, "learning_rate": 0.002, "loss": 2.3418, "step": 197020 }, { "epoch": 0.7616628782607351, "grad_norm": 0.09638381004333496, "learning_rate": 0.002, "loss": 2.3384, "step": 197030 }, { "epoch": 0.7617015354641183, "grad_norm": 0.12181457877159119, "learning_rate": 0.002, "loss": 2.3263, "step": 197040 }, { "epoch": 0.7617401926675017, "grad_norm": 0.0980510339140892, "learning_rate": 0.002, "loss": 2.3408, "step": 197050 }, { "epoch": 0.761778849870885, "grad_norm": 0.1004633978009224, "learning_rate": 0.002, "loss": 2.3264, "step": 197060 }, { "epoch": 0.7618175070742682, "grad_norm": 0.1116318479180336, "learning_rate": 0.002, "loss": 2.344, "step": 197070 }, { "epoch": 0.7618561642776515, "grad_norm": 0.11675788462162018, "learning_rate": 0.002, "loss": 2.3684, "step": 197080 }, { "epoch": 0.7618948214810348, "grad_norm": 0.11029686778783798, "learning_rate": 0.002, "loss": 2.3587, "step": 197090 }, { "epoch": 0.7619334786844181, "grad_norm": 0.43609610199928284, "learning_rate": 0.002, "loss": 2.3519, "step": 197100 }, { "epoch": 0.7619721358878013, "grad_norm": 0.10058358311653137, "learning_rate": 0.002, "loss": 2.3421, "step": 197110 }, { "epoch": 0.7620107930911846, "grad_norm": 0.10088063031435013, "learning_rate": 0.002, "loss": 2.342, "step": 197120 }, { "epoch": 0.7620494502945679, "grad_norm": 0.09839877486228943, "learning_rate": 0.002, "loss": 2.3387, "step": 197130 }, { "epoch": 0.7620881074979512, "grad_norm": 0.11465706676244736, "learning_rate": 0.002, "loss": 2.3448, "step": 197140 }, { "epoch": 0.7621267647013344, "grad_norm": 0.10121915489435196, "learning_rate": 0.002, "loss": 2.3438, "step": 197150 }, { "epoch": 0.7621654219047177, "grad_norm": 0.10331589728593826, "learning_rate": 0.002, "loss": 2.3353, "step": 197160 }, { "epoch": 0.762204079108101, "grad_norm": 0.10166185349225998, "learning_rate": 0.002, "loss": 2.3517, "step": 197170 }, { "epoch": 0.7622427363114843, "grad_norm": 0.11593326181173325, "learning_rate": 0.002, "loss": 2.3457, "step": 197180 }, { "epoch": 0.7622813935148676, "grad_norm": 0.09611688554286957, "learning_rate": 0.002, "loss": 2.3434, "step": 197190 }, { "epoch": 0.7623200507182508, "grad_norm": 0.11437992751598358, "learning_rate": 0.002, "loss": 2.348, "step": 197200 }, { "epoch": 0.7623587079216341, "grad_norm": 0.1314302235841751, "learning_rate": 0.002, "loss": 2.3582, "step": 197210 }, { "epoch": 0.7623973651250174, "grad_norm": 0.09535528719425201, "learning_rate": 0.002, "loss": 2.3238, "step": 197220 }, { "epoch": 0.7624360223284007, "grad_norm": 0.1051691323518753, "learning_rate": 0.002, "loss": 2.3284, "step": 197230 }, { "epoch": 0.7624746795317839, "grad_norm": 0.11101983487606049, "learning_rate": 0.002, "loss": 2.3455, "step": 197240 }, { "epoch": 0.7625133367351672, "grad_norm": 0.10246936231851578, "learning_rate": 0.002, "loss": 2.3581, "step": 197250 }, { "epoch": 0.7625519939385506, "grad_norm": 0.10206905007362366, "learning_rate": 0.002, "loss": 2.3272, "step": 197260 }, { "epoch": 0.7625906511419338, "grad_norm": 0.12120827287435532, "learning_rate": 0.002, "loss": 2.3413, "step": 197270 }, { "epoch": 0.7626293083453171, "grad_norm": 0.10170960426330566, "learning_rate": 0.002, "loss": 2.362, "step": 197280 }, { "epoch": 0.7626679655487003, "grad_norm": 0.09327639639377594, "learning_rate": 0.002, "loss": 2.341, "step": 197290 }, { "epoch": 0.7627066227520837, "grad_norm": 0.10556710511445999, "learning_rate": 0.002, "loss": 2.3466, "step": 197300 }, { "epoch": 0.7627452799554669, "grad_norm": 0.11077040433883667, "learning_rate": 0.002, "loss": 2.3434, "step": 197310 }, { "epoch": 0.7627839371588502, "grad_norm": 0.10688727349042892, "learning_rate": 0.002, "loss": 2.341, "step": 197320 }, { "epoch": 0.7628225943622334, "grad_norm": 0.10626237094402313, "learning_rate": 0.002, "loss": 2.352, "step": 197330 }, { "epoch": 0.7628612515656167, "grad_norm": 0.09951891005039215, "learning_rate": 0.002, "loss": 2.3411, "step": 197340 }, { "epoch": 0.762899908769, "grad_norm": 0.10448072850704193, "learning_rate": 0.002, "loss": 2.3391, "step": 197350 }, { "epoch": 0.7629385659723833, "grad_norm": 0.113190196454525, "learning_rate": 0.002, "loss": 2.3244, "step": 197360 }, { "epoch": 0.7629772231757666, "grad_norm": 0.09222755581140518, "learning_rate": 0.002, "loss": 2.3379, "step": 197370 }, { "epoch": 0.7630158803791498, "grad_norm": 0.11096397042274475, "learning_rate": 0.002, "loss": 2.3525, "step": 197380 }, { "epoch": 0.7630545375825332, "grad_norm": 0.09983041882514954, "learning_rate": 0.002, "loss": 2.3572, "step": 197390 }, { "epoch": 0.7630931947859164, "grad_norm": 0.0992233157157898, "learning_rate": 0.002, "loss": 2.3459, "step": 197400 }, { "epoch": 0.7631318519892997, "grad_norm": 0.10024307668209076, "learning_rate": 0.002, "loss": 2.3296, "step": 197410 }, { "epoch": 0.7631705091926829, "grad_norm": 0.1278943419456482, "learning_rate": 0.002, "loss": 2.3216, "step": 197420 }, { "epoch": 0.7632091663960663, "grad_norm": 0.09778820723295212, "learning_rate": 0.002, "loss": 2.3474, "step": 197430 }, { "epoch": 0.7632478235994495, "grad_norm": 0.12126130610704422, "learning_rate": 0.002, "loss": 2.3479, "step": 197440 }, { "epoch": 0.7632864808028328, "grad_norm": 0.1095130667090416, "learning_rate": 0.002, "loss": 2.3302, "step": 197450 }, { "epoch": 0.763325138006216, "grad_norm": 0.11171168088912964, "learning_rate": 0.002, "loss": 2.3348, "step": 197460 }, { "epoch": 0.7633637952095994, "grad_norm": 0.11522161960601807, "learning_rate": 0.002, "loss": 2.3521, "step": 197470 }, { "epoch": 0.7634024524129827, "grad_norm": 0.09989353269338608, "learning_rate": 0.002, "loss": 2.3422, "step": 197480 }, { "epoch": 0.7634411096163659, "grad_norm": 0.16353625059127808, "learning_rate": 0.002, "loss": 2.3513, "step": 197490 }, { "epoch": 0.7634797668197492, "grad_norm": 0.10433971881866455, "learning_rate": 0.002, "loss": 2.3374, "step": 197500 }, { "epoch": 0.7635184240231325, "grad_norm": 0.09696116298437119, "learning_rate": 0.002, "loss": 2.3476, "step": 197510 }, { "epoch": 0.7635570812265158, "grad_norm": 0.09195984154939651, "learning_rate": 0.002, "loss": 2.3405, "step": 197520 }, { "epoch": 0.763595738429899, "grad_norm": 0.09522832185029984, "learning_rate": 0.002, "loss": 2.3494, "step": 197530 }, { "epoch": 0.7636343956332823, "grad_norm": 0.10282301157712936, "learning_rate": 0.002, "loss": 2.3425, "step": 197540 }, { "epoch": 0.7636730528366655, "grad_norm": 0.0923994779586792, "learning_rate": 0.002, "loss": 2.3408, "step": 197550 }, { "epoch": 0.7637117100400489, "grad_norm": 0.10165669769048691, "learning_rate": 0.002, "loss": 2.3409, "step": 197560 }, { "epoch": 0.7637503672434321, "grad_norm": 0.11128440499305725, "learning_rate": 0.002, "loss": 2.3361, "step": 197570 }, { "epoch": 0.7637890244468154, "grad_norm": 0.10605587065219879, "learning_rate": 0.002, "loss": 2.3477, "step": 197580 }, { "epoch": 0.7638276816501987, "grad_norm": 0.11360124498605728, "learning_rate": 0.002, "loss": 2.345, "step": 197590 }, { "epoch": 0.763866338853582, "grad_norm": 0.09772372990846634, "learning_rate": 0.002, "loss": 2.3421, "step": 197600 }, { "epoch": 0.7639049960569653, "grad_norm": 0.11086378991603851, "learning_rate": 0.002, "loss": 2.3411, "step": 197610 }, { "epoch": 0.7639436532603485, "grad_norm": 0.11184732615947723, "learning_rate": 0.002, "loss": 2.3301, "step": 197620 }, { "epoch": 0.7639823104637318, "grad_norm": 0.11088281869888306, "learning_rate": 0.002, "loss": 2.3332, "step": 197630 }, { "epoch": 0.7640209676671151, "grad_norm": 0.10725362598896027, "learning_rate": 0.002, "loss": 2.347, "step": 197640 }, { "epoch": 0.7640596248704984, "grad_norm": 0.09956399351358414, "learning_rate": 0.002, "loss": 2.34, "step": 197650 }, { "epoch": 0.7640982820738816, "grad_norm": 0.09949301183223724, "learning_rate": 0.002, "loss": 2.32, "step": 197660 }, { "epoch": 0.7641369392772649, "grad_norm": 0.0930347889661789, "learning_rate": 0.002, "loss": 2.3418, "step": 197670 }, { "epoch": 0.7641755964806483, "grad_norm": 0.10376568883657455, "learning_rate": 0.002, "loss": 2.3397, "step": 197680 }, { "epoch": 0.7642142536840315, "grad_norm": 0.10890960693359375, "learning_rate": 0.002, "loss": 2.3418, "step": 197690 }, { "epoch": 0.7642529108874148, "grad_norm": 0.15354023873806, "learning_rate": 0.002, "loss": 2.3397, "step": 197700 }, { "epoch": 0.764291568090798, "grad_norm": 0.11070028692483902, "learning_rate": 0.002, "loss": 2.3475, "step": 197710 }, { "epoch": 0.7643302252941813, "grad_norm": 0.10421784222126007, "learning_rate": 0.002, "loss": 2.3229, "step": 197720 }, { "epoch": 0.7643688824975646, "grad_norm": 0.09906643629074097, "learning_rate": 0.002, "loss": 2.3151, "step": 197730 }, { "epoch": 0.7644075397009479, "grad_norm": 0.12415380030870438, "learning_rate": 0.002, "loss": 2.3664, "step": 197740 }, { "epoch": 0.7644461969043311, "grad_norm": 0.09926281869411469, "learning_rate": 0.002, "loss": 2.3425, "step": 197750 }, { "epoch": 0.7644848541077144, "grad_norm": 0.10176531225442886, "learning_rate": 0.002, "loss": 2.3322, "step": 197760 }, { "epoch": 0.7645235113110977, "grad_norm": 0.09986831992864609, "learning_rate": 0.002, "loss": 2.3474, "step": 197770 }, { "epoch": 0.764562168514481, "grad_norm": 0.09915333241224289, "learning_rate": 0.002, "loss": 2.3417, "step": 197780 }, { "epoch": 0.7646008257178643, "grad_norm": 0.10770957171916962, "learning_rate": 0.002, "loss": 2.3349, "step": 197790 }, { "epoch": 0.7646394829212475, "grad_norm": 0.09436184167861938, "learning_rate": 0.002, "loss": 2.3404, "step": 197800 }, { "epoch": 0.7646781401246309, "grad_norm": 0.10471966117620468, "learning_rate": 0.002, "loss": 2.3273, "step": 197810 }, { "epoch": 0.7647167973280141, "grad_norm": 0.09943972527980804, "learning_rate": 0.002, "loss": 2.3472, "step": 197820 }, { "epoch": 0.7647554545313974, "grad_norm": 0.11482524871826172, "learning_rate": 0.002, "loss": 2.339, "step": 197830 }, { "epoch": 0.7647941117347806, "grad_norm": 0.11444584280252457, "learning_rate": 0.002, "loss": 2.3367, "step": 197840 }, { "epoch": 0.764832768938164, "grad_norm": 0.10195798426866531, "learning_rate": 0.002, "loss": 2.3386, "step": 197850 }, { "epoch": 0.7648714261415472, "grad_norm": 0.09855667501688004, "learning_rate": 0.002, "loss": 2.3435, "step": 197860 }, { "epoch": 0.7649100833449305, "grad_norm": 0.11103340983390808, "learning_rate": 0.002, "loss": 2.328, "step": 197870 }, { "epoch": 0.7649487405483137, "grad_norm": 0.11886495351791382, "learning_rate": 0.002, "loss": 2.3551, "step": 197880 }, { "epoch": 0.764987397751697, "grad_norm": 0.11483073979616165, "learning_rate": 0.002, "loss": 2.3538, "step": 197890 }, { "epoch": 0.7650260549550804, "grad_norm": 0.107142373919487, "learning_rate": 0.002, "loss": 2.3406, "step": 197900 }, { "epoch": 0.7650647121584636, "grad_norm": 0.10728046298027039, "learning_rate": 0.002, "loss": 2.3506, "step": 197910 }, { "epoch": 0.7651033693618469, "grad_norm": 0.11045504361391068, "learning_rate": 0.002, "loss": 2.342, "step": 197920 }, { "epoch": 0.7651420265652301, "grad_norm": 0.12309353798627853, "learning_rate": 0.002, "loss": 2.3318, "step": 197930 }, { "epoch": 0.7651806837686135, "grad_norm": 0.09460339695215225, "learning_rate": 0.002, "loss": 2.3427, "step": 197940 }, { "epoch": 0.7652193409719967, "grad_norm": 0.09481628239154816, "learning_rate": 0.002, "loss": 2.3468, "step": 197950 }, { "epoch": 0.76525799817538, "grad_norm": 0.11327333003282547, "learning_rate": 0.002, "loss": 2.3328, "step": 197960 }, { "epoch": 0.7652966553787632, "grad_norm": 0.10542219132184982, "learning_rate": 0.002, "loss": 2.3397, "step": 197970 }, { "epoch": 0.7653353125821466, "grad_norm": 0.11292482167482376, "learning_rate": 0.002, "loss": 2.3327, "step": 197980 }, { "epoch": 0.7653739697855299, "grad_norm": 0.11517397314310074, "learning_rate": 0.002, "loss": 2.3388, "step": 197990 }, { "epoch": 0.7654126269889131, "grad_norm": 0.10499373078346252, "learning_rate": 0.002, "loss": 2.3483, "step": 198000 }, { "epoch": 0.7654512841922964, "grad_norm": 0.1152128055691719, "learning_rate": 0.002, "loss": 2.3357, "step": 198010 }, { "epoch": 0.7654899413956797, "grad_norm": 0.09470925480127335, "learning_rate": 0.002, "loss": 2.3497, "step": 198020 }, { "epoch": 0.765528598599063, "grad_norm": 0.09626875072717667, "learning_rate": 0.002, "loss": 2.3522, "step": 198030 }, { "epoch": 0.7655672558024462, "grad_norm": 0.1008152961730957, "learning_rate": 0.002, "loss": 2.3402, "step": 198040 }, { "epoch": 0.7656059130058295, "grad_norm": 0.09875728189945221, "learning_rate": 0.002, "loss": 2.3304, "step": 198050 }, { "epoch": 0.7656445702092128, "grad_norm": 0.10646268725395203, "learning_rate": 0.002, "loss": 2.3444, "step": 198060 }, { "epoch": 0.7656832274125961, "grad_norm": 0.10834681242704391, "learning_rate": 0.002, "loss": 2.3522, "step": 198070 }, { "epoch": 0.7657218846159793, "grad_norm": 0.12466521561145782, "learning_rate": 0.002, "loss": 2.3385, "step": 198080 }, { "epoch": 0.7657605418193626, "grad_norm": 0.08603180944919586, "learning_rate": 0.002, "loss": 2.3354, "step": 198090 }, { "epoch": 0.7657991990227458, "grad_norm": 0.1612296998500824, "learning_rate": 0.002, "loss": 2.3438, "step": 198100 }, { "epoch": 0.7658378562261292, "grad_norm": 0.1047164797782898, "learning_rate": 0.002, "loss": 2.3448, "step": 198110 }, { "epoch": 0.7658765134295125, "grad_norm": 0.09899793565273285, "learning_rate": 0.002, "loss": 2.3391, "step": 198120 }, { "epoch": 0.7659151706328957, "grad_norm": 0.1119118481874466, "learning_rate": 0.002, "loss": 2.3316, "step": 198130 }, { "epoch": 0.765953827836279, "grad_norm": 0.11324197798967361, "learning_rate": 0.002, "loss": 2.3393, "step": 198140 }, { "epoch": 0.7659924850396623, "grad_norm": 0.10404713451862335, "learning_rate": 0.002, "loss": 2.3403, "step": 198150 }, { "epoch": 0.7660311422430456, "grad_norm": 0.1023205816745758, "learning_rate": 0.002, "loss": 2.345, "step": 198160 }, { "epoch": 0.7660697994464288, "grad_norm": 0.10559005290269852, "learning_rate": 0.002, "loss": 2.344, "step": 198170 }, { "epoch": 0.7661084566498121, "grad_norm": 0.0908447802066803, "learning_rate": 0.002, "loss": 2.3384, "step": 198180 }, { "epoch": 0.7661471138531954, "grad_norm": 0.09740599989891052, "learning_rate": 0.002, "loss": 2.3383, "step": 198190 }, { "epoch": 0.7661857710565787, "grad_norm": 0.11079949140548706, "learning_rate": 0.002, "loss": 2.3339, "step": 198200 }, { "epoch": 0.766224428259962, "grad_norm": 0.10781296342611313, "learning_rate": 0.002, "loss": 2.3411, "step": 198210 }, { "epoch": 0.7662630854633452, "grad_norm": 0.09478621929883957, "learning_rate": 0.002, "loss": 2.3252, "step": 198220 }, { "epoch": 0.7663017426667286, "grad_norm": 0.1038641482591629, "learning_rate": 0.002, "loss": 2.338, "step": 198230 }, { "epoch": 0.7663403998701118, "grad_norm": 0.10898188501596451, "learning_rate": 0.002, "loss": 2.3446, "step": 198240 }, { "epoch": 0.7663790570734951, "grad_norm": 0.09819826483726501, "learning_rate": 0.002, "loss": 2.336, "step": 198250 }, { "epoch": 0.7664177142768783, "grad_norm": 0.11344137042760849, "learning_rate": 0.002, "loss": 2.3518, "step": 198260 }, { "epoch": 0.7664563714802616, "grad_norm": 0.11274783313274384, "learning_rate": 0.002, "loss": 2.3358, "step": 198270 }, { "epoch": 0.7664950286836449, "grad_norm": 0.1241864413022995, "learning_rate": 0.002, "loss": 2.3438, "step": 198280 }, { "epoch": 0.7665336858870282, "grad_norm": 0.0982760488986969, "learning_rate": 0.002, "loss": 2.3489, "step": 198290 }, { "epoch": 0.7665723430904114, "grad_norm": 0.09791640192270279, "learning_rate": 0.002, "loss": 2.3371, "step": 198300 }, { "epoch": 0.7666110002937947, "grad_norm": 0.09974829107522964, "learning_rate": 0.002, "loss": 2.3457, "step": 198310 }, { "epoch": 0.7666496574971781, "grad_norm": 0.10934463888406754, "learning_rate": 0.002, "loss": 2.3555, "step": 198320 }, { "epoch": 0.7666883147005613, "grad_norm": 0.09941709786653519, "learning_rate": 0.002, "loss": 2.3467, "step": 198330 }, { "epoch": 0.7667269719039446, "grad_norm": 0.11662741750478745, "learning_rate": 0.002, "loss": 2.3441, "step": 198340 }, { "epoch": 0.7667656291073278, "grad_norm": 0.0965876653790474, "learning_rate": 0.002, "loss": 2.3292, "step": 198350 }, { "epoch": 0.7668042863107112, "grad_norm": 0.10685340315103531, "learning_rate": 0.002, "loss": 2.3358, "step": 198360 }, { "epoch": 0.7668429435140944, "grad_norm": 0.08948315680027008, "learning_rate": 0.002, "loss": 2.3457, "step": 198370 }, { "epoch": 0.7668816007174777, "grad_norm": 0.19111230969429016, "learning_rate": 0.002, "loss": 2.339, "step": 198380 }, { "epoch": 0.7669202579208609, "grad_norm": 0.09882853180170059, "learning_rate": 0.002, "loss": 2.341, "step": 198390 }, { "epoch": 0.7669589151242443, "grad_norm": 0.09126832336187363, "learning_rate": 0.002, "loss": 2.347, "step": 198400 }, { "epoch": 0.7669975723276276, "grad_norm": 0.09862853586673737, "learning_rate": 0.002, "loss": 2.3539, "step": 198410 }, { "epoch": 0.7670362295310108, "grad_norm": 0.10504437983036041, "learning_rate": 0.002, "loss": 2.3388, "step": 198420 }, { "epoch": 0.7670748867343941, "grad_norm": 0.11240246891975403, "learning_rate": 0.002, "loss": 2.3418, "step": 198430 }, { "epoch": 0.7671135439377774, "grad_norm": 0.1019548624753952, "learning_rate": 0.002, "loss": 2.3411, "step": 198440 }, { "epoch": 0.7671522011411607, "grad_norm": 0.11174459755420685, "learning_rate": 0.002, "loss": 2.3317, "step": 198450 }, { "epoch": 0.7671908583445439, "grad_norm": 0.1078474372625351, "learning_rate": 0.002, "loss": 2.3304, "step": 198460 }, { "epoch": 0.7672295155479272, "grad_norm": 0.11592960357666016, "learning_rate": 0.002, "loss": 2.3589, "step": 198470 }, { "epoch": 0.7672681727513104, "grad_norm": 0.12025527656078339, "learning_rate": 0.002, "loss": 2.3338, "step": 198480 }, { "epoch": 0.7673068299546938, "grad_norm": 0.11120238900184631, "learning_rate": 0.002, "loss": 2.3458, "step": 198490 }, { "epoch": 0.767345487158077, "grad_norm": 0.11129934340715408, "learning_rate": 0.002, "loss": 2.3493, "step": 198500 }, { "epoch": 0.7673841443614603, "grad_norm": 0.10082795470952988, "learning_rate": 0.002, "loss": 2.3419, "step": 198510 }, { "epoch": 0.7674228015648435, "grad_norm": 0.08847419917583466, "learning_rate": 0.002, "loss": 2.3355, "step": 198520 }, { "epoch": 0.7674614587682269, "grad_norm": 0.15819251537322998, "learning_rate": 0.002, "loss": 2.336, "step": 198530 }, { "epoch": 0.7675001159716102, "grad_norm": 0.09503842145204544, "learning_rate": 0.002, "loss": 2.3272, "step": 198540 }, { "epoch": 0.7675387731749934, "grad_norm": 0.107664093375206, "learning_rate": 0.002, "loss": 2.3346, "step": 198550 }, { "epoch": 0.7675774303783767, "grad_norm": 0.10473312437534332, "learning_rate": 0.002, "loss": 2.3293, "step": 198560 }, { "epoch": 0.76761608758176, "grad_norm": 0.08977878093719482, "learning_rate": 0.002, "loss": 2.3285, "step": 198570 }, { "epoch": 0.7676547447851433, "grad_norm": 0.09275360405445099, "learning_rate": 0.002, "loss": 2.3325, "step": 198580 }, { "epoch": 0.7676934019885265, "grad_norm": 0.10299757122993469, "learning_rate": 0.002, "loss": 2.3368, "step": 198590 }, { "epoch": 0.7677320591919098, "grad_norm": 0.09542661905288696, "learning_rate": 0.002, "loss": 2.3498, "step": 198600 }, { "epoch": 0.7677707163952932, "grad_norm": 0.09400220960378647, "learning_rate": 0.002, "loss": 2.3446, "step": 198610 }, { "epoch": 0.7678093735986764, "grad_norm": 0.1107887402176857, "learning_rate": 0.002, "loss": 2.3425, "step": 198620 }, { "epoch": 0.7678480308020597, "grad_norm": 0.1019064262509346, "learning_rate": 0.002, "loss": 2.324, "step": 198630 }, { "epoch": 0.7678866880054429, "grad_norm": 0.11500236392021179, "learning_rate": 0.002, "loss": 2.3484, "step": 198640 }, { "epoch": 0.7679253452088262, "grad_norm": 0.09175709635019302, "learning_rate": 0.002, "loss": 2.3357, "step": 198650 }, { "epoch": 0.7679640024122095, "grad_norm": 0.108940489590168, "learning_rate": 0.002, "loss": 2.3288, "step": 198660 }, { "epoch": 0.7680026596155928, "grad_norm": 0.09465809166431427, "learning_rate": 0.002, "loss": 2.3507, "step": 198670 }, { "epoch": 0.768041316818976, "grad_norm": 0.12499912083148956, "learning_rate": 0.002, "loss": 2.3465, "step": 198680 }, { "epoch": 0.7680799740223593, "grad_norm": 0.09948600083589554, "learning_rate": 0.002, "loss": 2.3529, "step": 198690 }, { "epoch": 0.7681186312257426, "grad_norm": 0.09964347630739212, "learning_rate": 0.002, "loss": 2.3434, "step": 198700 }, { "epoch": 0.7681572884291259, "grad_norm": 0.11331164091825485, "learning_rate": 0.002, "loss": 2.3294, "step": 198710 }, { "epoch": 0.7681959456325091, "grad_norm": 0.11339457333087921, "learning_rate": 0.002, "loss": 2.3367, "step": 198720 }, { "epoch": 0.7682346028358924, "grad_norm": 0.10204224288463593, "learning_rate": 0.002, "loss": 2.3423, "step": 198730 }, { "epoch": 0.7682732600392758, "grad_norm": 0.1111808642745018, "learning_rate": 0.002, "loss": 2.3382, "step": 198740 }, { "epoch": 0.768311917242659, "grad_norm": 0.12683844566345215, "learning_rate": 0.002, "loss": 2.3472, "step": 198750 }, { "epoch": 0.7683505744460423, "grad_norm": 0.11128699779510498, "learning_rate": 0.002, "loss": 2.3293, "step": 198760 }, { "epoch": 0.7683892316494255, "grad_norm": 0.09829942882061005, "learning_rate": 0.002, "loss": 2.3449, "step": 198770 }, { "epoch": 0.7684278888528089, "grad_norm": 0.1252112090587616, "learning_rate": 0.002, "loss": 2.3332, "step": 198780 }, { "epoch": 0.7684665460561921, "grad_norm": 0.11201326549053192, "learning_rate": 0.002, "loss": 2.3516, "step": 198790 }, { "epoch": 0.7685052032595754, "grad_norm": 0.1164776012301445, "learning_rate": 0.002, "loss": 2.3505, "step": 198800 }, { "epoch": 0.7685438604629586, "grad_norm": 0.10831308364868164, "learning_rate": 0.002, "loss": 2.3438, "step": 198810 }, { "epoch": 0.7685825176663419, "grad_norm": 0.09699341654777527, "learning_rate": 0.002, "loss": 2.3442, "step": 198820 }, { "epoch": 0.7686211748697253, "grad_norm": 0.10668003559112549, "learning_rate": 0.002, "loss": 2.3483, "step": 198830 }, { "epoch": 0.7686598320731085, "grad_norm": 0.10341284424066544, "learning_rate": 0.002, "loss": 2.3493, "step": 198840 }, { "epoch": 0.7686984892764918, "grad_norm": 0.10115321725606918, "learning_rate": 0.002, "loss": 2.3542, "step": 198850 }, { "epoch": 0.768737146479875, "grad_norm": 0.0972030982375145, "learning_rate": 0.002, "loss": 2.3441, "step": 198860 }, { "epoch": 0.7687758036832584, "grad_norm": 0.09830368310213089, "learning_rate": 0.002, "loss": 2.3365, "step": 198870 }, { "epoch": 0.7688144608866416, "grad_norm": 0.12544550001621246, "learning_rate": 0.002, "loss": 2.3387, "step": 198880 }, { "epoch": 0.7688531180900249, "grad_norm": 0.08823167532682419, "learning_rate": 0.002, "loss": 2.3446, "step": 198890 }, { "epoch": 0.7688917752934081, "grad_norm": 0.09672213345766068, "learning_rate": 0.002, "loss": 2.3412, "step": 198900 }, { "epoch": 0.7689304324967915, "grad_norm": 0.10546525567770004, "learning_rate": 0.002, "loss": 2.3347, "step": 198910 }, { "epoch": 0.7689690897001747, "grad_norm": 0.10554137825965881, "learning_rate": 0.002, "loss": 2.3379, "step": 198920 }, { "epoch": 0.769007746903558, "grad_norm": 0.09676895290613174, "learning_rate": 0.002, "loss": 2.3309, "step": 198930 }, { "epoch": 0.7690464041069413, "grad_norm": 0.11075710505247116, "learning_rate": 0.002, "loss": 2.3241, "step": 198940 }, { "epoch": 0.7690850613103246, "grad_norm": 0.10539600253105164, "learning_rate": 0.002, "loss": 2.3438, "step": 198950 }, { "epoch": 0.7691237185137079, "grad_norm": 0.10727138817310333, "learning_rate": 0.002, "loss": 2.3536, "step": 198960 }, { "epoch": 0.7691623757170911, "grad_norm": 0.0944301187992096, "learning_rate": 0.002, "loss": 2.3307, "step": 198970 }, { "epoch": 0.7692010329204744, "grad_norm": 0.10199394077062607, "learning_rate": 0.002, "loss": 2.344, "step": 198980 }, { "epoch": 0.7692396901238577, "grad_norm": 0.11412839591503143, "learning_rate": 0.002, "loss": 2.3509, "step": 198990 }, { "epoch": 0.769278347327241, "grad_norm": 0.10684604942798615, "learning_rate": 0.002, "loss": 2.345, "step": 199000 }, { "epoch": 0.7693170045306242, "grad_norm": 0.10144107788801193, "learning_rate": 0.002, "loss": 2.331, "step": 199010 }, { "epoch": 0.7693556617340075, "grad_norm": 0.10650350153446198, "learning_rate": 0.002, "loss": 2.3422, "step": 199020 }, { "epoch": 0.7693943189373907, "grad_norm": 0.10620681196451187, "learning_rate": 0.002, "loss": 2.3248, "step": 199030 }, { "epoch": 0.7694329761407741, "grad_norm": 0.0976841002702713, "learning_rate": 0.002, "loss": 2.332, "step": 199040 }, { "epoch": 0.7694716333441574, "grad_norm": 0.09857263416051865, "learning_rate": 0.002, "loss": 2.3411, "step": 199050 }, { "epoch": 0.7695102905475406, "grad_norm": 0.10668320953845978, "learning_rate": 0.002, "loss": 2.3233, "step": 199060 }, { "epoch": 0.7695489477509239, "grad_norm": 0.11263915151357651, "learning_rate": 0.002, "loss": 2.3349, "step": 199070 }, { "epoch": 0.7695876049543072, "grad_norm": 0.1733059585094452, "learning_rate": 0.002, "loss": 2.337, "step": 199080 }, { "epoch": 0.7696262621576905, "grad_norm": 0.10049034655094147, "learning_rate": 0.002, "loss": 2.352, "step": 199090 }, { "epoch": 0.7696649193610737, "grad_norm": 0.10561171174049377, "learning_rate": 0.002, "loss": 2.3179, "step": 199100 }, { "epoch": 0.769703576564457, "grad_norm": 0.10926441848278046, "learning_rate": 0.002, "loss": 2.3459, "step": 199110 }, { "epoch": 0.7697422337678403, "grad_norm": 0.11219510436058044, "learning_rate": 0.002, "loss": 2.3507, "step": 199120 }, { "epoch": 0.7697808909712236, "grad_norm": 0.10429269075393677, "learning_rate": 0.002, "loss": 2.344, "step": 199130 }, { "epoch": 0.7698195481746068, "grad_norm": 0.1282796859741211, "learning_rate": 0.002, "loss": 2.3267, "step": 199140 }, { "epoch": 0.7698582053779901, "grad_norm": 0.10086347907781601, "learning_rate": 0.002, "loss": 2.3457, "step": 199150 }, { "epoch": 0.7698968625813735, "grad_norm": 0.10646980255842209, "learning_rate": 0.002, "loss": 2.34, "step": 199160 }, { "epoch": 0.7699355197847567, "grad_norm": 0.10037669539451599, "learning_rate": 0.002, "loss": 2.3364, "step": 199170 }, { "epoch": 0.76997417698814, "grad_norm": 0.10189975053071976, "learning_rate": 0.002, "loss": 2.326, "step": 199180 }, { "epoch": 0.7700128341915232, "grad_norm": 0.11893989145755768, "learning_rate": 0.002, "loss": 2.3449, "step": 199190 }, { "epoch": 0.7700514913949065, "grad_norm": 0.08906824141740799, "learning_rate": 0.002, "loss": 2.3431, "step": 199200 }, { "epoch": 0.7700901485982898, "grad_norm": 0.09848733246326447, "learning_rate": 0.002, "loss": 2.3385, "step": 199210 }, { "epoch": 0.7701288058016731, "grad_norm": 0.09648095816373825, "learning_rate": 0.002, "loss": 2.3425, "step": 199220 }, { "epoch": 0.7701674630050563, "grad_norm": 0.10332004725933075, "learning_rate": 0.002, "loss": 2.3266, "step": 199230 }, { "epoch": 0.7702061202084396, "grad_norm": 0.09868767857551575, "learning_rate": 0.002, "loss": 2.3365, "step": 199240 }, { "epoch": 0.770244777411823, "grad_norm": 0.09203759580850601, "learning_rate": 0.002, "loss": 2.3589, "step": 199250 }, { "epoch": 0.7702834346152062, "grad_norm": 0.09470377117395401, "learning_rate": 0.002, "loss": 2.3442, "step": 199260 }, { "epoch": 0.7703220918185895, "grad_norm": 0.09742645174264908, "learning_rate": 0.002, "loss": 2.3317, "step": 199270 }, { "epoch": 0.7703607490219727, "grad_norm": 0.12007997184991837, "learning_rate": 0.002, "loss": 2.3334, "step": 199280 }, { "epoch": 0.7703994062253561, "grad_norm": 0.09929712861776352, "learning_rate": 0.002, "loss": 2.347, "step": 199290 }, { "epoch": 0.7704380634287393, "grad_norm": 0.13558223843574524, "learning_rate": 0.002, "loss": 2.3441, "step": 199300 }, { "epoch": 0.7704767206321226, "grad_norm": 0.09942631423473358, "learning_rate": 0.002, "loss": 2.3393, "step": 199310 }, { "epoch": 0.7705153778355058, "grad_norm": 0.09960619360208511, "learning_rate": 0.002, "loss": 2.3467, "step": 199320 }, { "epoch": 0.7705540350388892, "grad_norm": 0.10436469316482544, "learning_rate": 0.002, "loss": 2.3324, "step": 199330 }, { "epoch": 0.7705926922422724, "grad_norm": 0.11296504735946655, "learning_rate": 0.002, "loss": 2.3432, "step": 199340 }, { "epoch": 0.7706313494456557, "grad_norm": 0.11591428518295288, "learning_rate": 0.002, "loss": 2.3438, "step": 199350 }, { "epoch": 0.770670006649039, "grad_norm": 0.10151247680187225, "learning_rate": 0.002, "loss": 2.3444, "step": 199360 }, { "epoch": 0.7707086638524223, "grad_norm": 0.13121308386325836, "learning_rate": 0.002, "loss": 2.3436, "step": 199370 }, { "epoch": 0.7707473210558056, "grad_norm": 0.1014532521367073, "learning_rate": 0.002, "loss": 2.339, "step": 199380 }, { "epoch": 0.7707859782591888, "grad_norm": 0.09893794357776642, "learning_rate": 0.002, "loss": 2.3311, "step": 199390 }, { "epoch": 0.7708246354625721, "grad_norm": 0.11550657451152802, "learning_rate": 0.002, "loss": 2.3469, "step": 199400 }, { "epoch": 0.7708632926659553, "grad_norm": 0.09744752943515778, "learning_rate": 0.002, "loss": 2.3215, "step": 199410 }, { "epoch": 0.7709019498693387, "grad_norm": 0.09887513518333435, "learning_rate": 0.002, "loss": 2.3459, "step": 199420 }, { "epoch": 0.7709406070727219, "grad_norm": 0.13161727786064148, "learning_rate": 0.002, "loss": 2.3605, "step": 199430 }, { "epoch": 0.7709792642761052, "grad_norm": 0.10517606884241104, "learning_rate": 0.002, "loss": 2.3486, "step": 199440 }, { "epoch": 0.7710179214794884, "grad_norm": 0.10197417438030243, "learning_rate": 0.002, "loss": 2.3505, "step": 199450 }, { "epoch": 0.7710565786828718, "grad_norm": 0.0997900515794754, "learning_rate": 0.002, "loss": 2.337, "step": 199460 }, { "epoch": 0.7710952358862551, "grad_norm": 0.12040169537067413, "learning_rate": 0.002, "loss": 2.346, "step": 199470 }, { "epoch": 0.7711338930896383, "grad_norm": 0.10656525194644928, "learning_rate": 0.002, "loss": 2.3443, "step": 199480 }, { "epoch": 0.7711725502930216, "grad_norm": 0.09704674035310745, "learning_rate": 0.002, "loss": 2.3208, "step": 199490 }, { "epoch": 0.7712112074964049, "grad_norm": 0.10840927809476852, "learning_rate": 0.002, "loss": 2.3367, "step": 199500 }, { "epoch": 0.7712498646997882, "grad_norm": 0.10759053379297256, "learning_rate": 0.002, "loss": 2.3396, "step": 199510 }, { "epoch": 0.7712885219031714, "grad_norm": 0.09853320568799973, "learning_rate": 0.002, "loss": 2.3341, "step": 199520 }, { "epoch": 0.7713271791065547, "grad_norm": 0.10677963495254517, "learning_rate": 0.002, "loss": 2.3395, "step": 199530 }, { "epoch": 0.771365836309938, "grad_norm": 0.09678921103477478, "learning_rate": 0.002, "loss": 2.3421, "step": 199540 }, { "epoch": 0.7714044935133213, "grad_norm": 0.09936831146478653, "learning_rate": 0.002, "loss": 2.324, "step": 199550 }, { "epoch": 0.7714431507167046, "grad_norm": 0.09832432866096497, "learning_rate": 0.002, "loss": 2.3279, "step": 199560 }, { "epoch": 0.7714818079200878, "grad_norm": 0.105009064078331, "learning_rate": 0.002, "loss": 2.3419, "step": 199570 }, { "epoch": 0.771520465123471, "grad_norm": 0.09333699196577072, "learning_rate": 0.002, "loss": 2.332, "step": 199580 }, { "epoch": 0.7715591223268544, "grad_norm": 0.10455092787742615, "learning_rate": 0.002, "loss": 2.3412, "step": 199590 }, { "epoch": 0.7715977795302377, "grad_norm": 0.15664206445217133, "learning_rate": 0.002, "loss": 2.3481, "step": 199600 }, { "epoch": 0.7716364367336209, "grad_norm": 0.11696160584688187, "learning_rate": 0.002, "loss": 2.335, "step": 199610 }, { "epoch": 0.7716750939370042, "grad_norm": 0.10086527466773987, "learning_rate": 0.002, "loss": 2.3602, "step": 199620 }, { "epoch": 0.7717137511403875, "grad_norm": 0.09339655190706253, "learning_rate": 0.002, "loss": 2.3514, "step": 199630 }, { "epoch": 0.7717524083437708, "grad_norm": 0.09983646124601364, "learning_rate": 0.002, "loss": 2.3399, "step": 199640 }, { "epoch": 0.771791065547154, "grad_norm": 0.14009705185890198, "learning_rate": 0.002, "loss": 2.3376, "step": 199650 }, { "epoch": 0.7718297227505373, "grad_norm": 0.13227851688861847, "learning_rate": 0.002, "loss": 2.3429, "step": 199660 }, { "epoch": 0.7718683799539207, "grad_norm": 0.14454631507396698, "learning_rate": 0.002, "loss": 2.3513, "step": 199670 }, { "epoch": 0.7719070371573039, "grad_norm": 0.1293637454509735, "learning_rate": 0.002, "loss": 2.3521, "step": 199680 }, { "epoch": 0.7719456943606872, "grad_norm": 0.1015193909406662, "learning_rate": 0.002, "loss": 2.3297, "step": 199690 }, { "epoch": 0.7719843515640704, "grad_norm": 0.11463967710733414, "learning_rate": 0.002, "loss": 2.3429, "step": 199700 }, { "epoch": 0.7720230087674538, "grad_norm": 0.0929514467716217, "learning_rate": 0.002, "loss": 2.3339, "step": 199710 }, { "epoch": 0.772061665970837, "grad_norm": 0.10017342865467072, "learning_rate": 0.002, "loss": 2.3455, "step": 199720 }, { "epoch": 0.7721003231742203, "grad_norm": 0.10128919035196304, "learning_rate": 0.002, "loss": 2.3423, "step": 199730 }, { "epoch": 0.7721389803776035, "grad_norm": 1.573693037033081, "learning_rate": 0.002, "loss": 2.3551, "step": 199740 }, { "epoch": 0.7721776375809868, "grad_norm": 0.12044832110404968, "learning_rate": 0.002, "loss": 2.3715, "step": 199750 }, { "epoch": 0.7722162947843701, "grad_norm": 0.10077551752328873, "learning_rate": 0.002, "loss": 2.3526, "step": 199760 }, { "epoch": 0.7722549519877534, "grad_norm": 0.10538480430841446, "learning_rate": 0.002, "loss": 2.3412, "step": 199770 }, { "epoch": 0.7722936091911367, "grad_norm": 0.10388049483299255, "learning_rate": 0.002, "loss": 2.3232, "step": 199780 }, { "epoch": 0.7723322663945199, "grad_norm": 0.10569577664136887, "learning_rate": 0.002, "loss": 2.3343, "step": 199790 }, { "epoch": 0.7723709235979033, "grad_norm": 0.09584219008684158, "learning_rate": 0.002, "loss": 2.3375, "step": 199800 }, { "epoch": 0.7724095808012865, "grad_norm": 0.11350366473197937, "learning_rate": 0.002, "loss": 2.3539, "step": 199810 }, { "epoch": 0.7724482380046698, "grad_norm": 0.09763793647289276, "learning_rate": 0.002, "loss": 2.333, "step": 199820 }, { "epoch": 0.772486895208053, "grad_norm": 0.0916409119963646, "learning_rate": 0.002, "loss": 2.3235, "step": 199830 }, { "epoch": 0.7725255524114364, "grad_norm": 0.11362338066101074, "learning_rate": 0.002, "loss": 2.3329, "step": 199840 }, { "epoch": 0.7725642096148196, "grad_norm": 0.10839736461639404, "learning_rate": 0.002, "loss": 2.3357, "step": 199850 }, { "epoch": 0.7726028668182029, "grad_norm": 0.1145472452044487, "learning_rate": 0.002, "loss": 2.3306, "step": 199860 }, { "epoch": 0.7726415240215861, "grad_norm": 0.1063324436545372, "learning_rate": 0.002, "loss": 2.3398, "step": 199870 }, { "epoch": 0.7726801812249695, "grad_norm": 0.10044882446527481, "learning_rate": 0.002, "loss": 2.3545, "step": 199880 }, { "epoch": 0.7727188384283528, "grad_norm": 0.09982284158468246, "learning_rate": 0.002, "loss": 2.3409, "step": 199890 }, { "epoch": 0.772757495631736, "grad_norm": 0.0994446650147438, "learning_rate": 0.002, "loss": 2.346, "step": 199900 }, { "epoch": 0.7727961528351193, "grad_norm": 0.09719647467136383, "learning_rate": 0.002, "loss": 2.3352, "step": 199910 }, { "epoch": 0.7728348100385026, "grad_norm": 0.11520253121852875, "learning_rate": 0.002, "loss": 2.3243, "step": 199920 }, { "epoch": 0.7728734672418859, "grad_norm": 0.12013207376003265, "learning_rate": 0.002, "loss": 2.3423, "step": 199930 }, { "epoch": 0.7729121244452691, "grad_norm": 0.09739863872528076, "learning_rate": 0.002, "loss": 2.3442, "step": 199940 }, { "epoch": 0.7729507816486524, "grad_norm": 0.09742973744869232, "learning_rate": 0.002, "loss": 2.3405, "step": 199950 }, { "epoch": 0.7729894388520356, "grad_norm": 0.08914405852556229, "learning_rate": 0.002, "loss": 2.3331, "step": 199960 }, { "epoch": 0.773028096055419, "grad_norm": 0.11501894891262054, "learning_rate": 0.002, "loss": 2.3393, "step": 199970 }, { "epoch": 0.7730667532588023, "grad_norm": 0.10237744450569153, "learning_rate": 0.002, "loss": 2.3455, "step": 199980 }, { "epoch": 0.7731054104621855, "grad_norm": 0.0975678488612175, "learning_rate": 0.002, "loss": 2.3455, "step": 199990 }, { "epoch": 0.7731440676655688, "grad_norm": 0.1083686575293541, "learning_rate": 0.002, "loss": 2.3384, "step": 200000 }, { "epoch": 0.7731827248689521, "grad_norm": 0.11673914641141891, "learning_rate": 0.002, "loss": 2.3364, "step": 200010 }, { "epoch": 0.7732213820723354, "grad_norm": 0.11466530710458755, "learning_rate": 0.002, "loss": 2.3168, "step": 200020 }, { "epoch": 0.7732600392757186, "grad_norm": 0.08803026378154755, "learning_rate": 0.002, "loss": 2.3442, "step": 200030 }, { "epoch": 0.7732986964791019, "grad_norm": 0.10837738960981369, "learning_rate": 0.002, "loss": 2.3515, "step": 200040 }, { "epoch": 0.7733373536824852, "grad_norm": 0.0982581079006195, "learning_rate": 0.002, "loss": 2.3533, "step": 200050 }, { "epoch": 0.7733760108858685, "grad_norm": 0.11603618413209915, "learning_rate": 0.002, "loss": 2.3491, "step": 200060 }, { "epoch": 0.7734146680892517, "grad_norm": 0.10336446762084961, "learning_rate": 0.002, "loss": 2.343, "step": 200070 }, { "epoch": 0.773453325292635, "grad_norm": 0.11303336173295975, "learning_rate": 0.002, "loss": 2.3371, "step": 200080 }, { "epoch": 0.7734919824960184, "grad_norm": 0.12811653316020966, "learning_rate": 0.002, "loss": 2.3396, "step": 200090 }, { "epoch": 0.7735306396994016, "grad_norm": 0.0947103500366211, "learning_rate": 0.002, "loss": 2.3423, "step": 200100 }, { "epoch": 0.7735692969027849, "grad_norm": 0.09696483612060547, "learning_rate": 0.002, "loss": 2.352, "step": 200110 }, { "epoch": 0.7736079541061681, "grad_norm": 0.09755218029022217, "learning_rate": 0.002, "loss": 2.3543, "step": 200120 }, { "epoch": 0.7736466113095514, "grad_norm": 0.12071071565151215, "learning_rate": 0.002, "loss": 2.3396, "step": 200130 }, { "epoch": 0.7736852685129347, "grad_norm": 0.10463309288024902, "learning_rate": 0.002, "loss": 2.3384, "step": 200140 }, { "epoch": 0.773723925716318, "grad_norm": 0.09501010179519653, "learning_rate": 0.002, "loss": 2.3412, "step": 200150 }, { "epoch": 0.7737625829197012, "grad_norm": 0.13405849039554596, "learning_rate": 0.002, "loss": 2.3429, "step": 200160 }, { "epoch": 0.7738012401230845, "grad_norm": 0.08840669691562653, "learning_rate": 0.002, "loss": 2.3261, "step": 200170 }, { "epoch": 0.7738398973264679, "grad_norm": 0.1252623349428177, "learning_rate": 0.002, "loss": 2.3379, "step": 200180 }, { "epoch": 0.7738785545298511, "grad_norm": 0.11263350397348404, "learning_rate": 0.002, "loss": 2.3552, "step": 200190 }, { "epoch": 0.7739172117332344, "grad_norm": 0.09044620394706726, "learning_rate": 0.002, "loss": 2.3457, "step": 200200 }, { "epoch": 0.7739558689366176, "grad_norm": 0.11641410738229752, "learning_rate": 0.002, "loss": 2.3346, "step": 200210 }, { "epoch": 0.773994526140001, "grad_norm": 0.1021437793970108, "learning_rate": 0.002, "loss": 2.3402, "step": 200220 }, { "epoch": 0.7740331833433842, "grad_norm": 0.10622060298919678, "learning_rate": 0.002, "loss": 2.3388, "step": 200230 }, { "epoch": 0.7740718405467675, "grad_norm": 0.13026146590709686, "learning_rate": 0.002, "loss": 2.3394, "step": 200240 }, { "epoch": 0.7741104977501507, "grad_norm": 0.4658176004886627, "learning_rate": 0.002, "loss": 2.3619, "step": 200250 }, { "epoch": 0.7741491549535341, "grad_norm": 0.09170550853013992, "learning_rate": 0.002, "loss": 2.346, "step": 200260 }, { "epoch": 0.7741878121569173, "grad_norm": 0.10174133628606796, "learning_rate": 0.002, "loss": 2.3468, "step": 200270 }, { "epoch": 0.7742264693603006, "grad_norm": 0.09880983084440231, "learning_rate": 0.002, "loss": 2.3481, "step": 200280 }, { "epoch": 0.7742651265636838, "grad_norm": 0.10881564766168594, "learning_rate": 0.002, "loss": 2.3494, "step": 200290 }, { "epoch": 0.7743037837670672, "grad_norm": 0.10786505788564682, "learning_rate": 0.002, "loss": 2.3483, "step": 200300 }, { "epoch": 0.7743424409704505, "grad_norm": 0.10389269143342972, "learning_rate": 0.002, "loss": 2.3378, "step": 200310 }, { "epoch": 0.7743810981738337, "grad_norm": 0.12156102806329727, "learning_rate": 0.002, "loss": 2.3403, "step": 200320 }, { "epoch": 0.774419755377217, "grad_norm": 0.10176001489162445, "learning_rate": 0.002, "loss": 2.3469, "step": 200330 }, { "epoch": 0.7744584125806002, "grad_norm": 0.11010489612817764, "learning_rate": 0.002, "loss": 2.3473, "step": 200340 }, { "epoch": 0.7744970697839836, "grad_norm": 0.10778948664665222, "learning_rate": 0.002, "loss": 2.3214, "step": 200350 }, { "epoch": 0.7745357269873668, "grad_norm": 0.10530728101730347, "learning_rate": 0.002, "loss": 2.3345, "step": 200360 }, { "epoch": 0.7745743841907501, "grad_norm": 0.10748562216758728, "learning_rate": 0.002, "loss": 2.3275, "step": 200370 }, { "epoch": 0.7746130413941333, "grad_norm": 0.1270465850830078, "learning_rate": 0.002, "loss": 2.3501, "step": 200380 }, { "epoch": 0.7746516985975167, "grad_norm": 0.11276346445083618, "learning_rate": 0.002, "loss": 2.3308, "step": 200390 }, { "epoch": 0.7746903558009, "grad_norm": 0.09427843242883682, "learning_rate": 0.002, "loss": 2.3419, "step": 200400 }, { "epoch": 0.7747290130042832, "grad_norm": 0.10547053068876266, "learning_rate": 0.002, "loss": 2.3359, "step": 200410 }, { "epoch": 0.7747676702076665, "grad_norm": 0.0937475636601448, "learning_rate": 0.002, "loss": 2.3484, "step": 200420 }, { "epoch": 0.7748063274110498, "grad_norm": 0.10085562616586685, "learning_rate": 0.002, "loss": 2.3186, "step": 200430 }, { "epoch": 0.7748449846144331, "grad_norm": 0.10139386355876923, "learning_rate": 0.002, "loss": 2.3328, "step": 200440 }, { "epoch": 0.7748836418178163, "grad_norm": 0.11379513144493103, "learning_rate": 0.002, "loss": 2.344, "step": 200450 }, { "epoch": 0.7749222990211996, "grad_norm": 0.1242862194776535, "learning_rate": 0.002, "loss": 2.3412, "step": 200460 }, { "epoch": 0.7749609562245829, "grad_norm": 0.10932227224111557, "learning_rate": 0.002, "loss": 2.3471, "step": 200470 }, { "epoch": 0.7749996134279662, "grad_norm": 0.10406124591827393, "learning_rate": 0.002, "loss": 2.3463, "step": 200480 }, { "epoch": 0.7750382706313494, "grad_norm": 0.10119037330150604, "learning_rate": 0.002, "loss": 2.3244, "step": 200490 }, { "epoch": 0.7750769278347327, "grad_norm": 0.10155569016933441, "learning_rate": 0.002, "loss": 2.3376, "step": 200500 }, { "epoch": 0.775115585038116, "grad_norm": 0.09935098141431808, "learning_rate": 0.002, "loss": 2.3291, "step": 200510 }, { "epoch": 0.7751542422414993, "grad_norm": 0.11348678171634674, "learning_rate": 0.002, "loss": 2.34, "step": 200520 }, { "epoch": 0.7751928994448826, "grad_norm": 0.09598572552204132, "learning_rate": 0.002, "loss": 2.3484, "step": 200530 }, { "epoch": 0.7752315566482658, "grad_norm": 0.11006144434213638, "learning_rate": 0.002, "loss": 2.3421, "step": 200540 }, { "epoch": 0.7752702138516491, "grad_norm": 0.10205881297588348, "learning_rate": 0.002, "loss": 2.3272, "step": 200550 }, { "epoch": 0.7753088710550324, "grad_norm": 0.11025692522525787, "learning_rate": 0.002, "loss": 2.3564, "step": 200560 }, { "epoch": 0.7753475282584157, "grad_norm": 0.10488265752792358, "learning_rate": 0.002, "loss": 2.3446, "step": 200570 }, { "epoch": 0.7753861854617989, "grad_norm": 0.09900173544883728, "learning_rate": 0.002, "loss": 2.34, "step": 200580 }, { "epoch": 0.7754248426651822, "grad_norm": 0.09940632432699203, "learning_rate": 0.002, "loss": 2.3576, "step": 200590 }, { "epoch": 0.7754634998685656, "grad_norm": 0.10207568109035492, "learning_rate": 0.002, "loss": 2.3574, "step": 200600 }, { "epoch": 0.7755021570719488, "grad_norm": 0.09659580886363983, "learning_rate": 0.002, "loss": 2.3203, "step": 200610 }, { "epoch": 0.7755408142753321, "grad_norm": 0.13341741263866425, "learning_rate": 0.002, "loss": 2.3435, "step": 200620 }, { "epoch": 0.7755794714787153, "grad_norm": 0.12346010655164719, "learning_rate": 0.002, "loss": 2.3378, "step": 200630 }, { "epoch": 0.7756181286820987, "grad_norm": 0.09464599192142487, "learning_rate": 0.002, "loss": 2.3268, "step": 200640 }, { "epoch": 0.7756567858854819, "grad_norm": 0.10147619992494583, "learning_rate": 0.002, "loss": 2.3335, "step": 200650 }, { "epoch": 0.7756954430888652, "grad_norm": 0.09558901935815811, "learning_rate": 0.002, "loss": 2.3524, "step": 200660 }, { "epoch": 0.7757341002922484, "grad_norm": 0.11636707186698914, "learning_rate": 0.002, "loss": 2.338, "step": 200670 }, { "epoch": 0.7757727574956317, "grad_norm": 0.1300460696220398, "learning_rate": 0.002, "loss": 2.34, "step": 200680 }, { "epoch": 0.775811414699015, "grad_norm": 0.09909453988075256, "learning_rate": 0.002, "loss": 2.3314, "step": 200690 }, { "epoch": 0.7758500719023983, "grad_norm": 0.09746900200843811, "learning_rate": 0.002, "loss": 2.3432, "step": 200700 }, { "epoch": 0.7758887291057815, "grad_norm": 0.09796787053346634, "learning_rate": 0.002, "loss": 2.3257, "step": 200710 }, { "epoch": 0.7759273863091648, "grad_norm": 0.11972364038228989, "learning_rate": 0.002, "loss": 2.3265, "step": 200720 }, { "epoch": 0.7759660435125482, "grad_norm": 0.09397576004266739, "learning_rate": 0.002, "loss": 2.3299, "step": 200730 }, { "epoch": 0.7760047007159314, "grad_norm": 0.09508085995912552, "learning_rate": 0.002, "loss": 2.3448, "step": 200740 }, { "epoch": 0.7760433579193147, "grad_norm": 0.09870732575654984, "learning_rate": 0.002, "loss": 2.339, "step": 200750 }, { "epoch": 0.7760820151226979, "grad_norm": 0.10102210938930511, "learning_rate": 0.002, "loss": 2.3272, "step": 200760 }, { "epoch": 0.7761206723260813, "grad_norm": 0.10540328174829483, "learning_rate": 0.002, "loss": 2.341, "step": 200770 }, { "epoch": 0.7761593295294645, "grad_norm": 0.09322625398635864, "learning_rate": 0.002, "loss": 2.3413, "step": 200780 }, { "epoch": 0.7761979867328478, "grad_norm": 0.10350510478019714, "learning_rate": 0.002, "loss": 2.3335, "step": 200790 }, { "epoch": 0.776236643936231, "grad_norm": 0.10563094168901443, "learning_rate": 0.002, "loss": 2.3434, "step": 200800 }, { "epoch": 0.7762753011396144, "grad_norm": 0.10559462755918503, "learning_rate": 0.002, "loss": 2.3374, "step": 200810 }, { "epoch": 0.7763139583429977, "grad_norm": 0.0866955816745758, "learning_rate": 0.002, "loss": 2.3441, "step": 200820 }, { "epoch": 0.7763526155463809, "grad_norm": 0.10731297731399536, "learning_rate": 0.002, "loss": 2.3362, "step": 200830 }, { "epoch": 0.7763912727497642, "grad_norm": 0.09620799124240875, "learning_rate": 0.002, "loss": 2.3343, "step": 200840 }, { "epoch": 0.7764299299531475, "grad_norm": 0.1093180775642395, "learning_rate": 0.002, "loss": 2.3445, "step": 200850 }, { "epoch": 0.7764685871565308, "grad_norm": 0.10197804123163223, "learning_rate": 0.002, "loss": 2.3483, "step": 200860 }, { "epoch": 0.776507244359914, "grad_norm": 0.12022387236356735, "learning_rate": 0.002, "loss": 2.3284, "step": 200870 }, { "epoch": 0.7765459015632973, "grad_norm": 0.08632820844650269, "learning_rate": 0.002, "loss": 2.347, "step": 200880 }, { "epoch": 0.7765845587666805, "grad_norm": 0.11173965036869049, "learning_rate": 0.002, "loss": 2.3383, "step": 200890 }, { "epoch": 0.7766232159700639, "grad_norm": 0.10657903552055359, "learning_rate": 0.002, "loss": 2.3474, "step": 200900 }, { "epoch": 0.7766618731734471, "grad_norm": 0.0989794209599495, "learning_rate": 0.002, "loss": 2.3328, "step": 200910 }, { "epoch": 0.7767005303768304, "grad_norm": 0.10527005046606064, "learning_rate": 0.002, "loss": 2.349, "step": 200920 }, { "epoch": 0.7767391875802137, "grad_norm": 0.12138840556144714, "learning_rate": 0.002, "loss": 2.3473, "step": 200930 }, { "epoch": 0.776777844783597, "grad_norm": 0.10083303600549698, "learning_rate": 0.002, "loss": 2.3528, "step": 200940 }, { "epoch": 0.7768165019869803, "grad_norm": 0.09858760237693787, "learning_rate": 0.002, "loss": 2.3354, "step": 200950 }, { "epoch": 0.7768551591903635, "grad_norm": 0.1061653196811676, "learning_rate": 0.002, "loss": 2.3431, "step": 200960 }, { "epoch": 0.7768938163937468, "grad_norm": 0.09524355828762054, "learning_rate": 0.002, "loss": 2.3354, "step": 200970 }, { "epoch": 0.7769324735971301, "grad_norm": 0.1071450337767601, "learning_rate": 0.002, "loss": 2.34, "step": 200980 }, { "epoch": 0.7769711308005134, "grad_norm": 0.09246594458818436, "learning_rate": 0.002, "loss": 2.3581, "step": 200990 }, { "epoch": 0.7770097880038966, "grad_norm": 0.08741657435894012, "learning_rate": 0.002, "loss": 2.3437, "step": 201000 }, { "epoch": 0.7770484452072799, "grad_norm": 0.1069013699889183, "learning_rate": 0.002, "loss": 2.3515, "step": 201010 }, { "epoch": 0.7770871024106633, "grad_norm": 0.09173951297998428, "learning_rate": 0.002, "loss": 2.3424, "step": 201020 }, { "epoch": 0.7771257596140465, "grad_norm": 0.09130797535181046, "learning_rate": 0.002, "loss": 2.3431, "step": 201030 }, { "epoch": 0.7771644168174298, "grad_norm": 0.09466081857681274, "learning_rate": 0.002, "loss": 2.3454, "step": 201040 }, { "epoch": 0.777203074020813, "grad_norm": 0.11741433292627335, "learning_rate": 0.002, "loss": 2.3357, "step": 201050 }, { "epoch": 0.7772417312241963, "grad_norm": 0.09997253119945526, "learning_rate": 0.002, "loss": 2.3345, "step": 201060 }, { "epoch": 0.7772803884275796, "grad_norm": 0.0999874547123909, "learning_rate": 0.002, "loss": 2.3335, "step": 201070 }, { "epoch": 0.7773190456309629, "grad_norm": 0.11438187211751938, "learning_rate": 0.002, "loss": 2.3426, "step": 201080 }, { "epoch": 0.7773577028343461, "grad_norm": 0.19451777637004852, "learning_rate": 0.002, "loss": 2.335, "step": 201090 }, { "epoch": 0.7773963600377294, "grad_norm": 0.10860533267259598, "learning_rate": 0.002, "loss": 2.336, "step": 201100 }, { "epoch": 0.7774350172411127, "grad_norm": 0.1131187304854393, "learning_rate": 0.002, "loss": 2.3426, "step": 201110 }, { "epoch": 0.777473674444496, "grad_norm": 0.10684671252965927, "learning_rate": 0.002, "loss": 2.3421, "step": 201120 }, { "epoch": 0.7775123316478793, "grad_norm": 0.10805738717317581, "learning_rate": 0.002, "loss": 2.3339, "step": 201130 }, { "epoch": 0.7775509888512625, "grad_norm": 0.0960422232747078, "learning_rate": 0.002, "loss": 2.3507, "step": 201140 }, { "epoch": 0.7775896460546459, "grad_norm": 0.12220508605241776, "learning_rate": 0.002, "loss": 2.346, "step": 201150 }, { "epoch": 0.7776283032580291, "grad_norm": 0.09800433367490768, "learning_rate": 0.002, "loss": 2.3525, "step": 201160 }, { "epoch": 0.7776669604614124, "grad_norm": 0.09436652064323425, "learning_rate": 0.002, "loss": 2.3469, "step": 201170 }, { "epoch": 0.7777056176647956, "grad_norm": 0.0989791750907898, "learning_rate": 0.002, "loss": 2.3379, "step": 201180 }, { "epoch": 0.777744274868179, "grad_norm": 0.10169640928506851, "learning_rate": 0.002, "loss": 2.3296, "step": 201190 }, { "epoch": 0.7777829320715622, "grad_norm": 0.11647674441337585, "learning_rate": 0.002, "loss": 2.3492, "step": 201200 }, { "epoch": 0.7778215892749455, "grad_norm": 0.12052612751722336, "learning_rate": 0.002, "loss": 2.3317, "step": 201210 }, { "epoch": 0.7778602464783287, "grad_norm": 0.11180232465267181, "learning_rate": 0.002, "loss": 2.3507, "step": 201220 }, { "epoch": 0.777898903681712, "grad_norm": 0.09033545106649399, "learning_rate": 0.002, "loss": 2.3413, "step": 201230 }, { "epoch": 0.7779375608850954, "grad_norm": 0.10001803934574127, "learning_rate": 0.002, "loss": 2.3559, "step": 201240 }, { "epoch": 0.7779762180884786, "grad_norm": 0.1214684322476387, "learning_rate": 0.002, "loss": 2.347, "step": 201250 }, { "epoch": 0.7780148752918619, "grad_norm": 0.09808328002691269, "learning_rate": 0.002, "loss": 2.3445, "step": 201260 }, { "epoch": 0.7780535324952451, "grad_norm": 0.08842272311449051, "learning_rate": 0.002, "loss": 2.3208, "step": 201270 }, { "epoch": 0.7780921896986285, "grad_norm": 0.13397300243377686, "learning_rate": 0.002, "loss": 2.3377, "step": 201280 }, { "epoch": 0.7781308469020117, "grad_norm": 0.1140327900648117, "learning_rate": 0.002, "loss": 2.3482, "step": 201290 }, { "epoch": 0.778169504105395, "grad_norm": 0.09129363298416138, "learning_rate": 0.002, "loss": 2.3378, "step": 201300 }, { "epoch": 0.7782081613087782, "grad_norm": 0.10088945925235748, "learning_rate": 0.002, "loss": 2.3276, "step": 201310 }, { "epoch": 0.7782468185121616, "grad_norm": 0.09986527264118195, "learning_rate": 0.002, "loss": 2.334, "step": 201320 }, { "epoch": 0.7782854757155448, "grad_norm": 0.10828401148319244, "learning_rate": 0.002, "loss": 2.3469, "step": 201330 }, { "epoch": 0.7783241329189281, "grad_norm": 0.10116782784461975, "learning_rate": 0.002, "loss": 2.3395, "step": 201340 }, { "epoch": 0.7783627901223114, "grad_norm": 0.09193418174982071, "learning_rate": 0.002, "loss": 2.3369, "step": 201350 }, { "epoch": 0.7784014473256947, "grad_norm": 0.09904490411281586, "learning_rate": 0.002, "loss": 2.3439, "step": 201360 }, { "epoch": 0.778440104529078, "grad_norm": 0.09490885585546494, "learning_rate": 0.002, "loss": 2.3392, "step": 201370 }, { "epoch": 0.7784787617324612, "grad_norm": 0.13544167578220367, "learning_rate": 0.002, "loss": 2.3209, "step": 201380 }, { "epoch": 0.7785174189358445, "grad_norm": 0.10728796571493149, "learning_rate": 0.002, "loss": 2.3382, "step": 201390 }, { "epoch": 0.7785560761392278, "grad_norm": 0.0904412791132927, "learning_rate": 0.002, "loss": 2.3591, "step": 201400 }, { "epoch": 0.7785947333426111, "grad_norm": 0.1182757243514061, "learning_rate": 0.002, "loss": 2.3425, "step": 201410 }, { "epoch": 0.7786333905459943, "grad_norm": 0.10843978822231293, "learning_rate": 0.002, "loss": 2.3542, "step": 201420 }, { "epoch": 0.7786720477493776, "grad_norm": 0.09948401898145676, "learning_rate": 0.002, "loss": 2.3439, "step": 201430 }, { "epoch": 0.7787107049527608, "grad_norm": 0.10118865966796875, "learning_rate": 0.002, "loss": 2.3327, "step": 201440 }, { "epoch": 0.7787493621561442, "grad_norm": 0.12530890107154846, "learning_rate": 0.002, "loss": 2.3478, "step": 201450 }, { "epoch": 0.7787880193595275, "grad_norm": 0.11728890240192413, "learning_rate": 0.002, "loss": 2.3374, "step": 201460 }, { "epoch": 0.7788266765629107, "grad_norm": 0.10972714424133301, "learning_rate": 0.002, "loss": 2.346, "step": 201470 }, { "epoch": 0.778865333766294, "grad_norm": 0.10338211804628372, "learning_rate": 0.002, "loss": 2.344, "step": 201480 }, { "epoch": 0.7789039909696773, "grad_norm": 0.11228205263614655, "learning_rate": 0.002, "loss": 2.3379, "step": 201490 }, { "epoch": 0.7789426481730606, "grad_norm": 0.11558589339256287, "learning_rate": 0.002, "loss": 2.3459, "step": 201500 }, { "epoch": 0.7789813053764438, "grad_norm": 0.10174691677093506, "learning_rate": 0.002, "loss": 2.3414, "step": 201510 }, { "epoch": 0.7790199625798271, "grad_norm": 0.10404979437589645, "learning_rate": 0.002, "loss": 2.3357, "step": 201520 }, { "epoch": 0.7790586197832104, "grad_norm": 0.09674325585365295, "learning_rate": 0.002, "loss": 2.3495, "step": 201530 }, { "epoch": 0.7790972769865937, "grad_norm": 0.10181877762079239, "learning_rate": 0.002, "loss": 2.3561, "step": 201540 }, { "epoch": 0.779135934189977, "grad_norm": 0.10503356903791428, "learning_rate": 0.002, "loss": 2.3513, "step": 201550 }, { "epoch": 0.7791745913933602, "grad_norm": 0.10027583688497543, "learning_rate": 0.002, "loss": 2.3484, "step": 201560 }, { "epoch": 0.7792132485967436, "grad_norm": 0.09758977591991425, "learning_rate": 0.002, "loss": 2.3273, "step": 201570 }, { "epoch": 0.7792519058001268, "grad_norm": 0.10359928011894226, "learning_rate": 0.002, "loss": 2.3484, "step": 201580 }, { "epoch": 0.7792905630035101, "grad_norm": 0.1090666875243187, "learning_rate": 0.002, "loss": 2.35, "step": 201590 }, { "epoch": 0.7793292202068933, "grad_norm": 0.1132933497428894, "learning_rate": 0.002, "loss": 2.3292, "step": 201600 }, { "epoch": 0.7793678774102766, "grad_norm": 0.10195934027433395, "learning_rate": 0.002, "loss": 2.3512, "step": 201610 }, { "epoch": 0.7794065346136599, "grad_norm": 0.10770484060049057, "learning_rate": 0.002, "loss": 2.3289, "step": 201620 }, { "epoch": 0.7794451918170432, "grad_norm": 0.10276725143194199, "learning_rate": 0.002, "loss": 2.3313, "step": 201630 }, { "epoch": 0.7794838490204264, "grad_norm": 0.09507951885461807, "learning_rate": 0.002, "loss": 2.3324, "step": 201640 }, { "epoch": 0.7795225062238097, "grad_norm": 0.11635395884513855, "learning_rate": 0.002, "loss": 2.3245, "step": 201650 }, { "epoch": 0.7795611634271931, "grad_norm": 0.09882384538650513, "learning_rate": 0.002, "loss": 2.3327, "step": 201660 }, { "epoch": 0.7795998206305763, "grad_norm": 0.10508741438388824, "learning_rate": 0.002, "loss": 2.3196, "step": 201670 }, { "epoch": 0.7796384778339596, "grad_norm": 0.10111743956804276, "learning_rate": 0.002, "loss": 2.3297, "step": 201680 }, { "epoch": 0.7796771350373428, "grad_norm": 0.09746131300926208, "learning_rate": 0.002, "loss": 2.3311, "step": 201690 }, { "epoch": 0.7797157922407262, "grad_norm": 0.09846270829439163, "learning_rate": 0.002, "loss": 2.3359, "step": 201700 }, { "epoch": 0.7797544494441094, "grad_norm": 0.09761416912078857, "learning_rate": 0.002, "loss": 2.3208, "step": 201710 }, { "epoch": 0.7797931066474927, "grad_norm": 0.10397903621196747, "learning_rate": 0.002, "loss": 2.3493, "step": 201720 }, { "epoch": 0.7798317638508759, "grad_norm": 0.09711068868637085, "learning_rate": 0.002, "loss": 2.3415, "step": 201730 }, { "epoch": 0.7798704210542593, "grad_norm": 0.10659918934106827, "learning_rate": 0.002, "loss": 2.3378, "step": 201740 }, { "epoch": 0.7799090782576426, "grad_norm": 0.11457981914281845, "learning_rate": 0.002, "loss": 2.3439, "step": 201750 }, { "epoch": 0.7799477354610258, "grad_norm": 0.11186759173870087, "learning_rate": 0.002, "loss": 2.325, "step": 201760 }, { "epoch": 0.7799863926644091, "grad_norm": 0.08898892253637314, "learning_rate": 0.002, "loss": 2.3364, "step": 201770 }, { "epoch": 0.7800250498677924, "grad_norm": 0.09332229942083359, "learning_rate": 0.002, "loss": 2.3205, "step": 201780 }, { "epoch": 0.7800637070711757, "grad_norm": 0.1085081398487091, "learning_rate": 0.002, "loss": 2.3434, "step": 201790 }, { "epoch": 0.7801023642745589, "grad_norm": 0.12998148798942566, "learning_rate": 0.002, "loss": 2.3482, "step": 201800 }, { "epoch": 0.7801410214779422, "grad_norm": 0.1137484461069107, "learning_rate": 0.002, "loss": 2.3456, "step": 201810 }, { "epoch": 0.7801796786813254, "grad_norm": 0.10778406262397766, "learning_rate": 0.002, "loss": 2.3387, "step": 201820 }, { "epoch": 0.7802183358847088, "grad_norm": 0.10134479403495789, "learning_rate": 0.002, "loss": 2.3328, "step": 201830 }, { "epoch": 0.780256993088092, "grad_norm": 0.11462592333555222, "learning_rate": 0.002, "loss": 2.3504, "step": 201840 }, { "epoch": 0.7802956502914753, "grad_norm": 0.11337092518806458, "learning_rate": 0.002, "loss": 2.3384, "step": 201850 }, { "epoch": 0.7803343074948585, "grad_norm": 0.09721145778894424, "learning_rate": 0.002, "loss": 2.3461, "step": 201860 }, { "epoch": 0.7803729646982419, "grad_norm": 0.11457212269306183, "learning_rate": 0.002, "loss": 2.3447, "step": 201870 }, { "epoch": 0.7804116219016252, "grad_norm": 0.10253756493330002, "learning_rate": 0.002, "loss": 2.3464, "step": 201880 }, { "epoch": 0.7804502791050084, "grad_norm": 0.11434777081012726, "learning_rate": 0.002, "loss": 2.3432, "step": 201890 }, { "epoch": 0.7804889363083917, "grad_norm": 0.12110897898674011, "learning_rate": 0.002, "loss": 2.3519, "step": 201900 }, { "epoch": 0.780527593511775, "grad_norm": 0.11207719892263412, "learning_rate": 0.002, "loss": 2.3511, "step": 201910 }, { "epoch": 0.7805662507151583, "grad_norm": 0.11025089770555496, "learning_rate": 0.002, "loss": 2.3565, "step": 201920 }, { "epoch": 0.7806049079185415, "grad_norm": 0.11021070182323456, "learning_rate": 0.002, "loss": 2.3492, "step": 201930 }, { "epoch": 0.7806435651219248, "grad_norm": 0.1151655986905098, "learning_rate": 0.002, "loss": 2.3473, "step": 201940 }, { "epoch": 0.7806822223253082, "grad_norm": 0.10133794695138931, "learning_rate": 0.002, "loss": 2.3285, "step": 201950 }, { "epoch": 0.7807208795286914, "grad_norm": 0.11351914703845978, "learning_rate": 0.002, "loss": 2.3463, "step": 201960 }, { "epoch": 0.7807595367320747, "grad_norm": 0.09121467918157578, "learning_rate": 0.002, "loss": 2.3455, "step": 201970 }, { "epoch": 0.7807981939354579, "grad_norm": 0.1189068853855133, "learning_rate": 0.002, "loss": 2.3309, "step": 201980 }, { "epoch": 0.7808368511388412, "grad_norm": 0.11156963557004929, "learning_rate": 0.002, "loss": 2.3328, "step": 201990 }, { "epoch": 0.7808755083422245, "grad_norm": 0.0933595597743988, "learning_rate": 0.002, "loss": 2.3587, "step": 202000 }, { "epoch": 0.7809141655456078, "grad_norm": 0.10935278981924057, "learning_rate": 0.002, "loss": 2.3435, "step": 202010 }, { "epoch": 0.780952822748991, "grad_norm": 0.09758707880973816, "learning_rate": 0.002, "loss": 2.3417, "step": 202020 }, { "epoch": 0.7809914799523743, "grad_norm": 0.11436577141284943, "learning_rate": 0.002, "loss": 2.3456, "step": 202030 }, { "epoch": 0.7810301371557576, "grad_norm": 0.10427207499742508, "learning_rate": 0.002, "loss": 2.3445, "step": 202040 }, { "epoch": 0.7810687943591409, "grad_norm": 0.09066711366176605, "learning_rate": 0.002, "loss": 2.3449, "step": 202050 }, { "epoch": 0.7811074515625241, "grad_norm": 0.11137081682682037, "learning_rate": 0.002, "loss": 2.3436, "step": 202060 }, { "epoch": 0.7811461087659074, "grad_norm": 0.10201691091060638, "learning_rate": 0.002, "loss": 2.3495, "step": 202070 }, { "epoch": 0.7811847659692908, "grad_norm": 0.1071251705288887, "learning_rate": 0.002, "loss": 2.3336, "step": 202080 }, { "epoch": 0.781223423172674, "grad_norm": 0.10883677750825882, "learning_rate": 0.002, "loss": 2.3545, "step": 202090 }, { "epoch": 0.7812620803760573, "grad_norm": 0.13566423952579498, "learning_rate": 0.002, "loss": 2.3459, "step": 202100 }, { "epoch": 0.7813007375794405, "grad_norm": 0.09948530793190002, "learning_rate": 0.002, "loss": 2.344, "step": 202110 }, { "epoch": 0.7813393947828239, "grad_norm": 0.11490985006093979, "learning_rate": 0.002, "loss": 2.3421, "step": 202120 }, { "epoch": 0.7813780519862071, "grad_norm": 0.11161590367555618, "learning_rate": 0.002, "loss": 2.3512, "step": 202130 }, { "epoch": 0.7814167091895904, "grad_norm": 0.10396748036146164, "learning_rate": 0.002, "loss": 2.3267, "step": 202140 }, { "epoch": 0.7814553663929736, "grad_norm": 0.12490322440862656, "learning_rate": 0.002, "loss": 2.3408, "step": 202150 }, { "epoch": 0.7814940235963569, "grad_norm": 0.1008855327963829, "learning_rate": 0.002, "loss": 2.3515, "step": 202160 }, { "epoch": 0.7815326807997403, "grad_norm": 0.1047447994351387, "learning_rate": 0.002, "loss": 2.3258, "step": 202170 }, { "epoch": 0.7815713380031235, "grad_norm": 0.10873875766992569, "learning_rate": 0.002, "loss": 2.339, "step": 202180 }, { "epoch": 0.7816099952065068, "grad_norm": 0.12432877719402313, "learning_rate": 0.002, "loss": 2.3387, "step": 202190 }, { "epoch": 0.78164865240989, "grad_norm": 0.11288367211818695, "learning_rate": 0.002, "loss": 2.3452, "step": 202200 }, { "epoch": 0.7816873096132734, "grad_norm": 0.1022430807352066, "learning_rate": 0.002, "loss": 2.3378, "step": 202210 }, { "epoch": 0.7817259668166566, "grad_norm": 0.11609577387571335, "learning_rate": 0.002, "loss": 2.3291, "step": 202220 }, { "epoch": 0.7817646240200399, "grad_norm": 0.10611993074417114, "learning_rate": 0.002, "loss": 2.3323, "step": 202230 }, { "epoch": 0.7818032812234231, "grad_norm": 0.10908140987157822, "learning_rate": 0.002, "loss": 2.3395, "step": 202240 }, { "epoch": 0.7818419384268065, "grad_norm": 0.09165285527706146, "learning_rate": 0.002, "loss": 2.3348, "step": 202250 }, { "epoch": 0.7818805956301897, "grad_norm": 0.10701560974121094, "learning_rate": 0.002, "loss": 2.3345, "step": 202260 }, { "epoch": 0.781919252833573, "grad_norm": 0.11059151589870453, "learning_rate": 0.002, "loss": 2.3429, "step": 202270 }, { "epoch": 0.7819579100369562, "grad_norm": 0.10945714265108109, "learning_rate": 0.002, "loss": 2.3309, "step": 202280 }, { "epoch": 0.7819965672403396, "grad_norm": 0.0986083596944809, "learning_rate": 0.002, "loss": 2.3445, "step": 202290 }, { "epoch": 0.7820352244437229, "grad_norm": 0.10011819005012512, "learning_rate": 0.002, "loss": 2.344, "step": 202300 }, { "epoch": 0.7820738816471061, "grad_norm": 0.10819520801305771, "learning_rate": 0.002, "loss": 2.3313, "step": 202310 }, { "epoch": 0.7821125388504894, "grad_norm": 0.10630880296230316, "learning_rate": 0.002, "loss": 2.3352, "step": 202320 }, { "epoch": 0.7821511960538727, "grad_norm": 0.12038673460483551, "learning_rate": 0.002, "loss": 2.338, "step": 202330 }, { "epoch": 0.782189853257256, "grad_norm": 0.11459699273109436, "learning_rate": 0.002, "loss": 2.3417, "step": 202340 }, { "epoch": 0.7822285104606392, "grad_norm": 0.10157457739114761, "learning_rate": 0.002, "loss": 2.3345, "step": 202350 }, { "epoch": 0.7822671676640225, "grad_norm": 0.09755659103393555, "learning_rate": 0.002, "loss": 2.3452, "step": 202360 }, { "epoch": 0.7823058248674057, "grad_norm": 0.09254967421293259, "learning_rate": 0.002, "loss": 2.3373, "step": 202370 }, { "epoch": 0.7823444820707891, "grad_norm": 0.09767594188451767, "learning_rate": 0.002, "loss": 2.3546, "step": 202380 }, { "epoch": 0.7823831392741724, "grad_norm": 0.14414367079734802, "learning_rate": 0.002, "loss": 2.3293, "step": 202390 }, { "epoch": 0.7824217964775556, "grad_norm": 0.10989902168512344, "learning_rate": 0.002, "loss": 2.3292, "step": 202400 }, { "epoch": 0.7824604536809389, "grad_norm": 0.10024165362119675, "learning_rate": 0.002, "loss": 2.3382, "step": 202410 }, { "epoch": 0.7824991108843222, "grad_norm": 0.10078973323106766, "learning_rate": 0.002, "loss": 2.3267, "step": 202420 }, { "epoch": 0.7825377680877055, "grad_norm": 0.10243651270866394, "learning_rate": 0.002, "loss": 2.3609, "step": 202430 }, { "epoch": 0.7825764252910887, "grad_norm": 0.12386278808116913, "learning_rate": 0.002, "loss": 2.3357, "step": 202440 }, { "epoch": 0.782615082494472, "grad_norm": 0.09828358143568039, "learning_rate": 0.002, "loss": 2.3381, "step": 202450 }, { "epoch": 0.7826537396978553, "grad_norm": 0.10247497260570526, "learning_rate": 0.002, "loss": 2.3464, "step": 202460 }, { "epoch": 0.7826923969012386, "grad_norm": 0.1063644215464592, "learning_rate": 0.002, "loss": 2.3377, "step": 202470 }, { "epoch": 0.7827310541046218, "grad_norm": 0.08840660005807877, "learning_rate": 0.002, "loss": 2.3306, "step": 202480 }, { "epoch": 0.7827697113080051, "grad_norm": 0.10195198655128479, "learning_rate": 0.002, "loss": 2.3441, "step": 202490 }, { "epoch": 0.7828083685113885, "grad_norm": 0.119685597717762, "learning_rate": 0.002, "loss": 2.3567, "step": 202500 }, { "epoch": 0.7828470257147717, "grad_norm": 0.10883565992116928, "learning_rate": 0.002, "loss": 2.3362, "step": 202510 }, { "epoch": 0.782885682918155, "grad_norm": 0.10767433792352676, "learning_rate": 0.002, "loss": 2.3476, "step": 202520 }, { "epoch": 0.7829243401215382, "grad_norm": 0.09536431729793549, "learning_rate": 0.002, "loss": 2.3443, "step": 202530 }, { "epoch": 0.7829629973249215, "grad_norm": 0.09645789861679077, "learning_rate": 0.002, "loss": 2.3398, "step": 202540 }, { "epoch": 0.7830016545283048, "grad_norm": 0.09781122207641602, "learning_rate": 0.002, "loss": 2.3325, "step": 202550 }, { "epoch": 0.7830403117316881, "grad_norm": 0.11750921607017517, "learning_rate": 0.002, "loss": 2.3304, "step": 202560 }, { "epoch": 0.7830789689350713, "grad_norm": 0.12031945586204529, "learning_rate": 0.002, "loss": 2.3288, "step": 202570 }, { "epoch": 0.7831176261384546, "grad_norm": 0.13411380350589752, "learning_rate": 0.002, "loss": 2.3448, "step": 202580 }, { "epoch": 0.783156283341838, "grad_norm": 0.09357387572526932, "learning_rate": 0.002, "loss": 2.3236, "step": 202590 }, { "epoch": 0.7831949405452212, "grad_norm": 0.09378042072057724, "learning_rate": 0.002, "loss": 2.3297, "step": 202600 }, { "epoch": 0.7832335977486045, "grad_norm": 0.11569520086050034, "learning_rate": 0.002, "loss": 2.343, "step": 202610 }, { "epoch": 0.7832722549519877, "grad_norm": 0.09979037195444107, "learning_rate": 0.002, "loss": 2.3535, "step": 202620 }, { "epoch": 0.7833109121553711, "grad_norm": 0.0989861935377121, "learning_rate": 0.002, "loss": 2.3373, "step": 202630 }, { "epoch": 0.7833495693587543, "grad_norm": 0.12113470584154129, "learning_rate": 0.002, "loss": 2.3424, "step": 202640 }, { "epoch": 0.7833882265621376, "grad_norm": 0.09723356366157532, "learning_rate": 0.002, "loss": 2.349, "step": 202650 }, { "epoch": 0.7834268837655208, "grad_norm": 0.1004517674446106, "learning_rate": 0.002, "loss": 2.3592, "step": 202660 }, { "epoch": 0.7834655409689042, "grad_norm": 0.10730737447738647, "learning_rate": 0.002, "loss": 2.346, "step": 202670 }, { "epoch": 0.7835041981722874, "grad_norm": 0.10889331996440887, "learning_rate": 0.002, "loss": 2.3585, "step": 202680 }, { "epoch": 0.7835428553756707, "grad_norm": 0.12497900426387787, "learning_rate": 0.002, "loss": 2.3448, "step": 202690 }, { "epoch": 0.783581512579054, "grad_norm": 0.09737855941057205, "learning_rate": 0.002, "loss": 2.3405, "step": 202700 }, { "epoch": 0.7836201697824373, "grad_norm": 0.10107357054948807, "learning_rate": 0.002, "loss": 2.348, "step": 202710 }, { "epoch": 0.7836588269858206, "grad_norm": 0.11377954483032227, "learning_rate": 0.002, "loss": 2.334, "step": 202720 }, { "epoch": 0.7836974841892038, "grad_norm": 0.11078076809644699, "learning_rate": 0.002, "loss": 2.3453, "step": 202730 }, { "epoch": 0.7837361413925871, "grad_norm": 0.09624568372964859, "learning_rate": 0.002, "loss": 2.3479, "step": 202740 }, { "epoch": 0.7837747985959703, "grad_norm": 0.12164346873760223, "learning_rate": 0.002, "loss": 2.3464, "step": 202750 }, { "epoch": 0.7838134557993537, "grad_norm": 0.09853264689445496, "learning_rate": 0.002, "loss": 2.3377, "step": 202760 }, { "epoch": 0.7838521130027369, "grad_norm": 0.11185454577207565, "learning_rate": 0.002, "loss": 2.3158, "step": 202770 }, { "epoch": 0.7838907702061202, "grad_norm": 0.1107863038778305, "learning_rate": 0.002, "loss": 2.3522, "step": 202780 }, { "epoch": 0.7839294274095034, "grad_norm": 0.09015390276908875, "learning_rate": 0.002, "loss": 2.3555, "step": 202790 }, { "epoch": 0.7839680846128868, "grad_norm": 0.11195258796215057, "learning_rate": 0.002, "loss": 2.3363, "step": 202800 }, { "epoch": 0.7840067418162701, "grad_norm": 0.10910385102033615, "learning_rate": 0.002, "loss": 2.3393, "step": 202810 }, { "epoch": 0.7840453990196533, "grad_norm": 0.10820984840393066, "learning_rate": 0.002, "loss": 2.3469, "step": 202820 }, { "epoch": 0.7840840562230366, "grad_norm": 0.09601178765296936, "learning_rate": 0.002, "loss": 2.3318, "step": 202830 }, { "epoch": 0.7841227134264199, "grad_norm": 0.15023267269134521, "learning_rate": 0.002, "loss": 2.3478, "step": 202840 }, { "epoch": 0.7841613706298032, "grad_norm": 0.10086756944656372, "learning_rate": 0.002, "loss": 2.331, "step": 202850 }, { "epoch": 0.7842000278331864, "grad_norm": 0.09821043908596039, "learning_rate": 0.002, "loss": 2.3401, "step": 202860 }, { "epoch": 0.7842386850365697, "grad_norm": 0.13208691775798798, "learning_rate": 0.002, "loss": 2.3266, "step": 202870 }, { "epoch": 0.784277342239953, "grad_norm": 0.10310870409011841, "learning_rate": 0.002, "loss": 2.3445, "step": 202880 }, { "epoch": 0.7843159994433363, "grad_norm": 0.0981186106801033, "learning_rate": 0.002, "loss": 2.327, "step": 202890 }, { "epoch": 0.7843546566467196, "grad_norm": 0.12439095228910446, "learning_rate": 0.002, "loss": 2.3368, "step": 202900 }, { "epoch": 0.7843933138501028, "grad_norm": 0.10339083522558212, "learning_rate": 0.002, "loss": 2.3453, "step": 202910 }, { "epoch": 0.784431971053486, "grad_norm": 0.0957341343164444, "learning_rate": 0.002, "loss": 2.3331, "step": 202920 }, { "epoch": 0.7844706282568694, "grad_norm": 0.1126188188791275, "learning_rate": 0.002, "loss": 2.3414, "step": 202930 }, { "epoch": 0.7845092854602527, "grad_norm": 0.09723176062107086, "learning_rate": 0.002, "loss": 2.3379, "step": 202940 }, { "epoch": 0.7845479426636359, "grad_norm": 0.10407697409391403, "learning_rate": 0.002, "loss": 2.3263, "step": 202950 }, { "epoch": 0.7845865998670192, "grad_norm": 0.10393688082695007, "learning_rate": 0.002, "loss": 2.3319, "step": 202960 }, { "epoch": 0.7846252570704025, "grad_norm": 0.11379170417785645, "learning_rate": 0.002, "loss": 2.3553, "step": 202970 }, { "epoch": 0.7846639142737858, "grad_norm": 0.10667712986469269, "learning_rate": 0.002, "loss": 2.3282, "step": 202980 }, { "epoch": 0.784702571477169, "grad_norm": 0.09885899722576141, "learning_rate": 0.002, "loss": 2.3366, "step": 202990 }, { "epoch": 0.7847412286805523, "grad_norm": 0.11075273156166077, "learning_rate": 0.002, "loss": 2.345, "step": 203000 }, { "epoch": 0.7847798858839357, "grad_norm": 0.11645198613405228, "learning_rate": 0.002, "loss": 2.3458, "step": 203010 }, { "epoch": 0.7848185430873189, "grad_norm": 0.09348627924919128, "learning_rate": 0.002, "loss": 2.3401, "step": 203020 }, { "epoch": 0.7848572002907022, "grad_norm": 0.11713188886642456, "learning_rate": 0.002, "loss": 2.3259, "step": 203030 }, { "epoch": 0.7848958574940854, "grad_norm": 0.10126195847988129, "learning_rate": 0.002, "loss": 2.3445, "step": 203040 }, { "epoch": 0.7849345146974688, "grad_norm": 0.1038556843996048, "learning_rate": 0.002, "loss": 2.3419, "step": 203050 }, { "epoch": 0.784973171900852, "grad_norm": 0.10732719302177429, "learning_rate": 0.002, "loss": 2.3339, "step": 203060 }, { "epoch": 0.7850118291042353, "grad_norm": 0.117129385471344, "learning_rate": 0.002, "loss": 2.3364, "step": 203070 }, { "epoch": 0.7850504863076185, "grad_norm": 0.11160501092672348, "learning_rate": 0.002, "loss": 2.3346, "step": 203080 }, { "epoch": 0.7850891435110018, "grad_norm": 0.1068291887640953, "learning_rate": 0.002, "loss": 2.3416, "step": 203090 }, { "epoch": 0.7851278007143851, "grad_norm": 0.12483925372362137, "learning_rate": 0.002, "loss": 2.3238, "step": 203100 }, { "epoch": 0.7851664579177684, "grad_norm": 0.0880567654967308, "learning_rate": 0.002, "loss": 2.3426, "step": 203110 }, { "epoch": 0.7852051151211517, "grad_norm": 0.09908737242221832, "learning_rate": 0.002, "loss": 2.3528, "step": 203120 }, { "epoch": 0.7852437723245349, "grad_norm": 0.09003641456365585, "learning_rate": 0.002, "loss": 2.3294, "step": 203130 }, { "epoch": 0.7852824295279183, "grad_norm": 0.1001494899392128, "learning_rate": 0.002, "loss": 2.3342, "step": 203140 }, { "epoch": 0.7853210867313015, "grad_norm": 0.11012919247150421, "learning_rate": 0.002, "loss": 2.3238, "step": 203150 }, { "epoch": 0.7853597439346848, "grad_norm": 0.09928996860980988, "learning_rate": 0.002, "loss": 2.3382, "step": 203160 }, { "epoch": 0.785398401138068, "grad_norm": 0.08990038931369781, "learning_rate": 0.002, "loss": 2.3427, "step": 203170 }, { "epoch": 0.7854370583414514, "grad_norm": 0.10726006329059601, "learning_rate": 0.002, "loss": 2.3502, "step": 203180 }, { "epoch": 0.7854757155448346, "grad_norm": 0.1108621284365654, "learning_rate": 0.002, "loss": 2.3373, "step": 203190 }, { "epoch": 0.7855143727482179, "grad_norm": 0.0969405546784401, "learning_rate": 0.002, "loss": 2.337, "step": 203200 }, { "epoch": 0.7855530299516011, "grad_norm": 0.09812358021736145, "learning_rate": 0.002, "loss": 2.3555, "step": 203210 }, { "epoch": 0.7855916871549845, "grad_norm": 0.123712919652462, "learning_rate": 0.002, "loss": 2.3486, "step": 203220 }, { "epoch": 0.7856303443583678, "grad_norm": 0.12026149779558182, "learning_rate": 0.002, "loss": 2.3524, "step": 203230 }, { "epoch": 0.785669001561751, "grad_norm": 0.09507454186677933, "learning_rate": 0.002, "loss": 2.3232, "step": 203240 }, { "epoch": 0.7857076587651343, "grad_norm": 0.0988507866859436, "learning_rate": 0.002, "loss": 2.3601, "step": 203250 }, { "epoch": 0.7857463159685176, "grad_norm": 0.10693545639514923, "learning_rate": 0.002, "loss": 2.3384, "step": 203260 }, { "epoch": 0.7857849731719009, "grad_norm": 0.10580098628997803, "learning_rate": 0.002, "loss": 2.3146, "step": 203270 }, { "epoch": 0.7858236303752841, "grad_norm": 0.09059664607048035, "learning_rate": 0.002, "loss": 2.3299, "step": 203280 }, { "epoch": 0.7858622875786674, "grad_norm": 0.09549517184495926, "learning_rate": 0.002, "loss": 2.3403, "step": 203290 }, { "epoch": 0.7859009447820506, "grad_norm": 0.13090062141418457, "learning_rate": 0.002, "loss": 2.3498, "step": 203300 }, { "epoch": 0.785939601985434, "grad_norm": 0.10545851290225983, "learning_rate": 0.002, "loss": 2.3431, "step": 203310 }, { "epoch": 0.7859782591888173, "grad_norm": 0.10016728937625885, "learning_rate": 0.002, "loss": 2.3347, "step": 203320 }, { "epoch": 0.7860169163922005, "grad_norm": 0.09274963289499283, "learning_rate": 0.002, "loss": 2.3488, "step": 203330 }, { "epoch": 0.7860555735955838, "grad_norm": 0.11836186051368713, "learning_rate": 0.002, "loss": 2.3449, "step": 203340 }, { "epoch": 0.7860942307989671, "grad_norm": 0.10120458155870438, "learning_rate": 0.002, "loss": 2.3496, "step": 203350 }, { "epoch": 0.7861328880023504, "grad_norm": 0.09683175384998322, "learning_rate": 0.002, "loss": 2.3427, "step": 203360 }, { "epoch": 0.7861715452057336, "grad_norm": 0.09533454477787018, "learning_rate": 0.002, "loss": 2.3416, "step": 203370 }, { "epoch": 0.7862102024091169, "grad_norm": 0.11609603464603424, "learning_rate": 0.002, "loss": 2.339, "step": 203380 }, { "epoch": 0.7862488596125002, "grad_norm": 0.10426200181245804, "learning_rate": 0.002, "loss": 2.3529, "step": 203390 }, { "epoch": 0.7862875168158835, "grad_norm": 0.11988259851932526, "learning_rate": 0.002, "loss": 2.3353, "step": 203400 }, { "epoch": 0.7863261740192667, "grad_norm": 0.11909215897321701, "learning_rate": 0.002, "loss": 2.3403, "step": 203410 }, { "epoch": 0.78636483122265, "grad_norm": 0.1037282645702362, "learning_rate": 0.002, "loss": 2.3329, "step": 203420 }, { "epoch": 0.7864034884260334, "grad_norm": 0.10475391149520874, "learning_rate": 0.002, "loss": 2.3435, "step": 203430 }, { "epoch": 0.7864421456294166, "grad_norm": 0.12629707157611847, "learning_rate": 0.002, "loss": 2.347, "step": 203440 }, { "epoch": 0.7864808028327999, "grad_norm": 0.10719470679759979, "learning_rate": 0.002, "loss": 2.3276, "step": 203450 }, { "epoch": 0.7865194600361831, "grad_norm": 0.653010368347168, "learning_rate": 0.002, "loss": 2.3364, "step": 203460 }, { "epoch": 0.7865581172395664, "grad_norm": 0.11120946705341339, "learning_rate": 0.002, "loss": 2.3461, "step": 203470 }, { "epoch": 0.7865967744429497, "grad_norm": 0.10024162381887436, "learning_rate": 0.002, "loss": 2.3366, "step": 203480 }, { "epoch": 0.786635431646333, "grad_norm": 0.11460886150598526, "learning_rate": 0.002, "loss": 2.3403, "step": 203490 }, { "epoch": 0.7866740888497162, "grad_norm": 0.11218295991420746, "learning_rate": 0.002, "loss": 2.3363, "step": 203500 }, { "epoch": 0.7867127460530995, "grad_norm": 0.10510541498661041, "learning_rate": 0.002, "loss": 2.3455, "step": 203510 }, { "epoch": 0.7867514032564829, "grad_norm": 0.09944749623537064, "learning_rate": 0.002, "loss": 2.348, "step": 203520 }, { "epoch": 0.7867900604598661, "grad_norm": 0.100336953997612, "learning_rate": 0.002, "loss": 2.3355, "step": 203530 }, { "epoch": 0.7868287176632494, "grad_norm": 0.10724803060293198, "learning_rate": 0.002, "loss": 2.3376, "step": 203540 }, { "epoch": 0.7868673748666326, "grad_norm": 0.10677690804004669, "learning_rate": 0.002, "loss": 2.3419, "step": 203550 }, { "epoch": 0.786906032070016, "grad_norm": 0.09538012742996216, "learning_rate": 0.002, "loss": 2.3491, "step": 203560 }, { "epoch": 0.7869446892733992, "grad_norm": 0.11513350158929825, "learning_rate": 0.002, "loss": 2.3391, "step": 203570 }, { "epoch": 0.7869833464767825, "grad_norm": 0.0956110879778862, "learning_rate": 0.002, "loss": 2.3449, "step": 203580 }, { "epoch": 0.7870220036801657, "grad_norm": 0.10411123931407928, "learning_rate": 0.002, "loss": 2.3362, "step": 203590 }, { "epoch": 0.7870606608835491, "grad_norm": 0.1055002361536026, "learning_rate": 0.002, "loss": 2.3487, "step": 203600 }, { "epoch": 0.7870993180869323, "grad_norm": 0.12185431271791458, "learning_rate": 0.002, "loss": 2.3308, "step": 203610 }, { "epoch": 0.7871379752903156, "grad_norm": 0.10589416325092316, "learning_rate": 0.002, "loss": 2.3469, "step": 203620 }, { "epoch": 0.7871766324936988, "grad_norm": 0.11253327876329422, "learning_rate": 0.002, "loss": 2.3503, "step": 203630 }, { "epoch": 0.7872152896970821, "grad_norm": 0.11665678024291992, "learning_rate": 0.002, "loss": 2.3395, "step": 203640 }, { "epoch": 0.7872539469004655, "grad_norm": 0.09713021665811539, "learning_rate": 0.002, "loss": 2.341, "step": 203650 }, { "epoch": 0.7872926041038487, "grad_norm": 0.10635494440793991, "learning_rate": 0.002, "loss": 2.3502, "step": 203660 }, { "epoch": 0.787331261307232, "grad_norm": 0.14344890415668488, "learning_rate": 0.002, "loss": 2.3289, "step": 203670 }, { "epoch": 0.7873699185106152, "grad_norm": 0.10709847509860992, "learning_rate": 0.002, "loss": 2.3334, "step": 203680 }, { "epoch": 0.7874085757139986, "grad_norm": 0.10784828662872314, "learning_rate": 0.002, "loss": 2.3301, "step": 203690 }, { "epoch": 0.7874472329173818, "grad_norm": 0.29096582531929016, "learning_rate": 0.002, "loss": 2.3327, "step": 203700 }, { "epoch": 0.7874858901207651, "grad_norm": 0.10271017998456955, "learning_rate": 0.002, "loss": 2.3238, "step": 203710 }, { "epoch": 0.7875245473241483, "grad_norm": 0.10930080711841583, "learning_rate": 0.002, "loss": 2.3649, "step": 203720 }, { "epoch": 0.7875632045275317, "grad_norm": 0.08935771882534027, "learning_rate": 0.002, "loss": 2.3506, "step": 203730 }, { "epoch": 0.787601861730915, "grad_norm": 0.10123781859874725, "learning_rate": 0.002, "loss": 2.3516, "step": 203740 }, { "epoch": 0.7876405189342982, "grad_norm": 0.11019125580787659, "learning_rate": 0.002, "loss": 2.3288, "step": 203750 }, { "epoch": 0.7876791761376815, "grad_norm": 0.12791241705417633, "learning_rate": 0.002, "loss": 2.3332, "step": 203760 }, { "epoch": 0.7877178333410648, "grad_norm": 0.0956585705280304, "learning_rate": 0.002, "loss": 2.3455, "step": 203770 }, { "epoch": 0.7877564905444481, "grad_norm": 0.10439072549343109, "learning_rate": 0.002, "loss": 2.3326, "step": 203780 }, { "epoch": 0.7877951477478313, "grad_norm": 0.10286793112754822, "learning_rate": 0.002, "loss": 2.3471, "step": 203790 }, { "epoch": 0.7878338049512146, "grad_norm": 0.11320628225803375, "learning_rate": 0.002, "loss": 2.3373, "step": 203800 }, { "epoch": 0.7878724621545979, "grad_norm": 0.10071314871311188, "learning_rate": 0.002, "loss": 2.3324, "step": 203810 }, { "epoch": 0.7879111193579812, "grad_norm": 0.1389821320772171, "learning_rate": 0.002, "loss": 2.3414, "step": 203820 }, { "epoch": 0.7879497765613644, "grad_norm": 0.13283565640449524, "learning_rate": 0.002, "loss": 2.3495, "step": 203830 }, { "epoch": 0.7879884337647477, "grad_norm": 0.10520830750465393, "learning_rate": 0.002, "loss": 2.3324, "step": 203840 }, { "epoch": 0.788027090968131, "grad_norm": 0.11765392869710922, "learning_rate": 0.002, "loss": 2.3379, "step": 203850 }, { "epoch": 0.7880657481715143, "grad_norm": 0.12679457664489746, "learning_rate": 0.002, "loss": 2.3355, "step": 203860 }, { "epoch": 0.7881044053748976, "grad_norm": 0.10883157700300217, "learning_rate": 0.002, "loss": 2.3319, "step": 203870 }, { "epoch": 0.7881430625782808, "grad_norm": 0.1118154376745224, "learning_rate": 0.002, "loss": 2.3397, "step": 203880 }, { "epoch": 0.7881817197816641, "grad_norm": 0.09302463382482529, "learning_rate": 0.002, "loss": 2.3572, "step": 203890 }, { "epoch": 0.7882203769850474, "grad_norm": 0.12338177114725113, "learning_rate": 0.002, "loss": 2.3425, "step": 203900 }, { "epoch": 0.7882590341884307, "grad_norm": 0.11272131651639938, "learning_rate": 0.002, "loss": 2.3369, "step": 203910 }, { "epoch": 0.7882976913918139, "grad_norm": 0.10373769700527191, "learning_rate": 0.002, "loss": 2.343, "step": 203920 }, { "epoch": 0.7883363485951972, "grad_norm": 0.10257022827863693, "learning_rate": 0.002, "loss": 2.3208, "step": 203930 }, { "epoch": 0.7883750057985806, "grad_norm": 0.10867653042078018, "learning_rate": 0.002, "loss": 2.3409, "step": 203940 }, { "epoch": 0.7884136630019638, "grad_norm": 0.10311832278966904, "learning_rate": 0.002, "loss": 2.3504, "step": 203950 }, { "epoch": 0.7884523202053471, "grad_norm": 0.10829376429319382, "learning_rate": 0.002, "loss": 2.34, "step": 203960 }, { "epoch": 0.7884909774087303, "grad_norm": 0.11858326941728592, "learning_rate": 0.002, "loss": 2.3417, "step": 203970 }, { "epoch": 0.7885296346121137, "grad_norm": 0.1111568734049797, "learning_rate": 0.002, "loss": 2.335, "step": 203980 }, { "epoch": 0.7885682918154969, "grad_norm": 0.11772432923316956, "learning_rate": 0.002, "loss": 2.3469, "step": 203990 }, { "epoch": 0.7886069490188802, "grad_norm": 0.10340186208486557, "learning_rate": 0.002, "loss": 2.3507, "step": 204000 }, { "epoch": 0.7886456062222634, "grad_norm": 0.11137323826551437, "learning_rate": 0.002, "loss": 2.3528, "step": 204010 }, { "epoch": 0.7886842634256467, "grad_norm": 0.09535515308380127, "learning_rate": 0.002, "loss": 2.3378, "step": 204020 }, { "epoch": 0.78872292062903, "grad_norm": 0.10966328531503677, "learning_rate": 0.002, "loss": 2.331, "step": 204030 }, { "epoch": 0.7887615778324133, "grad_norm": 0.10557172447443008, "learning_rate": 0.002, "loss": 2.3287, "step": 204040 }, { "epoch": 0.7888002350357965, "grad_norm": 0.09890732169151306, "learning_rate": 0.002, "loss": 2.3414, "step": 204050 }, { "epoch": 0.7888388922391798, "grad_norm": 0.13068756461143494, "learning_rate": 0.002, "loss": 2.3439, "step": 204060 }, { "epoch": 0.7888775494425632, "grad_norm": 0.11536012589931488, "learning_rate": 0.002, "loss": 2.3505, "step": 204070 }, { "epoch": 0.7889162066459464, "grad_norm": 0.09284942597150803, "learning_rate": 0.002, "loss": 2.3228, "step": 204080 }, { "epoch": 0.7889548638493297, "grad_norm": 0.09574364870786667, "learning_rate": 0.002, "loss": 2.333, "step": 204090 }, { "epoch": 0.7889935210527129, "grad_norm": 0.10232340544462204, "learning_rate": 0.002, "loss": 2.3496, "step": 204100 }, { "epoch": 0.7890321782560963, "grad_norm": 0.12331674993038177, "learning_rate": 0.002, "loss": 2.3605, "step": 204110 }, { "epoch": 0.7890708354594795, "grad_norm": 0.10304983705282211, "learning_rate": 0.002, "loss": 2.3292, "step": 204120 }, { "epoch": 0.7891094926628628, "grad_norm": 0.11414653807878494, "learning_rate": 0.002, "loss": 2.3248, "step": 204130 }, { "epoch": 0.789148149866246, "grad_norm": 0.11107272654771805, "learning_rate": 0.002, "loss": 2.3521, "step": 204140 }, { "epoch": 0.7891868070696294, "grad_norm": 0.10653676092624664, "learning_rate": 0.002, "loss": 2.3591, "step": 204150 }, { "epoch": 0.7892254642730127, "grad_norm": 0.10755713284015656, "learning_rate": 0.002, "loss": 2.3537, "step": 204160 }, { "epoch": 0.7892641214763959, "grad_norm": 0.11351131647825241, "learning_rate": 0.002, "loss": 2.3606, "step": 204170 }, { "epoch": 0.7893027786797792, "grad_norm": 0.11735298484563828, "learning_rate": 0.002, "loss": 2.3386, "step": 204180 }, { "epoch": 0.7893414358831625, "grad_norm": 0.10693307220935822, "learning_rate": 0.002, "loss": 2.3448, "step": 204190 }, { "epoch": 0.7893800930865458, "grad_norm": 0.090082548558712, "learning_rate": 0.002, "loss": 2.3571, "step": 204200 }, { "epoch": 0.789418750289929, "grad_norm": 0.09632305055856705, "learning_rate": 0.002, "loss": 2.3262, "step": 204210 }, { "epoch": 0.7894574074933123, "grad_norm": 0.09143831580877304, "learning_rate": 0.002, "loss": 2.3419, "step": 204220 }, { "epoch": 0.7894960646966955, "grad_norm": 0.11290735006332397, "learning_rate": 0.002, "loss": 2.3434, "step": 204230 }, { "epoch": 0.7895347219000789, "grad_norm": 0.16610337793827057, "learning_rate": 0.002, "loss": 2.3291, "step": 204240 }, { "epoch": 0.7895733791034621, "grad_norm": 0.09449631720781326, "learning_rate": 0.002, "loss": 2.3436, "step": 204250 }, { "epoch": 0.7896120363068454, "grad_norm": 0.11649568378925323, "learning_rate": 0.002, "loss": 2.3481, "step": 204260 }, { "epoch": 0.7896506935102287, "grad_norm": 0.11000768840312958, "learning_rate": 0.002, "loss": 2.3364, "step": 204270 }, { "epoch": 0.789689350713612, "grad_norm": 0.09472779929637909, "learning_rate": 0.002, "loss": 2.3326, "step": 204280 }, { "epoch": 0.7897280079169953, "grad_norm": 0.1212298795580864, "learning_rate": 0.002, "loss": 2.3327, "step": 204290 }, { "epoch": 0.7897666651203785, "grad_norm": 0.10486337542533875, "learning_rate": 0.002, "loss": 2.3427, "step": 204300 }, { "epoch": 0.7898053223237618, "grad_norm": 0.12343774735927582, "learning_rate": 0.002, "loss": 2.3333, "step": 204310 }, { "epoch": 0.7898439795271451, "grad_norm": 0.10356009751558304, "learning_rate": 0.002, "loss": 2.3414, "step": 204320 }, { "epoch": 0.7898826367305284, "grad_norm": 0.11104033887386322, "learning_rate": 0.002, "loss": 2.3413, "step": 204330 }, { "epoch": 0.7899212939339116, "grad_norm": 0.10338528454303741, "learning_rate": 0.002, "loss": 2.3439, "step": 204340 }, { "epoch": 0.7899599511372949, "grad_norm": 0.11514335125684738, "learning_rate": 0.002, "loss": 2.3353, "step": 204350 }, { "epoch": 0.7899986083406783, "grad_norm": 0.10399248450994492, "learning_rate": 0.002, "loss": 2.3575, "step": 204360 }, { "epoch": 0.7900372655440615, "grad_norm": 0.08885734528303146, "learning_rate": 0.002, "loss": 2.3427, "step": 204370 }, { "epoch": 0.7900759227474448, "grad_norm": 0.11299405992031097, "learning_rate": 0.002, "loss": 2.3364, "step": 204380 }, { "epoch": 0.790114579950828, "grad_norm": 0.10836345702409744, "learning_rate": 0.002, "loss": 2.3391, "step": 204390 }, { "epoch": 0.7901532371542113, "grad_norm": 0.11612921208143234, "learning_rate": 0.002, "loss": 2.3416, "step": 204400 }, { "epoch": 0.7901918943575946, "grad_norm": 0.10149534791707993, "learning_rate": 0.002, "loss": 2.3488, "step": 204410 }, { "epoch": 0.7902305515609779, "grad_norm": 0.08733243495225906, "learning_rate": 0.002, "loss": 2.3519, "step": 204420 }, { "epoch": 0.7902692087643611, "grad_norm": 0.10776301473379135, "learning_rate": 0.002, "loss": 2.3266, "step": 204430 }, { "epoch": 0.7903078659677444, "grad_norm": 0.11737163364887238, "learning_rate": 0.002, "loss": 2.3376, "step": 204440 }, { "epoch": 0.7903465231711277, "grad_norm": 0.09715598076581955, "learning_rate": 0.002, "loss": 2.3386, "step": 204450 }, { "epoch": 0.790385180374511, "grad_norm": 0.11796846240758896, "learning_rate": 0.002, "loss": 2.3414, "step": 204460 }, { "epoch": 0.7904238375778943, "grad_norm": 0.09173443913459778, "learning_rate": 0.002, "loss": 2.3424, "step": 204470 }, { "epoch": 0.7904624947812775, "grad_norm": 0.09172165393829346, "learning_rate": 0.002, "loss": 2.3412, "step": 204480 }, { "epoch": 0.7905011519846609, "grad_norm": 0.13926003873348236, "learning_rate": 0.002, "loss": 2.3436, "step": 204490 }, { "epoch": 0.7905398091880441, "grad_norm": 0.11290960013866425, "learning_rate": 0.002, "loss": 2.3457, "step": 204500 }, { "epoch": 0.7905784663914274, "grad_norm": 0.1287475973367691, "learning_rate": 0.002, "loss": 2.3537, "step": 204510 }, { "epoch": 0.7906171235948106, "grad_norm": 0.12370341271162033, "learning_rate": 0.002, "loss": 2.3259, "step": 204520 }, { "epoch": 0.790655780798194, "grad_norm": 0.09490542113780975, "learning_rate": 0.002, "loss": 2.3449, "step": 204530 }, { "epoch": 0.7906944380015772, "grad_norm": 0.0927913635969162, "learning_rate": 0.002, "loss": 2.337, "step": 204540 }, { "epoch": 0.7907330952049605, "grad_norm": 0.11680828034877777, "learning_rate": 0.002, "loss": 2.3378, "step": 204550 }, { "epoch": 0.7907717524083437, "grad_norm": 0.09189026057720184, "learning_rate": 0.002, "loss": 2.3335, "step": 204560 }, { "epoch": 0.790810409611727, "grad_norm": 0.10120948404073715, "learning_rate": 0.002, "loss": 2.3472, "step": 204570 }, { "epoch": 0.7908490668151104, "grad_norm": 0.12998700141906738, "learning_rate": 0.002, "loss": 2.3471, "step": 204580 }, { "epoch": 0.7908877240184936, "grad_norm": 0.09915055334568024, "learning_rate": 0.002, "loss": 2.3285, "step": 204590 }, { "epoch": 0.7909263812218769, "grad_norm": 0.10773010551929474, "learning_rate": 0.002, "loss": 2.3321, "step": 204600 }, { "epoch": 0.7909650384252601, "grad_norm": 0.11004921048879623, "learning_rate": 0.002, "loss": 2.3277, "step": 204610 }, { "epoch": 0.7910036956286435, "grad_norm": 0.1033116951584816, "learning_rate": 0.002, "loss": 2.3442, "step": 204620 }, { "epoch": 0.7910423528320267, "grad_norm": 0.10842995345592499, "learning_rate": 0.002, "loss": 2.3249, "step": 204630 }, { "epoch": 0.79108101003541, "grad_norm": 0.1112065464258194, "learning_rate": 0.002, "loss": 2.3395, "step": 204640 }, { "epoch": 0.7911196672387932, "grad_norm": 0.10177678614854813, "learning_rate": 0.002, "loss": 2.3278, "step": 204650 }, { "epoch": 0.7911583244421766, "grad_norm": 0.11695540696382523, "learning_rate": 0.002, "loss": 2.341, "step": 204660 }, { "epoch": 0.7911969816455598, "grad_norm": 0.11951947212219238, "learning_rate": 0.002, "loss": 2.3438, "step": 204670 }, { "epoch": 0.7912356388489431, "grad_norm": 0.08816482871770859, "learning_rate": 0.002, "loss": 2.3253, "step": 204680 }, { "epoch": 0.7912742960523264, "grad_norm": 0.10402724891901016, "learning_rate": 0.002, "loss": 2.3376, "step": 204690 }, { "epoch": 0.7913129532557097, "grad_norm": 0.11488895863294601, "learning_rate": 0.002, "loss": 2.3386, "step": 204700 }, { "epoch": 0.791351610459093, "grad_norm": 0.10502402484416962, "learning_rate": 0.002, "loss": 2.3392, "step": 204710 }, { "epoch": 0.7913902676624762, "grad_norm": 0.08945094794034958, "learning_rate": 0.002, "loss": 2.3397, "step": 204720 }, { "epoch": 0.7914289248658595, "grad_norm": 0.09670726954936981, "learning_rate": 0.002, "loss": 2.3327, "step": 204730 }, { "epoch": 0.7914675820692428, "grad_norm": 0.10783292353153229, "learning_rate": 0.002, "loss": 2.3281, "step": 204740 }, { "epoch": 0.7915062392726261, "grad_norm": 0.10084598511457443, "learning_rate": 0.002, "loss": 2.3425, "step": 204750 }, { "epoch": 0.7915448964760093, "grad_norm": 0.10058614611625671, "learning_rate": 0.002, "loss": 2.3281, "step": 204760 }, { "epoch": 0.7915835536793926, "grad_norm": 0.10897661000490189, "learning_rate": 0.002, "loss": 2.3351, "step": 204770 }, { "epoch": 0.7916222108827758, "grad_norm": 0.11623561382293701, "learning_rate": 0.002, "loss": 2.3368, "step": 204780 }, { "epoch": 0.7916608680861592, "grad_norm": 0.10288413614034653, "learning_rate": 0.002, "loss": 2.3387, "step": 204790 }, { "epoch": 0.7916995252895425, "grad_norm": 0.10230749100446701, "learning_rate": 0.002, "loss": 2.3433, "step": 204800 }, { "epoch": 0.7917381824929257, "grad_norm": 0.11927718669176102, "learning_rate": 0.002, "loss": 2.3367, "step": 204810 }, { "epoch": 0.791776839696309, "grad_norm": 0.1255546510219574, "learning_rate": 0.002, "loss": 2.3472, "step": 204820 }, { "epoch": 0.7918154968996923, "grad_norm": 0.10689831525087357, "learning_rate": 0.002, "loss": 2.3458, "step": 204830 }, { "epoch": 0.7918541541030756, "grad_norm": 0.09706177562475204, "learning_rate": 0.002, "loss": 2.3425, "step": 204840 }, { "epoch": 0.7918928113064588, "grad_norm": 0.10006806999444962, "learning_rate": 0.002, "loss": 2.3396, "step": 204850 }, { "epoch": 0.7919314685098421, "grad_norm": 0.09978881478309631, "learning_rate": 0.002, "loss": 2.3261, "step": 204860 }, { "epoch": 0.7919701257132254, "grad_norm": 0.09379451721906662, "learning_rate": 0.002, "loss": 2.341, "step": 204870 }, { "epoch": 0.7920087829166087, "grad_norm": 0.10791292041540146, "learning_rate": 0.002, "loss": 2.3389, "step": 204880 }, { "epoch": 0.792047440119992, "grad_norm": 0.1182226613163948, "learning_rate": 0.002, "loss": 2.349, "step": 204890 }, { "epoch": 0.7920860973233752, "grad_norm": 0.09794960916042328, "learning_rate": 0.002, "loss": 2.3454, "step": 204900 }, { "epoch": 0.7921247545267586, "grad_norm": 0.10860683768987656, "learning_rate": 0.002, "loss": 2.3346, "step": 204910 }, { "epoch": 0.7921634117301418, "grad_norm": 0.10471311956644058, "learning_rate": 0.002, "loss": 2.3391, "step": 204920 }, { "epoch": 0.7922020689335251, "grad_norm": 0.1075335219502449, "learning_rate": 0.002, "loss": 2.3589, "step": 204930 }, { "epoch": 0.7922407261369083, "grad_norm": 0.1021302193403244, "learning_rate": 0.002, "loss": 2.3286, "step": 204940 }, { "epoch": 0.7922793833402916, "grad_norm": 0.10729537904262543, "learning_rate": 0.002, "loss": 2.3452, "step": 204950 }, { "epoch": 0.7923180405436749, "grad_norm": 0.10054760426282883, "learning_rate": 0.002, "loss": 2.3381, "step": 204960 }, { "epoch": 0.7923566977470582, "grad_norm": 0.10623308271169662, "learning_rate": 0.002, "loss": 2.3438, "step": 204970 }, { "epoch": 0.7923953549504414, "grad_norm": 0.1051439717411995, "learning_rate": 0.002, "loss": 2.3386, "step": 204980 }, { "epoch": 0.7924340121538247, "grad_norm": 0.09262803196907043, "learning_rate": 0.002, "loss": 2.3258, "step": 204990 }, { "epoch": 0.7924726693572081, "grad_norm": 0.10029026865959167, "learning_rate": 0.002, "loss": 2.3344, "step": 205000 }, { "epoch": 0.7925113265605913, "grad_norm": 0.1111687645316124, "learning_rate": 0.002, "loss": 2.3443, "step": 205010 }, { "epoch": 0.7925499837639746, "grad_norm": 0.09792038798332214, "learning_rate": 0.002, "loss": 2.3426, "step": 205020 }, { "epoch": 0.7925886409673578, "grad_norm": 0.12564076483249664, "learning_rate": 0.002, "loss": 2.3408, "step": 205030 }, { "epoch": 0.7926272981707412, "grad_norm": 0.12421346455812454, "learning_rate": 0.002, "loss": 2.3348, "step": 205040 }, { "epoch": 0.7926659553741244, "grad_norm": 0.11315683275461197, "learning_rate": 0.002, "loss": 2.3461, "step": 205050 }, { "epoch": 0.7927046125775077, "grad_norm": 0.11583825200796127, "learning_rate": 0.002, "loss": 2.343, "step": 205060 }, { "epoch": 0.7927432697808909, "grad_norm": 0.09638478606939316, "learning_rate": 0.002, "loss": 2.3429, "step": 205070 }, { "epoch": 0.7927819269842743, "grad_norm": 0.10320941358804703, "learning_rate": 0.002, "loss": 2.3293, "step": 205080 }, { "epoch": 0.7928205841876576, "grad_norm": 0.1072765588760376, "learning_rate": 0.002, "loss": 2.3275, "step": 205090 }, { "epoch": 0.7928592413910408, "grad_norm": 0.09241588413715363, "learning_rate": 0.002, "loss": 2.3521, "step": 205100 }, { "epoch": 0.792897898594424, "grad_norm": 0.09480182081460953, "learning_rate": 0.002, "loss": 2.3349, "step": 205110 }, { "epoch": 0.7929365557978074, "grad_norm": 0.10750217735767365, "learning_rate": 0.002, "loss": 2.3462, "step": 205120 }, { "epoch": 0.7929752130011907, "grad_norm": 0.10552404820919037, "learning_rate": 0.002, "loss": 2.3436, "step": 205130 }, { "epoch": 0.7930138702045739, "grad_norm": 0.17118145525455475, "learning_rate": 0.002, "loss": 2.348, "step": 205140 }, { "epoch": 0.7930525274079572, "grad_norm": 0.20626536011695862, "learning_rate": 0.002, "loss": 2.3589, "step": 205150 }, { "epoch": 0.7930911846113404, "grad_norm": 0.12056166678667068, "learning_rate": 0.002, "loss": 2.347, "step": 205160 }, { "epoch": 0.7931298418147238, "grad_norm": 0.34020501375198364, "learning_rate": 0.002, "loss": 2.3391, "step": 205170 }, { "epoch": 0.793168499018107, "grad_norm": 0.10309938341379166, "learning_rate": 0.002, "loss": 2.3423, "step": 205180 }, { "epoch": 0.7932071562214903, "grad_norm": 0.10739479959011078, "learning_rate": 0.002, "loss": 2.3489, "step": 205190 }, { "epoch": 0.7932458134248735, "grad_norm": 0.11030566692352295, "learning_rate": 0.002, "loss": 2.3259, "step": 205200 }, { "epoch": 0.7932844706282569, "grad_norm": 0.1024133712053299, "learning_rate": 0.002, "loss": 2.3601, "step": 205210 }, { "epoch": 0.7933231278316402, "grad_norm": 0.09965585917234421, "learning_rate": 0.002, "loss": 2.3427, "step": 205220 }, { "epoch": 0.7933617850350234, "grad_norm": 0.10700208693742752, "learning_rate": 0.002, "loss": 2.3311, "step": 205230 }, { "epoch": 0.7934004422384067, "grad_norm": 0.11820707470178604, "learning_rate": 0.002, "loss": 2.3414, "step": 205240 }, { "epoch": 0.79343909944179, "grad_norm": 0.0988406091928482, "learning_rate": 0.002, "loss": 2.3459, "step": 205250 }, { "epoch": 0.7934777566451733, "grad_norm": 0.0951349139213562, "learning_rate": 0.002, "loss": 2.3237, "step": 205260 }, { "epoch": 0.7935164138485565, "grad_norm": 0.09339561313390732, "learning_rate": 0.002, "loss": 2.3212, "step": 205270 }, { "epoch": 0.7935550710519398, "grad_norm": 0.11482594907283783, "learning_rate": 0.002, "loss": 2.3542, "step": 205280 }, { "epoch": 0.7935937282553231, "grad_norm": 0.12239178270101547, "learning_rate": 0.002, "loss": 2.3448, "step": 205290 }, { "epoch": 0.7936323854587064, "grad_norm": 0.09025915712118149, "learning_rate": 0.002, "loss": 2.3267, "step": 205300 }, { "epoch": 0.7936710426620897, "grad_norm": 0.11617813259363174, "learning_rate": 0.002, "loss": 2.353, "step": 205310 }, { "epoch": 0.7937096998654729, "grad_norm": 0.1109839677810669, "learning_rate": 0.002, "loss": 2.3382, "step": 205320 }, { "epoch": 0.7937483570688562, "grad_norm": 0.10583146661520004, "learning_rate": 0.002, "loss": 2.3397, "step": 205330 }, { "epoch": 0.7937870142722395, "grad_norm": 0.10988876223564148, "learning_rate": 0.002, "loss": 2.3315, "step": 205340 }, { "epoch": 0.7938256714756228, "grad_norm": 0.09644675254821777, "learning_rate": 0.002, "loss": 2.3307, "step": 205350 }, { "epoch": 0.793864328679006, "grad_norm": 0.10545558482408524, "learning_rate": 0.002, "loss": 2.3328, "step": 205360 }, { "epoch": 0.7939029858823893, "grad_norm": 0.09953219443559647, "learning_rate": 0.002, "loss": 2.3211, "step": 205370 }, { "epoch": 0.7939416430857726, "grad_norm": 0.12311132997274399, "learning_rate": 0.002, "loss": 2.3374, "step": 205380 }, { "epoch": 0.7939803002891559, "grad_norm": 0.10079408437013626, "learning_rate": 0.002, "loss": 2.3425, "step": 205390 }, { "epoch": 0.7940189574925391, "grad_norm": 0.10783235728740692, "learning_rate": 0.002, "loss": 2.3426, "step": 205400 }, { "epoch": 0.7940576146959224, "grad_norm": 0.10919714719057083, "learning_rate": 0.002, "loss": 2.34, "step": 205410 }, { "epoch": 0.7940962718993058, "grad_norm": 0.11178895831108093, "learning_rate": 0.002, "loss": 2.3473, "step": 205420 }, { "epoch": 0.794134929102689, "grad_norm": 0.10486460477113724, "learning_rate": 0.002, "loss": 2.3405, "step": 205430 }, { "epoch": 0.7941735863060723, "grad_norm": 0.11997535824775696, "learning_rate": 0.002, "loss": 2.3485, "step": 205440 }, { "epoch": 0.7942122435094555, "grad_norm": 0.10198147594928741, "learning_rate": 0.002, "loss": 2.3315, "step": 205450 }, { "epoch": 0.7942509007128389, "grad_norm": 0.09956075251102448, "learning_rate": 0.002, "loss": 2.3379, "step": 205460 }, { "epoch": 0.7942895579162221, "grad_norm": 0.13137564063072205, "learning_rate": 0.002, "loss": 2.3411, "step": 205470 }, { "epoch": 0.7943282151196054, "grad_norm": 0.09812214970588684, "learning_rate": 0.002, "loss": 2.3239, "step": 205480 }, { "epoch": 0.7943668723229886, "grad_norm": 0.11089440435171127, "learning_rate": 0.002, "loss": 2.3351, "step": 205490 }, { "epoch": 0.7944055295263719, "grad_norm": 0.11397991329431534, "learning_rate": 0.002, "loss": 2.3381, "step": 205500 }, { "epoch": 0.7944441867297553, "grad_norm": 0.20118115842342377, "learning_rate": 0.002, "loss": 2.336, "step": 205510 }, { "epoch": 0.7944828439331385, "grad_norm": 0.11045759916305542, "learning_rate": 0.002, "loss": 2.337, "step": 205520 }, { "epoch": 0.7945215011365218, "grad_norm": 0.11609234660863876, "learning_rate": 0.002, "loss": 2.3331, "step": 205530 }, { "epoch": 0.794560158339905, "grad_norm": 0.10128235816955566, "learning_rate": 0.002, "loss": 2.3387, "step": 205540 }, { "epoch": 0.7945988155432884, "grad_norm": 0.12456586211919785, "learning_rate": 0.002, "loss": 2.3495, "step": 205550 }, { "epoch": 0.7946374727466716, "grad_norm": 0.12482677400112152, "learning_rate": 0.002, "loss": 2.3277, "step": 205560 }, { "epoch": 0.7946761299500549, "grad_norm": 0.09432988613843918, "learning_rate": 0.002, "loss": 2.3365, "step": 205570 }, { "epoch": 0.7947147871534381, "grad_norm": 0.1161578968167305, "learning_rate": 0.002, "loss": 2.3659, "step": 205580 }, { "epoch": 0.7947534443568215, "grad_norm": 0.09937795251607895, "learning_rate": 0.002, "loss": 2.3361, "step": 205590 }, { "epoch": 0.7947921015602047, "grad_norm": 0.11066994071006775, "learning_rate": 0.002, "loss": 2.3428, "step": 205600 }, { "epoch": 0.794830758763588, "grad_norm": 0.12263431400060654, "learning_rate": 0.002, "loss": 2.343, "step": 205610 }, { "epoch": 0.7948694159669712, "grad_norm": 0.11063994467258453, "learning_rate": 0.002, "loss": 2.3382, "step": 205620 }, { "epoch": 0.7949080731703546, "grad_norm": 0.11227592080831528, "learning_rate": 0.002, "loss": 2.3462, "step": 205630 }, { "epoch": 0.7949467303737379, "grad_norm": 0.11528632044792175, "learning_rate": 0.002, "loss": 2.3517, "step": 205640 }, { "epoch": 0.7949853875771211, "grad_norm": 0.12380699068307877, "learning_rate": 0.002, "loss": 2.3557, "step": 205650 }, { "epoch": 0.7950240447805044, "grad_norm": 0.10481059551239014, "learning_rate": 0.002, "loss": 2.3318, "step": 205660 }, { "epoch": 0.7950627019838877, "grad_norm": 0.11022672802209854, "learning_rate": 0.002, "loss": 2.3556, "step": 205670 }, { "epoch": 0.795101359187271, "grad_norm": 0.1075207069516182, "learning_rate": 0.002, "loss": 2.3329, "step": 205680 }, { "epoch": 0.7951400163906542, "grad_norm": 0.10176742076873779, "learning_rate": 0.002, "loss": 2.3368, "step": 205690 }, { "epoch": 0.7951786735940375, "grad_norm": 0.10005369782447815, "learning_rate": 0.002, "loss": 2.3355, "step": 205700 }, { "epoch": 0.7952173307974207, "grad_norm": 0.11575303226709366, "learning_rate": 0.002, "loss": 2.3419, "step": 205710 }, { "epoch": 0.7952559880008041, "grad_norm": 0.10827762633562088, "learning_rate": 0.002, "loss": 2.3345, "step": 205720 }, { "epoch": 0.7952946452041874, "grad_norm": 0.0917559415102005, "learning_rate": 0.002, "loss": 2.3344, "step": 205730 }, { "epoch": 0.7953333024075706, "grad_norm": 0.10644976794719696, "learning_rate": 0.002, "loss": 2.3418, "step": 205740 }, { "epoch": 0.7953719596109539, "grad_norm": 0.11294444650411606, "learning_rate": 0.002, "loss": 2.3463, "step": 205750 }, { "epoch": 0.7954106168143372, "grad_norm": 0.0953550785779953, "learning_rate": 0.002, "loss": 2.3165, "step": 205760 }, { "epoch": 0.7954492740177205, "grad_norm": 0.11978161334991455, "learning_rate": 0.002, "loss": 2.3498, "step": 205770 }, { "epoch": 0.7954879312211037, "grad_norm": 0.10544034093618393, "learning_rate": 0.002, "loss": 2.3359, "step": 205780 }, { "epoch": 0.795526588424487, "grad_norm": 0.09561526030302048, "learning_rate": 0.002, "loss": 2.3309, "step": 205790 }, { "epoch": 0.7955652456278703, "grad_norm": 0.09656016528606415, "learning_rate": 0.002, "loss": 2.3409, "step": 205800 }, { "epoch": 0.7956039028312536, "grad_norm": 0.10788822174072266, "learning_rate": 0.002, "loss": 2.3347, "step": 205810 }, { "epoch": 0.7956425600346368, "grad_norm": 0.10486281663179398, "learning_rate": 0.002, "loss": 2.3396, "step": 205820 }, { "epoch": 0.7956812172380201, "grad_norm": 0.1045752465724945, "learning_rate": 0.002, "loss": 2.3386, "step": 205830 }, { "epoch": 0.7957198744414035, "grad_norm": 0.09988079220056534, "learning_rate": 0.002, "loss": 2.3485, "step": 205840 }, { "epoch": 0.7957585316447867, "grad_norm": 0.0971662700176239, "learning_rate": 0.002, "loss": 2.3521, "step": 205850 }, { "epoch": 0.79579718884817, "grad_norm": 0.10432875156402588, "learning_rate": 0.002, "loss": 2.339, "step": 205860 }, { "epoch": 0.7958358460515532, "grad_norm": 0.10157095640897751, "learning_rate": 0.002, "loss": 2.3261, "step": 205870 }, { "epoch": 0.7958745032549365, "grad_norm": 0.11968620866537094, "learning_rate": 0.002, "loss": 2.3283, "step": 205880 }, { "epoch": 0.7959131604583198, "grad_norm": 0.1179286390542984, "learning_rate": 0.002, "loss": 2.3339, "step": 205890 }, { "epoch": 0.7959518176617031, "grad_norm": 0.10262852162122726, "learning_rate": 0.002, "loss": 2.3337, "step": 205900 }, { "epoch": 0.7959904748650863, "grad_norm": 0.09464697539806366, "learning_rate": 0.002, "loss": 2.3462, "step": 205910 }, { "epoch": 0.7960291320684696, "grad_norm": 0.11842875927686691, "learning_rate": 0.002, "loss": 2.3353, "step": 205920 }, { "epoch": 0.796067789271853, "grad_norm": 0.15821444988250732, "learning_rate": 0.002, "loss": 2.347, "step": 205930 }, { "epoch": 0.7961064464752362, "grad_norm": 0.10689578205347061, "learning_rate": 0.002, "loss": 2.3303, "step": 205940 }, { "epoch": 0.7961451036786195, "grad_norm": 0.09144774824380875, "learning_rate": 0.002, "loss": 2.3216, "step": 205950 }, { "epoch": 0.7961837608820027, "grad_norm": 0.09909002482891083, "learning_rate": 0.002, "loss": 2.3532, "step": 205960 }, { "epoch": 0.7962224180853861, "grad_norm": 0.09510427713394165, "learning_rate": 0.002, "loss": 2.3451, "step": 205970 }, { "epoch": 0.7962610752887693, "grad_norm": 0.12517790496349335, "learning_rate": 0.002, "loss": 2.3417, "step": 205980 }, { "epoch": 0.7962997324921526, "grad_norm": 0.09638720005750656, "learning_rate": 0.002, "loss": 2.3549, "step": 205990 }, { "epoch": 0.7963383896955358, "grad_norm": 0.10534534603357315, "learning_rate": 0.002, "loss": 2.3536, "step": 206000 }, { "epoch": 0.7963770468989192, "grad_norm": 0.15926168859004974, "learning_rate": 0.002, "loss": 2.3387, "step": 206010 }, { "epoch": 0.7964157041023024, "grad_norm": 0.10147106647491455, "learning_rate": 0.002, "loss": 2.339, "step": 206020 }, { "epoch": 0.7964543613056857, "grad_norm": 0.10951374471187592, "learning_rate": 0.002, "loss": 2.3439, "step": 206030 }, { "epoch": 0.796493018509069, "grad_norm": 0.11713884770870209, "learning_rate": 0.002, "loss": 2.3272, "step": 206040 }, { "epoch": 0.7965316757124522, "grad_norm": 0.0972195565700531, "learning_rate": 0.002, "loss": 2.3413, "step": 206050 }, { "epoch": 0.7965703329158356, "grad_norm": 0.0912977084517479, "learning_rate": 0.002, "loss": 2.3429, "step": 206060 }, { "epoch": 0.7966089901192188, "grad_norm": 0.12983092665672302, "learning_rate": 0.002, "loss": 2.3392, "step": 206070 }, { "epoch": 0.7966476473226021, "grad_norm": 0.09781789034605026, "learning_rate": 0.002, "loss": 2.353, "step": 206080 }, { "epoch": 0.7966863045259853, "grad_norm": 0.1111692562699318, "learning_rate": 0.002, "loss": 2.3503, "step": 206090 }, { "epoch": 0.7967249617293687, "grad_norm": 0.11107601225376129, "learning_rate": 0.002, "loss": 2.3379, "step": 206100 }, { "epoch": 0.7967636189327519, "grad_norm": 0.10714568942785263, "learning_rate": 0.002, "loss": 2.3549, "step": 206110 }, { "epoch": 0.7968022761361352, "grad_norm": 0.0960366278886795, "learning_rate": 0.002, "loss": 2.3474, "step": 206120 }, { "epoch": 0.7968409333395184, "grad_norm": 0.12267144024372101, "learning_rate": 0.002, "loss": 2.3443, "step": 206130 }, { "epoch": 0.7968795905429018, "grad_norm": 0.11556356400251389, "learning_rate": 0.002, "loss": 2.3484, "step": 206140 }, { "epoch": 0.7969182477462851, "grad_norm": 0.09737807512283325, "learning_rate": 0.002, "loss": 2.353, "step": 206150 }, { "epoch": 0.7969569049496683, "grad_norm": 0.13528063893318176, "learning_rate": 0.002, "loss": 2.3398, "step": 206160 }, { "epoch": 0.7969955621530516, "grad_norm": 0.09639405459165573, "learning_rate": 0.002, "loss": 2.3306, "step": 206170 }, { "epoch": 0.7970342193564349, "grad_norm": 0.0964895561337471, "learning_rate": 0.002, "loss": 2.3356, "step": 206180 }, { "epoch": 0.7970728765598182, "grad_norm": 0.10793425142765045, "learning_rate": 0.002, "loss": 2.3355, "step": 206190 }, { "epoch": 0.7971115337632014, "grad_norm": 0.10876777023077011, "learning_rate": 0.002, "loss": 2.3522, "step": 206200 }, { "epoch": 0.7971501909665847, "grad_norm": 0.11629603058099747, "learning_rate": 0.002, "loss": 2.339, "step": 206210 }, { "epoch": 0.797188848169968, "grad_norm": 0.09371237456798553, "learning_rate": 0.002, "loss": 2.3328, "step": 206220 }, { "epoch": 0.7972275053733513, "grad_norm": 0.11186051368713379, "learning_rate": 0.002, "loss": 2.3319, "step": 206230 }, { "epoch": 0.7972661625767345, "grad_norm": 0.107077457010746, "learning_rate": 0.002, "loss": 2.3518, "step": 206240 }, { "epoch": 0.7973048197801178, "grad_norm": 0.12543454766273499, "learning_rate": 0.002, "loss": 2.3335, "step": 206250 }, { "epoch": 0.797343476983501, "grad_norm": 0.10010968148708344, "learning_rate": 0.002, "loss": 2.3466, "step": 206260 }, { "epoch": 0.7973821341868844, "grad_norm": 0.12213873863220215, "learning_rate": 0.002, "loss": 2.3366, "step": 206270 }, { "epoch": 0.7974207913902677, "grad_norm": 0.1030530333518982, "learning_rate": 0.002, "loss": 2.3427, "step": 206280 }, { "epoch": 0.7974594485936509, "grad_norm": 0.1313806176185608, "learning_rate": 0.002, "loss": 2.3513, "step": 206290 }, { "epoch": 0.7974981057970342, "grad_norm": 0.09398311376571655, "learning_rate": 0.002, "loss": 2.3455, "step": 206300 }, { "epoch": 0.7975367630004175, "grad_norm": 0.11330290138721466, "learning_rate": 0.002, "loss": 2.3354, "step": 206310 }, { "epoch": 0.7975754202038008, "grad_norm": 0.11221243441104889, "learning_rate": 0.002, "loss": 2.3485, "step": 206320 }, { "epoch": 0.797614077407184, "grad_norm": 0.11574556678533554, "learning_rate": 0.002, "loss": 2.3428, "step": 206330 }, { "epoch": 0.7976527346105673, "grad_norm": 0.11567448079586029, "learning_rate": 0.002, "loss": 2.3507, "step": 206340 }, { "epoch": 0.7976913918139507, "grad_norm": 0.09949743002653122, "learning_rate": 0.002, "loss": 2.3488, "step": 206350 }, { "epoch": 0.7977300490173339, "grad_norm": 0.10788501054048538, "learning_rate": 0.002, "loss": 2.3276, "step": 206360 }, { "epoch": 0.7977687062207172, "grad_norm": 0.1038108840584755, "learning_rate": 0.002, "loss": 2.3279, "step": 206370 }, { "epoch": 0.7978073634241004, "grad_norm": 0.09708569943904877, "learning_rate": 0.002, "loss": 2.3361, "step": 206380 }, { "epoch": 0.7978460206274838, "grad_norm": 0.10167262703180313, "learning_rate": 0.002, "loss": 2.3395, "step": 206390 }, { "epoch": 0.797884677830867, "grad_norm": 0.12423945218324661, "learning_rate": 0.002, "loss": 2.3297, "step": 206400 }, { "epoch": 0.7979233350342503, "grad_norm": 0.10096772015094757, "learning_rate": 0.002, "loss": 2.3417, "step": 206410 }, { "epoch": 0.7979619922376335, "grad_norm": 0.10546797513961792, "learning_rate": 0.002, "loss": 2.3283, "step": 206420 }, { "epoch": 0.7980006494410168, "grad_norm": 0.10690139979124069, "learning_rate": 0.002, "loss": 2.3426, "step": 206430 }, { "epoch": 0.7980393066444001, "grad_norm": 0.10070551186800003, "learning_rate": 0.002, "loss": 2.3392, "step": 206440 }, { "epoch": 0.7980779638477834, "grad_norm": 0.11277178674936295, "learning_rate": 0.002, "loss": 2.3208, "step": 206450 }, { "epoch": 0.7981166210511667, "grad_norm": 0.1061372309923172, "learning_rate": 0.002, "loss": 2.3365, "step": 206460 }, { "epoch": 0.7981552782545499, "grad_norm": 0.11042293906211853, "learning_rate": 0.002, "loss": 2.3357, "step": 206470 }, { "epoch": 0.7981939354579333, "grad_norm": 0.09617602825164795, "learning_rate": 0.002, "loss": 2.3444, "step": 206480 }, { "epoch": 0.7982325926613165, "grad_norm": 0.11727935075759888, "learning_rate": 0.002, "loss": 2.3445, "step": 206490 }, { "epoch": 0.7982712498646998, "grad_norm": 0.10299355536699295, "learning_rate": 0.002, "loss": 2.3444, "step": 206500 }, { "epoch": 0.798309907068083, "grad_norm": 0.10291710495948792, "learning_rate": 0.002, "loss": 2.3367, "step": 206510 }, { "epoch": 0.7983485642714664, "grad_norm": 0.09439451992511749, "learning_rate": 0.002, "loss": 2.3361, "step": 206520 }, { "epoch": 0.7983872214748496, "grad_norm": 0.10197708010673523, "learning_rate": 0.002, "loss": 2.3319, "step": 206530 }, { "epoch": 0.7984258786782329, "grad_norm": 0.11696817725896835, "learning_rate": 0.002, "loss": 2.3426, "step": 206540 }, { "epoch": 0.7984645358816161, "grad_norm": 0.10836425423622131, "learning_rate": 0.002, "loss": 2.3458, "step": 206550 }, { "epoch": 0.7985031930849995, "grad_norm": 0.10485026985406876, "learning_rate": 0.002, "loss": 2.3271, "step": 206560 }, { "epoch": 0.7985418502883828, "grad_norm": 0.09178721159696579, "learning_rate": 0.002, "loss": 2.3459, "step": 206570 }, { "epoch": 0.798580507491766, "grad_norm": 0.11446939408779144, "learning_rate": 0.002, "loss": 2.3355, "step": 206580 }, { "epoch": 0.7986191646951493, "grad_norm": 0.11932355165481567, "learning_rate": 0.002, "loss": 2.3438, "step": 206590 }, { "epoch": 0.7986578218985326, "grad_norm": 0.10004696249961853, "learning_rate": 0.002, "loss": 2.3489, "step": 206600 }, { "epoch": 0.7986964791019159, "grad_norm": 0.10023650527000427, "learning_rate": 0.002, "loss": 2.3404, "step": 206610 }, { "epoch": 0.7987351363052991, "grad_norm": 0.11644736677408218, "learning_rate": 0.002, "loss": 2.3334, "step": 206620 }, { "epoch": 0.7987737935086824, "grad_norm": 0.10645174980163574, "learning_rate": 0.002, "loss": 2.346, "step": 206630 }, { "epoch": 0.7988124507120656, "grad_norm": 0.09977910667657852, "learning_rate": 0.002, "loss": 2.3518, "step": 206640 }, { "epoch": 0.798851107915449, "grad_norm": 0.12423045188188553, "learning_rate": 0.002, "loss": 2.3476, "step": 206650 }, { "epoch": 0.7988897651188323, "grad_norm": 0.09672603756189346, "learning_rate": 0.002, "loss": 2.3392, "step": 206660 }, { "epoch": 0.7989284223222155, "grad_norm": 0.09629687666893005, "learning_rate": 0.002, "loss": 2.3409, "step": 206670 }, { "epoch": 0.7989670795255988, "grad_norm": 0.1061956062912941, "learning_rate": 0.002, "loss": 2.3417, "step": 206680 }, { "epoch": 0.7990057367289821, "grad_norm": 0.09424664080142975, "learning_rate": 0.002, "loss": 2.3336, "step": 206690 }, { "epoch": 0.7990443939323654, "grad_norm": 0.11218780279159546, "learning_rate": 0.002, "loss": 2.3398, "step": 206700 }, { "epoch": 0.7990830511357486, "grad_norm": 0.1013312116265297, "learning_rate": 0.002, "loss": 2.3237, "step": 206710 }, { "epoch": 0.7991217083391319, "grad_norm": 0.11477039009332657, "learning_rate": 0.002, "loss": 2.343, "step": 206720 }, { "epoch": 0.7991603655425152, "grad_norm": 0.10796338319778442, "learning_rate": 0.002, "loss": 2.3504, "step": 206730 }, { "epoch": 0.7991990227458985, "grad_norm": 0.10321065783500671, "learning_rate": 0.002, "loss": 2.3452, "step": 206740 }, { "epoch": 0.7992376799492817, "grad_norm": 0.10886584967374802, "learning_rate": 0.002, "loss": 2.3453, "step": 206750 }, { "epoch": 0.799276337152665, "grad_norm": 0.09953373670578003, "learning_rate": 0.002, "loss": 2.3548, "step": 206760 }, { "epoch": 0.7993149943560484, "grad_norm": 0.11496644467115402, "learning_rate": 0.002, "loss": 2.3267, "step": 206770 }, { "epoch": 0.7993536515594316, "grad_norm": 0.09579180181026459, "learning_rate": 0.002, "loss": 2.3458, "step": 206780 }, { "epoch": 0.7993923087628149, "grad_norm": 0.10276324301958084, "learning_rate": 0.002, "loss": 2.322, "step": 206790 }, { "epoch": 0.7994309659661981, "grad_norm": 0.10177237540483475, "learning_rate": 0.002, "loss": 2.3477, "step": 206800 }, { "epoch": 0.7994696231695814, "grad_norm": 0.09037131816148758, "learning_rate": 0.002, "loss": 2.3299, "step": 206810 }, { "epoch": 0.7995082803729647, "grad_norm": 0.09242180734872818, "learning_rate": 0.002, "loss": 2.3351, "step": 206820 }, { "epoch": 0.799546937576348, "grad_norm": 0.09812980145215988, "learning_rate": 0.002, "loss": 2.3351, "step": 206830 }, { "epoch": 0.7995855947797312, "grad_norm": 0.1272347867488861, "learning_rate": 0.002, "loss": 2.3471, "step": 206840 }, { "epoch": 0.7996242519831145, "grad_norm": 0.08748415857553482, "learning_rate": 0.002, "loss": 2.3358, "step": 206850 }, { "epoch": 0.7996629091864979, "grad_norm": 0.09045463800430298, "learning_rate": 0.002, "loss": 2.3521, "step": 206860 }, { "epoch": 0.7997015663898811, "grad_norm": 0.18325842916965485, "learning_rate": 0.002, "loss": 2.3423, "step": 206870 }, { "epoch": 0.7997402235932644, "grad_norm": 0.10421092808246613, "learning_rate": 0.002, "loss": 2.3411, "step": 206880 }, { "epoch": 0.7997788807966476, "grad_norm": 0.10916467010974884, "learning_rate": 0.002, "loss": 2.3628, "step": 206890 }, { "epoch": 0.799817538000031, "grad_norm": 0.10946245491504669, "learning_rate": 0.002, "loss": 2.3447, "step": 206900 }, { "epoch": 0.7998561952034142, "grad_norm": 0.11079275608062744, "learning_rate": 0.002, "loss": 2.3611, "step": 206910 }, { "epoch": 0.7998948524067975, "grad_norm": 0.10049515962600708, "learning_rate": 0.002, "loss": 2.3394, "step": 206920 }, { "epoch": 0.7999335096101807, "grad_norm": 0.10186389833688736, "learning_rate": 0.002, "loss": 2.3362, "step": 206930 }, { "epoch": 0.7999721668135641, "grad_norm": 0.11401376873254776, "learning_rate": 0.002, "loss": 2.3445, "step": 206940 }, { "epoch": 0.8000108240169473, "grad_norm": 0.11036250740289688, "learning_rate": 0.002, "loss": 2.3354, "step": 206950 }, { "epoch": 0.8000494812203306, "grad_norm": 0.10665077716112137, "learning_rate": 0.002, "loss": 2.3443, "step": 206960 }, { "epoch": 0.8000881384237138, "grad_norm": 0.10211837291717529, "learning_rate": 0.002, "loss": 2.3349, "step": 206970 }, { "epoch": 0.8001267956270971, "grad_norm": 0.12194209545850754, "learning_rate": 0.002, "loss": 2.335, "step": 206980 }, { "epoch": 0.8001654528304805, "grad_norm": 0.09793148189783096, "learning_rate": 0.002, "loss": 2.3315, "step": 206990 }, { "epoch": 0.8002041100338637, "grad_norm": 0.12128207832574844, "learning_rate": 0.002, "loss": 2.3473, "step": 207000 }, { "epoch": 0.800242767237247, "grad_norm": 0.12369337677955627, "learning_rate": 0.002, "loss": 2.3346, "step": 207010 }, { "epoch": 0.8002814244406302, "grad_norm": 0.11725110560655594, "learning_rate": 0.002, "loss": 2.3353, "step": 207020 }, { "epoch": 0.8003200816440136, "grad_norm": 0.12829987704753876, "learning_rate": 0.002, "loss": 2.335, "step": 207030 }, { "epoch": 0.8003587388473968, "grad_norm": 0.09357552975416183, "learning_rate": 0.002, "loss": 2.3328, "step": 207040 }, { "epoch": 0.8003973960507801, "grad_norm": 0.10493844002485275, "learning_rate": 0.002, "loss": 2.3452, "step": 207050 }, { "epoch": 0.8004360532541633, "grad_norm": 0.11441578716039658, "learning_rate": 0.002, "loss": 2.3486, "step": 207060 }, { "epoch": 0.8004747104575467, "grad_norm": 0.09632222354412079, "learning_rate": 0.002, "loss": 2.3442, "step": 207070 }, { "epoch": 0.80051336766093, "grad_norm": 0.11511845886707306, "learning_rate": 0.002, "loss": 2.3422, "step": 207080 }, { "epoch": 0.8005520248643132, "grad_norm": 0.10339998453855515, "learning_rate": 0.002, "loss": 2.3427, "step": 207090 }, { "epoch": 0.8005906820676965, "grad_norm": 0.09853748232126236, "learning_rate": 0.002, "loss": 2.3294, "step": 207100 }, { "epoch": 0.8006293392710798, "grad_norm": 0.10093691200017929, "learning_rate": 0.002, "loss": 2.328, "step": 207110 }, { "epoch": 0.8006679964744631, "grad_norm": 0.10225550830364227, "learning_rate": 0.002, "loss": 2.3388, "step": 207120 }, { "epoch": 0.8007066536778463, "grad_norm": 0.11786044389009476, "learning_rate": 0.002, "loss": 2.3384, "step": 207130 }, { "epoch": 0.8007453108812296, "grad_norm": 0.09901709854602814, "learning_rate": 0.002, "loss": 2.3453, "step": 207140 }, { "epoch": 0.8007839680846129, "grad_norm": 0.09919336438179016, "learning_rate": 0.002, "loss": 2.3361, "step": 207150 }, { "epoch": 0.8008226252879962, "grad_norm": 0.11238361895084381, "learning_rate": 0.002, "loss": 2.3503, "step": 207160 }, { "epoch": 0.8008612824913794, "grad_norm": 0.10497693717479706, "learning_rate": 0.002, "loss": 2.3248, "step": 207170 }, { "epoch": 0.8008999396947627, "grad_norm": 0.13379651308059692, "learning_rate": 0.002, "loss": 2.3507, "step": 207180 }, { "epoch": 0.800938596898146, "grad_norm": 0.10189051181077957, "learning_rate": 0.002, "loss": 2.3454, "step": 207190 }, { "epoch": 0.8009772541015293, "grad_norm": 0.10896746069192886, "learning_rate": 0.002, "loss": 2.3261, "step": 207200 }, { "epoch": 0.8010159113049126, "grad_norm": 0.10068132728338242, "learning_rate": 0.002, "loss": 2.3466, "step": 207210 }, { "epoch": 0.8010545685082958, "grad_norm": 0.1001577228307724, "learning_rate": 0.002, "loss": 2.3385, "step": 207220 }, { "epoch": 0.8010932257116791, "grad_norm": 0.09665859490633011, "learning_rate": 0.002, "loss": 2.3476, "step": 207230 }, { "epoch": 0.8011318829150624, "grad_norm": 0.09988411515951157, "learning_rate": 0.002, "loss": 2.3435, "step": 207240 }, { "epoch": 0.8011705401184457, "grad_norm": 0.12435844540596008, "learning_rate": 0.002, "loss": 2.3473, "step": 207250 }, { "epoch": 0.8012091973218289, "grad_norm": 0.10896141082048416, "learning_rate": 0.002, "loss": 2.3611, "step": 207260 }, { "epoch": 0.8012478545252122, "grad_norm": 0.11095938086509705, "learning_rate": 0.002, "loss": 2.3541, "step": 207270 }, { "epoch": 0.8012865117285956, "grad_norm": 0.11793738603591919, "learning_rate": 0.002, "loss": 2.3404, "step": 207280 }, { "epoch": 0.8013251689319788, "grad_norm": 0.11566495895385742, "learning_rate": 0.002, "loss": 2.3285, "step": 207290 }, { "epoch": 0.8013638261353621, "grad_norm": 0.09072500467300415, "learning_rate": 0.002, "loss": 2.3361, "step": 207300 }, { "epoch": 0.8014024833387453, "grad_norm": 0.10017773509025574, "learning_rate": 0.002, "loss": 2.3282, "step": 207310 }, { "epoch": 0.8014411405421287, "grad_norm": 0.12244265526533127, "learning_rate": 0.002, "loss": 2.3519, "step": 207320 }, { "epoch": 0.8014797977455119, "grad_norm": 0.09469608962535858, "learning_rate": 0.002, "loss": 2.346, "step": 207330 }, { "epoch": 0.8015184549488952, "grad_norm": 0.13152846693992615, "learning_rate": 0.002, "loss": 2.3352, "step": 207340 }, { "epoch": 0.8015571121522784, "grad_norm": 0.09928417950868607, "learning_rate": 0.002, "loss": 2.346, "step": 207350 }, { "epoch": 0.8015957693556617, "grad_norm": 0.11704422533512115, "learning_rate": 0.002, "loss": 2.3412, "step": 207360 }, { "epoch": 0.801634426559045, "grad_norm": 0.11043064296245575, "learning_rate": 0.002, "loss": 2.3452, "step": 207370 }, { "epoch": 0.8016730837624283, "grad_norm": 0.10696674138307571, "learning_rate": 0.002, "loss": 2.3174, "step": 207380 }, { "epoch": 0.8017117409658115, "grad_norm": 0.1035086140036583, "learning_rate": 0.002, "loss": 2.3417, "step": 207390 }, { "epoch": 0.8017503981691948, "grad_norm": 0.14738266170024872, "learning_rate": 0.002, "loss": 2.3261, "step": 207400 }, { "epoch": 0.8017890553725782, "grad_norm": 0.09723462909460068, "learning_rate": 0.002, "loss": 2.3524, "step": 207410 }, { "epoch": 0.8018277125759614, "grad_norm": 0.11064042150974274, "learning_rate": 0.002, "loss": 2.3415, "step": 207420 }, { "epoch": 0.8018663697793447, "grad_norm": 0.09808441996574402, "learning_rate": 0.002, "loss": 2.344, "step": 207430 }, { "epoch": 0.8019050269827279, "grad_norm": 0.09821963310241699, "learning_rate": 0.002, "loss": 2.3315, "step": 207440 }, { "epoch": 0.8019436841861113, "grad_norm": 0.09830199927091599, "learning_rate": 0.002, "loss": 2.3446, "step": 207450 }, { "epoch": 0.8019823413894945, "grad_norm": 0.1040264442563057, "learning_rate": 0.002, "loss": 2.355, "step": 207460 }, { "epoch": 0.8020209985928778, "grad_norm": 0.09451977163553238, "learning_rate": 0.002, "loss": 2.3412, "step": 207470 }, { "epoch": 0.802059655796261, "grad_norm": 0.09999929368495941, "learning_rate": 0.002, "loss": 2.3469, "step": 207480 }, { "epoch": 0.8020983129996444, "grad_norm": 0.1217181608080864, "learning_rate": 0.002, "loss": 2.3358, "step": 207490 }, { "epoch": 0.8021369702030277, "grad_norm": 0.09493351727724075, "learning_rate": 0.002, "loss": 2.3307, "step": 207500 }, { "epoch": 0.8021756274064109, "grad_norm": 0.09583764523267746, "learning_rate": 0.002, "loss": 2.3462, "step": 207510 }, { "epoch": 0.8022142846097942, "grad_norm": 0.1117585077881813, "learning_rate": 0.002, "loss": 2.3366, "step": 207520 }, { "epoch": 0.8022529418131775, "grad_norm": 0.28829827904701233, "learning_rate": 0.002, "loss": 2.3414, "step": 207530 }, { "epoch": 0.8022915990165608, "grad_norm": 0.13149257004261017, "learning_rate": 0.002, "loss": 2.3385, "step": 207540 }, { "epoch": 0.802330256219944, "grad_norm": 0.10232976824045181, "learning_rate": 0.002, "loss": 2.3401, "step": 207550 }, { "epoch": 0.8023689134233273, "grad_norm": 0.09590692818164825, "learning_rate": 0.002, "loss": 2.35, "step": 207560 }, { "epoch": 0.8024075706267105, "grad_norm": 0.10753864794969559, "learning_rate": 0.002, "loss": 2.3327, "step": 207570 }, { "epoch": 0.8024462278300939, "grad_norm": 0.0987880751490593, "learning_rate": 0.002, "loss": 2.3382, "step": 207580 }, { "epoch": 0.8024848850334771, "grad_norm": 0.09377659857273102, "learning_rate": 0.002, "loss": 2.3328, "step": 207590 }, { "epoch": 0.8025235422368604, "grad_norm": 0.10115095973014832, "learning_rate": 0.002, "loss": 2.3358, "step": 207600 }, { "epoch": 0.8025621994402437, "grad_norm": 0.10874200612306595, "learning_rate": 0.002, "loss": 2.337, "step": 207610 }, { "epoch": 0.802600856643627, "grad_norm": 0.10040194541215897, "learning_rate": 0.002, "loss": 2.3462, "step": 207620 }, { "epoch": 0.8026395138470103, "grad_norm": 0.11709320545196533, "learning_rate": 0.002, "loss": 2.3514, "step": 207630 }, { "epoch": 0.8026781710503935, "grad_norm": 0.09988772869110107, "learning_rate": 0.002, "loss": 2.3403, "step": 207640 }, { "epoch": 0.8027168282537768, "grad_norm": 0.10499367862939835, "learning_rate": 0.002, "loss": 2.3408, "step": 207650 }, { "epoch": 0.8027554854571601, "grad_norm": 0.13137167692184448, "learning_rate": 0.002, "loss": 2.3456, "step": 207660 }, { "epoch": 0.8027941426605434, "grad_norm": 0.14281345903873444, "learning_rate": 0.002, "loss": 2.3304, "step": 207670 }, { "epoch": 0.8028327998639266, "grad_norm": 0.0973275750875473, "learning_rate": 0.002, "loss": 2.3315, "step": 207680 }, { "epoch": 0.8028714570673099, "grad_norm": 0.10529021918773651, "learning_rate": 0.002, "loss": 2.3352, "step": 207690 }, { "epoch": 0.8029101142706933, "grad_norm": 0.09296000003814697, "learning_rate": 0.002, "loss": 2.3427, "step": 207700 }, { "epoch": 0.8029487714740765, "grad_norm": 0.0963568240404129, "learning_rate": 0.002, "loss": 2.3247, "step": 207710 }, { "epoch": 0.8029874286774598, "grad_norm": 0.1171727329492569, "learning_rate": 0.002, "loss": 2.3197, "step": 207720 }, { "epoch": 0.803026085880843, "grad_norm": 0.10067886859178543, "learning_rate": 0.002, "loss": 2.3419, "step": 207730 }, { "epoch": 0.8030647430842263, "grad_norm": 0.10748159140348434, "learning_rate": 0.002, "loss": 2.3272, "step": 207740 }, { "epoch": 0.8031034002876096, "grad_norm": 0.10766609013080597, "learning_rate": 0.002, "loss": 2.3267, "step": 207750 }, { "epoch": 0.8031420574909929, "grad_norm": 0.11034165322780609, "learning_rate": 0.002, "loss": 2.3422, "step": 207760 }, { "epoch": 0.8031807146943761, "grad_norm": 0.11566515266895294, "learning_rate": 0.002, "loss": 2.3493, "step": 207770 }, { "epoch": 0.8032193718977594, "grad_norm": 0.09075692296028137, "learning_rate": 0.002, "loss": 2.3472, "step": 207780 }, { "epoch": 0.8032580291011427, "grad_norm": 0.12387843430042267, "learning_rate": 0.002, "loss": 2.3443, "step": 207790 }, { "epoch": 0.803296686304526, "grad_norm": 0.1029786691069603, "learning_rate": 0.002, "loss": 2.3269, "step": 207800 }, { "epoch": 0.8033353435079092, "grad_norm": 0.09761625528335571, "learning_rate": 0.002, "loss": 2.3301, "step": 207810 }, { "epoch": 0.8033740007112925, "grad_norm": 0.11875072866678238, "learning_rate": 0.002, "loss": 2.3435, "step": 207820 }, { "epoch": 0.8034126579146759, "grad_norm": 0.12390270084142685, "learning_rate": 0.002, "loss": 2.3564, "step": 207830 }, { "epoch": 0.8034513151180591, "grad_norm": 0.14737088978290558, "learning_rate": 0.002, "loss": 2.3499, "step": 207840 }, { "epoch": 0.8034899723214424, "grad_norm": 0.11713667958974838, "learning_rate": 0.002, "loss": 2.3461, "step": 207850 }, { "epoch": 0.8035286295248256, "grad_norm": 0.10130646824836731, "learning_rate": 0.002, "loss": 2.3515, "step": 207860 }, { "epoch": 0.803567286728209, "grad_norm": 0.11106660962104797, "learning_rate": 0.002, "loss": 2.3416, "step": 207870 }, { "epoch": 0.8036059439315922, "grad_norm": 0.10673379898071289, "learning_rate": 0.002, "loss": 2.3336, "step": 207880 }, { "epoch": 0.8036446011349755, "grad_norm": 0.105867400765419, "learning_rate": 0.002, "loss": 2.3404, "step": 207890 }, { "epoch": 0.8036832583383587, "grad_norm": 0.10368683189153671, "learning_rate": 0.002, "loss": 2.3389, "step": 207900 }, { "epoch": 0.803721915541742, "grad_norm": 0.1729949712753296, "learning_rate": 0.002, "loss": 2.3453, "step": 207910 }, { "epoch": 0.8037605727451254, "grad_norm": 0.12230349332094193, "learning_rate": 0.002, "loss": 2.342, "step": 207920 }, { "epoch": 0.8037992299485086, "grad_norm": 0.16800034046173096, "learning_rate": 0.002, "loss": 2.3385, "step": 207930 }, { "epoch": 0.8038378871518919, "grad_norm": 0.11105374246835709, "learning_rate": 0.002, "loss": 2.3432, "step": 207940 }, { "epoch": 0.8038765443552751, "grad_norm": 0.11874057352542877, "learning_rate": 0.002, "loss": 2.3473, "step": 207950 }, { "epoch": 0.8039152015586585, "grad_norm": 0.10733795911073685, "learning_rate": 0.002, "loss": 2.3498, "step": 207960 }, { "epoch": 0.8039538587620417, "grad_norm": 0.10871712863445282, "learning_rate": 0.002, "loss": 2.3536, "step": 207970 }, { "epoch": 0.803992515965425, "grad_norm": 0.10429523140192032, "learning_rate": 0.002, "loss": 2.3526, "step": 207980 }, { "epoch": 0.8040311731688082, "grad_norm": 0.0885714516043663, "learning_rate": 0.002, "loss": 2.3309, "step": 207990 }, { "epoch": 0.8040698303721916, "grad_norm": 0.10083181411027908, "learning_rate": 0.002, "loss": 2.3466, "step": 208000 }, { "epoch": 0.8041084875755748, "grad_norm": 0.10782654583454132, "learning_rate": 0.002, "loss": 2.3414, "step": 208010 }, { "epoch": 0.8041471447789581, "grad_norm": 0.10470322519540787, "learning_rate": 0.002, "loss": 2.3388, "step": 208020 }, { "epoch": 0.8041858019823414, "grad_norm": 0.11609132587909698, "learning_rate": 0.002, "loss": 2.3251, "step": 208030 }, { "epoch": 0.8042244591857247, "grad_norm": 0.09933394938707352, "learning_rate": 0.002, "loss": 2.3392, "step": 208040 }, { "epoch": 0.804263116389108, "grad_norm": 0.10304474085569382, "learning_rate": 0.002, "loss": 2.3425, "step": 208050 }, { "epoch": 0.8043017735924912, "grad_norm": 0.10988190770149231, "learning_rate": 0.002, "loss": 2.3345, "step": 208060 }, { "epoch": 0.8043404307958745, "grad_norm": 0.09970662742853165, "learning_rate": 0.002, "loss": 2.3363, "step": 208070 }, { "epoch": 0.8043790879992578, "grad_norm": 0.12120552361011505, "learning_rate": 0.002, "loss": 2.3553, "step": 208080 }, { "epoch": 0.8044177452026411, "grad_norm": 0.10486899316310883, "learning_rate": 0.002, "loss": 2.331, "step": 208090 }, { "epoch": 0.8044564024060243, "grad_norm": 0.1178613230586052, "learning_rate": 0.002, "loss": 2.3396, "step": 208100 }, { "epoch": 0.8044950596094076, "grad_norm": 0.10001028329133987, "learning_rate": 0.002, "loss": 2.3512, "step": 208110 }, { "epoch": 0.8045337168127908, "grad_norm": 0.09504573047161102, "learning_rate": 0.002, "loss": 2.3626, "step": 208120 }, { "epoch": 0.8045723740161742, "grad_norm": 0.09070435911417007, "learning_rate": 0.002, "loss": 2.3448, "step": 208130 }, { "epoch": 0.8046110312195575, "grad_norm": 0.09632452577352524, "learning_rate": 0.002, "loss": 2.3482, "step": 208140 }, { "epoch": 0.8046496884229407, "grad_norm": 0.11543071269989014, "learning_rate": 0.002, "loss": 2.3527, "step": 208150 }, { "epoch": 0.804688345626324, "grad_norm": 0.104801706969738, "learning_rate": 0.002, "loss": 2.3406, "step": 208160 }, { "epoch": 0.8047270028297073, "grad_norm": 0.09996666014194489, "learning_rate": 0.002, "loss": 2.341, "step": 208170 }, { "epoch": 0.8047656600330906, "grad_norm": 0.10567789524793625, "learning_rate": 0.002, "loss": 2.3324, "step": 208180 }, { "epoch": 0.8048043172364738, "grad_norm": 0.0922398567199707, "learning_rate": 0.002, "loss": 2.3445, "step": 208190 }, { "epoch": 0.8048429744398571, "grad_norm": 0.10439912974834442, "learning_rate": 0.002, "loss": 2.3368, "step": 208200 }, { "epoch": 0.8048816316432404, "grad_norm": 0.10547574609518051, "learning_rate": 0.002, "loss": 2.3231, "step": 208210 }, { "epoch": 0.8049202888466237, "grad_norm": 0.09658071398735046, "learning_rate": 0.002, "loss": 2.3461, "step": 208220 }, { "epoch": 0.804958946050007, "grad_norm": 0.0904550701379776, "learning_rate": 0.002, "loss": 2.3384, "step": 208230 }, { "epoch": 0.8049976032533902, "grad_norm": 0.10824532806873322, "learning_rate": 0.002, "loss": 2.3364, "step": 208240 }, { "epoch": 0.8050362604567736, "grad_norm": 0.11004411429166794, "learning_rate": 0.002, "loss": 2.323, "step": 208250 }, { "epoch": 0.8050749176601568, "grad_norm": 0.11853194236755371, "learning_rate": 0.002, "loss": 2.3322, "step": 208260 }, { "epoch": 0.8051135748635401, "grad_norm": 0.1130291149020195, "learning_rate": 0.002, "loss": 2.3405, "step": 208270 }, { "epoch": 0.8051522320669233, "grad_norm": 0.09779933094978333, "learning_rate": 0.002, "loss": 2.3396, "step": 208280 }, { "epoch": 0.8051908892703066, "grad_norm": 0.10665308684110641, "learning_rate": 0.002, "loss": 2.3398, "step": 208290 }, { "epoch": 0.8052295464736899, "grad_norm": 0.10099265724420547, "learning_rate": 0.002, "loss": 2.3391, "step": 208300 }, { "epoch": 0.8052682036770732, "grad_norm": 0.12212111800909042, "learning_rate": 0.002, "loss": 2.3591, "step": 208310 }, { "epoch": 0.8053068608804564, "grad_norm": 0.10795871913433075, "learning_rate": 0.002, "loss": 2.3467, "step": 208320 }, { "epoch": 0.8053455180838397, "grad_norm": 0.10837031155824661, "learning_rate": 0.002, "loss": 2.3522, "step": 208330 }, { "epoch": 0.8053841752872231, "grad_norm": 0.10169872641563416, "learning_rate": 0.002, "loss": 2.3442, "step": 208340 }, { "epoch": 0.8054228324906063, "grad_norm": 0.10496000945568085, "learning_rate": 0.002, "loss": 2.3531, "step": 208350 }, { "epoch": 0.8054614896939896, "grad_norm": 0.08883891254663467, "learning_rate": 0.002, "loss": 2.3236, "step": 208360 }, { "epoch": 0.8055001468973728, "grad_norm": 0.10567431151866913, "learning_rate": 0.002, "loss": 2.3418, "step": 208370 }, { "epoch": 0.8055388041007562, "grad_norm": 0.12218322604894638, "learning_rate": 0.002, "loss": 2.3387, "step": 208380 }, { "epoch": 0.8055774613041394, "grad_norm": 0.11944884061813354, "learning_rate": 0.002, "loss": 2.333, "step": 208390 }, { "epoch": 0.8056161185075227, "grad_norm": 0.09559208154678345, "learning_rate": 0.002, "loss": 2.3196, "step": 208400 }, { "epoch": 0.8056547757109059, "grad_norm": 0.11223572492599487, "learning_rate": 0.002, "loss": 2.3466, "step": 208410 }, { "epoch": 0.8056934329142893, "grad_norm": 0.09969627857208252, "learning_rate": 0.002, "loss": 2.3566, "step": 208420 }, { "epoch": 0.8057320901176726, "grad_norm": 0.10251709073781967, "learning_rate": 0.002, "loss": 2.3348, "step": 208430 }, { "epoch": 0.8057707473210558, "grad_norm": 0.10395486652851105, "learning_rate": 0.002, "loss": 2.3457, "step": 208440 }, { "epoch": 0.805809404524439, "grad_norm": 0.10985516756772995, "learning_rate": 0.002, "loss": 2.348, "step": 208450 }, { "epoch": 0.8058480617278224, "grad_norm": 0.11914392560720444, "learning_rate": 0.002, "loss": 2.3445, "step": 208460 }, { "epoch": 0.8058867189312057, "grad_norm": 0.09631257504224777, "learning_rate": 0.002, "loss": 2.3338, "step": 208470 }, { "epoch": 0.8059253761345889, "grad_norm": 0.1129194125533104, "learning_rate": 0.002, "loss": 2.341, "step": 208480 }, { "epoch": 0.8059640333379722, "grad_norm": 0.09658629447221756, "learning_rate": 0.002, "loss": 2.3339, "step": 208490 }, { "epoch": 0.8060026905413554, "grad_norm": 0.11044806241989136, "learning_rate": 0.002, "loss": 2.3251, "step": 208500 }, { "epoch": 0.8060413477447388, "grad_norm": 0.1090376228094101, "learning_rate": 0.002, "loss": 2.3175, "step": 208510 }, { "epoch": 0.806080004948122, "grad_norm": 0.0996435284614563, "learning_rate": 0.002, "loss": 2.3534, "step": 208520 }, { "epoch": 0.8061186621515053, "grad_norm": 0.10276366770267487, "learning_rate": 0.002, "loss": 2.3313, "step": 208530 }, { "epoch": 0.8061573193548885, "grad_norm": 0.10204587876796722, "learning_rate": 0.002, "loss": 2.3292, "step": 208540 }, { "epoch": 0.8061959765582719, "grad_norm": 0.09693923592567444, "learning_rate": 0.002, "loss": 2.3367, "step": 208550 }, { "epoch": 0.8062346337616552, "grad_norm": 0.10240539163351059, "learning_rate": 0.002, "loss": 2.3439, "step": 208560 }, { "epoch": 0.8062732909650384, "grad_norm": 0.09578530490398407, "learning_rate": 0.002, "loss": 2.3502, "step": 208570 }, { "epoch": 0.8063119481684217, "grad_norm": 0.1167021319270134, "learning_rate": 0.002, "loss": 2.317, "step": 208580 }, { "epoch": 0.806350605371805, "grad_norm": 0.09664864093065262, "learning_rate": 0.002, "loss": 2.3423, "step": 208590 }, { "epoch": 0.8063892625751883, "grad_norm": 0.09875663369894028, "learning_rate": 0.002, "loss": 2.3267, "step": 208600 }, { "epoch": 0.8064279197785715, "grad_norm": 0.09840506315231323, "learning_rate": 0.002, "loss": 2.3286, "step": 208610 }, { "epoch": 0.8064665769819548, "grad_norm": 0.08871841430664062, "learning_rate": 0.002, "loss": 2.3454, "step": 208620 }, { "epoch": 0.8065052341853381, "grad_norm": 0.08804570138454437, "learning_rate": 0.002, "loss": 2.339, "step": 208630 }, { "epoch": 0.8065438913887214, "grad_norm": 0.12451464682817459, "learning_rate": 0.002, "loss": 2.346, "step": 208640 }, { "epoch": 0.8065825485921047, "grad_norm": 0.09834271669387817, "learning_rate": 0.002, "loss": 2.3364, "step": 208650 }, { "epoch": 0.8066212057954879, "grad_norm": 0.10178635269403458, "learning_rate": 0.002, "loss": 2.3408, "step": 208660 }, { "epoch": 0.8066598629988712, "grad_norm": 0.09166623651981354, "learning_rate": 0.002, "loss": 2.3559, "step": 208670 }, { "epoch": 0.8066985202022545, "grad_norm": 0.09693189710378647, "learning_rate": 0.002, "loss": 2.3378, "step": 208680 }, { "epoch": 0.8067371774056378, "grad_norm": 0.09537971019744873, "learning_rate": 0.002, "loss": 2.334, "step": 208690 }, { "epoch": 0.806775834609021, "grad_norm": 0.09352777153253555, "learning_rate": 0.002, "loss": 2.33, "step": 208700 }, { "epoch": 0.8068144918124043, "grad_norm": 0.097089983522892, "learning_rate": 0.002, "loss": 2.3442, "step": 208710 }, { "epoch": 0.8068531490157876, "grad_norm": 0.11535824090242386, "learning_rate": 0.002, "loss": 2.3357, "step": 208720 }, { "epoch": 0.8068918062191709, "grad_norm": 0.12450321763753891, "learning_rate": 0.002, "loss": 2.3569, "step": 208730 }, { "epoch": 0.8069304634225541, "grad_norm": 0.09889606386423111, "learning_rate": 0.002, "loss": 2.3489, "step": 208740 }, { "epoch": 0.8069691206259374, "grad_norm": 0.10972964018583298, "learning_rate": 0.002, "loss": 2.3429, "step": 208750 }, { "epoch": 0.8070077778293208, "grad_norm": 0.10638296604156494, "learning_rate": 0.002, "loss": 2.3399, "step": 208760 }, { "epoch": 0.807046435032704, "grad_norm": 0.09678032994270325, "learning_rate": 0.002, "loss": 2.3326, "step": 208770 }, { "epoch": 0.8070850922360873, "grad_norm": 0.11467410624027252, "learning_rate": 0.002, "loss": 2.3258, "step": 208780 }, { "epoch": 0.8071237494394705, "grad_norm": 0.10477212816476822, "learning_rate": 0.002, "loss": 2.3287, "step": 208790 }, { "epoch": 0.8071624066428539, "grad_norm": 0.12658697366714478, "learning_rate": 0.002, "loss": 2.3475, "step": 208800 }, { "epoch": 0.8072010638462371, "grad_norm": 0.10979489237070084, "learning_rate": 0.002, "loss": 2.3435, "step": 208810 }, { "epoch": 0.8072397210496204, "grad_norm": 0.10843465477228165, "learning_rate": 0.002, "loss": 2.3508, "step": 208820 }, { "epoch": 0.8072783782530036, "grad_norm": 0.09200742840766907, "learning_rate": 0.002, "loss": 2.3329, "step": 208830 }, { "epoch": 0.8073170354563869, "grad_norm": 0.09514549374580383, "learning_rate": 0.002, "loss": 2.3433, "step": 208840 }, { "epoch": 0.8073556926597703, "grad_norm": 0.1180688887834549, "learning_rate": 0.002, "loss": 2.3299, "step": 208850 }, { "epoch": 0.8073943498631535, "grad_norm": 0.09553969651460648, "learning_rate": 0.002, "loss": 2.3394, "step": 208860 }, { "epoch": 0.8074330070665368, "grad_norm": 0.10637307912111282, "learning_rate": 0.002, "loss": 2.3411, "step": 208870 }, { "epoch": 0.80747166426992, "grad_norm": 0.10888589173555374, "learning_rate": 0.002, "loss": 2.3401, "step": 208880 }, { "epoch": 0.8075103214733034, "grad_norm": 0.09537948668003082, "learning_rate": 0.002, "loss": 2.3367, "step": 208890 }, { "epoch": 0.8075489786766866, "grad_norm": 0.10661628842353821, "learning_rate": 0.002, "loss": 2.3436, "step": 208900 }, { "epoch": 0.8075876358800699, "grad_norm": 0.13251517713069916, "learning_rate": 0.002, "loss": 2.3546, "step": 208910 }, { "epoch": 0.8076262930834531, "grad_norm": 0.10121644288301468, "learning_rate": 0.002, "loss": 2.3233, "step": 208920 }, { "epoch": 0.8076649502868365, "grad_norm": 0.27895215153694153, "learning_rate": 0.002, "loss": 2.3423, "step": 208930 }, { "epoch": 0.8077036074902197, "grad_norm": 0.09418975561857224, "learning_rate": 0.002, "loss": 2.3384, "step": 208940 }, { "epoch": 0.807742264693603, "grad_norm": 0.1138615608215332, "learning_rate": 0.002, "loss": 2.3423, "step": 208950 }, { "epoch": 0.8077809218969862, "grad_norm": 0.106838159263134, "learning_rate": 0.002, "loss": 2.3396, "step": 208960 }, { "epoch": 0.8078195791003696, "grad_norm": 0.11634216457605362, "learning_rate": 0.002, "loss": 2.3573, "step": 208970 }, { "epoch": 0.8078582363037529, "grad_norm": 0.11513422429561615, "learning_rate": 0.002, "loss": 2.3312, "step": 208980 }, { "epoch": 0.8078968935071361, "grad_norm": 0.11838242411613464, "learning_rate": 0.002, "loss": 2.3379, "step": 208990 }, { "epoch": 0.8079355507105194, "grad_norm": 0.10261435061693192, "learning_rate": 0.002, "loss": 2.3397, "step": 209000 }, { "epoch": 0.8079742079139027, "grad_norm": 0.09122615307569504, "learning_rate": 0.002, "loss": 2.3346, "step": 209010 }, { "epoch": 0.808012865117286, "grad_norm": 0.1018514409661293, "learning_rate": 0.002, "loss": 2.3342, "step": 209020 }, { "epoch": 0.8080515223206692, "grad_norm": 0.11309902369976044, "learning_rate": 0.002, "loss": 2.3373, "step": 209030 }, { "epoch": 0.8080901795240525, "grad_norm": 0.09477593749761581, "learning_rate": 0.002, "loss": 2.3478, "step": 209040 }, { "epoch": 0.8081288367274357, "grad_norm": 0.11209241300821304, "learning_rate": 0.002, "loss": 2.3359, "step": 209050 }, { "epoch": 0.8081674939308191, "grad_norm": 0.11718212068080902, "learning_rate": 0.002, "loss": 2.3409, "step": 209060 }, { "epoch": 0.8082061511342024, "grad_norm": 0.10840430110692978, "learning_rate": 0.002, "loss": 2.3405, "step": 209070 }, { "epoch": 0.8082448083375856, "grad_norm": 0.09068944305181503, "learning_rate": 0.002, "loss": 2.3476, "step": 209080 }, { "epoch": 0.8082834655409689, "grad_norm": 0.10419811308383942, "learning_rate": 0.002, "loss": 2.3475, "step": 209090 }, { "epoch": 0.8083221227443522, "grad_norm": 0.10249201208353043, "learning_rate": 0.002, "loss": 2.3275, "step": 209100 }, { "epoch": 0.8083607799477355, "grad_norm": 0.09948867559432983, "learning_rate": 0.002, "loss": 2.3396, "step": 209110 }, { "epoch": 0.8083994371511187, "grad_norm": 0.10818983614444733, "learning_rate": 0.002, "loss": 2.342, "step": 209120 }, { "epoch": 0.808438094354502, "grad_norm": 0.09621796011924744, "learning_rate": 0.002, "loss": 2.3367, "step": 209130 }, { "epoch": 0.8084767515578853, "grad_norm": 0.09759146720170975, "learning_rate": 0.002, "loss": 2.3406, "step": 209140 }, { "epoch": 0.8085154087612686, "grad_norm": 0.11393880844116211, "learning_rate": 0.002, "loss": 2.3364, "step": 209150 }, { "epoch": 0.8085540659646518, "grad_norm": 0.10928820818662643, "learning_rate": 0.002, "loss": 2.3301, "step": 209160 }, { "epoch": 0.8085927231680351, "grad_norm": 0.1005977988243103, "learning_rate": 0.002, "loss": 2.3386, "step": 209170 }, { "epoch": 0.8086313803714185, "grad_norm": 0.10145454853773117, "learning_rate": 0.002, "loss": 2.3347, "step": 209180 }, { "epoch": 0.8086700375748017, "grad_norm": 0.09967648983001709, "learning_rate": 0.002, "loss": 2.3415, "step": 209190 }, { "epoch": 0.808708694778185, "grad_norm": 0.10319273173809052, "learning_rate": 0.002, "loss": 2.3622, "step": 209200 }, { "epoch": 0.8087473519815682, "grad_norm": 0.10071306675672531, "learning_rate": 0.002, "loss": 2.3374, "step": 209210 }, { "epoch": 0.8087860091849515, "grad_norm": 0.09968692809343338, "learning_rate": 0.002, "loss": 2.3462, "step": 209220 }, { "epoch": 0.8088246663883348, "grad_norm": 0.11552612483501434, "learning_rate": 0.002, "loss": 2.321, "step": 209230 }, { "epoch": 0.8088633235917181, "grad_norm": 0.10566051304340363, "learning_rate": 0.002, "loss": 2.3304, "step": 209240 }, { "epoch": 0.8089019807951013, "grad_norm": 0.10294246673583984, "learning_rate": 0.002, "loss": 2.3559, "step": 209250 }, { "epoch": 0.8089406379984846, "grad_norm": 0.10880181193351746, "learning_rate": 0.002, "loss": 2.3368, "step": 209260 }, { "epoch": 0.808979295201868, "grad_norm": 0.10486720502376556, "learning_rate": 0.002, "loss": 2.3402, "step": 209270 }, { "epoch": 0.8090179524052512, "grad_norm": 0.10026592761278152, "learning_rate": 0.002, "loss": 2.3329, "step": 209280 }, { "epoch": 0.8090566096086345, "grad_norm": 0.0959942489862442, "learning_rate": 0.002, "loss": 2.3333, "step": 209290 }, { "epoch": 0.8090952668120177, "grad_norm": 0.11561115086078644, "learning_rate": 0.002, "loss": 2.3264, "step": 209300 }, { "epoch": 0.8091339240154011, "grad_norm": 0.09788880497217178, "learning_rate": 0.002, "loss": 2.3418, "step": 209310 }, { "epoch": 0.8091725812187843, "grad_norm": 0.08990046381950378, "learning_rate": 0.002, "loss": 2.3414, "step": 209320 }, { "epoch": 0.8092112384221676, "grad_norm": 0.10459993034601212, "learning_rate": 0.002, "loss": 2.3265, "step": 209330 }, { "epoch": 0.8092498956255508, "grad_norm": 0.10062652826309204, "learning_rate": 0.002, "loss": 2.3318, "step": 209340 }, { "epoch": 0.8092885528289342, "grad_norm": 0.11534670740365982, "learning_rate": 0.002, "loss": 2.3427, "step": 209350 }, { "epoch": 0.8093272100323174, "grad_norm": 0.1055721789598465, "learning_rate": 0.002, "loss": 2.3308, "step": 209360 }, { "epoch": 0.8093658672357007, "grad_norm": 0.10717543959617615, "learning_rate": 0.002, "loss": 2.3318, "step": 209370 }, { "epoch": 0.809404524439084, "grad_norm": 0.12222599238157272, "learning_rate": 0.002, "loss": 2.3248, "step": 209380 }, { "epoch": 0.8094431816424672, "grad_norm": 0.11402303725481033, "learning_rate": 0.002, "loss": 2.3399, "step": 209390 }, { "epoch": 0.8094818388458506, "grad_norm": 0.09476860612630844, "learning_rate": 0.002, "loss": 2.3348, "step": 209400 }, { "epoch": 0.8095204960492338, "grad_norm": 0.12673380970954895, "learning_rate": 0.002, "loss": 2.3433, "step": 209410 }, { "epoch": 0.8095591532526171, "grad_norm": 0.1033923402428627, "learning_rate": 0.002, "loss": 2.3553, "step": 209420 }, { "epoch": 0.8095978104560003, "grad_norm": 0.14089608192443848, "learning_rate": 0.002, "loss": 2.3661, "step": 209430 }, { "epoch": 0.8096364676593837, "grad_norm": 0.10108928382396698, "learning_rate": 0.002, "loss": 2.3484, "step": 209440 }, { "epoch": 0.8096751248627669, "grad_norm": 0.11482837796211243, "learning_rate": 0.002, "loss": 2.354, "step": 209450 }, { "epoch": 0.8097137820661502, "grad_norm": 0.1034383699297905, "learning_rate": 0.002, "loss": 2.3399, "step": 209460 }, { "epoch": 0.8097524392695334, "grad_norm": 0.09372849017381668, "learning_rate": 0.002, "loss": 2.327, "step": 209470 }, { "epoch": 0.8097910964729168, "grad_norm": 0.10579051077365875, "learning_rate": 0.002, "loss": 2.3413, "step": 209480 }, { "epoch": 0.8098297536763001, "grad_norm": 0.09323503077030182, "learning_rate": 0.002, "loss": 2.3307, "step": 209490 }, { "epoch": 0.8098684108796833, "grad_norm": 0.09396940469741821, "learning_rate": 0.002, "loss": 2.3289, "step": 209500 }, { "epoch": 0.8099070680830666, "grad_norm": 0.09449894726276398, "learning_rate": 0.002, "loss": 2.3421, "step": 209510 }, { "epoch": 0.8099457252864499, "grad_norm": 0.10405312478542328, "learning_rate": 0.002, "loss": 2.3543, "step": 209520 }, { "epoch": 0.8099843824898332, "grad_norm": 0.10237361490726471, "learning_rate": 0.002, "loss": 2.3434, "step": 209530 }, { "epoch": 0.8100230396932164, "grad_norm": 0.11019392311573029, "learning_rate": 0.002, "loss": 2.3375, "step": 209540 }, { "epoch": 0.8100616968965997, "grad_norm": 0.12901364266872406, "learning_rate": 0.002, "loss": 2.329, "step": 209550 }, { "epoch": 0.810100354099983, "grad_norm": 0.11369182914495468, "learning_rate": 0.002, "loss": 2.3303, "step": 209560 }, { "epoch": 0.8101390113033663, "grad_norm": 0.11985727399587631, "learning_rate": 0.002, "loss": 2.3338, "step": 209570 }, { "epoch": 0.8101776685067495, "grad_norm": 0.12362784147262573, "learning_rate": 0.002, "loss": 2.3394, "step": 209580 }, { "epoch": 0.8102163257101328, "grad_norm": 0.11008656769990921, "learning_rate": 0.002, "loss": 2.3253, "step": 209590 }, { "epoch": 0.810254982913516, "grad_norm": 0.09525039047002792, "learning_rate": 0.002, "loss": 2.3375, "step": 209600 }, { "epoch": 0.8102936401168994, "grad_norm": 0.09465383738279343, "learning_rate": 0.002, "loss": 2.3335, "step": 209610 }, { "epoch": 0.8103322973202827, "grad_norm": 0.12793488800525665, "learning_rate": 0.002, "loss": 2.3347, "step": 209620 }, { "epoch": 0.8103709545236659, "grad_norm": 0.11443587392568588, "learning_rate": 0.002, "loss": 2.3433, "step": 209630 }, { "epoch": 0.8104096117270492, "grad_norm": 0.09764964878559113, "learning_rate": 0.002, "loss": 2.3404, "step": 209640 }, { "epoch": 0.8104482689304325, "grad_norm": 0.10466815531253815, "learning_rate": 0.002, "loss": 2.3389, "step": 209650 }, { "epoch": 0.8104869261338158, "grad_norm": 0.09393223375082016, "learning_rate": 0.002, "loss": 2.3451, "step": 209660 }, { "epoch": 0.810525583337199, "grad_norm": 0.10785609483718872, "learning_rate": 0.002, "loss": 2.3342, "step": 209670 }, { "epoch": 0.8105642405405823, "grad_norm": 0.09427961707115173, "learning_rate": 0.002, "loss": 2.3272, "step": 209680 }, { "epoch": 0.8106028977439657, "grad_norm": 0.1001754030585289, "learning_rate": 0.002, "loss": 2.3344, "step": 209690 }, { "epoch": 0.8106415549473489, "grad_norm": 0.11797784268856049, "learning_rate": 0.002, "loss": 2.3573, "step": 209700 }, { "epoch": 0.8106802121507322, "grad_norm": 0.1397496461868286, "learning_rate": 0.002, "loss": 2.3369, "step": 209710 }, { "epoch": 0.8107188693541154, "grad_norm": 0.10363800823688507, "learning_rate": 0.002, "loss": 2.3471, "step": 209720 }, { "epoch": 0.8107575265574988, "grad_norm": 0.11160644888877869, "learning_rate": 0.002, "loss": 2.3328, "step": 209730 }, { "epoch": 0.810796183760882, "grad_norm": 0.10050149261951447, "learning_rate": 0.002, "loss": 2.3345, "step": 209740 }, { "epoch": 0.8108348409642653, "grad_norm": 0.11539889872074127, "learning_rate": 0.002, "loss": 2.3327, "step": 209750 }, { "epoch": 0.8108734981676485, "grad_norm": 0.09776457399129868, "learning_rate": 0.002, "loss": 2.3369, "step": 209760 }, { "epoch": 0.8109121553710318, "grad_norm": 0.13011224567890167, "learning_rate": 0.002, "loss": 2.3458, "step": 209770 }, { "epoch": 0.8109508125744151, "grad_norm": 0.10698943585157394, "learning_rate": 0.002, "loss": 2.3362, "step": 209780 }, { "epoch": 0.8109894697777984, "grad_norm": 0.08842163532972336, "learning_rate": 0.002, "loss": 2.3412, "step": 209790 }, { "epoch": 0.8110281269811817, "grad_norm": 0.10007239878177643, "learning_rate": 0.002, "loss": 2.349, "step": 209800 }, { "epoch": 0.8110667841845649, "grad_norm": 0.09910252690315247, "learning_rate": 0.002, "loss": 2.3323, "step": 209810 }, { "epoch": 0.8111054413879483, "grad_norm": 0.10531774908304214, "learning_rate": 0.002, "loss": 2.3368, "step": 209820 }, { "epoch": 0.8111440985913315, "grad_norm": 0.09842731058597565, "learning_rate": 0.002, "loss": 2.3417, "step": 209830 }, { "epoch": 0.8111827557947148, "grad_norm": 0.1132144182920456, "learning_rate": 0.002, "loss": 2.3608, "step": 209840 }, { "epoch": 0.811221412998098, "grad_norm": 0.10505369305610657, "learning_rate": 0.002, "loss": 2.351, "step": 209850 }, { "epoch": 0.8112600702014814, "grad_norm": 0.09998974204063416, "learning_rate": 0.002, "loss": 2.3417, "step": 209860 }, { "epoch": 0.8112987274048646, "grad_norm": 0.1102500855922699, "learning_rate": 0.002, "loss": 2.355, "step": 209870 }, { "epoch": 0.8113373846082479, "grad_norm": 0.10504692792892456, "learning_rate": 0.002, "loss": 2.3482, "step": 209880 }, { "epoch": 0.8113760418116311, "grad_norm": 0.10265660285949707, "learning_rate": 0.002, "loss": 2.3196, "step": 209890 }, { "epoch": 0.8114146990150145, "grad_norm": 0.1298140585422516, "learning_rate": 0.002, "loss": 2.3331, "step": 209900 }, { "epoch": 0.8114533562183978, "grad_norm": 0.11420505493879318, "learning_rate": 0.002, "loss": 2.3441, "step": 209910 }, { "epoch": 0.811492013421781, "grad_norm": 0.10623852163553238, "learning_rate": 0.002, "loss": 2.3345, "step": 209920 }, { "epoch": 0.8115306706251643, "grad_norm": 0.09393060207366943, "learning_rate": 0.002, "loss": 2.3268, "step": 209930 }, { "epoch": 0.8115693278285476, "grad_norm": 0.1149723008275032, "learning_rate": 0.002, "loss": 2.3488, "step": 209940 }, { "epoch": 0.8116079850319309, "grad_norm": 0.12244723737239838, "learning_rate": 0.002, "loss": 2.3473, "step": 209950 }, { "epoch": 0.8116466422353141, "grad_norm": 0.10593932121992111, "learning_rate": 0.002, "loss": 2.3582, "step": 209960 }, { "epoch": 0.8116852994386974, "grad_norm": 0.10085232555866241, "learning_rate": 0.002, "loss": 2.3413, "step": 209970 }, { "epoch": 0.8117239566420806, "grad_norm": 0.10210473090410233, "learning_rate": 0.002, "loss": 2.3371, "step": 209980 }, { "epoch": 0.811762613845464, "grad_norm": 0.09529945999383926, "learning_rate": 0.002, "loss": 2.3379, "step": 209990 }, { "epoch": 0.8118012710488473, "grad_norm": 0.11046209186315536, "learning_rate": 0.002, "loss": 2.3283, "step": 210000 }, { "epoch": 0.8118399282522305, "grad_norm": 0.10785539448261261, "learning_rate": 0.002, "loss": 2.3411, "step": 210010 }, { "epoch": 0.8118785854556138, "grad_norm": 0.12557782232761383, "learning_rate": 0.002, "loss": 2.341, "step": 210020 }, { "epoch": 0.8119172426589971, "grad_norm": 0.11159656941890717, "learning_rate": 0.002, "loss": 2.3361, "step": 210030 }, { "epoch": 0.8119558998623804, "grad_norm": 0.10711421072483063, "learning_rate": 0.002, "loss": 2.3309, "step": 210040 }, { "epoch": 0.8119945570657636, "grad_norm": 0.10146241635084152, "learning_rate": 0.002, "loss": 2.3343, "step": 210050 }, { "epoch": 0.8120332142691469, "grad_norm": 0.10713270306587219, "learning_rate": 0.002, "loss": 2.3386, "step": 210060 }, { "epoch": 0.8120718714725302, "grad_norm": 0.10487532615661621, "learning_rate": 0.002, "loss": 2.3414, "step": 210070 }, { "epoch": 0.8121105286759135, "grad_norm": 0.09500955045223236, "learning_rate": 0.002, "loss": 2.3385, "step": 210080 }, { "epoch": 0.8121491858792967, "grad_norm": 0.10562046617269516, "learning_rate": 0.002, "loss": 2.3268, "step": 210090 }, { "epoch": 0.81218784308268, "grad_norm": 0.11464356631040573, "learning_rate": 0.002, "loss": 2.3415, "step": 210100 }, { "epoch": 0.8122265002860634, "grad_norm": 0.10423224419355392, "learning_rate": 0.002, "loss": 2.3394, "step": 210110 }, { "epoch": 0.8122651574894466, "grad_norm": 0.09312635660171509, "learning_rate": 0.002, "loss": 2.3346, "step": 210120 }, { "epoch": 0.8123038146928299, "grad_norm": 0.10562204569578171, "learning_rate": 0.002, "loss": 2.3402, "step": 210130 }, { "epoch": 0.8123424718962131, "grad_norm": 0.12157256156206131, "learning_rate": 0.002, "loss": 2.3361, "step": 210140 }, { "epoch": 0.8123811290995964, "grad_norm": 0.09659356623888016, "learning_rate": 0.002, "loss": 2.3544, "step": 210150 }, { "epoch": 0.8124197863029797, "grad_norm": 0.1745530515909195, "learning_rate": 0.002, "loss": 2.3355, "step": 210160 }, { "epoch": 0.812458443506363, "grad_norm": 0.1092507392168045, "learning_rate": 0.002, "loss": 2.3511, "step": 210170 }, { "epoch": 0.8124971007097462, "grad_norm": 0.11003392189741135, "learning_rate": 0.002, "loss": 2.357, "step": 210180 }, { "epoch": 0.8125357579131295, "grad_norm": 0.09807730466127396, "learning_rate": 0.002, "loss": 2.3328, "step": 210190 }, { "epoch": 0.8125744151165128, "grad_norm": 0.11290355026721954, "learning_rate": 0.002, "loss": 2.3367, "step": 210200 }, { "epoch": 0.8126130723198961, "grad_norm": 0.1134817823767662, "learning_rate": 0.002, "loss": 2.3351, "step": 210210 }, { "epoch": 0.8126517295232794, "grad_norm": 0.09779141843318939, "learning_rate": 0.002, "loss": 2.3425, "step": 210220 }, { "epoch": 0.8126903867266626, "grad_norm": 0.10998640209436417, "learning_rate": 0.002, "loss": 2.3451, "step": 210230 }, { "epoch": 0.812729043930046, "grad_norm": 0.10387808084487915, "learning_rate": 0.002, "loss": 2.3412, "step": 210240 }, { "epoch": 0.8127677011334292, "grad_norm": 0.09765990078449249, "learning_rate": 0.002, "loss": 2.3483, "step": 210250 }, { "epoch": 0.8128063583368125, "grad_norm": 0.09933991730213165, "learning_rate": 0.002, "loss": 2.3337, "step": 210260 }, { "epoch": 0.8128450155401957, "grad_norm": 0.09934469312429428, "learning_rate": 0.002, "loss": 2.3403, "step": 210270 }, { "epoch": 0.8128836727435791, "grad_norm": 0.11484673619270325, "learning_rate": 0.002, "loss": 2.3227, "step": 210280 }, { "epoch": 0.8129223299469623, "grad_norm": 0.091299869120121, "learning_rate": 0.002, "loss": 2.3304, "step": 210290 }, { "epoch": 0.8129609871503456, "grad_norm": 0.1021435409784317, "learning_rate": 0.002, "loss": 2.338, "step": 210300 }, { "epoch": 0.8129996443537288, "grad_norm": 0.10851260274648666, "learning_rate": 0.002, "loss": 2.3333, "step": 210310 }, { "epoch": 0.8130383015571121, "grad_norm": 0.08935797214508057, "learning_rate": 0.002, "loss": 2.3317, "step": 210320 }, { "epoch": 0.8130769587604955, "grad_norm": 0.09947099536657333, "learning_rate": 0.002, "loss": 2.3457, "step": 210330 }, { "epoch": 0.8131156159638787, "grad_norm": 0.1050669401884079, "learning_rate": 0.002, "loss": 2.3392, "step": 210340 }, { "epoch": 0.813154273167262, "grad_norm": 0.09838138520717621, "learning_rate": 0.002, "loss": 2.3365, "step": 210350 }, { "epoch": 0.8131929303706452, "grad_norm": 0.09755048900842667, "learning_rate": 0.002, "loss": 2.3378, "step": 210360 }, { "epoch": 0.8132315875740286, "grad_norm": 0.12264610826969147, "learning_rate": 0.002, "loss": 2.3371, "step": 210370 }, { "epoch": 0.8132702447774118, "grad_norm": 0.11399409174919128, "learning_rate": 0.002, "loss": 2.3454, "step": 210380 }, { "epoch": 0.8133089019807951, "grad_norm": 0.09926419705152512, "learning_rate": 0.002, "loss": 2.338, "step": 210390 }, { "epoch": 0.8133475591841783, "grad_norm": 0.095893993973732, "learning_rate": 0.002, "loss": 2.3469, "step": 210400 }, { "epoch": 0.8133862163875617, "grad_norm": 0.1031627431511879, "learning_rate": 0.002, "loss": 2.3276, "step": 210410 }, { "epoch": 0.813424873590945, "grad_norm": 0.11482734978199005, "learning_rate": 0.002, "loss": 2.3366, "step": 210420 }, { "epoch": 0.8134635307943282, "grad_norm": 0.08721525222063065, "learning_rate": 0.002, "loss": 2.3196, "step": 210430 }, { "epoch": 0.8135021879977115, "grad_norm": 0.10867653042078018, "learning_rate": 0.002, "loss": 2.3349, "step": 210440 }, { "epoch": 0.8135408452010948, "grad_norm": 0.09797938913106918, "learning_rate": 0.002, "loss": 2.3408, "step": 210450 }, { "epoch": 0.8135795024044781, "grad_norm": 0.10617242753505707, "learning_rate": 0.002, "loss": 2.3344, "step": 210460 }, { "epoch": 0.8136181596078613, "grad_norm": 0.11680958420038223, "learning_rate": 0.002, "loss": 2.3291, "step": 210470 }, { "epoch": 0.8136568168112446, "grad_norm": 0.10997258871793747, "learning_rate": 0.002, "loss": 2.3281, "step": 210480 }, { "epoch": 0.8136954740146279, "grad_norm": 0.10637106001377106, "learning_rate": 0.002, "loss": 2.3317, "step": 210490 }, { "epoch": 0.8137341312180112, "grad_norm": 0.0961093157529831, "learning_rate": 0.002, "loss": 2.3583, "step": 210500 }, { "epoch": 0.8137727884213944, "grad_norm": 0.1117602214217186, "learning_rate": 0.002, "loss": 2.34, "step": 210510 }, { "epoch": 0.8138114456247777, "grad_norm": 0.09345916658639908, "learning_rate": 0.002, "loss": 2.3467, "step": 210520 }, { "epoch": 0.813850102828161, "grad_norm": 0.12478433549404144, "learning_rate": 0.002, "loss": 2.3521, "step": 210530 }, { "epoch": 0.8138887600315443, "grad_norm": 0.09902498126029968, "learning_rate": 0.002, "loss": 2.3416, "step": 210540 }, { "epoch": 0.8139274172349276, "grad_norm": 0.12334750592708588, "learning_rate": 0.002, "loss": 2.3297, "step": 210550 }, { "epoch": 0.8139660744383108, "grad_norm": 0.11691374331712723, "learning_rate": 0.002, "loss": 2.3359, "step": 210560 }, { "epoch": 0.8140047316416941, "grad_norm": 0.1074489951133728, "learning_rate": 0.002, "loss": 2.3379, "step": 210570 }, { "epoch": 0.8140433888450774, "grad_norm": 0.1104162260890007, "learning_rate": 0.002, "loss": 2.3502, "step": 210580 }, { "epoch": 0.8140820460484607, "grad_norm": 0.09845101833343506, "learning_rate": 0.002, "loss": 2.3443, "step": 210590 }, { "epoch": 0.8141207032518439, "grad_norm": 0.10408642143011093, "learning_rate": 0.002, "loss": 2.3411, "step": 210600 }, { "epoch": 0.8141593604552272, "grad_norm": 0.1344076693058014, "learning_rate": 0.002, "loss": 2.3462, "step": 210610 }, { "epoch": 0.8141980176586106, "grad_norm": 0.09189781546592712, "learning_rate": 0.002, "loss": 2.3352, "step": 210620 }, { "epoch": 0.8142366748619938, "grad_norm": 0.09596944600343704, "learning_rate": 0.002, "loss": 2.3322, "step": 210630 }, { "epoch": 0.814275332065377, "grad_norm": 0.10636798292398453, "learning_rate": 0.002, "loss": 2.3409, "step": 210640 }, { "epoch": 0.8143139892687603, "grad_norm": 0.10187642276287079, "learning_rate": 0.002, "loss": 2.3299, "step": 210650 }, { "epoch": 0.8143526464721437, "grad_norm": 0.10037054866552353, "learning_rate": 0.002, "loss": 2.3425, "step": 210660 }, { "epoch": 0.8143913036755269, "grad_norm": 0.113653264939785, "learning_rate": 0.002, "loss": 2.3404, "step": 210670 }, { "epoch": 0.8144299608789102, "grad_norm": 0.1081879585981369, "learning_rate": 0.002, "loss": 2.334, "step": 210680 }, { "epoch": 0.8144686180822934, "grad_norm": 0.10552342981100082, "learning_rate": 0.002, "loss": 2.3567, "step": 210690 }, { "epoch": 0.8145072752856767, "grad_norm": 0.12662236392498016, "learning_rate": 0.002, "loss": 2.3489, "step": 210700 }, { "epoch": 0.81454593248906, "grad_norm": 0.10322510451078415, "learning_rate": 0.002, "loss": 2.3271, "step": 210710 }, { "epoch": 0.8145845896924433, "grad_norm": 0.10761570185422897, "learning_rate": 0.002, "loss": 2.3515, "step": 210720 }, { "epoch": 0.8146232468958265, "grad_norm": 0.09574377536773682, "learning_rate": 0.002, "loss": 2.3266, "step": 210730 }, { "epoch": 0.8146619040992098, "grad_norm": 0.10037858039140701, "learning_rate": 0.002, "loss": 2.3358, "step": 210740 }, { "epoch": 0.8147005613025932, "grad_norm": 0.13704144954681396, "learning_rate": 0.002, "loss": 2.3449, "step": 210750 }, { "epoch": 0.8147392185059764, "grad_norm": 0.09215878695249557, "learning_rate": 0.002, "loss": 2.3171, "step": 210760 }, { "epoch": 0.8147778757093597, "grad_norm": 0.09566718339920044, "learning_rate": 0.002, "loss": 2.3421, "step": 210770 }, { "epoch": 0.8148165329127429, "grad_norm": 0.1045469343662262, "learning_rate": 0.002, "loss": 2.3419, "step": 210780 }, { "epoch": 0.8148551901161263, "grad_norm": 0.11823946982622147, "learning_rate": 0.002, "loss": 2.3307, "step": 210790 }, { "epoch": 0.8148938473195095, "grad_norm": 0.10271833091974258, "learning_rate": 0.002, "loss": 2.3413, "step": 210800 }, { "epoch": 0.8149325045228928, "grad_norm": 0.09970005601644516, "learning_rate": 0.002, "loss": 2.3438, "step": 210810 }, { "epoch": 0.814971161726276, "grad_norm": 0.10856861621141434, "learning_rate": 0.002, "loss": 2.3598, "step": 210820 }, { "epoch": 0.8150098189296594, "grad_norm": 0.10430700331926346, "learning_rate": 0.002, "loss": 2.3411, "step": 210830 }, { "epoch": 0.8150484761330427, "grad_norm": 0.11050914973020554, "learning_rate": 0.002, "loss": 2.3521, "step": 210840 }, { "epoch": 0.8150871333364259, "grad_norm": 0.11571375280618668, "learning_rate": 0.002, "loss": 2.337, "step": 210850 }, { "epoch": 0.8151257905398092, "grad_norm": 0.1321517676115036, "learning_rate": 0.002, "loss": 2.3321, "step": 210860 }, { "epoch": 0.8151644477431925, "grad_norm": 0.10609392821788788, "learning_rate": 0.002, "loss": 2.3392, "step": 210870 }, { "epoch": 0.8152031049465758, "grad_norm": 0.111605204641819, "learning_rate": 0.002, "loss": 2.3449, "step": 210880 }, { "epoch": 0.815241762149959, "grad_norm": 0.10075222700834274, "learning_rate": 0.002, "loss": 2.3382, "step": 210890 }, { "epoch": 0.8152804193533423, "grad_norm": 0.11976370215415955, "learning_rate": 0.002, "loss": 2.3415, "step": 210900 }, { "epoch": 0.8153190765567255, "grad_norm": 0.09836943447589874, "learning_rate": 0.002, "loss": 2.3479, "step": 210910 }, { "epoch": 0.8153577337601089, "grad_norm": 0.0988221988081932, "learning_rate": 0.002, "loss": 2.331, "step": 210920 }, { "epoch": 0.8153963909634921, "grad_norm": 0.11077386885881424, "learning_rate": 0.002, "loss": 2.3369, "step": 210930 }, { "epoch": 0.8154350481668754, "grad_norm": 0.10229333490133286, "learning_rate": 0.002, "loss": 2.3269, "step": 210940 }, { "epoch": 0.8154737053702587, "grad_norm": 0.10310441255569458, "learning_rate": 0.002, "loss": 2.3303, "step": 210950 }, { "epoch": 0.815512362573642, "grad_norm": 0.10184182971715927, "learning_rate": 0.002, "loss": 2.3504, "step": 210960 }, { "epoch": 0.8155510197770253, "grad_norm": 0.08794326335191727, "learning_rate": 0.002, "loss": 2.318, "step": 210970 }, { "epoch": 0.8155896769804085, "grad_norm": 0.09792374819517136, "learning_rate": 0.002, "loss": 2.3365, "step": 210980 }, { "epoch": 0.8156283341837918, "grad_norm": 0.11295660585165024, "learning_rate": 0.002, "loss": 2.3327, "step": 210990 }, { "epoch": 0.8156669913871751, "grad_norm": 0.09905468672513962, "learning_rate": 0.002, "loss": 2.3429, "step": 211000 }, { "epoch": 0.8157056485905584, "grad_norm": 0.09002207964658737, "learning_rate": 0.002, "loss": 2.3352, "step": 211010 }, { "epoch": 0.8157443057939416, "grad_norm": 0.10204461216926575, "learning_rate": 0.002, "loss": 2.3249, "step": 211020 }, { "epoch": 0.8157829629973249, "grad_norm": 0.10997717827558517, "learning_rate": 0.002, "loss": 2.3281, "step": 211030 }, { "epoch": 0.8158216202007083, "grad_norm": 0.09657556563615799, "learning_rate": 0.002, "loss": 2.3295, "step": 211040 }, { "epoch": 0.8158602774040915, "grad_norm": 0.1114683449268341, "learning_rate": 0.002, "loss": 2.3265, "step": 211050 }, { "epoch": 0.8158989346074748, "grad_norm": 0.08971460908651352, "learning_rate": 0.002, "loss": 2.3392, "step": 211060 }, { "epoch": 0.815937591810858, "grad_norm": 0.1059296503663063, "learning_rate": 0.002, "loss": 2.3417, "step": 211070 }, { "epoch": 0.8159762490142413, "grad_norm": 0.09650532901287079, "learning_rate": 0.002, "loss": 2.3477, "step": 211080 }, { "epoch": 0.8160149062176246, "grad_norm": 0.11085394024848938, "learning_rate": 0.002, "loss": 2.3597, "step": 211090 }, { "epoch": 0.8160535634210079, "grad_norm": 0.08948436379432678, "learning_rate": 0.002, "loss": 2.3351, "step": 211100 }, { "epoch": 0.8160922206243911, "grad_norm": 0.11666127294301987, "learning_rate": 0.002, "loss": 2.3273, "step": 211110 }, { "epoch": 0.8161308778277744, "grad_norm": 0.1133730337023735, "learning_rate": 0.002, "loss": 2.3578, "step": 211120 }, { "epoch": 0.8161695350311577, "grad_norm": 0.09404782205820084, "learning_rate": 0.002, "loss": 2.3478, "step": 211130 }, { "epoch": 0.816208192234541, "grad_norm": 0.09028097242116928, "learning_rate": 0.002, "loss": 2.3403, "step": 211140 }, { "epoch": 0.8162468494379242, "grad_norm": 0.09908846765756607, "learning_rate": 0.002, "loss": 2.3466, "step": 211150 }, { "epoch": 0.8162855066413075, "grad_norm": 0.09854086488485336, "learning_rate": 0.002, "loss": 2.3332, "step": 211160 }, { "epoch": 0.8163241638446909, "grad_norm": 0.09441866725683212, "learning_rate": 0.002, "loss": 2.3411, "step": 211170 }, { "epoch": 0.8163628210480741, "grad_norm": 0.11249484866857529, "learning_rate": 0.002, "loss": 2.3499, "step": 211180 }, { "epoch": 0.8164014782514574, "grad_norm": 0.09532604366540909, "learning_rate": 0.002, "loss": 2.3304, "step": 211190 }, { "epoch": 0.8164401354548406, "grad_norm": 0.08785288035869598, "learning_rate": 0.002, "loss": 2.3211, "step": 211200 }, { "epoch": 0.816478792658224, "grad_norm": 0.10407032072544098, "learning_rate": 0.002, "loss": 2.3523, "step": 211210 }, { "epoch": 0.8165174498616072, "grad_norm": 0.1006706953048706, "learning_rate": 0.002, "loss": 2.3438, "step": 211220 }, { "epoch": 0.8165561070649905, "grad_norm": 0.09381138533353806, "learning_rate": 0.002, "loss": 2.3365, "step": 211230 }, { "epoch": 0.8165947642683737, "grad_norm": 0.09494668990373611, "learning_rate": 0.002, "loss": 2.3347, "step": 211240 }, { "epoch": 0.816633421471757, "grad_norm": 0.10869157314300537, "learning_rate": 0.002, "loss": 2.3363, "step": 211250 }, { "epoch": 0.8166720786751404, "grad_norm": 0.09605544805526733, "learning_rate": 0.002, "loss": 2.3409, "step": 211260 }, { "epoch": 0.8167107358785236, "grad_norm": 0.1651240438222885, "learning_rate": 0.002, "loss": 2.3418, "step": 211270 }, { "epoch": 0.8167493930819069, "grad_norm": 0.11022128909826279, "learning_rate": 0.002, "loss": 2.3273, "step": 211280 }, { "epoch": 0.8167880502852901, "grad_norm": 0.11367551237344742, "learning_rate": 0.002, "loss": 2.3341, "step": 211290 }, { "epoch": 0.8168267074886735, "grad_norm": 0.09908533096313477, "learning_rate": 0.002, "loss": 2.3408, "step": 211300 }, { "epoch": 0.8168653646920567, "grad_norm": 0.15225879848003387, "learning_rate": 0.002, "loss": 2.3581, "step": 211310 }, { "epoch": 0.81690402189544, "grad_norm": 0.09795329719781876, "learning_rate": 0.002, "loss": 2.3383, "step": 211320 }, { "epoch": 0.8169426790988232, "grad_norm": 0.11668098717927933, "learning_rate": 0.002, "loss": 2.3441, "step": 211330 }, { "epoch": 0.8169813363022066, "grad_norm": 0.19748632609844208, "learning_rate": 0.002, "loss": 2.3433, "step": 211340 }, { "epoch": 0.8170199935055898, "grad_norm": 0.11172216385602951, "learning_rate": 0.002, "loss": 2.3678, "step": 211350 }, { "epoch": 0.8170586507089731, "grad_norm": 0.10102499276399612, "learning_rate": 0.002, "loss": 2.328, "step": 211360 }, { "epoch": 0.8170973079123564, "grad_norm": 0.1347329169511795, "learning_rate": 0.002, "loss": 2.3383, "step": 211370 }, { "epoch": 0.8171359651157397, "grad_norm": 0.10373783856630325, "learning_rate": 0.002, "loss": 2.3387, "step": 211380 }, { "epoch": 0.817174622319123, "grad_norm": 0.10776378959417343, "learning_rate": 0.002, "loss": 2.3486, "step": 211390 }, { "epoch": 0.8172132795225062, "grad_norm": 0.09387657791376114, "learning_rate": 0.002, "loss": 2.3324, "step": 211400 }, { "epoch": 0.8172519367258895, "grad_norm": 0.11693062633275986, "learning_rate": 0.002, "loss": 2.3498, "step": 211410 }, { "epoch": 0.8172905939292728, "grad_norm": 0.10187207162380219, "learning_rate": 0.002, "loss": 2.3514, "step": 211420 }, { "epoch": 0.8173292511326561, "grad_norm": 0.10459417849779129, "learning_rate": 0.002, "loss": 2.3454, "step": 211430 }, { "epoch": 0.8173679083360393, "grad_norm": 0.11918050795793533, "learning_rate": 0.002, "loss": 2.3414, "step": 211440 }, { "epoch": 0.8174065655394226, "grad_norm": 0.11814329773187637, "learning_rate": 0.002, "loss": 2.3509, "step": 211450 }, { "epoch": 0.8174452227428058, "grad_norm": 0.08993051201105118, "learning_rate": 0.002, "loss": 2.3205, "step": 211460 }, { "epoch": 0.8174838799461892, "grad_norm": 0.10689457505941391, "learning_rate": 0.002, "loss": 2.3379, "step": 211470 }, { "epoch": 0.8175225371495725, "grad_norm": 0.10431385785341263, "learning_rate": 0.002, "loss": 2.3226, "step": 211480 }, { "epoch": 0.8175611943529557, "grad_norm": 0.09869911521673203, "learning_rate": 0.002, "loss": 2.3492, "step": 211490 }, { "epoch": 0.817599851556339, "grad_norm": 0.09722461551427841, "learning_rate": 0.002, "loss": 2.3382, "step": 211500 }, { "epoch": 0.8176385087597223, "grad_norm": 0.095127172768116, "learning_rate": 0.002, "loss": 2.3357, "step": 211510 }, { "epoch": 0.8176771659631056, "grad_norm": 0.10289538651704788, "learning_rate": 0.002, "loss": 2.3385, "step": 211520 }, { "epoch": 0.8177158231664888, "grad_norm": 0.09813522547483444, "learning_rate": 0.002, "loss": 2.3498, "step": 211530 }, { "epoch": 0.8177544803698721, "grad_norm": 0.12998157739639282, "learning_rate": 0.002, "loss": 2.3344, "step": 211540 }, { "epoch": 0.8177931375732554, "grad_norm": 0.14629608392715454, "learning_rate": 0.002, "loss": 2.3402, "step": 211550 }, { "epoch": 0.8178317947766387, "grad_norm": 0.09135743230581284, "learning_rate": 0.002, "loss": 2.3402, "step": 211560 }, { "epoch": 0.817870451980022, "grad_norm": 0.10678249597549438, "learning_rate": 0.002, "loss": 2.3428, "step": 211570 }, { "epoch": 0.8179091091834052, "grad_norm": 0.12359215319156647, "learning_rate": 0.002, "loss": 2.3298, "step": 211580 }, { "epoch": 0.8179477663867886, "grad_norm": 0.1174280121922493, "learning_rate": 0.002, "loss": 2.3384, "step": 211590 }, { "epoch": 0.8179864235901718, "grad_norm": 0.12401141971349716, "learning_rate": 0.002, "loss": 2.3302, "step": 211600 }, { "epoch": 0.8180250807935551, "grad_norm": 0.09652134776115417, "learning_rate": 0.002, "loss": 2.3449, "step": 211610 }, { "epoch": 0.8180637379969383, "grad_norm": 0.1306145042181015, "learning_rate": 0.002, "loss": 2.3502, "step": 211620 }, { "epoch": 0.8181023952003216, "grad_norm": 0.13372282683849335, "learning_rate": 0.002, "loss": 2.3239, "step": 211630 }, { "epoch": 0.8181410524037049, "grad_norm": 0.13966284692287445, "learning_rate": 0.002, "loss": 2.3353, "step": 211640 }, { "epoch": 0.8181797096070882, "grad_norm": 0.09505265206098557, "learning_rate": 0.002, "loss": 2.3431, "step": 211650 }, { "epoch": 0.8182183668104714, "grad_norm": 0.0904129296541214, "learning_rate": 0.002, "loss": 2.3546, "step": 211660 }, { "epoch": 0.8182570240138547, "grad_norm": 0.09401164203882217, "learning_rate": 0.002, "loss": 2.3538, "step": 211670 }, { "epoch": 0.8182956812172381, "grad_norm": 0.13849051296710968, "learning_rate": 0.002, "loss": 2.3366, "step": 211680 }, { "epoch": 0.8183343384206213, "grad_norm": 0.10335908085107803, "learning_rate": 0.002, "loss": 2.3376, "step": 211690 }, { "epoch": 0.8183729956240046, "grad_norm": 0.10235551744699478, "learning_rate": 0.002, "loss": 2.3394, "step": 211700 }, { "epoch": 0.8184116528273878, "grad_norm": 0.12333092093467712, "learning_rate": 0.002, "loss": 2.3512, "step": 211710 }, { "epoch": 0.8184503100307712, "grad_norm": 0.09448209404945374, "learning_rate": 0.002, "loss": 2.3223, "step": 211720 }, { "epoch": 0.8184889672341544, "grad_norm": 0.12399522960186005, "learning_rate": 0.002, "loss": 2.3529, "step": 211730 }, { "epoch": 0.8185276244375377, "grad_norm": 0.0968480035662651, "learning_rate": 0.002, "loss": 2.3379, "step": 211740 }, { "epoch": 0.8185662816409209, "grad_norm": 0.10621775686740875, "learning_rate": 0.002, "loss": 2.3203, "step": 211750 }, { "epoch": 0.8186049388443043, "grad_norm": 0.11033473163843155, "learning_rate": 0.002, "loss": 2.3463, "step": 211760 }, { "epoch": 0.8186435960476875, "grad_norm": 0.10795174539089203, "learning_rate": 0.002, "loss": 2.3378, "step": 211770 }, { "epoch": 0.8186822532510708, "grad_norm": 0.13240036368370056, "learning_rate": 0.002, "loss": 2.3278, "step": 211780 }, { "epoch": 0.818720910454454, "grad_norm": 0.09353047609329224, "learning_rate": 0.002, "loss": 2.3576, "step": 211790 }, { "epoch": 0.8187595676578373, "grad_norm": 0.09700329601764679, "learning_rate": 0.002, "loss": 2.3496, "step": 211800 }, { "epoch": 0.8187982248612207, "grad_norm": 0.4038792550563812, "learning_rate": 0.002, "loss": 2.3387, "step": 211810 }, { "epoch": 0.8188368820646039, "grad_norm": 0.11665640771389008, "learning_rate": 0.002, "loss": 2.3318, "step": 211820 }, { "epoch": 0.8188755392679872, "grad_norm": 0.08964387327432632, "learning_rate": 0.002, "loss": 2.3317, "step": 211830 }, { "epoch": 0.8189141964713704, "grad_norm": 0.18397657573223114, "learning_rate": 0.002, "loss": 2.3377, "step": 211840 }, { "epoch": 0.8189528536747538, "grad_norm": 0.13267698884010315, "learning_rate": 0.002, "loss": 2.3489, "step": 211850 }, { "epoch": 0.818991510878137, "grad_norm": 0.10803057998418808, "learning_rate": 0.002, "loss": 2.3504, "step": 211860 }, { "epoch": 0.8190301680815203, "grad_norm": 0.1194060891866684, "learning_rate": 0.002, "loss": 2.3491, "step": 211870 }, { "epoch": 0.8190688252849035, "grad_norm": 0.11083207279443741, "learning_rate": 0.002, "loss": 2.357, "step": 211880 }, { "epoch": 0.8191074824882869, "grad_norm": 0.10171517729759216, "learning_rate": 0.002, "loss": 2.3419, "step": 211890 }, { "epoch": 0.8191461396916702, "grad_norm": 0.11016334593296051, "learning_rate": 0.002, "loss": 2.3363, "step": 211900 }, { "epoch": 0.8191847968950534, "grad_norm": 0.11723824590444565, "learning_rate": 0.002, "loss": 2.3361, "step": 211910 }, { "epoch": 0.8192234540984367, "grad_norm": 0.11024526506662369, "learning_rate": 0.002, "loss": 2.3486, "step": 211920 }, { "epoch": 0.81926211130182, "grad_norm": 0.10914993286132812, "learning_rate": 0.002, "loss": 2.3493, "step": 211930 }, { "epoch": 0.8193007685052033, "grad_norm": 0.1024850457906723, "learning_rate": 0.002, "loss": 2.3435, "step": 211940 }, { "epoch": 0.8193394257085865, "grad_norm": 0.09786119312047958, "learning_rate": 0.002, "loss": 2.3348, "step": 211950 }, { "epoch": 0.8193780829119698, "grad_norm": 0.08920815587043762, "learning_rate": 0.002, "loss": 2.3419, "step": 211960 }, { "epoch": 0.8194167401153531, "grad_norm": 0.11163760721683502, "learning_rate": 0.002, "loss": 2.3365, "step": 211970 }, { "epoch": 0.8194553973187364, "grad_norm": 0.10414455085992813, "learning_rate": 0.002, "loss": 2.3341, "step": 211980 }, { "epoch": 0.8194940545221197, "grad_norm": 0.10462162643671036, "learning_rate": 0.002, "loss": 2.3306, "step": 211990 }, { "epoch": 0.8195327117255029, "grad_norm": 0.11228974163532257, "learning_rate": 0.002, "loss": 2.3456, "step": 212000 }, { "epoch": 0.8195713689288862, "grad_norm": 0.10974586009979248, "learning_rate": 0.002, "loss": 2.3433, "step": 212010 }, { "epoch": 0.8196100261322695, "grad_norm": 0.10059110820293427, "learning_rate": 0.002, "loss": 2.3193, "step": 212020 }, { "epoch": 0.8196486833356528, "grad_norm": 0.3894408345222473, "learning_rate": 0.002, "loss": 2.338, "step": 212030 }, { "epoch": 0.819687340539036, "grad_norm": 0.09322717040777206, "learning_rate": 0.002, "loss": 2.3416, "step": 212040 }, { "epoch": 0.8197259977424193, "grad_norm": 0.10623813420534134, "learning_rate": 0.002, "loss": 2.3468, "step": 212050 }, { "epoch": 0.8197646549458026, "grad_norm": 0.09529799222946167, "learning_rate": 0.002, "loss": 2.3314, "step": 212060 }, { "epoch": 0.8198033121491859, "grad_norm": 0.117469422519207, "learning_rate": 0.002, "loss": 2.3435, "step": 212070 }, { "epoch": 0.8198419693525691, "grad_norm": 0.09921499341726303, "learning_rate": 0.002, "loss": 2.3481, "step": 212080 }, { "epoch": 0.8198806265559524, "grad_norm": 0.09404002875089645, "learning_rate": 0.002, "loss": 2.3605, "step": 212090 }, { "epoch": 0.8199192837593358, "grad_norm": 0.11683033406734467, "learning_rate": 0.002, "loss": 2.3582, "step": 212100 }, { "epoch": 0.819957940962719, "grad_norm": 0.13784067332744598, "learning_rate": 0.002, "loss": 2.3272, "step": 212110 }, { "epoch": 0.8199965981661023, "grad_norm": 0.11979611963033676, "learning_rate": 0.002, "loss": 2.3425, "step": 212120 }, { "epoch": 0.8200352553694855, "grad_norm": 0.09688198566436768, "learning_rate": 0.002, "loss": 2.3352, "step": 212130 }, { "epoch": 0.8200739125728689, "grad_norm": 0.10285995155572891, "learning_rate": 0.002, "loss": 2.3391, "step": 212140 }, { "epoch": 0.8201125697762521, "grad_norm": 0.11738825589418411, "learning_rate": 0.002, "loss": 2.3412, "step": 212150 }, { "epoch": 0.8201512269796354, "grad_norm": 0.10605660825967789, "learning_rate": 0.002, "loss": 2.3553, "step": 212160 }, { "epoch": 0.8201898841830186, "grad_norm": 0.09839721024036407, "learning_rate": 0.002, "loss": 2.3343, "step": 212170 }, { "epoch": 0.8202285413864019, "grad_norm": 0.10906285047531128, "learning_rate": 0.002, "loss": 2.3394, "step": 212180 }, { "epoch": 0.8202671985897853, "grad_norm": 0.09535101801156998, "learning_rate": 0.002, "loss": 2.3403, "step": 212190 }, { "epoch": 0.8203058557931685, "grad_norm": 0.13287539780139923, "learning_rate": 0.002, "loss": 2.3487, "step": 212200 }, { "epoch": 0.8203445129965518, "grad_norm": 0.09444214403629303, "learning_rate": 0.002, "loss": 2.3338, "step": 212210 }, { "epoch": 0.820383170199935, "grad_norm": 0.09353778511285782, "learning_rate": 0.002, "loss": 2.3277, "step": 212220 }, { "epoch": 0.8204218274033184, "grad_norm": 0.1208491399884224, "learning_rate": 0.002, "loss": 2.336, "step": 212230 }, { "epoch": 0.8204604846067016, "grad_norm": 0.09998615086078644, "learning_rate": 0.002, "loss": 2.3487, "step": 212240 }, { "epoch": 0.8204991418100849, "grad_norm": 0.11312830448150635, "learning_rate": 0.002, "loss": 2.3431, "step": 212250 }, { "epoch": 0.8205377990134681, "grad_norm": 0.09280294924974442, "learning_rate": 0.002, "loss": 2.3543, "step": 212260 }, { "epoch": 0.8205764562168515, "grad_norm": 0.1156095340847969, "learning_rate": 0.002, "loss": 2.3464, "step": 212270 }, { "epoch": 0.8206151134202347, "grad_norm": 0.1221746876835823, "learning_rate": 0.002, "loss": 2.3412, "step": 212280 }, { "epoch": 0.820653770623618, "grad_norm": 0.10551083832979202, "learning_rate": 0.002, "loss": 2.3315, "step": 212290 }, { "epoch": 0.8206924278270012, "grad_norm": 0.1070566326379776, "learning_rate": 0.002, "loss": 2.3473, "step": 212300 }, { "epoch": 0.8207310850303846, "grad_norm": 0.1362307369709015, "learning_rate": 0.002, "loss": 2.3393, "step": 212310 }, { "epoch": 0.8207697422337679, "grad_norm": 0.10047736763954163, "learning_rate": 0.002, "loss": 2.3434, "step": 212320 }, { "epoch": 0.8208083994371511, "grad_norm": 0.10491714626550674, "learning_rate": 0.002, "loss": 2.3515, "step": 212330 }, { "epoch": 0.8208470566405344, "grad_norm": 0.1041213795542717, "learning_rate": 0.002, "loss": 2.3419, "step": 212340 }, { "epoch": 0.8208857138439177, "grad_norm": 0.11715826392173767, "learning_rate": 0.002, "loss": 2.3254, "step": 212350 }, { "epoch": 0.820924371047301, "grad_norm": 0.09646627306938171, "learning_rate": 0.002, "loss": 2.3339, "step": 212360 }, { "epoch": 0.8209630282506842, "grad_norm": 0.1179962232708931, "learning_rate": 0.002, "loss": 2.3418, "step": 212370 }, { "epoch": 0.8210016854540675, "grad_norm": 0.1079191043972969, "learning_rate": 0.002, "loss": 2.3358, "step": 212380 }, { "epoch": 0.8210403426574507, "grad_norm": 0.09939951449632645, "learning_rate": 0.002, "loss": 2.3453, "step": 212390 }, { "epoch": 0.8210789998608341, "grad_norm": 0.11012696474790573, "learning_rate": 0.002, "loss": 2.3368, "step": 212400 }, { "epoch": 0.8211176570642174, "grad_norm": 0.10655669867992401, "learning_rate": 0.002, "loss": 2.3364, "step": 212410 }, { "epoch": 0.8211563142676006, "grad_norm": 0.11589882522821426, "learning_rate": 0.002, "loss": 2.336, "step": 212420 }, { "epoch": 0.8211949714709839, "grad_norm": 0.10871432721614838, "learning_rate": 0.002, "loss": 2.3438, "step": 212430 }, { "epoch": 0.8212336286743672, "grad_norm": 0.12031927704811096, "learning_rate": 0.002, "loss": 2.3355, "step": 212440 }, { "epoch": 0.8212722858777505, "grad_norm": 0.10812465101480484, "learning_rate": 0.002, "loss": 2.3494, "step": 212450 }, { "epoch": 0.8213109430811337, "grad_norm": 0.10944726318120956, "learning_rate": 0.002, "loss": 2.3442, "step": 212460 }, { "epoch": 0.821349600284517, "grad_norm": 0.10045637935400009, "learning_rate": 0.002, "loss": 2.3285, "step": 212470 }, { "epoch": 0.8213882574879003, "grad_norm": 0.10013867914676666, "learning_rate": 0.002, "loss": 2.3456, "step": 212480 }, { "epoch": 0.8214269146912836, "grad_norm": 0.12191098183393478, "learning_rate": 0.002, "loss": 2.3394, "step": 212490 }, { "epoch": 0.8214655718946668, "grad_norm": 0.11149000376462936, "learning_rate": 0.002, "loss": 2.3541, "step": 212500 }, { "epoch": 0.8215042290980501, "grad_norm": 0.10808087885379791, "learning_rate": 0.002, "loss": 2.3322, "step": 212510 }, { "epoch": 0.8215428863014335, "grad_norm": 0.09506329149007797, "learning_rate": 0.002, "loss": 2.3283, "step": 212520 }, { "epoch": 0.8215815435048167, "grad_norm": 0.10009314119815826, "learning_rate": 0.002, "loss": 2.336, "step": 212530 }, { "epoch": 0.8216202007082, "grad_norm": 0.1179816722869873, "learning_rate": 0.002, "loss": 2.3443, "step": 212540 }, { "epoch": 0.8216588579115832, "grad_norm": 0.10906680673360825, "learning_rate": 0.002, "loss": 2.3503, "step": 212550 }, { "epoch": 0.8216975151149665, "grad_norm": 0.10762029141187668, "learning_rate": 0.002, "loss": 2.3461, "step": 212560 }, { "epoch": 0.8217361723183498, "grad_norm": 0.10383537411689758, "learning_rate": 0.002, "loss": 2.3383, "step": 212570 }, { "epoch": 0.8217748295217331, "grad_norm": 0.09981483221054077, "learning_rate": 0.002, "loss": 2.3422, "step": 212580 }, { "epoch": 0.8218134867251163, "grad_norm": 0.1330537050962448, "learning_rate": 0.002, "loss": 2.3437, "step": 212590 }, { "epoch": 0.8218521439284996, "grad_norm": 0.09957239776849747, "learning_rate": 0.002, "loss": 2.3428, "step": 212600 }, { "epoch": 0.821890801131883, "grad_norm": 0.09773680567741394, "learning_rate": 0.002, "loss": 2.3547, "step": 212610 }, { "epoch": 0.8219294583352662, "grad_norm": 0.1022195816040039, "learning_rate": 0.002, "loss": 2.3392, "step": 212620 }, { "epoch": 0.8219681155386495, "grad_norm": 0.11773774772882462, "learning_rate": 0.002, "loss": 2.3311, "step": 212630 }, { "epoch": 0.8220067727420327, "grad_norm": 0.12010695040225983, "learning_rate": 0.002, "loss": 2.3553, "step": 212640 }, { "epoch": 0.8220454299454161, "grad_norm": 0.10011716187000275, "learning_rate": 0.002, "loss": 2.3446, "step": 212650 }, { "epoch": 0.8220840871487993, "grad_norm": 0.0979095920920372, "learning_rate": 0.002, "loss": 2.3422, "step": 212660 }, { "epoch": 0.8221227443521826, "grad_norm": 0.10783082991838455, "learning_rate": 0.002, "loss": 2.3384, "step": 212670 }, { "epoch": 0.8221614015555658, "grad_norm": 0.10398319363594055, "learning_rate": 0.002, "loss": 2.348, "step": 212680 }, { "epoch": 0.8222000587589492, "grad_norm": 0.09824015945196152, "learning_rate": 0.002, "loss": 2.33, "step": 212690 }, { "epoch": 0.8222387159623324, "grad_norm": 0.09613677859306335, "learning_rate": 0.002, "loss": 2.3333, "step": 212700 }, { "epoch": 0.8222773731657157, "grad_norm": 0.1030166894197464, "learning_rate": 0.002, "loss": 2.3352, "step": 212710 }, { "epoch": 0.822316030369099, "grad_norm": 0.09595217555761337, "learning_rate": 0.002, "loss": 2.3338, "step": 212720 }, { "epoch": 0.8223546875724822, "grad_norm": 0.09824813902378082, "learning_rate": 0.002, "loss": 2.3413, "step": 212730 }, { "epoch": 0.8223933447758656, "grad_norm": 0.09953426569700241, "learning_rate": 0.002, "loss": 2.3466, "step": 212740 }, { "epoch": 0.8224320019792488, "grad_norm": 0.10548051446676254, "learning_rate": 0.002, "loss": 2.3482, "step": 212750 }, { "epoch": 0.8224706591826321, "grad_norm": 0.11247394979000092, "learning_rate": 0.002, "loss": 2.328, "step": 212760 }, { "epoch": 0.8225093163860153, "grad_norm": 0.11061998456716537, "learning_rate": 0.002, "loss": 2.3346, "step": 212770 }, { "epoch": 0.8225479735893987, "grad_norm": 0.11678779870271683, "learning_rate": 0.002, "loss": 2.3513, "step": 212780 }, { "epoch": 0.8225866307927819, "grad_norm": 0.10830101370811462, "learning_rate": 0.002, "loss": 2.3555, "step": 212790 }, { "epoch": 0.8226252879961652, "grad_norm": 0.08461033552885056, "learning_rate": 0.002, "loss": 2.3195, "step": 212800 }, { "epoch": 0.8226639451995484, "grad_norm": 0.10060620307922363, "learning_rate": 0.002, "loss": 2.3375, "step": 212810 }, { "epoch": 0.8227026024029318, "grad_norm": 0.11858765780925751, "learning_rate": 0.002, "loss": 2.3359, "step": 212820 }, { "epoch": 0.8227412596063151, "grad_norm": 0.11985529214143753, "learning_rate": 0.002, "loss": 2.3403, "step": 212830 }, { "epoch": 0.8227799168096983, "grad_norm": 0.11134400218725204, "learning_rate": 0.002, "loss": 2.3383, "step": 212840 }, { "epoch": 0.8228185740130816, "grad_norm": 0.11833249032497406, "learning_rate": 0.002, "loss": 2.3468, "step": 212850 }, { "epoch": 0.8228572312164649, "grad_norm": 0.11237727850675583, "learning_rate": 0.002, "loss": 2.3433, "step": 212860 }, { "epoch": 0.8228958884198482, "grad_norm": 0.12930941581726074, "learning_rate": 0.002, "loss": 2.3265, "step": 212870 }, { "epoch": 0.8229345456232314, "grad_norm": 0.11326020210981369, "learning_rate": 0.002, "loss": 2.3326, "step": 212880 }, { "epoch": 0.8229732028266147, "grad_norm": 0.10139208287000656, "learning_rate": 0.002, "loss": 2.343, "step": 212890 }, { "epoch": 0.823011860029998, "grad_norm": 0.11200857162475586, "learning_rate": 0.002, "loss": 2.3315, "step": 212900 }, { "epoch": 0.8230505172333813, "grad_norm": 0.15013805031776428, "learning_rate": 0.002, "loss": 2.3392, "step": 212910 }, { "epoch": 0.8230891744367645, "grad_norm": 0.10213683545589447, "learning_rate": 0.002, "loss": 2.3341, "step": 212920 }, { "epoch": 0.8231278316401478, "grad_norm": 0.1358550786972046, "learning_rate": 0.002, "loss": 2.3238, "step": 212930 }, { "epoch": 0.823166488843531, "grad_norm": 0.09838774055242538, "learning_rate": 0.002, "loss": 2.3361, "step": 212940 }, { "epoch": 0.8232051460469144, "grad_norm": 0.10487181693315506, "learning_rate": 0.002, "loss": 2.347, "step": 212950 }, { "epoch": 0.8232438032502977, "grad_norm": 0.10073231905698776, "learning_rate": 0.002, "loss": 2.3237, "step": 212960 }, { "epoch": 0.8232824604536809, "grad_norm": 0.11269784718751907, "learning_rate": 0.002, "loss": 2.3318, "step": 212970 }, { "epoch": 0.8233211176570642, "grad_norm": 0.10114607959985733, "learning_rate": 0.002, "loss": 2.3477, "step": 212980 }, { "epoch": 0.8233597748604475, "grad_norm": 0.10706079751253128, "learning_rate": 0.002, "loss": 2.3279, "step": 212990 }, { "epoch": 0.8233984320638308, "grad_norm": 0.10866525024175644, "learning_rate": 0.002, "loss": 2.3539, "step": 213000 }, { "epoch": 0.823437089267214, "grad_norm": 0.1156979352235794, "learning_rate": 0.002, "loss": 2.3432, "step": 213010 }, { "epoch": 0.8234757464705973, "grad_norm": 0.09846003353595734, "learning_rate": 0.002, "loss": 2.3635, "step": 213020 }, { "epoch": 0.8235144036739807, "grad_norm": 0.13185329735279083, "learning_rate": 0.002, "loss": 2.3265, "step": 213030 }, { "epoch": 0.8235530608773639, "grad_norm": 0.10501515120267868, "learning_rate": 0.002, "loss": 2.3355, "step": 213040 }, { "epoch": 0.8235917180807472, "grad_norm": 0.10742828249931335, "learning_rate": 0.002, "loss": 2.3552, "step": 213050 }, { "epoch": 0.8236303752841304, "grad_norm": 0.10099185258150101, "learning_rate": 0.002, "loss": 2.3382, "step": 213060 }, { "epoch": 0.8236690324875138, "grad_norm": 0.1175713986158371, "learning_rate": 0.002, "loss": 2.3393, "step": 213070 }, { "epoch": 0.823707689690897, "grad_norm": 0.11294372379779816, "learning_rate": 0.002, "loss": 2.3397, "step": 213080 }, { "epoch": 0.8237463468942803, "grad_norm": 0.11598861962556839, "learning_rate": 0.002, "loss": 2.3242, "step": 213090 }, { "epoch": 0.8237850040976635, "grad_norm": 0.10174839943647385, "learning_rate": 0.002, "loss": 2.3366, "step": 213100 }, { "epoch": 0.8238236613010468, "grad_norm": 0.13224554061889648, "learning_rate": 0.002, "loss": 2.349, "step": 213110 }, { "epoch": 0.8238623185044301, "grad_norm": 0.10385262966156006, "learning_rate": 0.002, "loss": 2.331, "step": 213120 }, { "epoch": 0.8239009757078134, "grad_norm": 0.11136598885059357, "learning_rate": 0.002, "loss": 2.3435, "step": 213130 }, { "epoch": 0.8239396329111967, "grad_norm": 0.10111375153064728, "learning_rate": 0.002, "loss": 2.3463, "step": 213140 }, { "epoch": 0.8239782901145799, "grad_norm": 0.19907134771347046, "learning_rate": 0.002, "loss": 2.3361, "step": 213150 }, { "epoch": 0.8240169473179633, "grad_norm": 0.10010344535112381, "learning_rate": 0.002, "loss": 2.3446, "step": 213160 }, { "epoch": 0.8240556045213465, "grad_norm": 0.10503219068050385, "learning_rate": 0.002, "loss": 2.3355, "step": 213170 }, { "epoch": 0.8240942617247298, "grad_norm": 0.09305467456579208, "learning_rate": 0.002, "loss": 2.3445, "step": 213180 }, { "epoch": 0.824132918928113, "grad_norm": 0.11915779858827591, "learning_rate": 0.002, "loss": 2.3365, "step": 213190 }, { "epoch": 0.8241715761314964, "grad_norm": 0.10269779711961746, "learning_rate": 0.002, "loss": 2.3386, "step": 213200 }, { "epoch": 0.8242102333348796, "grad_norm": 0.09319688379764557, "learning_rate": 0.002, "loss": 2.3382, "step": 213210 }, { "epoch": 0.8242488905382629, "grad_norm": 0.11573675274848938, "learning_rate": 0.002, "loss": 2.3336, "step": 213220 }, { "epoch": 0.8242875477416461, "grad_norm": 0.11789800226688385, "learning_rate": 0.002, "loss": 2.3393, "step": 213230 }, { "epoch": 0.8243262049450295, "grad_norm": 0.10294772684574127, "learning_rate": 0.002, "loss": 2.3412, "step": 213240 }, { "epoch": 0.8243648621484128, "grad_norm": 0.11846683919429779, "learning_rate": 0.002, "loss": 2.3295, "step": 213250 }, { "epoch": 0.824403519351796, "grad_norm": 0.11015811562538147, "learning_rate": 0.002, "loss": 2.353, "step": 213260 }, { "epoch": 0.8244421765551793, "grad_norm": 0.11703477054834366, "learning_rate": 0.002, "loss": 2.3485, "step": 213270 }, { "epoch": 0.8244808337585626, "grad_norm": 0.1198219358921051, "learning_rate": 0.002, "loss": 2.3431, "step": 213280 }, { "epoch": 0.8245194909619459, "grad_norm": 0.09992329031229019, "learning_rate": 0.002, "loss": 2.3292, "step": 213290 }, { "epoch": 0.8245581481653291, "grad_norm": 0.09687243402004242, "learning_rate": 0.002, "loss": 2.3248, "step": 213300 }, { "epoch": 0.8245968053687124, "grad_norm": 0.1290697306394577, "learning_rate": 0.002, "loss": 2.3321, "step": 213310 }, { "epoch": 0.8246354625720956, "grad_norm": 0.09441085904836655, "learning_rate": 0.002, "loss": 2.359, "step": 213320 }, { "epoch": 0.824674119775479, "grad_norm": 0.09515941888093948, "learning_rate": 0.002, "loss": 2.3466, "step": 213330 }, { "epoch": 0.8247127769788622, "grad_norm": 0.1018657311797142, "learning_rate": 0.002, "loss": 2.3381, "step": 213340 }, { "epoch": 0.8247514341822455, "grad_norm": 0.10830926895141602, "learning_rate": 0.002, "loss": 2.3283, "step": 213350 }, { "epoch": 0.8247900913856288, "grad_norm": 0.09651590883731842, "learning_rate": 0.002, "loss": 2.3349, "step": 213360 }, { "epoch": 0.8248287485890121, "grad_norm": 0.10271652042865753, "learning_rate": 0.002, "loss": 2.3392, "step": 213370 }, { "epoch": 0.8248674057923954, "grad_norm": 0.09921977669000626, "learning_rate": 0.002, "loss": 2.3303, "step": 213380 }, { "epoch": 0.8249060629957786, "grad_norm": 0.11118409782648087, "learning_rate": 0.002, "loss": 2.3314, "step": 213390 }, { "epoch": 0.8249447201991619, "grad_norm": 0.10362720489501953, "learning_rate": 0.002, "loss": 2.3531, "step": 213400 }, { "epoch": 0.8249833774025452, "grad_norm": 0.09841493517160416, "learning_rate": 0.002, "loss": 2.3383, "step": 213410 }, { "epoch": 0.8250220346059285, "grad_norm": 0.11038927733898163, "learning_rate": 0.002, "loss": 2.3379, "step": 213420 }, { "epoch": 0.8250606918093117, "grad_norm": 0.09338337928056717, "learning_rate": 0.002, "loss": 2.3552, "step": 213430 }, { "epoch": 0.825099349012695, "grad_norm": 0.09041500091552734, "learning_rate": 0.002, "loss": 2.339, "step": 213440 }, { "epoch": 0.8251380062160784, "grad_norm": 0.29466360807418823, "learning_rate": 0.002, "loss": 2.3526, "step": 213450 }, { "epoch": 0.8251766634194616, "grad_norm": 0.12012405693531036, "learning_rate": 0.002, "loss": 2.3542, "step": 213460 }, { "epoch": 0.8252153206228449, "grad_norm": 0.08641993254423141, "learning_rate": 0.002, "loss": 2.341, "step": 213470 }, { "epoch": 0.8252539778262281, "grad_norm": 0.08934962749481201, "learning_rate": 0.002, "loss": 2.344, "step": 213480 }, { "epoch": 0.8252926350296114, "grad_norm": 0.11363324522972107, "learning_rate": 0.002, "loss": 2.3311, "step": 213490 }, { "epoch": 0.8253312922329947, "grad_norm": 0.10849322378635406, "learning_rate": 0.002, "loss": 2.3456, "step": 213500 }, { "epoch": 0.825369949436378, "grad_norm": 0.10399137437343597, "learning_rate": 0.002, "loss": 2.3387, "step": 213510 }, { "epoch": 0.8254086066397612, "grad_norm": 0.10321518033742905, "learning_rate": 0.002, "loss": 2.3495, "step": 213520 }, { "epoch": 0.8254472638431445, "grad_norm": 0.11302473396062851, "learning_rate": 0.002, "loss": 2.3332, "step": 213530 }, { "epoch": 0.8254859210465278, "grad_norm": 0.12279103696346283, "learning_rate": 0.002, "loss": 2.3373, "step": 213540 }, { "epoch": 0.8255245782499111, "grad_norm": 0.10647827386856079, "learning_rate": 0.002, "loss": 2.3533, "step": 213550 }, { "epoch": 0.8255632354532944, "grad_norm": 0.1170077994465828, "learning_rate": 0.002, "loss": 2.3462, "step": 213560 }, { "epoch": 0.8256018926566776, "grad_norm": 0.09381834417581558, "learning_rate": 0.002, "loss": 2.335, "step": 213570 }, { "epoch": 0.825640549860061, "grad_norm": 0.12134481966495514, "learning_rate": 0.002, "loss": 2.3294, "step": 213580 }, { "epoch": 0.8256792070634442, "grad_norm": 0.09720830619335175, "learning_rate": 0.002, "loss": 2.3418, "step": 213590 }, { "epoch": 0.8257178642668275, "grad_norm": 0.11691569536924362, "learning_rate": 0.002, "loss": 2.3439, "step": 213600 }, { "epoch": 0.8257565214702107, "grad_norm": 0.10867815464735031, "learning_rate": 0.002, "loss": 2.352, "step": 213610 }, { "epoch": 0.8257951786735941, "grad_norm": 0.10696035623550415, "learning_rate": 0.002, "loss": 2.3452, "step": 213620 }, { "epoch": 0.8258338358769773, "grad_norm": 0.1083867996931076, "learning_rate": 0.002, "loss": 2.3394, "step": 213630 }, { "epoch": 0.8258724930803606, "grad_norm": 0.1000467911362648, "learning_rate": 0.002, "loss": 2.3296, "step": 213640 }, { "epoch": 0.8259111502837438, "grad_norm": 0.10876516997814178, "learning_rate": 0.002, "loss": 2.3311, "step": 213650 }, { "epoch": 0.8259498074871271, "grad_norm": 0.11239168047904968, "learning_rate": 0.002, "loss": 2.3493, "step": 213660 }, { "epoch": 0.8259884646905105, "grad_norm": 0.09718149155378342, "learning_rate": 0.002, "loss": 2.3406, "step": 213670 }, { "epoch": 0.8260271218938937, "grad_norm": 0.09330479800701141, "learning_rate": 0.002, "loss": 2.3378, "step": 213680 }, { "epoch": 0.826065779097277, "grad_norm": 0.14178209006786346, "learning_rate": 0.002, "loss": 2.3279, "step": 213690 }, { "epoch": 0.8261044363006602, "grad_norm": 0.097447469830513, "learning_rate": 0.002, "loss": 2.3474, "step": 213700 }, { "epoch": 0.8261430935040436, "grad_norm": 0.11665435135364532, "learning_rate": 0.002, "loss": 2.3228, "step": 213710 }, { "epoch": 0.8261817507074268, "grad_norm": 0.10119656473398209, "learning_rate": 0.002, "loss": 2.3397, "step": 213720 }, { "epoch": 0.8262204079108101, "grad_norm": 0.153537318110466, "learning_rate": 0.002, "loss": 2.3452, "step": 213730 }, { "epoch": 0.8262590651141933, "grad_norm": 0.09502313286066055, "learning_rate": 0.002, "loss": 2.329, "step": 213740 }, { "epoch": 0.8262977223175767, "grad_norm": 0.11871633678674698, "learning_rate": 0.002, "loss": 2.3449, "step": 213750 }, { "epoch": 0.82633637952096, "grad_norm": 0.11032947897911072, "learning_rate": 0.002, "loss": 2.3331, "step": 213760 }, { "epoch": 0.8263750367243432, "grad_norm": 0.10220306366682053, "learning_rate": 0.002, "loss": 2.327, "step": 213770 }, { "epoch": 0.8264136939277265, "grad_norm": 0.09602031111717224, "learning_rate": 0.002, "loss": 2.3375, "step": 213780 }, { "epoch": 0.8264523511311098, "grad_norm": 0.10085641592741013, "learning_rate": 0.002, "loss": 2.3307, "step": 213790 }, { "epoch": 0.8264910083344931, "grad_norm": 0.10796209424734116, "learning_rate": 0.002, "loss": 2.3442, "step": 213800 }, { "epoch": 0.8265296655378763, "grad_norm": 0.11179764568805695, "learning_rate": 0.002, "loss": 2.3422, "step": 213810 }, { "epoch": 0.8265683227412596, "grad_norm": 0.10923667997121811, "learning_rate": 0.002, "loss": 2.3416, "step": 213820 }, { "epoch": 0.8266069799446429, "grad_norm": 0.10238125920295715, "learning_rate": 0.002, "loss": 2.3293, "step": 213830 }, { "epoch": 0.8266456371480262, "grad_norm": 0.1121905967593193, "learning_rate": 0.002, "loss": 2.3456, "step": 213840 }, { "epoch": 0.8266842943514094, "grad_norm": 0.09311661869287491, "learning_rate": 0.002, "loss": 2.3363, "step": 213850 }, { "epoch": 0.8267229515547927, "grad_norm": 0.0946795791387558, "learning_rate": 0.002, "loss": 2.3403, "step": 213860 }, { "epoch": 0.826761608758176, "grad_norm": 0.10480405390262604, "learning_rate": 0.002, "loss": 2.3243, "step": 213870 }, { "epoch": 0.8268002659615593, "grad_norm": 0.11183927208185196, "learning_rate": 0.002, "loss": 2.34, "step": 213880 }, { "epoch": 0.8268389231649426, "grad_norm": 0.10681787133216858, "learning_rate": 0.002, "loss": 2.3457, "step": 213890 }, { "epoch": 0.8268775803683258, "grad_norm": 0.09188628196716309, "learning_rate": 0.002, "loss": 2.3376, "step": 213900 }, { "epoch": 0.8269162375717091, "grad_norm": 0.11369525641202927, "learning_rate": 0.002, "loss": 2.3511, "step": 213910 }, { "epoch": 0.8269548947750924, "grad_norm": 0.11502217501401901, "learning_rate": 0.002, "loss": 2.3331, "step": 213920 }, { "epoch": 0.8269935519784757, "grad_norm": 0.12712916731834412, "learning_rate": 0.002, "loss": 2.3283, "step": 213930 }, { "epoch": 0.8270322091818589, "grad_norm": 0.11223436892032623, "learning_rate": 0.002, "loss": 2.3219, "step": 213940 }, { "epoch": 0.8270708663852422, "grad_norm": 0.0936799943447113, "learning_rate": 0.002, "loss": 2.3389, "step": 213950 }, { "epoch": 0.8271095235886256, "grad_norm": 0.10395878553390503, "learning_rate": 0.002, "loss": 2.3452, "step": 213960 }, { "epoch": 0.8271481807920088, "grad_norm": 0.10461578518152237, "learning_rate": 0.002, "loss": 2.3411, "step": 213970 }, { "epoch": 0.827186837995392, "grad_norm": 0.10377980023622513, "learning_rate": 0.002, "loss": 2.3361, "step": 213980 }, { "epoch": 0.8272254951987753, "grad_norm": 0.10748467594385147, "learning_rate": 0.002, "loss": 2.3545, "step": 213990 }, { "epoch": 0.8272641524021587, "grad_norm": 0.10781331360340118, "learning_rate": 0.002, "loss": 2.3623, "step": 214000 }, { "epoch": 0.8273028096055419, "grad_norm": 0.0984257161617279, "learning_rate": 0.002, "loss": 2.3361, "step": 214010 }, { "epoch": 0.8273414668089252, "grad_norm": 0.09564877301454544, "learning_rate": 0.002, "loss": 2.3412, "step": 214020 }, { "epoch": 0.8273801240123084, "grad_norm": 0.10228132456541061, "learning_rate": 0.002, "loss": 2.3356, "step": 214030 }, { "epoch": 0.8274187812156917, "grad_norm": 0.1266312301158905, "learning_rate": 0.002, "loss": 2.3499, "step": 214040 }, { "epoch": 0.827457438419075, "grad_norm": 0.12409737706184387, "learning_rate": 0.002, "loss": 2.3555, "step": 214050 }, { "epoch": 0.8274960956224583, "grad_norm": 0.09669151902198792, "learning_rate": 0.002, "loss": 2.3456, "step": 214060 }, { "epoch": 0.8275347528258415, "grad_norm": 0.11038026958703995, "learning_rate": 0.002, "loss": 2.3393, "step": 214070 }, { "epoch": 0.8275734100292248, "grad_norm": 0.10415840148925781, "learning_rate": 0.002, "loss": 2.3322, "step": 214080 }, { "epoch": 0.8276120672326082, "grad_norm": 0.09661370515823364, "learning_rate": 0.002, "loss": 2.3328, "step": 214090 }, { "epoch": 0.8276507244359914, "grad_norm": 0.1039520800113678, "learning_rate": 0.002, "loss": 2.3344, "step": 214100 }, { "epoch": 0.8276893816393747, "grad_norm": 0.13938656449317932, "learning_rate": 0.002, "loss": 2.3292, "step": 214110 }, { "epoch": 0.8277280388427579, "grad_norm": 0.09539224207401276, "learning_rate": 0.002, "loss": 2.3436, "step": 214120 }, { "epoch": 0.8277666960461413, "grad_norm": 0.09107252955436707, "learning_rate": 0.002, "loss": 2.3402, "step": 214130 }, { "epoch": 0.8278053532495245, "grad_norm": 0.1036781594157219, "learning_rate": 0.002, "loss": 2.3616, "step": 214140 }, { "epoch": 0.8278440104529078, "grad_norm": 0.11991392076015472, "learning_rate": 0.002, "loss": 2.3252, "step": 214150 }, { "epoch": 0.827882667656291, "grad_norm": 0.10466620326042175, "learning_rate": 0.002, "loss": 2.3437, "step": 214160 }, { "epoch": 0.8279213248596744, "grad_norm": 0.09906827658414841, "learning_rate": 0.002, "loss": 2.341, "step": 214170 }, { "epoch": 0.8279599820630577, "grad_norm": 0.13791698217391968, "learning_rate": 0.002, "loss": 2.3391, "step": 214180 }, { "epoch": 0.8279986392664409, "grad_norm": 0.12032756954431534, "learning_rate": 0.002, "loss": 2.3537, "step": 214190 }, { "epoch": 0.8280372964698242, "grad_norm": 0.09512019902467728, "learning_rate": 0.002, "loss": 2.3535, "step": 214200 }, { "epoch": 0.8280759536732074, "grad_norm": 0.10837588459253311, "learning_rate": 0.002, "loss": 2.3381, "step": 214210 }, { "epoch": 0.8281146108765908, "grad_norm": 0.09497522562742233, "learning_rate": 0.002, "loss": 2.3271, "step": 214220 }, { "epoch": 0.828153268079974, "grad_norm": 0.08743920177221298, "learning_rate": 0.002, "loss": 2.3255, "step": 214230 }, { "epoch": 0.8281919252833573, "grad_norm": 0.09592652320861816, "learning_rate": 0.002, "loss": 2.3393, "step": 214240 }, { "epoch": 0.8282305824867405, "grad_norm": 0.09966947138309479, "learning_rate": 0.002, "loss": 2.3431, "step": 214250 }, { "epoch": 0.8282692396901239, "grad_norm": 0.10604499280452728, "learning_rate": 0.002, "loss": 2.3244, "step": 214260 }, { "epoch": 0.8283078968935071, "grad_norm": 0.09730489552021027, "learning_rate": 0.002, "loss": 2.3264, "step": 214270 }, { "epoch": 0.8283465540968904, "grad_norm": 0.10596129298210144, "learning_rate": 0.002, "loss": 2.3229, "step": 214280 }, { "epoch": 0.8283852113002736, "grad_norm": 0.11412282288074493, "learning_rate": 0.002, "loss": 2.3402, "step": 214290 }, { "epoch": 0.828423868503657, "grad_norm": 0.11002447456121445, "learning_rate": 0.002, "loss": 2.341, "step": 214300 }, { "epoch": 0.8284625257070403, "grad_norm": 0.09060240536928177, "learning_rate": 0.002, "loss": 2.3392, "step": 214310 }, { "epoch": 0.8285011829104235, "grad_norm": 0.09924402087926865, "learning_rate": 0.002, "loss": 2.3364, "step": 214320 }, { "epoch": 0.8285398401138068, "grad_norm": 0.09792988747358322, "learning_rate": 0.002, "loss": 2.3406, "step": 214330 }, { "epoch": 0.8285784973171901, "grad_norm": 0.11095894873142242, "learning_rate": 0.002, "loss": 2.3357, "step": 214340 }, { "epoch": 0.8286171545205734, "grad_norm": 0.10736696422100067, "learning_rate": 0.002, "loss": 2.3376, "step": 214350 }, { "epoch": 0.8286558117239566, "grad_norm": 0.09914804995059967, "learning_rate": 0.002, "loss": 2.3361, "step": 214360 }, { "epoch": 0.8286944689273399, "grad_norm": 0.11982014030218124, "learning_rate": 0.002, "loss": 2.3376, "step": 214370 }, { "epoch": 0.8287331261307233, "grad_norm": 0.11260449886322021, "learning_rate": 0.002, "loss": 2.3225, "step": 214380 }, { "epoch": 0.8287717833341065, "grad_norm": 0.11211036145687103, "learning_rate": 0.002, "loss": 2.3375, "step": 214390 }, { "epoch": 0.8288104405374898, "grad_norm": 0.12835681438446045, "learning_rate": 0.002, "loss": 2.3411, "step": 214400 }, { "epoch": 0.828849097740873, "grad_norm": 0.11177512258291245, "learning_rate": 0.002, "loss": 2.3435, "step": 214410 }, { "epoch": 0.8288877549442563, "grad_norm": 0.10175733268260956, "learning_rate": 0.002, "loss": 2.3398, "step": 214420 }, { "epoch": 0.8289264121476396, "grad_norm": 0.12657196819782257, "learning_rate": 0.002, "loss": 2.3434, "step": 214430 }, { "epoch": 0.8289650693510229, "grad_norm": 0.1052093431353569, "learning_rate": 0.002, "loss": 2.3312, "step": 214440 }, { "epoch": 0.8290037265544061, "grad_norm": 0.11545445024967194, "learning_rate": 0.002, "loss": 2.3411, "step": 214450 }, { "epoch": 0.8290423837577894, "grad_norm": 0.10227680951356888, "learning_rate": 0.002, "loss": 2.3474, "step": 214460 }, { "epoch": 0.8290810409611727, "grad_norm": 0.09999417513608932, "learning_rate": 0.002, "loss": 2.3325, "step": 214470 }, { "epoch": 0.829119698164556, "grad_norm": 0.10263783484697342, "learning_rate": 0.002, "loss": 2.3457, "step": 214480 }, { "epoch": 0.8291583553679392, "grad_norm": 0.1079811379313469, "learning_rate": 0.002, "loss": 2.3343, "step": 214490 }, { "epoch": 0.8291970125713225, "grad_norm": 0.09219511598348618, "learning_rate": 0.002, "loss": 2.3276, "step": 214500 }, { "epoch": 0.8292356697747059, "grad_norm": 0.11662989109754562, "learning_rate": 0.002, "loss": 2.3509, "step": 214510 }, { "epoch": 0.8292743269780891, "grad_norm": 0.10641618072986603, "learning_rate": 0.002, "loss": 2.3415, "step": 214520 }, { "epoch": 0.8293129841814724, "grad_norm": 0.08924368768930435, "learning_rate": 0.002, "loss": 2.3209, "step": 214530 }, { "epoch": 0.8293516413848556, "grad_norm": 0.0992094874382019, "learning_rate": 0.002, "loss": 2.3458, "step": 214540 }, { "epoch": 0.829390298588239, "grad_norm": 0.1098957285284996, "learning_rate": 0.002, "loss": 2.3264, "step": 214550 }, { "epoch": 0.8294289557916222, "grad_norm": 0.11594545096158981, "learning_rate": 0.002, "loss": 2.3383, "step": 214560 }, { "epoch": 0.8294676129950055, "grad_norm": 0.11425887793302536, "learning_rate": 0.002, "loss": 2.3336, "step": 214570 }, { "epoch": 0.8295062701983887, "grad_norm": 0.08713730424642563, "learning_rate": 0.002, "loss": 2.3288, "step": 214580 }, { "epoch": 0.829544927401772, "grad_norm": 0.1614382117986679, "learning_rate": 0.002, "loss": 2.3147, "step": 214590 }, { "epoch": 0.8295835846051554, "grad_norm": 0.1060347706079483, "learning_rate": 0.002, "loss": 2.3527, "step": 214600 }, { "epoch": 0.8296222418085386, "grad_norm": 0.09657891094684601, "learning_rate": 0.002, "loss": 2.3352, "step": 214610 }, { "epoch": 0.8296608990119219, "grad_norm": 0.11805426329374313, "learning_rate": 0.002, "loss": 2.3509, "step": 214620 }, { "epoch": 0.8296995562153051, "grad_norm": 0.09428200125694275, "learning_rate": 0.002, "loss": 2.3275, "step": 214630 }, { "epoch": 0.8297382134186885, "grad_norm": 0.11019503325223923, "learning_rate": 0.002, "loss": 2.336, "step": 214640 }, { "epoch": 0.8297768706220717, "grad_norm": 0.11221537739038467, "learning_rate": 0.002, "loss": 2.3499, "step": 214650 }, { "epoch": 0.829815527825455, "grad_norm": 0.1031530499458313, "learning_rate": 0.002, "loss": 2.3426, "step": 214660 }, { "epoch": 0.8298541850288382, "grad_norm": 0.11032679677009583, "learning_rate": 0.002, "loss": 2.3474, "step": 214670 }, { "epoch": 0.8298928422322216, "grad_norm": 0.10092826932668686, "learning_rate": 0.002, "loss": 2.3424, "step": 214680 }, { "epoch": 0.8299314994356048, "grad_norm": 0.1032138466835022, "learning_rate": 0.002, "loss": 2.3469, "step": 214690 }, { "epoch": 0.8299701566389881, "grad_norm": 0.11866185069084167, "learning_rate": 0.002, "loss": 2.3517, "step": 214700 }, { "epoch": 0.8300088138423714, "grad_norm": 0.12750159204006195, "learning_rate": 0.002, "loss": 2.3305, "step": 214710 }, { "epoch": 0.8300474710457547, "grad_norm": 0.10487841069698334, "learning_rate": 0.002, "loss": 2.3498, "step": 214720 }, { "epoch": 0.830086128249138, "grad_norm": 0.105401411652565, "learning_rate": 0.002, "loss": 2.3551, "step": 214730 }, { "epoch": 0.8301247854525212, "grad_norm": 0.10411553829908371, "learning_rate": 0.002, "loss": 2.3363, "step": 214740 }, { "epoch": 0.8301634426559045, "grad_norm": 0.10573433339595795, "learning_rate": 0.002, "loss": 2.3337, "step": 214750 }, { "epoch": 0.8302020998592878, "grad_norm": 0.09723833948373795, "learning_rate": 0.002, "loss": 2.3238, "step": 214760 }, { "epoch": 0.8302407570626711, "grad_norm": 0.09145607799291611, "learning_rate": 0.002, "loss": 2.3385, "step": 214770 }, { "epoch": 0.8302794142660543, "grad_norm": 0.09567610919475555, "learning_rate": 0.002, "loss": 2.3337, "step": 214780 }, { "epoch": 0.8303180714694376, "grad_norm": 0.10495653748512268, "learning_rate": 0.002, "loss": 2.3506, "step": 214790 }, { "epoch": 0.8303567286728208, "grad_norm": 0.1699315309524536, "learning_rate": 0.002, "loss": 2.3423, "step": 214800 }, { "epoch": 0.8303953858762042, "grad_norm": 0.11049603670835495, "learning_rate": 0.002, "loss": 2.3317, "step": 214810 }, { "epoch": 0.8304340430795875, "grad_norm": 0.10238435119390488, "learning_rate": 0.002, "loss": 2.3332, "step": 214820 }, { "epoch": 0.8304727002829707, "grad_norm": 0.10729759186506271, "learning_rate": 0.002, "loss": 2.3443, "step": 214830 }, { "epoch": 0.830511357486354, "grad_norm": 0.09942258894443512, "learning_rate": 0.002, "loss": 2.3383, "step": 214840 }, { "epoch": 0.8305500146897373, "grad_norm": 0.11134497076272964, "learning_rate": 0.002, "loss": 2.3265, "step": 214850 }, { "epoch": 0.8305886718931206, "grad_norm": 0.10739394277334213, "learning_rate": 0.002, "loss": 2.3224, "step": 214860 }, { "epoch": 0.8306273290965038, "grad_norm": 0.11967971920967102, "learning_rate": 0.002, "loss": 2.3365, "step": 214870 }, { "epoch": 0.8306659862998871, "grad_norm": 0.12112173438072205, "learning_rate": 0.002, "loss": 2.3402, "step": 214880 }, { "epoch": 0.8307046435032704, "grad_norm": 0.11589988321065903, "learning_rate": 0.002, "loss": 2.3416, "step": 214890 }, { "epoch": 0.8307433007066537, "grad_norm": 0.09112926572561264, "learning_rate": 0.002, "loss": 2.3458, "step": 214900 }, { "epoch": 0.830781957910037, "grad_norm": 0.11147700995206833, "learning_rate": 0.002, "loss": 2.3618, "step": 214910 }, { "epoch": 0.8308206151134202, "grad_norm": 0.10066591203212738, "learning_rate": 0.002, "loss": 2.3294, "step": 214920 }, { "epoch": 0.8308592723168036, "grad_norm": 0.10681650042533875, "learning_rate": 0.002, "loss": 2.3431, "step": 214930 }, { "epoch": 0.8308979295201868, "grad_norm": 0.10110953450202942, "learning_rate": 0.002, "loss": 2.3398, "step": 214940 }, { "epoch": 0.8309365867235701, "grad_norm": 0.1249653548002243, "learning_rate": 0.002, "loss": 2.3302, "step": 214950 }, { "epoch": 0.8309752439269533, "grad_norm": 0.11149842292070389, "learning_rate": 0.002, "loss": 2.341, "step": 214960 }, { "epoch": 0.8310139011303366, "grad_norm": 0.11847642064094543, "learning_rate": 0.002, "loss": 2.3401, "step": 214970 }, { "epoch": 0.8310525583337199, "grad_norm": 0.09807038307189941, "learning_rate": 0.002, "loss": 2.3414, "step": 214980 }, { "epoch": 0.8310912155371032, "grad_norm": 0.09750042855739594, "learning_rate": 0.002, "loss": 2.336, "step": 214990 }, { "epoch": 0.8311298727404864, "grad_norm": 0.09801283478736877, "learning_rate": 0.002, "loss": 2.3493, "step": 215000 }, { "epoch": 0.8311685299438697, "grad_norm": 0.10606060922145844, "learning_rate": 0.002, "loss": 2.3476, "step": 215010 }, { "epoch": 0.8312071871472531, "grad_norm": 0.11397572606801987, "learning_rate": 0.002, "loss": 2.3307, "step": 215020 }, { "epoch": 0.8312458443506363, "grad_norm": 0.11736032366752625, "learning_rate": 0.002, "loss": 2.341, "step": 215030 }, { "epoch": 0.8312845015540196, "grad_norm": 0.1030939519405365, "learning_rate": 0.002, "loss": 2.3449, "step": 215040 }, { "epoch": 0.8313231587574028, "grad_norm": 0.12110091745853424, "learning_rate": 0.002, "loss": 2.3411, "step": 215050 }, { "epoch": 0.8313618159607862, "grad_norm": 0.11975936591625214, "learning_rate": 0.002, "loss": 2.3523, "step": 215060 }, { "epoch": 0.8314004731641694, "grad_norm": 0.09283725917339325, "learning_rate": 0.002, "loss": 2.3299, "step": 215070 }, { "epoch": 0.8314391303675527, "grad_norm": 0.12285838276147842, "learning_rate": 0.002, "loss": 2.3317, "step": 215080 }, { "epoch": 0.8314777875709359, "grad_norm": 0.1171443834900856, "learning_rate": 0.002, "loss": 2.3461, "step": 215090 }, { "epoch": 0.8315164447743193, "grad_norm": 0.10224750638008118, "learning_rate": 0.002, "loss": 2.3384, "step": 215100 }, { "epoch": 0.8315551019777025, "grad_norm": 0.10163308680057526, "learning_rate": 0.002, "loss": 2.3398, "step": 215110 }, { "epoch": 0.8315937591810858, "grad_norm": 0.10186589509248734, "learning_rate": 0.002, "loss": 2.3332, "step": 215120 }, { "epoch": 0.831632416384469, "grad_norm": 0.10101377218961716, "learning_rate": 0.002, "loss": 2.337, "step": 215130 }, { "epoch": 0.8316710735878523, "grad_norm": 0.11015063524246216, "learning_rate": 0.002, "loss": 2.3351, "step": 215140 }, { "epoch": 0.8317097307912357, "grad_norm": 0.09436852484941483, "learning_rate": 0.002, "loss": 2.3387, "step": 215150 }, { "epoch": 0.8317483879946189, "grad_norm": 0.10956919938325882, "learning_rate": 0.002, "loss": 2.3403, "step": 215160 }, { "epoch": 0.8317870451980022, "grad_norm": 0.10172809660434723, "learning_rate": 0.002, "loss": 2.3351, "step": 215170 }, { "epoch": 0.8318257024013854, "grad_norm": 0.1085454672574997, "learning_rate": 0.002, "loss": 2.3377, "step": 215180 }, { "epoch": 0.8318643596047688, "grad_norm": 0.12386401742696762, "learning_rate": 0.002, "loss": 2.3442, "step": 215190 }, { "epoch": 0.831903016808152, "grad_norm": 0.11149362474679947, "learning_rate": 0.002, "loss": 2.3441, "step": 215200 }, { "epoch": 0.8319416740115353, "grad_norm": 0.11105955392122269, "learning_rate": 0.002, "loss": 2.3428, "step": 215210 }, { "epoch": 0.8319803312149185, "grad_norm": 0.08862071484327316, "learning_rate": 0.002, "loss": 2.3456, "step": 215220 }, { "epoch": 0.8320189884183019, "grad_norm": 0.10108217597007751, "learning_rate": 0.002, "loss": 2.3521, "step": 215230 }, { "epoch": 0.8320576456216852, "grad_norm": 0.10174451768398285, "learning_rate": 0.002, "loss": 2.3335, "step": 215240 }, { "epoch": 0.8320963028250684, "grad_norm": 0.1056303083896637, "learning_rate": 0.002, "loss": 2.3429, "step": 215250 }, { "epoch": 0.8321349600284517, "grad_norm": 0.09623876959085464, "learning_rate": 0.002, "loss": 2.3312, "step": 215260 }, { "epoch": 0.832173617231835, "grad_norm": 0.10596370697021484, "learning_rate": 0.002, "loss": 2.327, "step": 215270 }, { "epoch": 0.8322122744352183, "grad_norm": 0.11672472208738327, "learning_rate": 0.002, "loss": 2.3454, "step": 215280 }, { "epoch": 0.8322509316386015, "grad_norm": 0.09565582871437073, "learning_rate": 0.002, "loss": 2.3309, "step": 215290 }, { "epoch": 0.8322895888419848, "grad_norm": 0.10847000032663345, "learning_rate": 0.002, "loss": 2.3433, "step": 215300 }, { "epoch": 0.8323282460453681, "grad_norm": 0.10517299920320511, "learning_rate": 0.002, "loss": 2.3434, "step": 215310 }, { "epoch": 0.8323669032487514, "grad_norm": 0.09611780196428299, "learning_rate": 0.002, "loss": 2.3402, "step": 215320 }, { "epoch": 0.8324055604521347, "grad_norm": 0.10611071437597275, "learning_rate": 0.002, "loss": 2.3511, "step": 215330 }, { "epoch": 0.8324442176555179, "grad_norm": 0.10718956589698792, "learning_rate": 0.002, "loss": 2.3459, "step": 215340 }, { "epoch": 0.8324828748589012, "grad_norm": 0.11760836094617844, "learning_rate": 0.002, "loss": 2.323, "step": 215350 }, { "epoch": 0.8325215320622845, "grad_norm": 0.10962734371423721, "learning_rate": 0.002, "loss": 2.3439, "step": 215360 }, { "epoch": 0.8325601892656678, "grad_norm": 0.10704983025789261, "learning_rate": 0.002, "loss": 2.3246, "step": 215370 }, { "epoch": 0.832598846469051, "grad_norm": 0.10682962089776993, "learning_rate": 0.002, "loss": 2.3502, "step": 215380 }, { "epoch": 0.8326375036724343, "grad_norm": 0.12136154621839523, "learning_rate": 0.002, "loss": 2.3497, "step": 215390 }, { "epoch": 0.8326761608758176, "grad_norm": 0.09624449908733368, "learning_rate": 0.002, "loss": 2.3353, "step": 215400 }, { "epoch": 0.8327148180792009, "grad_norm": 0.10110598057508469, "learning_rate": 0.002, "loss": 2.3429, "step": 215410 }, { "epoch": 0.8327534752825841, "grad_norm": 0.10344604402780533, "learning_rate": 0.002, "loss": 2.351, "step": 215420 }, { "epoch": 0.8327921324859674, "grad_norm": 0.10519465059041977, "learning_rate": 0.002, "loss": 2.3302, "step": 215430 }, { "epoch": 0.8328307896893508, "grad_norm": 0.10835524648427963, "learning_rate": 0.002, "loss": 2.3184, "step": 215440 }, { "epoch": 0.832869446892734, "grad_norm": 0.09608236700296402, "learning_rate": 0.002, "loss": 2.3417, "step": 215450 }, { "epoch": 0.8329081040961173, "grad_norm": 0.09845644980669022, "learning_rate": 0.002, "loss": 2.3349, "step": 215460 }, { "epoch": 0.8329467612995005, "grad_norm": 0.11512713879346848, "learning_rate": 0.002, "loss": 2.3418, "step": 215470 }, { "epoch": 0.8329854185028839, "grad_norm": 0.10259012877941132, "learning_rate": 0.002, "loss": 2.3324, "step": 215480 }, { "epoch": 0.8330240757062671, "grad_norm": 0.08932927995920181, "learning_rate": 0.002, "loss": 2.3412, "step": 215490 }, { "epoch": 0.8330627329096504, "grad_norm": 0.11538581550121307, "learning_rate": 0.002, "loss": 2.3489, "step": 215500 }, { "epoch": 0.8331013901130336, "grad_norm": 0.13710728287696838, "learning_rate": 0.002, "loss": 2.3412, "step": 215510 }, { "epoch": 0.8331400473164169, "grad_norm": 0.0972672551870346, "learning_rate": 0.002, "loss": 2.3418, "step": 215520 }, { "epoch": 0.8331787045198003, "grad_norm": 0.09992696344852448, "learning_rate": 0.002, "loss": 2.3448, "step": 215530 }, { "epoch": 0.8332173617231835, "grad_norm": 0.1077425628900528, "learning_rate": 0.002, "loss": 2.3286, "step": 215540 }, { "epoch": 0.8332560189265668, "grad_norm": 0.10400703549385071, "learning_rate": 0.002, "loss": 2.3327, "step": 215550 }, { "epoch": 0.83329467612995, "grad_norm": 0.11263838410377502, "learning_rate": 0.002, "loss": 2.3504, "step": 215560 }, { "epoch": 0.8333333333333334, "grad_norm": 0.10221675783395767, "learning_rate": 0.002, "loss": 2.3584, "step": 215570 }, { "epoch": 0.8333719905367166, "grad_norm": 0.10675705224275589, "learning_rate": 0.002, "loss": 2.3435, "step": 215580 }, { "epoch": 0.8334106477400999, "grad_norm": 0.11262036114931107, "learning_rate": 0.002, "loss": 2.3414, "step": 215590 }, { "epoch": 0.8334493049434831, "grad_norm": 0.10636333376169205, "learning_rate": 0.002, "loss": 2.3614, "step": 215600 }, { "epoch": 0.8334879621468665, "grad_norm": 0.11252046376466751, "learning_rate": 0.002, "loss": 2.3404, "step": 215610 }, { "epoch": 0.8335266193502497, "grad_norm": 0.10936474055051804, "learning_rate": 0.002, "loss": 2.3517, "step": 215620 }, { "epoch": 0.833565276553633, "grad_norm": 0.09406422823667526, "learning_rate": 0.002, "loss": 2.3348, "step": 215630 }, { "epoch": 0.8336039337570162, "grad_norm": 0.10333401709794998, "learning_rate": 0.002, "loss": 2.3335, "step": 215640 }, { "epoch": 0.8336425909603996, "grad_norm": 0.10925310105085373, "learning_rate": 0.002, "loss": 2.3325, "step": 215650 }, { "epoch": 0.8336812481637829, "grad_norm": 0.16220533847808838, "learning_rate": 0.002, "loss": 2.3325, "step": 215660 }, { "epoch": 0.8337199053671661, "grad_norm": 0.1127329021692276, "learning_rate": 0.002, "loss": 2.3391, "step": 215670 }, { "epoch": 0.8337585625705494, "grad_norm": 0.10669612139463425, "learning_rate": 0.002, "loss": 2.329, "step": 215680 }, { "epoch": 0.8337972197739327, "grad_norm": 0.10470252484083176, "learning_rate": 0.002, "loss": 2.3604, "step": 215690 }, { "epoch": 0.833835876977316, "grad_norm": 0.10056356340646744, "learning_rate": 0.002, "loss": 2.3453, "step": 215700 }, { "epoch": 0.8338745341806992, "grad_norm": 0.09819260984659195, "learning_rate": 0.002, "loss": 2.3385, "step": 215710 }, { "epoch": 0.8339131913840825, "grad_norm": 0.09792476892471313, "learning_rate": 0.002, "loss": 2.3342, "step": 215720 }, { "epoch": 0.8339518485874657, "grad_norm": 0.09533162415027618, "learning_rate": 0.002, "loss": 2.3289, "step": 215730 }, { "epoch": 0.8339905057908491, "grad_norm": 0.11937808245420456, "learning_rate": 0.002, "loss": 2.3674, "step": 215740 }, { "epoch": 0.8340291629942324, "grad_norm": 0.10256972163915634, "learning_rate": 0.002, "loss": 2.3458, "step": 215750 }, { "epoch": 0.8340678201976156, "grad_norm": 0.10601312667131424, "learning_rate": 0.002, "loss": 2.3485, "step": 215760 }, { "epoch": 0.8341064774009989, "grad_norm": 0.10224002599716187, "learning_rate": 0.002, "loss": 2.3428, "step": 215770 }, { "epoch": 0.8341451346043822, "grad_norm": 0.09572790563106537, "learning_rate": 0.002, "loss": 2.3209, "step": 215780 }, { "epoch": 0.8341837918077655, "grad_norm": 0.1392766535282135, "learning_rate": 0.002, "loss": 2.332, "step": 215790 }, { "epoch": 0.8342224490111487, "grad_norm": 0.10670210421085358, "learning_rate": 0.002, "loss": 2.3271, "step": 215800 }, { "epoch": 0.834261106214532, "grad_norm": 0.10126934945583344, "learning_rate": 0.002, "loss": 2.3338, "step": 215810 }, { "epoch": 0.8342997634179153, "grad_norm": 0.10573316365480423, "learning_rate": 0.002, "loss": 2.3391, "step": 215820 }, { "epoch": 0.8343384206212986, "grad_norm": 0.09832464158535004, "learning_rate": 0.002, "loss": 2.3203, "step": 215830 }, { "epoch": 0.8343770778246818, "grad_norm": 0.12400636076927185, "learning_rate": 0.002, "loss": 2.3448, "step": 215840 }, { "epoch": 0.8344157350280651, "grad_norm": 0.10741636902093887, "learning_rate": 0.002, "loss": 2.343, "step": 215850 }, { "epoch": 0.8344543922314485, "grad_norm": 0.12283851206302643, "learning_rate": 0.002, "loss": 2.3422, "step": 215860 }, { "epoch": 0.8344930494348317, "grad_norm": 0.1226603090763092, "learning_rate": 0.002, "loss": 2.3324, "step": 215870 }, { "epoch": 0.834531706638215, "grad_norm": 0.09905299544334412, "learning_rate": 0.002, "loss": 2.3415, "step": 215880 }, { "epoch": 0.8345703638415982, "grad_norm": 0.10570216923952103, "learning_rate": 0.002, "loss": 2.3365, "step": 215890 }, { "epoch": 0.8346090210449815, "grad_norm": 0.1257603019475937, "learning_rate": 0.002, "loss": 2.3535, "step": 215900 }, { "epoch": 0.8346476782483648, "grad_norm": 0.11307378858327866, "learning_rate": 0.002, "loss": 2.3326, "step": 215910 }, { "epoch": 0.8346863354517481, "grad_norm": 0.13677355647087097, "learning_rate": 0.002, "loss": 2.3413, "step": 215920 }, { "epoch": 0.8347249926551313, "grad_norm": 0.10929053276777267, "learning_rate": 0.002, "loss": 2.3367, "step": 215930 }, { "epoch": 0.8347636498585146, "grad_norm": 0.08977066725492477, "learning_rate": 0.002, "loss": 2.3268, "step": 215940 }, { "epoch": 0.834802307061898, "grad_norm": 0.10597755014896393, "learning_rate": 0.002, "loss": 2.3328, "step": 215950 }, { "epoch": 0.8348409642652812, "grad_norm": 0.10703890025615692, "learning_rate": 0.002, "loss": 2.3436, "step": 215960 }, { "epoch": 0.8348796214686645, "grad_norm": 0.09597240388393402, "learning_rate": 0.002, "loss": 2.336, "step": 215970 }, { "epoch": 0.8349182786720477, "grad_norm": 0.08831729739904404, "learning_rate": 0.002, "loss": 2.3579, "step": 215980 }, { "epoch": 0.8349569358754311, "grad_norm": 0.09937921911478043, "learning_rate": 0.002, "loss": 2.3493, "step": 215990 }, { "epoch": 0.8349955930788143, "grad_norm": 0.10918296128511429, "learning_rate": 0.002, "loss": 2.3506, "step": 216000 }, { "epoch": 0.8350342502821976, "grad_norm": 0.14627370238304138, "learning_rate": 0.002, "loss": 2.3367, "step": 216010 }, { "epoch": 0.8350729074855808, "grad_norm": 0.11544416844844818, "learning_rate": 0.002, "loss": 2.3425, "step": 216020 }, { "epoch": 0.8351115646889642, "grad_norm": 0.10169287770986557, "learning_rate": 0.002, "loss": 2.3381, "step": 216030 }, { "epoch": 0.8351502218923474, "grad_norm": 0.1060442104935646, "learning_rate": 0.002, "loss": 2.3346, "step": 216040 }, { "epoch": 0.8351888790957307, "grad_norm": 0.11080343276262283, "learning_rate": 0.002, "loss": 2.3207, "step": 216050 }, { "epoch": 0.835227536299114, "grad_norm": 0.11087153106927872, "learning_rate": 0.002, "loss": 2.3284, "step": 216060 }, { "epoch": 0.8352661935024972, "grad_norm": 0.10543015599250793, "learning_rate": 0.002, "loss": 2.3368, "step": 216070 }, { "epoch": 0.8353048507058806, "grad_norm": 0.09673027694225311, "learning_rate": 0.002, "loss": 2.3373, "step": 216080 }, { "epoch": 0.8353435079092638, "grad_norm": 0.10776925086975098, "learning_rate": 0.002, "loss": 2.3542, "step": 216090 }, { "epoch": 0.8353821651126471, "grad_norm": 0.11809718608856201, "learning_rate": 0.002, "loss": 2.3357, "step": 216100 }, { "epoch": 0.8354208223160303, "grad_norm": 0.11080779135227203, "learning_rate": 0.002, "loss": 2.3531, "step": 216110 }, { "epoch": 0.8354594795194137, "grad_norm": 0.13108794391155243, "learning_rate": 0.002, "loss": 2.3467, "step": 216120 }, { "epoch": 0.8354981367227969, "grad_norm": 0.10459788143634796, "learning_rate": 0.002, "loss": 2.3281, "step": 216130 }, { "epoch": 0.8355367939261802, "grad_norm": 0.09469229727983475, "learning_rate": 0.002, "loss": 2.3546, "step": 216140 }, { "epoch": 0.8355754511295634, "grad_norm": 0.12095770239830017, "learning_rate": 0.002, "loss": 2.3356, "step": 216150 }, { "epoch": 0.8356141083329468, "grad_norm": 0.10411792248487473, "learning_rate": 0.002, "loss": 2.3665, "step": 216160 }, { "epoch": 0.8356527655363301, "grad_norm": 0.10881275683641434, "learning_rate": 0.002, "loss": 2.3397, "step": 216170 }, { "epoch": 0.8356914227397133, "grad_norm": 0.11071446537971497, "learning_rate": 0.002, "loss": 2.3289, "step": 216180 }, { "epoch": 0.8357300799430966, "grad_norm": 0.1212799996137619, "learning_rate": 0.002, "loss": 2.3393, "step": 216190 }, { "epoch": 0.8357687371464799, "grad_norm": 0.09932661801576614, "learning_rate": 0.002, "loss": 2.3456, "step": 216200 }, { "epoch": 0.8358073943498632, "grad_norm": 0.10696447640657425, "learning_rate": 0.002, "loss": 2.3514, "step": 216210 }, { "epoch": 0.8358460515532464, "grad_norm": 0.09831404685974121, "learning_rate": 0.002, "loss": 2.325, "step": 216220 }, { "epoch": 0.8358847087566297, "grad_norm": 0.10389647632837296, "learning_rate": 0.002, "loss": 2.3315, "step": 216230 }, { "epoch": 0.835923365960013, "grad_norm": 0.11285470426082611, "learning_rate": 0.002, "loss": 2.328, "step": 216240 }, { "epoch": 0.8359620231633963, "grad_norm": 0.11450444906949997, "learning_rate": 0.002, "loss": 2.3474, "step": 216250 }, { "epoch": 0.8360006803667795, "grad_norm": 0.10833612084388733, "learning_rate": 0.002, "loss": 2.3472, "step": 216260 }, { "epoch": 0.8360393375701628, "grad_norm": 0.08962885290384293, "learning_rate": 0.002, "loss": 2.347, "step": 216270 }, { "epoch": 0.836077994773546, "grad_norm": 0.09980128705501556, "learning_rate": 0.002, "loss": 2.3371, "step": 216280 }, { "epoch": 0.8361166519769294, "grad_norm": 0.09436109662055969, "learning_rate": 0.002, "loss": 2.3581, "step": 216290 }, { "epoch": 0.8361553091803127, "grad_norm": 0.08935563266277313, "learning_rate": 0.002, "loss": 2.3389, "step": 216300 }, { "epoch": 0.8361939663836959, "grad_norm": 0.1144620031118393, "learning_rate": 0.002, "loss": 2.3449, "step": 216310 }, { "epoch": 0.8362326235870792, "grad_norm": 0.0936708003282547, "learning_rate": 0.002, "loss": 2.3352, "step": 216320 }, { "epoch": 0.8362712807904625, "grad_norm": 0.12868732213974, "learning_rate": 0.002, "loss": 2.3368, "step": 216330 }, { "epoch": 0.8363099379938458, "grad_norm": 0.09277084469795227, "learning_rate": 0.002, "loss": 2.3442, "step": 216340 }, { "epoch": 0.836348595197229, "grad_norm": 0.10452208667993546, "learning_rate": 0.002, "loss": 2.3492, "step": 216350 }, { "epoch": 0.8363872524006123, "grad_norm": 0.10119964927434921, "learning_rate": 0.002, "loss": 2.3326, "step": 216360 }, { "epoch": 0.8364259096039957, "grad_norm": 0.11663848906755447, "learning_rate": 0.002, "loss": 2.3353, "step": 216370 }, { "epoch": 0.8364645668073789, "grad_norm": 0.1097102165222168, "learning_rate": 0.002, "loss": 2.3367, "step": 216380 }, { "epoch": 0.8365032240107622, "grad_norm": 0.11153378337621689, "learning_rate": 0.002, "loss": 2.3371, "step": 216390 }, { "epoch": 0.8365418812141454, "grad_norm": 0.09548214823007584, "learning_rate": 0.002, "loss": 2.3385, "step": 216400 }, { "epoch": 0.8365805384175288, "grad_norm": 0.0946519672870636, "learning_rate": 0.002, "loss": 2.3426, "step": 216410 }, { "epoch": 0.836619195620912, "grad_norm": 0.10115214437246323, "learning_rate": 0.002, "loss": 2.3544, "step": 216420 }, { "epoch": 0.8366578528242953, "grad_norm": 0.09835871309041977, "learning_rate": 0.002, "loss": 2.3424, "step": 216430 }, { "epoch": 0.8366965100276785, "grad_norm": 0.10255956649780273, "learning_rate": 0.002, "loss": 2.3204, "step": 216440 }, { "epoch": 0.8367351672310618, "grad_norm": 0.10846435278654099, "learning_rate": 0.002, "loss": 2.3322, "step": 216450 }, { "epoch": 0.8367738244344451, "grad_norm": 0.105130136013031, "learning_rate": 0.002, "loss": 2.3418, "step": 216460 }, { "epoch": 0.8368124816378284, "grad_norm": 0.09569898247718811, "learning_rate": 0.002, "loss": 2.348, "step": 216470 }, { "epoch": 0.8368511388412117, "grad_norm": 0.14089907705783844, "learning_rate": 0.002, "loss": 2.3453, "step": 216480 }, { "epoch": 0.8368897960445949, "grad_norm": 0.11543848365545273, "learning_rate": 0.002, "loss": 2.3332, "step": 216490 }, { "epoch": 0.8369284532479783, "grad_norm": 0.09327449649572372, "learning_rate": 0.002, "loss": 2.3471, "step": 216500 }, { "epoch": 0.8369671104513615, "grad_norm": 0.12228869646787643, "learning_rate": 0.002, "loss": 2.33, "step": 216510 }, { "epoch": 0.8370057676547448, "grad_norm": 0.09986231476068497, "learning_rate": 0.002, "loss": 2.3293, "step": 216520 }, { "epoch": 0.837044424858128, "grad_norm": 0.10587063431739807, "learning_rate": 0.002, "loss": 2.3398, "step": 216530 }, { "epoch": 0.8370830820615114, "grad_norm": 0.11145617067813873, "learning_rate": 0.002, "loss": 2.3369, "step": 216540 }, { "epoch": 0.8371217392648946, "grad_norm": 0.1187463253736496, "learning_rate": 0.002, "loss": 2.3477, "step": 216550 }, { "epoch": 0.8371603964682779, "grad_norm": 0.1010122075676918, "learning_rate": 0.002, "loss": 2.3233, "step": 216560 }, { "epoch": 0.8371990536716611, "grad_norm": 0.09798204153776169, "learning_rate": 0.002, "loss": 2.3402, "step": 216570 }, { "epoch": 0.8372377108750445, "grad_norm": 0.10220952332019806, "learning_rate": 0.002, "loss": 2.3397, "step": 216580 }, { "epoch": 0.8372763680784278, "grad_norm": 0.09570495784282684, "learning_rate": 0.002, "loss": 2.3388, "step": 216590 }, { "epoch": 0.837315025281811, "grad_norm": 0.12634548544883728, "learning_rate": 0.002, "loss": 2.3324, "step": 216600 }, { "epoch": 0.8373536824851943, "grad_norm": 0.10543528199195862, "learning_rate": 0.002, "loss": 2.3318, "step": 216610 }, { "epoch": 0.8373923396885776, "grad_norm": 0.10621460527181625, "learning_rate": 0.002, "loss": 2.3145, "step": 216620 }, { "epoch": 0.8374309968919609, "grad_norm": 0.1278172731399536, "learning_rate": 0.002, "loss": 2.3369, "step": 216630 }, { "epoch": 0.8374696540953441, "grad_norm": 0.10291978716850281, "learning_rate": 0.002, "loss": 2.3335, "step": 216640 }, { "epoch": 0.8375083112987274, "grad_norm": 0.1192663311958313, "learning_rate": 0.002, "loss": 2.3459, "step": 216650 }, { "epoch": 0.8375469685021106, "grad_norm": 0.10618377476930618, "learning_rate": 0.002, "loss": 2.3212, "step": 216660 }, { "epoch": 0.837585625705494, "grad_norm": 0.13136693835258484, "learning_rate": 0.002, "loss": 2.3214, "step": 216670 }, { "epoch": 0.8376242829088772, "grad_norm": 0.09881991893053055, "learning_rate": 0.002, "loss": 2.332, "step": 216680 }, { "epoch": 0.8376629401122605, "grad_norm": 0.11821454763412476, "learning_rate": 0.002, "loss": 2.344, "step": 216690 }, { "epoch": 0.8377015973156438, "grad_norm": 0.10533761233091354, "learning_rate": 0.002, "loss": 2.3414, "step": 216700 }, { "epoch": 0.8377402545190271, "grad_norm": 0.09469486027956009, "learning_rate": 0.002, "loss": 2.3235, "step": 216710 }, { "epoch": 0.8377789117224104, "grad_norm": 0.10206017643213272, "learning_rate": 0.002, "loss": 2.345, "step": 216720 }, { "epoch": 0.8378175689257936, "grad_norm": 0.11334537714719772, "learning_rate": 0.002, "loss": 2.3457, "step": 216730 }, { "epoch": 0.8378562261291769, "grad_norm": 0.11263494193553925, "learning_rate": 0.002, "loss": 2.3409, "step": 216740 }, { "epoch": 0.8378948833325602, "grad_norm": 0.11252845078706741, "learning_rate": 0.002, "loss": 2.3312, "step": 216750 }, { "epoch": 0.8379335405359435, "grad_norm": 0.11377183347940445, "learning_rate": 0.002, "loss": 2.3324, "step": 216760 }, { "epoch": 0.8379721977393267, "grad_norm": 0.12358468770980835, "learning_rate": 0.002, "loss": 2.3484, "step": 216770 }, { "epoch": 0.83801085494271, "grad_norm": 0.12280930578708649, "learning_rate": 0.002, "loss": 2.3477, "step": 216780 }, { "epoch": 0.8380495121460934, "grad_norm": 0.08837874978780746, "learning_rate": 0.002, "loss": 2.329, "step": 216790 }, { "epoch": 0.8380881693494766, "grad_norm": 0.09349276125431061, "learning_rate": 0.002, "loss": 2.3325, "step": 216800 }, { "epoch": 0.8381268265528599, "grad_norm": 0.09149591624736786, "learning_rate": 0.002, "loss": 2.3301, "step": 216810 }, { "epoch": 0.8381654837562431, "grad_norm": 0.1146298423409462, "learning_rate": 0.002, "loss": 2.3469, "step": 216820 }, { "epoch": 0.8382041409596264, "grad_norm": 0.08947496861219406, "learning_rate": 0.002, "loss": 2.3349, "step": 216830 }, { "epoch": 0.8382427981630097, "grad_norm": 0.13498574495315552, "learning_rate": 0.002, "loss": 2.3284, "step": 216840 }, { "epoch": 0.838281455366393, "grad_norm": 0.09723348915576935, "learning_rate": 0.002, "loss": 2.3578, "step": 216850 }, { "epoch": 0.8383201125697762, "grad_norm": 0.0952632948756218, "learning_rate": 0.002, "loss": 2.3544, "step": 216860 }, { "epoch": 0.8383587697731595, "grad_norm": 0.10652051120996475, "learning_rate": 0.002, "loss": 2.3505, "step": 216870 }, { "epoch": 0.8383974269765428, "grad_norm": 0.11049710214138031, "learning_rate": 0.002, "loss": 2.3305, "step": 216880 }, { "epoch": 0.8384360841799261, "grad_norm": 0.12850986421108246, "learning_rate": 0.002, "loss": 2.3443, "step": 216890 }, { "epoch": 0.8384747413833094, "grad_norm": 0.0921587198972702, "learning_rate": 0.002, "loss": 2.3505, "step": 216900 }, { "epoch": 0.8385133985866926, "grad_norm": 0.1148732528090477, "learning_rate": 0.002, "loss": 2.3259, "step": 216910 }, { "epoch": 0.838552055790076, "grad_norm": 0.17332953214645386, "learning_rate": 0.002, "loss": 2.3468, "step": 216920 }, { "epoch": 0.8385907129934592, "grad_norm": 0.10540525615215302, "learning_rate": 0.002, "loss": 2.3294, "step": 216930 }, { "epoch": 0.8386293701968425, "grad_norm": 0.0966196283698082, "learning_rate": 0.002, "loss": 2.3336, "step": 216940 }, { "epoch": 0.8386680274002257, "grad_norm": 0.1091567873954773, "learning_rate": 0.002, "loss": 2.3307, "step": 216950 }, { "epoch": 0.8387066846036091, "grad_norm": 0.09678920358419418, "learning_rate": 0.002, "loss": 2.3452, "step": 216960 }, { "epoch": 0.8387453418069923, "grad_norm": 0.09217251092195511, "learning_rate": 0.002, "loss": 2.3509, "step": 216970 }, { "epoch": 0.8387839990103756, "grad_norm": 0.09982069581747055, "learning_rate": 0.002, "loss": 2.3437, "step": 216980 }, { "epoch": 0.8388226562137588, "grad_norm": 0.09687652438879013, "learning_rate": 0.002, "loss": 2.3316, "step": 216990 }, { "epoch": 0.8388613134171421, "grad_norm": 0.13824646174907684, "learning_rate": 0.002, "loss": 2.3375, "step": 217000 }, { "epoch": 0.8388999706205255, "grad_norm": 0.09609826654195786, "learning_rate": 0.002, "loss": 2.3459, "step": 217010 }, { "epoch": 0.8389386278239087, "grad_norm": 0.09718557447195053, "learning_rate": 0.002, "loss": 2.3499, "step": 217020 }, { "epoch": 0.838977285027292, "grad_norm": 0.10497701913118362, "learning_rate": 0.002, "loss": 2.3373, "step": 217030 }, { "epoch": 0.8390159422306752, "grad_norm": 0.10044204443693161, "learning_rate": 0.002, "loss": 2.3335, "step": 217040 }, { "epoch": 0.8390545994340586, "grad_norm": 0.10007092356681824, "learning_rate": 0.002, "loss": 2.3388, "step": 217050 }, { "epoch": 0.8390932566374418, "grad_norm": 0.11548102647066116, "learning_rate": 0.002, "loss": 2.3463, "step": 217060 }, { "epoch": 0.8391319138408251, "grad_norm": 0.12100327759981155, "learning_rate": 0.002, "loss": 2.3309, "step": 217070 }, { "epoch": 0.8391705710442083, "grad_norm": 0.10042814165353775, "learning_rate": 0.002, "loss": 2.3566, "step": 217080 }, { "epoch": 0.8392092282475917, "grad_norm": 0.09519599378108978, "learning_rate": 0.002, "loss": 2.3465, "step": 217090 }, { "epoch": 0.839247885450975, "grad_norm": 0.13729625940322876, "learning_rate": 0.002, "loss": 2.3377, "step": 217100 }, { "epoch": 0.8392865426543582, "grad_norm": 0.1120065376162529, "learning_rate": 0.002, "loss": 2.3349, "step": 217110 }, { "epoch": 0.8393251998577415, "grad_norm": 0.12437760829925537, "learning_rate": 0.002, "loss": 2.3239, "step": 217120 }, { "epoch": 0.8393638570611248, "grad_norm": 0.10527113825082779, "learning_rate": 0.002, "loss": 2.3465, "step": 217130 }, { "epoch": 0.8394025142645081, "grad_norm": 0.1128300353884697, "learning_rate": 0.002, "loss": 2.3428, "step": 217140 }, { "epoch": 0.8394411714678913, "grad_norm": 0.0989866778254509, "learning_rate": 0.002, "loss": 2.3512, "step": 217150 }, { "epoch": 0.8394798286712746, "grad_norm": 0.10943536460399628, "learning_rate": 0.002, "loss": 2.3259, "step": 217160 }, { "epoch": 0.8395184858746579, "grad_norm": 0.10708934813737869, "learning_rate": 0.002, "loss": 2.3444, "step": 217170 }, { "epoch": 0.8395571430780412, "grad_norm": 0.11243987083435059, "learning_rate": 0.002, "loss": 2.347, "step": 217180 }, { "epoch": 0.8395958002814244, "grad_norm": 0.11842658370733261, "learning_rate": 0.002, "loss": 2.3506, "step": 217190 }, { "epoch": 0.8396344574848077, "grad_norm": 0.10376089066267014, "learning_rate": 0.002, "loss": 2.3443, "step": 217200 }, { "epoch": 0.839673114688191, "grad_norm": 0.11659283190965652, "learning_rate": 0.002, "loss": 2.3255, "step": 217210 }, { "epoch": 0.8397117718915743, "grad_norm": 0.11655773967504501, "learning_rate": 0.002, "loss": 2.3445, "step": 217220 }, { "epoch": 0.8397504290949576, "grad_norm": 0.09950608760118484, "learning_rate": 0.002, "loss": 2.3341, "step": 217230 }, { "epoch": 0.8397890862983408, "grad_norm": 0.10458651185035706, "learning_rate": 0.002, "loss": 2.3255, "step": 217240 }, { "epoch": 0.8398277435017241, "grad_norm": 0.10697665065526962, "learning_rate": 0.002, "loss": 2.3433, "step": 217250 }, { "epoch": 0.8398664007051074, "grad_norm": 0.12223963439464569, "learning_rate": 0.002, "loss": 2.3367, "step": 217260 }, { "epoch": 0.8399050579084907, "grad_norm": 0.095971018075943, "learning_rate": 0.002, "loss": 2.3492, "step": 217270 }, { "epoch": 0.8399437151118739, "grad_norm": 0.09986023604869843, "learning_rate": 0.002, "loss": 2.3401, "step": 217280 }, { "epoch": 0.8399823723152572, "grad_norm": 0.10367639362812042, "learning_rate": 0.002, "loss": 2.3255, "step": 217290 }, { "epoch": 0.8400210295186405, "grad_norm": 0.10191434621810913, "learning_rate": 0.002, "loss": 2.3429, "step": 217300 }, { "epoch": 0.8400596867220238, "grad_norm": 0.10491587221622467, "learning_rate": 0.002, "loss": 2.3334, "step": 217310 }, { "epoch": 0.840098343925407, "grad_norm": 0.11406951397657394, "learning_rate": 0.002, "loss": 2.3205, "step": 217320 }, { "epoch": 0.8401370011287903, "grad_norm": 0.10647682100534439, "learning_rate": 0.002, "loss": 2.3396, "step": 217330 }, { "epoch": 0.8401756583321737, "grad_norm": 0.10654482245445251, "learning_rate": 0.002, "loss": 2.3357, "step": 217340 }, { "epoch": 0.8402143155355569, "grad_norm": 0.11024165153503418, "learning_rate": 0.002, "loss": 2.3496, "step": 217350 }, { "epoch": 0.8402529727389402, "grad_norm": 0.09606366604566574, "learning_rate": 0.002, "loss": 2.3365, "step": 217360 }, { "epoch": 0.8402916299423234, "grad_norm": 0.1414986103773117, "learning_rate": 0.002, "loss": 2.341, "step": 217370 }, { "epoch": 0.8403302871457067, "grad_norm": 0.09301739931106567, "learning_rate": 0.002, "loss": 2.3419, "step": 217380 }, { "epoch": 0.84036894434909, "grad_norm": 0.10734853148460388, "learning_rate": 0.002, "loss": 2.333, "step": 217390 }, { "epoch": 0.8404076015524733, "grad_norm": 0.09502530097961426, "learning_rate": 0.002, "loss": 2.3518, "step": 217400 }, { "epoch": 0.8404462587558565, "grad_norm": 0.09801766276359558, "learning_rate": 0.002, "loss": 2.3408, "step": 217410 }, { "epoch": 0.8404849159592398, "grad_norm": 0.10311899334192276, "learning_rate": 0.002, "loss": 2.327, "step": 217420 }, { "epoch": 0.8405235731626232, "grad_norm": 0.10635611414909363, "learning_rate": 0.002, "loss": 2.3423, "step": 217430 }, { "epoch": 0.8405622303660064, "grad_norm": 0.10635276883840561, "learning_rate": 0.002, "loss": 2.3267, "step": 217440 }, { "epoch": 0.8406008875693897, "grad_norm": 0.11130545288324356, "learning_rate": 0.002, "loss": 2.3598, "step": 217450 }, { "epoch": 0.8406395447727729, "grad_norm": 0.10718841850757599, "learning_rate": 0.002, "loss": 2.3447, "step": 217460 }, { "epoch": 0.8406782019761563, "grad_norm": 0.10781049728393555, "learning_rate": 0.002, "loss": 2.3479, "step": 217470 }, { "epoch": 0.8407168591795395, "grad_norm": 0.10921121388673782, "learning_rate": 0.002, "loss": 2.3438, "step": 217480 }, { "epoch": 0.8407555163829228, "grad_norm": 0.09522861242294312, "learning_rate": 0.002, "loss": 2.3349, "step": 217490 }, { "epoch": 0.840794173586306, "grad_norm": 0.10912470519542694, "learning_rate": 0.002, "loss": 2.348, "step": 217500 }, { "epoch": 0.8408328307896894, "grad_norm": 0.12018518149852753, "learning_rate": 0.002, "loss": 2.3339, "step": 217510 }, { "epoch": 0.8408714879930727, "grad_norm": 0.12558038532733917, "learning_rate": 0.002, "loss": 2.3392, "step": 217520 }, { "epoch": 0.8409101451964559, "grad_norm": 0.09982512146234512, "learning_rate": 0.002, "loss": 2.3388, "step": 217530 }, { "epoch": 0.8409488023998392, "grad_norm": 0.09959869831800461, "learning_rate": 0.002, "loss": 2.3336, "step": 217540 }, { "epoch": 0.8409874596032224, "grad_norm": 0.10776180773973465, "learning_rate": 0.002, "loss": 2.3418, "step": 217550 }, { "epoch": 0.8410261168066058, "grad_norm": 0.11372330039739609, "learning_rate": 0.002, "loss": 2.348, "step": 217560 }, { "epoch": 0.841064774009989, "grad_norm": 0.09761717915534973, "learning_rate": 0.002, "loss": 2.3348, "step": 217570 }, { "epoch": 0.8411034312133723, "grad_norm": 0.12191992998123169, "learning_rate": 0.002, "loss": 2.332, "step": 217580 }, { "epoch": 0.8411420884167555, "grad_norm": 0.09978660941123962, "learning_rate": 0.002, "loss": 2.3386, "step": 217590 }, { "epoch": 0.8411807456201389, "grad_norm": 0.1122688353061676, "learning_rate": 0.002, "loss": 2.3421, "step": 217600 }, { "epoch": 0.8412194028235221, "grad_norm": 0.0917871966958046, "learning_rate": 0.002, "loss": 2.3201, "step": 217610 }, { "epoch": 0.8412580600269054, "grad_norm": 0.11007138341665268, "learning_rate": 0.002, "loss": 2.3366, "step": 217620 }, { "epoch": 0.8412967172302886, "grad_norm": 0.11412589251995087, "learning_rate": 0.002, "loss": 2.3429, "step": 217630 }, { "epoch": 0.841335374433672, "grad_norm": 0.09647293388843536, "learning_rate": 0.002, "loss": 2.3481, "step": 217640 }, { "epoch": 0.8413740316370553, "grad_norm": 0.10458957403898239, "learning_rate": 0.002, "loss": 2.334, "step": 217650 }, { "epoch": 0.8414126888404385, "grad_norm": 0.09868065267801285, "learning_rate": 0.002, "loss": 2.3435, "step": 217660 }, { "epoch": 0.8414513460438218, "grad_norm": 0.1279451549053192, "learning_rate": 0.002, "loss": 2.3465, "step": 217670 }, { "epoch": 0.8414900032472051, "grad_norm": 0.10215198248624802, "learning_rate": 0.002, "loss": 2.3322, "step": 217680 }, { "epoch": 0.8415286604505884, "grad_norm": 0.10315291583538055, "learning_rate": 0.002, "loss": 2.3431, "step": 217690 }, { "epoch": 0.8415673176539716, "grad_norm": 0.11462391912937164, "learning_rate": 0.002, "loss": 2.3396, "step": 217700 }, { "epoch": 0.8416059748573549, "grad_norm": 0.09662192314863205, "learning_rate": 0.002, "loss": 2.3467, "step": 217710 }, { "epoch": 0.8416446320607383, "grad_norm": 0.11247068643569946, "learning_rate": 0.002, "loss": 2.3207, "step": 217720 }, { "epoch": 0.8416832892641215, "grad_norm": 0.10318507999181747, "learning_rate": 0.002, "loss": 2.3353, "step": 217730 }, { "epoch": 0.8417219464675048, "grad_norm": 0.1108061671257019, "learning_rate": 0.002, "loss": 2.3474, "step": 217740 }, { "epoch": 0.841760603670888, "grad_norm": 0.11808360368013382, "learning_rate": 0.002, "loss": 2.3187, "step": 217750 }, { "epoch": 0.8417992608742713, "grad_norm": 0.10940134525299072, "learning_rate": 0.002, "loss": 2.3371, "step": 217760 }, { "epoch": 0.8418379180776546, "grad_norm": 0.10000176727771759, "learning_rate": 0.002, "loss": 2.3317, "step": 217770 }, { "epoch": 0.8418765752810379, "grad_norm": 0.11437246203422546, "learning_rate": 0.002, "loss": 2.3481, "step": 217780 }, { "epoch": 0.8419152324844211, "grad_norm": 0.10316790640354156, "learning_rate": 0.002, "loss": 2.3397, "step": 217790 }, { "epoch": 0.8419538896878044, "grad_norm": 0.122736856341362, "learning_rate": 0.002, "loss": 2.3375, "step": 217800 }, { "epoch": 0.8419925468911877, "grad_norm": 0.09634695202112198, "learning_rate": 0.002, "loss": 2.3568, "step": 217810 }, { "epoch": 0.842031204094571, "grad_norm": 0.11506647616624832, "learning_rate": 0.002, "loss": 2.3504, "step": 217820 }, { "epoch": 0.8420698612979542, "grad_norm": 0.11156859248876572, "learning_rate": 0.002, "loss": 2.333, "step": 217830 }, { "epoch": 0.8421085185013375, "grad_norm": 0.11431318521499634, "learning_rate": 0.002, "loss": 2.3482, "step": 217840 }, { "epoch": 0.8421471757047209, "grad_norm": 0.10677983611822128, "learning_rate": 0.002, "loss": 2.3442, "step": 217850 }, { "epoch": 0.8421858329081041, "grad_norm": 0.1121184453368187, "learning_rate": 0.002, "loss": 2.3484, "step": 217860 }, { "epoch": 0.8422244901114874, "grad_norm": 0.11475158482789993, "learning_rate": 0.002, "loss": 2.3515, "step": 217870 }, { "epoch": 0.8422631473148706, "grad_norm": 0.10288957506418228, "learning_rate": 0.002, "loss": 2.3349, "step": 217880 }, { "epoch": 0.842301804518254, "grad_norm": 0.12002434581518173, "learning_rate": 0.002, "loss": 2.3395, "step": 217890 }, { "epoch": 0.8423404617216372, "grad_norm": 0.09990371018648148, "learning_rate": 0.002, "loss": 2.3381, "step": 217900 }, { "epoch": 0.8423791189250205, "grad_norm": 0.09702201932668686, "learning_rate": 0.002, "loss": 2.3358, "step": 217910 }, { "epoch": 0.8424177761284037, "grad_norm": 0.10541484504938126, "learning_rate": 0.002, "loss": 2.3354, "step": 217920 }, { "epoch": 0.842456433331787, "grad_norm": 0.24822331964969635, "learning_rate": 0.002, "loss": 2.345, "step": 217930 }, { "epoch": 0.8424950905351704, "grad_norm": 0.09540753811597824, "learning_rate": 0.002, "loss": 2.3478, "step": 217940 }, { "epoch": 0.8425337477385536, "grad_norm": 0.12220920622348785, "learning_rate": 0.002, "loss": 2.3241, "step": 217950 }, { "epoch": 0.8425724049419369, "grad_norm": 0.1137244924902916, "learning_rate": 0.002, "loss": 2.3345, "step": 217960 }, { "epoch": 0.8426110621453201, "grad_norm": 0.1189429759979248, "learning_rate": 0.002, "loss": 2.3482, "step": 217970 }, { "epoch": 0.8426497193487035, "grad_norm": 0.09206331521272659, "learning_rate": 0.002, "loss": 2.3498, "step": 217980 }, { "epoch": 0.8426883765520867, "grad_norm": 0.08543618023395538, "learning_rate": 0.002, "loss": 2.3397, "step": 217990 }, { "epoch": 0.84272703375547, "grad_norm": 0.13945125043392181, "learning_rate": 0.002, "loss": 2.3531, "step": 218000 }, { "epoch": 0.8427656909588532, "grad_norm": 0.12041846662759781, "learning_rate": 0.002, "loss": 2.3312, "step": 218010 }, { "epoch": 0.8428043481622366, "grad_norm": 0.10267645865678787, "learning_rate": 0.002, "loss": 2.347, "step": 218020 }, { "epoch": 0.8428430053656198, "grad_norm": 0.12095742672681808, "learning_rate": 0.002, "loss": 2.3366, "step": 218030 }, { "epoch": 0.8428816625690031, "grad_norm": 0.09274441003799438, "learning_rate": 0.002, "loss": 2.3355, "step": 218040 }, { "epoch": 0.8429203197723864, "grad_norm": 0.11805014312267303, "learning_rate": 0.002, "loss": 2.3346, "step": 218050 }, { "epoch": 0.8429589769757697, "grad_norm": 0.09663267433643341, "learning_rate": 0.002, "loss": 2.3349, "step": 218060 }, { "epoch": 0.842997634179153, "grad_norm": 0.10639258474111557, "learning_rate": 0.002, "loss": 2.3462, "step": 218070 }, { "epoch": 0.8430362913825362, "grad_norm": 0.09747055172920227, "learning_rate": 0.002, "loss": 2.3465, "step": 218080 }, { "epoch": 0.8430749485859195, "grad_norm": 0.12348346412181854, "learning_rate": 0.002, "loss": 2.3476, "step": 218090 }, { "epoch": 0.8431136057893028, "grad_norm": 0.10564039647579193, "learning_rate": 0.002, "loss": 2.3476, "step": 218100 }, { "epoch": 0.8431522629926861, "grad_norm": 0.10222464054822922, "learning_rate": 0.002, "loss": 2.3442, "step": 218110 }, { "epoch": 0.8431909201960693, "grad_norm": 0.11077496409416199, "learning_rate": 0.002, "loss": 2.3298, "step": 218120 }, { "epoch": 0.8432295773994526, "grad_norm": 0.10465149581432343, "learning_rate": 0.002, "loss": 2.3371, "step": 218130 }, { "epoch": 0.8432682346028358, "grad_norm": 0.09964410960674286, "learning_rate": 0.002, "loss": 2.334, "step": 218140 }, { "epoch": 0.8433068918062192, "grad_norm": 0.0931280329823494, "learning_rate": 0.002, "loss": 2.3364, "step": 218150 }, { "epoch": 0.8433455490096025, "grad_norm": 0.12744703888893127, "learning_rate": 0.002, "loss": 2.3343, "step": 218160 }, { "epoch": 0.8433842062129857, "grad_norm": 0.10196051001548767, "learning_rate": 0.002, "loss": 2.3453, "step": 218170 }, { "epoch": 0.843422863416369, "grad_norm": 0.11853543668985367, "learning_rate": 0.002, "loss": 2.3474, "step": 218180 }, { "epoch": 0.8434615206197523, "grad_norm": 0.1072535440325737, "learning_rate": 0.002, "loss": 2.3448, "step": 218190 }, { "epoch": 0.8435001778231356, "grad_norm": 0.12971144914627075, "learning_rate": 0.002, "loss": 2.3388, "step": 218200 }, { "epoch": 0.8435388350265188, "grad_norm": 0.10555911064147949, "learning_rate": 0.002, "loss": 2.3463, "step": 218210 }, { "epoch": 0.8435774922299021, "grad_norm": 0.12876932322978973, "learning_rate": 0.002, "loss": 2.3351, "step": 218220 }, { "epoch": 0.8436161494332854, "grad_norm": 0.10753574967384338, "learning_rate": 0.002, "loss": 2.3515, "step": 218230 }, { "epoch": 0.8436548066366687, "grad_norm": 0.10081668198108673, "learning_rate": 0.002, "loss": 2.3373, "step": 218240 }, { "epoch": 0.843693463840052, "grad_norm": 0.09541888535022736, "learning_rate": 0.002, "loss": 2.3171, "step": 218250 }, { "epoch": 0.8437321210434352, "grad_norm": 0.12228714674711227, "learning_rate": 0.002, "loss": 2.3555, "step": 218260 }, { "epoch": 0.8437707782468186, "grad_norm": 0.09093743562698364, "learning_rate": 0.002, "loss": 2.3226, "step": 218270 }, { "epoch": 0.8438094354502018, "grad_norm": 0.12039365619421005, "learning_rate": 0.002, "loss": 2.3444, "step": 218280 }, { "epoch": 0.8438480926535851, "grad_norm": 0.10386831313371658, "learning_rate": 0.002, "loss": 2.3466, "step": 218290 }, { "epoch": 0.8438867498569683, "grad_norm": 0.10784223675727844, "learning_rate": 0.002, "loss": 2.3277, "step": 218300 }, { "epoch": 0.8439254070603516, "grad_norm": 0.10106562077999115, "learning_rate": 0.002, "loss": 2.3422, "step": 218310 }, { "epoch": 0.8439640642637349, "grad_norm": 0.09477700293064117, "learning_rate": 0.002, "loss": 2.3331, "step": 218320 }, { "epoch": 0.8440027214671182, "grad_norm": 0.09081584215164185, "learning_rate": 0.002, "loss": 2.3316, "step": 218330 }, { "epoch": 0.8440413786705014, "grad_norm": 0.09890346229076385, "learning_rate": 0.002, "loss": 2.337, "step": 218340 }, { "epoch": 0.8440800358738847, "grad_norm": 0.0994071513414383, "learning_rate": 0.002, "loss": 2.3577, "step": 218350 }, { "epoch": 0.8441186930772681, "grad_norm": 0.09795328974723816, "learning_rate": 0.002, "loss": 2.3386, "step": 218360 }, { "epoch": 0.8441573502806513, "grad_norm": 0.10990192741155624, "learning_rate": 0.002, "loss": 2.3367, "step": 218370 }, { "epoch": 0.8441960074840346, "grad_norm": 0.1020970419049263, "learning_rate": 0.002, "loss": 2.3367, "step": 218380 }, { "epoch": 0.8442346646874178, "grad_norm": 0.12286726385354996, "learning_rate": 0.002, "loss": 2.336, "step": 218390 }, { "epoch": 0.8442733218908012, "grad_norm": 0.10366500914096832, "learning_rate": 0.002, "loss": 2.3396, "step": 218400 }, { "epoch": 0.8443119790941844, "grad_norm": 0.09809817373752594, "learning_rate": 0.002, "loss": 2.3456, "step": 218410 }, { "epoch": 0.8443506362975677, "grad_norm": 0.09430515021085739, "learning_rate": 0.002, "loss": 2.3303, "step": 218420 }, { "epoch": 0.8443892935009509, "grad_norm": 0.10951139777898788, "learning_rate": 0.002, "loss": 2.3349, "step": 218430 }, { "epoch": 0.8444279507043343, "grad_norm": 0.1111026182770729, "learning_rate": 0.002, "loss": 2.3523, "step": 218440 }, { "epoch": 0.8444666079077175, "grad_norm": 0.10476387292146683, "learning_rate": 0.002, "loss": 2.323, "step": 218450 }, { "epoch": 0.8445052651111008, "grad_norm": 0.09188532829284668, "learning_rate": 0.002, "loss": 2.3449, "step": 218460 }, { "epoch": 0.844543922314484, "grad_norm": 0.10570862889289856, "learning_rate": 0.002, "loss": 2.3492, "step": 218470 }, { "epoch": 0.8445825795178673, "grad_norm": 0.10107731819152832, "learning_rate": 0.002, "loss": 2.3278, "step": 218480 }, { "epoch": 0.8446212367212507, "grad_norm": 0.1053171381354332, "learning_rate": 0.002, "loss": 2.3435, "step": 218490 }, { "epoch": 0.8446598939246339, "grad_norm": 0.11416930705308914, "learning_rate": 0.002, "loss": 2.3435, "step": 218500 }, { "epoch": 0.8446985511280172, "grad_norm": 0.10873345285654068, "learning_rate": 0.002, "loss": 2.334, "step": 218510 }, { "epoch": 0.8447372083314004, "grad_norm": 0.11997786164283752, "learning_rate": 0.002, "loss": 2.3354, "step": 218520 }, { "epoch": 0.8447758655347838, "grad_norm": 0.09660268574953079, "learning_rate": 0.002, "loss": 2.3232, "step": 218530 }, { "epoch": 0.844814522738167, "grad_norm": 0.12605643272399902, "learning_rate": 0.002, "loss": 2.3426, "step": 218540 }, { "epoch": 0.8448531799415503, "grad_norm": 0.12503686547279358, "learning_rate": 0.002, "loss": 2.3374, "step": 218550 }, { "epoch": 0.8448918371449335, "grad_norm": 0.09812545776367188, "learning_rate": 0.002, "loss": 2.3526, "step": 218560 }, { "epoch": 0.8449304943483169, "grad_norm": 0.1045181006193161, "learning_rate": 0.002, "loss": 2.3446, "step": 218570 }, { "epoch": 0.8449691515517002, "grad_norm": 0.09139660745859146, "learning_rate": 0.002, "loss": 2.3304, "step": 218580 }, { "epoch": 0.8450078087550834, "grad_norm": 0.11241042613983154, "learning_rate": 0.002, "loss": 2.3324, "step": 218590 }, { "epoch": 0.8450464659584667, "grad_norm": 0.11408902704715729, "learning_rate": 0.002, "loss": 2.3457, "step": 218600 }, { "epoch": 0.84508512316185, "grad_norm": 0.09838565438985825, "learning_rate": 0.002, "loss": 2.3428, "step": 218610 }, { "epoch": 0.8451237803652333, "grad_norm": 0.09781533479690552, "learning_rate": 0.002, "loss": 2.3406, "step": 218620 }, { "epoch": 0.8451624375686165, "grad_norm": 0.09544527530670166, "learning_rate": 0.002, "loss": 2.3355, "step": 218630 }, { "epoch": 0.8452010947719998, "grad_norm": 0.10308004170656204, "learning_rate": 0.002, "loss": 2.3433, "step": 218640 }, { "epoch": 0.8452397519753831, "grad_norm": 0.11524269729852676, "learning_rate": 0.002, "loss": 2.3512, "step": 218650 }, { "epoch": 0.8452784091787664, "grad_norm": 0.15980708599090576, "learning_rate": 0.002, "loss": 2.3477, "step": 218660 }, { "epoch": 0.8453170663821497, "grad_norm": 0.11850898712873459, "learning_rate": 0.002, "loss": 2.3514, "step": 218670 }, { "epoch": 0.8453557235855329, "grad_norm": 0.12017089873552322, "learning_rate": 0.002, "loss": 2.3234, "step": 218680 }, { "epoch": 0.8453943807889162, "grad_norm": 0.11325656622648239, "learning_rate": 0.002, "loss": 2.364, "step": 218690 }, { "epoch": 0.8454330379922995, "grad_norm": 0.10288386791944504, "learning_rate": 0.002, "loss": 2.3324, "step": 218700 }, { "epoch": 0.8454716951956828, "grad_norm": 0.10763750225305557, "learning_rate": 0.002, "loss": 2.348, "step": 218710 }, { "epoch": 0.845510352399066, "grad_norm": 0.12071944773197174, "learning_rate": 0.002, "loss": 2.3232, "step": 218720 }, { "epoch": 0.8455490096024493, "grad_norm": 0.10066035389900208, "learning_rate": 0.002, "loss": 2.3435, "step": 218730 }, { "epoch": 0.8455876668058326, "grad_norm": 0.11219926178455353, "learning_rate": 0.002, "loss": 2.3359, "step": 218740 }, { "epoch": 0.8456263240092159, "grad_norm": 0.09474202990531921, "learning_rate": 0.002, "loss": 2.3456, "step": 218750 }, { "epoch": 0.8456649812125991, "grad_norm": 0.11808373034000397, "learning_rate": 0.002, "loss": 2.3422, "step": 218760 }, { "epoch": 0.8457036384159824, "grad_norm": 0.10525443404912949, "learning_rate": 0.002, "loss": 2.3322, "step": 218770 }, { "epoch": 0.8457422956193658, "grad_norm": 0.09398797154426575, "learning_rate": 0.002, "loss": 2.3388, "step": 218780 }, { "epoch": 0.845780952822749, "grad_norm": 0.11471854150295258, "learning_rate": 0.002, "loss": 2.3423, "step": 218790 }, { "epoch": 0.8458196100261323, "grad_norm": 0.10278965532779694, "learning_rate": 0.002, "loss": 2.3337, "step": 218800 }, { "epoch": 0.8458582672295155, "grad_norm": 0.10400038957595825, "learning_rate": 0.002, "loss": 2.3323, "step": 218810 }, { "epoch": 0.8458969244328989, "grad_norm": 0.09779319167137146, "learning_rate": 0.002, "loss": 2.345, "step": 218820 }, { "epoch": 0.8459355816362821, "grad_norm": 0.13267405331134796, "learning_rate": 0.002, "loss": 2.3356, "step": 218830 }, { "epoch": 0.8459742388396654, "grad_norm": 0.10132269561290741, "learning_rate": 0.002, "loss": 2.3302, "step": 218840 }, { "epoch": 0.8460128960430486, "grad_norm": 0.10761400312185287, "learning_rate": 0.002, "loss": 2.3349, "step": 218850 }, { "epoch": 0.8460515532464319, "grad_norm": 0.11506160348653793, "learning_rate": 0.002, "loss": 2.3388, "step": 218860 }, { "epoch": 0.8460902104498152, "grad_norm": 0.09911599010229111, "learning_rate": 0.002, "loss": 2.3471, "step": 218870 }, { "epoch": 0.8461288676531985, "grad_norm": 0.09980878233909607, "learning_rate": 0.002, "loss": 2.3334, "step": 218880 }, { "epoch": 0.8461675248565818, "grad_norm": 0.12252838164567947, "learning_rate": 0.002, "loss": 2.3298, "step": 218890 }, { "epoch": 0.846206182059965, "grad_norm": 0.10865668207406998, "learning_rate": 0.002, "loss": 2.3334, "step": 218900 }, { "epoch": 0.8462448392633484, "grad_norm": 0.08866766840219498, "learning_rate": 0.002, "loss": 2.3433, "step": 218910 }, { "epoch": 0.8462834964667316, "grad_norm": 0.1000230684876442, "learning_rate": 0.002, "loss": 2.3373, "step": 218920 }, { "epoch": 0.8463221536701149, "grad_norm": 0.09783598780632019, "learning_rate": 0.002, "loss": 2.34, "step": 218930 }, { "epoch": 0.8463608108734981, "grad_norm": 0.09208564460277557, "learning_rate": 0.002, "loss": 2.3387, "step": 218940 }, { "epoch": 0.8463994680768815, "grad_norm": 0.11278524994850159, "learning_rate": 0.002, "loss": 2.3333, "step": 218950 }, { "epoch": 0.8464381252802647, "grad_norm": 0.10701495409011841, "learning_rate": 0.002, "loss": 2.3419, "step": 218960 }, { "epoch": 0.846476782483648, "grad_norm": 0.09828449040651321, "learning_rate": 0.002, "loss": 2.3385, "step": 218970 }, { "epoch": 0.8465154396870312, "grad_norm": 0.11303871124982834, "learning_rate": 0.002, "loss": 2.3301, "step": 218980 }, { "epoch": 0.8465540968904146, "grad_norm": 0.12167762964963913, "learning_rate": 0.002, "loss": 2.3455, "step": 218990 }, { "epoch": 0.8465927540937979, "grad_norm": 0.11140464246273041, "learning_rate": 0.002, "loss": 2.34, "step": 219000 }, { "epoch": 0.8466314112971811, "grad_norm": 0.10240872204303741, "learning_rate": 0.002, "loss": 2.3177, "step": 219010 }, { "epoch": 0.8466700685005644, "grad_norm": 0.11569183319807053, "learning_rate": 0.002, "loss": 2.3411, "step": 219020 }, { "epoch": 0.8467087257039477, "grad_norm": 0.118963323533535, "learning_rate": 0.002, "loss": 2.363, "step": 219030 }, { "epoch": 0.846747382907331, "grad_norm": 0.09310389310121536, "learning_rate": 0.002, "loss": 2.3424, "step": 219040 }, { "epoch": 0.8467860401107142, "grad_norm": 0.10494721680879593, "learning_rate": 0.002, "loss": 2.33, "step": 219050 }, { "epoch": 0.8468246973140975, "grad_norm": 0.10457353293895721, "learning_rate": 0.002, "loss": 2.3564, "step": 219060 }, { "epoch": 0.8468633545174807, "grad_norm": 0.11794960498809814, "learning_rate": 0.002, "loss": 2.3325, "step": 219070 }, { "epoch": 0.8469020117208641, "grad_norm": 0.11988719552755356, "learning_rate": 0.002, "loss": 2.3451, "step": 219080 }, { "epoch": 0.8469406689242474, "grad_norm": 0.12246638536453247, "learning_rate": 0.002, "loss": 2.3349, "step": 219090 }, { "epoch": 0.8469793261276306, "grad_norm": 0.10058612376451492, "learning_rate": 0.002, "loss": 2.342, "step": 219100 }, { "epoch": 0.8470179833310139, "grad_norm": 0.10365330427885056, "learning_rate": 0.002, "loss": 2.348, "step": 219110 }, { "epoch": 0.8470566405343972, "grad_norm": 0.10714240372180939, "learning_rate": 0.002, "loss": 2.3187, "step": 219120 }, { "epoch": 0.8470952977377805, "grad_norm": 0.10079375654459, "learning_rate": 0.002, "loss": 2.3281, "step": 219130 }, { "epoch": 0.8471339549411637, "grad_norm": 0.10422948002815247, "learning_rate": 0.002, "loss": 2.3402, "step": 219140 }, { "epoch": 0.847172612144547, "grad_norm": 0.15729708969593048, "learning_rate": 0.002, "loss": 2.3161, "step": 219150 }, { "epoch": 0.8472112693479303, "grad_norm": 0.09796754270792007, "learning_rate": 0.002, "loss": 2.3263, "step": 219160 }, { "epoch": 0.8472499265513136, "grad_norm": 0.09482486546039581, "learning_rate": 0.002, "loss": 2.3381, "step": 219170 }, { "epoch": 0.8472885837546968, "grad_norm": 0.0921812355518341, "learning_rate": 0.002, "loss": 2.3334, "step": 219180 }, { "epoch": 0.8473272409580801, "grad_norm": 0.09821398556232452, "learning_rate": 0.002, "loss": 2.3462, "step": 219190 }, { "epoch": 0.8473658981614635, "grad_norm": 0.10597255825996399, "learning_rate": 0.002, "loss": 2.347, "step": 219200 }, { "epoch": 0.8474045553648467, "grad_norm": 0.10136741399765015, "learning_rate": 0.002, "loss": 2.3483, "step": 219210 }, { "epoch": 0.84744321256823, "grad_norm": 0.10990548878908157, "learning_rate": 0.002, "loss": 2.344, "step": 219220 }, { "epoch": 0.8474818697716132, "grad_norm": 0.12417969852685928, "learning_rate": 0.002, "loss": 2.3298, "step": 219230 }, { "epoch": 0.8475205269749965, "grad_norm": 0.17158718407154083, "learning_rate": 0.002, "loss": 2.3453, "step": 219240 }, { "epoch": 0.8475591841783798, "grad_norm": 0.10663104057312012, "learning_rate": 0.002, "loss": 2.346, "step": 219250 }, { "epoch": 0.8475978413817631, "grad_norm": 0.09438648074865341, "learning_rate": 0.002, "loss": 2.3395, "step": 219260 }, { "epoch": 0.8476364985851463, "grad_norm": 0.09565073251724243, "learning_rate": 0.002, "loss": 2.3357, "step": 219270 }, { "epoch": 0.8476751557885296, "grad_norm": 0.11593829095363617, "learning_rate": 0.002, "loss": 2.331, "step": 219280 }, { "epoch": 0.847713812991913, "grad_norm": 0.10374920070171356, "learning_rate": 0.002, "loss": 2.3379, "step": 219290 }, { "epoch": 0.8477524701952962, "grad_norm": 0.1203596442937851, "learning_rate": 0.002, "loss": 2.3435, "step": 219300 }, { "epoch": 0.8477911273986795, "grad_norm": 0.1203400269150734, "learning_rate": 0.002, "loss": 2.3356, "step": 219310 }, { "epoch": 0.8478297846020627, "grad_norm": 0.09964600205421448, "learning_rate": 0.002, "loss": 2.3458, "step": 219320 }, { "epoch": 0.8478684418054461, "grad_norm": 0.1544189155101776, "learning_rate": 0.002, "loss": 2.3363, "step": 219330 }, { "epoch": 0.8479070990088293, "grad_norm": 0.09774701297283173, "learning_rate": 0.002, "loss": 2.3451, "step": 219340 }, { "epoch": 0.8479457562122126, "grad_norm": 0.09581021964550018, "learning_rate": 0.002, "loss": 2.3497, "step": 219350 }, { "epoch": 0.8479844134155958, "grad_norm": 0.0978235974907875, "learning_rate": 0.002, "loss": 2.3456, "step": 219360 }, { "epoch": 0.8480230706189792, "grad_norm": 0.2410297989845276, "learning_rate": 0.002, "loss": 2.3325, "step": 219370 }, { "epoch": 0.8480617278223624, "grad_norm": 0.10258159786462784, "learning_rate": 0.002, "loss": 2.3547, "step": 219380 }, { "epoch": 0.8481003850257457, "grad_norm": 0.11392287164926529, "learning_rate": 0.002, "loss": 2.3557, "step": 219390 }, { "epoch": 0.848139042229129, "grad_norm": 0.10421387106180191, "learning_rate": 0.002, "loss": 2.3487, "step": 219400 }, { "epoch": 0.8481776994325122, "grad_norm": 0.11930263787508011, "learning_rate": 0.002, "loss": 2.3613, "step": 219410 }, { "epoch": 0.8482163566358956, "grad_norm": 0.11117282509803772, "learning_rate": 0.002, "loss": 2.3419, "step": 219420 }, { "epoch": 0.8482550138392788, "grad_norm": 0.10350653529167175, "learning_rate": 0.002, "loss": 2.3284, "step": 219430 }, { "epoch": 0.8482936710426621, "grad_norm": 0.10714832693338394, "learning_rate": 0.002, "loss": 2.346, "step": 219440 }, { "epoch": 0.8483323282460453, "grad_norm": 0.09527911245822906, "learning_rate": 0.002, "loss": 2.3528, "step": 219450 }, { "epoch": 0.8483709854494287, "grad_norm": 0.10346433520317078, "learning_rate": 0.002, "loss": 2.3307, "step": 219460 }, { "epoch": 0.8484096426528119, "grad_norm": 0.13404396176338196, "learning_rate": 0.002, "loss": 2.3395, "step": 219470 }, { "epoch": 0.8484482998561952, "grad_norm": 0.10561177879571915, "learning_rate": 0.002, "loss": 2.3299, "step": 219480 }, { "epoch": 0.8484869570595784, "grad_norm": 0.10402088612318039, "learning_rate": 0.002, "loss": 2.3465, "step": 219490 }, { "epoch": 0.8485256142629618, "grad_norm": 0.1081523671746254, "learning_rate": 0.002, "loss": 2.349, "step": 219500 }, { "epoch": 0.848564271466345, "grad_norm": 0.10165558010339737, "learning_rate": 0.002, "loss": 2.3342, "step": 219510 }, { "epoch": 0.8486029286697283, "grad_norm": 0.10864666104316711, "learning_rate": 0.002, "loss": 2.3323, "step": 219520 }, { "epoch": 0.8486415858731116, "grad_norm": 0.11919961124658585, "learning_rate": 0.002, "loss": 2.3313, "step": 219530 }, { "epoch": 0.8486802430764949, "grad_norm": 0.10787352174520493, "learning_rate": 0.002, "loss": 2.3555, "step": 219540 }, { "epoch": 0.8487189002798782, "grad_norm": 0.09709031134843826, "learning_rate": 0.002, "loss": 2.3334, "step": 219550 }, { "epoch": 0.8487575574832614, "grad_norm": 0.11438827961683273, "learning_rate": 0.002, "loss": 2.3562, "step": 219560 }, { "epoch": 0.8487962146866447, "grad_norm": 0.20422634482383728, "learning_rate": 0.002, "loss": 2.3408, "step": 219570 }, { "epoch": 0.848834871890028, "grad_norm": 0.104178287088871, "learning_rate": 0.002, "loss": 2.371, "step": 219580 }, { "epoch": 0.8488735290934113, "grad_norm": 0.10315261781215668, "learning_rate": 0.002, "loss": 2.3414, "step": 219590 }, { "epoch": 0.8489121862967945, "grad_norm": 0.1082720160484314, "learning_rate": 0.002, "loss": 2.346, "step": 219600 }, { "epoch": 0.8489508435001778, "grad_norm": 0.11481138318777084, "learning_rate": 0.002, "loss": 2.351, "step": 219610 }, { "epoch": 0.848989500703561, "grad_norm": 0.09659004211425781, "learning_rate": 0.002, "loss": 2.3284, "step": 219620 }, { "epoch": 0.8490281579069444, "grad_norm": 0.10843570530414581, "learning_rate": 0.002, "loss": 2.3453, "step": 219630 }, { "epoch": 0.8490668151103277, "grad_norm": 0.10417255759239197, "learning_rate": 0.002, "loss": 2.3441, "step": 219640 }, { "epoch": 0.8491054723137109, "grad_norm": 0.09357796609401703, "learning_rate": 0.002, "loss": 2.3452, "step": 219650 }, { "epoch": 0.8491441295170942, "grad_norm": 0.10457409173250198, "learning_rate": 0.002, "loss": 2.3159, "step": 219660 }, { "epoch": 0.8491827867204775, "grad_norm": 0.10815020650625229, "learning_rate": 0.002, "loss": 2.335, "step": 219670 }, { "epoch": 0.8492214439238608, "grad_norm": 0.09615054726600647, "learning_rate": 0.002, "loss": 2.3417, "step": 219680 }, { "epoch": 0.849260101127244, "grad_norm": 0.1047089621424675, "learning_rate": 0.002, "loss": 2.347, "step": 219690 }, { "epoch": 0.8492987583306273, "grad_norm": 0.1393386870622635, "learning_rate": 0.002, "loss": 2.3428, "step": 219700 }, { "epoch": 0.8493374155340107, "grad_norm": 0.12716567516326904, "learning_rate": 0.002, "loss": 2.3438, "step": 219710 }, { "epoch": 0.8493760727373939, "grad_norm": 0.14259745180606842, "learning_rate": 0.002, "loss": 2.3393, "step": 219720 }, { "epoch": 0.8494147299407772, "grad_norm": 0.11295412480831146, "learning_rate": 0.002, "loss": 2.3308, "step": 219730 }, { "epoch": 0.8494533871441604, "grad_norm": 0.11027267575263977, "learning_rate": 0.002, "loss": 2.3418, "step": 219740 }, { "epoch": 0.8494920443475438, "grad_norm": 0.11603856086730957, "learning_rate": 0.002, "loss": 2.3367, "step": 219750 }, { "epoch": 0.849530701550927, "grad_norm": 0.10948418080806732, "learning_rate": 0.002, "loss": 2.3548, "step": 219760 }, { "epoch": 0.8495693587543103, "grad_norm": 0.12260384112596512, "learning_rate": 0.002, "loss": 2.3471, "step": 219770 }, { "epoch": 0.8496080159576935, "grad_norm": 0.4248383343219757, "learning_rate": 0.002, "loss": 2.3591, "step": 219780 }, { "epoch": 0.8496466731610768, "grad_norm": 0.10164932906627655, "learning_rate": 0.002, "loss": 2.3327, "step": 219790 }, { "epoch": 0.8496853303644601, "grad_norm": 1.1660302877426147, "learning_rate": 0.002, "loss": 2.3532, "step": 219800 }, { "epoch": 0.8497239875678434, "grad_norm": 0.10743222385644913, "learning_rate": 0.002, "loss": 2.3484, "step": 219810 }, { "epoch": 0.8497626447712266, "grad_norm": 0.11784674972295761, "learning_rate": 0.002, "loss": 2.3586, "step": 219820 }, { "epoch": 0.8498013019746099, "grad_norm": 0.11492542922496796, "learning_rate": 0.002, "loss": 2.3579, "step": 219830 }, { "epoch": 0.8498399591779933, "grad_norm": 0.09285452216863632, "learning_rate": 0.002, "loss": 2.3515, "step": 219840 }, { "epoch": 0.8498786163813765, "grad_norm": 0.1043357327580452, "learning_rate": 0.002, "loss": 2.3304, "step": 219850 }, { "epoch": 0.8499172735847598, "grad_norm": 0.1173747256398201, "learning_rate": 0.002, "loss": 2.339, "step": 219860 }, { "epoch": 0.849955930788143, "grad_norm": 0.12000737339258194, "learning_rate": 0.002, "loss": 2.3485, "step": 219870 }, { "epoch": 0.8499945879915264, "grad_norm": 0.0965653732419014, "learning_rate": 0.002, "loss": 2.3437, "step": 219880 }, { "epoch": 0.8500332451949096, "grad_norm": 0.09841306507587433, "learning_rate": 0.002, "loss": 2.3332, "step": 219890 }, { "epoch": 0.8500719023982929, "grad_norm": 0.09404002130031586, "learning_rate": 0.002, "loss": 2.3456, "step": 219900 }, { "epoch": 0.8501105596016761, "grad_norm": 0.09891854971647263, "learning_rate": 0.002, "loss": 2.3303, "step": 219910 }, { "epoch": 0.8501492168050595, "grad_norm": 0.10649807751178741, "learning_rate": 0.002, "loss": 2.3436, "step": 219920 }, { "epoch": 0.8501878740084428, "grad_norm": 0.10601424425840378, "learning_rate": 0.002, "loss": 2.3291, "step": 219930 }, { "epoch": 0.850226531211826, "grad_norm": 0.10602995753288269, "learning_rate": 0.002, "loss": 2.3512, "step": 219940 }, { "epoch": 0.8502651884152093, "grad_norm": 0.12841784954071045, "learning_rate": 0.002, "loss": 2.3458, "step": 219950 }, { "epoch": 0.8503038456185925, "grad_norm": 0.10099924355745316, "learning_rate": 0.002, "loss": 2.3527, "step": 219960 }, { "epoch": 0.8503425028219759, "grad_norm": 0.1031048521399498, "learning_rate": 0.002, "loss": 2.3575, "step": 219970 }, { "epoch": 0.8503811600253591, "grad_norm": 0.12088562548160553, "learning_rate": 0.002, "loss": 2.3421, "step": 219980 }, { "epoch": 0.8504198172287424, "grad_norm": 0.10422363132238388, "learning_rate": 0.002, "loss": 2.3366, "step": 219990 }, { "epoch": 0.8504584744321256, "grad_norm": 0.10978923738002777, "learning_rate": 0.002, "loss": 2.3379, "step": 220000 }, { "epoch": 0.850497131635509, "grad_norm": 0.09470580518245697, "learning_rate": 0.002, "loss": 2.3402, "step": 220010 }, { "epoch": 0.8505357888388922, "grad_norm": 0.10753884166479111, "learning_rate": 0.002, "loss": 2.3533, "step": 220020 }, { "epoch": 0.8505744460422755, "grad_norm": 0.10996290296316147, "learning_rate": 0.002, "loss": 2.3436, "step": 220030 }, { "epoch": 0.8506131032456588, "grad_norm": 0.11503701657056808, "learning_rate": 0.002, "loss": 2.3473, "step": 220040 }, { "epoch": 0.8506517604490421, "grad_norm": 0.11009927839040756, "learning_rate": 0.002, "loss": 2.3367, "step": 220050 }, { "epoch": 0.8506904176524254, "grad_norm": 0.11286523938179016, "learning_rate": 0.002, "loss": 2.3383, "step": 220060 }, { "epoch": 0.8507290748558086, "grad_norm": 0.1181272491812706, "learning_rate": 0.002, "loss": 2.3622, "step": 220070 }, { "epoch": 0.8507677320591919, "grad_norm": 0.10894773155450821, "learning_rate": 0.002, "loss": 2.3381, "step": 220080 }, { "epoch": 0.8508063892625752, "grad_norm": 0.10753612220287323, "learning_rate": 0.002, "loss": 2.3418, "step": 220090 }, { "epoch": 0.8508450464659585, "grad_norm": 0.11073257774114609, "learning_rate": 0.002, "loss": 2.3382, "step": 220100 }, { "epoch": 0.8508837036693417, "grad_norm": 0.09844937920570374, "learning_rate": 0.002, "loss": 2.3333, "step": 220110 }, { "epoch": 0.850922360872725, "grad_norm": 0.09576030820608139, "learning_rate": 0.002, "loss": 2.3512, "step": 220120 }, { "epoch": 0.8509610180761084, "grad_norm": 0.107846699655056, "learning_rate": 0.002, "loss": 2.3453, "step": 220130 }, { "epoch": 0.8509996752794916, "grad_norm": 0.10430942475795746, "learning_rate": 0.002, "loss": 2.3451, "step": 220140 }, { "epoch": 0.8510383324828749, "grad_norm": 0.09915446490049362, "learning_rate": 0.002, "loss": 2.3558, "step": 220150 }, { "epoch": 0.8510769896862581, "grad_norm": 0.12929920852184296, "learning_rate": 0.002, "loss": 2.3463, "step": 220160 }, { "epoch": 0.8511156468896414, "grad_norm": 0.10357212275266647, "learning_rate": 0.002, "loss": 2.3354, "step": 220170 }, { "epoch": 0.8511543040930247, "grad_norm": 0.1053905338048935, "learning_rate": 0.002, "loss": 2.3639, "step": 220180 }, { "epoch": 0.851192961296408, "grad_norm": 0.11608556658029556, "learning_rate": 0.002, "loss": 2.3449, "step": 220190 }, { "epoch": 0.8512316184997912, "grad_norm": 0.11105789989233017, "learning_rate": 0.002, "loss": 2.3378, "step": 220200 }, { "epoch": 0.8512702757031745, "grad_norm": 0.11207444965839386, "learning_rate": 0.002, "loss": 2.3323, "step": 220210 }, { "epoch": 0.8513089329065578, "grad_norm": 0.1263352632522583, "learning_rate": 0.002, "loss": 2.3518, "step": 220220 }, { "epoch": 0.8513475901099411, "grad_norm": 0.0969938337802887, "learning_rate": 0.002, "loss": 2.3499, "step": 220230 }, { "epoch": 0.8513862473133244, "grad_norm": 0.09842877835035324, "learning_rate": 0.002, "loss": 2.3529, "step": 220240 }, { "epoch": 0.8514249045167076, "grad_norm": 0.09710202366113663, "learning_rate": 0.002, "loss": 2.327, "step": 220250 }, { "epoch": 0.851463561720091, "grad_norm": 0.10513780266046524, "learning_rate": 0.002, "loss": 2.3488, "step": 220260 }, { "epoch": 0.8515022189234742, "grad_norm": 0.10045460611581802, "learning_rate": 0.002, "loss": 2.3426, "step": 220270 }, { "epoch": 0.8515408761268575, "grad_norm": 0.10101631283760071, "learning_rate": 0.002, "loss": 2.3422, "step": 220280 }, { "epoch": 0.8515795333302407, "grad_norm": 0.09537997096776962, "learning_rate": 0.002, "loss": 2.3314, "step": 220290 }, { "epoch": 0.8516181905336241, "grad_norm": 0.10045386105775833, "learning_rate": 0.002, "loss": 2.3367, "step": 220300 }, { "epoch": 0.8516568477370073, "grad_norm": 0.12381569296121597, "learning_rate": 0.002, "loss": 2.335, "step": 220310 }, { "epoch": 0.8516955049403906, "grad_norm": 0.08416225016117096, "learning_rate": 0.002, "loss": 2.3472, "step": 220320 }, { "epoch": 0.8517341621437738, "grad_norm": 0.09276917576789856, "learning_rate": 0.002, "loss": 2.3549, "step": 220330 }, { "epoch": 0.8517728193471571, "grad_norm": 0.10120249539613724, "learning_rate": 0.002, "loss": 2.3421, "step": 220340 }, { "epoch": 0.8518114765505405, "grad_norm": 0.10174285620450974, "learning_rate": 0.002, "loss": 2.3367, "step": 220350 }, { "epoch": 0.8518501337539237, "grad_norm": 0.09552405774593353, "learning_rate": 0.002, "loss": 2.3453, "step": 220360 }, { "epoch": 0.851888790957307, "grad_norm": 0.09580851346254349, "learning_rate": 0.002, "loss": 2.3398, "step": 220370 }, { "epoch": 0.8519274481606902, "grad_norm": 0.1014307513833046, "learning_rate": 0.002, "loss": 2.347, "step": 220380 }, { "epoch": 0.8519661053640736, "grad_norm": 0.11779122799634933, "learning_rate": 0.002, "loss": 2.33, "step": 220390 }, { "epoch": 0.8520047625674568, "grad_norm": 0.11027638614177704, "learning_rate": 0.002, "loss": 2.3493, "step": 220400 }, { "epoch": 0.8520434197708401, "grad_norm": 0.0975247174501419, "learning_rate": 0.002, "loss": 2.3458, "step": 220410 }, { "epoch": 0.8520820769742233, "grad_norm": 0.12367052584886551, "learning_rate": 0.002, "loss": 2.3323, "step": 220420 }, { "epoch": 0.8521207341776067, "grad_norm": 0.09940453618764877, "learning_rate": 0.002, "loss": 2.3365, "step": 220430 }, { "epoch": 0.85215939138099, "grad_norm": 0.10287129133939743, "learning_rate": 0.002, "loss": 2.333, "step": 220440 }, { "epoch": 0.8521980485843732, "grad_norm": 0.10441652685403824, "learning_rate": 0.002, "loss": 2.3488, "step": 220450 }, { "epoch": 0.8522367057877565, "grad_norm": 0.10059020668268204, "learning_rate": 0.002, "loss": 2.355, "step": 220460 }, { "epoch": 0.8522753629911398, "grad_norm": 0.1422119140625, "learning_rate": 0.002, "loss": 2.3427, "step": 220470 }, { "epoch": 0.8523140201945231, "grad_norm": 0.09308356046676636, "learning_rate": 0.002, "loss": 2.3383, "step": 220480 }, { "epoch": 0.8523526773979063, "grad_norm": 0.11652087420225143, "learning_rate": 0.002, "loss": 2.3343, "step": 220490 }, { "epoch": 0.8523913346012896, "grad_norm": 0.10112705081701279, "learning_rate": 0.002, "loss": 2.3317, "step": 220500 }, { "epoch": 0.8524299918046729, "grad_norm": 0.1368659883737564, "learning_rate": 0.002, "loss": 2.3306, "step": 220510 }, { "epoch": 0.8524686490080562, "grad_norm": 0.10706181079149246, "learning_rate": 0.002, "loss": 2.3516, "step": 220520 }, { "epoch": 0.8525073062114394, "grad_norm": 0.10697004199028015, "learning_rate": 0.002, "loss": 2.3217, "step": 220530 }, { "epoch": 0.8525459634148227, "grad_norm": 0.10608157515525818, "learning_rate": 0.002, "loss": 2.3314, "step": 220540 }, { "epoch": 0.852584620618206, "grad_norm": 0.10999225825071335, "learning_rate": 0.002, "loss": 2.3395, "step": 220550 }, { "epoch": 0.8526232778215893, "grad_norm": 0.09438899904489517, "learning_rate": 0.002, "loss": 2.3302, "step": 220560 }, { "epoch": 0.8526619350249726, "grad_norm": 0.09783659875392914, "learning_rate": 0.002, "loss": 2.3511, "step": 220570 }, { "epoch": 0.8527005922283558, "grad_norm": 0.09867078810930252, "learning_rate": 0.002, "loss": 2.3267, "step": 220580 }, { "epoch": 0.8527392494317391, "grad_norm": 0.09790950268507004, "learning_rate": 0.002, "loss": 2.3297, "step": 220590 }, { "epoch": 0.8527779066351224, "grad_norm": 0.12746822834014893, "learning_rate": 0.002, "loss": 2.3321, "step": 220600 }, { "epoch": 0.8528165638385057, "grad_norm": 0.11303797364234924, "learning_rate": 0.002, "loss": 2.3388, "step": 220610 }, { "epoch": 0.8528552210418889, "grad_norm": 0.0921299010515213, "learning_rate": 0.002, "loss": 2.3385, "step": 220620 }, { "epoch": 0.8528938782452722, "grad_norm": 0.11156327277421951, "learning_rate": 0.002, "loss": 2.3325, "step": 220630 }, { "epoch": 0.8529325354486555, "grad_norm": 0.11616717278957367, "learning_rate": 0.002, "loss": 2.3152, "step": 220640 }, { "epoch": 0.8529711926520388, "grad_norm": 0.09659020602703094, "learning_rate": 0.002, "loss": 2.3402, "step": 220650 }, { "epoch": 0.853009849855422, "grad_norm": 0.1045290008187294, "learning_rate": 0.002, "loss": 2.3417, "step": 220660 }, { "epoch": 0.8530485070588053, "grad_norm": 0.11413311958312988, "learning_rate": 0.002, "loss": 2.3448, "step": 220670 }, { "epoch": 0.8530871642621887, "grad_norm": 0.09833575785160065, "learning_rate": 0.002, "loss": 2.3378, "step": 220680 }, { "epoch": 0.8531258214655719, "grad_norm": 0.11460181325674057, "learning_rate": 0.002, "loss": 2.3426, "step": 220690 }, { "epoch": 0.8531644786689552, "grad_norm": 0.10402899980545044, "learning_rate": 0.002, "loss": 2.3251, "step": 220700 }, { "epoch": 0.8532031358723384, "grad_norm": 0.11590957641601562, "learning_rate": 0.002, "loss": 2.3433, "step": 220710 }, { "epoch": 0.8532417930757217, "grad_norm": 0.10896258801221848, "learning_rate": 0.002, "loss": 2.3336, "step": 220720 }, { "epoch": 0.853280450279105, "grad_norm": 0.10807967185974121, "learning_rate": 0.002, "loss": 2.3299, "step": 220730 }, { "epoch": 0.8533191074824883, "grad_norm": 0.0994463711977005, "learning_rate": 0.002, "loss": 2.3434, "step": 220740 }, { "epoch": 0.8533577646858715, "grad_norm": 0.15874207019805908, "learning_rate": 0.002, "loss": 2.3283, "step": 220750 }, { "epoch": 0.8533964218892548, "grad_norm": 0.11601907759904861, "learning_rate": 0.002, "loss": 2.3411, "step": 220760 }, { "epoch": 0.8534350790926382, "grad_norm": 0.09301315993070602, "learning_rate": 0.002, "loss": 2.3394, "step": 220770 }, { "epoch": 0.8534737362960214, "grad_norm": 0.1044008731842041, "learning_rate": 0.002, "loss": 2.3264, "step": 220780 }, { "epoch": 0.8535123934994047, "grad_norm": 0.10023225098848343, "learning_rate": 0.002, "loss": 2.3503, "step": 220790 }, { "epoch": 0.8535510507027879, "grad_norm": 0.11397735029459, "learning_rate": 0.002, "loss": 2.3509, "step": 220800 }, { "epoch": 0.8535897079061713, "grad_norm": 0.09428733587265015, "learning_rate": 0.002, "loss": 2.3382, "step": 220810 }, { "epoch": 0.8536283651095545, "grad_norm": 0.09366890043020248, "learning_rate": 0.002, "loss": 2.3532, "step": 220820 }, { "epoch": 0.8536670223129378, "grad_norm": 0.09214852005243301, "learning_rate": 0.002, "loss": 2.3455, "step": 220830 }, { "epoch": 0.853705679516321, "grad_norm": 0.09980152547359467, "learning_rate": 0.002, "loss": 2.3265, "step": 220840 }, { "epoch": 0.8537443367197044, "grad_norm": 0.09426254779100418, "learning_rate": 0.002, "loss": 2.357, "step": 220850 }, { "epoch": 0.8537829939230877, "grad_norm": 0.09684693813323975, "learning_rate": 0.002, "loss": 2.3452, "step": 220860 }, { "epoch": 0.8538216511264709, "grad_norm": 0.13577967882156372, "learning_rate": 0.002, "loss": 2.3474, "step": 220870 }, { "epoch": 0.8538603083298542, "grad_norm": 0.11286691576242447, "learning_rate": 0.002, "loss": 2.3356, "step": 220880 }, { "epoch": 0.8538989655332374, "grad_norm": 0.10951905697584152, "learning_rate": 0.002, "loss": 2.3407, "step": 220890 }, { "epoch": 0.8539376227366208, "grad_norm": 0.09772135317325592, "learning_rate": 0.002, "loss": 2.3443, "step": 220900 }, { "epoch": 0.853976279940004, "grad_norm": 0.10379073023796082, "learning_rate": 0.002, "loss": 2.3235, "step": 220910 }, { "epoch": 0.8540149371433873, "grad_norm": 0.10221673548221588, "learning_rate": 0.002, "loss": 2.329, "step": 220920 }, { "epoch": 0.8540535943467705, "grad_norm": 0.1037604883313179, "learning_rate": 0.002, "loss": 2.331, "step": 220930 }, { "epoch": 0.8540922515501539, "grad_norm": 0.09758436679840088, "learning_rate": 0.002, "loss": 2.3506, "step": 220940 }, { "epoch": 0.8541309087535371, "grad_norm": 0.11016526073217392, "learning_rate": 0.002, "loss": 2.3333, "step": 220950 }, { "epoch": 0.8541695659569204, "grad_norm": 0.10051882266998291, "learning_rate": 0.002, "loss": 2.337, "step": 220960 }, { "epoch": 0.8542082231603036, "grad_norm": 0.10580579191446304, "learning_rate": 0.002, "loss": 2.3562, "step": 220970 }, { "epoch": 0.854246880363687, "grad_norm": 0.10832671076059341, "learning_rate": 0.002, "loss": 2.347, "step": 220980 }, { "epoch": 0.8542855375670703, "grad_norm": 0.08817476779222488, "learning_rate": 0.002, "loss": 2.3265, "step": 220990 }, { "epoch": 0.8543241947704535, "grad_norm": 0.10400227457284927, "learning_rate": 0.002, "loss": 2.3366, "step": 221000 }, { "epoch": 0.8543628519738368, "grad_norm": 0.23474979400634766, "learning_rate": 0.002, "loss": 2.3452, "step": 221010 }, { "epoch": 0.8544015091772201, "grad_norm": 0.09855043888092041, "learning_rate": 0.002, "loss": 2.3359, "step": 221020 }, { "epoch": 0.8544401663806034, "grad_norm": 0.10361428558826447, "learning_rate": 0.002, "loss": 2.3313, "step": 221030 }, { "epoch": 0.8544788235839866, "grad_norm": 0.10192447900772095, "learning_rate": 0.002, "loss": 2.3269, "step": 221040 }, { "epoch": 0.8545174807873699, "grad_norm": 0.0955294519662857, "learning_rate": 0.002, "loss": 2.3407, "step": 221050 }, { "epoch": 0.8545561379907533, "grad_norm": 0.08866667747497559, "learning_rate": 0.002, "loss": 2.316, "step": 221060 }, { "epoch": 0.8545947951941365, "grad_norm": 0.10242463648319244, "learning_rate": 0.002, "loss": 2.3331, "step": 221070 }, { "epoch": 0.8546334523975198, "grad_norm": 0.12807013094425201, "learning_rate": 0.002, "loss": 2.3471, "step": 221080 }, { "epoch": 0.854672109600903, "grad_norm": 0.10718151181936264, "learning_rate": 0.002, "loss": 2.344, "step": 221090 }, { "epoch": 0.8547107668042863, "grad_norm": 0.09337849915027618, "learning_rate": 0.002, "loss": 2.3562, "step": 221100 }, { "epoch": 0.8547494240076696, "grad_norm": 0.10777440667152405, "learning_rate": 0.002, "loss": 2.3334, "step": 221110 }, { "epoch": 0.8547880812110529, "grad_norm": 0.11653538048267365, "learning_rate": 0.002, "loss": 2.3357, "step": 221120 }, { "epoch": 0.8548267384144361, "grad_norm": 0.09051918983459473, "learning_rate": 0.002, "loss": 2.3384, "step": 221130 }, { "epoch": 0.8548653956178194, "grad_norm": 0.09129197895526886, "learning_rate": 0.002, "loss": 2.3294, "step": 221140 }, { "epoch": 0.8549040528212027, "grad_norm": 0.1159987598657608, "learning_rate": 0.002, "loss": 2.3386, "step": 221150 }, { "epoch": 0.854942710024586, "grad_norm": 0.1097775474190712, "learning_rate": 0.002, "loss": 2.3407, "step": 221160 }, { "epoch": 0.8549813672279692, "grad_norm": 0.11373516172170639, "learning_rate": 0.002, "loss": 2.342, "step": 221170 }, { "epoch": 0.8550200244313525, "grad_norm": 0.10861944407224655, "learning_rate": 0.002, "loss": 2.3388, "step": 221180 }, { "epoch": 0.8550586816347359, "grad_norm": 0.09435247629880905, "learning_rate": 0.002, "loss": 2.3322, "step": 221190 }, { "epoch": 0.8550973388381191, "grad_norm": 0.11557682603597641, "learning_rate": 0.002, "loss": 2.3521, "step": 221200 }, { "epoch": 0.8551359960415024, "grad_norm": 0.09689876437187195, "learning_rate": 0.002, "loss": 2.3311, "step": 221210 }, { "epoch": 0.8551746532448856, "grad_norm": 0.18857908248901367, "learning_rate": 0.002, "loss": 2.3345, "step": 221220 }, { "epoch": 0.855213310448269, "grad_norm": 0.1228751391172409, "learning_rate": 0.002, "loss": 2.3367, "step": 221230 }, { "epoch": 0.8552519676516522, "grad_norm": 0.1002848893404007, "learning_rate": 0.002, "loss": 2.3427, "step": 221240 }, { "epoch": 0.8552906248550355, "grad_norm": 0.09509483724832535, "learning_rate": 0.002, "loss": 2.3292, "step": 221250 }, { "epoch": 0.8553292820584187, "grad_norm": 0.1349356323480606, "learning_rate": 0.002, "loss": 2.3483, "step": 221260 }, { "epoch": 0.855367939261802, "grad_norm": 0.09846282750368118, "learning_rate": 0.002, "loss": 2.3322, "step": 221270 }, { "epoch": 0.8554065964651854, "grad_norm": 0.09455190598964691, "learning_rate": 0.002, "loss": 2.3296, "step": 221280 }, { "epoch": 0.8554452536685686, "grad_norm": 0.1285431981086731, "learning_rate": 0.002, "loss": 2.3479, "step": 221290 }, { "epoch": 0.8554839108719519, "grad_norm": 0.10386514663696289, "learning_rate": 0.002, "loss": 2.3259, "step": 221300 }, { "epoch": 0.8555225680753351, "grad_norm": 0.09915978461503983, "learning_rate": 0.002, "loss": 2.3387, "step": 221310 }, { "epoch": 0.8555612252787185, "grad_norm": 0.10544169694185257, "learning_rate": 0.002, "loss": 2.3414, "step": 221320 }, { "epoch": 0.8555998824821017, "grad_norm": 0.09322534501552582, "learning_rate": 0.002, "loss": 2.3383, "step": 221330 }, { "epoch": 0.855638539685485, "grad_norm": 0.12254448235034943, "learning_rate": 0.002, "loss": 2.3441, "step": 221340 }, { "epoch": 0.8556771968888682, "grad_norm": 0.09801836311817169, "learning_rate": 0.002, "loss": 2.3424, "step": 221350 }, { "epoch": 0.8557158540922516, "grad_norm": 0.11010506004095078, "learning_rate": 0.002, "loss": 2.3445, "step": 221360 }, { "epoch": 0.8557545112956348, "grad_norm": 0.1010926365852356, "learning_rate": 0.002, "loss": 2.3327, "step": 221370 }, { "epoch": 0.8557931684990181, "grad_norm": 0.09351800382137299, "learning_rate": 0.002, "loss": 2.3378, "step": 221380 }, { "epoch": 0.8558318257024013, "grad_norm": 0.10786961019039154, "learning_rate": 0.002, "loss": 2.3428, "step": 221390 }, { "epoch": 0.8558704829057847, "grad_norm": 0.1168721541762352, "learning_rate": 0.002, "loss": 2.3444, "step": 221400 }, { "epoch": 0.855909140109168, "grad_norm": 0.13645702600479126, "learning_rate": 0.002, "loss": 2.3235, "step": 221410 }, { "epoch": 0.8559477973125512, "grad_norm": 0.09873227775096893, "learning_rate": 0.002, "loss": 2.3344, "step": 221420 }, { "epoch": 0.8559864545159345, "grad_norm": 0.10063973814249039, "learning_rate": 0.002, "loss": 2.3432, "step": 221430 }, { "epoch": 0.8560251117193178, "grad_norm": 0.10681134462356567, "learning_rate": 0.002, "loss": 2.3522, "step": 221440 }, { "epoch": 0.8560637689227011, "grad_norm": 0.12218131124973297, "learning_rate": 0.002, "loss": 2.3502, "step": 221450 }, { "epoch": 0.8561024261260843, "grad_norm": 0.12007876485586166, "learning_rate": 0.002, "loss": 2.3514, "step": 221460 }, { "epoch": 0.8561410833294676, "grad_norm": 0.09673456847667694, "learning_rate": 0.002, "loss": 2.3512, "step": 221470 }, { "epoch": 0.8561797405328508, "grad_norm": 0.10487453639507294, "learning_rate": 0.002, "loss": 2.3291, "step": 221480 }, { "epoch": 0.8562183977362342, "grad_norm": 0.09984520822763443, "learning_rate": 0.002, "loss": 2.3459, "step": 221490 }, { "epoch": 0.8562570549396175, "grad_norm": 0.10763146728277206, "learning_rate": 0.002, "loss": 2.3403, "step": 221500 }, { "epoch": 0.8562957121430007, "grad_norm": 0.10822135955095291, "learning_rate": 0.002, "loss": 2.3456, "step": 221510 }, { "epoch": 0.856334369346384, "grad_norm": 0.10311654955148697, "learning_rate": 0.002, "loss": 2.3292, "step": 221520 }, { "epoch": 0.8563730265497673, "grad_norm": 0.14389926195144653, "learning_rate": 0.002, "loss": 2.3501, "step": 221530 }, { "epoch": 0.8564116837531506, "grad_norm": 0.10449139028787613, "learning_rate": 0.002, "loss": 2.3365, "step": 221540 }, { "epoch": 0.8564503409565338, "grad_norm": 0.10372086614370346, "learning_rate": 0.002, "loss": 2.3412, "step": 221550 }, { "epoch": 0.8564889981599171, "grad_norm": 0.14530038833618164, "learning_rate": 0.002, "loss": 2.3554, "step": 221560 }, { "epoch": 0.8565276553633004, "grad_norm": 0.11310365796089172, "learning_rate": 0.002, "loss": 2.3496, "step": 221570 }, { "epoch": 0.8565663125666837, "grad_norm": 0.10484128445386887, "learning_rate": 0.002, "loss": 2.3505, "step": 221580 }, { "epoch": 0.856604969770067, "grad_norm": 0.10530244559049606, "learning_rate": 0.002, "loss": 2.3523, "step": 221590 }, { "epoch": 0.8566436269734502, "grad_norm": 0.1365823745727539, "learning_rate": 0.002, "loss": 2.3473, "step": 221600 }, { "epoch": 0.8566822841768336, "grad_norm": 0.10979634523391724, "learning_rate": 0.002, "loss": 2.344, "step": 221610 }, { "epoch": 0.8567209413802168, "grad_norm": 0.10430336743593216, "learning_rate": 0.002, "loss": 2.3292, "step": 221620 }, { "epoch": 0.8567595985836001, "grad_norm": 0.11881018429994583, "learning_rate": 0.002, "loss": 2.3282, "step": 221630 }, { "epoch": 0.8567982557869833, "grad_norm": 0.11305271834135056, "learning_rate": 0.002, "loss": 2.3256, "step": 221640 }, { "epoch": 0.8568369129903666, "grad_norm": 0.10439368337392807, "learning_rate": 0.002, "loss": 2.3433, "step": 221650 }, { "epoch": 0.8568755701937499, "grad_norm": 0.10345533490180969, "learning_rate": 0.002, "loss": 2.3493, "step": 221660 }, { "epoch": 0.8569142273971332, "grad_norm": 0.11305728554725647, "learning_rate": 0.002, "loss": 2.3577, "step": 221670 }, { "epoch": 0.8569528846005164, "grad_norm": 0.10882841795682907, "learning_rate": 0.002, "loss": 2.3488, "step": 221680 }, { "epoch": 0.8569915418038997, "grad_norm": 0.09862805157899857, "learning_rate": 0.002, "loss": 2.3419, "step": 221690 }, { "epoch": 0.8570301990072831, "grad_norm": 0.11100216209888458, "learning_rate": 0.002, "loss": 2.3526, "step": 221700 }, { "epoch": 0.8570688562106663, "grad_norm": 0.09722207486629486, "learning_rate": 0.002, "loss": 2.3394, "step": 221710 }, { "epoch": 0.8571075134140496, "grad_norm": 0.091136634349823, "learning_rate": 0.002, "loss": 2.3331, "step": 221720 }, { "epoch": 0.8571461706174328, "grad_norm": 0.10493214428424835, "learning_rate": 0.002, "loss": 2.341, "step": 221730 }, { "epoch": 0.8571848278208162, "grad_norm": 0.09578853100538254, "learning_rate": 0.002, "loss": 2.3393, "step": 221740 }, { "epoch": 0.8572234850241994, "grad_norm": 0.09849863499403, "learning_rate": 0.002, "loss": 2.3322, "step": 221750 }, { "epoch": 0.8572621422275827, "grad_norm": 0.11259200423955917, "learning_rate": 0.002, "loss": 2.3355, "step": 221760 }, { "epoch": 0.8573007994309659, "grad_norm": 0.09756533056497574, "learning_rate": 0.002, "loss": 2.3433, "step": 221770 }, { "epoch": 0.8573394566343493, "grad_norm": 0.12699413299560547, "learning_rate": 0.002, "loss": 2.3304, "step": 221780 }, { "epoch": 0.8573781138377325, "grad_norm": 0.11433012783527374, "learning_rate": 0.002, "loss": 2.3451, "step": 221790 }, { "epoch": 0.8574167710411158, "grad_norm": 0.11333296447992325, "learning_rate": 0.002, "loss": 2.3454, "step": 221800 }, { "epoch": 0.857455428244499, "grad_norm": 0.11023814976215363, "learning_rate": 0.002, "loss": 2.3452, "step": 221810 }, { "epoch": 0.8574940854478823, "grad_norm": 0.10318921506404877, "learning_rate": 0.002, "loss": 2.3257, "step": 221820 }, { "epoch": 0.8575327426512657, "grad_norm": 0.10669266432523727, "learning_rate": 0.002, "loss": 2.3337, "step": 221830 }, { "epoch": 0.8575713998546489, "grad_norm": 0.11468558758497238, "learning_rate": 0.002, "loss": 2.3422, "step": 221840 }, { "epoch": 0.8576100570580322, "grad_norm": 0.1044035330414772, "learning_rate": 0.002, "loss": 2.3529, "step": 221850 }, { "epoch": 0.8576487142614154, "grad_norm": 0.09796303510665894, "learning_rate": 0.002, "loss": 2.3329, "step": 221860 }, { "epoch": 0.8576873714647988, "grad_norm": 0.10812048614025116, "learning_rate": 0.002, "loss": 2.3353, "step": 221870 }, { "epoch": 0.857726028668182, "grad_norm": 0.12456179410219193, "learning_rate": 0.002, "loss": 2.3389, "step": 221880 }, { "epoch": 0.8577646858715653, "grad_norm": 0.11183394491672516, "learning_rate": 0.002, "loss": 2.3406, "step": 221890 }, { "epoch": 0.8578033430749485, "grad_norm": 0.12505802512168884, "learning_rate": 0.002, "loss": 2.3299, "step": 221900 }, { "epoch": 0.8578420002783319, "grad_norm": 0.10315761715173721, "learning_rate": 0.002, "loss": 2.3392, "step": 221910 }, { "epoch": 0.8578806574817152, "grad_norm": 0.10516806691884995, "learning_rate": 0.002, "loss": 2.3435, "step": 221920 }, { "epoch": 0.8579193146850984, "grad_norm": 0.12225325405597687, "learning_rate": 0.002, "loss": 2.3202, "step": 221930 }, { "epoch": 0.8579579718884817, "grad_norm": 0.09954703599214554, "learning_rate": 0.002, "loss": 2.3366, "step": 221940 }, { "epoch": 0.857996629091865, "grad_norm": 0.09836176037788391, "learning_rate": 0.002, "loss": 2.3349, "step": 221950 }, { "epoch": 0.8580352862952483, "grad_norm": 0.11061525344848633, "learning_rate": 0.002, "loss": 2.3309, "step": 221960 }, { "epoch": 0.8580739434986315, "grad_norm": 0.1230001151561737, "learning_rate": 0.002, "loss": 2.3457, "step": 221970 }, { "epoch": 0.8581126007020148, "grad_norm": 0.09734803438186646, "learning_rate": 0.002, "loss": 2.3557, "step": 221980 }, { "epoch": 0.8581512579053981, "grad_norm": 0.10479872673749924, "learning_rate": 0.002, "loss": 2.3351, "step": 221990 }, { "epoch": 0.8581899151087814, "grad_norm": 0.10779935121536255, "learning_rate": 0.002, "loss": 2.3496, "step": 222000 }, { "epoch": 0.8582285723121647, "grad_norm": 0.10514236986637115, "learning_rate": 0.002, "loss": 2.3465, "step": 222010 }, { "epoch": 0.8582672295155479, "grad_norm": 0.09969477355480194, "learning_rate": 0.002, "loss": 2.3638, "step": 222020 }, { "epoch": 0.8583058867189312, "grad_norm": 0.11630406975746155, "learning_rate": 0.002, "loss": 2.3384, "step": 222030 }, { "epoch": 0.8583445439223145, "grad_norm": 0.08934228122234344, "learning_rate": 0.002, "loss": 2.3493, "step": 222040 }, { "epoch": 0.8583832011256978, "grad_norm": 0.11019016057252884, "learning_rate": 0.002, "loss": 2.3292, "step": 222050 }, { "epoch": 0.858421858329081, "grad_norm": 0.11922332644462585, "learning_rate": 0.002, "loss": 2.3422, "step": 222060 }, { "epoch": 0.8584605155324643, "grad_norm": 0.09459064155817032, "learning_rate": 0.002, "loss": 2.3281, "step": 222070 }, { "epoch": 0.8584991727358476, "grad_norm": 0.10141943395137787, "learning_rate": 0.002, "loss": 2.3319, "step": 222080 }, { "epoch": 0.8585378299392309, "grad_norm": 0.09072628617286682, "learning_rate": 0.002, "loss": 2.3419, "step": 222090 }, { "epoch": 0.8585764871426141, "grad_norm": 0.09824512898921967, "learning_rate": 0.002, "loss": 2.3402, "step": 222100 }, { "epoch": 0.8586151443459974, "grad_norm": 0.10149113088846207, "learning_rate": 0.002, "loss": 2.3363, "step": 222110 }, { "epoch": 0.8586538015493808, "grad_norm": 0.09815272688865662, "learning_rate": 0.002, "loss": 2.3426, "step": 222120 }, { "epoch": 0.858692458752764, "grad_norm": 0.11693661659955978, "learning_rate": 0.002, "loss": 2.3582, "step": 222130 }, { "epoch": 0.8587311159561473, "grad_norm": 0.10831683874130249, "learning_rate": 0.002, "loss": 2.3493, "step": 222140 }, { "epoch": 0.8587697731595305, "grad_norm": 0.09843482077121735, "learning_rate": 0.002, "loss": 2.3449, "step": 222150 }, { "epoch": 0.8588084303629139, "grad_norm": 0.13081464171409607, "learning_rate": 0.002, "loss": 2.3405, "step": 222160 }, { "epoch": 0.8588470875662971, "grad_norm": 0.10841447860002518, "learning_rate": 0.002, "loss": 2.3505, "step": 222170 }, { "epoch": 0.8588857447696804, "grad_norm": 0.11414126306772232, "learning_rate": 0.002, "loss": 2.3436, "step": 222180 }, { "epoch": 0.8589244019730636, "grad_norm": 0.10653481632471085, "learning_rate": 0.002, "loss": 2.3305, "step": 222190 }, { "epoch": 0.8589630591764469, "grad_norm": 0.11468642204999924, "learning_rate": 0.002, "loss": 2.3537, "step": 222200 }, { "epoch": 0.8590017163798302, "grad_norm": 0.11711590737104416, "learning_rate": 0.002, "loss": 2.3231, "step": 222210 }, { "epoch": 0.8590403735832135, "grad_norm": 0.10254094004631042, "learning_rate": 0.002, "loss": 2.3441, "step": 222220 }, { "epoch": 0.8590790307865968, "grad_norm": 0.09667069464921951, "learning_rate": 0.002, "loss": 2.3277, "step": 222230 }, { "epoch": 0.85911768798998, "grad_norm": 0.09760228544473648, "learning_rate": 0.002, "loss": 2.326, "step": 222240 }, { "epoch": 0.8591563451933634, "grad_norm": 0.1100788488984108, "learning_rate": 0.002, "loss": 2.3321, "step": 222250 }, { "epoch": 0.8591950023967466, "grad_norm": 0.11454857140779495, "learning_rate": 0.002, "loss": 2.3403, "step": 222260 }, { "epoch": 0.8592336596001299, "grad_norm": 0.11041669547557831, "learning_rate": 0.002, "loss": 2.3378, "step": 222270 }, { "epoch": 0.8592723168035131, "grad_norm": 0.1071617379784584, "learning_rate": 0.002, "loss": 2.3252, "step": 222280 }, { "epoch": 0.8593109740068965, "grad_norm": 0.0996813029050827, "learning_rate": 0.002, "loss": 2.3414, "step": 222290 }, { "epoch": 0.8593496312102797, "grad_norm": 0.10956218093633652, "learning_rate": 0.002, "loss": 2.3251, "step": 222300 }, { "epoch": 0.859388288413663, "grad_norm": 0.13127194344997406, "learning_rate": 0.002, "loss": 2.3339, "step": 222310 }, { "epoch": 0.8594269456170462, "grad_norm": 0.09963233768939972, "learning_rate": 0.002, "loss": 2.3419, "step": 222320 }, { "epoch": 0.8594656028204296, "grad_norm": 0.10281427949666977, "learning_rate": 0.002, "loss": 2.3394, "step": 222330 }, { "epoch": 0.8595042600238129, "grad_norm": 0.09876349568367004, "learning_rate": 0.002, "loss": 2.3415, "step": 222340 }, { "epoch": 0.8595429172271961, "grad_norm": 0.09714307636022568, "learning_rate": 0.002, "loss": 2.329, "step": 222350 }, { "epoch": 0.8595815744305794, "grad_norm": 0.0989079400897026, "learning_rate": 0.002, "loss": 2.3397, "step": 222360 }, { "epoch": 0.8596202316339627, "grad_norm": 0.08984922617673874, "learning_rate": 0.002, "loss": 2.3383, "step": 222370 }, { "epoch": 0.859658888837346, "grad_norm": 0.10395090281963348, "learning_rate": 0.002, "loss": 2.3435, "step": 222380 }, { "epoch": 0.8596975460407292, "grad_norm": 0.13927537202835083, "learning_rate": 0.002, "loss": 2.3334, "step": 222390 }, { "epoch": 0.8597362032441125, "grad_norm": 0.11574968695640564, "learning_rate": 0.002, "loss": 2.3258, "step": 222400 }, { "epoch": 0.8597748604474957, "grad_norm": 0.11554904282093048, "learning_rate": 0.002, "loss": 2.3599, "step": 222410 }, { "epoch": 0.8598135176508791, "grad_norm": 0.0983390286564827, "learning_rate": 0.002, "loss": 2.3339, "step": 222420 }, { "epoch": 0.8598521748542624, "grad_norm": 0.09339765459299088, "learning_rate": 0.002, "loss": 2.3388, "step": 222430 }, { "epoch": 0.8598908320576456, "grad_norm": 0.11920101940631866, "learning_rate": 0.002, "loss": 2.3483, "step": 222440 }, { "epoch": 0.8599294892610289, "grad_norm": 0.10644099116325378, "learning_rate": 0.002, "loss": 2.3423, "step": 222450 }, { "epoch": 0.8599681464644122, "grad_norm": 0.10699266940355301, "learning_rate": 0.002, "loss": 2.3263, "step": 222460 }, { "epoch": 0.8600068036677955, "grad_norm": 0.11919573694467545, "learning_rate": 0.002, "loss": 2.3286, "step": 222470 }, { "epoch": 0.8600454608711787, "grad_norm": 0.11993969976902008, "learning_rate": 0.002, "loss": 2.3483, "step": 222480 }, { "epoch": 0.860084118074562, "grad_norm": 0.10443676263093948, "learning_rate": 0.002, "loss": 2.3392, "step": 222490 }, { "epoch": 0.8601227752779453, "grad_norm": 0.11422792077064514, "learning_rate": 0.002, "loss": 2.3474, "step": 222500 }, { "epoch": 0.8601614324813286, "grad_norm": 0.11695490777492523, "learning_rate": 0.002, "loss": 2.3343, "step": 222510 }, { "epoch": 0.8602000896847118, "grad_norm": 0.09599953144788742, "learning_rate": 0.002, "loss": 2.3391, "step": 222520 }, { "epoch": 0.8602387468880951, "grad_norm": 0.09750067442655563, "learning_rate": 0.002, "loss": 2.3501, "step": 222530 }, { "epoch": 0.8602774040914785, "grad_norm": 0.09763391315937042, "learning_rate": 0.002, "loss": 2.3402, "step": 222540 }, { "epoch": 0.8603160612948617, "grad_norm": 0.10374131798744202, "learning_rate": 0.002, "loss": 2.335, "step": 222550 }, { "epoch": 0.860354718498245, "grad_norm": 0.10062684118747711, "learning_rate": 0.002, "loss": 2.3517, "step": 222560 }, { "epoch": 0.8603933757016282, "grad_norm": 0.1127900630235672, "learning_rate": 0.002, "loss": 2.3483, "step": 222570 }, { "epoch": 0.8604320329050115, "grad_norm": 0.09476776421070099, "learning_rate": 0.002, "loss": 2.3369, "step": 222580 }, { "epoch": 0.8604706901083948, "grad_norm": 0.11507733166217804, "learning_rate": 0.002, "loss": 2.3351, "step": 222590 }, { "epoch": 0.8605093473117781, "grad_norm": 0.10111179202795029, "learning_rate": 0.002, "loss": 2.3423, "step": 222600 }, { "epoch": 0.8605480045151613, "grad_norm": 0.09470687061548233, "learning_rate": 0.002, "loss": 2.3431, "step": 222610 }, { "epoch": 0.8605866617185446, "grad_norm": 0.1021670550107956, "learning_rate": 0.002, "loss": 2.3393, "step": 222620 }, { "epoch": 0.860625318921928, "grad_norm": 0.08839169889688492, "learning_rate": 0.002, "loss": 2.3232, "step": 222630 }, { "epoch": 0.8606639761253112, "grad_norm": 0.11978471279144287, "learning_rate": 0.002, "loss": 2.3455, "step": 222640 }, { "epoch": 0.8607026333286945, "grad_norm": 0.11308436840772629, "learning_rate": 0.002, "loss": 2.3332, "step": 222650 }, { "epoch": 0.8607412905320777, "grad_norm": 0.09787823259830475, "learning_rate": 0.002, "loss": 2.3398, "step": 222660 }, { "epoch": 0.8607799477354611, "grad_norm": 0.1060105636715889, "learning_rate": 0.002, "loss": 2.3285, "step": 222670 }, { "epoch": 0.8608186049388443, "grad_norm": 0.09345019608736038, "learning_rate": 0.002, "loss": 2.3251, "step": 222680 }, { "epoch": 0.8608572621422276, "grad_norm": 0.1005011722445488, "learning_rate": 0.002, "loss": 2.3365, "step": 222690 }, { "epoch": 0.8608959193456108, "grad_norm": 0.1362825334072113, "learning_rate": 0.002, "loss": 2.3332, "step": 222700 }, { "epoch": 0.8609345765489942, "grad_norm": 0.1013195663690567, "learning_rate": 0.002, "loss": 2.3495, "step": 222710 }, { "epoch": 0.8609732337523774, "grad_norm": 0.1083022877573967, "learning_rate": 0.002, "loss": 2.3461, "step": 222720 }, { "epoch": 0.8610118909557607, "grad_norm": 0.12598514556884766, "learning_rate": 0.002, "loss": 2.3343, "step": 222730 }, { "epoch": 0.861050548159144, "grad_norm": 0.09817186743021011, "learning_rate": 0.002, "loss": 2.3462, "step": 222740 }, { "epoch": 0.8610892053625272, "grad_norm": 0.13273663818836212, "learning_rate": 0.002, "loss": 2.3452, "step": 222750 }, { "epoch": 0.8611278625659106, "grad_norm": 0.08212157338857651, "learning_rate": 0.002, "loss": 2.3324, "step": 222760 }, { "epoch": 0.8611665197692938, "grad_norm": 0.11281108111143112, "learning_rate": 0.002, "loss": 2.3417, "step": 222770 }, { "epoch": 0.8612051769726771, "grad_norm": 0.12281344830989838, "learning_rate": 0.002, "loss": 2.3322, "step": 222780 }, { "epoch": 0.8612438341760603, "grad_norm": 0.09948590397834778, "learning_rate": 0.002, "loss": 2.3382, "step": 222790 }, { "epoch": 0.8612824913794437, "grad_norm": 0.09139905869960785, "learning_rate": 0.002, "loss": 2.3457, "step": 222800 }, { "epoch": 0.8613211485828269, "grad_norm": 0.09847621619701385, "learning_rate": 0.002, "loss": 2.3394, "step": 222810 }, { "epoch": 0.8613598057862102, "grad_norm": 0.2321125864982605, "learning_rate": 0.002, "loss": 2.3265, "step": 222820 }, { "epoch": 0.8613984629895934, "grad_norm": 0.11758513748645782, "learning_rate": 0.002, "loss": 2.3369, "step": 222830 }, { "epoch": 0.8614371201929768, "grad_norm": 0.09759420156478882, "learning_rate": 0.002, "loss": 2.3278, "step": 222840 }, { "epoch": 0.86147577739636, "grad_norm": 0.12681740522384644, "learning_rate": 0.002, "loss": 2.3508, "step": 222850 }, { "epoch": 0.8615144345997433, "grad_norm": 0.09722145646810532, "learning_rate": 0.002, "loss": 2.333, "step": 222860 }, { "epoch": 0.8615530918031266, "grad_norm": 0.12716875970363617, "learning_rate": 0.002, "loss": 2.3227, "step": 222870 }, { "epoch": 0.8615917490065099, "grad_norm": 0.12183672189712524, "learning_rate": 0.002, "loss": 2.3392, "step": 222880 }, { "epoch": 0.8616304062098932, "grad_norm": 0.09976981580257416, "learning_rate": 0.002, "loss": 2.348, "step": 222890 }, { "epoch": 0.8616690634132764, "grad_norm": 0.0947190448641777, "learning_rate": 0.002, "loss": 2.3348, "step": 222900 }, { "epoch": 0.8617077206166597, "grad_norm": 0.10142937302589417, "learning_rate": 0.002, "loss": 2.3156, "step": 222910 }, { "epoch": 0.861746377820043, "grad_norm": 0.10353469103574753, "learning_rate": 0.002, "loss": 2.3487, "step": 222920 }, { "epoch": 0.8617850350234263, "grad_norm": 0.11019370704889297, "learning_rate": 0.002, "loss": 2.33, "step": 222930 }, { "epoch": 0.8618236922268095, "grad_norm": 0.10573454201221466, "learning_rate": 0.002, "loss": 2.3363, "step": 222940 }, { "epoch": 0.8618623494301928, "grad_norm": 0.09382513165473938, "learning_rate": 0.002, "loss": 2.3335, "step": 222950 }, { "epoch": 0.861901006633576, "grad_norm": 0.1011807918548584, "learning_rate": 0.002, "loss": 2.3384, "step": 222960 }, { "epoch": 0.8619396638369594, "grad_norm": 0.10121998190879822, "learning_rate": 0.002, "loss": 2.3349, "step": 222970 }, { "epoch": 0.8619783210403427, "grad_norm": 0.15870223939418793, "learning_rate": 0.002, "loss": 2.3461, "step": 222980 }, { "epoch": 0.8620169782437259, "grad_norm": 0.12690460681915283, "learning_rate": 0.002, "loss": 2.3493, "step": 222990 }, { "epoch": 0.8620556354471092, "grad_norm": 0.10708886384963989, "learning_rate": 0.002, "loss": 2.3403, "step": 223000 }, { "epoch": 0.8620942926504925, "grad_norm": 0.0971582904458046, "learning_rate": 0.002, "loss": 2.3393, "step": 223010 }, { "epoch": 0.8621329498538758, "grad_norm": 0.10086013376712799, "learning_rate": 0.002, "loss": 2.3329, "step": 223020 }, { "epoch": 0.862171607057259, "grad_norm": 0.11198851466178894, "learning_rate": 0.002, "loss": 2.3369, "step": 223030 }, { "epoch": 0.8622102642606423, "grad_norm": 0.11701015383005142, "learning_rate": 0.002, "loss": 2.3311, "step": 223040 }, { "epoch": 0.8622489214640257, "grad_norm": 0.10343601554632187, "learning_rate": 0.002, "loss": 2.3359, "step": 223050 }, { "epoch": 0.8622875786674089, "grad_norm": 0.09298793226480484, "learning_rate": 0.002, "loss": 2.3229, "step": 223060 }, { "epoch": 0.8623262358707922, "grad_norm": 0.08753270655870438, "learning_rate": 0.002, "loss": 2.3509, "step": 223070 }, { "epoch": 0.8623648930741754, "grad_norm": 0.1008063554763794, "learning_rate": 0.002, "loss": 2.3314, "step": 223080 }, { "epoch": 0.8624035502775588, "grad_norm": 0.10008800774812698, "learning_rate": 0.002, "loss": 2.3369, "step": 223090 }, { "epoch": 0.862442207480942, "grad_norm": 0.09982694685459137, "learning_rate": 0.002, "loss": 2.3403, "step": 223100 }, { "epoch": 0.8624808646843253, "grad_norm": 0.09783685952425003, "learning_rate": 0.002, "loss": 2.3516, "step": 223110 }, { "epoch": 0.8625195218877085, "grad_norm": 0.10794012248516083, "learning_rate": 0.002, "loss": 2.3339, "step": 223120 }, { "epoch": 0.8625581790910918, "grad_norm": 0.10148628056049347, "learning_rate": 0.002, "loss": 2.3385, "step": 223130 }, { "epoch": 0.8625968362944751, "grad_norm": 0.10609929263591766, "learning_rate": 0.002, "loss": 2.3361, "step": 223140 }, { "epoch": 0.8626354934978584, "grad_norm": 0.12248008698225021, "learning_rate": 0.002, "loss": 2.3458, "step": 223150 }, { "epoch": 0.8626741507012416, "grad_norm": 0.10261505842208862, "learning_rate": 0.002, "loss": 2.3378, "step": 223160 }, { "epoch": 0.8627128079046249, "grad_norm": 0.10075739026069641, "learning_rate": 0.002, "loss": 2.3391, "step": 223170 }, { "epoch": 0.8627514651080083, "grad_norm": 0.10503147542476654, "learning_rate": 0.002, "loss": 2.3373, "step": 223180 }, { "epoch": 0.8627901223113915, "grad_norm": 0.1116899847984314, "learning_rate": 0.002, "loss": 2.3248, "step": 223190 }, { "epoch": 0.8628287795147748, "grad_norm": 0.09337859600782394, "learning_rate": 0.002, "loss": 2.3257, "step": 223200 }, { "epoch": 0.862867436718158, "grad_norm": 0.10986005514860153, "learning_rate": 0.002, "loss": 2.3382, "step": 223210 }, { "epoch": 0.8629060939215414, "grad_norm": 0.09611022472381592, "learning_rate": 0.002, "loss": 2.339, "step": 223220 }, { "epoch": 0.8629447511249246, "grad_norm": 0.09995223581790924, "learning_rate": 0.002, "loss": 2.3609, "step": 223230 }, { "epoch": 0.8629834083283079, "grad_norm": 0.10946323722600937, "learning_rate": 0.002, "loss": 2.341, "step": 223240 }, { "epoch": 0.8630220655316911, "grad_norm": 0.10604280233383179, "learning_rate": 0.002, "loss": 2.3384, "step": 223250 }, { "epoch": 0.8630607227350745, "grad_norm": 0.10414547473192215, "learning_rate": 0.002, "loss": 2.3341, "step": 223260 }, { "epoch": 0.8630993799384578, "grad_norm": 0.12171155214309692, "learning_rate": 0.002, "loss": 2.3299, "step": 223270 }, { "epoch": 0.863138037141841, "grad_norm": 0.10600202530622482, "learning_rate": 0.002, "loss": 2.3556, "step": 223280 }, { "epoch": 0.8631766943452243, "grad_norm": 0.10572559386491776, "learning_rate": 0.002, "loss": 2.3318, "step": 223290 }, { "epoch": 0.8632153515486075, "grad_norm": 0.09074997901916504, "learning_rate": 0.002, "loss": 2.3165, "step": 223300 }, { "epoch": 0.8632540087519909, "grad_norm": 0.10141141712665558, "learning_rate": 0.002, "loss": 2.329, "step": 223310 }, { "epoch": 0.8632926659553741, "grad_norm": 0.1120740994811058, "learning_rate": 0.002, "loss": 2.3395, "step": 223320 }, { "epoch": 0.8633313231587574, "grad_norm": 0.10076904296875, "learning_rate": 0.002, "loss": 2.329, "step": 223330 }, { "epoch": 0.8633699803621406, "grad_norm": 0.11237254738807678, "learning_rate": 0.002, "loss": 2.3321, "step": 223340 }, { "epoch": 0.863408637565524, "grad_norm": 0.10964491218328476, "learning_rate": 0.002, "loss": 2.3449, "step": 223350 }, { "epoch": 0.8634472947689072, "grad_norm": 0.12141384929418564, "learning_rate": 0.002, "loss": 2.3398, "step": 223360 }, { "epoch": 0.8634859519722905, "grad_norm": 0.1059795618057251, "learning_rate": 0.002, "loss": 2.3383, "step": 223370 }, { "epoch": 0.8635246091756738, "grad_norm": 0.09433835744857788, "learning_rate": 0.002, "loss": 2.3366, "step": 223380 }, { "epoch": 0.8635632663790571, "grad_norm": 0.12807735800743103, "learning_rate": 0.002, "loss": 2.3419, "step": 223390 }, { "epoch": 0.8636019235824404, "grad_norm": 0.10353527218103409, "learning_rate": 0.002, "loss": 2.3456, "step": 223400 }, { "epoch": 0.8636405807858236, "grad_norm": 0.08917547762393951, "learning_rate": 0.002, "loss": 2.3362, "step": 223410 }, { "epoch": 0.8636792379892069, "grad_norm": 0.09677743911743164, "learning_rate": 0.002, "loss": 2.3336, "step": 223420 }, { "epoch": 0.8637178951925902, "grad_norm": 0.09181633591651917, "learning_rate": 0.002, "loss": 2.3241, "step": 223430 }, { "epoch": 0.8637565523959735, "grad_norm": 0.10283015668392181, "learning_rate": 0.002, "loss": 2.3371, "step": 223440 }, { "epoch": 0.8637952095993567, "grad_norm": 0.0930963084101677, "learning_rate": 0.002, "loss": 2.337, "step": 223450 }, { "epoch": 0.86383386680274, "grad_norm": 0.11149480938911438, "learning_rate": 0.002, "loss": 2.3392, "step": 223460 }, { "epoch": 0.8638725240061234, "grad_norm": 0.10241681337356567, "learning_rate": 0.002, "loss": 2.345, "step": 223470 }, { "epoch": 0.8639111812095066, "grad_norm": 0.09172318130731583, "learning_rate": 0.002, "loss": 2.3336, "step": 223480 }, { "epoch": 0.8639498384128899, "grad_norm": 0.09435839205980301, "learning_rate": 0.002, "loss": 2.3457, "step": 223490 }, { "epoch": 0.8639884956162731, "grad_norm": 0.09670911729335785, "learning_rate": 0.002, "loss": 2.3392, "step": 223500 }, { "epoch": 0.8640271528196564, "grad_norm": 0.10295765846967697, "learning_rate": 0.002, "loss": 2.3258, "step": 223510 }, { "epoch": 0.8640658100230397, "grad_norm": 0.09967496991157532, "learning_rate": 0.002, "loss": 2.3429, "step": 223520 }, { "epoch": 0.864104467226423, "grad_norm": 0.08990298956632614, "learning_rate": 0.002, "loss": 2.3456, "step": 223530 }, { "epoch": 0.8641431244298062, "grad_norm": 0.12398983538150787, "learning_rate": 0.002, "loss": 2.3332, "step": 223540 }, { "epoch": 0.8641817816331895, "grad_norm": 0.09743223339319229, "learning_rate": 0.002, "loss": 2.3327, "step": 223550 }, { "epoch": 0.8642204388365728, "grad_norm": 0.09881418943405151, "learning_rate": 0.002, "loss": 2.3312, "step": 223560 }, { "epoch": 0.8642590960399561, "grad_norm": 0.11653521656990051, "learning_rate": 0.002, "loss": 2.3364, "step": 223570 }, { "epoch": 0.8642977532433394, "grad_norm": 0.10108964145183563, "learning_rate": 0.002, "loss": 2.3397, "step": 223580 }, { "epoch": 0.8643364104467226, "grad_norm": 0.10443811118602753, "learning_rate": 0.002, "loss": 2.3341, "step": 223590 }, { "epoch": 0.864375067650106, "grad_norm": 0.12487456202507019, "learning_rate": 0.002, "loss": 2.3368, "step": 223600 }, { "epoch": 0.8644137248534892, "grad_norm": 0.0982515960931778, "learning_rate": 0.002, "loss": 2.3348, "step": 223610 }, { "epoch": 0.8644523820568725, "grad_norm": 0.09217357635498047, "learning_rate": 0.002, "loss": 2.3381, "step": 223620 }, { "epoch": 0.8644910392602557, "grad_norm": 0.11840027570724487, "learning_rate": 0.002, "loss": 2.3423, "step": 223630 }, { "epoch": 0.8645296964636391, "grad_norm": 0.1063842624425888, "learning_rate": 0.002, "loss": 2.3468, "step": 223640 }, { "epoch": 0.8645683536670223, "grad_norm": 0.10393808037042618, "learning_rate": 0.002, "loss": 2.3386, "step": 223650 }, { "epoch": 0.8646070108704056, "grad_norm": 0.10301019251346588, "learning_rate": 0.002, "loss": 2.3453, "step": 223660 }, { "epoch": 0.8646456680737888, "grad_norm": 0.09212903678417206, "learning_rate": 0.002, "loss": 2.3397, "step": 223670 }, { "epoch": 0.8646843252771721, "grad_norm": 0.08990156650543213, "learning_rate": 0.002, "loss": 2.3343, "step": 223680 }, { "epoch": 0.8647229824805555, "grad_norm": 0.10661827027797699, "learning_rate": 0.002, "loss": 2.3423, "step": 223690 }, { "epoch": 0.8647616396839387, "grad_norm": 0.1021680161356926, "learning_rate": 0.002, "loss": 2.3238, "step": 223700 }, { "epoch": 0.864800296887322, "grad_norm": 0.12244638800621033, "learning_rate": 0.002, "loss": 2.3378, "step": 223710 }, { "epoch": 0.8648389540907052, "grad_norm": 0.10838084667921066, "learning_rate": 0.002, "loss": 2.3397, "step": 223720 }, { "epoch": 0.8648776112940886, "grad_norm": 0.10406170040369034, "learning_rate": 0.002, "loss": 2.3411, "step": 223730 }, { "epoch": 0.8649162684974718, "grad_norm": 0.10603002458810806, "learning_rate": 0.002, "loss": 2.3401, "step": 223740 }, { "epoch": 0.8649549257008551, "grad_norm": 0.09609860181808472, "learning_rate": 0.002, "loss": 2.3487, "step": 223750 }, { "epoch": 0.8649935829042383, "grad_norm": 0.12051209807395935, "learning_rate": 0.002, "loss": 2.3419, "step": 223760 }, { "epoch": 0.8650322401076217, "grad_norm": 0.09325770288705826, "learning_rate": 0.002, "loss": 2.3316, "step": 223770 }, { "epoch": 0.865070897311005, "grad_norm": 0.11577421426773071, "learning_rate": 0.002, "loss": 2.339, "step": 223780 }, { "epoch": 0.8651095545143882, "grad_norm": 0.1013086810708046, "learning_rate": 0.002, "loss": 2.3226, "step": 223790 }, { "epoch": 0.8651482117177715, "grad_norm": 0.09731609374284744, "learning_rate": 0.002, "loss": 2.3365, "step": 223800 }, { "epoch": 0.8651868689211548, "grad_norm": 0.10846145451068878, "learning_rate": 0.002, "loss": 2.3266, "step": 223810 }, { "epoch": 0.8652255261245381, "grad_norm": 0.1096300408244133, "learning_rate": 0.002, "loss": 2.3393, "step": 223820 }, { "epoch": 0.8652641833279213, "grad_norm": 0.11329855024814606, "learning_rate": 0.002, "loss": 2.3236, "step": 223830 }, { "epoch": 0.8653028405313046, "grad_norm": 0.1072562038898468, "learning_rate": 0.002, "loss": 2.323, "step": 223840 }, { "epoch": 0.8653414977346879, "grad_norm": 0.13581515848636627, "learning_rate": 0.002, "loss": 2.3435, "step": 223850 }, { "epoch": 0.8653801549380712, "grad_norm": 0.10407847166061401, "learning_rate": 0.002, "loss": 2.3326, "step": 223860 }, { "epoch": 0.8654188121414544, "grad_norm": 0.10293195396661758, "learning_rate": 0.002, "loss": 2.3362, "step": 223870 }, { "epoch": 0.8654574693448377, "grad_norm": 0.11165141314268112, "learning_rate": 0.002, "loss": 2.3339, "step": 223880 }, { "epoch": 0.865496126548221, "grad_norm": 0.11360661685466766, "learning_rate": 0.002, "loss": 2.3299, "step": 223890 }, { "epoch": 0.8655347837516043, "grad_norm": 0.09765450656414032, "learning_rate": 0.002, "loss": 2.3472, "step": 223900 }, { "epoch": 0.8655734409549876, "grad_norm": 0.1077166348695755, "learning_rate": 0.002, "loss": 2.3427, "step": 223910 }, { "epoch": 0.8656120981583708, "grad_norm": 0.09532441943883896, "learning_rate": 0.002, "loss": 2.3332, "step": 223920 }, { "epoch": 0.8656507553617541, "grad_norm": 0.09879755228757858, "learning_rate": 0.002, "loss": 2.3305, "step": 223930 }, { "epoch": 0.8656894125651374, "grad_norm": 0.10443105548620224, "learning_rate": 0.002, "loss": 2.3421, "step": 223940 }, { "epoch": 0.8657280697685207, "grad_norm": 0.11443766206502914, "learning_rate": 0.002, "loss": 2.3375, "step": 223950 }, { "epoch": 0.8657667269719039, "grad_norm": 0.1095224916934967, "learning_rate": 0.002, "loss": 2.3454, "step": 223960 }, { "epoch": 0.8658053841752872, "grad_norm": 0.11135567724704742, "learning_rate": 0.002, "loss": 2.3427, "step": 223970 }, { "epoch": 0.8658440413786705, "grad_norm": 0.09788601845502853, "learning_rate": 0.002, "loss": 2.3445, "step": 223980 }, { "epoch": 0.8658826985820538, "grad_norm": 0.10934220254421234, "learning_rate": 0.002, "loss": 2.3333, "step": 223990 }, { "epoch": 0.865921355785437, "grad_norm": 0.09874802827835083, "learning_rate": 0.002, "loss": 2.3297, "step": 224000 }, { "epoch": 0.8659600129888203, "grad_norm": 0.11080588400363922, "learning_rate": 0.002, "loss": 2.3282, "step": 224010 }, { "epoch": 0.8659986701922037, "grad_norm": 0.09544280171394348, "learning_rate": 0.002, "loss": 2.3384, "step": 224020 }, { "epoch": 0.8660373273955869, "grad_norm": 0.08992662280797958, "learning_rate": 0.002, "loss": 2.3367, "step": 224030 }, { "epoch": 0.8660759845989702, "grad_norm": 0.12229316681623459, "learning_rate": 0.002, "loss": 2.3255, "step": 224040 }, { "epoch": 0.8661146418023534, "grad_norm": 0.11490171402692795, "learning_rate": 0.002, "loss": 2.3499, "step": 224050 }, { "epoch": 0.8661532990057367, "grad_norm": 0.09182967245578766, "learning_rate": 0.002, "loss": 2.3471, "step": 224060 }, { "epoch": 0.86619195620912, "grad_norm": 0.11167624592781067, "learning_rate": 0.002, "loss": 2.3673, "step": 224070 }, { "epoch": 0.8662306134125033, "grad_norm": 0.1071360632777214, "learning_rate": 0.002, "loss": 2.3499, "step": 224080 }, { "epoch": 0.8662692706158865, "grad_norm": 0.11656167358160019, "learning_rate": 0.002, "loss": 2.3276, "step": 224090 }, { "epoch": 0.8663079278192698, "grad_norm": 0.11341526359319687, "learning_rate": 0.002, "loss": 2.3329, "step": 224100 }, { "epoch": 0.8663465850226532, "grad_norm": 0.10455506294965744, "learning_rate": 0.002, "loss": 2.3218, "step": 224110 }, { "epoch": 0.8663852422260364, "grad_norm": 0.12185147404670715, "learning_rate": 0.002, "loss": 2.3453, "step": 224120 }, { "epoch": 0.8664238994294197, "grad_norm": 0.0907309502363205, "learning_rate": 0.002, "loss": 2.3612, "step": 224130 }, { "epoch": 0.8664625566328029, "grad_norm": 0.13204310834407806, "learning_rate": 0.002, "loss": 2.3364, "step": 224140 }, { "epoch": 0.8665012138361863, "grad_norm": 0.09790869802236557, "learning_rate": 0.002, "loss": 2.3267, "step": 224150 }, { "epoch": 0.8665398710395695, "grad_norm": 0.106050044298172, "learning_rate": 0.002, "loss": 2.3493, "step": 224160 }, { "epoch": 0.8665785282429528, "grad_norm": 0.10837047547101974, "learning_rate": 0.002, "loss": 2.3326, "step": 224170 }, { "epoch": 0.866617185446336, "grad_norm": 0.10271471738815308, "learning_rate": 0.002, "loss": 2.332, "step": 224180 }, { "epoch": 0.8666558426497194, "grad_norm": 0.10636868327856064, "learning_rate": 0.002, "loss": 2.3436, "step": 224190 }, { "epoch": 0.8666944998531027, "grad_norm": 0.13913333415985107, "learning_rate": 0.002, "loss": 2.3633, "step": 224200 }, { "epoch": 0.8667331570564859, "grad_norm": 0.10228648781776428, "learning_rate": 0.002, "loss": 2.3346, "step": 224210 }, { "epoch": 0.8667718142598692, "grad_norm": 0.107154481112957, "learning_rate": 0.002, "loss": 2.3316, "step": 224220 }, { "epoch": 0.8668104714632524, "grad_norm": 0.09693600237369537, "learning_rate": 0.002, "loss": 2.3273, "step": 224230 }, { "epoch": 0.8668491286666358, "grad_norm": 0.1205499917268753, "learning_rate": 0.002, "loss": 2.3422, "step": 224240 }, { "epoch": 0.866887785870019, "grad_norm": 0.1094214916229248, "learning_rate": 0.002, "loss": 2.3381, "step": 224250 }, { "epoch": 0.8669264430734023, "grad_norm": 0.09406102448701859, "learning_rate": 0.002, "loss": 2.3459, "step": 224260 }, { "epoch": 0.8669651002767855, "grad_norm": 0.12342895567417145, "learning_rate": 0.002, "loss": 2.3459, "step": 224270 }, { "epoch": 0.8670037574801689, "grad_norm": 0.1090339794754982, "learning_rate": 0.002, "loss": 2.3427, "step": 224280 }, { "epoch": 0.8670424146835521, "grad_norm": 0.09570826590061188, "learning_rate": 0.002, "loss": 2.3303, "step": 224290 }, { "epoch": 0.8670810718869354, "grad_norm": 0.10601980984210968, "learning_rate": 0.002, "loss": 2.352, "step": 224300 }, { "epoch": 0.8671197290903186, "grad_norm": 0.09096609055995941, "learning_rate": 0.002, "loss": 2.3138, "step": 224310 }, { "epoch": 0.867158386293702, "grad_norm": 0.12070676684379578, "learning_rate": 0.002, "loss": 2.3395, "step": 224320 }, { "epoch": 0.8671970434970853, "grad_norm": 0.12272123992443085, "learning_rate": 0.002, "loss": 2.3372, "step": 224330 }, { "epoch": 0.8672357007004685, "grad_norm": 0.09796576201915741, "learning_rate": 0.002, "loss": 2.3475, "step": 224340 }, { "epoch": 0.8672743579038518, "grad_norm": 0.09902673214673996, "learning_rate": 0.002, "loss": 2.3388, "step": 224350 }, { "epoch": 0.8673130151072351, "grad_norm": 0.10000674426555634, "learning_rate": 0.002, "loss": 2.3157, "step": 224360 }, { "epoch": 0.8673516723106184, "grad_norm": 0.08922852575778961, "learning_rate": 0.002, "loss": 2.3311, "step": 224370 }, { "epoch": 0.8673903295140016, "grad_norm": 0.11467423290014267, "learning_rate": 0.002, "loss": 2.3225, "step": 224380 }, { "epoch": 0.8674289867173849, "grad_norm": 0.08990102261304855, "learning_rate": 0.002, "loss": 2.3337, "step": 224390 }, { "epoch": 0.8674676439207682, "grad_norm": 0.2590179145336151, "learning_rate": 0.002, "loss": 2.3367, "step": 224400 }, { "epoch": 0.8675063011241515, "grad_norm": 0.11853017657995224, "learning_rate": 0.002, "loss": 2.3403, "step": 224410 }, { "epoch": 0.8675449583275348, "grad_norm": 0.10964160412549973, "learning_rate": 0.002, "loss": 2.3496, "step": 224420 }, { "epoch": 0.867583615530918, "grad_norm": 0.10733091831207275, "learning_rate": 0.002, "loss": 2.3336, "step": 224430 }, { "epoch": 0.8676222727343013, "grad_norm": 0.10910530388355255, "learning_rate": 0.002, "loss": 2.3434, "step": 224440 }, { "epoch": 0.8676609299376846, "grad_norm": 0.10902879387140274, "learning_rate": 0.002, "loss": 2.3326, "step": 224450 }, { "epoch": 0.8676995871410679, "grad_norm": 0.10836129635572433, "learning_rate": 0.002, "loss": 2.3384, "step": 224460 }, { "epoch": 0.8677382443444511, "grad_norm": 0.10329374670982361, "learning_rate": 0.002, "loss": 2.3297, "step": 224470 }, { "epoch": 0.8677769015478344, "grad_norm": 0.09962306916713715, "learning_rate": 0.002, "loss": 2.3301, "step": 224480 }, { "epoch": 0.8678155587512177, "grad_norm": 0.09849024564027786, "learning_rate": 0.002, "loss": 2.3478, "step": 224490 }, { "epoch": 0.867854215954601, "grad_norm": 0.10376264154911041, "learning_rate": 0.002, "loss": 2.3349, "step": 224500 }, { "epoch": 0.8678928731579842, "grad_norm": 0.10472551733255386, "learning_rate": 0.002, "loss": 2.3497, "step": 224510 }, { "epoch": 0.8679315303613675, "grad_norm": 0.1245124563574791, "learning_rate": 0.002, "loss": 2.3309, "step": 224520 }, { "epoch": 0.8679701875647509, "grad_norm": 0.09382300078868866, "learning_rate": 0.002, "loss": 2.3359, "step": 224530 }, { "epoch": 0.8680088447681341, "grad_norm": 0.1148284450173378, "learning_rate": 0.002, "loss": 2.3373, "step": 224540 }, { "epoch": 0.8680475019715174, "grad_norm": 0.09577217698097229, "learning_rate": 0.002, "loss": 2.3394, "step": 224550 }, { "epoch": 0.8680861591749006, "grad_norm": 0.12388365715742111, "learning_rate": 0.002, "loss": 2.3383, "step": 224560 }, { "epoch": 0.868124816378284, "grad_norm": 0.1013205349445343, "learning_rate": 0.002, "loss": 2.3306, "step": 224570 }, { "epoch": 0.8681634735816672, "grad_norm": 0.09336186200380325, "learning_rate": 0.002, "loss": 2.322, "step": 224580 }, { "epoch": 0.8682021307850505, "grad_norm": 0.12024692445993423, "learning_rate": 0.002, "loss": 2.3434, "step": 224590 }, { "epoch": 0.8682407879884337, "grad_norm": 0.11582981795072556, "learning_rate": 0.002, "loss": 2.332, "step": 224600 }, { "epoch": 0.868279445191817, "grad_norm": 0.10193216800689697, "learning_rate": 0.002, "loss": 2.3265, "step": 224610 }, { "epoch": 0.8683181023952004, "grad_norm": 0.10503973811864853, "learning_rate": 0.002, "loss": 2.3373, "step": 224620 }, { "epoch": 0.8683567595985836, "grad_norm": 0.10338109731674194, "learning_rate": 0.002, "loss": 2.3358, "step": 224630 }, { "epoch": 0.8683954168019669, "grad_norm": 0.11863572895526886, "learning_rate": 0.002, "loss": 2.3437, "step": 224640 }, { "epoch": 0.8684340740053501, "grad_norm": 0.14006471633911133, "learning_rate": 0.002, "loss": 2.3443, "step": 224650 }, { "epoch": 0.8684727312087335, "grad_norm": 0.08928750455379486, "learning_rate": 0.002, "loss": 2.3338, "step": 224660 }, { "epoch": 0.8685113884121167, "grad_norm": 0.16105005145072937, "learning_rate": 0.002, "loss": 2.3516, "step": 224670 }, { "epoch": 0.8685500456155, "grad_norm": 0.1085537001490593, "learning_rate": 0.002, "loss": 2.3415, "step": 224680 }, { "epoch": 0.8685887028188832, "grad_norm": 0.10102713853120804, "learning_rate": 0.002, "loss": 2.345, "step": 224690 }, { "epoch": 0.8686273600222666, "grad_norm": 0.128278449177742, "learning_rate": 0.002, "loss": 2.347, "step": 224700 }, { "epoch": 0.8686660172256498, "grad_norm": 0.13314104080200195, "learning_rate": 0.002, "loss": 2.3439, "step": 224710 }, { "epoch": 0.8687046744290331, "grad_norm": 0.11651406437158585, "learning_rate": 0.002, "loss": 2.3394, "step": 224720 }, { "epoch": 0.8687433316324163, "grad_norm": 0.11891312897205353, "learning_rate": 0.002, "loss": 2.3453, "step": 224730 }, { "epoch": 0.8687819888357997, "grad_norm": 0.09145008027553558, "learning_rate": 0.002, "loss": 2.3438, "step": 224740 }, { "epoch": 0.868820646039183, "grad_norm": 0.1225404292345047, "learning_rate": 0.002, "loss": 2.3414, "step": 224750 }, { "epoch": 0.8688593032425662, "grad_norm": 0.1320568174123764, "learning_rate": 0.002, "loss": 2.3372, "step": 224760 }, { "epoch": 0.8688979604459495, "grad_norm": 0.09423022717237473, "learning_rate": 0.002, "loss": 2.3442, "step": 224770 }, { "epoch": 0.8689366176493328, "grad_norm": 0.09961433708667755, "learning_rate": 0.002, "loss": 2.3399, "step": 224780 }, { "epoch": 0.8689752748527161, "grad_norm": 0.14324261248111725, "learning_rate": 0.002, "loss": 2.336, "step": 224790 }, { "epoch": 0.8690139320560993, "grad_norm": 0.13467486202716827, "learning_rate": 0.002, "loss": 2.3328, "step": 224800 }, { "epoch": 0.8690525892594826, "grad_norm": 0.10713488608598709, "learning_rate": 0.002, "loss": 2.3261, "step": 224810 }, { "epoch": 0.8690912464628658, "grad_norm": 0.11601117253303528, "learning_rate": 0.002, "loss": 2.3271, "step": 224820 }, { "epoch": 0.8691299036662492, "grad_norm": 0.11640362441539764, "learning_rate": 0.002, "loss": 2.3165, "step": 224830 }, { "epoch": 0.8691685608696325, "grad_norm": 0.13760827481746674, "learning_rate": 0.002, "loss": 2.329, "step": 224840 }, { "epoch": 0.8692072180730157, "grad_norm": 0.11234764754772186, "learning_rate": 0.002, "loss": 2.3508, "step": 224850 }, { "epoch": 0.869245875276399, "grad_norm": 0.10185302793979645, "learning_rate": 0.002, "loss": 2.3426, "step": 224860 }, { "epoch": 0.8692845324797823, "grad_norm": 0.15274551510810852, "learning_rate": 0.002, "loss": 2.3399, "step": 224870 }, { "epoch": 0.8693231896831656, "grad_norm": 0.0994364470243454, "learning_rate": 0.002, "loss": 2.3418, "step": 224880 }, { "epoch": 0.8693618468865488, "grad_norm": 0.12746405601501465, "learning_rate": 0.002, "loss": 2.3584, "step": 224890 }, { "epoch": 0.8694005040899321, "grad_norm": 0.24766580760478973, "learning_rate": 0.002, "loss": 2.3257, "step": 224900 }, { "epoch": 0.8694391612933154, "grad_norm": 0.1405581384897232, "learning_rate": 0.002, "loss": 2.3439, "step": 224910 }, { "epoch": 0.8694778184966987, "grad_norm": 0.11151156574487686, "learning_rate": 0.002, "loss": 2.3337, "step": 224920 }, { "epoch": 0.869516475700082, "grad_norm": 0.11751139163970947, "learning_rate": 0.002, "loss": 2.3292, "step": 224930 }, { "epoch": 0.8695551329034652, "grad_norm": 0.11526947468519211, "learning_rate": 0.002, "loss": 2.3344, "step": 224940 }, { "epoch": 0.8695937901068486, "grad_norm": 0.10381560027599335, "learning_rate": 0.002, "loss": 2.3467, "step": 224950 }, { "epoch": 0.8696324473102318, "grad_norm": 0.09702971577644348, "learning_rate": 0.002, "loss": 2.3376, "step": 224960 }, { "epoch": 0.8696711045136151, "grad_norm": 0.11534032225608826, "learning_rate": 0.002, "loss": 2.3414, "step": 224970 }, { "epoch": 0.8697097617169983, "grad_norm": 0.12234510481357574, "learning_rate": 0.002, "loss": 2.3236, "step": 224980 }, { "epoch": 0.8697484189203816, "grad_norm": 0.10677313804626465, "learning_rate": 0.002, "loss": 2.3445, "step": 224990 }, { "epoch": 0.8697870761237649, "grad_norm": 0.09780658036470413, "learning_rate": 0.002, "loss": 2.3439, "step": 225000 }, { "epoch": 0.8698257333271482, "grad_norm": 0.10465943068265915, "learning_rate": 0.002, "loss": 2.3315, "step": 225010 }, { "epoch": 0.8698643905305314, "grad_norm": 0.11495811492204666, "learning_rate": 0.002, "loss": 2.3239, "step": 225020 }, { "epoch": 0.8699030477339147, "grad_norm": 0.10129937529563904, "learning_rate": 0.002, "loss": 2.3426, "step": 225030 }, { "epoch": 0.869941704937298, "grad_norm": 0.11727405339479446, "learning_rate": 0.002, "loss": 2.3402, "step": 225040 }, { "epoch": 0.8699803621406813, "grad_norm": 0.11523114889860153, "learning_rate": 0.002, "loss": 2.3363, "step": 225050 }, { "epoch": 0.8700190193440646, "grad_norm": 0.10869179666042328, "learning_rate": 0.002, "loss": 2.3386, "step": 225060 }, { "epoch": 0.8700576765474478, "grad_norm": 0.11668886244297028, "learning_rate": 0.002, "loss": 2.3451, "step": 225070 }, { "epoch": 0.8700963337508312, "grad_norm": 0.10371943563222885, "learning_rate": 0.002, "loss": 2.3209, "step": 225080 }, { "epoch": 0.8701349909542144, "grad_norm": 0.11870495975017548, "learning_rate": 0.002, "loss": 2.3377, "step": 225090 }, { "epoch": 0.8701736481575977, "grad_norm": 0.1018364354968071, "learning_rate": 0.002, "loss": 2.3553, "step": 225100 }, { "epoch": 0.8702123053609809, "grad_norm": 0.09280882030725479, "learning_rate": 0.002, "loss": 2.3429, "step": 225110 }, { "epoch": 0.8702509625643643, "grad_norm": 0.11336492002010345, "learning_rate": 0.002, "loss": 2.3469, "step": 225120 }, { "epoch": 0.8702896197677475, "grad_norm": 0.12220898270606995, "learning_rate": 0.002, "loss": 2.3421, "step": 225130 }, { "epoch": 0.8703282769711308, "grad_norm": 0.10010644793510437, "learning_rate": 0.002, "loss": 2.3454, "step": 225140 }, { "epoch": 0.870366934174514, "grad_norm": 0.11782457679510117, "learning_rate": 0.002, "loss": 2.3558, "step": 225150 }, { "epoch": 0.8704055913778973, "grad_norm": 0.09656857699155807, "learning_rate": 0.002, "loss": 2.3446, "step": 225160 }, { "epoch": 0.8704442485812807, "grad_norm": 0.10837922245264053, "learning_rate": 0.002, "loss": 2.3346, "step": 225170 }, { "epoch": 0.8704829057846639, "grad_norm": 0.09899026900529861, "learning_rate": 0.002, "loss": 2.339, "step": 225180 }, { "epoch": 0.8705215629880472, "grad_norm": 0.10472776740789413, "learning_rate": 0.002, "loss": 2.3308, "step": 225190 }, { "epoch": 0.8705602201914304, "grad_norm": 0.09675848484039307, "learning_rate": 0.002, "loss": 2.3451, "step": 225200 }, { "epoch": 0.8705988773948138, "grad_norm": 0.10160231590270996, "learning_rate": 0.002, "loss": 2.3351, "step": 225210 }, { "epoch": 0.870637534598197, "grad_norm": 0.10469699651002884, "learning_rate": 0.002, "loss": 2.3242, "step": 225220 }, { "epoch": 0.8706761918015803, "grad_norm": 0.09993157535791397, "learning_rate": 0.002, "loss": 2.3447, "step": 225230 }, { "epoch": 0.8707148490049635, "grad_norm": 0.10199610888957977, "learning_rate": 0.002, "loss": 2.3526, "step": 225240 }, { "epoch": 0.8707535062083469, "grad_norm": 0.09740594029426575, "learning_rate": 0.002, "loss": 2.3494, "step": 225250 }, { "epoch": 0.8707921634117302, "grad_norm": 0.10081925988197327, "learning_rate": 0.002, "loss": 2.3436, "step": 225260 }, { "epoch": 0.8708308206151134, "grad_norm": 0.098286472260952, "learning_rate": 0.002, "loss": 2.3273, "step": 225270 }, { "epoch": 0.8708694778184967, "grad_norm": 0.10415423661470413, "learning_rate": 0.002, "loss": 2.3316, "step": 225280 }, { "epoch": 0.87090813502188, "grad_norm": 0.10316446423530579, "learning_rate": 0.002, "loss": 2.3466, "step": 225290 }, { "epoch": 0.8709467922252633, "grad_norm": 0.0970345288515091, "learning_rate": 0.002, "loss": 2.3488, "step": 225300 }, { "epoch": 0.8709854494286465, "grad_norm": 0.1323097199201584, "learning_rate": 0.002, "loss": 2.3412, "step": 225310 }, { "epoch": 0.8710241066320298, "grad_norm": 0.11852015554904938, "learning_rate": 0.002, "loss": 2.3433, "step": 225320 }, { "epoch": 0.8710627638354131, "grad_norm": 0.09876296669244766, "learning_rate": 0.002, "loss": 2.3365, "step": 225330 }, { "epoch": 0.8711014210387964, "grad_norm": 0.15328599512577057, "learning_rate": 0.002, "loss": 2.3385, "step": 225340 }, { "epoch": 0.8711400782421796, "grad_norm": 0.12386604398488998, "learning_rate": 0.002, "loss": 2.3425, "step": 225350 }, { "epoch": 0.8711787354455629, "grad_norm": 0.09469375014305115, "learning_rate": 0.002, "loss": 2.3435, "step": 225360 }, { "epoch": 0.8712173926489462, "grad_norm": 0.10191961377859116, "learning_rate": 0.002, "loss": 2.3533, "step": 225370 }, { "epoch": 0.8712560498523295, "grad_norm": 0.15213637053966522, "learning_rate": 0.002, "loss": 2.3511, "step": 225380 }, { "epoch": 0.8712947070557128, "grad_norm": 0.10185971856117249, "learning_rate": 0.002, "loss": 2.3425, "step": 225390 }, { "epoch": 0.871333364259096, "grad_norm": 0.12874957919120789, "learning_rate": 0.002, "loss": 2.355, "step": 225400 }, { "epoch": 0.8713720214624793, "grad_norm": 0.10777001827955246, "learning_rate": 0.002, "loss": 2.3378, "step": 225410 }, { "epoch": 0.8714106786658626, "grad_norm": 0.11325021833181381, "learning_rate": 0.002, "loss": 2.3455, "step": 225420 }, { "epoch": 0.8714493358692459, "grad_norm": 0.10347343236207962, "learning_rate": 0.002, "loss": 2.3477, "step": 225430 }, { "epoch": 0.8714879930726291, "grad_norm": 0.1121433898806572, "learning_rate": 0.002, "loss": 2.3539, "step": 225440 }, { "epoch": 0.8715266502760124, "grad_norm": 0.09990080446004868, "learning_rate": 0.002, "loss": 2.3405, "step": 225450 }, { "epoch": 0.8715653074793958, "grad_norm": 0.13435986638069153, "learning_rate": 0.002, "loss": 2.3365, "step": 225460 }, { "epoch": 0.871603964682779, "grad_norm": 0.15727294981479645, "learning_rate": 0.002, "loss": 2.3422, "step": 225470 }, { "epoch": 0.8716426218861623, "grad_norm": 0.08477748930454254, "learning_rate": 0.002, "loss": 2.3384, "step": 225480 }, { "epoch": 0.8716812790895455, "grad_norm": 0.1023939922451973, "learning_rate": 0.002, "loss": 2.3506, "step": 225490 }, { "epoch": 0.8717199362929289, "grad_norm": 0.10732561349868774, "learning_rate": 0.002, "loss": 2.3255, "step": 225500 }, { "epoch": 0.8717585934963121, "grad_norm": 0.12268977612257004, "learning_rate": 0.002, "loss": 2.3448, "step": 225510 }, { "epoch": 0.8717972506996954, "grad_norm": 0.1006249263882637, "learning_rate": 0.002, "loss": 2.3462, "step": 225520 }, { "epoch": 0.8718359079030786, "grad_norm": 0.09923502057790756, "learning_rate": 0.002, "loss": 2.3392, "step": 225530 }, { "epoch": 0.8718745651064619, "grad_norm": 0.1083303838968277, "learning_rate": 0.002, "loss": 2.3451, "step": 225540 }, { "epoch": 0.8719132223098452, "grad_norm": 0.10976643115282059, "learning_rate": 0.002, "loss": 2.3328, "step": 225550 }, { "epoch": 0.8719518795132285, "grad_norm": 0.12189637869596481, "learning_rate": 0.002, "loss": 2.3455, "step": 225560 }, { "epoch": 0.8719905367166118, "grad_norm": 0.10473313927650452, "learning_rate": 0.002, "loss": 2.331, "step": 225570 }, { "epoch": 0.872029193919995, "grad_norm": 0.09554488211870193, "learning_rate": 0.002, "loss": 2.3371, "step": 225580 }, { "epoch": 0.8720678511233784, "grad_norm": 0.1166779100894928, "learning_rate": 0.002, "loss": 2.3416, "step": 225590 }, { "epoch": 0.8721065083267616, "grad_norm": 0.10591023415327072, "learning_rate": 0.002, "loss": 2.3389, "step": 225600 }, { "epoch": 0.8721451655301449, "grad_norm": 0.09874525666236877, "learning_rate": 0.002, "loss": 2.3566, "step": 225610 }, { "epoch": 0.8721838227335281, "grad_norm": 0.11275798082351685, "learning_rate": 0.002, "loss": 2.3376, "step": 225620 }, { "epoch": 0.8722224799369115, "grad_norm": 0.10221873968839645, "learning_rate": 0.002, "loss": 2.3233, "step": 225630 }, { "epoch": 0.8722611371402947, "grad_norm": 0.2800368368625641, "learning_rate": 0.002, "loss": 2.3391, "step": 225640 }, { "epoch": 0.872299794343678, "grad_norm": 0.10905713587999344, "learning_rate": 0.002, "loss": 2.3407, "step": 225650 }, { "epoch": 0.8723384515470612, "grad_norm": 0.10796914994716644, "learning_rate": 0.002, "loss": 2.3391, "step": 225660 }, { "epoch": 0.8723771087504446, "grad_norm": 0.13260188698768616, "learning_rate": 0.002, "loss": 2.3508, "step": 225670 }, { "epoch": 0.8724157659538279, "grad_norm": 0.10988302528858185, "learning_rate": 0.002, "loss": 2.3343, "step": 225680 }, { "epoch": 0.8724544231572111, "grad_norm": 0.09902926534414291, "learning_rate": 0.002, "loss": 2.325, "step": 225690 }, { "epoch": 0.8724930803605944, "grad_norm": 0.1075626090168953, "learning_rate": 0.002, "loss": 2.3441, "step": 225700 }, { "epoch": 0.8725317375639776, "grad_norm": 0.11184363812208176, "learning_rate": 0.002, "loss": 2.342, "step": 225710 }, { "epoch": 0.872570394767361, "grad_norm": 0.1105227917432785, "learning_rate": 0.002, "loss": 2.3334, "step": 225720 }, { "epoch": 0.8726090519707442, "grad_norm": 0.1228136196732521, "learning_rate": 0.002, "loss": 2.3429, "step": 225730 }, { "epoch": 0.8726477091741275, "grad_norm": 0.09833686798810959, "learning_rate": 0.002, "loss": 2.3516, "step": 225740 }, { "epoch": 0.8726863663775107, "grad_norm": 0.10563940554857254, "learning_rate": 0.002, "loss": 2.3447, "step": 225750 }, { "epoch": 0.8727250235808941, "grad_norm": 0.10530819743871689, "learning_rate": 0.002, "loss": 2.325, "step": 225760 }, { "epoch": 0.8727636807842774, "grad_norm": 0.1024976521730423, "learning_rate": 0.002, "loss": 2.3351, "step": 225770 }, { "epoch": 0.8728023379876606, "grad_norm": 0.11485017091035843, "learning_rate": 0.002, "loss": 2.3467, "step": 225780 }, { "epoch": 0.8728409951910439, "grad_norm": 0.09713538736104965, "learning_rate": 0.002, "loss": 2.3306, "step": 225790 }, { "epoch": 0.8728796523944272, "grad_norm": 0.100540891289711, "learning_rate": 0.002, "loss": 2.3345, "step": 225800 }, { "epoch": 0.8729183095978105, "grad_norm": 0.11147937923669815, "learning_rate": 0.002, "loss": 2.3407, "step": 225810 }, { "epoch": 0.8729569668011937, "grad_norm": 0.09945949912071228, "learning_rate": 0.002, "loss": 2.3438, "step": 225820 }, { "epoch": 0.872995624004577, "grad_norm": 0.1261347085237503, "learning_rate": 0.002, "loss": 2.3471, "step": 225830 }, { "epoch": 0.8730342812079603, "grad_norm": 0.10962780565023422, "learning_rate": 0.002, "loss": 2.3439, "step": 225840 }, { "epoch": 0.8730729384113436, "grad_norm": 0.11201975494623184, "learning_rate": 0.002, "loss": 2.3243, "step": 225850 }, { "epoch": 0.8731115956147268, "grad_norm": 0.1256123185157776, "learning_rate": 0.002, "loss": 2.3445, "step": 225860 }, { "epoch": 0.8731502528181101, "grad_norm": 0.11174170672893524, "learning_rate": 0.002, "loss": 2.3327, "step": 225870 }, { "epoch": 0.8731889100214935, "grad_norm": 0.10957685858011246, "learning_rate": 0.002, "loss": 2.3471, "step": 225880 }, { "epoch": 0.8732275672248767, "grad_norm": 0.11033196747303009, "learning_rate": 0.002, "loss": 2.341, "step": 225890 }, { "epoch": 0.87326622442826, "grad_norm": 0.11110338568687439, "learning_rate": 0.002, "loss": 2.334, "step": 225900 }, { "epoch": 0.8733048816316432, "grad_norm": 0.10742199420928955, "learning_rate": 0.002, "loss": 2.3342, "step": 225910 }, { "epoch": 0.8733435388350265, "grad_norm": 0.10513104498386383, "learning_rate": 0.002, "loss": 2.3415, "step": 225920 }, { "epoch": 0.8733821960384098, "grad_norm": 0.10281091928482056, "learning_rate": 0.002, "loss": 2.3484, "step": 225930 }, { "epoch": 0.8734208532417931, "grad_norm": 0.10133229941129684, "learning_rate": 0.002, "loss": 2.3321, "step": 225940 }, { "epoch": 0.8734595104451763, "grad_norm": 0.10914817452430725, "learning_rate": 0.002, "loss": 2.3318, "step": 225950 }, { "epoch": 0.8734981676485596, "grad_norm": 0.11362199485301971, "learning_rate": 0.002, "loss": 2.3325, "step": 225960 }, { "epoch": 0.873536824851943, "grad_norm": 0.1011771410703659, "learning_rate": 0.002, "loss": 2.3541, "step": 225970 }, { "epoch": 0.8735754820553262, "grad_norm": 0.09920097142457962, "learning_rate": 0.002, "loss": 2.3425, "step": 225980 }, { "epoch": 0.8736141392587095, "grad_norm": 0.1007387787103653, "learning_rate": 0.002, "loss": 2.3465, "step": 225990 }, { "epoch": 0.8736527964620927, "grad_norm": 0.09851537644863129, "learning_rate": 0.002, "loss": 2.3429, "step": 226000 }, { "epoch": 0.8736914536654761, "grad_norm": 0.096402108669281, "learning_rate": 0.002, "loss": 2.3331, "step": 226010 }, { "epoch": 0.8737301108688593, "grad_norm": 0.10415016859769821, "learning_rate": 0.002, "loss": 2.3374, "step": 226020 }, { "epoch": 0.8737687680722426, "grad_norm": 0.10720174014568329, "learning_rate": 0.002, "loss": 2.3502, "step": 226030 }, { "epoch": 0.8738074252756258, "grad_norm": 0.09929098933935165, "learning_rate": 0.002, "loss": 2.3342, "step": 226040 }, { "epoch": 0.8738460824790092, "grad_norm": 0.11904928088188171, "learning_rate": 0.002, "loss": 2.3465, "step": 226050 }, { "epoch": 0.8738847396823924, "grad_norm": 0.0928359255194664, "learning_rate": 0.002, "loss": 2.3162, "step": 226060 }, { "epoch": 0.8739233968857757, "grad_norm": 0.11214454472064972, "learning_rate": 0.002, "loss": 2.3474, "step": 226070 }, { "epoch": 0.873962054089159, "grad_norm": 0.1066681444644928, "learning_rate": 0.002, "loss": 2.3501, "step": 226080 }, { "epoch": 0.8740007112925422, "grad_norm": 0.10091183334589005, "learning_rate": 0.002, "loss": 2.3481, "step": 226090 }, { "epoch": 0.8740393684959256, "grad_norm": 0.11797045916318893, "learning_rate": 0.002, "loss": 2.3357, "step": 226100 }, { "epoch": 0.8740780256993088, "grad_norm": 0.0893377959728241, "learning_rate": 0.002, "loss": 2.3423, "step": 226110 }, { "epoch": 0.8741166829026921, "grad_norm": 0.11226366460323334, "learning_rate": 0.002, "loss": 2.3478, "step": 226120 }, { "epoch": 0.8741553401060753, "grad_norm": 0.11060541868209839, "learning_rate": 0.002, "loss": 2.348, "step": 226130 }, { "epoch": 0.8741939973094587, "grad_norm": 0.09058070182800293, "learning_rate": 0.002, "loss": 2.3333, "step": 226140 }, { "epoch": 0.8742326545128419, "grad_norm": 0.11443872004747391, "learning_rate": 0.002, "loss": 2.3291, "step": 226150 }, { "epoch": 0.8742713117162252, "grad_norm": 0.09787355363368988, "learning_rate": 0.002, "loss": 2.339, "step": 226160 }, { "epoch": 0.8743099689196084, "grad_norm": 0.130850151181221, "learning_rate": 0.002, "loss": 2.3542, "step": 226170 }, { "epoch": 0.8743486261229918, "grad_norm": 0.11858386546373367, "learning_rate": 0.002, "loss": 2.3453, "step": 226180 }, { "epoch": 0.874387283326375, "grad_norm": 0.09746160358190536, "learning_rate": 0.002, "loss": 2.3396, "step": 226190 }, { "epoch": 0.8744259405297583, "grad_norm": 0.09695523232221603, "learning_rate": 0.002, "loss": 2.3469, "step": 226200 }, { "epoch": 0.8744645977331416, "grad_norm": 0.09290745109319687, "learning_rate": 0.002, "loss": 2.3354, "step": 226210 }, { "epoch": 0.8745032549365249, "grad_norm": 0.1002887487411499, "learning_rate": 0.002, "loss": 2.3314, "step": 226220 }, { "epoch": 0.8745419121399082, "grad_norm": 0.10883896052837372, "learning_rate": 0.002, "loss": 2.3351, "step": 226230 }, { "epoch": 0.8745805693432914, "grad_norm": 0.11657164990901947, "learning_rate": 0.002, "loss": 2.3287, "step": 226240 }, { "epoch": 0.8746192265466747, "grad_norm": 0.09341996163129807, "learning_rate": 0.002, "loss": 2.3244, "step": 226250 }, { "epoch": 0.874657883750058, "grad_norm": 0.10273752361536026, "learning_rate": 0.002, "loss": 2.3309, "step": 226260 }, { "epoch": 0.8746965409534413, "grad_norm": 0.08396825194358826, "learning_rate": 0.002, "loss": 2.3311, "step": 226270 }, { "epoch": 0.8747351981568245, "grad_norm": 0.10608824342489243, "learning_rate": 0.002, "loss": 2.3562, "step": 226280 }, { "epoch": 0.8747738553602078, "grad_norm": 0.09661603718996048, "learning_rate": 0.002, "loss": 2.3427, "step": 226290 }, { "epoch": 0.874812512563591, "grad_norm": 0.11775680631399155, "learning_rate": 0.002, "loss": 2.3375, "step": 226300 }, { "epoch": 0.8748511697669744, "grad_norm": 0.10600218921899796, "learning_rate": 0.002, "loss": 2.3412, "step": 226310 }, { "epoch": 0.8748898269703577, "grad_norm": 0.09572699666023254, "learning_rate": 0.002, "loss": 2.3363, "step": 226320 }, { "epoch": 0.8749284841737409, "grad_norm": 0.11705332249403, "learning_rate": 0.002, "loss": 2.3333, "step": 226330 }, { "epoch": 0.8749671413771242, "grad_norm": 0.11242889612913132, "learning_rate": 0.002, "loss": 2.3438, "step": 226340 }, { "epoch": 0.8750057985805075, "grad_norm": 0.1230028048157692, "learning_rate": 0.002, "loss": 2.3371, "step": 226350 }, { "epoch": 0.8750444557838908, "grad_norm": 0.09983881562948227, "learning_rate": 0.002, "loss": 2.3428, "step": 226360 }, { "epoch": 0.875083112987274, "grad_norm": 0.08863212168216705, "learning_rate": 0.002, "loss": 2.3361, "step": 226370 }, { "epoch": 0.8751217701906573, "grad_norm": 0.0950503945350647, "learning_rate": 0.002, "loss": 2.3395, "step": 226380 }, { "epoch": 0.8751604273940407, "grad_norm": 0.11235824972391129, "learning_rate": 0.002, "loss": 2.344, "step": 226390 }, { "epoch": 0.8751990845974239, "grad_norm": 0.11355585604906082, "learning_rate": 0.002, "loss": 2.3494, "step": 226400 }, { "epoch": 0.8752377418008072, "grad_norm": 0.1016106829047203, "learning_rate": 0.002, "loss": 2.34, "step": 226410 }, { "epoch": 0.8752763990041904, "grad_norm": 0.10593704879283905, "learning_rate": 0.002, "loss": 2.329, "step": 226420 }, { "epoch": 0.8753150562075738, "grad_norm": 0.12088311463594437, "learning_rate": 0.002, "loss": 2.3308, "step": 226430 }, { "epoch": 0.875353713410957, "grad_norm": 0.09646426886320114, "learning_rate": 0.002, "loss": 2.3434, "step": 226440 }, { "epoch": 0.8753923706143403, "grad_norm": 0.09106966108083725, "learning_rate": 0.002, "loss": 2.3388, "step": 226450 }, { "epoch": 0.8754310278177235, "grad_norm": 0.10133379697799683, "learning_rate": 0.002, "loss": 2.3456, "step": 226460 }, { "epoch": 0.8754696850211068, "grad_norm": 0.10170179605484009, "learning_rate": 0.002, "loss": 2.3402, "step": 226470 }, { "epoch": 0.8755083422244901, "grad_norm": 0.1035342887043953, "learning_rate": 0.002, "loss": 2.3427, "step": 226480 }, { "epoch": 0.8755469994278734, "grad_norm": 0.09977155178785324, "learning_rate": 0.002, "loss": 2.3355, "step": 226490 }, { "epoch": 0.8755856566312566, "grad_norm": 0.10518094152212143, "learning_rate": 0.002, "loss": 2.3355, "step": 226500 }, { "epoch": 0.8756243138346399, "grad_norm": 0.1063164472579956, "learning_rate": 0.002, "loss": 2.339, "step": 226510 }, { "epoch": 0.8756629710380233, "grad_norm": 0.09273169934749603, "learning_rate": 0.002, "loss": 2.3496, "step": 226520 }, { "epoch": 0.8757016282414065, "grad_norm": 0.11271461099386215, "learning_rate": 0.002, "loss": 2.352, "step": 226530 }, { "epoch": 0.8757402854447898, "grad_norm": 0.12785807251930237, "learning_rate": 0.002, "loss": 2.3374, "step": 226540 }, { "epoch": 0.875778942648173, "grad_norm": 0.10536123067140579, "learning_rate": 0.002, "loss": 2.33, "step": 226550 }, { "epoch": 0.8758175998515564, "grad_norm": 0.10244054347276688, "learning_rate": 0.002, "loss": 2.339, "step": 226560 }, { "epoch": 0.8758562570549396, "grad_norm": 0.1381578892469406, "learning_rate": 0.002, "loss": 2.3372, "step": 226570 }, { "epoch": 0.8758949142583229, "grad_norm": 0.11038932204246521, "learning_rate": 0.002, "loss": 2.3408, "step": 226580 }, { "epoch": 0.8759335714617061, "grad_norm": 0.12924733757972717, "learning_rate": 0.002, "loss": 2.3281, "step": 226590 }, { "epoch": 0.8759722286650895, "grad_norm": 0.09875442832708359, "learning_rate": 0.002, "loss": 2.3199, "step": 226600 }, { "epoch": 0.8760108858684728, "grad_norm": 0.11456121504306793, "learning_rate": 0.002, "loss": 2.3481, "step": 226610 }, { "epoch": 0.876049543071856, "grad_norm": 0.09922508150339127, "learning_rate": 0.002, "loss": 2.3293, "step": 226620 }, { "epoch": 0.8760882002752393, "grad_norm": 0.09993966668844223, "learning_rate": 0.002, "loss": 2.3601, "step": 226630 }, { "epoch": 0.8761268574786225, "grad_norm": 0.09752309322357178, "learning_rate": 0.002, "loss": 2.3502, "step": 226640 }, { "epoch": 0.8761655146820059, "grad_norm": 0.0955125242471695, "learning_rate": 0.002, "loss": 2.3351, "step": 226650 }, { "epoch": 0.8762041718853891, "grad_norm": 0.10003511607646942, "learning_rate": 0.002, "loss": 2.3304, "step": 226660 }, { "epoch": 0.8762428290887724, "grad_norm": 0.13894720375537872, "learning_rate": 0.002, "loss": 2.3471, "step": 226670 }, { "epoch": 0.8762814862921556, "grad_norm": 0.10475655645132065, "learning_rate": 0.002, "loss": 2.3437, "step": 226680 }, { "epoch": 0.876320143495539, "grad_norm": 0.10792479664087296, "learning_rate": 0.002, "loss": 2.3483, "step": 226690 }, { "epoch": 0.8763588006989222, "grad_norm": 0.12203353643417358, "learning_rate": 0.002, "loss": 2.334, "step": 226700 }, { "epoch": 0.8763974579023055, "grad_norm": 0.09583789110183716, "learning_rate": 0.002, "loss": 2.3375, "step": 226710 }, { "epoch": 0.8764361151056888, "grad_norm": 0.10303744673728943, "learning_rate": 0.002, "loss": 2.3226, "step": 226720 }, { "epoch": 0.8764747723090721, "grad_norm": 0.09134358167648315, "learning_rate": 0.002, "loss": 2.3457, "step": 226730 }, { "epoch": 0.8765134295124554, "grad_norm": 0.5620478391647339, "learning_rate": 0.002, "loss": 2.3431, "step": 226740 }, { "epoch": 0.8765520867158386, "grad_norm": 0.10164141654968262, "learning_rate": 0.002, "loss": 2.3278, "step": 226750 }, { "epoch": 0.8765907439192219, "grad_norm": 0.10489300638437271, "learning_rate": 0.002, "loss": 2.339, "step": 226760 }, { "epoch": 0.8766294011226052, "grad_norm": 0.09423074871301651, "learning_rate": 0.002, "loss": 2.3471, "step": 226770 }, { "epoch": 0.8766680583259885, "grad_norm": 0.13391481339931488, "learning_rate": 0.002, "loss": 2.3424, "step": 226780 }, { "epoch": 0.8767067155293717, "grad_norm": 0.09959197789430618, "learning_rate": 0.002, "loss": 2.3412, "step": 226790 }, { "epoch": 0.876745372732755, "grad_norm": 0.1476932317018509, "learning_rate": 0.002, "loss": 2.3473, "step": 226800 }, { "epoch": 0.8767840299361384, "grad_norm": 0.1119222566485405, "learning_rate": 0.002, "loss": 2.3268, "step": 226810 }, { "epoch": 0.8768226871395216, "grad_norm": 0.10764788091182709, "learning_rate": 0.002, "loss": 2.3369, "step": 226820 }, { "epoch": 0.8768613443429049, "grad_norm": 0.09944967925548553, "learning_rate": 0.002, "loss": 2.3332, "step": 226830 }, { "epoch": 0.8769000015462881, "grad_norm": 0.09813541173934937, "learning_rate": 0.002, "loss": 2.3279, "step": 226840 }, { "epoch": 0.8769386587496714, "grad_norm": 0.12044399231672287, "learning_rate": 0.002, "loss": 2.3372, "step": 226850 }, { "epoch": 0.8769773159530547, "grad_norm": 0.10897082835435867, "learning_rate": 0.002, "loss": 2.333, "step": 226860 }, { "epoch": 0.877015973156438, "grad_norm": 0.10732237249612808, "learning_rate": 0.002, "loss": 2.3488, "step": 226870 }, { "epoch": 0.8770546303598212, "grad_norm": 0.11734417825937271, "learning_rate": 0.002, "loss": 2.3427, "step": 226880 }, { "epoch": 0.8770932875632045, "grad_norm": 0.09896469116210938, "learning_rate": 0.002, "loss": 2.3296, "step": 226890 }, { "epoch": 0.8771319447665878, "grad_norm": 0.12109851837158203, "learning_rate": 0.002, "loss": 2.3476, "step": 226900 }, { "epoch": 0.8771706019699711, "grad_norm": 0.10177912563085556, "learning_rate": 0.002, "loss": 2.3436, "step": 226910 }, { "epoch": 0.8772092591733543, "grad_norm": 0.10985736548900604, "learning_rate": 0.002, "loss": 2.3418, "step": 226920 }, { "epoch": 0.8772479163767376, "grad_norm": 0.10610976070165634, "learning_rate": 0.002, "loss": 2.3571, "step": 226930 }, { "epoch": 0.877286573580121, "grad_norm": 0.10284141451120377, "learning_rate": 0.002, "loss": 2.3564, "step": 226940 }, { "epoch": 0.8773252307835042, "grad_norm": 0.10084030777215958, "learning_rate": 0.002, "loss": 2.3352, "step": 226950 }, { "epoch": 0.8773638879868875, "grad_norm": 0.11514756828546524, "learning_rate": 0.002, "loss": 2.3314, "step": 226960 }, { "epoch": 0.8774025451902707, "grad_norm": 0.10323145985603333, "learning_rate": 0.002, "loss": 2.3397, "step": 226970 }, { "epoch": 0.8774412023936541, "grad_norm": 0.09738577902317047, "learning_rate": 0.002, "loss": 2.3393, "step": 226980 }, { "epoch": 0.8774798595970373, "grad_norm": 0.09809859097003937, "learning_rate": 0.002, "loss": 2.3541, "step": 226990 }, { "epoch": 0.8775185168004206, "grad_norm": 0.11053669452667236, "learning_rate": 0.002, "loss": 2.3424, "step": 227000 }, { "epoch": 0.8775571740038038, "grad_norm": 0.11902974545955658, "learning_rate": 0.002, "loss": 2.3277, "step": 227010 }, { "epoch": 0.8775958312071871, "grad_norm": 0.10271196067333221, "learning_rate": 0.002, "loss": 2.3264, "step": 227020 }, { "epoch": 0.8776344884105705, "grad_norm": 0.11581193655729294, "learning_rate": 0.002, "loss": 2.3457, "step": 227030 }, { "epoch": 0.8776731456139537, "grad_norm": 0.1064939945936203, "learning_rate": 0.002, "loss": 2.3358, "step": 227040 }, { "epoch": 0.877711802817337, "grad_norm": 0.10863006114959717, "learning_rate": 0.002, "loss": 2.3376, "step": 227050 }, { "epoch": 0.8777504600207202, "grad_norm": 0.10866818577051163, "learning_rate": 0.002, "loss": 2.3341, "step": 227060 }, { "epoch": 0.8777891172241036, "grad_norm": 0.09740670770406723, "learning_rate": 0.002, "loss": 2.3398, "step": 227070 }, { "epoch": 0.8778277744274868, "grad_norm": 0.11110842227935791, "learning_rate": 0.002, "loss": 2.3353, "step": 227080 }, { "epoch": 0.8778664316308701, "grad_norm": 0.10708796977996826, "learning_rate": 0.002, "loss": 2.3501, "step": 227090 }, { "epoch": 0.8779050888342533, "grad_norm": 0.10029701143503189, "learning_rate": 0.002, "loss": 2.3449, "step": 227100 }, { "epoch": 0.8779437460376367, "grad_norm": 0.10857430845499039, "learning_rate": 0.002, "loss": 2.3421, "step": 227110 }, { "epoch": 0.87798240324102, "grad_norm": 0.12921090424060822, "learning_rate": 0.002, "loss": 2.3385, "step": 227120 }, { "epoch": 0.8780210604444032, "grad_norm": 0.11042632162570953, "learning_rate": 0.002, "loss": 2.3406, "step": 227130 }, { "epoch": 0.8780597176477865, "grad_norm": 0.09481260925531387, "learning_rate": 0.002, "loss": 2.3376, "step": 227140 }, { "epoch": 0.8780983748511698, "grad_norm": 0.10456386208534241, "learning_rate": 0.002, "loss": 2.3427, "step": 227150 }, { "epoch": 0.8781370320545531, "grad_norm": 0.09098687022924423, "learning_rate": 0.002, "loss": 2.3253, "step": 227160 }, { "epoch": 0.8781756892579363, "grad_norm": 0.11767854541540146, "learning_rate": 0.002, "loss": 2.3295, "step": 227170 }, { "epoch": 0.8782143464613196, "grad_norm": 0.11023345589637756, "learning_rate": 0.002, "loss": 2.3421, "step": 227180 }, { "epoch": 0.8782530036647029, "grad_norm": 0.10239629447460175, "learning_rate": 0.002, "loss": 2.3467, "step": 227190 }, { "epoch": 0.8782916608680862, "grad_norm": 0.11235344409942627, "learning_rate": 0.002, "loss": 2.3408, "step": 227200 }, { "epoch": 0.8783303180714694, "grad_norm": 0.13639125227928162, "learning_rate": 0.002, "loss": 2.3347, "step": 227210 }, { "epoch": 0.8783689752748527, "grad_norm": 0.11406679451465607, "learning_rate": 0.002, "loss": 2.3357, "step": 227220 }, { "epoch": 0.878407632478236, "grad_norm": 0.12473028898239136, "learning_rate": 0.002, "loss": 2.3403, "step": 227230 }, { "epoch": 0.8784462896816193, "grad_norm": 0.09570764005184174, "learning_rate": 0.002, "loss": 2.328, "step": 227240 }, { "epoch": 0.8784849468850026, "grad_norm": 0.0939561128616333, "learning_rate": 0.002, "loss": 2.3128, "step": 227250 }, { "epoch": 0.8785236040883858, "grad_norm": 0.12944932281970978, "learning_rate": 0.002, "loss": 2.3421, "step": 227260 }, { "epoch": 0.8785622612917691, "grad_norm": 0.1073397770524025, "learning_rate": 0.002, "loss": 2.3393, "step": 227270 }, { "epoch": 0.8786009184951524, "grad_norm": 0.09914367645978928, "learning_rate": 0.002, "loss": 2.3285, "step": 227280 }, { "epoch": 0.8786395756985357, "grad_norm": 0.1429779827594757, "learning_rate": 0.002, "loss": 2.3506, "step": 227290 }, { "epoch": 0.8786782329019189, "grad_norm": 0.09743501991033554, "learning_rate": 0.002, "loss": 2.3491, "step": 227300 }, { "epoch": 0.8787168901053022, "grad_norm": 0.10444758087396622, "learning_rate": 0.002, "loss": 2.3395, "step": 227310 }, { "epoch": 0.8787555473086855, "grad_norm": 0.10072892159223557, "learning_rate": 0.002, "loss": 2.331, "step": 227320 }, { "epoch": 0.8787942045120688, "grad_norm": 0.11068283766508102, "learning_rate": 0.002, "loss": 2.3326, "step": 227330 }, { "epoch": 0.878832861715452, "grad_norm": 0.11059466749429703, "learning_rate": 0.002, "loss": 2.3532, "step": 227340 }, { "epoch": 0.8788715189188353, "grad_norm": 0.1134902760386467, "learning_rate": 0.002, "loss": 2.3355, "step": 227350 }, { "epoch": 0.8789101761222187, "grad_norm": 0.11368384957313538, "learning_rate": 0.002, "loss": 2.331, "step": 227360 }, { "epoch": 0.8789488333256019, "grad_norm": 0.10789523273706436, "learning_rate": 0.002, "loss": 2.3453, "step": 227370 }, { "epoch": 0.8789874905289852, "grad_norm": 0.11604959517717361, "learning_rate": 0.002, "loss": 2.3304, "step": 227380 }, { "epoch": 0.8790261477323684, "grad_norm": 0.09539544582366943, "learning_rate": 0.002, "loss": 2.3423, "step": 227390 }, { "epoch": 0.8790648049357517, "grad_norm": 0.10180415958166122, "learning_rate": 0.002, "loss": 2.3541, "step": 227400 }, { "epoch": 0.879103462139135, "grad_norm": 0.10947983711957932, "learning_rate": 0.002, "loss": 2.3173, "step": 227410 }, { "epoch": 0.8791421193425183, "grad_norm": 0.10848617553710938, "learning_rate": 0.002, "loss": 2.329, "step": 227420 }, { "epoch": 0.8791807765459015, "grad_norm": 0.12041208893060684, "learning_rate": 0.002, "loss": 2.3478, "step": 227430 }, { "epoch": 0.8792194337492848, "grad_norm": 0.09465758502483368, "learning_rate": 0.002, "loss": 2.3385, "step": 227440 }, { "epoch": 0.8792580909526682, "grad_norm": 0.14059308171272278, "learning_rate": 0.002, "loss": 2.3306, "step": 227450 }, { "epoch": 0.8792967481560514, "grad_norm": 0.10597053915262222, "learning_rate": 0.002, "loss": 2.3473, "step": 227460 }, { "epoch": 0.8793354053594347, "grad_norm": 0.10933779925107956, "learning_rate": 0.002, "loss": 2.3487, "step": 227470 }, { "epoch": 0.8793740625628179, "grad_norm": 0.09205158799886703, "learning_rate": 0.002, "loss": 2.3326, "step": 227480 }, { "epoch": 0.8794127197662013, "grad_norm": 0.10398532450199127, "learning_rate": 0.002, "loss": 2.332, "step": 227490 }, { "epoch": 0.8794513769695845, "grad_norm": 0.10922596603631973, "learning_rate": 0.002, "loss": 2.3366, "step": 227500 }, { "epoch": 0.8794900341729678, "grad_norm": 0.11087905615568161, "learning_rate": 0.002, "loss": 2.3501, "step": 227510 }, { "epoch": 0.879528691376351, "grad_norm": 0.11130758374929428, "learning_rate": 0.002, "loss": 2.3526, "step": 227520 }, { "epoch": 0.8795673485797344, "grad_norm": 0.09835169464349747, "learning_rate": 0.002, "loss": 2.3372, "step": 227530 }, { "epoch": 0.8796060057831177, "grad_norm": 0.11270011216402054, "learning_rate": 0.002, "loss": 2.3497, "step": 227540 }, { "epoch": 0.8796446629865009, "grad_norm": 0.08977176249027252, "learning_rate": 0.002, "loss": 2.3347, "step": 227550 }, { "epoch": 0.8796833201898842, "grad_norm": 0.10379301011562347, "learning_rate": 0.002, "loss": 2.3245, "step": 227560 }, { "epoch": 0.8797219773932674, "grad_norm": 0.10165230184793472, "learning_rate": 0.002, "loss": 2.3289, "step": 227570 }, { "epoch": 0.8797606345966508, "grad_norm": 0.10444454848766327, "learning_rate": 0.002, "loss": 2.3464, "step": 227580 }, { "epoch": 0.879799291800034, "grad_norm": 0.10805295407772064, "learning_rate": 0.002, "loss": 2.341, "step": 227590 }, { "epoch": 0.8798379490034173, "grad_norm": 0.0940571278333664, "learning_rate": 0.002, "loss": 2.3423, "step": 227600 }, { "epoch": 0.8798766062068005, "grad_norm": 0.10650225728750229, "learning_rate": 0.002, "loss": 2.3511, "step": 227610 }, { "epoch": 0.8799152634101839, "grad_norm": 0.09061005711555481, "learning_rate": 0.002, "loss": 2.3507, "step": 227620 }, { "epoch": 0.8799539206135671, "grad_norm": 0.11038070172071457, "learning_rate": 0.002, "loss": 2.327, "step": 227630 }, { "epoch": 0.8799925778169504, "grad_norm": 0.11455640941858292, "learning_rate": 0.002, "loss": 2.3387, "step": 227640 }, { "epoch": 0.8800312350203336, "grad_norm": 0.10514939576387405, "learning_rate": 0.002, "loss": 2.335, "step": 227650 }, { "epoch": 0.880069892223717, "grad_norm": 0.10529999434947968, "learning_rate": 0.002, "loss": 2.3384, "step": 227660 }, { "epoch": 0.8801085494271003, "grad_norm": 0.10474987328052521, "learning_rate": 0.002, "loss": 2.345, "step": 227670 }, { "epoch": 0.8801472066304835, "grad_norm": 0.12214624136686325, "learning_rate": 0.002, "loss": 2.3399, "step": 227680 }, { "epoch": 0.8801858638338668, "grad_norm": 0.11123227328062057, "learning_rate": 0.002, "loss": 2.3498, "step": 227690 }, { "epoch": 0.8802245210372501, "grad_norm": 0.09702645987272263, "learning_rate": 0.002, "loss": 2.3374, "step": 227700 }, { "epoch": 0.8802631782406334, "grad_norm": 0.10156147181987762, "learning_rate": 0.002, "loss": 2.3302, "step": 227710 }, { "epoch": 0.8803018354440166, "grad_norm": 0.10677109658718109, "learning_rate": 0.002, "loss": 2.3365, "step": 227720 }, { "epoch": 0.8803404926473999, "grad_norm": 0.09380175173282623, "learning_rate": 0.002, "loss": 2.3329, "step": 227730 }, { "epoch": 0.8803791498507832, "grad_norm": 0.0992397591471672, "learning_rate": 0.002, "loss": 2.333, "step": 227740 }, { "epoch": 0.8804178070541665, "grad_norm": 0.10190727561712265, "learning_rate": 0.002, "loss": 2.3417, "step": 227750 }, { "epoch": 0.8804564642575498, "grad_norm": 0.11910542100667953, "learning_rate": 0.002, "loss": 2.3518, "step": 227760 }, { "epoch": 0.880495121460933, "grad_norm": 0.1118072047829628, "learning_rate": 0.002, "loss": 2.3307, "step": 227770 }, { "epoch": 0.8805337786643163, "grad_norm": 0.10263451933860779, "learning_rate": 0.002, "loss": 2.3309, "step": 227780 }, { "epoch": 0.8805724358676996, "grad_norm": 0.09938156604766846, "learning_rate": 0.002, "loss": 2.3285, "step": 227790 }, { "epoch": 0.8806110930710829, "grad_norm": 0.11796408146619797, "learning_rate": 0.002, "loss": 2.3321, "step": 227800 }, { "epoch": 0.8806497502744661, "grad_norm": 0.08841456472873688, "learning_rate": 0.002, "loss": 2.3549, "step": 227810 }, { "epoch": 0.8806884074778494, "grad_norm": 0.11047626286745071, "learning_rate": 0.002, "loss": 2.35, "step": 227820 }, { "epoch": 0.8807270646812327, "grad_norm": 0.10162214189767838, "learning_rate": 0.002, "loss": 2.3362, "step": 227830 }, { "epoch": 0.880765721884616, "grad_norm": 0.11247002333402634, "learning_rate": 0.002, "loss": 2.328, "step": 227840 }, { "epoch": 0.8808043790879992, "grad_norm": 0.09718988835811615, "learning_rate": 0.002, "loss": 2.3434, "step": 227850 }, { "epoch": 0.8808430362913825, "grad_norm": 0.13489054143428802, "learning_rate": 0.002, "loss": 2.338, "step": 227860 }, { "epoch": 0.8808816934947659, "grad_norm": 0.1064859926700592, "learning_rate": 0.002, "loss": 2.336, "step": 227870 }, { "epoch": 0.8809203506981491, "grad_norm": 0.10662670433521271, "learning_rate": 0.002, "loss": 2.3375, "step": 227880 }, { "epoch": 0.8809590079015324, "grad_norm": 0.10369833558797836, "learning_rate": 0.002, "loss": 2.3479, "step": 227890 }, { "epoch": 0.8809976651049156, "grad_norm": 0.11877351999282837, "learning_rate": 0.002, "loss": 2.3482, "step": 227900 }, { "epoch": 0.881036322308299, "grad_norm": 0.09558946639299393, "learning_rate": 0.002, "loss": 2.3677, "step": 227910 }, { "epoch": 0.8810749795116822, "grad_norm": 0.09941543638706207, "learning_rate": 0.002, "loss": 2.3373, "step": 227920 }, { "epoch": 0.8811136367150655, "grad_norm": 0.10346577316522598, "learning_rate": 0.002, "loss": 2.3291, "step": 227930 }, { "epoch": 0.8811522939184487, "grad_norm": 0.09705489128828049, "learning_rate": 0.002, "loss": 2.3291, "step": 227940 }, { "epoch": 0.881190951121832, "grad_norm": 0.107711561024189, "learning_rate": 0.002, "loss": 2.3279, "step": 227950 }, { "epoch": 0.8812296083252154, "grad_norm": 0.10006747394800186, "learning_rate": 0.002, "loss": 2.355, "step": 227960 }, { "epoch": 0.8812682655285986, "grad_norm": 0.10859206318855286, "learning_rate": 0.002, "loss": 2.3465, "step": 227970 }, { "epoch": 0.8813069227319819, "grad_norm": 0.09800279885530472, "learning_rate": 0.002, "loss": 2.3204, "step": 227980 }, { "epoch": 0.8813455799353651, "grad_norm": 0.10673708468675613, "learning_rate": 0.002, "loss": 2.3434, "step": 227990 }, { "epoch": 0.8813842371387485, "grad_norm": 0.10194409638643265, "learning_rate": 0.002, "loss": 2.3436, "step": 228000 }, { "epoch": 0.8814228943421317, "grad_norm": 0.08737658709287643, "learning_rate": 0.002, "loss": 2.3512, "step": 228010 }, { "epoch": 0.881461551545515, "grad_norm": 0.0974418893456459, "learning_rate": 0.002, "loss": 2.3308, "step": 228020 }, { "epoch": 0.8815002087488982, "grad_norm": 0.1162291020154953, "learning_rate": 0.002, "loss": 2.3397, "step": 228030 }, { "epoch": 0.8815388659522816, "grad_norm": 0.11426295340061188, "learning_rate": 0.002, "loss": 2.3367, "step": 228040 }, { "epoch": 0.8815775231556648, "grad_norm": 0.09751128405332565, "learning_rate": 0.002, "loss": 2.3253, "step": 228050 }, { "epoch": 0.8816161803590481, "grad_norm": 0.1150236502289772, "learning_rate": 0.002, "loss": 2.3529, "step": 228060 }, { "epoch": 0.8816548375624313, "grad_norm": 0.10207517445087433, "learning_rate": 0.002, "loss": 2.3343, "step": 228070 }, { "epoch": 0.8816934947658147, "grad_norm": 0.09584974497556686, "learning_rate": 0.002, "loss": 2.3404, "step": 228080 }, { "epoch": 0.881732151969198, "grad_norm": 0.08666618168354034, "learning_rate": 0.002, "loss": 2.3258, "step": 228090 }, { "epoch": 0.8817708091725812, "grad_norm": 0.11309351027011871, "learning_rate": 0.002, "loss": 2.3302, "step": 228100 }, { "epoch": 0.8818094663759645, "grad_norm": 0.11253513395786285, "learning_rate": 0.002, "loss": 2.3335, "step": 228110 }, { "epoch": 0.8818481235793477, "grad_norm": 0.12086072564125061, "learning_rate": 0.002, "loss": 2.3457, "step": 228120 }, { "epoch": 0.8818867807827311, "grad_norm": 0.12018612772226334, "learning_rate": 0.002, "loss": 2.3572, "step": 228130 }, { "epoch": 0.8819254379861143, "grad_norm": 0.09976814687252045, "learning_rate": 0.002, "loss": 2.3399, "step": 228140 }, { "epoch": 0.8819640951894976, "grad_norm": 0.10137394070625305, "learning_rate": 0.002, "loss": 2.3454, "step": 228150 }, { "epoch": 0.8820027523928808, "grad_norm": 0.1139528751373291, "learning_rate": 0.002, "loss": 2.3417, "step": 228160 }, { "epoch": 0.8820414095962642, "grad_norm": 0.09931164979934692, "learning_rate": 0.002, "loss": 2.3396, "step": 228170 }, { "epoch": 0.8820800667996475, "grad_norm": 0.12239488959312439, "learning_rate": 0.002, "loss": 2.3491, "step": 228180 }, { "epoch": 0.8821187240030307, "grad_norm": 0.10625220090150833, "learning_rate": 0.002, "loss": 2.3539, "step": 228190 }, { "epoch": 0.882157381206414, "grad_norm": 0.09328392893075943, "learning_rate": 0.002, "loss": 2.3412, "step": 228200 }, { "epoch": 0.8821960384097973, "grad_norm": 0.11561544239521027, "learning_rate": 0.002, "loss": 2.3394, "step": 228210 }, { "epoch": 0.8822346956131806, "grad_norm": 0.1072704866528511, "learning_rate": 0.002, "loss": 2.3538, "step": 228220 }, { "epoch": 0.8822733528165638, "grad_norm": 0.11116018146276474, "learning_rate": 0.002, "loss": 2.3347, "step": 228230 }, { "epoch": 0.8823120100199471, "grad_norm": 0.10479837656021118, "learning_rate": 0.002, "loss": 2.3455, "step": 228240 }, { "epoch": 0.8823506672233304, "grad_norm": 0.1300334632396698, "learning_rate": 0.002, "loss": 2.3238, "step": 228250 }, { "epoch": 0.8823893244267137, "grad_norm": 0.12884414196014404, "learning_rate": 0.002, "loss": 2.3357, "step": 228260 }, { "epoch": 0.882427981630097, "grad_norm": 0.11360549926757812, "learning_rate": 0.002, "loss": 2.3535, "step": 228270 }, { "epoch": 0.8824666388334802, "grad_norm": 0.09666585922241211, "learning_rate": 0.002, "loss": 2.3517, "step": 228280 }, { "epoch": 0.8825052960368636, "grad_norm": 0.0928926020860672, "learning_rate": 0.002, "loss": 2.3478, "step": 228290 }, { "epoch": 0.8825439532402468, "grad_norm": 0.10815281420946121, "learning_rate": 0.002, "loss": 2.3396, "step": 228300 }, { "epoch": 0.8825826104436301, "grad_norm": 0.1050979271531105, "learning_rate": 0.002, "loss": 2.348, "step": 228310 }, { "epoch": 0.8826212676470133, "grad_norm": 0.08897456526756287, "learning_rate": 0.002, "loss": 2.3308, "step": 228320 }, { "epoch": 0.8826599248503966, "grad_norm": 0.12232557684183121, "learning_rate": 0.002, "loss": 2.3368, "step": 228330 }, { "epoch": 0.8826985820537799, "grad_norm": 0.0979345440864563, "learning_rate": 0.002, "loss": 2.3327, "step": 228340 }, { "epoch": 0.8827372392571632, "grad_norm": 0.10978517681360245, "learning_rate": 0.002, "loss": 2.3264, "step": 228350 }, { "epoch": 0.8827758964605464, "grad_norm": 0.10496382415294647, "learning_rate": 0.002, "loss": 2.3397, "step": 228360 }, { "epoch": 0.8828145536639297, "grad_norm": 0.09739359468221664, "learning_rate": 0.002, "loss": 2.3405, "step": 228370 }, { "epoch": 0.882853210867313, "grad_norm": 0.10366075485944748, "learning_rate": 0.002, "loss": 2.3298, "step": 228380 }, { "epoch": 0.8828918680706963, "grad_norm": 0.0847225934267044, "learning_rate": 0.002, "loss": 2.3418, "step": 228390 }, { "epoch": 0.8829305252740796, "grad_norm": 0.10986627638339996, "learning_rate": 0.002, "loss": 2.3415, "step": 228400 }, { "epoch": 0.8829691824774628, "grad_norm": 0.1198427677154541, "learning_rate": 0.002, "loss": 2.333, "step": 228410 }, { "epoch": 0.8830078396808462, "grad_norm": 0.11189264059066772, "learning_rate": 0.002, "loss": 2.3259, "step": 228420 }, { "epoch": 0.8830464968842294, "grad_norm": 0.11902923882007599, "learning_rate": 0.002, "loss": 2.341, "step": 228430 }, { "epoch": 0.8830851540876127, "grad_norm": 0.09806467592716217, "learning_rate": 0.002, "loss": 2.3415, "step": 228440 }, { "epoch": 0.8831238112909959, "grad_norm": 0.12040401995182037, "learning_rate": 0.002, "loss": 2.3431, "step": 228450 }, { "epoch": 0.8831624684943793, "grad_norm": 0.11240734905004501, "learning_rate": 0.002, "loss": 2.3241, "step": 228460 }, { "epoch": 0.8832011256977625, "grad_norm": 0.13707761466503143, "learning_rate": 0.002, "loss": 2.3541, "step": 228470 }, { "epoch": 0.8832397829011458, "grad_norm": 0.10513965040445328, "learning_rate": 0.002, "loss": 2.3541, "step": 228480 }, { "epoch": 0.883278440104529, "grad_norm": 0.11313371360301971, "learning_rate": 0.002, "loss": 2.3494, "step": 228490 }, { "epoch": 0.8833170973079123, "grad_norm": 0.09669902175664902, "learning_rate": 0.002, "loss": 2.3281, "step": 228500 }, { "epoch": 0.8833557545112957, "grad_norm": 0.10225246101617813, "learning_rate": 0.002, "loss": 2.3398, "step": 228510 }, { "epoch": 0.8833944117146789, "grad_norm": 0.09568794071674347, "learning_rate": 0.002, "loss": 2.3251, "step": 228520 }, { "epoch": 0.8834330689180622, "grad_norm": 0.12730160355567932, "learning_rate": 0.002, "loss": 2.3423, "step": 228530 }, { "epoch": 0.8834717261214454, "grad_norm": 0.11025600880384445, "learning_rate": 0.002, "loss": 2.3456, "step": 228540 }, { "epoch": 0.8835103833248288, "grad_norm": 0.09465350210666656, "learning_rate": 0.002, "loss": 2.3432, "step": 228550 }, { "epoch": 0.883549040528212, "grad_norm": 0.09230029582977295, "learning_rate": 0.002, "loss": 2.3224, "step": 228560 }, { "epoch": 0.8835876977315953, "grad_norm": 0.10580102354288101, "learning_rate": 0.002, "loss": 2.3408, "step": 228570 }, { "epoch": 0.8836263549349785, "grad_norm": 0.1314316838979721, "learning_rate": 0.002, "loss": 2.3513, "step": 228580 }, { "epoch": 0.8836650121383619, "grad_norm": 0.11994590610265732, "learning_rate": 0.002, "loss": 2.3502, "step": 228590 }, { "epoch": 0.8837036693417452, "grad_norm": 0.11196423321962357, "learning_rate": 0.002, "loss": 2.359, "step": 228600 }, { "epoch": 0.8837423265451284, "grad_norm": 0.09601790457963943, "learning_rate": 0.002, "loss": 2.3307, "step": 228610 }, { "epoch": 0.8837809837485117, "grad_norm": 0.08654838055372238, "learning_rate": 0.002, "loss": 2.3335, "step": 228620 }, { "epoch": 0.883819640951895, "grad_norm": 0.10150102525949478, "learning_rate": 0.002, "loss": 2.3311, "step": 228630 }, { "epoch": 0.8838582981552783, "grad_norm": 0.10870946198701859, "learning_rate": 0.002, "loss": 2.357, "step": 228640 }, { "epoch": 0.8838969553586615, "grad_norm": 0.11671384423971176, "learning_rate": 0.002, "loss": 2.3291, "step": 228650 }, { "epoch": 0.8839356125620448, "grad_norm": 0.0986432358622551, "learning_rate": 0.002, "loss": 2.3406, "step": 228660 }, { "epoch": 0.8839742697654281, "grad_norm": 0.11731009930372238, "learning_rate": 0.002, "loss": 2.3492, "step": 228670 }, { "epoch": 0.8840129269688114, "grad_norm": 0.09436880052089691, "learning_rate": 0.002, "loss": 2.3543, "step": 228680 }, { "epoch": 0.8840515841721946, "grad_norm": 0.19049400091171265, "learning_rate": 0.002, "loss": 2.3311, "step": 228690 }, { "epoch": 0.8840902413755779, "grad_norm": 0.11192015558481216, "learning_rate": 0.002, "loss": 2.3411, "step": 228700 }, { "epoch": 0.8841288985789612, "grad_norm": 0.09574125707149506, "learning_rate": 0.002, "loss": 2.3627, "step": 228710 }, { "epoch": 0.8841675557823445, "grad_norm": 0.1215815544128418, "learning_rate": 0.002, "loss": 2.3534, "step": 228720 }, { "epoch": 0.8842062129857278, "grad_norm": 0.11466271430253983, "learning_rate": 0.002, "loss": 2.3306, "step": 228730 }, { "epoch": 0.884244870189111, "grad_norm": 0.11675561219453812, "learning_rate": 0.002, "loss": 2.349, "step": 228740 }, { "epoch": 0.8842835273924943, "grad_norm": 0.11158723384141922, "learning_rate": 0.002, "loss": 2.3431, "step": 228750 }, { "epoch": 0.8843221845958776, "grad_norm": 0.10511514544487, "learning_rate": 0.002, "loss": 2.3461, "step": 228760 }, { "epoch": 0.8843608417992609, "grad_norm": 0.11220479011535645, "learning_rate": 0.002, "loss": 2.3399, "step": 228770 }, { "epoch": 0.8843994990026441, "grad_norm": 0.0958787351846695, "learning_rate": 0.002, "loss": 2.3558, "step": 228780 }, { "epoch": 0.8844381562060274, "grad_norm": 0.11027432233095169, "learning_rate": 0.002, "loss": 2.3556, "step": 228790 }, { "epoch": 0.8844768134094108, "grad_norm": 0.09384578466415405, "learning_rate": 0.002, "loss": 2.3294, "step": 228800 }, { "epoch": 0.884515470612794, "grad_norm": 0.11357222497463226, "learning_rate": 0.002, "loss": 2.3343, "step": 228810 }, { "epoch": 0.8845541278161773, "grad_norm": 0.10767655819654465, "learning_rate": 0.002, "loss": 2.3547, "step": 228820 }, { "epoch": 0.8845927850195605, "grad_norm": 0.10896115750074387, "learning_rate": 0.002, "loss": 2.3553, "step": 228830 }, { "epoch": 0.8846314422229439, "grad_norm": 0.12012704461812973, "learning_rate": 0.002, "loss": 2.3212, "step": 228840 }, { "epoch": 0.8846700994263271, "grad_norm": 0.09226809442043304, "learning_rate": 0.002, "loss": 2.3369, "step": 228850 }, { "epoch": 0.8847087566297104, "grad_norm": 0.10555747151374817, "learning_rate": 0.002, "loss": 2.3297, "step": 228860 }, { "epoch": 0.8847474138330936, "grad_norm": 0.09659525752067566, "learning_rate": 0.002, "loss": 2.3437, "step": 228870 }, { "epoch": 0.8847860710364769, "grad_norm": 0.11739001423120499, "learning_rate": 0.002, "loss": 2.3476, "step": 228880 }, { "epoch": 0.8848247282398602, "grad_norm": 0.10839199274778366, "learning_rate": 0.002, "loss": 2.3263, "step": 228890 }, { "epoch": 0.8848633854432435, "grad_norm": 0.09374792128801346, "learning_rate": 0.002, "loss": 2.3339, "step": 228900 }, { "epoch": 0.8849020426466268, "grad_norm": 0.10925695300102234, "learning_rate": 0.002, "loss": 2.3325, "step": 228910 }, { "epoch": 0.88494069985001, "grad_norm": 0.10080505162477493, "learning_rate": 0.002, "loss": 2.3482, "step": 228920 }, { "epoch": 0.8849793570533934, "grad_norm": 0.10030867159366608, "learning_rate": 0.002, "loss": 2.3388, "step": 228930 }, { "epoch": 0.8850180142567766, "grad_norm": 0.11349781602621078, "learning_rate": 0.002, "loss": 2.3483, "step": 228940 }, { "epoch": 0.8850566714601599, "grad_norm": 0.11237026751041412, "learning_rate": 0.002, "loss": 2.3341, "step": 228950 }, { "epoch": 0.8850953286635431, "grad_norm": 0.112160824239254, "learning_rate": 0.002, "loss": 2.3586, "step": 228960 }, { "epoch": 0.8851339858669265, "grad_norm": 0.09719818085432053, "learning_rate": 0.002, "loss": 2.357, "step": 228970 }, { "epoch": 0.8851726430703097, "grad_norm": 0.07541675865650177, "learning_rate": 0.002, "loss": 2.325, "step": 228980 }, { "epoch": 0.885211300273693, "grad_norm": 0.11679107695817947, "learning_rate": 0.002, "loss": 2.348, "step": 228990 }, { "epoch": 0.8852499574770762, "grad_norm": 0.10213762521743774, "learning_rate": 0.002, "loss": 2.3387, "step": 229000 }, { "epoch": 0.8852886146804596, "grad_norm": 0.10680190473794937, "learning_rate": 0.002, "loss": 2.3209, "step": 229010 }, { "epoch": 0.8853272718838429, "grad_norm": 0.10340744256973267, "learning_rate": 0.002, "loss": 2.3448, "step": 229020 }, { "epoch": 0.8853659290872261, "grad_norm": 0.10625316947698593, "learning_rate": 0.002, "loss": 2.3301, "step": 229030 }, { "epoch": 0.8854045862906094, "grad_norm": 0.11096010357141495, "learning_rate": 0.002, "loss": 2.3306, "step": 229040 }, { "epoch": 0.8854432434939926, "grad_norm": 0.10733357816934586, "learning_rate": 0.002, "loss": 2.3422, "step": 229050 }, { "epoch": 0.885481900697376, "grad_norm": 0.10386716574430466, "learning_rate": 0.002, "loss": 2.341, "step": 229060 }, { "epoch": 0.8855205579007592, "grad_norm": 0.08777379244565964, "learning_rate": 0.002, "loss": 2.3381, "step": 229070 }, { "epoch": 0.8855592151041425, "grad_norm": 0.12602606415748596, "learning_rate": 0.002, "loss": 2.3348, "step": 229080 }, { "epoch": 0.8855978723075257, "grad_norm": 0.12142560631036758, "learning_rate": 0.002, "loss": 2.3255, "step": 229090 }, { "epoch": 0.8856365295109091, "grad_norm": 0.10715343803167343, "learning_rate": 0.002, "loss": 2.3344, "step": 229100 }, { "epoch": 0.8856751867142924, "grad_norm": 0.11046537011861801, "learning_rate": 0.002, "loss": 2.351, "step": 229110 }, { "epoch": 0.8857138439176756, "grad_norm": 0.12889137864112854, "learning_rate": 0.002, "loss": 2.3458, "step": 229120 }, { "epoch": 0.8857525011210589, "grad_norm": 0.09733462333679199, "learning_rate": 0.002, "loss": 2.3213, "step": 229130 }, { "epoch": 0.8857911583244422, "grad_norm": 0.10241176933050156, "learning_rate": 0.002, "loss": 2.3454, "step": 229140 }, { "epoch": 0.8858298155278255, "grad_norm": 0.10478372871875763, "learning_rate": 0.002, "loss": 2.3363, "step": 229150 }, { "epoch": 0.8858684727312087, "grad_norm": 0.1303471326828003, "learning_rate": 0.002, "loss": 2.3355, "step": 229160 }, { "epoch": 0.885907129934592, "grad_norm": 0.12433631718158722, "learning_rate": 0.002, "loss": 2.3353, "step": 229170 }, { "epoch": 0.8859457871379753, "grad_norm": 0.10237149894237518, "learning_rate": 0.002, "loss": 2.3399, "step": 229180 }, { "epoch": 0.8859844443413586, "grad_norm": 0.10052412748336792, "learning_rate": 0.002, "loss": 2.3475, "step": 229190 }, { "epoch": 0.8860231015447418, "grad_norm": 0.09713948518037796, "learning_rate": 0.002, "loss": 2.3307, "step": 229200 }, { "epoch": 0.8860617587481251, "grad_norm": 0.1421917825937271, "learning_rate": 0.002, "loss": 2.3473, "step": 229210 }, { "epoch": 0.8861004159515085, "grad_norm": 0.12245440483093262, "learning_rate": 0.002, "loss": 2.3395, "step": 229220 }, { "epoch": 0.8861390731548917, "grad_norm": 0.1105659231543541, "learning_rate": 0.002, "loss": 2.3446, "step": 229230 }, { "epoch": 0.886177730358275, "grad_norm": 0.11617681384086609, "learning_rate": 0.002, "loss": 2.3104, "step": 229240 }, { "epoch": 0.8862163875616582, "grad_norm": 0.10943305492401123, "learning_rate": 0.002, "loss": 2.3384, "step": 229250 }, { "epoch": 0.8862550447650415, "grad_norm": 0.10044894367456436, "learning_rate": 0.002, "loss": 2.3512, "step": 229260 }, { "epoch": 0.8862937019684248, "grad_norm": 0.10492462664842606, "learning_rate": 0.002, "loss": 2.336, "step": 229270 }, { "epoch": 0.8863323591718081, "grad_norm": 0.10984104126691818, "learning_rate": 0.002, "loss": 2.3421, "step": 229280 }, { "epoch": 0.8863710163751913, "grad_norm": 0.10741350799798965, "learning_rate": 0.002, "loss": 2.3193, "step": 229290 }, { "epoch": 0.8864096735785746, "grad_norm": 0.08082353323698044, "learning_rate": 0.002, "loss": 2.3355, "step": 229300 }, { "epoch": 0.886448330781958, "grad_norm": 0.10525934398174286, "learning_rate": 0.002, "loss": 2.3465, "step": 229310 }, { "epoch": 0.8864869879853412, "grad_norm": 0.10217452049255371, "learning_rate": 0.002, "loss": 2.3391, "step": 229320 }, { "epoch": 0.8865256451887245, "grad_norm": 0.1082843765616417, "learning_rate": 0.002, "loss": 2.335, "step": 229330 }, { "epoch": 0.8865643023921077, "grad_norm": 0.10454316437244415, "learning_rate": 0.002, "loss": 2.3442, "step": 229340 }, { "epoch": 0.8866029595954911, "grad_norm": 0.10445873439311981, "learning_rate": 0.002, "loss": 2.3424, "step": 229350 }, { "epoch": 0.8866416167988743, "grad_norm": 0.0989517867565155, "learning_rate": 0.002, "loss": 2.3337, "step": 229360 }, { "epoch": 0.8866802740022576, "grad_norm": 0.10514253377914429, "learning_rate": 0.002, "loss": 2.3495, "step": 229370 }, { "epoch": 0.8867189312056408, "grad_norm": 0.10348337143659592, "learning_rate": 0.002, "loss": 2.3217, "step": 229380 }, { "epoch": 0.8867575884090242, "grad_norm": 0.0983634814620018, "learning_rate": 0.002, "loss": 2.3457, "step": 229390 }, { "epoch": 0.8867962456124074, "grad_norm": 0.10358452051877975, "learning_rate": 0.002, "loss": 2.3503, "step": 229400 }, { "epoch": 0.8868349028157907, "grad_norm": 0.10264099389314651, "learning_rate": 0.002, "loss": 2.3316, "step": 229410 }, { "epoch": 0.886873560019174, "grad_norm": 0.11348433047533035, "learning_rate": 0.002, "loss": 2.3397, "step": 229420 }, { "epoch": 0.8869122172225572, "grad_norm": 0.09199924767017365, "learning_rate": 0.002, "loss": 2.3305, "step": 229430 }, { "epoch": 0.8869508744259406, "grad_norm": 0.09641484171152115, "learning_rate": 0.002, "loss": 2.3323, "step": 229440 }, { "epoch": 0.8869895316293238, "grad_norm": 0.11211732029914856, "learning_rate": 0.002, "loss": 2.3416, "step": 229450 }, { "epoch": 0.8870281888327071, "grad_norm": 0.1105748862028122, "learning_rate": 0.002, "loss": 2.3474, "step": 229460 }, { "epoch": 0.8870668460360903, "grad_norm": 0.11591217666864395, "learning_rate": 0.002, "loss": 2.3447, "step": 229470 }, { "epoch": 0.8871055032394737, "grad_norm": 0.11064792424440384, "learning_rate": 0.002, "loss": 2.3352, "step": 229480 }, { "epoch": 0.8871441604428569, "grad_norm": 0.09471415728330612, "learning_rate": 0.002, "loss": 2.3231, "step": 229490 }, { "epoch": 0.8871828176462402, "grad_norm": 0.10499259829521179, "learning_rate": 0.002, "loss": 2.3519, "step": 229500 }, { "epoch": 0.8872214748496234, "grad_norm": 0.10962316393852234, "learning_rate": 0.002, "loss": 2.3408, "step": 229510 }, { "epoch": 0.8872601320530068, "grad_norm": 0.09535825252532959, "learning_rate": 0.002, "loss": 2.3386, "step": 229520 }, { "epoch": 0.88729878925639, "grad_norm": 0.09556851536035538, "learning_rate": 0.002, "loss": 2.3433, "step": 229530 }, { "epoch": 0.8873374464597733, "grad_norm": 0.09536069631576538, "learning_rate": 0.002, "loss": 2.3548, "step": 229540 }, { "epoch": 0.8873761036631566, "grad_norm": 0.10342884063720703, "learning_rate": 0.002, "loss": 2.3226, "step": 229550 }, { "epoch": 0.8874147608665399, "grad_norm": 0.0994892343878746, "learning_rate": 0.002, "loss": 2.3395, "step": 229560 }, { "epoch": 0.8874534180699232, "grad_norm": 0.09657128155231476, "learning_rate": 0.002, "loss": 2.326, "step": 229570 }, { "epoch": 0.8874920752733064, "grad_norm": 0.11561530083417892, "learning_rate": 0.002, "loss": 2.3438, "step": 229580 }, { "epoch": 0.8875307324766897, "grad_norm": 0.11136766523122787, "learning_rate": 0.002, "loss": 2.3447, "step": 229590 }, { "epoch": 0.887569389680073, "grad_norm": 0.11479032784700394, "learning_rate": 0.002, "loss": 2.3174, "step": 229600 }, { "epoch": 0.8876080468834563, "grad_norm": 0.10477154701948166, "learning_rate": 0.002, "loss": 2.349, "step": 229610 }, { "epoch": 0.8876467040868395, "grad_norm": 0.09890048950910568, "learning_rate": 0.002, "loss": 2.3287, "step": 229620 }, { "epoch": 0.8876853612902228, "grad_norm": 0.1130228266119957, "learning_rate": 0.002, "loss": 2.3488, "step": 229630 }, { "epoch": 0.887724018493606, "grad_norm": 0.1031917929649353, "learning_rate": 0.002, "loss": 2.332, "step": 229640 }, { "epoch": 0.8877626756969894, "grad_norm": 0.12092357873916626, "learning_rate": 0.002, "loss": 2.3305, "step": 229650 }, { "epoch": 0.8878013329003727, "grad_norm": 0.11220254004001617, "learning_rate": 0.002, "loss": 2.3471, "step": 229660 }, { "epoch": 0.8878399901037559, "grad_norm": 0.10315951704978943, "learning_rate": 0.002, "loss": 2.3245, "step": 229670 }, { "epoch": 0.8878786473071392, "grad_norm": 0.09558948129415512, "learning_rate": 0.002, "loss": 2.3253, "step": 229680 }, { "epoch": 0.8879173045105225, "grad_norm": 0.11425267159938812, "learning_rate": 0.002, "loss": 2.3329, "step": 229690 }, { "epoch": 0.8879559617139058, "grad_norm": 0.0957653596997261, "learning_rate": 0.002, "loss": 2.3432, "step": 229700 }, { "epoch": 0.887994618917289, "grad_norm": 0.09852249175310135, "learning_rate": 0.002, "loss": 2.3567, "step": 229710 }, { "epoch": 0.8880332761206723, "grad_norm": 0.12549278140068054, "learning_rate": 0.002, "loss": 2.3475, "step": 229720 }, { "epoch": 0.8880719333240557, "grad_norm": 0.10537635535001755, "learning_rate": 0.002, "loss": 2.3292, "step": 229730 }, { "epoch": 0.8881105905274389, "grad_norm": 0.10095790773630142, "learning_rate": 0.002, "loss": 2.3342, "step": 229740 }, { "epoch": 0.8881492477308222, "grad_norm": 0.11167895048856735, "learning_rate": 0.002, "loss": 2.3285, "step": 229750 }, { "epoch": 0.8881879049342054, "grad_norm": 0.09808744490146637, "learning_rate": 0.002, "loss": 2.3282, "step": 229760 }, { "epoch": 0.8882265621375888, "grad_norm": 0.10633957386016846, "learning_rate": 0.002, "loss": 2.3567, "step": 229770 }, { "epoch": 0.888265219340972, "grad_norm": 0.1152457445859909, "learning_rate": 0.002, "loss": 2.3424, "step": 229780 }, { "epoch": 0.8883038765443553, "grad_norm": 0.09889443218708038, "learning_rate": 0.002, "loss": 2.3423, "step": 229790 }, { "epoch": 0.8883425337477385, "grad_norm": 0.11010714620351791, "learning_rate": 0.002, "loss": 2.3349, "step": 229800 }, { "epoch": 0.8883811909511218, "grad_norm": 0.15187367796897888, "learning_rate": 0.002, "loss": 2.3248, "step": 229810 }, { "epoch": 0.8884198481545051, "grad_norm": 0.11343226581811905, "learning_rate": 0.002, "loss": 2.3303, "step": 229820 }, { "epoch": 0.8884585053578884, "grad_norm": 0.11588403582572937, "learning_rate": 0.002, "loss": 2.3338, "step": 229830 }, { "epoch": 0.8884971625612716, "grad_norm": 0.10823334753513336, "learning_rate": 0.002, "loss": 2.3342, "step": 229840 }, { "epoch": 0.8885358197646549, "grad_norm": 0.11281239241361618, "learning_rate": 0.002, "loss": 2.3418, "step": 229850 }, { "epoch": 0.8885744769680383, "grad_norm": 0.10507730394601822, "learning_rate": 0.002, "loss": 2.3472, "step": 229860 }, { "epoch": 0.8886131341714215, "grad_norm": 0.09830960631370544, "learning_rate": 0.002, "loss": 2.3584, "step": 229870 }, { "epoch": 0.8886517913748048, "grad_norm": 0.12222305685281754, "learning_rate": 0.002, "loss": 2.3492, "step": 229880 }, { "epoch": 0.888690448578188, "grad_norm": 0.11914224177598953, "learning_rate": 0.002, "loss": 2.3379, "step": 229890 }, { "epoch": 0.8887291057815714, "grad_norm": 0.09565926343202591, "learning_rate": 0.002, "loss": 2.3326, "step": 229900 }, { "epoch": 0.8887677629849546, "grad_norm": 0.10244297236204147, "learning_rate": 0.002, "loss": 2.3476, "step": 229910 }, { "epoch": 0.8888064201883379, "grad_norm": 0.09705287963151932, "learning_rate": 0.002, "loss": 2.3345, "step": 229920 }, { "epoch": 0.8888450773917211, "grad_norm": 0.09104941040277481, "learning_rate": 0.002, "loss": 2.3211, "step": 229930 }, { "epoch": 0.8888837345951045, "grad_norm": 0.10585571080446243, "learning_rate": 0.002, "loss": 2.3332, "step": 229940 }, { "epoch": 0.8889223917984878, "grad_norm": 0.1275555044412613, "learning_rate": 0.002, "loss": 2.3387, "step": 229950 }, { "epoch": 0.888961049001871, "grad_norm": 0.09641186147928238, "learning_rate": 0.002, "loss": 2.3485, "step": 229960 }, { "epoch": 0.8889997062052543, "grad_norm": 0.14952360093593597, "learning_rate": 0.002, "loss": 2.3186, "step": 229970 }, { "epoch": 0.8890383634086375, "grad_norm": 0.10552582889795303, "learning_rate": 0.002, "loss": 2.3401, "step": 229980 }, { "epoch": 0.8890770206120209, "grad_norm": 0.09293745458126068, "learning_rate": 0.002, "loss": 2.3228, "step": 229990 }, { "epoch": 0.8891156778154041, "grad_norm": 0.10250788927078247, "learning_rate": 0.002, "loss": 2.3369, "step": 230000 }, { "epoch": 0.8891543350187874, "grad_norm": 0.12834201753139496, "learning_rate": 0.002, "loss": 2.3249, "step": 230010 }, { "epoch": 0.8891929922221706, "grad_norm": 0.1091959998011589, "learning_rate": 0.002, "loss": 2.3401, "step": 230020 }, { "epoch": 0.889231649425554, "grad_norm": 0.11037223041057587, "learning_rate": 0.002, "loss": 2.3371, "step": 230030 }, { "epoch": 0.8892703066289372, "grad_norm": 0.1013093888759613, "learning_rate": 0.002, "loss": 2.3345, "step": 230040 }, { "epoch": 0.8893089638323205, "grad_norm": 0.10540314763784409, "learning_rate": 0.002, "loss": 2.3301, "step": 230050 }, { "epoch": 0.8893476210357038, "grad_norm": 0.10271942615509033, "learning_rate": 0.002, "loss": 2.3235, "step": 230060 }, { "epoch": 0.8893862782390871, "grad_norm": 0.09896955639123917, "learning_rate": 0.002, "loss": 2.3386, "step": 230070 }, { "epoch": 0.8894249354424704, "grad_norm": 0.11004067212343216, "learning_rate": 0.002, "loss": 2.3261, "step": 230080 }, { "epoch": 0.8894635926458536, "grad_norm": 0.11132332682609558, "learning_rate": 0.002, "loss": 2.3471, "step": 230090 }, { "epoch": 0.8895022498492369, "grad_norm": 0.11204580962657928, "learning_rate": 0.002, "loss": 2.338, "step": 230100 }, { "epoch": 0.8895409070526202, "grad_norm": 0.10116194188594818, "learning_rate": 0.002, "loss": 2.3354, "step": 230110 }, { "epoch": 0.8895795642560035, "grad_norm": 0.09769944846630096, "learning_rate": 0.002, "loss": 2.3415, "step": 230120 }, { "epoch": 0.8896182214593867, "grad_norm": 0.11125995218753815, "learning_rate": 0.002, "loss": 2.3311, "step": 230130 }, { "epoch": 0.88965687866277, "grad_norm": 0.09720432758331299, "learning_rate": 0.002, "loss": 2.3388, "step": 230140 }, { "epoch": 0.8896955358661534, "grad_norm": 0.09110531210899353, "learning_rate": 0.002, "loss": 2.3338, "step": 230150 }, { "epoch": 0.8897341930695366, "grad_norm": 0.11076952517032623, "learning_rate": 0.002, "loss": 2.3328, "step": 230160 }, { "epoch": 0.8897728502729199, "grad_norm": 0.10560379177331924, "learning_rate": 0.002, "loss": 2.3384, "step": 230170 }, { "epoch": 0.8898115074763031, "grad_norm": 0.1136026680469513, "learning_rate": 0.002, "loss": 2.3287, "step": 230180 }, { "epoch": 0.8898501646796864, "grad_norm": 0.11075719445943832, "learning_rate": 0.002, "loss": 2.3334, "step": 230190 }, { "epoch": 0.8898888218830697, "grad_norm": 0.10945750027894974, "learning_rate": 0.002, "loss": 2.3326, "step": 230200 }, { "epoch": 0.889927479086453, "grad_norm": 0.15665782988071442, "learning_rate": 0.002, "loss": 2.3309, "step": 230210 }, { "epoch": 0.8899661362898362, "grad_norm": 0.10520077496767044, "learning_rate": 0.002, "loss": 2.3445, "step": 230220 }, { "epoch": 0.8900047934932195, "grad_norm": 0.09022749960422516, "learning_rate": 0.002, "loss": 2.3374, "step": 230230 }, { "epoch": 0.8900434506966028, "grad_norm": 0.12738844752311707, "learning_rate": 0.002, "loss": 2.3411, "step": 230240 }, { "epoch": 0.8900821078999861, "grad_norm": 0.09396673738956451, "learning_rate": 0.002, "loss": 2.3436, "step": 230250 }, { "epoch": 0.8901207651033693, "grad_norm": 0.1027170792222023, "learning_rate": 0.002, "loss": 2.3308, "step": 230260 }, { "epoch": 0.8901594223067526, "grad_norm": 0.11400040239095688, "learning_rate": 0.002, "loss": 2.3496, "step": 230270 }, { "epoch": 0.890198079510136, "grad_norm": 0.13193011283874512, "learning_rate": 0.002, "loss": 2.3413, "step": 230280 }, { "epoch": 0.8902367367135192, "grad_norm": 0.09875988215208054, "learning_rate": 0.002, "loss": 2.3383, "step": 230290 }, { "epoch": 0.8902753939169025, "grad_norm": 0.11976584792137146, "learning_rate": 0.002, "loss": 2.3333, "step": 230300 }, { "epoch": 0.8903140511202857, "grad_norm": 0.11035315692424774, "learning_rate": 0.002, "loss": 2.3328, "step": 230310 }, { "epoch": 0.8903527083236691, "grad_norm": 0.0940026044845581, "learning_rate": 0.002, "loss": 2.3227, "step": 230320 }, { "epoch": 0.8903913655270523, "grad_norm": 0.10700642317533493, "learning_rate": 0.002, "loss": 2.3399, "step": 230330 }, { "epoch": 0.8904300227304356, "grad_norm": 0.09496472775936127, "learning_rate": 0.002, "loss": 2.3477, "step": 230340 }, { "epoch": 0.8904686799338188, "grad_norm": 0.11273860931396484, "learning_rate": 0.002, "loss": 2.3373, "step": 230350 }, { "epoch": 0.8905073371372021, "grad_norm": 0.09876589477062225, "learning_rate": 0.002, "loss": 2.3302, "step": 230360 }, { "epoch": 0.8905459943405855, "grad_norm": 0.10113933682441711, "learning_rate": 0.002, "loss": 2.3443, "step": 230370 }, { "epoch": 0.8905846515439687, "grad_norm": 0.09982689470052719, "learning_rate": 0.002, "loss": 2.3423, "step": 230380 }, { "epoch": 0.890623308747352, "grad_norm": 0.09933792799711227, "learning_rate": 0.002, "loss": 2.3291, "step": 230390 }, { "epoch": 0.8906619659507352, "grad_norm": 0.10531254857778549, "learning_rate": 0.002, "loss": 2.3346, "step": 230400 }, { "epoch": 0.8907006231541186, "grad_norm": 0.10806956887245178, "learning_rate": 0.002, "loss": 2.3279, "step": 230410 }, { "epoch": 0.8907392803575018, "grad_norm": 0.10214032977819443, "learning_rate": 0.002, "loss": 2.3505, "step": 230420 }, { "epoch": 0.8907779375608851, "grad_norm": 0.09792348742485046, "learning_rate": 0.002, "loss": 2.3372, "step": 230430 }, { "epoch": 0.8908165947642683, "grad_norm": 0.12945565581321716, "learning_rate": 0.002, "loss": 2.3224, "step": 230440 }, { "epoch": 0.8908552519676517, "grad_norm": 0.1092633530497551, "learning_rate": 0.002, "loss": 2.3271, "step": 230450 }, { "epoch": 0.890893909171035, "grad_norm": 0.09912041574716568, "learning_rate": 0.002, "loss": 2.3413, "step": 230460 }, { "epoch": 0.8909325663744182, "grad_norm": 0.10581175237894058, "learning_rate": 0.002, "loss": 2.3528, "step": 230470 }, { "epoch": 0.8909712235778015, "grad_norm": 0.10082611441612244, "learning_rate": 0.002, "loss": 2.337, "step": 230480 }, { "epoch": 0.8910098807811848, "grad_norm": 0.11029192805290222, "learning_rate": 0.002, "loss": 2.3358, "step": 230490 }, { "epoch": 0.8910485379845681, "grad_norm": 0.12048397958278656, "learning_rate": 0.002, "loss": 2.3265, "step": 230500 }, { "epoch": 0.8910871951879513, "grad_norm": 0.11811503767967224, "learning_rate": 0.002, "loss": 2.3274, "step": 230510 }, { "epoch": 0.8911258523913346, "grad_norm": 0.10998043417930603, "learning_rate": 0.002, "loss": 2.3554, "step": 230520 }, { "epoch": 0.8911645095947179, "grad_norm": 0.1065039113163948, "learning_rate": 0.002, "loss": 2.3517, "step": 230530 }, { "epoch": 0.8912031667981012, "grad_norm": 0.11100541800260544, "learning_rate": 0.002, "loss": 2.3446, "step": 230540 }, { "epoch": 0.8912418240014844, "grad_norm": 0.09994493424892426, "learning_rate": 0.002, "loss": 2.3399, "step": 230550 }, { "epoch": 0.8912804812048677, "grad_norm": 0.10134243965148926, "learning_rate": 0.002, "loss": 2.3227, "step": 230560 }, { "epoch": 0.8913191384082509, "grad_norm": 0.11106912046670914, "learning_rate": 0.002, "loss": 2.3581, "step": 230570 }, { "epoch": 0.8913577956116343, "grad_norm": 0.1081485003232956, "learning_rate": 0.002, "loss": 2.3216, "step": 230580 }, { "epoch": 0.8913964528150176, "grad_norm": 0.1305999904870987, "learning_rate": 0.002, "loss": 2.3458, "step": 230590 }, { "epoch": 0.8914351100184008, "grad_norm": 0.09498662501573563, "learning_rate": 0.002, "loss": 2.3455, "step": 230600 }, { "epoch": 0.8914737672217841, "grad_norm": 0.101992167532444, "learning_rate": 0.002, "loss": 2.3377, "step": 230610 }, { "epoch": 0.8915124244251674, "grad_norm": 0.10639800131320953, "learning_rate": 0.002, "loss": 2.3554, "step": 230620 }, { "epoch": 0.8915510816285507, "grad_norm": 0.09898429363965988, "learning_rate": 0.002, "loss": 2.314, "step": 230630 }, { "epoch": 0.8915897388319339, "grad_norm": 0.09582017362117767, "learning_rate": 0.002, "loss": 2.3357, "step": 230640 }, { "epoch": 0.8916283960353172, "grad_norm": 0.10886652022600174, "learning_rate": 0.002, "loss": 2.3343, "step": 230650 }, { "epoch": 0.8916670532387005, "grad_norm": 0.10178403556346893, "learning_rate": 0.002, "loss": 2.3385, "step": 230660 }, { "epoch": 0.8917057104420838, "grad_norm": 0.1168789491057396, "learning_rate": 0.002, "loss": 2.3299, "step": 230670 }, { "epoch": 0.891744367645467, "grad_norm": 0.09374828636646271, "learning_rate": 0.002, "loss": 2.3276, "step": 230680 }, { "epoch": 0.8917830248488503, "grad_norm": 0.10166969150304794, "learning_rate": 0.002, "loss": 2.3299, "step": 230690 }, { "epoch": 0.8918216820522337, "grad_norm": 0.11109510809183121, "learning_rate": 0.002, "loss": 2.3346, "step": 230700 }, { "epoch": 0.8918603392556169, "grad_norm": 0.09495483338832855, "learning_rate": 0.002, "loss": 2.3471, "step": 230710 }, { "epoch": 0.8918989964590002, "grad_norm": 0.10241233557462692, "learning_rate": 0.002, "loss": 2.3331, "step": 230720 }, { "epoch": 0.8919376536623834, "grad_norm": 0.10209019482135773, "learning_rate": 0.002, "loss": 2.3283, "step": 230730 }, { "epoch": 0.8919763108657667, "grad_norm": 0.1326477825641632, "learning_rate": 0.002, "loss": 2.3355, "step": 230740 }, { "epoch": 0.89201496806915, "grad_norm": 0.09626547992229462, "learning_rate": 0.002, "loss": 2.3553, "step": 230750 }, { "epoch": 0.8920536252725333, "grad_norm": 0.10015839338302612, "learning_rate": 0.002, "loss": 2.3231, "step": 230760 }, { "epoch": 0.8920922824759165, "grad_norm": 0.09552304446697235, "learning_rate": 0.002, "loss": 2.3265, "step": 230770 }, { "epoch": 0.8921309396792998, "grad_norm": 0.11490238457918167, "learning_rate": 0.002, "loss": 2.3419, "step": 230780 }, { "epoch": 0.8921695968826832, "grad_norm": 0.12381553649902344, "learning_rate": 0.002, "loss": 2.3371, "step": 230790 }, { "epoch": 0.8922082540860664, "grad_norm": 0.10215188562870026, "learning_rate": 0.002, "loss": 2.3434, "step": 230800 }, { "epoch": 0.8922469112894497, "grad_norm": 0.09393597394227982, "learning_rate": 0.002, "loss": 2.3398, "step": 230810 }, { "epoch": 0.8922855684928329, "grad_norm": 0.10499109327793121, "learning_rate": 0.002, "loss": 2.3409, "step": 230820 }, { "epoch": 0.8923242256962163, "grad_norm": 0.10030298680067062, "learning_rate": 0.002, "loss": 2.3423, "step": 230830 }, { "epoch": 0.8923628828995995, "grad_norm": 0.09288015961647034, "learning_rate": 0.002, "loss": 2.3463, "step": 230840 }, { "epoch": 0.8924015401029828, "grad_norm": 0.09977603703737259, "learning_rate": 0.002, "loss": 2.3232, "step": 230850 }, { "epoch": 0.892440197306366, "grad_norm": 0.09825431555509567, "learning_rate": 0.002, "loss": 2.3425, "step": 230860 }, { "epoch": 0.8924788545097494, "grad_norm": 0.11791310459375381, "learning_rate": 0.002, "loss": 2.3422, "step": 230870 }, { "epoch": 0.8925175117131326, "grad_norm": 0.09519605338573456, "learning_rate": 0.002, "loss": 2.3295, "step": 230880 }, { "epoch": 0.8925561689165159, "grad_norm": 0.10679690539836884, "learning_rate": 0.002, "loss": 2.3376, "step": 230890 }, { "epoch": 0.8925948261198992, "grad_norm": 0.102602519094944, "learning_rate": 0.002, "loss": 2.337, "step": 230900 }, { "epoch": 0.8926334833232824, "grad_norm": 0.11841274797916412, "learning_rate": 0.002, "loss": 2.331, "step": 230910 }, { "epoch": 0.8926721405266658, "grad_norm": 0.114153191447258, "learning_rate": 0.002, "loss": 2.3434, "step": 230920 }, { "epoch": 0.892710797730049, "grad_norm": 0.08805635571479797, "learning_rate": 0.002, "loss": 2.3573, "step": 230930 }, { "epoch": 0.8927494549334323, "grad_norm": 0.12973535060882568, "learning_rate": 0.002, "loss": 2.348, "step": 230940 }, { "epoch": 0.8927881121368155, "grad_norm": 0.1187136098742485, "learning_rate": 0.002, "loss": 2.3239, "step": 230950 }, { "epoch": 0.8928267693401989, "grad_norm": 0.1086641252040863, "learning_rate": 0.002, "loss": 2.3471, "step": 230960 }, { "epoch": 0.8928654265435821, "grad_norm": 0.11849938333034515, "learning_rate": 0.002, "loss": 2.3458, "step": 230970 }, { "epoch": 0.8929040837469654, "grad_norm": 0.11114994436502457, "learning_rate": 0.002, "loss": 2.3321, "step": 230980 }, { "epoch": 0.8929427409503486, "grad_norm": 0.12062875926494598, "learning_rate": 0.002, "loss": 2.3387, "step": 230990 }, { "epoch": 0.892981398153732, "grad_norm": 0.09971782565116882, "learning_rate": 0.002, "loss": 2.3409, "step": 231000 }, { "epoch": 0.8930200553571153, "grad_norm": 0.10399482399225235, "learning_rate": 0.002, "loss": 2.3297, "step": 231010 }, { "epoch": 0.8930587125604985, "grad_norm": 0.0989309698343277, "learning_rate": 0.002, "loss": 2.3333, "step": 231020 }, { "epoch": 0.8930973697638818, "grad_norm": 0.1022104024887085, "learning_rate": 0.002, "loss": 2.3396, "step": 231030 }, { "epoch": 0.8931360269672651, "grad_norm": 0.12379945814609528, "learning_rate": 0.002, "loss": 2.3518, "step": 231040 }, { "epoch": 0.8931746841706484, "grad_norm": 0.11856026947498322, "learning_rate": 0.002, "loss": 2.3594, "step": 231050 }, { "epoch": 0.8932133413740316, "grad_norm": 0.0948251411318779, "learning_rate": 0.002, "loss": 2.3477, "step": 231060 }, { "epoch": 0.8932519985774149, "grad_norm": 0.09422452747821808, "learning_rate": 0.002, "loss": 2.3469, "step": 231070 }, { "epoch": 0.8932906557807982, "grad_norm": 0.10392773896455765, "learning_rate": 0.002, "loss": 2.3212, "step": 231080 }, { "epoch": 0.8933293129841815, "grad_norm": 0.09506283700466156, "learning_rate": 0.002, "loss": 2.3441, "step": 231090 }, { "epoch": 0.8933679701875648, "grad_norm": 0.10650750249624252, "learning_rate": 0.002, "loss": 2.3501, "step": 231100 }, { "epoch": 0.893406627390948, "grad_norm": 0.11290176957845688, "learning_rate": 0.002, "loss": 2.3381, "step": 231110 }, { "epoch": 0.8934452845943313, "grad_norm": 0.1044296845793724, "learning_rate": 0.002, "loss": 2.3459, "step": 231120 }, { "epoch": 0.8934839417977146, "grad_norm": 0.09647175669670105, "learning_rate": 0.002, "loss": 2.3442, "step": 231130 }, { "epoch": 0.8935225990010979, "grad_norm": 0.09704600274562836, "learning_rate": 0.002, "loss": 2.3253, "step": 231140 }, { "epoch": 0.8935612562044811, "grad_norm": 0.14066311717033386, "learning_rate": 0.002, "loss": 2.3313, "step": 231150 }, { "epoch": 0.8935999134078644, "grad_norm": 0.09952470660209656, "learning_rate": 0.002, "loss": 2.3554, "step": 231160 }, { "epoch": 0.8936385706112477, "grad_norm": 0.09438595175743103, "learning_rate": 0.002, "loss": 2.3258, "step": 231170 }, { "epoch": 0.893677227814631, "grad_norm": 0.09615175426006317, "learning_rate": 0.002, "loss": 2.3334, "step": 231180 }, { "epoch": 0.8937158850180142, "grad_norm": 0.38761892914772034, "learning_rate": 0.002, "loss": 2.3308, "step": 231190 }, { "epoch": 0.8937545422213975, "grad_norm": 0.11786754429340363, "learning_rate": 0.002, "loss": 2.3387, "step": 231200 }, { "epoch": 0.8937931994247809, "grad_norm": 0.10287931561470032, "learning_rate": 0.002, "loss": 2.3242, "step": 231210 }, { "epoch": 0.8938318566281641, "grad_norm": 0.10862020403146744, "learning_rate": 0.002, "loss": 2.3372, "step": 231220 }, { "epoch": 0.8938705138315474, "grad_norm": 0.09461180865764618, "learning_rate": 0.002, "loss": 2.3385, "step": 231230 }, { "epoch": 0.8939091710349306, "grad_norm": 0.10775598138570786, "learning_rate": 0.002, "loss": 2.3465, "step": 231240 }, { "epoch": 0.893947828238314, "grad_norm": 0.09789089858531952, "learning_rate": 0.002, "loss": 2.3375, "step": 231250 }, { "epoch": 0.8939864854416972, "grad_norm": 0.10795722156763077, "learning_rate": 0.002, "loss": 2.3366, "step": 231260 }, { "epoch": 0.8940251426450805, "grad_norm": 0.10861648619174957, "learning_rate": 0.002, "loss": 2.3249, "step": 231270 }, { "epoch": 0.8940637998484637, "grad_norm": 0.11098982393741608, "learning_rate": 0.002, "loss": 2.3452, "step": 231280 }, { "epoch": 0.894102457051847, "grad_norm": 0.09414079785346985, "learning_rate": 0.002, "loss": 2.3314, "step": 231290 }, { "epoch": 0.8941411142552304, "grad_norm": 0.11190325766801834, "learning_rate": 0.002, "loss": 2.3474, "step": 231300 }, { "epoch": 0.8941797714586136, "grad_norm": 0.10887134075164795, "learning_rate": 0.002, "loss": 2.3552, "step": 231310 }, { "epoch": 0.8942184286619969, "grad_norm": 0.09470488131046295, "learning_rate": 0.002, "loss": 2.328, "step": 231320 }, { "epoch": 0.8942570858653801, "grad_norm": 0.10217797756195068, "learning_rate": 0.002, "loss": 2.356, "step": 231330 }, { "epoch": 0.8942957430687635, "grad_norm": 0.0979577824473381, "learning_rate": 0.002, "loss": 2.34, "step": 231340 }, { "epoch": 0.8943344002721467, "grad_norm": 0.10248088836669922, "learning_rate": 0.002, "loss": 2.3378, "step": 231350 }, { "epoch": 0.89437305747553, "grad_norm": 0.09941322356462479, "learning_rate": 0.002, "loss": 2.3442, "step": 231360 }, { "epoch": 0.8944117146789132, "grad_norm": 0.12089301645755768, "learning_rate": 0.002, "loss": 2.323, "step": 231370 }, { "epoch": 0.8944503718822966, "grad_norm": 0.1151440218091011, "learning_rate": 0.002, "loss": 2.3297, "step": 231380 }, { "epoch": 0.8944890290856798, "grad_norm": 0.116435207426548, "learning_rate": 0.002, "loss": 2.3216, "step": 231390 }, { "epoch": 0.8945276862890631, "grad_norm": 0.10244743525981903, "learning_rate": 0.002, "loss": 2.3308, "step": 231400 }, { "epoch": 0.8945663434924463, "grad_norm": 0.13515597581863403, "learning_rate": 0.002, "loss": 2.3408, "step": 231410 }, { "epoch": 0.8946050006958297, "grad_norm": 0.09385132789611816, "learning_rate": 0.002, "loss": 2.3445, "step": 231420 }, { "epoch": 0.894643657899213, "grad_norm": 0.10678960382938385, "learning_rate": 0.002, "loss": 2.3256, "step": 231430 }, { "epoch": 0.8946823151025962, "grad_norm": 0.10624562203884125, "learning_rate": 0.002, "loss": 2.3463, "step": 231440 }, { "epoch": 0.8947209723059795, "grad_norm": 0.1100616380572319, "learning_rate": 0.002, "loss": 2.3317, "step": 231450 }, { "epoch": 0.8947596295093627, "grad_norm": 0.0967680811882019, "learning_rate": 0.002, "loss": 2.3438, "step": 231460 }, { "epoch": 0.8947982867127461, "grad_norm": 0.12556961178779602, "learning_rate": 0.002, "loss": 2.3355, "step": 231470 }, { "epoch": 0.8948369439161293, "grad_norm": 0.11757630109786987, "learning_rate": 0.002, "loss": 2.3325, "step": 231480 }, { "epoch": 0.8948756011195126, "grad_norm": 0.09802540391683578, "learning_rate": 0.002, "loss": 2.3479, "step": 231490 }, { "epoch": 0.8949142583228958, "grad_norm": 0.10392263531684875, "learning_rate": 0.002, "loss": 2.3466, "step": 231500 }, { "epoch": 0.8949529155262792, "grad_norm": 0.11355400085449219, "learning_rate": 0.002, "loss": 2.339, "step": 231510 }, { "epoch": 0.8949915727296625, "grad_norm": 0.09654173254966736, "learning_rate": 0.002, "loss": 2.3145, "step": 231520 }, { "epoch": 0.8950302299330457, "grad_norm": 0.1176266297698021, "learning_rate": 0.002, "loss": 2.3359, "step": 231530 }, { "epoch": 0.895068887136429, "grad_norm": 0.10907765477895737, "learning_rate": 0.002, "loss": 2.3316, "step": 231540 }, { "epoch": 0.8951075443398123, "grad_norm": 0.10915853828191757, "learning_rate": 0.002, "loss": 2.3343, "step": 231550 }, { "epoch": 0.8951462015431956, "grad_norm": 0.11131073534488678, "learning_rate": 0.002, "loss": 2.3627, "step": 231560 }, { "epoch": 0.8951848587465788, "grad_norm": 0.11446822434663773, "learning_rate": 0.002, "loss": 2.3451, "step": 231570 }, { "epoch": 0.8952235159499621, "grad_norm": 0.12288888543844223, "learning_rate": 0.002, "loss": 2.3355, "step": 231580 }, { "epoch": 0.8952621731533454, "grad_norm": 0.3278125524520874, "learning_rate": 0.002, "loss": 2.3462, "step": 231590 }, { "epoch": 0.8953008303567287, "grad_norm": 0.11354555934667587, "learning_rate": 0.002, "loss": 2.3499, "step": 231600 }, { "epoch": 0.895339487560112, "grad_norm": 0.10242932289838791, "learning_rate": 0.002, "loss": 2.3485, "step": 231610 }, { "epoch": 0.8953781447634952, "grad_norm": 0.1100035160779953, "learning_rate": 0.002, "loss": 2.341, "step": 231620 }, { "epoch": 0.8954168019668786, "grad_norm": 0.09858231991529465, "learning_rate": 0.002, "loss": 2.3365, "step": 231630 }, { "epoch": 0.8954554591702618, "grad_norm": 0.10620801895856857, "learning_rate": 0.002, "loss": 2.3411, "step": 231640 }, { "epoch": 0.8954941163736451, "grad_norm": 0.12707732617855072, "learning_rate": 0.002, "loss": 2.3325, "step": 231650 }, { "epoch": 0.8955327735770283, "grad_norm": 0.12811803817749023, "learning_rate": 0.002, "loss": 2.3272, "step": 231660 }, { "epoch": 0.8955714307804116, "grad_norm": 0.1011258214712143, "learning_rate": 0.002, "loss": 2.351, "step": 231670 }, { "epoch": 0.8956100879837949, "grad_norm": 0.10836885124444962, "learning_rate": 0.002, "loss": 2.345, "step": 231680 }, { "epoch": 0.8956487451871782, "grad_norm": 0.11384893953800201, "learning_rate": 0.002, "loss": 2.3238, "step": 231690 }, { "epoch": 0.8956874023905614, "grad_norm": 0.10242240130901337, "learning_rate": 0.002, "loss": 2.3405, "step": 231700 }, { "epoch": 0.8957260595939447, "grad_norm": 0.10519468039274216, "learning_rate": 0.002, "loss": 2.3383, "step": 231710 }, { "epoch": 0.895764716797328, "grad_norm": 0.1039244532585144, "learning_rate": 0.002, "loss": 2.3286, "step": 231720 }, { "epoch": 0.8958033740007113, "grad_norm": 0.1218051016330719, "learning_rate": 0.002, "loss": 2.3408, "step": 231730 }, { "epoch": 0.8958420312040946, "grad_norm": 0.13029955327510834, "learning_rate": 0.002, "loss": 2.322, "step": 231740 }, { "epoch": 0.8958806884074778, "grad_norm": 0.10035617649555206, "learning_rate": 0.002, "loss": 2.3288, "step": 231750 }, { "epoch": 0.8959193456108612, "grad_norm": 0.0961938425898552, "learning_rate": 0.002, "loss": 2.3278, "step": 231760 }, { "epoch": 0.8959580028142444, "grad_norm": 0.11479473859071732, "learning_rate": 0.002, "loss": 2.3362, "step": 231770 }, { "epoch": 0.8959966600176277, "grad_norm": 0.11282454431056976, "learning_rate": 0.002, "loss": 2.3432, "step": 231780 }, { "epoch": 0.8960353172210109, "grad_norm": 0.10895568132400513, "learning_rate": 0.002, "loss": 2.3458, "step": 231790 }, { "epoch": 0.8960739744243943, "grad_norm": 0.12822161614894867, "learning_rate": 0.002, "loss": 2.3365, "step": 231800 }, { "epoch": 0.8961126316277775, "grad_norm": 0.11044373363256454, "learning_rate": 0.002, "loss": 2.3422, "step": 231810 }, { "epoch": 0.8961512888311608, "grad_norm": 0.09917205572128296, "learning_rate": 0.002, "loss": 2.3417, "step": 231820 }, { "epoch": 0.896189946034544, "grad_norm": 0.10342488437891006, "learning_rate": 0.002, "loss": 2.3286, "step": 231830 }, { "epoch": 0.8962286032379273, "grad_norm": 0.11677366495132446, "learning_rate": 0.002, "loss": 2.3327, "step": 231840 }, { "epoch": 0.8962672604413107, "grad_norm": 0.10196980088949203, "learning_rate": 0.002, "loss": 2.3159, "step": 231850 }, { "epoch": 0.8963059176446939, "grad_norm": 0.2261200100183487, "learning_rate": 0.002, "loss": 2.3551, "step": 231860 }, { "epoch": 0.8963445748480772, "grad_norm": 0.10797779262065887, "learning_rate": 0.002, "loss": 2.3263, "step": 231870 }, { "epoch": 0.8963832320514604, "grad_norm": 0.1007605567574501, "learning_rate": 0.002, "loss": 2.3463, "step": 231880 }, { "epoch": 0.8964218892548438, "grad_norm": 0.10831963270902634, "learning_rate": 0.002, "loss": 2.3449, "step": 231890 }, { "epoch": 0.896460546458227, "grad_norm": 0.1089211255311966, "learning_rate": 0.002, "loss": 2.3495, "step": 231900 }, { "epoch": 0.8964992036616103, "grad_norm": 0.11141608655452728, "learning_rate": 0.002, "loss": 2.3357, "step": 231910 }, { "epoch": 0.8965378608649935, "grad_norm": 0.11648708581924438, "learning_rate": 0.002, "loss": 2.3468, "step": 231920 }, { "epoch": 0.8965765180683769, "grad_norm": 0.09122157096862793, "learning_rate": 0.002, "loss": 2.3307, "step": 231930 }, { "epoch": 0.8966151752717602, "grad_norm": 0.10319860279560089, "learning_rate": 0.002, "loss": 2.3396, "step": 231940 }, { "epoch": 0.8966538324751434, "grad_norm": 0.1191958412528038, "learning_rate": 0.002, "loss": 2.3288, "step": 231950 }, { "epoch": 0.8966924896785267, "grad_norm": 0.09657797962427139, "learning_rate": 0.002, "loss": 2.3309, "step": 231960 }, { "epoch": 0.89673114688191, "grad_norm": 0.11388564109802246, "learning_rate": 0.002, "loss": 2.3179, "step": 231970 }, { "epoch": 0.8967698040852933, "grad_norm": 0.09544172137975693, "learning_rate": 0.002, "loss": 2.3352, "step": 231980 }, { "epoch": 0.8968084612886765, "grad_norm": 0.11244893074035645, "learning_rate": 0.002, "loss": 2.3278, "step": 231990 }, { "epoch": 0.8968471184920598, "grad_norm": 0.12543711066246033, "learning_rate": 0.002, "loss": 2.3299, "step": 232000 }, { "epoch": 0.8968857756954431, "grad_norm": 0.0969160795211792, "learning_rate": 0.002, "loss": 2.341, "step": 232010 }, { "epoch": 0.8969244328988264, "grad_norm": 0.10747043043375015, "learning_rate": 0.002, "loss": 2.3437, "step": 232020 }, { "epoch": 0.8969630901022096, "grad_norm": 0.10407809168100357, "learning_rate": 0.002, "loss": 2.339, "step": 232030 }, { "epoch": 0.8970017473055929, "grad_norm": 0.1038336455821991, "learning_rate": 0.002, "loss": 2.3413, "step": 232040 }, { "epoch": 0.8970404045089762, "grad_norm": 0.13403278589248657, "learning_rate": 0.002, "loss": 2.3339, "step": 232050 }, { "epoch": 0.8970790617123595, "grad_norm": 0.09164881706237793, "learning_rate": 0.002, "loss": 2.3456, "step": 232060 }, { "epoch": 0.8971177189157428, "grad_norm": 0.1021568551659584, "learning_rate": 0.002, "loss": 2.3438, "step": 232070 }, { "epoch": 0.897156376119126, "grad_norm": 0.10171541571617126, "learning_rate": 0.002, "loss": 2.3321, "step": 232080 }, { "epoch": 0.8971950333225093, "grad_norm": 0.10120661556720734, "learning_rate": 0.002, "loss": 2.3448, "step": 232090 }, { "epoch": 0.8972336905258926, "grad_norm": 0.11354358494281769, "learning_rate": 0.002, "loss": 2.3341, "step": 232100 }, { "epoch": 0.8972723477292759, "grad_norm": 0.11167127639055252, "learning_rate": 0.002, "loss": 2.3286, "step": 232110 }, { "epoch": 0.8973110049326591, "grad_norm": 0.10344775021076202, "learning_rate": 0.002, "loss": 2.3435, "step": 232120 }, { "epoch": 0.8973496621360424, "grad_norm": 0.09466537833213806, "learning_rate": 0.002, "loss": 2.3359, "step": 232130 }, { "epoch": 0.8973883193394258, "grad_norm": 0.11126919835805893, "learning_rate": 0.002, "loss": 2.3512, "step": 232140 }, { "epoch": 0.897426976542809, "grad_norm": 0.1044890433549881, "learning_rate": 0.002, "loss": 2.332, "step": 232150 }, { "epoch": 0.8974656337461923, "grad_norm": 0.10409554839134216, "learning_rate": 0.002, "loss": 2.3281, "step": 232160 }, { "epoch": 0.8975042909495755, "grad_norm": 0.11487068235874176, "learning_rate": 0.002, "loss": 2.3308, "step": 232170 }, { "epoch": 0.8975429481529589, "grad_norm": 0.09771732240915298, "learning_rate": 0.002, "loss": 2.3389, "step": 232180 }, { "epoch": 0.8975816053563421, "grad_norm": 0.13147693872451782, "learning_rate": 0.002, "loss": 2.3546, "step": 232190 }, { "epoch": 0.8976202625597254, "grad_norm": 0.12580551207065582, "learning_rate": 0.002, "loss": 2.3419, "step": 232200 }, { "epoch": 0.8976589197631086, "grad_norm": 0.10050297528505325, "learning_rate": 0.002, "loss": 2.3414, "step": 232210 }, { "epoch": 0.8976975769664919, "grad_norm": 0.12342985719442368, "learning_rate": 0.002, "loss": 2.3488, "step": 232220 }, { "epoch": 0.8977362341698752, "grad_norm": 0.09766845405101776, "learning_rate": 0.002, "loss": 2.348, "step": 232230 }, { "epoch": 0.8977748913732585, "grad_norm": 0.09380897879600525, "learning_rate": 0.002, "loss": 2.344, "step": 232240 }, { "epoch": 0.8978135485766418, "grad_norm": 0.12180311977863312, "learning_rate": 0.002, "loss": 2.3392, "step": 232250 }, { "epoch": 0.897852205780025, "grad_norm": 0.12489345669746399, "learning_rate": 0.002, "loss": 2.3549, "step": 232260 }, { "epoch": 0.8978908629834084, "grad_norm": 0.10110122710466385, "learning_rate": 0.002, "loss": 2.3372, "step": 232270 }, { "epoch": 0.8979295201867916, "grad_norm": 0.10600592195987701, "learning_rate": 0.002, "loss": 2.334, "step": 232280 }, { "epoch": 0.8979681773901749, "grad_norm": 0.10433664172887802, "learning_rate": 0.002, "loss": 2.3497, "step": 232290 }, { "epoch": 0.8980068345935581, "grad_norm": 0.09368140250444412, "learning_rate": 0.002, "loss": 2.3311, "step": 232300 }, { "epoch": 0.8980454917969415, "grad_norm": 0.11990946531295776, "learning_rate": 0.002, "loss": 2.3482, "step": 232310 }, { "epoch": 0.8980841490003247, "grad_norm": 0.09606378525495529, "learning_rate": 0.002, "loss": 2.3346, "step": 232320 }, { "epoch": 0.898122806203708, "grad_norm": 0.10541123896837234, "learning_rate": 0.002, "loss": 2.3452, "step": 232330 }, { "epoch": 0.8981614634070912, "grad_norm": 0.10391794145107269, "learning_rate": 0.002, "loss": 2.331, "step": 232340 }, { "epoch": 0.8982001206104746, "grad_norm": 0.10537216812372208, "learning_rate": 0.002, "loss": 2.3416, "step": 232350 }, { "epoch": 0.8982387778138579, "grad_norm": 0.09761541336774826, "learning_rate": 0.002, "loss": 2.3295, "step": 232360 }, { "epoch": 0.8982774350172411, "grad_norm": 0.10396650433540344, "learning_rate": 0.002, "loss": 2.3367, "step": 232370 }, { "epoch": 0.8983160922206244, "grad_norm": 0.10859867930412292, "learning_rate": 0.002, "loss": 2.3288, "step": 232380 }, { "epoch": 0.8983547494240076, "grad_norm": 0.10541373491287231, "learning_rate": 0.002, "loss": 2.346, "step": 232390 }, { "epoch": 0.898393406627391, "grad_norm": 0.1089557409286499, "learning_rate": 0.002, "loss": 2.3369, "step": 232400 }, { "epoch": 0.8984320638307742, "grad_norm": 0.1103198453783989, "learning_rate": 0.002, "loss": 2.3384, "step": 232410 }, { "epoch": 0.8984707210341575, "grad_norm": 0.10214130580425262, "learning_rate": 0.002, "loss": 2.3537, "step": 232420 }, { "epoch": 0.8985093782375407, "grad_norm": 0.09987477213144302, "learning_rate": 0.002, "loss": 2.3282, "step": 232430 }, { "epoch": 0.8985480354409241, "grad_norm": 0.1155705377459526, "learning_rate": 0.002, "loss": 2.3421, "step": 232440 }, { "epoch": 0.8985866926443073, "grad_norm": 0.09893004596233368, "learning_rate": 0.002, "loss": 2.3407, "step": 232450 }, { "epoch": 0.8986253498476906, "grad_norm": 0.09296012669801712, "learning_rate": 0.002, "loss": 2.3387, "step": 232460 }, { "epoch": 0.8986640070510739, "grad_norm": 0.09634601324796677, "learning_rate": 0.002, "loss": 2.3299, "step": 232470 }, { "epoch": 0.8987026642544572, "grad_norm": 0.1043325737118721, "learning_rate": 0.002, "loss": 2.3448, "step": 232480 }, { "epoch": 0.8987413214578405, "grad_norm": 0.11548048257827759, "learning_rate": 0.002, "loss": 2.3374, "step": 232490 }, { "epoch": 0.8987799786612237, "grad_norm": 0.10021772980690002, "learning_rate": 0.002, "loss": 2.3527, "step": 232500 }, { "epoch": 0.898818635864607, "grad_norm": 0.11486779898405075, "learning_rate": 0.002, "loss": 2.3275, "step": 232510 }, { "epoch": 0.8988572930679903, "grad_norm": 0.10646865516901016, "learning_rate": 0.002, "loss": 2.3398, "step": 232520 }, { "epoch": 0.8988959502713736, "grad_norm": 0.08879152685403824, "learning_rate": 0.002, "loss": 2.3299, "step": 232530 }, { "epoch": 0.8989346074747568, "grad_norm": 0.10799049586057663, "learning_rate": 0.002, "loss": 2.3474, "step": 232540 }, { "epoch": 0.8989732646781401, "grad_norm": 0.09489186108112335, "learning_rate": 0.002, "loss": 2.3425, "step": 232550 }, { "epoch": 0.8990119218815235, "grad_norm": 0.10621532797813416, "learning_rate": 0.002, "loss": 2.3334, "step": 232560 }, { "epoch": 0.8990505790849067, "grad_norm": 0.09988352656364441, "learning_rate": 0.002, "loss": 2.3345, "step": 232570 }, { "epoch": 0.89908923628829, "grad_norm": 0.10915430635213852, "learning_rate": 0.002, "loss": 2.3453, "step": 232580 }, { "epoch": 0.8991278934916732, "grad_norm": 0.1114613264799118, "learning_rate": 0.002, "loss": 2.3444, "step": 232590 }, { "epoch": 0.8991665506950565, "grad_norm": 0.13072876632213593, "learning_rate": 0.002, "loss": 2.3486, "step": 232600 }, { "epoch": 0.8992052078984398, "grad_norm": 0.09026247262954712, "learning_rate": 0.002, "loss": 2.3421, "step": 232610 }, { "epoch": 0.8992438651018231, "grad_norm": 0.09996059536933899, "learning_rate": 0.002, "loss": 2.3393, "step": 232620 }, { "epoch": 0.8992825223052063, "grad_norm": 0.09811939299106598, "learning_rate": 0.002, "loss": 2.3395, "step": 232630 }, { "epoch": 0.8993211795085896, "grad_norm": 0.11068232357501984, "learning_rate": 0.002, "loss": 2.3424, "step": 232640 }, { "epoch": 0.899359836711973, "grad_norm": 0.09158763289451599, "learning_rate": 0.002, "loss": 2.3376, "step": 232650 }, { "epoch": 0.8993984939153562, "grad_norm": 0.5038883090019226, "learning_rate": 0.002, "loss": 2.3354, "step": 232660 }, { "epoch": 0.8994371511187395, "grad_norm": 0.10323592275381088, "learning_rate": 0.002, "loss": 2.3416, "step": 232670 }, { "epoch": 0.8994758083221227, "grad_norm": 0.12048277258872986, "learning_rate": 0.002, "loss": 2.3317, "step": 232680 }, { "epoch": 0.8995144655255061, "grad_norm": 0.11678492277860641, "learning_rate": 0.002, "loss": 2.3518, "step": 232690 }, { "epoch": 0.8995531227288893, "grad_norm": 0.10575645416975021, "learning_rate": 0.002, "loss": 2.3367, "step": 232700 }, { "epoch": 0.8995917799322726, "grad_norm": 0.11219353973865509, "learning_rate": 0.002, "loss": 2.3443, "step": 232710 }, { "epoch": 0.8996304371356558, "grad_norm": 0.10328909009695053, "learning_rate": 0.002, "loss": 2.3407, "step": 232720 }, { "epoch": 0.8996690943390392, "grad_norm": 0.10199911147356033, "learning_rate": 0.002, "loss": 2.3528, "step": 232730 }, { "epoch": 0.8997077515424224, "grad_norm": 0.1011134684085846, "learning_rate": 0.002, "loss": 2.3343, "step": 232740 }, { "epoch": 0.8997464087458057, "grad_norm": 0.11138148605823517, "learning_rate": 0.002, "loss": 2.3391, "step": 232750 }, { "epoch": 0.899785065949189, "grad_norm": 0.13523036241531372, "learning_rate": 0.002, "loss": 2.3383, "step": 232760 }, { "epoch": 0.8998237231525722, "grad_norm": 0.09843233972787857, "learning_rate": 0.002, "loss": 2.3602, "step": 232770 }, { "epoch": 0.8998623803559556, "grad_norm": 0.08956612646579742, "learning_rate": 0.002, "loss": 2.3377, "step": 232780 }, { "epoch": 0.8999010375593388, "grad_norm": 0.09797809273004532, "learning_rate": 0.002, "loss": 2.3468, "step": 232790 }, { "epoch": 0.8999396947627221, "grad_norm": 0.14079497754573822, "learning_rate": 0.002, "loss": 2.3316, "step": 232800 }, { "epoch": 0.8999783519661053, "grad_norm": 0.10322416573762894, "learning_rate": 0.002, "loss": 2.3404, "step": 232810 }, { "epoch": 0.9000170091694887, "grad_norm": 0.1083340048789978, "learning_rate": 0.002, "loss": 2.334, "step": 232820 }, { "epoch": 0.9000556663728719, "grad_norm": 0.09649095684289932, "learning_rate": 0.002, "loss": 2.3355, "step": 232830 }, { "epoch": 0.9000943235762552, "grad_norm": 0.0971124917268753, "learning_rate": 0.002, "loss": 2.3413, "step": 232840 }, { "epoch": 0.9001329807796384, "grad_norm": 0.10010434687137604, "learning_rate": 0.002, "loss": 2.3394, "step": 232850 }, { "epoch": 0.9001716379830218, "grad_norm": 0.0972244068980217, "learning_rate": 0.002, "loss": 2.3424, "step": 232860 }, { "epoch": 0.900210295186405, "grad_norm": 0.11431008577346802, "learning_rate": 0.002, "loss": 2.343, "step": 232870 }, { "epoch": 0.9002489523897883, "grad_norm": 0.10072416067123413, "learning_rate": 0.002, "loss": 2.351, "step": 232880 }, { "epoch": 0.9002876095931716, "grad_norm": 0.10842856019735336, "learning_rate": 0.002, "loss": 2.3264, "step": 232890 }, { "epoch": 0.9003262667965549, "grad_norm": 0.10901232063770294, "learning_rate": 0.002, "loss": 2.3425, "step": 232900 }, { "epoch": 0.9003649239999382, "grad_norm": 0.09080494195222855, "learning_rate": 0.002, "loss": 2.3325, "step": 232910 }, { "epoch": 0.9004035812033214, "grad_norm": 0.10430239886045456, "learning_rate": 0.002, "loss": 2.33, "step": 232920 }, { "epoch": 0.9004422384067047, "grad_norm": 0.11572965234518051, "learning_rate": 0.002, "loss": 2.3175, "step": 232930 }, { "epoch": 0.900480895610088, "grad_norm": 0.12283935397863388, "learning_rate": 0.002, "loss": 2.3454, "step": 232940 }, { "epoch": 0.9005195528134713, "grad_norm": 0.11047730594873428, "learning_rate": 0.002, "loss": 2.3384, "step": 232950 }, { "epoch": 0.9005582100168545, "grad_norm": 0.11393677443265915, "learning_rate": 0.002, "loss": 2.3375, "step": 232960 }, { "epoch": 0.9005968672202378, "grad_norm": 0.11150172352790833, "learning_rate": 0.002, "loss": 2.3554, "step": 232970 }, { "epoch": 0.900635524423621, "grad_norm": 0.10802357643842697, "learning_rate": 0.002, "loss": 2.3474, "step": 232980 }, { "epoch": 0.9006741816270044, "grad_norm": 0.10877017676830292, "learning_rate": 0.002, "loss": 2.3537, "step": 232990 }, { "epoch": 0.9007128388303877, "grad_norm": 0.11371221393346786, "learning_rate": 0.002, "loss": 2.3439, "step": 233000 }, { "epoch": 0.9007514960337709, "grad_norm": 0.0988493487238884, "learning_rate": 0.002, "loss": 2.3209, "step": 233010 }, { "epoch": 0.9007901532371542, "grad_norm": 0.11515526473522186, "learning_rate": 0.002, "loss": 2.3391, "step": 233020 }, { "epoch": 0.9008288104405375, "grad_norm": 0.113592229783535, "learning_rate": 0.002, "loss": 2.3298, "step": 233030 }, { "epoch": 0.9008674676439208, "grad_norm": 0.09327522665262222, "learning_rate": 0.002, "loss": 2.3358, "step": 233040 }, { "epoch": 0.900906124847304, "grad_norm": 0.10999246686697006, "learning_rate": 0.002, "loss": 2.3286, "step": 233050 }, { "epoch": 0.9009447820506873, "grad_norm": 0.10084047168493271, "learning_rate": 0.002, "loss": 2.3248, "step": 233060 }, { "epoch": 0.9009834392540707, "grad_norm": 0.10265267640352249, "learning_rate": 0.002, "loss": 2.3285, "step": 233070 }, { "epoch": 0.9010220964574539, "grad_norm": 0.11654006689786911, "learning_rate": 0.002, "loss": 2.3425, "step": 233080 }, { "epoch": 0.9010607536608372, "grad_norm": 0.10442501306533813, "learning_rate": 0.002, "loss": 2.3367, "step": 233090 }, { "epoch": 0.9010994108642204, "grad_norm": 0.0955716222524643, "learning_rate": 0.002, "loss": 2.3536, "step": 233100 }, { "epoch": 0.9011380680676038, "grad_norm": 0.09556197375059128, "learning_rate": 0.002, "loss": 2.3402, "step": 233110 }, { "epoch": 0.901176725270987, "grad_norm": 0.10069194436073303, "learning_rate": 0.002, "loss": 2.3342, "step": 233120 }, { "epoch": 0.9012153824743703, "grad_norm": 0.10242833942174911, "learning_rate": 0.002, "loss": 2.3301, "step": 233130 }, { "epoch": 0.9012540396777535, "grad_norm": 0.10300853103399277, "learning_rate": 0.002, "loss": 2.3528, "step": 233140 }, { "epoch": 0.9012926968811368, "grad_norm": 0.11195770651102066, "learning_rate": 0.002, "loss": 2.3474, "step": 233150 }, { "epoch": 0.9013313540845201, "grad_norm": 0.09703370183706284, "learning_rate": 0.002, "loss": 2.3367, "step": 233160 }, { "epoch": 0.9013700112879034, "grad_norm": 0.10532054305076599, "learning_rate": 0.002, "loss": 2.3351, "step": 233170 }, { "epoch": 0.9014086684912866, "grad_norm": 0.09363120049238205, "learning_rate": 0.002, "loss": 2.3464, "step": 233180 }, { "epoch": 0.9014473256946699, "grad_norm": 0.10732677578926086, "learning_rate": 0.002, "loss": 2.3397, "step": 233190 }, { "epoch": 0.9014859828980533, "grad_norm": 0.09427095949649811, "learning_rate": 0.002, "loss": 2.3402, "step": 233200 }, { "epoch": 0.9015246401014365, "grad_norm": 0.13079047203063965, "learning_rate": 0.002, "loss": 2.3555, "step": 233210 }, { "epoch": 0.9015632973048198, "grad_norm": 0.11108674108982086, "learning_rate": 0.002, "loss": 2.3284, "step": 233220 }, { "epoch": 0.901601954508203, "grad_norm": 0.09413529932498932, "learning_rate": 0.002, "loss": 2.3262, "step": 233230 }, { "epoch": 0.9016406117115864, "grad_norm": 0.10972938686609268, "learning_rate": 0.002, "loss": 2.3184, "step": 233240 }, { "epoch": 0.9016792689149696, "grad_norm": 0.11578845232725143, "learning_rate": 0.002, "loss": 2.3436, "step": 233250 }, { "epoch": 0.9017179261183529, "grad_norm": 0.1254650205373764, "learning_rate": 0.002, "loss": 2.3434, "step": 233260 }, { "epoch": 0.9017565833217361, "grad_norm": 0.09579180181026459, "learning_rate": 0.002, "loss": 2.3304, "step": 233270 }, { "epoch": 0.9017952405251195, "grad_norm": 0.09996544569730759, "learning_rate": 0.002, "loss": 2.3362, "step": 233280 }, { "epoch": 0.9018338977285028, "grad_norm": 0.09571780264377594, "learning_rate": 0.002, "loss": 2.336, "step": 233290 }, { "epoch": 0.901872554931886, "grad_norm": 0.0973309725522995, "learning_rate": 0.002, "loss": 2.3328, "step": 233300 }, { "epoch": 0.9019112121352693, "grad_norm": 0.09493835270404816, "learning_rate": 0.002, "loss": 2.3441, "step": 233310 }, { "epoch": 0.9019498693386525, "grad_norm": 0.09489183127880096, "learning_rate": 0.002, "loss": 2.3329, "step": 233320 }, { "epoch": 0.9019885265420359, "grad_norm": 0.09764562547206879, "learning_rate": 0.002, "loss": 2.3285, "step": 233330 }, { "epoch": 0.9020271837454191, "grad_norm": 0.10663142800331116, "learning_rate": 0.002, "loss": 2.3201, "step": 233340 }, { "epoch": 0.9020658409488024, "grad_norm": 0.10935185104608536, "learning_rate": 0.002, "loss": 2.3407, "step": 233350 }, { "epoch": 0.9021044981521856, "grad_norm": 0.09917981177568436, "learning_rate": 0.002, "loss": 2.3299, "step": 233360 }, { "epoch": 0.902143155355569, "grad_norm": 0.12524504959583282, "learning_rate": 0.002, "loss": 2.338, "step": 233370 }, { "epoch": 0.9021818125589522, "grad_norm": 0.10528213530778885, "learning_rate": 0.002, "loss": 2.3403, "step": 233380 }, { "epoch": 0.9022204697623355, "grad_norm": 0.12502014636993408, "learning_rate": 0.002, "loss": 2.3571, "step": 233390 }, { "epoch": 0.9022591269657187, "grad_norm": 0.10130679607391357, "learning_rate": 0.002, "loss": 2.3284, "step": 233400 }, { "epoch": 0.9022977841691021, "grad_norm": 0.09227457642555237, "learning_rate": 0.002, "loss": 2.322, "step": 233410 }, { "epoch": 0.9023364413724854, "grad_norm": 0.09661556035280228, "learning_rate": 0.002, "loss": 2.3327, "step": 233420 }, { "epoch": 0.9023750985758686, "grad_norm": 0.10869093984365463, "learning_rate": 0.002, "loss": 2.3312, "step": 233430 }, { "epoch": 0.9024137557792519, "grad_norm": 0.11370524019002914, "learning_rate": 0.002, "loss": 2.3303, "step": 233440 }, { "epoch": 0.9024524129826352, "grad_norm": 0.10741494596004486, "learning_rate": 0.002, "loss": 2.3446, "step": 233450 }, { "epoch": 0.9024910701860185, "grad_norm": 0.11763168126344681, "learning_rate": 0.002, "loss": 2.3296, "step": 233460 }, { "epoch": 0.9025297273894017, "grad_norm": 0.09434393793344498, "learning_rate": 0.002, "loss": 2.3306, "step": 233470 }, { "epoch": 0.902568384592785, "grad_norm": 0.10174699127674103, "learning_rate": 0.002, "loss": 2.3473, "step": 233480 }, { "epoch": 0.9026070417961684, "grad_norm": 0.09130481630563736, "learning_rate": 0.002, "loss": 2.3393, "step": 233490 }, { "epoch": 0.9026456989995516, "grad_norm": 0.09817611426115036, "learning_rate": 0.002, "loss": 2.3436, "step": 233500 }, { "epoch": 0.9026843562029349, "grad_norm": 0.12424413859844208, "learning_rate": 0.002, "loss": 2.342, "step": 233510 }, { "epoch": 0.9027230134063181, "grad_norm": 0.11780202388763428, "learning_rate": 0.002, "loss": 2.3494, "step": 233520 }, { "epoch": 0.9027616706097014, "grad_norm": 0.10289761424064636, "learning_rate": 0.002, "loss": 2.3443, "step": 233530 }, { "epoch": 0.9028003278130847, "grad_norm": 0.08934229612350464, "learning_rate": 0.002, "loss": 2.3422, "step": 233540 }, { "epoch": 0.902838985016468, "grad_norm": 0.11549816280603409, "learning_rate": 0.002, "loss": 2.3442, "step": 233550 }, { "epoch": 0.9028776422198512, "grad_norm": 0.10548436641693115, "learning_rate": 0.002, "loss": 2.3372, "step": 233560 }, { "epoch": 0.9029162994232345, "grad_norm": 0.09892436861991882, "learning_rate": 0.002, "loss": 2.3274, "step": 233570 }, { "epoch": 0.9029549566266178, "grad_norm": 0.11846853047609329, "learning_rate": 0.002, "loss": 2.344, "step": 233580 }, { "epoch": 0.9029936138300011, "grad_norm": 0.10741759091615677, "learning_rate": 0.002, "loss": 2.345, "step": 233590 }, { "epoch": 0.9030322710333843, "grad_norm": 0.09817928075790405, "learning_rate": 0.002, "loss": 2.3482, "step": 233600 }, { "epoch": 0.9030709282367676, "grad_norm": 0.1170048788189888, "learning_rate": 0.002, "loss": 2.3328, "step": 233610 }, { "epoch": 0.903109585440151, "grad_norm": 0.10460609942674637, "learning_rate": 0.002, "loss": 2.33, "step": 233620 }, { "epoch": 0.9031482426435342, "grad_norm": 0.11310793459415436, "learning_rate": 0.002, "loss": 2.339, "step": 233630 }, { "epoch": 0.9031868998469175, "grad_norm": 0.12793530523777008, "learning_rate": 0.002, "loss": 2.3324, "step": 233640 }, { "epoch": 0.9032255570503007, "grad_norm": 0.11377539485692978, "learning_rate": 0.002, "loss": 2.3393, "step": 233650 }, { "epoch": 0.9032642142536841, "grad_norm": 0.1026814803481102, "learning_rate": 0.002, "loss": 2.3311, "step": 233660 }, { "epoch": 0.9033028714570673, "grad_norm": 0.11845576763153076, "learning_rate": 0.002, "loss": 2.3517, "step": 233670 }, { "epoch": 0.9033415286604506, "grad_norm": 0.10374502837657928, "learning_rate": 0.002, "loss": 2.3364, "step": 233680 }, { "epoch": 0.9033801858638338, "grad_norm": 0.11612123996019363, "learning_rate": 0.002, "loss": 2.3257, "step": 233690 }, { "epoch": 0.9034188430672171, "grad_norm": 0.11562748998403549, "learning_rate": 0.002, "loss": 2.3366, "step": 233700 }, { "epoch": 0.9034575002706005, "grad_norm": 0.12471418082714081, "learning_rate": 0.002, "loss": 2.3266, "step": 233710 }, { "epoch": 0.9034961574739837, "grad_norm": 0.09950396418571472, "learning_rate": 0.002, "loss": 2.3438, "step": 233720 }, { "epoch": 0.903534814677367, "grad_norm": 0.10906396806240082, "learning_rate": 0.002, "loss": 2.3299, "step": 233730 }, { "epoch": 0.9035734718807502, "grad_norm": 0.11370264738798141, "learning_rate": 0.002, "loss": 2.3458, "step": 233740 }, { "epoch": 0.9036121290841336, "grad_norm": 0.11147429049015045, "learning_rate": 0.002, "loss": 2.3551, "step": 233750 }, { "epoch": 0.9036507862875168, "grad_norm": 0.10699936747550964, "learning_rate": 0.002, "loss": 2.3408, "step": 233760 }, { "epoch": 0.9036894434909001, "grad_norm": 0.12339533865451813, "learning_rate": 0.002, "loss": 2.3387, "step": 233770 }, { "epoch": 0.9037281006942833, "grad_norm": 0.118447445333004, "learning_rate": 0.002, "loss": 2.3425, "step": 233780 }, { "epoch": 0.9037667578976667, "grad_norm": 0.10083477944135666, "learning_rate": 0.002, "loss": 2.3461, "step": 233790 }, { "epoch": 0.90380541510105, "grad_norm": 0.1282907873392105, "learning_rate": 0.002, "loss": 2.3498, "step": 233800 }, { "epoch": 0.9038440723044332, "grad_norm": 0.11394553631544113, "learning_rate": 0.002, "loss": 2.3417, "step": 233810 }, { "epoch": 0.9038827295078165, "grad_norm": 0.0914125069975853, "learning_rate": 0.002, "loss": 2.3382, "step": 233820 }, { "epoch": 0.9039213867111998, "grad_norm": 0.10464632511138916, "learning_rate": 0.002, "loss": 2.3435, "step": 233830 }, { "epoch": 0.9039600439145831, "grad_norm": 0.11399640142917633, "learning_rate": 0.002, "loss": 2.3298, "step": 233840 }, { "epoch": 0.9039987011179663, "grad_norm": 0.11424519866704941, "learning_rate": 0.002, "loss": 2.3436, "step": 233850 }, { "epoch": 0.9040373583213496, "grad_norm": 0.12120144814252853, "learning_rate": 0.002, "loss": 2.3463, "step": 233860 }, { "epoch": 0.9040760155247328, "grad_norm": 0.10877831280231476, "learning_rate": 0.002, "loss": 2.3427, "step": 233870 }, { "epoch": 0.9041146727281162, "grad_norm": 0.15334224700927734, "learning_rate": 0.002, "loss": 2.3345, "step": 233880 }, { "epoch": 0.9041533299314994, "grad_norm": 0.12217187136411667, "learning_rate": 0.002, "loss": 2.3484, "step": 233890 }, { "epoch": 0.9041919871348827, "grad_norm": 0.11143625527620316, "learning_rate": 0.002, "loss": 2.3411, "step": 233900 }, { "epoch": 0.9042306443382659, "grad_norm": 0.1174347847700119, "learning_rate": 0.002, "loss": 2.334, "step": 233910 }, { "epoch": 0.9042693015416493, "grad_norm": 0.09945125877857208, "learning_rate": 0.002, "loss": 2.3446, "step": 233920 }, { "epoch": 0.9043079587450326, "grad_norm": 0.09746522456407547, "learning_rate": 0.002, "loss": 2.3315, "step": 233930 }, { "epoch": 0.9043466159484158, "grad_norm": 0.12990349531173706, "learning_rate": 0.002, "loss": 2.34, "step": 233940 }, { "epoch": 0.9043852731517991, "grad_norm": 0.11263786256313324, "learning_rate": 0.002, "loss": 2.3252, "step": 233950 }, { "epoch": 0.9044239303551824, "grad_norm": 0.11403274536132812, "learning_rate": 0.002, "loss": 2.3227, "step": 233960 }, { "epoch": 0.9044625875585657, "grad_norm": 0.08966249227523804, "learning_rate": 0.002, "loss": 2.3398, "step": 233970 }, { "epoch": 0.9045012447619489, "grad_norm": 0.1288156658411026, "learning_rate": 0.002, "loss": 2.3236, "step": 233980 }, { "epoch": 0.9045399019653322, "grad_norm": 0.09932001680135727, "learning_rate": 0.002, "loss": 2.3416, "step": 233990 }, { "epoch": 0.9045785591687155, "grad_norm": 0.10278764367103577, "learning_rate": 0.002, "loss": 2.3236, "step": 234000 }, { "epoch": 0.9046172163720988, "grad_norm": 0.10722273588180542, "learning_rate": 0.002, "loss": 2.3147, "step": 234010 }, { "epoch": 0.904655873575482, "grad_norm": 0.1080775111913681, "learning_rate": 0.002, "loss": 2.3491, "step": 234020 }, { "epoch": 0.9046945307788653, "grad_norm": 0.11944068223237991, "learning_rate": 0.002, "loss": 2.3329, "step": 234030 }, { "epoch": 0.9047331879822487, "grad_norm": 0.11086134612560272, "learning_rate": 0.002, "loss": 2.3348, "step": 234040 }, { "epoch": 0.9047718451856319, "grad_norm": 0.11324305832386017, "learning_rate": 0.002, "loss": 2.3338, "step": 234050 }, { "epoch": 0.9048105023890152, "grad_norm": 0.09748173505067825, "learning_rate": 0.002, "loss": 2.3405, "step": 234060 }, { "epoch": 0.9048491595923984, "grad_norm": 0.11554388701915741, "learning_rate": 0.002, "loss": 2.3464, "step": 234070 }, { "epoch": 0.9048878167957817, "grad_norm": 0.107016921043396, "learning_rate": 0.002, "loss": 2.3383, "step": 234080 }, { "epoch": 0.904926473999165, "grad_norm": 0.2644636631011963, "learning_rate": 0.002, "loss": 2.3489, "step": 234090 }, { "epoch": 0.9049651312025483, "grad_norm": 0.09860588610172272, "learning_rate": 0.002, "loss": 2.345, "step": 234100 }, { "epoch": 0.9050037884059315, "grad_norm": 0.11851309984922409, "learning_rate": 0.002, "loss": 2.3483, "step": 234110 }, { "epoch": 0.9050424456093148, "grad_norm": 0.16849271953105927, "learning_rate": 0.002, "loss": 2.3374, "step": 234120 }, { "epoch": 0.9050811028126982, "grad_norm": 0.11365487426519394, "learning_rate": 0.002, "loss": 2.3536, "step": 234130 }, { "epoch": 0.9051197600160814, "grad_norm": 0.1182205006480217, "learning_rate": 0.002, "loss": 2.3443, "step": 234140 }, { "epoch": 0.9051584172194647, "grad_norm": 0.10879401117563248, "learning_rate": 0.002, "loss": 2.3506, "step": 234150 }, { "epoch": 0.9051970744228479, "grad_norm": 0.12132114171981812, "learning_rate": 0.002, "loss": 2.335, "step": 234160 }, { "epoch": 0.9052357316262313, "grad_norm": 0.10799846798181534, "learning_rate": 0.002, "loss": 2.324, "step": 234170 }, { "epoch": 0.9052743888296145, "grad_norm": 0.10171600431203842, "learning_rate": 0.002, "loss": 2.3321, "step": 234180 }, { "epoch": 0.9053130460329978, "grad_norm": 0.11105701327323914, "learning_rate": 0.002, "loss": 2.3393, "step": 234190 }, { "epoch": 0.905351703236381, "grad_norm": 0.11268547922372818, "learning_rate": 0.002, "loss": 2.3441, "step": 234200 }, { "epoch": 0.9053903604397644, "grad_norm": 0.11618811637163162, "learning_rate": 0.002, "loss": 2.327, "step": 234210 }, { "epoch": 0.9054290176431476, "grad_norm": 0.11327293515205383, "learning_rate": 0.002, "loss": 2.3377, "step": 234220 }, { "epoch": 0.9054676748465309, "grad_norm": 0.44370654225349426, "learning_rate": 0.002, "loss": 2.3471, "step": 234230 }, { "epoch": 0.9055063320499142, "grad_norm": 0.12347027659416199, "learning_rate": 0.002, "loss": 2.3442, "step": 234240 }, { "epoch": 0.9055449892532974, "grad_norm": 0.10137667506933212, "learning_rate": 0.002, "loss": 2.353, "step": 234250 }, { "epoch": 0.9055836464566808, "grad_norm": 0.11867160350084305, "learning_rate": 0.002, "loss": 2.3186, "step": 234260 }, { "epoch": 0.905622303660064, "grad_norm": 0.10813513398170471, "learning_rate": 0.002, "loss": 2.3393, "step": 234270 }, { "epoch": 0.9056609608634473, "grad_norm": 0.10353584587574005, "learning_rate": 0.002, "loss": 2.3358, "step": 234280 }, { "epoch": 0.9056996180668305, "grad_norm": 0.10264535993337631, "learning_rate": 0.002, "loss": 2.3433, "step": 234290 }, { "epoch": 0.9057382752702139, "grad_norm": 0.12715892493724823, "learning_rate": 0.002, "loss": 2.3335, "step": 234300 }, { "epoch": 0.9057769324735971, "grad_norm": 0.11884993314743042, "learning_rate": 0.002, "loss": 2.334, "step": 234310 }, { "epoch": 0.9058155896769804, "grad_norm": 0.1040872186422348, "learning_rate": 0.002, "loss": 2.3394, "step": 234320 }, { "epoch": 0.9058542468803636, "grad_norm": 0.09612486511468887, "learning_rate": 0.002, "loss": 2.3494, "step": 234330 }, { "epoch": 0.905892904083747, "grad_norm": 0.10156971961259842, "learning_rate": 0.002, "loss": 2.3456, "step": 234340 }, { "epoch": 0.9059315612871303, "grad_norm": 0.09877484291791916, "learning_rate": 0.002, "loss": 2.3415, "step": 234350 }, { "epoch": 0.9059702184905135, "grad_norm": 0.1371445655822754, "learning_rate": 0.002, "loss": 2.3638, "step": 234360 }, { "epoch": 0.9060088756938968, "grad_norm": 0.0945514440536499, "learning_rate": 0.002, "loss": 2.3303, "step": 234370 }, { "epoch": 0.9060475328972801, "grad_norm": 0.10155311971902847, "learning_rate": 0.002, "loss": 2.3386, "step": 234380 }, { "epoch": 0.9060861901006634, "grad_norm": 0.0989159569144249, "learning_rate": 0.002, "loss": 2.3182, "step": 234390 }, { "epoch": 0.9061248473040466, "grad_norm": 0.11544256657361984, "learning_rate": 0.002, "loss": 2.3373, "step": 234400 }, { "epoch": 0.9061635045074299, "grad_norm": 0.09864155203104019, "learning_rate": 0.002, "loss": 2.3381, "step": 234410 }, { "epoch": 0.9062021617108132, "grad_norm": 0.10460689663887024, "learning_rate": 0.002, "loss": 2.3396, "step": 234420 }, { "epoch": 0.9062408189141965, "grad_norm": 0.1467406451702118, "learning_rate": 0.002, "loss": 2.3348, "step": 234430 }, { "epoch": 0.9062794761175798, "grad_norm": 0.10551851242780685, "learning_rate": 0.002, "loss": 2.3412, "step": 234440 }, { "epoch": 0.906318133320963, "grad_norm": 0.1010807529091835, "learning_rate": 0.002, "loss": 2.3416, "step": 234450 }, { "epoch": 0.9063567905243463, "grad_norm": 0.10369355976581573, "learning_rate": 0.002, "loss": 2.3458, "step": 234460 }, { "epoch": 0.9063954477277296, "grad_norm": 0.10652629286050797, "learning_rate": 0.002, "loss": 2.3356, "step": 234470 }, { "epoch": 0.9064341049311129, "grad_norm": 0.10208172351121902, "learning_rate": 0.002, "loss": 2.3218, "step": 234480 }, { "epoch": 0.9064727621344961, "grad_norm": 0.10453961789608002, "learning_rate": 0.002, "loss": 2.3328, "step": 234490 }, { "epoch": 0.9065114193378794, "grad_norm": 0.09997710585594177, "learning_rate": 0.002, "loss": 2.3351, "step": 234500 }, { "epoch": 0.9065500765412627, "grad_norm": 0.11260194331407547, "learning_rate": 0.002, "loss": 2.3496, "step": 234510 }, { "epoch": 0.906588733744646, "grad_norm": 0.11529659479856491, "learning_rate": 0.002, "loss": 2.3517, "step": 234520 }, { "epoch": 0.9066273909480292, "grad_norm": 0.09845767170190811, "learning_rate": 0.002, "loss": 2.3347, "step": 234530 }, { "epoch": 0.9066660481514125, "grad_norm": 0.10353818535804749, "learning_rate": 0.002, "loss": 2.3443, "step": 234540 }, { "epoch": 0.9067047053547959, "grad_norm": 0.09427450597286224, "learning_rate": 0.002, "loss": 2.3267, "step": 234550 }, { "epoch": 0.9067433625581791, "grad_norm": 0.11889142543077469, "learning_rate": 0.002, "loss": 2.3266, "step": 234560 }, { "epoch": 0.9067820197615624, "grad_norm": 0.10010946542024612, "learning_rate": 0.002, "loss": 2.35, "step": 234570 }, { "epoch": 0.9068206769649456, "grad_norm": 0.09039853513240814, "learning_rate": 0.002, "loss": 2.3551, "step": 234580 }, { "epoch": 0.906859334168329, "grad_norm": 0.10822974890470505, "learning_rate": 0.002, "loss": 2.3365, "step": 234590 }, { "epoch": 0.9068979913717122, "grad_norm": 0.10804063826799393, "learning_rate": 0.002, "loss": 2.3559, "step": 234600 }, { "epoch": 0.9069366485750955, "grad_norm": 0.1527954339981079, "learning_rate": 0.002, "loss": 2.3192, "step": 234610 }, { "epoch": 0.9069753057784787, "grad_norm": 0.11379213631153107, "learning_rate": 0.002, "loss": 2.3179, "step": 234620 }, { "epoch": 0.907013962981862, "grad_norm": 0.11318988353013992, "learning_rate": 0.002, "loss": 2.3321, "step": 234630 }, { "epoch": 0.9070526201852454, "grad_norm": 0.1034546047449112, "learning_rate": 0.002, "loss": 2.3154, "step": 234640 }, { "epoch": 0.9070912773886286, "grad_norm": 0.09208408743143082, "learning_rate": 0.002, "loss": 2.3473, "step": 234650 }, { "epoch": 0.9071299345920119, "grad_norm": 0.10531508177518845, "learning_rate": 0.002, "loss": 2.3421, "step": 234660 }, { "epoch": 0.9071685917953951, "grad_norm": 0.13348685204982758, "learning_rate": 0.002, "loss": 2.3485, "step": 234670 }, { "epoch": 0.9072072489987785, "grad_norm": 0.09402325004339218, "learning_rate": 0.002, "loss": 2.3403, "step": 234680 }, { "epoch": 0.9072459062021617, "grad_norm": 0.10708408802747726, "learning_rate": 0.002, "loss": 2.3518, "step": 234690 }, { "epoch": 0.907284563405545, "grad_norm": 0.13726216554641724, "learning_rate": 0.002, "loss": 2.3422, "step": 234700 }, { "epoch": 0.9073232206089282, "grad_norm": 0.10366878658533096, "learning_rate": 0.002, "loss": 2.3535, "step": 234710 }, { "epoch": 0.9073618778123116, "grad_norm": 0.10182026773691177, "learning_rate": 0.002, "loss": 2.3397, "step": 234720 }, { "epoch": 0.9074005350156948, "grad_norm": 0.1012740358710289, "learning_rate": 0.002, "loss": 2.3302, "step": 234730 }, { "epoch": 0.9074391922190781, "grad_norm": 0.10341080278158188, "learning_rate": 0.002, "loss": 2.3438, "step": 234740 }, { "epoch": 0.9074778494224613, "grad_norm": 0.09370353817939758, "learning_rate": 0.002, "loss": 2.3271, "step": 234750 }, { "epoch": 0.9075165066258447, "grad_norm": 0.10617724061012268, "learning_rate": 0.002, "loss": 2.3426, "step": 234760 }, { "epoch": 0.907555163829228, "grad_norm": 0.09987720102071762, "learning_rate": 0.002, "loss": 2.3297, "step": 234770 }, { "epoch": 0.9075938210326112, "grad_norm": 0.11386090517044067, "learning_rate": 0.002, "loss": 2.3371, "step": 234780 }, { "epoch": 0.9076324782359945, "grad_norm": 0.09530167281627655, "learning_rate": 0.002, "loss": 2.3402, "step": 234790 }, { "epoch": 0.9076711354393777, "grad_norm": 0.12116479128599167, "learning_rate": 0.002, "loss": 2.3376, "step": 234800 }, { "epoch": 0.9077097926427611, "grad_norm": 0.10751676559448242, "learning_rate": 0.002, "loss": 2.3374, "step": 234810 }, { "epoch": 0.9077484498461443, "grad_norm": 0.10586011409759521, "learning_rate": 0.002, "loss": 2.3598, "step": 234820 }, { "epoch": 0.9077871070495276, "grad_norm": 0.09758689254522324, "learning_rate": 0.002, "loss": 2.3374, "step": 234830 }, { "epoch": 0.9078257642529108, "grad_norm": 0.11259765177965164, "learning_rate": 0.002, "loss": 2.3366, "step": 234840 }, { "epoch": 0.9078644214562942, "grad_norm": 0.1003914549946785, "learning_rate": 0.002, "loss": 2.3484, "step": 234850 }, { "epoch": 0.9079030786596775, "grad_norm": 0.09935523569583893, "learning_rate": 0.002, "loss": 2.341, "step": 234860 }, { "epoch": 0.9079417358630607, "grad_norm": 0.10035855323076248, "learning_rate": 0.002, "loss": 2.3555, "step": 234870 }, { "epoch": 0.907980393066444, "grad_norm": 0.08553393930196762, "learning_rate": 0.002, "loss": 2.3398, "step": 234880 }, { "epoch": 0.9080190502698273, "grad_norm": 0.1025664210319519, "learning_rate": 0.002, "loss": 2.3412, "step": 234890 }, { "epoch": 0.9080577074732106, "grad_norm": 0.1198197677731514, "learning_rate": 0.002, "loss": 2.3369, "step": 234900 }, { "epoch": 0.9080963646765938, "grad_norm": 0.11244381964206696, "learning_rate": 0.002, "loss": 2.3232, "step": 234910 }, { "epoch": 0.9081350218799771, "grad_norm": 0.10571009665727615, "learning_rate": 0.002, "loss": 2.3318, "step": 234920 }, { "epoch": 0.9081736790833604, "grad_norm": 0.12540297210216522, "learning_rate": 0.002, "loss": 2.3391, "step": 234930 }, { "epoch": 0.9082123362867437, "grad_norm": 0.10460840165615082, "learning_rate": 0.002, "loss": 2.3493, "step": 234940 }, { "epoch": 0.908250993490127, "grad_norm": 0.11541231721639633, "learning_rate": 0.002, "loss": 2.3405, "step": 234950 }, { "epoch": 0.9082896506935102, "grad_norm": 0.11149951815605164, "learning_rate": 0.002, "loss": 2.3373, "step": 234960 }, { "epoch": 0.9083283078968936, "grad_norm": 0.12732405960559845, "learning_rate": 0.002, "loss": 2.3364, "step": 234970 }, { "epoch": 0.9083669651002768, "grad_norm": 0.09354390949010849, "learning_rate": 0.002, "loss": 2.3342, "step": 234980 }, { "epoch": 0.9084056223036601, "grad_norm": 0.11542489379644394, "learning_rate": 0.002, "loss": 2.3317, "step": 234990 }, { "epoch": 0.9084442795070433, "grad_norm": 0.09353950619697571, "learning_rate": 0.002, "loss": 2.3557, "step": 235000 }, { "epoch": 0.9084829367104266, "grad_norm": 0.12837208807468414, "learning_rate": 0.002, "loss": 2.3388, "step": 235010 }, { "epoch": 0.9085215939138099, "grad_norm": 0.11282764375209808, "learning_rate": 0.002, "loss": 2.3262, "step": 235020 }, { "epoch": 0.9085602511171932, "grad_norm": 0.12078604847192764, "learning_rate": 0.002, "loss": 2.3349, "step": 235030 }, { "epoch": 0.9085989083205764, "grad_norm": 0.11550453305244446, "learning_rate": 0.002, "loss": 2.3509, "step": 235040 }, { "epoch": 0.9086375655239597, "grad_norm": 0.10718785226345062, "learning_rate": 0.002, "loss": 2.3353, "step": 235050 }, { "epoch": 0.908676222727343, "grad_norm": 0.21481357514858246, "learning_rate": 0.002, "loss": 2.3257, "step": 235060 }, { "epoch": 0.9087148799307263, "grad_norm": 0.12408475577831268, "learning_rate": 0.002, "loss": 2.3463, "step": 235070 }, { "epoch": 0.9087535371341096, "grad_norm": 0.10488598793745041, "learning_rate": 0.002, "loss": 2.3315, "step": 235080 }, { "epoch": 0.9087921943374928, "grad_norm": 0.10466601699590683, "learning_rate": 0.002, "loss": 2.3369, "step": 235090 }, { "epoch": 0.9088308515408762, "grad_norm": 0.1314753293991089, "learning_rate": 0.002, "loss": 2.3436, "step": 235100 }, { "epoch": 0.9088695087442594, "grad_norm": 0.11760780215263367, "learning_rate": 0.002, "loss": 2.3417, "step": 235110 }, { "epoch": 0.9089081659476427, "grad_norm": 0.10353752225637436, "learning_rate": 0.002, "loss": 2.3352, "step": 235120 }, { "epoch": 0.9089468231510259, "grad_norm": 0.1124408170580864, "learning_rate": 0.002, "loss": 2.3095, "step": 235130 }, { "epoch": 0.9089854803544093, "grad_norm": 0.11053520441055298, "learning_rate": 0.002, "loss": 2.3298, "step": 235140 }, { "epoch": 0.9090241375577925, "grad_norm": 0.10394348949193954, "learning_rate": 0.002, "loss": 2.3561, "step": 235150 }, { "epoch": 0.9090627947611758, "grad_norm": 0.10440591722726822, "learning_rate": 0.002, "loss": 2.342, "step": 235160 }, { "epoch": 0.909101451964559, "grad_norm": 0.09316018968820572, "learning_rate": 0.002, "loss": 2.3545, "step": 235170 }, { "epoch": 0.9091401091679423, "grad_norm": 0.14168131351470947, "learning_rate": 0.002, "loss": 2.3167, "step": 235180 }, { "epoch": 0.9091787663713257, "grad_norm": 0.09957494586706161, "learning_rate": 0.002, "loss": 2.3332, "step": 235190 }, { "epoch": 0.9092174235747089, "grad_norm": 0.09146034717559814, "learning_rate": 0.002, "loss": 2.3336, "step": 235200 }, { "epoch": 0.9092560807780922, "grad_norm": 0.11028812825679779, "learning_rate": 0.002, "loss": 2.3502, "step": 235210 }, { "epoch": 0.9092947379814754, "grad_norm": 0.09550734609365463, "learning_rate": 0.002, "loss": 2.3489, "step": 235220 }, { "epoch": 0.9093333951848588, "grad_norm": 0.10537036508321762, "learning_rate": 0.002, "loss": 2.3306, "step": 235230 }, { "epoch": 0.909372052388242, "grad_norm": 0.10891729593276978, "learning_rate": 0.002, "loss": 2.348, "step": 235240 }, { "epoch": 0.9094107095916253, "grad_norm": 0.10347836464643478, "learning_rate": 0.002, "loss": 2.341, "step": 235250 }, { "epoch": 0.9094493667950085, "grad_norm": 0.10081382840871811, "learning_rate": 0.002, "loss": 2.3399, "step": 235260 }, { "epoch": 0.9094880239983919, "grad_norm": 0.09710804373025894, "learning_rate": 0.002, "loss": 2.3435, "step": 235270 }, { "epoch": 0.9095266812017752, "grad_norm": 0.1072625070810318, "learning_rate": 0.002, "loss": 2.3296, "step": 235280 }, { "epoch": 0.9095653384051584, "grad_norm": 0.11930724233388901, "learning_rate": 0.002, "loss": 2.3321, "step": 235290 }, { "epoch": 0.9096039956085417, "grad_norm": 0.10877171903848648, "learning_rate": 0.002, "loss": 2.3494, "step": 235300 }, { "epoch": 0.909642652811925, "grad_norm": 0.1109236478805542, "learning_rate": 0.002, "loss": 2.3298, "step": 235310 }, { "epoch": 0.9096813100153083, "grad_norm": 0.10849165171384811, "learning_rate": 0.002, "loss": 2.3305, "step": 235320 }, { "epoch": 0.9097199672186915, "grad_norm": 0.09898397326469421, "learning_rate": 0.002, "loss": 2.3442, "step": 235330 }, { "epoch": 0.9097586244220748, "grad_norm": 0.09823174774646759, "learning_rate": 0.002, "loss": 2.3556, "step": 235340 }, { "epoch": 0.9097972816254581, "grad_norm": 0.10743778198957443, "learning_rate": 0.002, "loss": 2.3293, "step": 235350 }, { "epoch": 0.9098359388288414, "grad_norm": 0.1055048406124115, "learning_rate": 0.002, "loss": 2.3448, "step": 235360 }, { "epoch": 0.9098745960322246, "grad_norm": 0.11165113747119904, "learning_rate": 0.002, "loss": 2.3428, "step": 235370 }, { "epoch": 0.9099132532356079, "grad_norm": 0.10300975292921066, "learning_rate": 0.002, "loss": 2.3561, "step": 235380 }, { "epoch": 0.9099519104389912, "grad_norm": 0.12773124873638153, "learning_rate": 0.002, "loss": 2.3389, "step": 235390 }, { "epoch": 0.9099905676423745, "grad_norm": 0.1127619743347168, "learning_rate": 0.002, "loss": 2.3439, "step": 235400 }, { "epoch": 0.9100292248457578, "grad_norm": 0.09095638245344162, "learning_rate": 0.002, "loss": 2.3438, "step": 235410 }, { "epoch": 0.910067882049141, "grad_norm": 0.10773008316755295, "learning_rate": 0.002, "loss": 2.3457, "step": 235420 }, { "epoch": 0.9101065392525243, "grad_norm": 0.09691687673330307, "learning_rate": 0.002, "loss": 2.349, "step": 235430 }, { "epoch": 0.9101451964559076, "grad_norm": 0.10554266721010208, "learning_rate": 0.002, "loss": 2.3074, "step": 235440 }, { "epoch": 0.9101838536592909, "grad_norm": 0.14661146700382233, "learning_rate": 0.002, "loss": 2.3477, "step": 235450 }, { "epoch": 0.9102225108626741, "grad_norm": 0.10983286052942276, "learning_rate": 0.002, "loss": 2.3356, "step": 235460 }, { "epoch": 0.9102611680660574, "grad_norm": 0.1018320843577385, "learning_rate": 0.002, "loss": 2.3285, "step": 235470 }, { "epoch": 0.9102998252694408, "grad_norm": 0.10218097269535065, "learning_rate": 0.002, "loss": 2.3182, "step": 235480 }, { "epoch": 0.910338482472824, "grad_norm": 0.10761865228414536, "learning_rate": 0.002, "loss": 2.3397, "step": 235490 }, { "epoch": 0.9103771396762073, "grad_norm": 0.1041545644402504, "learning_rate": 0.002, "loss": 2.3307, "step": 235500 }, { "epoch": 0.9104157968795905, "grad_norm": 0.11502991616725922, "learning_rate": 0.002, "loss": 2.3418, "step": 235510 }, { "epoch": 0.9104544540829739, "grad_norm": 0.08802340924739838, "learning_rate": 0.002, "loss": 2.3469, "step": 235520 }, { "epoch": 0.9104931112863571, "grad_norm": 0.0944049060344696, "learning_rate": 0.002, "loss": 2.3407, "step": 235530 }, { "epoch": 0.9105317684897404, "grad_norm": 0.11132679879665375, "learning_rate": 0.002, "loss": 2.343, "step": 235540 }, { "epoch": 0.9105704256931236, "grad_norm": 0.11546406894922256, "learning_rate": 0.002, "loss": 2.3407, "step": 235550 }, { "epoch": 0.9106090828965069, "grad_norm": 0.09580505639314651, "learning_rate": 0.002, "loss": 2.3445, "step": 235560 }, { "epoch": 0.9106477400998902, "grad_norm": 0.09115248918533325, "learning_rate": 0.002, "loss": 2.3324, "step": 235570 }, { "epoch": 0.9106863973032735, "grad_norm": 0.09833600372076035, "learning_rate": 0.002, "loss": 2.3351, "step": 235580 }, { "epoch": 0.9107250545066568, "grad_norm": 0.10657432675361633, "learning_rate": 0.002, "loss": 2.3485, "step": 235590 }, { "epoch": 0.91076371171004, "grad_norm": 0.11782419681549072, "learning_rate": 0.002, "loss": 2.3305, "step": 235600 }, { "epoch": 0.9108023689134234, "grad_norm": 0.1045074462890625, "learning_rate": 0.002, "loss": 2.3285, "step": 235610 }, { "epoch": 0.9108410261168066, "grad_norm": 0.09903860092163086, "learning_rate": 0.002, "loss": 2.3418, "step": 235620 }, { "epoch": 0.9108796833201899, "grad_norm": 0.1405385136604309, "learning_rate": 0.002, "loss": 2.34, "step": 235630 }, { "epoch": 0.9109183405235731, "grad_norm": 0.1074899360537529, "learning_rate": 0.002, "loss": 2.3512, "step": 235640 }, { "epoch": 0.9109569977269565, "grad_norm": 0.11212822794914246, "learning_rate": 0.002, "loss": 2.3223, "step": 235650 }, { "epoch": 0.9109956549303397, "grad_norm": 0.0984254702925682, "learning_rate": 0.002, "loss": 2.3369, "step": 235660 }, { "epoch": 0.911034312133723, "grad_norm": 0.11358426511287689, "learning_rate": 0.002, "loss": 2.3404, "step": 235670 }, { "epoch": 0.9110729693371062, "grad_norm": 0.11881627887487411, "learning_rate": 0.002, "loss": 2.3382, "step": 235680 }, { "epoch": 0.9111116265404896, "grad_norm": 0.09670265763998032, "learning_rate": 0.002, "loss": 2.3335, "step": 235690 }, { "epoch": 0.9111502837438729, "grad_norm": 0.0945124551653862, "learning_rate": 0.002, "loss": 2.3496, "step": 235700 }, { "epoch": 0.9111889409472561, "grad_norm": 0.09245097637176514, "learning_rate": 0.002, "loss": 2.3484, "step": 235710 }, { "epoch": 0.9112275981506394, "grad_norm": 0.11580207198858261, "learning_rate": 0.002, "loss": 2.3456, "step": 235720 }, { "epoch": 0.9112662553540226, "grad_norm": 0.11975334584712982, "learning_rate": 0.002, "loss": 2.3518, "step": 235730 }, { "epoch": 0.911304912557406, "grad_norm": 0.10776902735233307, "learning_rate": 0.002, "loss": 2.3439, "step": 235740 }, { "epoch": 0.9113435697607892, "grad_norm": 0.1209128350019455, "learning_rate": 0.002, "loss": 2.3275, "step": 235750 }, { "epoch": 0.9113822269641725, "grad_norm": 0.10492858290672302, "learning_rate": 0.002, "loss": 2.3334, "step": 235760 }, { "epoch": 0.9114208841675557, "grad_norm": 0.10763996839523315, "learning_rate": 0.002, "loss": 2.324, "step": 235770 }, { "epoch": 0.9114595413709391, "grad_norm": 0.12180914729833603, "learning_rate": 0.002, "loss": 2.3429, "step": 235780 }, { "epoch": 0.9114981985743223, "grad_norm": 0.1048932746052742, "learning_rate": 0.002, "loss": 2.3297, "step": 235790 }, { "epoch": 0.9115368557777056, "grad_norm": 0.1038237065076828, "learning_rate": 0.002, "loss": 2.3439, "step": 235800 }, { "epoch": 0.9115755129810889, "grad_norm": 0.10732347518205643, "learning_rate": 0.002, "loss": 2.3467, "step": 235810 }, { "epoch": 0.9116141701844722, "grad_norm": 0.08621811866760254, "learning_rate": 0.002, "loss": 2.3455, "step": 235820 }, { "epoch": 0.9116528273878555, "grad_norm": 0.12064923346042633, "learning_rate": 0.002, "loss": 2.3361, "step": 235830 }, { "epoch": 0.9116914845912387, "grad_norm": 0.09688537567853928, "learning_rate": 0.002, "loss": 2.3266, "step": 235840 }, { "epoch": 0.911730141794622, "grad_norm": 0.11360147595405579, "learning_rate": 0.002, "loss": 2.3282, "step": 235850 }, { "epoch": 0.9117687989980053, "grad_norm": 0.11028148233890533, "learning_rate": 0.002, "loss": 2.3302, "step": 235860 }, { "epoch": 0.9118074562013886, "grad_norm": 0.10759235918521881, "learning_rate": 0.002, "loss": 2.324, "step": 235870 }, { "epoch": 0.9118461134047718, "grad_norm": 0.09140212833881378, "learning_rate": 0.002, "loss": 2.3173, "step": 235880 }, { "epoch": 0.9118847706081551, "grad_norm": 0.15058700740337372, "learning_rate": 0.002, "loss": 2.3407, "step": 235890 }, { "epoch": 0.9119234278115385, "grad_norm": 0.11045285314321518, "learning_rate": 0.002, "loss": 2.3462, "step": 235900 }, { "epoch": 0.9119620850149217, "grad_norm": 0.09746697545051575, "learning_rate": 0.002, "loss": 2.3255, "step": 235910 }, { "epoch": 0.912000742218305, "grad_norm": 0.11804035305976868, "learning_rate": 0.002, "loss": 2.3565, "step": 235920 }, { "epoch": 0.9120393994216882, "grad_norm": 0.098781518638134, "learning_rate": 0.002, "loss": 2.3399, "step": 235930 }, { "epoch": 0.9120780566250715, "grad_norm": 0.11542218923568726, "learning_rate": 0.002, "loss": 2.3416, "step": 235940 }, { "epoch": 0.9121167138284548, "grad_norm": 0.10973048210144043, "learning_rate": 0.002, "loss": 2.3458, "step": 235950 }, { "epoch": 0.9121553710318381, "grad_norm": 0.100016288459301, "learning_rate": 0.002, "loss": 2.3315, "step": 235960 }, { "epoch": 0.9121940282352213, "grad_norm": 0.1159529760479927, "learning_rate": 0.002, "loss": 2.3351, "step": 235970 }, { "epoch": 0.9122326854386046, "grad_norm": 0.09956318140029907, "learning_rate": 0.002, "loss": 2.3384, "step": 235980 }, { "epoch": 0.912271342641988, "grad_norm": 0.1090196743607521, "learning_rate": 0.002, "loss": 2.3542, "step": 235990 }, { "epoch": 0.9123099998453712, "grad_norm": 0.10806025564670563, "learning_rate": 0.002, "loss": 2.3336, "step": 236000 }, { "epoch": 0.9123486570487545, "grad_norm": 0.12057144939899445, "learning_rate": 0.002, "loss": 2.3206, "step": 236010 }, { "epoch": 0.9123873142521377, "grad_norm": 0.09386761486530304, "learning_rate": 0.002, "loss": 2.3265, "step": 236020 }, { "epoch": 0.9124259714555211, "grad_norm": 0.09779149293899536, "learning_rate": 0.002, "loss": 2.3432, "step": 236030 }, { "epoch": 0.9124646286589043, "grad_norm": 0.11577455699443817, "learning_rate": 0.002, "loss": 2.3406, "step": 236040 }, { "epoch": 0.9125032858622876, "grad_norm": 0.09398800879716873, "learning_rate": 0.002, "loss": 2.3355, "step": 236050 }, { "epoch": 0.9125419430656708, "grad_norm": 0.09047604352235794, "learning_rate": 0.002, "loss": 2.3311, "step": 236060 }, { "epoch": 0.9125806002690542, "grad_norm": 0.12920865416526794, "learning_rate": 0.002, "loss": 2.3345, "step": 236070 }, { "epoch": 0.9126192574724374, "grad_norm": 0.10851120203733444, "learning_rate": 0.002, "loss": 2.3413, "step": 236080 }, { "epoch": 0.9126579146758207, "grad_norm": 0.1102474182844162, "learning_rate": 0.002, "loss": 2.356, "step": 236090 }, { "epoch": 0.9126965718792039, "grad_norm": 0.10902708768844604, "learning_rate": 0.002, "loss": 2.3425, "step": 236100 }, { "epoch": 0.9127352290825872, "grad_norm": 0.0967753529548645, "learning_rate": 0.002, "loss": 2.3492, "step": 236110 }, { "epoch": 0.9127738862859706, "grad_norm": 0.11337198317050934, "learning_rate": 0.002, "loss": 2.3378, "step": 236120 }, { "epoch": 0.9128125434893538, "grad_norm": 0.11363617330789566, "learning_rate": 0.002, "loss": 2.323, "step": 236130 }, { "epoch": 0.9128512006927371, "grad_norm": 0.1053314283490181, "learning_rate": 0.002, "loss": 2.3475, "step": 236140 }, { "epoch": 0.9128898578961203, "grad_norm": 0.11603876203298569, "learning_rate": 0.002, "loss": 2.3437, "step": 236150 }, { "epoch": 0.9129285150995037, "grad_norm": 0.10162254422903061, "learning_rate": 0.002, "loss": 2.3355, "step": 236160 }, { "epoch": 0.9129671723028869, "grad_norm": 0.10282709449529648, "learning_rate": 0.002, "loss": 2.3485, "step": 236170 }, { "epoch": 0.9130058295062702, "grad_norm": 0.11641443520784378, "learning_rate": 0.002, "loss": 2.3486, "step": 236180 }, { "epoch": 0.9130444867096534, "grad_norm": 0.11190035939216614, "learning_rate": 0.002, "loss": 2.335, "step": 236190 }, { "epoch": 0.9130831439130368, "grad_norm": 0.10466290265321732, "learning_rate": 0.002, "loss": 2.3225, "step": 236200 }, { "epoch": 0.91312180111642, "grad_norm": 0.10124977678060532, "learning_rate": 0.002, "loss": 2.3549, "step": 236210 }, { "epoch": 0.9131604583198033, "grad_norm": 0.12094393372535706, "learning_rate": 0.002, "loss": 2.3267, "step": 236220 }, { "epoch": 0.9131991155231866, "grad_norm": 0.1088256910443306, "learning_rate": 0.002, "loss": 2.3511, "step": 236230 }, { "epoch": 0.9132377727265699, "grad_norm": 0.12817198038101196, "learning_rate": 0.002, "loss": 2.3416, "step": 236240 }, { "epoch": 0.9132764299299532, "grad_norm": 0.0982198640704155, "learning_rate": 0.002, "loss": 2.34, "step": 236250 }, { "epoch": 0.9133150871333364, "grad_norm": 0.1014394536614418, "learning_rate": 0.002, "loss": 2.3379, "step": 236260 }, { "epoch": 0.9133537443367197, "grad_norm": 0.11380444467067719, "learning_rate": 0.002, "loss": 2.3458, "step": 236270 }, { "epoch": 0.913392401540103, "grad_norm": 0.10722827911376953, "learning_rate": 0.002, "loss": 2.3374, "step": 236280 }, { "epoch": 0.9134310587434863, "grad_norm": 0.1058383509516716, "learning_rate": 0.002, "loss": 2.3299, "step": 236290 }, { "epoch": 0.9134697159468695, "grad_norm": 0.11134719848632812, "learning_rate": 0.002, "loss": 2.328, "step": 236300 }, { "epoch": 0.9135083731502528, "grad_norm": 0.10544317960739136, "learning_rate": 0.002, "loss": 2.334, "step": 236310 }, { "epoch": 0.913547030353636, "grad_norm": 0.10530325025320053, "learning_rate": 0.002, "loss": 2.3254, "step": 236320 }, { "epoch": 0.9135856875570194, "grad_norm": 0.11673657596111298, "learning_rate": 0.002, "loss": 2.3459, "step": 236330 }, { "epoch": 0.9136243447604027, "grad_norm": 0.10748418420553207, "learning_rate": 0.002, "loss": 2.3506, "step": 236340 }, { "epoch": 0.9136630019637859, "grad_norm": 0.10341091454029083, "learning_rate": 0.002, "loss": 2.3337, "step": 236350 }, { "epoch": 0.9137016591671692, "grad_norm": 0.09703026711940765, "learning_rate": 0.002, "loss": 2.3414, "step": 236360 }, { "epoch": 0.9137403163705525, "grad_norm": 0.12050410360097885, "learning_rate": 0.002, "loss": 2.3407, "step": 236370 }, { "epoch": 0.9137789735739358, "grad_norm": 0.09898030012845993, "learning_rate": 0.002, "loss": 2.3312, "step": 236380 }, { "epoch": 0.913817630777319, "grad_norm": 0.12472639232873917, "learning_rate": 0.002, "loss": 2.3467, "step": 236390 }, { "epoch": 0.9138562879807023, "grad_norm": 0.10752347111701965, "learning_rate": 0.002, "loss": 2.3391, "step": 236400 }, { "epoch": 0.9138949451840856, "grad_norm": 0.10269351303577423, "learning_rate": 0.002, "loss": 2.3347, "step": 236410 }, { "epoch": 0.9139336023874689, "grad_norm": 0.12358276546001434, "learning_rate": 0.002, "loss": 2.3211, "step": 236420 }, { "epoch": 0.9139722595908522, "grad_norm": 0.10591723769903183, "learning_rate": 0.002, "loss": 2.3376, "step": 236430 }, { "epoch": 0.9140109167942354, "grad_norm": 0.11765004694461823, "learning_rate": 0.002, "loss": 2.3198, "step": 236440 }, { "epoch": 0.9140495739976188, "grad_norm": 0.10490332543849945, "learning_rate": 0.002, "loss": 2.3324, "step": 236450 }, { "epoch": 0.914088231201002, "grad_norm": 0.09827043116092682, "learning_rate": 0.002, "loss": 2.3215, "step": 236460 }, { "epoch": 0.9141268884043853, "grad_norm": 0.1087154895067215, "learning_rate": 0.002, "loss": 2.3463, "step": 236470 }, { "epoch": 0.9141655456077685, "grad_norm": 0.10880590975284576, "learning_rate": 0.002, "loss": 2.343, "step": 236480 }, { "epoch": 0.9142042028111518, "grad_norm": 0.10290855914354324, "learning_rate": 0.002, "loss": 2.3341, "step": 236490 }, { "epoch": 0.9142428600145351, "grad_norm": 0.10896014422178268, "learning_rate": 0.002, "loss": 2.352, "step": 236500 }, { "epoch": 0.9142815172179184, "grad_norm": 0.12035848945379257, "learning_rate": 0.002, "loss": 2.3464, "step": 236510 }, { "epoch": 0.9143201744213016, "grad_norm": 0.10809791088104248, "learning_rate": 0.002, "loss": 2.3397, "step": 236520 }, { "epoch": 0.9143588316246849, "grad_norm": 0.10620492696762085, "learning_rate": 0.002, "loss": 2.3358, "step": 236530 }, { "epoch": 0.9143974888280683, "grad_norm": 0.10088472813367844, "learning_rate": 0.002, "loss": 2.341, "step": 236540 }, { "epoch": 0.9144361460314515, "grad_norm": 0.09428628534078598, "learning_rate": 0.002, "loss": 2.345, "step": 236550 }, { "epoch": 0.9144748032348348, "grad_norm": 0.11219991743564606, "learning_rate": 0.002, "loss": 2.3394, "step": 236560 }, { "epoch": 0.914513460438218, "grad_norm": 0.10495533049106598, "learning_rate": 0.002, "loss": 2.3399, "step": 236570 }, { "epoch": 0.9145521176416014, "grad_norm": 0.10331616550683975, "learning_rate": 0.002, "loss": 2.3489, "step": 236580 }, { "epoch": 0.9145907748449846, "grad_norm": 0.1208433285355568, "learning_rate": 0.002, "loss": 2.3441, "step": 236590 }, { "epoch": 0.9146294320483679, "grad_norm": 0.10469435900449753, "learning_rate": 0.002, "loss": 2.3333, "step": 236600 }, { "epoch": 0.9146680892517511, "grad_norm": 0.10302191227674484, "learning_rate": 0.002, "loss": 2.3198, "step": 236610 }, { "epoch": 0.9147067464551345, "grad_norm": 0.10604588687419891, "learning_rate": 0.002, "loss": 2.3491, "step": 236620 }, { "epoch": 0.9147454036585178, "grad_norm": 0.10278229415416718, "learning_rate": 0.002, "loss": 2.3405, "step": 236630 }, { "epoch": 0.914784060861901, "grad_norm": 0.10099917650222778, "learning_rate": 0.002, "loss": 2.3519, "step": 236640 }, { "epoch": 0.9148227180652843, "grad_norm": 0.10236818343400955, "learning_rate": 0.002, "loss": 2.3482, "step": 236650 }, { "epoch": 0.9148613752686675, "grad_norm": 0.1070743128657341, "learning_rate": 0.002, "loss": 2.3366, "step": 236660 }, { "epoch": 0.9149000324720509, "grad_norm": 0.10761519521474838, "learning_rate": 0.002, "loss": 2.3344, "step": 236670 }, { "epoch": 0.9149386896754341, "grad_norm": 0.1201874390244484, "learning_rate": 0.002, "loss": 2.3365, "step": 236680 }, { "epoch": 0.9149773468788174, "grad_norm": 0.10888718068599701, "learning_rate": 0.002, "loss": 2.3245, "step": 236690 }, { "epoch": 0.9150160040822006, "grad_norm": 0.10654252767562866, "learning_rate": 0.002, "loss": 2.3299, "step": 236700 }, { "epoch": 0.915054661285584, "grad_norm": 0.10304878652095795, "learning_rate": 0.002, "loss": 2.3468, "step": 236710 }, { "epoch": 0.9150933184889672, "grad_norm": 0.10618226230144501, "learning_rate": 0.002, "loss": 2.3441, "step": 236720 }, { "epoch": 0.9151319756923505, "grad_norm": 0.09469097852706909, "learning_rate": 0.002, "loss": 2.3372, "step": 236730 }, { "epoch": 0.9151706328957337, "grad_norm": 0.10468887537717819, "learning_rate": 0.002, "loss": 2.3501, "step": 236740 }, { "epoch": 0.9152092900991171, "grad_norm": 0.1156468614935875, "learning_rate": 0.002, "loss": 2.341, "step": 236750 }, { "epoch": 0.9152479473025004, "grad_norm": 0.11189986765384674, "learning_rate": 0.002, "loss": 2.3346, "step": 236760 }, { "epoch": 0.9152866045058836, "grad_norm": 0.10088913142681122, "learning_rate": 0.002, "loss": 2.3445, "step": 236770 }, { "epoch": 0.9153252617092669, "grad_norm": 0.09479598701000214, "learning_rate": 0.002, "loss": 2.3348, "step": 236780 }, { "epoch": 0.9153639189126502, "grad_norm": 0.09515275061130524, "learning_rate": 0.002, "loss": 2.3392, "step": 236790 }, { "epoch": 0.9154025761160335, "grad_norm": 0.10245562344789505, "learning_rate": 0.002, "loss": 2.331, "step": 236800 }, { "epoch": 0.9154412333194167, "grad_norm": 0.10947719216346741, "learning_rate": 0.002, "loss": 2.3504, "step": 236810 }, { "epoch": 0.9154798905228, "grad_norm": 0.10580668598413467, "learning_rate": 0.002, "loss": 2.3452, "step": 236820 }, { "epoch": 0.9155185477261834, "grad_norm": 0.09530249238014221, "learning_rate": 0.002, "loss": 2.3268, "step": 236830 }, { "epoch": 0.9155572049295666, "grad_norm": 0.13017530739307404, "learning_rate": 0.002, "loss": 2.338, "step": 236840 }, { "epoch": 0.9155958621329499, "grad_norm": 0.09841717034578323, "learning_rate": 0.002, "loss": 2.3526, "step": 236850 }, { "epoch": 0.9156345193363331, "grad_norm": 0.11240272223949432, "learning_rate": 0.002, "loss": 2.3328, "step": 236860 }, { "epoch": 0.9156731765397164, "grad_norm": 0.10746175050735474, "learning_rate": 0.002, "loss": 2.3511, "step": 236870 }, { "epoch": 0.9157118337430997, "grad_norm": 0.10742993652820587, "learning_rate": 0.002, "loss": 2.3271, "step": 236880 }, { "epoch": 0.915750490946483, "grad_norm": 0.10194318741559982, "learning_rate": 0.002, "loss": 2.3365, "step": 236890 }, { "epoch": 0.9157891481498662, "grad_norm": 0.10081067681312561, "learning_rate": 0.002, "loss": 2.3209, "step": 236900 }, { "epoch": 0.9158278053532495, "grad_norm": 0.10650233179330826, "learning_rate": 0.002, "loss": 2.3394, "step": 236910 }, { "epoch": 0.9158664625566328, "grad_norm": 0.10834755748510361, "learning_rate": 0.002, "loss": 2.3437, "step": 236920 }, { "epoch": 0.9159051197600161, "grad_norm": 0.1015695258975029, "learning_rate": 0.002, "loss": 2.3432, "step": 236930 }, { "epoch": 0.9159437769633993, "grad_norm": 0.10104040801525116, "learning_rate": 0.002, "loss": 2.3481, "step": 236940 }, { "epoch": 0.9159824341667826, "grad_norm": 0.11582430452108383, "learning_rate": 0.002, "loss": 2.3362, "step": 236950 }, { "epoch": 0.916021091370166, "grad_norm": 0.11996284872293472, "learning_rate": 0.002, "loss": 2.3315, "step": 236960 }, { "epoch": 0.9160597485735492, "grad_norm": 0.09137024730443954, "learning_rate": 0.002, "loss": 2.3398, "step": 236970 }, { "epoch": 0.9160984057769325, "grad_norm": 0.17459522187709808, "learning_rate": 0.002, "loss": 2.3383, "step": 236980 }, { "epoch": 0.9161370629803157, "grad_norm": 0.18123406171798706, "learning_rate": 0.002, "loss": 2.3544, "step": 236990 }, { "epoch": 0.9161757201836991, "grad_norm": 0.11360806226730347, "learning_rate": 0.002, "loss": 2.3394, "step": 237000 }, { "epoch": 0.9162143773870823, "grad_norm": 0.1017279401421547, "learning_rate": 0.002, "loss": 2.3451, "step": 237010 }, { "epoch": 0.9162530345904656, "grad_norm": 0.09900280088186264, "learning_rate": 0.002, "loss": 2.3519, "step": 237020 }, { "epoch": 0.9162916917938488, "grad_norm": 0.09855163097381592, "learning_rate": 0.002, "loss": 2.3472, "step": 237030 }, { "epoch": 0.9163303489972321, "grad_norm": 0.10126887261867523, "learning_rate": 0.002, "loss": 2.3335, "step": 237040 }, { "epoch": 0.9163690062006155, "grad_norm": 0.13277678191661835, "learning_rate": 0.002, "loss": 2.337, "step": 237050 }, { "epoch": 0.9164076634039987, "grad_norm": 0.11973906308412552, "learning_rate": 0.002, "loss": 2.3554, "step": 237060 }, { "epoch": 0.916446320607382, "grad_norm": 0.09777180850505829, "learning_rate": 0.002, "loss": 2.3353, "step": 237070 }, { "epoch": 0.9164849778107652, "grad_norm": 0.09020551294088364, "learning_rate": 0.002, "loss": 2.3218, "step": 237080 }, { "epoch": 0.9165236350141486, "grad_norm": 0.11669912189245224, "learning_rate": 0.002, "loss": 2.3457, "step": 237090 }, { "epoch": 0.9165622922175318, "grad_norm": 0.10898596793413162, "learning_rate": 0.002, "loss": 2.3609, "step": 237100 }, { "epoch": 0.9166009494209151, "grad_norm": 0.09796357899904251, "learning_rate": 0.002, "loss": 2.3465, "step": 237110 }, { "epoch": 0.9166396066242983, "grad_norm": 0.10369439423084259, "learning_rate": 0.002, "loss": 2.3433, "step": 237120 }, { "epoch": 0.9166782638276817, "grad_norm": 0.09319931268692017, "learning_rate": 0.002, "loss": 2.3251, "step": 237130 }, { "epoch": 0.916716921031065, "grad_norm": 0.08715303987264633, "learning_rate": 0.002, "loss": 2.3271, "step": 237140 }, { "epoch": 0.9167555782344482, "grad_norm": 0.09676074236631393, "learning_rate": 0.002, "loss": 2.3362, "step": 237150 }, { "epoch": 0.9167942354378315, "grad_norm": 0.10120874643325806, "learning_rate": 0.002, "loss": 2.3497, "step": 237160 }, { "epoch": 0.9168328926412148, "grad_norm": 0.10892495512962341, "learning_rate": 0.002, "loss": 2.3521, "step": 237170 }, { "epoch": 0.9168715498445981, "grad_norm": 0.10513322800397873, "learning_rate": 0.002, "loss": 2.3265, "step": 237180 }, { "epoch": 0.9169102070479813, "grad_norm": 0.11788270622491837, "learning_rate": 0.002, "loss": 2.328, "step": 237190 }, { "epoch": 0.9169488642513646, "grad_norm": 0.11247450858354568, "learning_rate": 0.002, "loss": 2.3496, "step": 237200 }, { "epoch": 0.9169875214547478, "grad_norm": 0.10070424526929855, "learning_rate": 0.002, "loss": 2.3353, "step": 237210 }, { "epoch": 0.9170261786581312, "grad_norm": 0.11775757372379303, "learning_rate": 0.002, "loss": 2.348, "step": 237220 }, { "epoch": 0.9170648358615144, "grad_norm": 0.09787221252918243, "learning_rate": 0.002, "loss": 2.3451, "step": 237230 }, { "epoch": 0.9171034930648977, "grad_norm": 0.10628794878721237, "learning_rate": 0.002, "loss": 2.3348, "step": 237240 }, { "epoch": 0.9171421502682809, "grad_norm": 0.11313777416944504, "learning_rate": 0.002, "loss": 2.3296, "step": 237250 }, { "epoch": 0.9171808074716643, "grad_norm": 0.12426883727312088, "learning_rate": 0.002, "loss": 2.3392, "step": 237260 }, { "epoch": 0.9172194646750476, "grad_norm": 0.11160732060670853, "learning_rate": 0.002, "loss": 2.3376, "step": 237270 }, { "epoch": 0.9172581218784308, "grad_norm": 0.13596603274345398, "learning_rate": 0.002, "loss": 2.3296, "step": 237280 }, { "epoch": 0.9172967790818141, "grad_norm": 0.11814679205417633, "learning_rate": 0.002, "loss": 2.3452, "step": 237290 }, { "epoch": 0.9173354362851974, "grad_norm": 0.10138362646102905, "learning_rate": 0.002, "loss": 2.3408, "step": 237300 }, { "epoch": 0.9173740934885807, "grad_norm": 0.09971548616886139, "learning_rate": 0.002, "loss": 2.3411, "step": 237310 }, { "epoch": 0.9174127506919639, "grad_norm": 0.12172117829322815, "learning_rate": 0.002, "loss": 2.3387, "step": 237320 }, { "epoch": 0.9174514078953472, "grad_norm": 0.10093490779399872, "learning_rate": 0.002, "loss": 2.3322, "step": 237330 }, { "epoch": 0.9174900650987305, "grad_norm": 0.13062350451946259, "learning_rate": 0.002, "loss": 2.3508, "step": 237340 }, { "epoch": 0.9175287223021138, "grad_norm": 0.09960246086120605, "learning_rate": 0.002, "loss": 2.3347, "step": 237350 }, { "epoch": 0.917567379505497, "grad_norm": 0.13497884571552277, "learning_rate": 0.002, "loss": 2.3582, "step": 237360 }, { "epoch": 0.9176060367088803, "grad_norm": 0.09554940462112427, "learning_rate": 0.002, "loss": 2.3437, "step": 237370 }, { "epoch": 0.9176446939122637, "grad_norm": 0.1013389453291893, "learning_rate": 0.002, "loss": 2.3383, "step": 237380 }, { "epoch": 0.9176833511156469, "grad_norm": 0.11148456484079361, "learning_rate": 0.002, "loss": 2.3531, "step": 237390 }, { "epoch": 0.9177220083190302, "grad_norm": 0.08915894478559494, "learning_rate": 0.002, "loss": 2.3381, "step": 237400 }, { "epoch": 0.9177606655224134, "grad_norm": 0.1016797125339508, "learning_rate": 0.002, "loss": 2.3371, "step": 237410 }, { "epoch": 0.9177993227257967, "grad_norm": 0.09685633331537247, "learning_rate": 0.002, "loss": 2.3236, "step": 237420 }, { "epoch": 0.91783797992918, "grad_norm": 0.11382238566875458, "learning_rate": 0.002, "loss": 2.3448, "step": 237430 }, { "epoch": 0.9178766371325633, "grad_norm": 0.11097710579633713, "learning_rate": 0.002, "loss": 2.3437, "step": 237440 }, { "epoch": 0.9179152943359465, "grad_norm": 0.09930465370416641, "learning_rate": 0.002, "loss": 2.3453, "step": 237450 }, { "epoch": 0.9179539515393298, "grad_norm": 0.11404252797365189, "learning_rate": 0.002, "loss": 2.3491, "step": 237460 }, { "epoch": 0.9179926087427132, "grad_norm": 0.10939346253871918, "learning_rate": 0.002, "loss": 2.3426, "step": 237470 }, { "epoch": 0.9180312659460964, "grad_norm": 0.11002951115369797, "learning_rate": 0.002, "loss": 2.351, "step": 237480 }, { "epoch": 0.9180699231494797, "grad_norm": 0.09177740663290024, "learning_rate": 0.002, "loss": 2.34, "step": 237490 }, { "epoch": 0.9181085803528629, "grad_norm": 0.09964168071746826, "learning_rate": 0.002, "loss": 2.3243, "step": 237500 }, { "epoch": 0.9181472375562463, "grad_norm": 0.11763204634189606, "learning_rate": 0.002, "loss": 2.3553, "step": 237510 }, { "epoch": 0.9181858947596295, "grad_norm": 0.1089107096195221, "learning_rate": 0.002, "loss": 2.3332, "step": 237520 }, { "epoch": 0.9182245519630128, "grad_norm": 0.10073132067918777, "learning_rate": 0.002, "loss": 2.3396, "step": 237530 }, { "epoch": 0.918263209166396, "grad_norm": 0.09807328134775162, "learning_rate": 0.002, "loss": 2.3257, "step": 237540 }, { "epoch": 0.9183018663697794, "grad_norm": 0.12475041300058365, "learning_rate": 0.002, "loss": 2.3529, "step": 237550 }, { "epoch": 0.9183405235731626, "grad_norm": 0.12477726489305496, "learning_rate": 0.002, "loss": 2.3338, "step": 237560 }, { "epoch": 0.9183791807765459, "grad_norm": 0.2605946362018585, "learning_rate": 0.002, "loss": 2.3335, "step": 237570 }, { "epoch": 0.9184178379799292, "grad_norm": 0.09945245087146759, "learning_rate": 0.002, "loss": 2.3477, "step": 237580 }, { "epoch": 0.9184564951833124, "grad_norm": 0.10772482305765152, "learning_rate": 0.002, "loss": 2.3216, "step": 237590 }, { "epoch": 0.9184951523866958, "grad_norm": 0.1343308538198471, "learning_rate": 0.002, "loss": 2.3552, "step": 237600 }, { "epoch": 0.918533809590079, "grad_norm": 0.10860157757997513, "learning_rate": 0.002, "loss": 2.3419, "step": 237610 }, { "epoch": 0.9185724667934623, "grad_norm": 0.23557095229625702, "learning_rate": 0.002, "loss": 2.3338, "step": 237620 }, { "epoch": 0.9186111239968455, "grad_norm": 0.09972846508026123, "learning_rate": 0.002, "loss": 2.3449, "step": 237630 }, { "epoch": 0.9186497812002289, "grad_norm": 0.1198374405503273, "learning_rate": 0.002, "loss": 2.3339, "step": 237640 }, { "epoch": 0.9186884384036121, "grad_norm": 0.09672702848911285, "learning_rate": 0.002, "loss": 2.3328, "step": 237650 }, { "epoch": 0.9187270956069954, "grad_norm": 0.10464288294315338, "learning_rate": 0.002, "loss": 2.3534, "step": 237660 }, { "epoch": 0.9187657528103786, "grad_norm": 0.11569831520318985, "learning_rate": 0.002, "loss": 2.325, "step": 237670 }, { "epoch": 0.918804410013762, "grad_norm": 0.10307703167200089, "learning_rate": 0.002, "loss": 2.3298, "step": 237680 }, { "epoch": 0.9188430672171453, "grad_norm": 0.10956721007823944, "learning_rate": 0.002, "loss": 2.3446, "step": 237690 }, { "epoch": 0.9188817244205285, "grad_norm": 0.11558805406093597, "learning_rate": 0.002, "loss": 2.3329, "step": 237700 }, { "epoch": 0.9189203816239118, "grad_norm": 0.09959175437688828, "learning_rate": 0.002, "loss": 2.3508, "step": 237710 }, { "epoch": 0.9189590388272951, "grad_norm": 0.11553435772657394, "learning_rate": 0.002, "loss": 2.3517, "step": 237720 }, { "epoch": 0.9189976960306784, "grad_norm": 0.10117180645465851, "learning_rate": 0.002, "loss": 2.3238, "step": 237730 }, { "epoch": 0.9190363532340616, "grad_norm": 0.09056204557418823, "learning_rate": 0.002, "loss": 2.329, "step": 237740 }, { "epoch": 0.9190750104374449, "grad_norm": 0.10344156622886658, "learning_rate": 0.002, "loss": 2.3274, "step": 237750 }, { "epoch": 0.9191136676408282, "grad_norm": 0.12726548314094543, "learning_rate": 0.002, "loss": 2.3341, "step": 237760 }, { "epoch": 0.9191523248442115, "grad_norm": 0.10069930553436279, "learning_rate": 0.002, "loss": 2.3386, "step": 237770 }, { "epoch": 0.9191909820475948, "grad_norm": 0.1296139508485794, "learning_rate": 0.002, "loss": 2.3413, "step": 237780 }, { "epoch": 0.919229639250978, "grad_norm": 0.10524491220712662, "learning_rate": 0.002, "loss": 2.3333, "step": 237790 }, { "epoch": 0.9192682964543613, "grad_norm": 0.08840267360210419, "learning_rate": 0.002, "loss": 2.3563, "step": 237800 }, { "epoch": 0.9193069536577446, "grad_norm": 0.10867176949977875, "learning_rate": 0.002, "loss": 2.3307, "step": 237810 }, { "epoch": 0.9193456108611279, "grad_norm": 0.10205422341823578, "learning_rate": 0.002, "loss": 2.3494, "step": 237820 }, { "epoch": 0.9193842680645111, "grad_norm": 0.0953567698597908, "learning_rate": 0.002, "loss": 2.327, "step": 237830 }, { "epoch": 0.9194229252678944, "grad_norm": 0.16813983023166656, "learning_rate": 0.002, "loss": 2.3339, "step": 237840 }, { "epoch": 0.9194615824712777, "grad_norm": 0.10395313054323196, "learning_rate": 0.002, "loss": 2.3627, "step": 237850 }, { "epoch": 0.919500239674661, "grad_norm": 0.10055957734584808, "learning_rate": 0.002, "loss": 2.3461, "step": 237860 }, { "epoch": 0.9195388968780442, "grad_norm": 0.09994012117385864, "learning_rate": 0.002, "loss": 2.3338, "step": 237870 }, { "epoch": 0.9195775540814275, "grad_norm": 0.10582417994737625, "learning_rate": 0.002, "loss": 2.3422, "step": 237880 }, { "epoch": 0.9196162112848109, "grad_norm": 0.1077481359243393, "learning_rate": 0.002, "loss": 2.3403, "step": 237890 }, { "epoch": 0.9196548684881941, "grad_norm": 0.10383408516645432, "learning_rate": 0.002, "loss": 2.3596, "step": 237900 }, { "epoch": 0.9196935256915774, "grad_norm": 0.09966985136270523, "learning_rate": 0.002, "loss": 2.3502, "step": 237910 }, { "epoch": 0.9197321828949606, "grad_norm": 0.09650438278913498, "learning_rate": 0.002, "loss": 2.3363, "step": 237920 }, { "epoch": 0.919770840098344, "grad_norm": 0.11974448710680008, "learning_rate": 0.002, "loss": 2.3361, "step": 237930 }, { "epoch": 0.9198094973017272, "grad_norm": 0.10671281069517136, "learning_rate": 0.002, "loss": 2.3438, "step": 237940 }, { "epoch": 0.9198481545051105, "grad_norm": 0.10487055033445358, "learning_rate": 0.002, "loss": 2.3486, "step": 237950 }, { "epoch": 0.9198868117084937, "grad_norm": 0.10726416110992432, "learning_rate": 0.002, "loss": 2.3593, "step": 237960 }, { "epoch": 0.919925468911877, "grad_norm": 0.10068479925394058, "learning_rate": 0.002, "loss": 2.3443, "step": 237970 }, { "epoch": 0.9199641261152603, "grad_norm": 0.10443327575922012, "learning_rate": 0.002, "loss": 2.3346, "step": 237980 }, { "epoch": 0.9200027833186436, "grad_norm": 0.09864545613527298, "learning_rate": 0.002, "loss": 2.3276, "step": 237990 }, { "epoch": 0.9200414405220269, "grad_norm": 0.09579648077487946, "learning_rate": 0.002, "loss": 2.3393, "step": 238000 }, { "epoch": 0.9200800977254101, "grad_norm": 0.12088295072317123, "learning_rate": 0.002, "loss": 2.3466, "step": 238010 }, { "epoch": 0.9201187549287935, "grad_norm": 0.11437676101922989, "learning_rate": 0.002, "loss": 2.3428, "step": 238020 }, { "epoch": 0.9201574121321767, "grad_norm": 0.09059667587280273, "learning_rate": 0.002, "loss": 2.3422, "step": 238030 }, { "epoch": 0.92019606933556, "grad_norm": 0.11837775260210037, "learning_rate": 0.002, "loss": 2.3479, "step": 238040 }, { "epoch": 0.9202347265389432, "grad_norm": 0.10595916956663132, "learning_rate": 0.002, "loss": 2.3409, "step": 238050 }, { "epoch": 0.9202733837423266, "grad_norm": 0.12788908183574677, "learning_rate": 0.002, "loss": 2.352, "step": 238060 }, { "epoch": 0.9203120409457098, "grad_norm": 0.102462038397789, "learning_rate": 0.002, "loss": 2.342, "step": 238070 }, { "epoch": 0.9203506981490931, "grad_norm": 0.10429543256759644, "learning_rate": 0.002, "loss": 2.347, "step": 238080 }, { "epoch": 0.9203893553524763, "grad_norm": 0.10784517228603363, "learning_rate": 0.002, "loss": 2.333, "step": 238090 }, { "epoch": 0.9204280125558597, "grad_norm": 0.12058625370264053, "learning_rate": 0.002, "loss": 2.3445, "step": 238100 }, { "epoch": 0.920466669759243, "grad_norm": 0.10487110912799835, "learning_rate": 0.002, "loss": 2.3349, "step": 238110 }, { "epoch": 0.9205053269626262, "grad_norm": 0.1079377681016922, "learning_rate": 0.002, "loss": 2.3247, "step": 238120 }, { "epoch": 0.9205439841660095, "grad_norm": 0.11104608327150345, "learning_rate": 0.002, "loss": 2.3457, "step": 238130 }, { "epoch": 0.9205826413693927, "grad_norm": 0.12503919005393982, "learning_rate": 0.002, "loss": 2.3512, "step": 238140 }, { "epoch": 0.9206212985727761, "grad_norm": 0.10990816354751587, "learning_rate": 0.002, "loss": 2.3567, "step": 238150 }, { "epoch": 0.9206599557761593, "grad_norm": 0.11753184348344803, "learning_rate": 0.002, "loss": 2.3198, "step": 238160 }, { "epoch": 0.9206986129795426, "grad_norm": 0.11438991874456406, "learning_rate": 0.002, "loss": 2.3344, "step": 238170 }, { "epoch": 0.9207372701829258, "grad_norm": 0.11788298934698105, "learning_rate": 0.002, "loss": 2.3318, "step": 238180 }, { "epoch": 0.9207759273863092, "grad_norm": 0.1082969680428505, "learning_rate": 0.002, "loss": 2.3319, "step": 238190 }, { "epoch": 0.9208145845896925, "grad_norm": 0.12433382868766785, "learning_rate": 0.002, "loss": 2.3447, "step": 238200 }, { "epoch": 0.9208532417930757, "grad_norm": 0.12203840911388397, "learning_rate": 0.002, "loss": 2.3319, "step": 238210 }, { "epoch": 0.920891898996459, "grad_norm": 0.128168523311615, "learning_rate": 0.002, "loss": 2.33, "step": 238220 }, { "epoch": 0.9209305561998423, "grad_norm": 0.09707853198051453, "learning_rate": 0.002, "loss": 2.3554, "step": 238230 }, { "epoch": 0.9209692134032256, "grad_norm": 0.12274660915136337, "learning_rate": 0.002, "loss": 2.3389, "step": 238240 }, { "epoch": 0.9210078706066088, "grad_norm": 0.10505618900060654, "learning_rate": 0.002, "loss": 2.333, "step": 238250 }, { "epoch": 0.9210465278099921, "grad_norm": 0.08565858006477356, "learning_rate": 0.002, "loss": 2.3417, "step": 238260 }, { "epoch": 0.9210851850133754, "grad_norm": 0.10795660316944122, "learning_rate": 0.002, "loss": 2.3504, "step": 238270 }, { "epoch": 0.9211238422167587, "grad_norm": 0.1365453600883484, "learning_rate": 0.002, "loss": 2.343, "step": 238280 }, { "epoch": 0.921162499420142, "grad_norm": 0.11069447547197342, "learning_rate": 0.002, "loss": 2.3535, "step": 238290 }, { "epoch": 0.9212011566235252, "grad_norm": 0.11392568796873093, "learning_rate": 0.002, "loss": 2.3315, "step": 238300 }, { "epoch": 0.9212398138269086, "grad_norm": 0.11084969341754913, "learning_rate": 0.002, "loss": 2.3374, "step": 238310 }, { "epoch": 0.9212784710302918, "grad_norm": 0.1112273707985878, "learning_rate": 0.002, "loss": 2.3329, "step": 238320 }, { "epoch": 0.9213171282336751, "grad_norm": 0.10770048201084137, "learning_rate": 0.002, "loss": 2.3351, "step": 238330 }, { "epoch": 0.9213557854370583, "grad_norm": 0.10765349119901657, "learning_rate": 0.002, "loss": 2.3367, "step": 238340 }, { "epoch": 0.9213944426404416, "grad_norm": 0.10025139898061752, "learning_rate": 0.002, "loss": 2.3427, "step": 238350 }, { "epoch": 0.9214330998438249, "grad_norm": 0.1013595461845398, "learning_rate": 0.002, "loss": 2.3341, "step": 238360 }, { "epoch": 0.9214717570472082, "grad_norm": 0.09769086539745331, "learning_rate": 0.002, "loss": 2.331, "step": 238370 }, { "epoch": 0.9215104142505914, "grad_norm": 0.09939318150281906, "learning_rate": 0.002, "loss": 2.3359, "step": 238380 }, { "epoch": 0.9215490714539747, "grad_norm": 0.10779769718647003, "learning_rate": 0.002, "loss": 2.3494, "step": 238390 }, { "epoch": 0.921587728657358, "grad_norm": 0.11342509835958481, "learning_rate": 0.002, "loss": 2.3404, "step": 238400 }, { "epoch": 0.9216263858607413, "grad_norm": 0.10702987760305405, "learning_rate": 0.002, "loss": 2.3353, "step": 238410 }, { "epoch": 0.9216650430641246, "grad_norm": 0.10754323750734329, "learning_rate": 0.002, "loss": 2.3512, "step": 238420 }, { "epoch": 0.9217037002675078, "grad_norm": 0.10378240048885345, "learning_rate": 0.002, "loss": 2.3351, "step": 238430 }, { "epoch": 0.9217423574708912, "grad_norm": 0.09795913845300674, "learning_rate": 0.002, "loss": 2.3278, "step": 238440 }, { "epoch": 0.9217810146742744, "grad_norm": 0.10659509152173996, "learning_rate": 0.002, "loss": 2.3342, "step": 238450 }, { "epoch": 0.9218196718776577, "grad_norm": 0.1028730571269989, "learning_rate": 0.002, "loss": 2.3131, "step": 238460 }, { "epoch": 0.9218583290810409, "grad_norm": 0.11338159441947937, "learning_rate": 0.002, "loss": 2.3406, "step": 238470 }, { "epoch": 0.9218969862844243, "grad_norm": 0.0938754603266716, "learning_rate": 0.002, "loss": 2.3249, "step": 238480 }, { "epoch": 0.9219356434878075, "grad_norm": 0.10216149687767029, "learning_rate": 0.002, "loss": 2.3368, "step": 238490 }, { "epoch": 0.9219743006911908, "grad_norm": 0.0989452376961708, "learning_rate": 0.002, "loss": 2.3488, "step": 238500 }, { "epoch": 0.922012957894574, "grad_norm": 0.10219037532806396, "learning_rate": 0.002, "loss": 2.3306, "step": 238510 }, { "epoch": 0.9220516150979573, "grad_norm": 0.11570502817630768, "learning_rate": 0.002, "loss": 2.345, "step": 238520 }, { "epoch": 0.9220902723013407, "grad_norm": 0.10561752319335938, "learning_rate": 0.002, "loss": 2.3355, "step": 238530 }, { "epoch": 0.9221289295047239, "grad_norm": 0.1113727018237114, "learning_rate": 0.002, "loss": 2.3363, "step": 238540 }, { "epoch": 0.9221675867081072, "grad_norm": 0.11061765998601913, "learning_rate": 0.002, "loss": 2.3466, "step": 238550 }, { "epoch": 0.9222062439114904, "grad_norm": 0.10693425685167313, "learning_rate": 0.002, "loss": 2.3307, "step": 238560 }, { "epoch": 0.9222449011148738, "grad_norm": 0.1509937047958374, "learning_rate": 0.002, "loss": 2.3354, "step": 238570 }, { "epoch": 0.922283558318257, "grad_norm": 0.10090623050928116, "learning_rate": 0.002, "loss": 2.3286, "step": 238580 }, { "epoch": 0.9223222155216403, "grad_norm": 0.10902487486600876, "learning_rate": 0.002, "loss": 2.3445, "step": 238590 }, { "epoch": 0.9223608727250235, "grad_norm": 0.10726834833621979, "learning_rate": 0.002, "loss": 2.313, "step": 238600 }, { "epoch": 0.9223995299284069, "grad_norm": 0.12696325778961182, "learning_rate": 0.002, "loss": 2.3415, "step": 238610 }, { "epoch": 0.9224381871317902, "grad_norm": 0.10057271271944046, "learning_rate": 0.002, "loss": 2.3472, "step": 238620 }, { "epoch": 0.9224768443351734, "grad_norm": 0.10777167975902557, "learning_rate": 0.002, "loss": 2.3363, "step": 238630 }, { "epoch": 0.9225155015385567, "grad_norm": 0.1140720397233963, "learning_rate": 0.002, "loss": 2.3251, "step": 238640 }, { "epoch": 0.92255415874194, "grad_norm": 0.09188432991504669, "learning_rate": 0.002, "loss": 2.3339, "step": 238650 }, { "epoch": 0.9225928159453233, "grad_norm": 0.10969416052103043, "learning_rate": 0.002, "loss": 2.3403, "step": 238660 }, { "epoch": 0.9226314731487065, "grad_norm": 0.09891535341739655, "learning_rate": 0.002, "loss": 2.3398, "step": 238670 }, { "epoch": 0.9226701303520898, "grad_norm": 0.09819822758436203, "learning_rate": 0.002, "loss": 2.3459, "step": 238680 }, { "epoch": 0.9227087875554731, "grad_norm": 0.09815497696399689, "learning_rate": 0.002, "loss": 2.3375, "step": 238690 }, { "epoch": 0.9227474447588564, "grad_norm": 0.1160065159201622, "learning_rate": 0.002, "loss": 2.3437, "step": 238700 }, { "epoch": 0.9227861019622396, "grad_norm": 0.11586955189704895, "learning_rate": 0.002, "loss": 2.331, "step": 238710 }, { "epoch": 0.9228247591656229, "grad_norm": 0.12375964969396591, "learning_rate": 0.002, "loss": 2.3486, "step": 238720 }, { "epoch": 0.9228634163690062, "grad_norm": 0.10358904302120209, "learning_rate": 0.002, "loss": 2.3338, "step": 238730 }, { "epoch": 0.9229020735723895, "grad_norm": 0.09508515149354935, "learning_rate": 0.002, "loss": 2.3433, "step": 238740 }, { "epoch": 0.9229407307757728, "grad_norm": 0.09402194619178772, "learning_rate": 0.002, "loss": 2.3352, "step": 238750 }, { "epoch": 0.922979387979156, "grad_norm": 0.11810307204723358, "learning_rate": 0.002, "loss": 2.3357, "step": 238760 }, { "epoch": 0.9230180451825393, "grad_norm": 0.11552152037620544, "learning_rate": 0.002, "loss": 2.3305, "step": 238770 }, { "epoch": 0.9230567023859226, "grad_norm": 0.10606750100851059, "learning_rate": 0.002, "loss": 2.3425, "step": 238780 }, { "epoch": 0.9230953595893059, "grad_norm": 0.08817689120769501, "learning_rate": 0.002, "loss": 2.3397, "step": 238790 }, { "epoch": 0.9231340167926891, "grad_norm": 0.11065953969955444, "learning_rate": 0.002, "loss": 2.3505, "step": 238800 }, { "epoch": 0.9231726739960724, "grad_norm": 0.1170061007142067, "learning_rate": 0.002, "loss": 2.3383, "step": 238810 }, { "epoch": 0.9232113311994558, "grad_norm": 0.11106377840042114, "learning_rate": 0.002, "loss": 2.3345, "step": 238820 }, { "epoch": 0.923249988402839, "grad_norm": 0.10830987244844437, "learning_rate": 0.002, "loss": 2.3561, "step": 238830 }, { "epoch": 0.9232886456062223, "grad_norm": 0.11701057106256485, "learning_rate": 0.002, "loss": 2.3478, "step": 238840 }, { "epoch": 0.9233273028096055, "grad_norm": 0.09716720879077911, "learning_rate": 0.002, "loss": 2.3349, "step": 238850 }, { "epoch": 0.9233659600129889, "grad_norm": 0.0941588282585144, "learning_rate": 0.002, "loss": 2.3334, "step": 238860 }, { "epoch": 0.9234046172163721, "grad_norm": 0.09816788882017136, "learning_rate": 0.002, "loss": 2.3486, "step": 238870 }, { "epoch": 0.9234432744197554, "grad_norm": 0.11149384081363678, "learning_rate": 0.002, "loss": 2.3316, "step": 238880 }, { "epoch": 0.9234819316231386, "grad_norm": 0.11008346080780029, "learning_rate": 0.002, "loss": 2.3306, "step": 238890 }, { "epoch": 0.9235205888265219, "grad_norm": 0.10621821135282516, "learning_rate": 0.002, "loss": 2.3425, "step": 238900 }, { "epoch": 0.9235592460299052, "grad_norm": 0.09759090095758438, "learning_rate": 0.002, "loss": 2.3294, "step": 238910 }, { "epoch": 0.9235979032332885, "grad_norm": 0.10635235160589218, "learning_rate": 0.002, "loss": 2.3154, "step": 238920 }, { "epoch": 0.9236365604366717, "grad_norm": 0.10297021269798279, "learning_rate": 0.002, "loss": 2.3391, "step": 238930 }, { "epoch": 0.923675217640055, "grad_norm": 0.1188259944319725, "learning_rate": 0.002, "loss": 2.345, "step": 238940 }, { "epoch": 0.9237138748434384, "grad_norm": 0.11682503670454025, "learning_rate": 0.002, "loss": 2.3374, "step": 238950 }, { "epoch": 0.9237525320468216, "grad_norm": 0.11106524616479874, "learning_rate": 0.002, "loss": 2.3504, "step": 238960 }, { "epoch": 0.9237911892502049, "grad_norm": 0.09653395414352417, "learning_rate": 0.002, "loss": 2.348, "step": 238970 }, { "epoch": 0.9238298464535881, "grad_norm": 0.11367765069007874, "learning_rate": 0.002, "loss": 2.3251, "step": 238980 }, { "epoch": 0.9238685036569715, "grad_norm": 0.09777499735355377, "learning_rate": 0.002, "loss": 2.3432, "step": 238990 }, { "epoch": 0.9239071608603547, "grad_norm": 0.10271391272544861, "learning_rate": 0.002, "loss": 2.3404, "step": 239000 }, { "epoch": 0.923945818063738, "grad_norm": 0.13675880432128906, "learning_rate": 0.002, "loss": 2.3372, "step": 239010 }, { "epoch": 0.9239844752671212, "grad_norm": 0.09634064137935638, "learning_rate": 0.002, "loss": 2.3353, "step": 239020 }, { "epoch": 0.9240231324705046, "grad_norm": 0.14566011726856232, "learning_rate": 0.002, "loss": 2.3377, "step": 239030 }, { "epoch": 0.9240617896738879, "grad_norm": 0.10591859370470047, "learning_rate": 0.002, "loss": 2.3468, "step": 239040 }, { "epoch": 0.9241004468772711, "grad_norm": 0.11468186974525452, "learning_rate": 0.002, "loss": 2.3289, "step": 239050 }, { "epoch": 0.9241391040806544, "grad_norm": 0.11868032813072205, "learning_rate": 0.002, "loss": 2.3323, "step": 239060 }, { "epoch": 0.9241777612840376, "grad_norm": 0.24066656827926636, "learning_rate": 0.002, "loss": 2.3543, "step": 239070 }, { "epoch": 0.924216418487421, "grad_norm": 0.10723575204610825, "learning_rate": 0.002, "loss": 2.335, "step": 239080 }, { "epoch": 0.9242550756908042, "grad_norm": 0.14120464026927948, "learning_rate": 0.002, "loss": 2.3417, "step": 239090 }, { "epoch": 0.9242937328941875, "grad_norm": 0.11646062880754471, "learning_rate": 0.002, "loss": 2.3468, "step": 239100 }, { "epoch": 0.9243323900975707, "grad_norm": 0.11130773276090622, "learning_rate": 0.002, "loss": 2.3517, "step": 239110 }, { "epoch": 0.9243710473009541, "grad_norm": 0.12386900931596756, "learning_rate": 0.002, "loss": 2.3474, "step": 239120 }, { "epoch": 0.9244097045043373, "grad_norm": 0.09747873246669769, "learning_rate": 0.002, "loss": 2.3431, "step": 239130 }, { "epoch": 0.9244483617077206, "grad_norm": 0.12462751567363739, "learning_rate": 0.002, "loss": 2.33, "step": 239140 }, { "epoch": 0.9244870189111039, "grad_norm": 0.09987830370664597, "learning_rate": 0.002, "loss": 2.3513, "step": 239150 }, { "epoch": 0.9245256761144872, "grad_norm": 0.08740975707769394, "learning_rate": 0.002, "loss": 2.3315, "step": 239160 }, { "epoch": 0.9245643333178705, "grad_norm": 0.10647254437208176, "learning_rate": 0.002, "loss": 2.3511, "step": 239170 }, { "epoch": 0.9246029905212537, "grad_norm": 0.11191874742507935, "learning_rate": 0.002, "loss": 2.3531, "step": 239180 }, { "epoch": 0.924641647724637, "grad_norm": 0.10716775804758072, "learning_rate": 0.002, "loss": 2.3452, "step": 239190 }, { "epoch": 0.9246803049280203, "grad_norm": 0.11306207627058029, "learning_rate": 0.002, "loss": 2.3337, "step": 239200 }, { "epoch": 0.9247189621314036, "grad_norm": 0.11484642326831818, "learning_rate": 0.002, "loss": 2.3381, "step": 239210 }, { "epoch": 0.9247576193347868, "grad_norm": 0.12202706187963486, "learning_rate": 0.002, "loss": 2.3392, "step": 239220 }, { "epoch": 0.9247962765381701, "grad_norm": 0.10952723026275635, "learning_rate": 0.002, "loss": 2.3176, "step": 239230 }, { "epoch": 0.9248349337415535, "grad_norm": 0.09301014989614487, "learning_rate": 0.002, "loss": 2.3252, "step": 239240 }, { "epoch": 0.9248735909449367, "grad_norm": 0.12277715653181076, "learning_rate": 0.002, "loss": 2.3361, "step": 239250 }, { "epoch": 0.92491224814832, "grad_norm": 0.1088462620973587, "learning_rate": 0.002, "loss": 2.3428, "step": 239260 }, { "epoch": 0.9249509053517032, "grad_norm": 0.10005609691143036, "learning_rate": 0.002, "loss": 2.3294, "step": 239270 }, { "epoch": 0.9249895625550865, "grad_norm": 0.09543203562498093, "learning_rate": 0.002, "loss": 2.3502, "step": 239280 }, { "epoch": 0.9250282197584698, "grad_norm": 0.11769011616706848, "learning_rate": 0.002, "loss": 2.3449, "step": 239290 }, { "epoch": 0.9250668769618531, "grad_norm": 0.09605714678764343, "learning_rate": 0.002, "loss": 2.33, "step": 239300 }, { "epoch": 0.9251055341652363, "grad_norm": 0.10205802321434021, "learning_rate": 0.002, "loss": 2.3408, "step": 239310 }, { "epoch": 0.9251441913686196, "grad_norm": 0.10329537838697433, "learning_rate": 0.002, "loss": 2.3461, "step": 239320 }, { "epoch": 0.925182848572003, "grad_norm": 0.11891063302755356, "learning_rate": 0.002, "loss": 2.3355, "step": 239330 }, { "epoch": 0.9252215057753862, "grad_norm": 0.12052151560783386, "learning_rate": 0.002, "loss": 2.3369, "step": 239340 }, { "epoch": 0.9252601629787695, "grad_norm": 0.12861767411231995, "learning_rate": 0.002, "loss": 2.329, "step": 239350 }, { "epoch": 0.9252988201821527, "grad_norm": 0.09616052359342575, "learning_rate": 0.002, "loss": 2.3376, "step": 239360 }, { "epoch": 0.9253374773855361, "grad_norm": 0.09918008744716644, "learning_rate": 0.002, "loss": 2.3273, "step": 239370 }, { "epoch": 0.9253761345889193, "grad_norm": 0.1037401482462883, "learning_rate": 0.002, "loss": 2.3576, "step": 239380 }, { "epoch": 0.9254147917923026, "grad_norm": 0.10741420835256577, "learning_rate": 0.002, "loss": 2.3472, "step": 239390 }, { "epoch": 0.9254534489956858, "grad_norm": 0.10255386680364609, "learning_rate": 0.002, "loss": 2.3467, "step": 239400 }, { "epoch": 0.9254921061990692, "grad_norm": 0.11019117385149002, "learning_rate": 0.002, "loss": 2.3454, "step": 239410 }, { "epoch": 0.9255307634024524, "grad_norm": 0.10579998791217804, "learning_rate": 0.002, "loss": 2.3376, "step": 239420 }, { "epoch": 0.9255694206058357, "grad_norm": 0.1270904242992401, "learning_rate": 0.002, "loss": 2.3428, "step": 239430 }, { "epoch": 0.9256080778092189, "grad_norm": 0.17731136083602905, "learning_rate": 0.002, "loss": 2.321, "step": 239440 }, { "epoch": 0.9256467350126022, "grad_norm": 0.09464423358440399, "learning_rate": 0.002, "loss": 2.3425, "step": 239450 }, { "epoch": 0.9256853922159856, "grad_norm": 0.10702194273471832, "learning_rate": 0.002, "loss": 2.3363, "step": 239460 }, { "epoch": 0.9257240494193688, "grad_norm": 0.10974325239658356, "learning_rate": 0.002, "loss": 2.3394, "step": 239470 }, { "epoch": 0.9257627066227521, "grad_norm": 0.10220817476511002, "learning_rate": 0.002, "loss": 2.3301, "step": 239480 }, { "epoch": 0.9258013638261353, "grad_norm": 0.10747115314006805, "learning_rate": 0.002, "loss": 2.343, "step": 239490 }, { "epoch": 0.9258400210295187, "grad_norm": 0.09978757798671722, "learning_rate": 0.002, "loss": 2.3425, "step": 239500 }, { "epoch": 0.9258786782329019, "grad_norm": 0.11532643437385559, "learning_rate": 0.002, "loss": 2.3382, "step": 239510 }, { "epoch": 0.9259173354362852, "grad_norm": 0.09514863044023514, "learning_rate": 0.002, "loss": 2.3135, "step": 239520 }, { "epoch": 0.9259559926396684, "grad_norm": 0.09954030066728592, "learning_rate": 0.002, "loss": 2.3284, "step": 239530 }, { "epoch": 0.9259946498430518, "grad_norm": 0.12030558288097382, "learning_rate": 0.002, "loss": 2.356, "step": 239540 }, { "epoch": 0.926033307046435, "grad_norm": 0.12196508795022964, "learning_rate": 0.002, "loss": 2.3272, "step": 239550 }, { "epoch": 0.9260719642498183, "grad_norm": 0.09463625401258469, "learning_rate": 0.002, "loss": 2.3433, "step": 239560 }, { "epoch": 0.9261106214532016, "grad_norm": 0.11292003840208054, "learning_rate": 0.002, "loss": 2.3352, "step": 239570 }, { "epoch": 0.9261492786565849, "grad_norm": 0.10972633212804794, "learning_rate": 0.002, "loss": 2.3299, "step": 239580 }, { "epoch": 0.9261879358599682, "grad_norm": 0.11431489139795303, "learning_rate": 0.002, "loss": 2.3402, "step": 239590 }, { "epoch": 0.9262265930633514, "grad_norm": 0.1135575994849205, "learning_rate": 0.002, "loss": 2.3438, "step": 239600 }, { "epoch": 0.9262652502667347, "grad_norm": 0.13443417847156525, "learning_rate": 0.002, "loss": 2.3384, "step": 239610 }, { "epoch": 0.9263039074701179, "grad_norm": 0.130362406373024, "learning_rate": 0.002, "loss": 2.3406, "step": 239620 }, { "epoch": 0.9263425646735013, "grad_norm": 0.09769725054502487, "learning_rate": 0.002, "loss": 2.3244, "step": 239630 }, { "epoch": 0.9263812218768845, "grad_norm": 0.11235304176807404, "learning_rate": 0.002, "loss": 2.3455, "step": 239640 }, { "epoch": 0.9264198790802678, "grad_norm": 0.11400007456541061, "learning_rate": 0.002, "loss": 2.3418, "step": 239650 }, { "epoch": 0.926458536283651, "grad_norm": 0.11323840916156769, "learning_rate": 0.002, "loss": 2.3327, "step": 239660 }, { "epoch": 0.9264971934870344, "grad_norm": 0.10057419538497925, "learning_rate": 0.002, "loss": 2.3328, "step": 239670 }, { "epoch": 0.9265358506904177, "grad_norm": 0.0986354649066925, "learning_rate": 0.002, "loss": 2.3379, "step": 239680 }, { "epoch": 0.9265745078938009, "grad_norm": 0.14333440363407135, "learning_rate": 0.002, "loss": 2.3446, "step": 239690 }, { "epoch": 0.9266131650971842, "grad_norm": 0.10923615843057632, "learning_rate": 0.002, "loss": 2.3351, "step": 239700 }, { "epoch": 0.9266518223005675, "grad_norm": 0.10601142793893814, "learning_rate": 0.002, "loss": 2.3339, "step": 239710 }, { "epoch": 0.9266904795039508, "grad_norm": 0.117704838514328, "learning_rate": 0.002, "loss": 2.3411, "step": 239720 }, { "epoch": 0.926729136707334, "grad_norm": 0.1263555884361267, "learning_rate": 0.002, "loss": 2.334, "step": 239730 }, { "epoch": 0.9267677939107173, "grad_norm": 0.10091309249401093, "learning_rate": 0.002, "loss": 2.3393, "step": 239740 }, { "epoch": 0.9268064511141006, "grad_norm": 0.11265761405229568, "learning_rate": 0.002, "loss": 2.3393, "step": 239750 }, { "epoch": 0.9268451083174839, "grad_norm": 0.09833136200904846, "learning_rate": 0.002, "loss": 2.3414, "step": 239760 }, { "epoch": 0.9268837655208672, "grad_norm": 0.0956064909696579, "learning_rate": 0.002, "loss": 2.3581, "step": 239770 }, { "epoch": 0.9269224227242504, "grad_norm": 0.11916255205869675, "learning_rate": 0.002, "loss": 2.3172, "step": 239780 }, { "epoch": 0.9269610799276338, "grad_norm": 0.11998631805181503, "learning_rate": 0.002, "loss": 2.3421, "step": 239790 }, { "epoch": 0.926999737131017, "grad_norm": 0.10860449075698853, "learning_rate": 0.002, "loss": 2.3407, "step": 239800 }, { "epoch": 0.9270383943344003, "grad_norm": 0.10798289626836777, "learning_rate": 0.002, "loss": 2.3353, "step": 239810 }, { "epoch": 0.9270770515377835, "grad_norm": 0.10168136656284332, "learning_rate": 0.002, "loss": 2.3506, "step": 239820 }, { "epoch": 0.9271157087411668, "grad_norm": 0.10898247361183167, "learning_rate": 0.002, "loss": 2.3486, "step": 239830 }, { "epoch": 0.9271543659445501, "grad_norm": 0.10820508748292923, "learning_rate": 0.002, "loss": 2.3513, "step": 239840 }, { "epoch": 0.9271930231479334, "grad_norm": 0.11451074481010437, "learning_rate": 0.002, "loss": 2.3365, "step": 239850 }, { "epoch": 0.9272316803513166, "grad_norm": 0.0991031676530838, "learning_rate": 0.002, "loss": 2.3402, "step": 239860 }, { "epoch": 0.9272703375546999, "grad_norm": 0.106524758040905, "learning_rate": 0.002, "loss": 2.3354, "step": 239870 }, { "epoch": 0.9273089947580833, "grad_norm": 0.11314364522695541, "learning_rate": 0.002, "loss": 2.3454, "step": 239880 }, { "epoch": 0.9273476519614665, "grad_norm": 0.10054526478052139, "learning_rate": 0.002, "loss": 2.3355, "step": 239890 }, { "epoch": 0.9273863091648498, "grad_norm": 0.13372132182121277, "learning_rate": 0.002, "loss": 2.3479, "step": 239900 }, { "epoch": 0.927424966368233, "grad_norm": 0.09951240569353104, "learning_rate": 0.002, "loss": 2.3272, "step": 239910 }, { "epoch": 0.9274636235716164, "grad_norm": 0.10693838447332382, "learning_rate": 0.002, "loss": 2.3321, "step": 239920 }, { "epoch": 0.9275022807749996, "grad_norm": 0.11133580654859543, "learning_rate": 0.002, "loss": 2.3351, "step": 239930 }, { "epoch": 0.9275409379783829, "grad_norm": 0.11053361743688583, "learning_rate": 0.002, "loss": 2.3302, "step": 239940 }, { "epoch": 0.9275795951817661, "grad_norm": 0.10781632363796234, "learning_rate": 0.002, "loss": 2.334, "step": 239950 }, { "epoch": 0.9276182523851495, "grad_norm": 0.13119134306907654, "learning_rate": 0.002, "loss": 2.3338, "step": 239960 }, { "epoch": 0.9276569095885328, "grad_norm": 0.10563421249389648, "learning_rate": 0.002, "loss": 2.3665, "step": 239970 }, { "epoch": 0.927695566791916, "grad_norm": 0.1123589426279068, "learning_rate": 0.002, "loss": 2.3403, "step": 239980 }, { "epoch": 0.9277342239952993, "grad_norm": 0.09124656766653061, "learning_rate": 0.002, "loss": 2.3444, "step": 239990 }, { "epoch": 0.9277728811986825, "grad_norm": 0.0967029258608818, "learning_rate": 0.002, "loss": 2.3332, "step": 240000 }, { "epoch": 0.9278115384020659, "grad_norm": 0.13630369305610657, "learning_rate": 0.002, "loss": 2.3346, "step": 240010 }, { "epoch": 0.9278501956054491, "grad_norm": 0.10799701511859894, "learning_rate": 0.002, "loss": 2.3417, "step": 240020 }, { "epoch": 0.9278888528088324, "grad_norm": 0.10726698487997055, "learning_rate": 0.002, "loss": 2.3482, "step": 240030 }, { "epoch": 0.9279275100122156, "grad_norm": 0.10995513200759888, "learning_rate": 0.002, "loss": 2.3458, "step": 240040 }, { "epoch": 0.927966167215599, "grad_norm": 0.11034388840198517, "learning_rate": 0.002, "loss": 2.3486, "step": 240050 }, { "epoch": 0.9280048244189822, "grad_norm": 0.10742886364459991, "learning_rate": 0.002, "loss": 2.3522, "step": 240060 }, { "epoch": 0.9280434816223655, "grad_norm": 0.108866848051548, "learning_rate": 0.002, "loss": 2.3395, "step": 240070 }, { "epoch": 0.9280821388257487, "grad_norm": 0.10716560482978821, "learning_rate": 0.002, "loss": 2.3411, "step": 240080 }, { "epoch": 0.9281207960291321, "grad_norm": 0.08997724950313568, "learning_rate": 0.002, "loss": 2.3264, "step": 240090 }, { "epoch": 0.9281594532325154, "grad_norm": 0.12874490022659302, "learning_rate": 0.002, "loss": 2.3376, "step": 240100 }, { "epoch": 0.9281981104358986, "grad_norm": 0.11026809364557266, "learning_rate": 0.002, "loss": 2.3382, "step": 240110 }, { "epoch": 0.9282367676392819, "grad_norm": 0.11839725822210312, "learning_rate": 0.002, "loss": 2.3554, "step": 240120 }, { "epoch": 0.9282754248426652, "grad_norm": 0.10574904084205627, "learning_rate": 0.002, "loss": 2.3461, "step": 240130 }, { "epoch": 0.9283140820460485, "grad_norm": 0.09895447641611099, "learning_rate": 0.002, "loss": 2.3403, "step": 240140 }, { "epoch": 0.9283527392494317, "grad_norm": 0.10029957443475723, "learning_rate": 0.002, "loss": 2.3442, "step": 240150 }, { "epoch": 0.928391396452815, "grad_norm": 0.11841758340597153, "learning_rate": 0.002, "loss": 2.3452, "step": 240160 }, { "epoch": 0.9284300536561984, "grad_norm": 0.10327313840389252, "learning_rate": 0.002, "loss": 2.3446, "step": 240170 }, { "epoch": 0.9284687108595816, "grad_norm": 0.10563918203115463, "learning_rate": 0.002, "loss": 2.3559, "step": 240180 }, { "epoch": 0.9285073680629649, "grad_norm": 0.13680148124694824, "learning_rate": 0.002, "loss": 2.3408, "step": 240190 }, { "epoch": 0.9285460252663481, "grad_norm": 0.10711419582366943, "learning_rate": 0.002, "loss": 2.3294, "step": 240200 }, { "epoch": 0.9285846824697314, "grad_norm": 0.09397318959236145, "learning_rate": 0.002, "loss": 2.3374, "step": 240210 }, { "epoch": 0.9286233396731147, "grad_norm": 0.10058710724115372, "learning_rate": 0.002, "loss": 2.3498, "step": 240220 }, { "epoch": 0.928661996876498, "grad_norm": 0.1029769703745842, "learning_rate": 0.002, "loss": 2.3399, "step": 240230 }, { "epoch": 0.9287006540798812, "grad_norm": 0.11130029708147049, "learning_rate": 0.002, "loss": 2.3368, "step": 240240 }, { "epoch": 0.9287393112832645, "grad_norm": 0.09373640269041061, "learning_rate": 0.002, "loss": 2.3374, "step": 240250 }, { "epoch": 0.9287779684866478, "grad_norm": 0.10493456572294235, "learning_rate": 0.002, "loss": 2.3246, "step": 240260 }, { "epoch": 0.9288166256900311, "grad_norm": 0.10127705335617065, "learning_rate": 0.002, "loss": 2.3543, "step": 240270 }, { "epoch": 0.9288552828934143, "grad_norm": 0.0896872952580452, "learning_rate": 0.002, "loss": 2.3341, "step": 240280 }, { "epoch": 0.9288939400967976, "grad_norm": 0.11261755228042603, "learning_rate": 0.002, "loss": 2.342, "step": 240290 }, { "epoch": 0.928932597300181, "grad_norm": 0.09712719917297363, "learning_rate": 0.002, "loss": 2.3488, "step": 240300 }, { "epoch": 0.9289712545035642, "grad_norm": 0.1010216623544693, "learning_rate": 0.002, "loss": 2.3358, "step": 240310 }, { "epoch": 0.9290099117069475, "grad_norm": 0.10653354972600937, "learning_rate": 0.002, "loss": 2.3397, "step": 240320 }, { "epoch": 0.9290485689103307, "grad_norm": 0.09720896184444427, "learning_rate": 0.002, "loss": 2.3341, "step": 240330 }, { "epoch": 0.9290872261137141, "grad_norm": 0.09222602099180222, "learning_rate": 0.002, "loss": 2.3329, "step": 240340 }, { "epoch": 0.9291258833170973, "grad_norm": 0.09605705738067627, "learning_rate": 0.002, "loss": 2.3407, "step": 240350 }, { "epoch": 0.9291645405204806, "grad_norm": 0.11048048734664917, "learning_rate": 0.002, "loss": 2.3484, "step": 240360 }, { "epoch": 0.9292031977238638, "grad_norm": 0.10950416326522827, "learning_rate": 0.002, "loss": 2.3234, "step": 240370 }, { "epoch": 0.9292418549272471, "grad_norm": 0.10134270042181015, "learning_rate": 0.002, "loss": 2.3476, "step": 240380 }, { "epoch": 0.9292805121306305, "grad_norm": 0.09597347676753998, "learning_rate": 0.002, "loss": 2.3275, "step": 240390 }, { "epoch": 0.9293191693340137, "grad_norm": 0.1042867973446846, "learning_rate": 0.002, "loss": 2.3364, "step": 240400 }, { "epoch": 0.929357826537397, "grad_norm": 0.11298928409814835, "learning_rate": 0.002, "loss": 2.3441, "step": 240410 }, { "epoch": 0.9293964837407802, "grad_norm": 0.09755025804042816, "learning_rate": 0.002, "loss": 2.3347, "step": 240420 }, { "epoch": 0.9294351409441636, "grad_norm": 0.10076082497835159, "learning_rate": 0.002, "loss": 2.3468, "step": 240430 }, { "epoch": 0.9294737981475468, "grad_norm": 0.11371766030788422, "learning_rate": 0.002, "loss": 2.3365, "step": 240440 }, { "epoch": 0.9295124553509301, "grad_norm": 0.09814074635505676, "learning_rate": 0.002, "loss": 2.3394, "step": 240450 }, { "epoch": 0.9295511125543133, "grad_norm": 0.1093234196305275, "learning_rate": 0.002, "loss": 2.3313, "step": 240460 }, { "epoch": 0.9295897697576967, "grad_norm": 0.1031750962138176, "learning_rate": 0.002, "loss": 2.3379, "step": 240470 }, { "epoch": 0.92962842696108, "grad_norm": 0.11120779067277908, "learning_rate": 0.002, "loss": 2.3373, "step": 240480 }, { "epoch": 0.9296670841644632, "grad_norm": 0.11129971593618393, "learning_rate": 0.002, "loss": 2.3366, "step": 240490 }, { "epoch": 0.9297057413678464, "grad_norm": 0.12351063638925552, "learning_rate": 0.002, "loss": 2.3346, "step": 240500 }, { "epoch": 0.9297443985712298, "grad_norm": 0.0975901409983635, "learning_rate": 0.002, "loss": 2.3273, "step": 240510 }, { "epoch": 0.9297830557746131, "grad_norm": 0.10659302026033401, "learning_rate": 0.002, "loss": 2.3359, "step": 240520 }, { "epoch": 0.9298217129779963, "grad_norm": 0.13084393739700317, "learning_rate": 0.002, "loss": 2.3435, "step": 240530 }, { "epoch": 0.9298603701813796, "grad_norm": 0.10836442559957504, "learning_rate": 0.002, "loss": 2.3259, "step": 240540 }, { "epoch": 0.9298990273847628, "grad_norm": 0.11104420572519302, "learning_rate": 0.002, "loss": 2.3291, "step": 240550 }, { "epoch": 0.9299376845881462, "grad_norm": 0.1024618074297905, "learning_rate": 0.002, "loss": 2.3359, "step": 240560 }, { "epoch": 0.9299763417915294, "grad_norm": 0.11527860164642334, "learning_rate": 0.002, "loss": 2.3423, "step": 240570 }, { "epoch": 0.9300149989949127, "grad_norm": 0.08908341079950333, "learning_rate": 0.002, "loss": 2.3349, "step": 240580 }, { "epoch": 0.9300536561982959, "grad_norm": 0.09538891166448593, "learning_rate": 0.002, "loss": 2.3454, "step": 240590 }, { "epoch": 0.9300923134016793, "grad_norm": 0.09617014229297638, "learning_rate": 0.002, "loss": 2.3463, "step": 240600 }, { "epoch": 0.9301309706050626, "grad_norm": 0.12283073365688324, "learning_rate": 0.002, "loss": 2.3422, "step": 240610 }, { "epoch": 0.9301696278084458, "grad_norm": 0.099635049700737, "learning_rate": 0.002, "loss": 2.3433, "step": 240620 }, { "epoch": 0.9302082850118291, "grad_norm": 0.09536898136138916, "learning_rate": 0.002, "loss": 2.3318, "step": 240630 }, { "epoch": 0.9302469422152124, "grad_norm": 0.13080519437789917, "learning_rate": 0.002, "loss": 2.3263, "step": 240640 }, { "epoch": 0.9302855994185957, "grad_norm": 0.095926932990551, "learning_rate": 0.002, "loss": 2.3174, "step": 240650 }, { "epoch": 0.9303242566219789, "grad_norm": 0.1012723371386528, "learning_rate": 0.002, "loss": 2.3464, "step": 240660 }, { "epoch": 0.9303629138253622, "grad_norm": 0.10640578716993332, "learning_rate": 0.002, "loss": 2.3311, "step": 240670 }, { "epoch": 0.9304015710287455, "grad_norm": 0.09938972443342209, "learning_rate": 0.002, "loss": 2.3303, "step": 240680 }, { "epoch": 0.9304402282321288, "grad_norm": 0.09295855462551117, "learning_rate": 0.002, "loss": 2.3456, "step": 240690 }, { "epoch": 0.930478885435512, "grad_norm": 0.10483425855636597, "learning_rate": 0.002, "loss": 2.3543, "step": 240700 }, { "epoch": 0.9305175426388953, "grad_norm": 0.1012699156999588, "learning_rate": 0.002, "loss": 2.3439, "step": 240710 }, { "epoch": 0.9305561998422787, "grad_norm": 0.09559882432222366, "learning_rate": 0.002, "loss": 2.3388, "step": 240720 }, { "epoch": 0.9305948570456619, "grad_norm": 0.12409879267215729, "learning_rate": 0.002, "loss": 2.3346, "step": 240730 }, { "epoch": 0.9306335142490452, "grad_norm": 0.10803597420454025, "learning_rate": 0.002, "loss": 2.3377, "step": 240740 }, { "epoch": 0.9306721714524284, "grad_norm": 0.0958801731467247, "learning_rate": 0.002, "loss": 2.3348, "step": 240750 }, { "epoch": 0.9307108286558117, "grad_norm": 0.11432503163814545, "learning_rate": 0.002, "loss": 2.3448, "step": 240760 }, { "epoch": 0.930749485859195, "grad_norm": 0.11685143411159515, "learning_rate": 0.002, "loss": 2.3428, "step": 240770 }, { "epoch": 0.9307881430625783, "grad_norm": 0.11428704112768173, "learning_rate": 0.002, "loss": 2.3424, "step": 240780 }, { "epoch": 0.9308268002659615, "grad_norm": 0.10317996144294739, "learning_rate": 0.002, "loss": 2.3477, "step": 240790 }, { "epoch": 0.9308654574693448, "grad_norm": 0.10810055583715439, "learning_rate": 0.002, "loss": 2.3593, "step": 240800 }, { "epoch": 0.9309041146727282, "grad_norm": 0.1088952124118805, "learning_rate": 0.002, "loss": 2.3292, "step": 240810 }, { "epoch": 0.9309427718761114, "grad_norm": 0.09635835886001587, "learning_rate": 0.002, "loss": 2.3304, "step": 240820 }, { "epoch": 0.9309814290794947, "grad_norm": 0.1363757699728012, "learning_rate": 0.002, "loss": 2.3433, "step": 240830 }, { "epoch": 0.9310200862828779, "grad_norm": 0.10321643948554993, "learning_rate": 0.002, "loss": 2.3612, "step": 240840 }, { "epoch": 0.9310587434862613, "grad_norm": 0.08783495426177979, "learning_rate": 0.002, "loss": 2.3381, "step": 240850 }, { "epoch": 0.9310974006896445, "grad_norm": 0.1054040715098381, "learning_rate": 0.002, "loss": 2.3215, "step": 240860 }, { "epoch": 0.9311360578930278, "grad_norm": 0.1170346811413765, "learning_rate": 0.002, "loss": 2.3524, "step": 240870 }, { "epoch": 0.931174715096411, "grad_norm": 0.10788789391517639, "learning_rate": 0.002, "loss": 2.3445, "step": 240880 }, { "epoch": 0.9312133722997944, "grad_norm": 0.10687334090471268, "learning_rate": 0.002, "loss": 2.3349, "step": 240890 }, { "epoch": 0.9312520295031776, "grad_norm": 0.12458331137895584, "learning_rate": 0.002, "loss": 2.3444, "step": 240900 }, { "epoch": 0.9312906867065609, "grad_norm": 0.10293076932430267, "learning_rate": 0.002, "loss": 2.3377, "step": 240910 }, { "epoch": 0.9313293439099442, "grad_norm": 0.11584310233592987, "learning_rate": 0.002, "loss": 2.3525, "step": 240920 }, { "epoch": 0.9313680011133274, "grad_norm": 0.1265709102153778, "learning_rate": 0.002, "loss": 2.3321, "step": 240930 }, { "epoch": 0.9314066583167108, "grad_norm": 0.15816473960876465, "learning_rate": 0.002, "loss": 2.3411, "step": 240940 }, { "epoch": 0.931445315520094, "grad_norm": 0.11346606910228729, "learning_rate": 0.002, "loss": 2.3474, "step": 240950 }, { "epoch": 0.9314839727234773, "grad_norm": 0.10622525960206985, "learning_rate": 0.002, "loss": 2.342, "step": 240960 }, { "epoch": 0.9315226299268605, "grad_norm": 0.09166400879621506, "learning_rate": 0.002, "loss": 2.3411, "step": 240970 }, { "epoch": 0.9315612871302439, "grad_norm": 0.1251622587442398, "learning_rate": 0.002, "loss": 2.3371, "step": 240980 }, { "epoch": 0.9315999443336271, "grad_norm": 0.09309102594852448, "learning_rate": 0.002, "loss": 2.36, "step": 240990 }, { "epoch": 0.9316386015370104, "grad_norm": 0.09251026809215546, "learning_rate": 0.002, "loss": 2.3288, "step": 241000 }, { "epoch": 0.9316772587403936, "grad_norm": 0.09816301614046097, "learning_rate": 0.002, "loss": 2.3534, "step": 241010 }, { "epoch": 0.931715915943777, "grad_norm": 0.11350065469741821, "learning_rate": 0.002, "loss": 2.3223, "step": 241020 }, { "epoch": 0.9317545731471603, "grad_norm": 0.1071467250585556, "learning_rate": 0.002, "loss": 2.3492, "step": 241030 }, { "epoch": 0.9317932303505435, "grad_norm": 0.09898655116558075, "learning_rate": 0.002, "loss": 2.353, "step": 241040 }, { "epoch": 0.9318318875539268, "grad_norm": 0.15836842358112335, "learning_rate": 0.002, "loss": 2.3218, "step": 241050 }, { "epoch": 0.9318705447573101, "grad_norm": 0.10508646816015244, "learning_rate": 0.002, "loss": 2.3353, "step": 241060 }, { "epoch": 0.9319092019606934, "grad_norm": 0.1096983551979065, "learning_rate": 0.002, "loss": 2.3258, "step": 241070 }, { "epoch": 0.9319478591640766, "grad_norm": 0.12404420971870422, "learning_rate": 0.002, "loss": 2.3451, "step": 241080 }, { "epoch": 0.9319865163674599, "grad_norm": 0.1084880456328392, "learning_rate": 0.002, "loss": 2.3286, "step": 241090 }, { "epoch": 0.9320251735708432, "grad_norm": 0.0965440422296524, "learning_rate": 0.002, "loss": 2.3359, "step": 241100 }, { "epoch": 0.9320638307742265, "grad_norm": 0.10915783047676086, "learning_rate": 0.002, "loss": 2.352, "step": 241110 }, { "epoch": 0.9321024879776098, "grad_norm": 0.1070767492055893, "learning_rate": 0.002, "loss": 2.3441, "step": 241120 }, { "epoch": 0.932141145180993, "grad_norm": 0.09906446188688278, "learning_rate": 0.002, "loss": 2.3333, "step": 241130 }, { "epoch": 0.9321798023843763, "grad_norm": 0.10002636164426804, "learning_rate": 0.002, "loss": 2.3405, "step": 241140 }, { "epoch": 0.9322184595877596, "grad_norm": 0.10802870243787766, "learning_rate": 0.002, "loss": 2.3398, "step": 241150 }, { "epoch": 0.9322571167911429, "grad_norm": 0.10186523199081421, "learning_rate": 0.002, "loss": 2.3557, "step": 241160 }, { "epoch": 0.9322957739945261, "grad_norm": 0.10572991520166397, "learning_rate": 0.002, "loss": 2.3499, "step": 241170 }, { "epoch": 0.9323344311979094, "grad_norm": 0.10857295989990234, "learning_rate": 0.002, "loss": 2.3412, "step": 241180 }, { "epoch": 0.9323730884012927, "grad_norm": 0.09867467731237411, "learning_rate": 0.002, "loss": 2.3368, "step": 241190 }, { "epoch": 0.932411745604676, "grad_norm": 0.1304645538330078, "learning_rate": 0.002, "loss": 2.311, "step": 241200 }, { "epoch": 0.9324504028080592, "grad_norm": 0.11418737471103668, "learning_rate": 0.002, "loss": 2.354, "step": 241210 }, { "epoch": 0.9324890600114425, "grad_norm": 0.09130564332008362, "learning_rate": 0.002, "loss": 2.3302, "step": 241220 }, { "epoch": 0.9325277172148259, "grad_norm": 0.11547353118658066, "learning_rate": 0.002, "loss": 2.3297, "step": 241230 }, { "epoch": 0.9325663744182091, "grad_norm": 0.10222837328910828, "learning_rate": 0.002, "loss": 2.3273, "step": 241240 }, { "epoch": 0.9326050316215924, "grad_norm": 0.11761835217475891, "learning_rate": 0.002, "loss": 2.3172, "step": 241250 }, { "epoch": 0.9326436888249756, "grad_norm": 0.105369433760643, "learning_rate": 0.002, "loss": 2.3271, "step": 241260 }, { "epoch": 0.932682346028359, "grad_norm": 0.11019854247570038, "learning_rate": 0.002, "loss": 2.3276, "step": 241270 }, { "epoch": 0.9327210032317422, "grad_norm": 0.11037706583738327, "learning_rate": 0.002, "loss": 2.3357, "step": 241280 }, { "epoch": 0.9327596604351255, "grad_norm": 0.09246725589036942, "learning_rate": 0.002, "loss": 2.3397, "step": 241290 }, { "epoch": 0.9327983176385087, "grad_norm": 0.11534950137138367, "learning_rate": 0.002, "loss": 2.3307, "step": 241300 }, { "epoch": 0.932836974841892, "grad_norm": 0.09918901324272156, "learning_rate": 0.002, "loss": 2.3491, "step": 241310 }, { "epoch": 0.9328756320452753, "grad_norm": 0.09459531307220459, "learning_rate": 0.002, "loss": 2.3412, "step": 241320 }, { "epoch": 0.9329142892486586, "grad_norm": 0.10851433873176575, "learning_rate": 0.002, "loss": 2.3343, "step": 241330 }, { "epoch": 0.9329529464520419, "grad_norm": 0.10667916387319565, "learning_rate": 0.002, "loss": 2.3314, "step": 241340 }, { "epoch": 0.9329916036554251, "grad_norm": 0.0948222279548645, "learning_rate": 0.002, "loss": 2.3513, "step": 241350 }, { "epoch": 0.9330302608588085, "grad_norm": 0.1204134076833725, "learning_rate": 0.002, "loss": 2.3385, "step": 241360 }, { "epoch": 0.9330689180621917, "grad_norm": 0.09408220648765564, "learning_rate": 0.002, "loss": 2.3274, "step": 241370 }, { "epoch": 0.933107575265575, "grad_norm": 0.10862472653388977, "learning_rate": 0.002, "loss": 2.3454, "step": 241380 }, { "epoch": 0.9331462324689582, "grad_norm": 0.10759086906909943, "learning_rate": 0.002, "loss": 2.3455, "step": 241390 }, { "epoch": 0.9331848896723416, "grad_norm": 0.09432414174079895, "learning_rate": 0.002, "loss": 2.3257, "step": 241400 }, { "epoch": 0.9332235468757248, "grad_norm": 0.09516098350286484, "learning_rate": 0.002, "loss": 2.3514, "step": 241410 }, { "epoch": 0.9332622040791081, "grad_norm": 0.11452768743038177, "learning_rate": 0.002, "loss": 2.3449, "step": 241420 }, { "epoch": 0.9333008612824913, "grad_norm": 0.11694354563951492, "learning_rate": 0.002, "loss": 2.322, "step": 241430 }, { "epoch": 0.9333395184858747, "grad_norm": 0.08922861516475677, "learning_rate": 0.002, "loss": 2.3165, "step": 241440 }, { "epoch": 0.933378175689258, "grad_norm": 0.10552657395601273, "learning_rate": 0.002, "loss": 2.3398, "step": 241450 }, { "epoch": 0.9334168328926412, "grad_norm": 0.0998677909374237, "learning_rate": 0.002, "loss": 2.3378, "step": 241460 }, { "epoch": 0.9334554900960245, "grad_norm": 0.12375766783952713, "learning_rate": 0.002, "loss": 2.3246, "step": 241470 }, { "epoch": 0.9334941472994077, "grad_norm": 0.09344843029975891, "learning_rate": 0.002, "loss": 2.3301, "step": 241480 }, { "epoch": 0.9335328045027911, "grad_norm": 0.10336374491453171, "learning_rate": 0.002, "loss": 2.3331, "step": 241490 }, { "epoch": 0.9335714617061743, "grad_norm": 0.11773277819156647, "learning_rate": 0.002, "loss": 2.3466, "step": 241500 }, { "epoch": 0.9336101189095576, "grad_norm": 0.1019284725189209, "learning_rate": 0.002, "loss": 2.3354, "step": 241510 }, { "epoch": 0.9336487761129408, "grad_norm": 0.09431376308202744, "learning_rate": 0.002, "loss": 2.3257, "step": 241520 }, { "epoch": 0.9336874333163242, "grad_norm": 0.10802071541547775, "learning_rate": 0.002, "loss": 2.3478, "step": 241530 }, { "epoch": 0.9337260905197075, "grad_norm": 0.10070924460887909, "learning_rate": 0.002, "loss": 2.3274, "step": 241540 }, { "epoch": 0.9337647477230907, "grad_norm": 0.10516620427370071, "learning_rate": 0.002, "loss": 2.3391, "step": 241550 }, { "epoch": 0.933803404926474, "grad_norm": 0.10847561806440353, "learning_rate": 0.002, "loss": 2.3128, "step": 241560 }, { "epoch": 0.9338420621298573, "grad_norm": 0.10606521368026733, "learning_rate": 0.002, "loss": 2.3361, "step": 241570 }, { "epoch": 0.9338807193332406, "grad_norm": 0.09973947703838348, "learning_rate": 0.002, "loss": 2.323, "step": 241580 }, { "epoch": 0.9339193765366238, "grad_norm": 0.0958481878042221, "learning_rate": 0.002, "loss": 2.3444, "step": 241590 }, { "epoch": 0.9339580337400071, "grad_norm": 0.10320821404457092, "learning_rate": 0.002, "loss": 2.3396, "step": 241600 }, { "epoch": 0.9339966909433904, "grad_norm": 0.10255026817321777, "learning_rate": 0.002, "loss": 2.3526, "step": 241610 }, { "epoch": 0.9340353481467737, "grad_norm": 0.09719021618366241, "learning_rate": 0.002, "loss": 2.3362, "step": 241620 }, { "epoch": 0.9340740053501569, "grad_norm": 0.10893251746892929, "learning_rate": 0.002, "loss": 2.3494, "step": 241630 }, { "epoch": 0.9341126625535402, "grad_norm": 0.10667164623737335, "learning_rate": 0.002, "loss": 2.3437, "step": 241640 }, { "epoch": 0.9341513197569236, "grad_norm": 0.13029393553733826, "learning_rate": 0.002, "loss": 2.3346, "step": 241650 }, { "epoch": 0.9341899769603068, "grad_norm": 0.10004956275224686, "learning_rate": 0.002, "loss": 2.3404, "step": 241660 }, { "epoch": 0.9342286341636901, "grad_norm": 0.11355171352624893, "learning_rate": 0.002, "loss": 2.3449, "step": 241670 }, { "epoch": 0.9342672913670733, "grad_norm": 0.09860523790121078, "learning_rate": 0.002, "loss": 2.334, "step": 241680 }, { "epoch": 0.9343059485704566, "grad_norm": 0.15768641233444214, "learning_rate": 0.002, "loss": 2.3267, "step": 241690 }, { "epoch": 0.9343446057738399, "grad_norm": 0.11209391057491302, "learning_rate": 0.002, "loss": 2.3454, "step": 241700 }, { "epoch": 0.9343832629772232, "grad_norm": 0.1045207679271698, "learning_rate": 0.002, "loss": 2.3342, "step": 241710 }, { "epoch": 0.9344219201806064, "grad_norm": 0.10634932667016983, "learning_rate": 0.002, "loss": 2.3431, "step": 241720 }, { "epoch": 0.9344605773839897, "grad_norm": 0.0978906899690628, "learning_rate": 0.002, "loss": 2.3321, "step": 241730 }, { "epoch": 0.934499234587373, "grad_norm": 0.12600494921207428, "learning_rate": 0.002, "loss": 2.334, "step": 241740 }, { "epoch": 0.9345378917907563, "grad_norm": 0.09943626821041107, "learning_rate": 0.002, "loss": 2.3448, "step": 241750 }, { "epoch": 0.9345765489941396, "grad_norm": 0.10073429346084595, "learning_rate": 0.002, "loss": 2.3312, "step": 241760 }, { "epoch": 0.9346152061975228, "grad_norm": 0.12771274149417877, "learning_rate": 0.002, "loss": 2.3322, "step": 241770 }, { "epoch": 0.9346538634009062, "grad_norm": 0.12943124771118164, "learning_rate": 0.002, "loss": 2.3329, "step": 241780 }, { "epoch": 0.9346925206042894, "grad_norm": 0.11174143850803375, "learning_rate": 0.002, "loss": 2.3441, "step": 241790 }, { "epoch": 0.9347311778076727, "grad_norm": 0.1090095266699791, "learning_rate": 0.002, "loss": 2.349, "step": 241800 }, { "epoch": 0.9347698350110559, "grad_norm": 0.10961566120386124, "learning_rate": 0.002, "loss": 2.3339, "step": 241810 }, { "epoch": 0.9348084922144393, "grad_norm": 0.10913696140050888, "learning_rate": 0.002, "loss": 2.36, "step": 241820 }, { "epoch": 0.9348471494178225, "grad_norm": 0.11105609685182571, "learning_rate": 0.002, "loss": 2.3479, "step": 241830 }, { "epoch": 0.9348858066212058, "grad_norm": 0.1042783334851265, "learning_rate": 0.002, "loss": 2.3363, "step": 241840 }, { "epoch": 0.934924463824589, "grad_norm": 0.11911788582801819, "learning_rate": 0.002, "loss": 2.3549, "step": 241850 }, { "epoch": 0.9349631210279723, "grad_norm": 0.12005186080932617, "learning_rate": 0.002, "loss": 2.3457, "step": 241860 }, { "epoch": 0.9350017782313557, "grad_norm": 0.10456695407629013, "learning_rate": 0.002, "loss": 2.332, "step": 241870 }, { "epoch": 0.9350404354347389, "grad_norm": 0.10455799102783203, "learning_rate": 0.002, "loss": 2.3434, "step": 241880 }, { "epoch": 0.9350790926381222, "grad_norm": 0.09809655696153641, "learning_rate": 0.002, "loss": 2.3349, "step": 241890 }, { "epoch": 0.9351177498415054, "grad_norm": 0.1062767282128334, "learning_rate": 0.002, "loss": 2.3248, "step": 241900 }, { "epoch": 0.9351564070448888, "grad_norm": 0.10312843322753906, "learning_rate": 0.002, "loss": 2.3287, "step": 241910 }, { "epoch": 0.935195064248272, "grad_norm": 0.11013250797986984, "learning_rate": 0.002, "loss": 2.3445, "step": 241920 }, { "epoch": 0.9352337214516553, "grad_norm": 0.10098816454410553, "learning_rate": 0.002, "loss": 2.3302, "step": 241930 }, { "epoch": 0.9352723786550385, "grad_norm": 0.42135360836982727, "learning_rate": 0.002, "loss": 2.3363, "step": 241940 }, { "epoch": 0.9353110358584219, "grad_norm": 0.13825412094593048, "learning_rate": 0.002, "loss": 2.3328, "step": 241950 }, { "epoch": 0.9353496930618052, "grad_norm": 0.10749774426221848, "learning_rate": 0.002, "loss": 2.3387, "step": 241960 }, { "epoch": 0.9353883502651884, "grad_norm": 0.10179366916418076, "learning_rate": 0.002, "loss": 2.3417, "step": 241970 }, { "epoch": 0.9354270074685717, "grad_norm": 0.10680104047060013, "learning_rate": 0.002, "loss": 2.3417, "step": 241980 }, { "epoch": 0.935465664671955, "grad_norm": 0.10687895119190216, "learning_rate": 0.002, "loss": 2.3347, "step": 241990 }, { "epoch": 0.9355043218753383, "grad_norm": 0.10046619921922684, "learning_rate": 0.002, "loss": 2.3467, "step": 242000 }, { "epoch": 0.9355429790787215, "grad_norm": 0.11829427629709244, "learning_rate": 0.002, "loss": 2.3417, "step": 242010 }, { "epoch": 0.9355816362821048, "grad_norm": 0.09220101684331894, "learning_rate": 0.002, "loss": 2.339, "step": 242020 }, { "epoch": 0.935620293485488, "grad_norm": 0.15147259831428528, "learning_rate": 0.002, "loss": 2.3266, "step": 242030 }, { "epoch": 0.9356589506888714, "grad_norm": 0.10518482327461243, "learning_rate": 0.002, "loss": 2.3506, "step": 242040 }, { "epoch": 0.9356976078922546, "grad_norm": 0.1036066859960556, "learning_rate": 0.002, "loss": 2.3314, "step": 242050 }, { "epoch": 0.9357362650956379, "grad_norm": 0.11259414255619049, "learning_rate": 0.002, "loss": 2.3478, "step": 242060 }, { "epoch": 0.9357749222990212, "grad_norm": 0.12898430228233337, "learning_rate": 0.002, "loss": 2.3222, "step": 242070 }, { "epoch": 0.9358135795024045, "grad_norm": 0.09404338151216507, "learning_rate": 0.002, "loss": 2.3415, "step": 242080 }, { "epoch": 0.9358522367057878, "grad_norm": 0.09791241586208344, "learning_rate": 0.002, "loss": 2.3288, "step": 242090 }, { "epoch": 0.935890893909171, "grad_norm": 0.10467846691608429, "learning_rate": 0.002, "loss": 2.3373, "step": 242100 }, { "epoch": 0.9359295511125543, "grad_norm": 0.10960084944963455, "learning_rate": 0.002, "loss": 2.3316, "step": 242110 }, { "epoch": 0.9359682083159376, "grad_norm": 0.10058752447366714, "learning_rate": 0.002, "loss": 2.3479, "step": 242120 }, { "epoch": 0.9360068655193209, "grad_norm": 0.10815969109535217, "learning_rate": 0.002, "loss": 2.3494, "step": 242130 }, { "epoch": 0.9360455227227041, "grad_norm": 0.11273110657930374, "learning_rate": 0.002, "loss": 2.3278, "step": 242140 }, { "epoch": 0.9360841799260874, "grad_norm": 0.10401853173971176, "learning_rate": 0.002, "loss": 2.3423, "step": 242150 }, { "epoch": 0.9361228371294708, "grad_norm": 0.1059192568063736, "learning_rate": 0.002, "loss": 2.3438, "step": 242160 }, { "epoch": 0.936161494332854, "grad_norm": 0.12140562385320663, "learning_rate": 0.002, "loss": 2.3368, "step": 242170 }, { "epoch": 0.9362001515362373, "grad_norm": 0.10082773119211197, "learning_rate": 0.002, "loss": 2.3574, "step": 242180 }, { "epoch": 0.9362388087396205, "grad_norm": 0.1086844950914383, "learning_rate": 0.002, "loss": 2.3488, "step": 242190 }, { "epoch": 0.9362774659430039, "grad_norm": 0.10870152711868286, "learning_rate": 0.002, "loss": 2.3255, "step": 242200 }, { "epoch": 0.9363161231463871, "grad_norm": 0.12130552530288696, "learning_rate": 0.002, "loss": 2.3411, "step": 242210 }, { "epoch": 0.9363547803497704, "grad_norm": 0.09644386917352676, "learning_rate": 0.002, "loss": 2.3238, "step": 242220 }, { "epoch": 0.9363934375531536, "grad_norm": 0.09899096190929413, "learning_rate": 0.002, "loss": 2.3164, "step": 242230 }, { "epoch": 0.9364320947565369, "grad_norm": 0.10976357012987137, "learning_rate": 0.002, "loss": 2.34, "step": 242240 }, { "epoch": 0.9364707519599202, "grad_norm": 0.11089303344488144, "learning_rate": 0.002, "loss": 2.3494, "step": 242250 }, { "epoch": 0.9365094091633035, "grad_norm": 0.13811884820461273, "learning_rate": 0.002, "loss": 2.3472, "step": 242260 }, { "epoch": 0.9365480663666867, "grad_norm": 0.11065199226140976, "learning_rate": 0.002, "loss": 2.3364, "step": 242270 }, { "epoch": 0.93658672357007, "grad_norm": 0.11939693242311478, "learning_rate": 0.002, "loss": 2.3507, "step": 242280 }, { "epoch": 0.9366253807734534, "grad_norm": 0.10447102785110474, "learning_rate": 0.002, "loss": 2.3289, "step": 242290 }, { "epoch": 0.9366640379768366, "grad_norm": 0.10677524656057358, "learning_rate": 0.002, "loss": 2.3478, "step": 242300 }, { "epoch": 0.9367026951802199, "grad_norm": 0.11139500141143799, "learning_rate": 0.002, "loss": 2.336, "step": 242310 }, { "epoch": 0.9367413523836031, "grad_norm": 0.09366004168987274, "learning_rate": 0.002, "loss": 2.3459, "step": 242320 }, { "epoch": 0.9367800095869865, "grad_norm": 0.09346692264080048, "learning_rate": 0.002, "loss": 2.3399, "step": 242330 }, { "epoch": 0.9368186667903697, "grad_norm": 0.10834218561649323, "learning_rate": 0.002, "loss": 2.3353, "step": 242340 }, { "epoch": 0.936857323993753, "grad_norm": 0.3928252160549164, "learning_rate": 0.002, "loss": 2.3279, "step": 242350 }, { "epoch": 0.9368959811971362, "grad_norm": 0.10552085936069489, "learning_rate": 0.002, "loss": 2.3498, "step": 242360 }, { "epoch": 0.9369346384005196, "grad_norm": 0.1024235412478447, "learning_rate": 0.002, "loss": 2.3305, "step": 242370 }, { "epoch": 0.9369732956039029, "grad_norm": 0.09039077162742615, "learning_rate": 0.002, "loss": 2.3274, "step": 242380 }, { "epoch": 0.9370119528072861, "grad_norm": 0.09835212677717209, "learning_rate": 0.002, "loss": 2.3422, "step": 242390 }, { "epoch": 0.9370506100106694, "grad_norm": 0.10888617485761642, "learning_rate": 0.002, "loss": 2.3489, "step": 242400 }, { "epoch": 0.9370892672140526, "grad_norm": 0.10096684843301773, "learning_rate": 0.002, "loss": 2.3355, "step": 242410 }, { "epoch": 0.937127924417436, "grad_norm": 0.10301858931779861, "learning_rate": 0.002, "loss": 2.3402, "step": 242420 }, { "epoch": 0.9371665816208192, "grad_norm": 0.10485974699258804, "learning_rate": 0.002, "loss": 2.3286, "step": 242430 }, { "epoch": 0.9372052388242025, "grad_norm": 0.11427116394042969, "learning_rate": 0.002, "loss": 2.3379, "step": 242440 }, { "epoch": 0.9372438960275857, "grad_norm": 0.10336142033338547, "learning_rate": 0.002, "loss": 2.3357, "step": 242450 }, { "epoch": 0.9372825532309691, "grad_norm": 0.12452462315559387, "learning_rate": 0.002, "loss": 2.3365, "step": 242460 }, { "epoch": 0.9373212104343523, "grad_norm": 0.10437518358230591, "learning_rate": 0.002, "loss": 2.3381, "step": 242470 }, { "epoch": 0.9373598676377356, "grad_norm": 0.09719526022672653, "learning_rate": 0.002, "loss": 2.3197, "step": 242480 }, { "epoch": 0.9373985248411189, "grad_norm": 0.11504451185464859, "learning_rate": 0.002, "loss": 2.3401, "step": 242490 }, { "epoch": 0.9374371820445022, "grad_norm": 0.10362989455461502, "learning_rate": 0.002, "loss": 2.3347, "step": 242500 }, { "epoch": 0.9374758392478855, "grad_norm": 0.10242421180009842, "learning_rate": 0.002, "loss": 2.3347, "step": 242510 }, { "epoch": 0.9375144964512687, "grad_norm": 0.10308767855167389, "learning_rate": 0.002, "loss": 2.3322, "step": 242520 }, { "epoch": 0.937553153654652, "grad_norm": 0.1220618262887001, "learning_rate": 0.002, "loss": 2.3325, "step": 242530 }, { "epoch": 0.9375918108580353, "grad_norm": 0.10344993323087692, "learning_rate": 0.002, "loss": 2.3509, "step": 242540 }, { "epoch": 0.9376304680614186, "grad_norm": 0.14695045351982117, "learning_rate": 0.002, "loss": 2.3414, "step": 242550 }, { "epoch": 0.9376691252648018, "grad_norm": 0.11423251032829285, "learning_rate": 0.002, "loss": 2.3394, "step": 242560 }, { "epoch": 0.9377077824681851, "grad_norm": 0.09518953412771225, "learning_rate": 0.002, "loss": 2.3593, "step": 242570 }, { "epoch": 0.9377464396715685, "grad_norm": 0.15579114854335785, "learning_rate": 0.002, "loss": 2.3425, "step": 242580 }, { "epoch": 0.9377850968749517, "grad_norm": 0.09890809655189514, "learning_rate": 0.002, "loss": 2.3359, "step": 242590 }, { "epoch": 0.937823754078335, "grad_norm": 0.11543553322553635, "learning_rate": 0.002, "loss": 2.3349, "step": 242600 }, { "epoch": 0.9378624112817182, "grad_norm": 0.10215145349502563, "learning_rate": 0.002, "loss": 2.3347, "step": 242610 }, { "epoch": 0.9379010684851015, "grad_norm": 0.09498198330402374, "learning_rate": 0.002, "loss": 2.3334, "step": 242620 }, { "epoch": 0.9379397256884848, "grad_norm": 0.09896153956651688, "learning_rate": 0.002, "loss": 2.3479, "step": 242630 }, { "epoch": 0.9379783828918681, "grad_norm": 0.1034855842590332, "learning_rate": 0.002, "loss": 2.3396, "step": 242640 }, { "epoch": 0.9380170400952513, "grad_norm": 0.11195968091487885, "learning_rate": 0.002, "loss": 2.334, "step": 242650 }, { "epoch": 0.9380556972986346, "grad_norm": 0.1223968118429184, "learning_rate": 0.002, "loss": 2.3194, "step": 242660 }, { "epoch": 0.938094354502018, "grad_norm": 0.10049095004796982, "learning_rate": 0.002, "loss": 2.3474, "step": 242670 }, { "epoch": 0.9381330117054012, "grad_norm": 0.09640270471572876, "learning_rate": 0.002, "loss": 2.3262, "step": 242680 }, { "epoch": 0.9381716689087845, "grad_norm": 0.1136377602815628, "learning_rate": 0.002, "loss": 2.35, "step": 242690 }, { "epoch": 0.9382103261121677, "grad_norm": 0.09889031946659088, "learning_rate": 0.002, "loss": 2.3418, "step": 242700 }, { "epoch": 0.9382489833155511, "grad_norm": 0.09716586768627167, "learning_rate": 0.002, "loss": 2.3363, "step": 242710 }, { "epoch": 0.9382876405189343, "grad_norm": 0.09732740372419357, "learning_rate": 0.002, "loss": 2.347, "step": 242720 }, { "epoch": 0.9383262977223176, "grad_norm": 0.10785375535488129, "learning_rate": 0.002, "loss": 2.3514, "step": 242730 }, { "epoch": 0.9383649549257008, "grad_norm": 0.11654423177242279, "learning_rate": 0.002, "loss": 2.3406, "step": 242740 }, { "epoch": 0.9384036121290842, "grad_norm": 0.10839424282312393, "learning_rate": 0.002, "loss": 2.3497, "step": 242750 }, { "epoch": 0.9384422693324674, "grad_norm": 0.12296377122402191, "learning_rate": 0.002, "loss": 2.3353, "step": 242760 }, { "epoch": 0.9384809265358507, "grad_norm": 0.10874893516302109, "learning_rate": 0.002, "loss": 2.3424, "step": 242770 }, { "epoch": 0.9385195837392339, "grad_norm": 0.10407909005880356, "learning_rate": 0.002, "loss": 2.3349, "step": 242780 }, { "epoch": 0.9385582409426172, "grad_norm": 0.1068480983376503, "learning_rate": 0.002, "loss": 2.3279, "step": 242790 }, { "epoch": 0.9385968981460006, "grad_norm": 0.09913309663534164, "learning_rate": 0.002, "loss": 2.3394, "step": 242800 }, { "epoch": 0.9386355553493838, "grad_norm": 0.13421852886676788, "learning_rate": 0.002, "loss": 2.3366, "step": 242810 }, { "epoch": 0.9386742125527671, "grad_norm": 0.10474736988544464, "learning_rate": 0.002, "loss": 2.339, "step": 242820 }, { "epoch": 0.9387128697561503, "grad_norm": 0.11090757697820663, "learning_rate": 0.002, "loss": 2.3423, "step": 242830 }, { "epoch": 0.9387515269595337, "grad_norm": 0.10732854157686234, "learning_rate": 0.002, "loss": 2.327, "step": 242840 }, { "epoch": 0.9387901841629169, "grad_norm": 0.09814008325338364, "learning_rate": 0.002, "loss": 2.3423, "step": 242850 }, { "epoch": 0.9388288413663002, "grad_norm": 0.10455290973186493, "learning_rate": 0.002, "loss": 2.339, "step": 242860 }, { "epoch": 0.9388674985696834, "grad_norm": 0.1159416064620018, "learning_rate": 0.002, "loss": 2.3377, "step": 242870 }, { "epoch": 0.9389061557730668, "grad_norm": 0.09911547601222992, "learning_rate": 0.002, "loss": 2.3466, "step": 242880 }, { "epoch": 0.93894481297645, "grad_norm": 0.09677939116954803, "learning_rate": 0.002, "loss": 2.3397, "step": 242890 }, { "epoch": 0.9389834701798333, "grad_norm": 0.10465750843286514, "learning_rate": 0.002, "loss": 2.3426, "step": 242900 }, { "epoch": 0.9390221273832166, "grad_norm": 0.10974880307912827, "learning_rate": 0.002, "loss": 2.3308, "step": 242910 }, { "epoch": 0.9390607845865999, "grad_norm": 0.10110744088888168, "learning_rate": 0.002, "loss": 2.3349, "step": 242920 }, { "epoch": 0.9390994417899832, "grad_norm": 0.1309683620929718, "learning_rate": 0.002, "loss": 2.3451, "step": 242930 }, { "epoch": 0.9391380989933664, "grad_norm": 0.09194327145814896, "learning_rate": 0.002, "loss": 2.3352, "step": 242940 }, { "epoch": 0.9391767561967497, "grad_norm": 0.10804415494203568, "learning_rate": 0.002, "loss": 2.3321, "step": 242950 }, { "epoch": 0.9392154134001329, "grad_norm": 0.09277147799730301, "learning_rate": 0.002, "loss": 2.353, "step": 242960 }, { "epoch": 0.9392540706035163, "grad_norm": 0.1043214276432991, "learning_rate": 0.002, "loss": 2.3454, "step": 242970 }, { "epoch": 0.9392927278068995, "grad_norm": 0.10557788610458374, "learning_rate": 0.002, "loss": 2.3378, "step": 242980 }, { "epoch": 0.9393313850102828, "grad_norm": 0.09209178388118744, "learning_rate": 0.002, "loss": 2.3254, "step": 242990 }, { "epoch": 0.939370042213666, "grad_norm": 0.09748727828264236, "learning_rate": 0.002, "loss": 2.3434, "step": 243000 }, { "epoch": 0.9394086994170494, "grad_norm": 0.10211475938558578, "learning_rate": 0.002, "loss": 2.3472, "step": 243010 }, { "epoch": 0.9394473566204327, "grad_norm": 0.11180869489908218, "learning_rate": 0.002, "loss": 2.3293, "step": 243020 }, { "epoch": 0.9394860138238159, "grad_norm": 0.15321184694766998, "learning_rate": 0.002, "loss": 2.3442, "step": 243030 }, { "epoch": 0.9395246710271992, "grad_norm": 0.1107967421412468, "learning_rate": 0.002, "loss": 2.3447, "step": 243040 }, { "epoch": 0.9395633282305825, "grad_norm": 0.10820520669221878, "learning_rate": 0.002, "loss": 2.3326, "step": 243050 }, { "epoch": 0.9396019854339658, "grad_norm": 0.10260884463787079, "learning_rate": 0.002, "loss": 2.3372, "step": 243060 }, { "epoch": 0.939640642637349, "grad_norm": 0.10140696167945862, "learning_rate": 0.002, "loss": 2.3352, "step": 243070 }, { "epoch": 0.9396792998407323, "grad_norm": 0.11433785408735275, "learning_rate": 0.002, "loss": 2.3378, "step": 243080 }, { "epoch": 0.9397179570441156, "grad_norm": 0.10506769269704819, "learning_rate": 0.002, "loss": 2.3269, "step": 243090 }, { "epoch": 0.9397566142474989, "grad_norm": 0.1130407378077507, "learning_rate": 0.002, "loss": 2.3393, "step": 243100 }, { "epoch": 0.9397952714508822, "grad_norm": 0.10419058799743652, "learning_rate": 0.002, "loss": 2.3196, "step": 243110 }, { "epoch": 0.9398339286542654, "grad_norm": 0.0975937694311142, "learning_rate": 0.002, "loss": 2.3443, "step": 243120 }, { "epoch": 0.9398725858576488, "grad_norm": 0.09650372713804245, "learning_rate": 0.002, "loss": 2.3425, "step": 243130 }, { "epoch": 0.939911243061032, "grad_norm": 0.10235495120286942, "learning_rate": 0.002, "loss": 2.3392, "step": 243140 }, { "epoch": 0.9399499002644153, "grad_norm": 0.10510300099849701, "learning_rate": 0.002, "loss": 2.3408, "step": 243150 }, { "epoch": 0.9399885574677985, "grad_norm": 0.1160840168595314, "learning_rate": 0.002, "loss": 2.3468, "step": 243160 }, { "epoch": 0.9400272146711818, "grad_norm": 0.10195966064929962, "learning_rate": 0.002, "loss": 2.3361, "step": 243170 }, { "epoch": 0.9400658718745651, "grad_norm": 0.13564079999923706, "learning_rate": 0.002, "loss": 2.3384, "step": 243180 }, { "epoch": 0.9401045290779484, "grad_norm": 0.1034989058971405, "learning_rate": 0.002, "loss": 2.3318, "step": 243190 }, { "epoch": 0.9401431862813316, "grad_norm": 0.09181249141693115, "learning_rate": 0.002, "loss": 2.3515, "step": 243200 }, { "epoch": 0.9401818434847149, "grad_norm": 0.11321096867322922, "learning_rate": 0.002, "loss": 2.3358, "step": 243210 }, { "epoch": 0.9402205006880983, "grad_norm": 0.34259989857673645, "learning_rate": 0.002, "loss": 2.3536, "step": 243220 }, { "epoch": 0.9402591578914815, "grad_norm": 0.11003011465072632, "learning_rate": 0.002, "loss": 2.3508, "step": 243230 }, { "epoch": 0.9402978150948648, "grad_norm": 0.09221770614385605, "learning_rate": 0.002, "loss": 2.3345, "step": 243240 }, { "epoch": 0.940336472298248, "grad_norm": 0.10950542986392975, "learning_rate": 0.002, "loss": 2.3236, "step": 243250 }, { "epoch": 0.9403751295016314, "grad_norm": 0.09502715617418289, "learning_rate": 0.002, "loss": 2.3348, "step": 243260 }, { "epoch": 0.9404137867050146, "grad_norm": 0.0961625799536705, "learning_rate": 0.002, "loss": 2.3366, "step": 243270 }, { "epoch": 0.9404524439083979, "grad_norm": 0.13180868327617645, "learning_rate": 0.002, "loss": 2.3403, "step": 243280 }, { "epoch": 0.9404911011117811, "grad_norm": 0.09879843890666962, "learning_rate": 0.002, "loss": 2.3338, "step": 243290 }, { "epoch": 0.9405297583151645, "grad_norm": 0.10887906700372696, "learning_rate": 0.002, "loss": 2.339, "step": 243300 }, { "epoch": 0.9405684155185478, "grad_norm": 0.09494494646787643, "learning_rate": 0.002, "loss": 2.3309, "step": 243310 }, { "epoch": 0.940607072721931, "grad_norm": 0.11056619137525558, "learning_rate": 0.002, "loss": 2.336, "step": 243320 }, { "epoch": 0.9406457299253143, "grad_norm": 0.11053516715765, "learning_rate": 0.002, "loss": 2.3316, "step": 243330 }, { "epoch": 0.9406843871286975, "grad_norm": 0.10389503836631775, "learning_rate": 0.002, "loss": 2.3391, "step": 243340 }, { "epoch": 0.9407230443320809, "grad_norm": 0.1147523745894432, "learning_rate": 0.002, "loss": 2.3359, "step": 243350 }, { "epoch": 0.9407617015354641, "grad_norm": 0.09236140549182892, "learning_rate": 0.002, "loss": 2.34, "step": 243360 }, { "epoch": 0.9408003587388474, "grad_norm": 0.10874568670988083, "learning_rate": 0.002, "loss": 2.3388, "step": 243370 }, { "epoch": 0.9408390159422306, "grad_norm": 0.10968281328678131, "learning_rate": 0.002, "loss": 2.3399, "step": 243380 }, { "epoch": 0.940877673145614, "grad_norm": 0.10738930851221085, "learning_rate": 0.002, "loss": 2.3522, "step": 243390 }, { "epoch": 0.9409163303489972, "grad_norm": 0.10887566953897476, "learning_rate": 0.002, "loss": 2.3215, "step": 243400 }, { "epoch": 0.9409549875523805, "grad_norm": 0.09930963069200516, "learning_rate": 0.002, "loss": 2.3464, "step": 243410 }, { "epoch": 0.9409936447557637, "grad_norm": 0.10712359100580215, "learning_rate": 0.002, "loss": 2.3465, "step": 243420 }, { "epoch": 0.9410323019591471, "grad_norm": 0.10104658454656601, "learning_rate": 0.002, "loss": 2.3318, "step": 243430 }, { "epoch": 0.9410709591625304, "grad_norm": 0.09679016470909119, "learning_rate": 0.002, "loss": 2.3336, "step": 243440 }, { "epoch": 0.9411096163659136, "grad_norm": 0.11958494782447815, "learning_rate": 0.002, "loss": 2.34, "step": 243450 }, { "epoch": 0.9411482735692969, "grad_norm": 0.10792674124240875, "learning_rate": 0.002, "loss": 2.3456, "step": 243460 }, { "epoch": 0.9411869307726802, "grad_norm": 0.1145927906036377, "learning_rate": 0.002, "loss": 2.3461, "step": 243470 }, { "epoch": 0.9412255879760635, "grad_norm": 0.10597149282693863, "learning_rate": 0.002, "loss": 2.3412, "step": 243480 }, { "epoch": 0.9412642451794467, "grad_norm": 0.09116151183843613, "learning_rate": 0.002, "loss": 2.3442, "step": 243490 }, { "epoch": 0.94130290238283, "grad_norm": 0.12441765516996384, "learning_rate": 0.002, "loss": 2.3324, "step": 243500 }, { "epoch": 0.9413415595862134, "grad_norm": 0.13671045005321503, "learning_rate": 0.002, "loss": 2.3429, "step": 243510 }, { "epoch": 0.9413802167895966, "grad_norm": 0.09610971063375473, "learning_rate": 0.002, "loss": 2.3437, "step": 243520 }, { "epoch": 0.9414188739929799, "grad_norm": 0.1013101115822792, "learning_rate": 0.002, "loss": 2.3484, "step": 243530 }, { "epoch": 0.9414575311963631, "grad_norm": 0.10055457800626755, "learning_rate": 0.002, "loss": 2.3362, "step": 243540 }, { "epoch": 0.9414961883997464, "grad_norm": 0.12221028655767441, "learning_rate": 0.002, "loss": 2.3392, "step": 243550 }, { "epoch": 0.9415348456031297, "grad_norm": 0.10323991626501083, "learning_rate": 0.002, "loss": 2.3302, "step": 243560 }, { "epoch": 0.941573502806513, "grad_norm": 0.103202685713768, "learning_rate": 0.002, "loss": 2.3344, "step": 243570 }, { "epoch": 0.9416121600098962, "grad_norm": 0.10308035463094711, "learning_rate": 0.002, "loss": 2.331, "step": 243580 }, { "epoch": 0.9416508172132795, "grad_norm": 0.09553807973861694, "learning_rate": 0.002, "loss": 2.337, "step": 243590 }, { "epoch": 0.9416894744166628, "grad_norm": 0.13683472573757172, "learning_rate": 0.002, "loss": 2.3517, "step": 243600 }, { "epoch": 0.9417281316200461, "grad_norm": 0.1049049124121666, "learning_rate": 0.002, "loss": 2.3464, "step": 243610 }, { "epoch": 0.9417667888234293, "grad_norm": 0.09826502948999405, "learning_rate": 0.002, "loss": 2.3286, "step": 243620 }, { "epoch": 0.9418054460268126, "grad_norm": 0.12797874212265015, "learning_rate": 0.002, "loss": 2.3278, "step": 243630 }, { "epoch": 0.941844103230196, "grad_norm": 0.10155646502971649, "learning_rate": 0.002, "loss": 2.3412, "step": 243640 }, { "epoch": 0.9418827604335792, "grad_norm": 0.10716505348682404, "learning_rate": 0.002, "loss": 2.3354, "step": 243650 }, { "epoch": 0.9419214176369625, "grad_norm": 0.13598762452602386, "learning_rate": 0.002, "loss": 2.3326, "step": 243660 }, { "epoch": 0.9419600748403457, "grad_norm": 0.08658332377672195, "learning_rate": 0.002, "loss": 2.3244, "step": 243670 }, { "epoch": 0.9419987320437291, "grad_norm": 0.12665392458438873, "learning_rate": 0.002, "loss": 2.3389, "step": 243680 }, { "epoch": 0.9420373892471123, "grad_norm": 0.10175671428442001, "learning_rate": 0.002, "loss": 2.3311, "step": 243690 }, { "epoch": 0.9420760464504956, "grad_norm": 0.10279948264360428, "learning_rate": 0.002, "loss": 2.34, "step": 243700 }, { "epoch": 0.9421147036538788, "grad_norm": 0.09339248389005661, "learning_rate": 0.002, "loss": 2.3455, "step": 243710 }, { "epoch": 0.9421533608572621, "grad_norm": 0.09639550000429153, "learning_rate": 0.002, "loss": 2.3309, "step": 243720 }, { "epoch": 0.9421920180606455, "grad_norm": 0.09585197269916534, "learning_rate": 0.002, "loss": 2.333, "step": 243730 }, { "epoch": 0.9422306752640287, "grad_norm": 0.09819405525922775, "learning_rate": 0.002, "loss": 2.3123, "step": 243740 }, { "epoch": 0.942269332467412, "grad_norm": 0.11895592510700226, "learning_rate": 0.002, "loss": 2.3372, "step": 243750 }, { "epoch": 0.9423079896707952, "grad_norm": 0.10645242035388947, "learning_rate": 0.002, "loss": 2.3549, "step": 243760 }, { "epoch": 0.9423466468741786, "grad_norm": 0.10574015229940414, "learning_rate": 0.002, "loss": 2.335, "step": 243770 }, { "epoch": 0.9423853040775618, "grad_norm": 0.11190999299287796, "learning_rate": 0.002, "loss": 2.3492, "step": 243780 }, { "epoch": 0.9424239612809451, "grad_norm": 0.11906064301729202, "learning_rate": 0.002, "loss": 2.3343, "step": 243790 }, { "epoch": 0.9424626184843283, "grad_norm": 0.10504163056612015, "learning_rate": 0.002, "loss": 2.3425, "step": 243800 }, { "epoch": 0.9425012756877117, "grad_norm": 0.0909421443939209, "learning_rate": 0.002, "loss": 2.3333, "step": 243810 }, { "epoch": 0.942539932891095, "grad_norm": 0.10777609050273895, "learning_rate": 0.002, "loss": 2.3392, "step": 243820 }, { "epoch": 0.9425785900944782, "grad_norm": 0.09569604694843292, "learning_rate": 0.002, "loss": 2.3345, "step": 243830 }, { "epoch": 0.9426172472978614, "grad_norm": 0.11758679896593094, "learning_rate": 0.002, "loss": 2.3284, "step": 243840 }, { "epoch": 0.9426559045012448, "grad_norm": 0.10496620833873749, "learning_rate": 0.002, "loss": 2.3225, "step": 243850 }, { "epoch": 0.9426945617046281, "grad_norm": 0.13090617954730988, "learning_rate": 0.002, "loss": 2.3346, "step": 243860 }, { "epoch": 0.9427332189080113, "grad_norm": 0.11926376819610596, "learning_rate": 0.002, "loss": 2.3162, "step": 243870 }, { "epoch": 0.9427718761113946, "grad_norm": 0.100855752825737, "learning_rate": 0.002, "loss": 2.3431, "step": 243880 }, { "epoch": 0.9428105333147778, "grad_norm": 0.10731156170368195, "learning_rate": 0.002, "loss": 2.3327, "step": 243890 }, { "epoch": 0.9428491905181612, "grad_norm": 0.09543273597955704, "learning_rate": 0.002, "loss": 2.3431, "step": 243900 }, { "epoch": 0.9428878477215444, "grad_norm": 0.08852459490299225, "learning_rate": 0.002, "loss": 2.3333, "step": 243910 }, { "epoch": 0.9429265049249277, "grad_norm": 0.13325460255146027, "learning_rate": 0.002, "loss": 2.3369, "step": 243920 }, { "epoch": 0.9429651621283109, "grad_norm": 0.0993916243314743, "learning_rate": 0.002, "loss": 2.3386, "step": 243930 }, { "epoch": 0.9430038193316943, "grad_norm": 0.10384912043809891, "learning_rate": 0.002, "loss": 2.3382, "step": 243940 }, { "epoch": 0.9430424765350776, "grad_norm": 0.13025276362895966, "learning_rate": 0.002, "loss": 2.3242, "step": 243950 }, { "epoch": 0.9430811337384608, "grad_norm": 0.21691864728927612, "learning_rate": 0.002, "loss": 2.3373, "step": 243960 }, { "epoch": 0.9431197909418441, "grad_norm": 0.09893051534891129, "learning_rate": 0.002, "loss": 2.3306, "step": 243970 }, { "epoch": 0.9431584481452274, "grad_norm": 0.11579374969005585, "learning_rate": 0.002, "loss": 2.3345, "step": 243980 }, { "epoch": 0.9431971053486107, "grad_norm": 0.09239792078733444, "learning_rate": 0.002, "loss": 2.3377, "step": 243990 }, { "epoch": 0.9432357625519939, "grad_norm": 0.1062285453081131, "learning_rate": 0.002, "loss": 2.3527, "step": 244000 }, { "epoch": 0.9432744197553772, "grad_norm": 0.08503497391939163, "learning_rate": 0.002, "loss": 2.3299, "step": 244010 }, { "epoch": 0.9433130769587605, "grad_norm": 0.1249237135052681, "learning_rate": 0.002, "loss": 2.3466, "step": 244020 }, { "epoch": 0.9433517341621438, "grad_norm": 0.10373333096504211, "learning_rate": 0.002, "loss": 2.3565, "step": 244030 }, { "epoch": 0.943390391365527, "grad_norm": 0.11731874942779541, "learning_rate": 0.002, "loss": 2.347, "step": 244040 }, { "epoch": 0.9434290485689103, "grad_norm": 0.11248892545700073, "learning_rate": 0.002, "loss": 2.3401, "step": 244050 }, { "epoch": 0.9434677057722937, "grad_norm": 0.0989983081817627, "learning_rate": 0.002, "loss": 2.3393, "step": 244060 }, { "epoch": 0.9435063629756769, "grad_norm": 0.09739810228347778, "learning_rate": 0.002, "loss": 2.3299, "step": 244070 }, { "epoch": 0.9435450201790602, "grad_norm": 0.12019892036914825, "learning_rate": 0.002, "loss": 2.3397, "step": 244080 }, { "epoch": 0.9435836773824434, "grad_norm": 0.09014622122049332, "learning_rate": 0.002, "loss": 2.3414, "step": 244090 }, { "epoch": 0.9436223345858267, "grad_norm": 0.10269775241613388, "learning_rate": 0.002, "loss": 2.3435, "step": 244100 }, { "epoch": 0.94366099178921, "grad_norm": 0.1557008922100067, "learning_rate": 0.002, "loss": 2.3297, "step": 244110 }, { "epoch": 0.9436996489925933, "grad_norm": 0.18072141706943512, "learning_rate": 0.002, "loss": 2.3343, "step": 244120 }, { "epoch": 0.9437383061959765, "grad_norm": 0.11029327660799026, "learning_rate": 0.002, "loss": 2.3374, "step": 244130 }, { "epoch": 0.9437769633993598, "grad_norm": 0.11124139279127121, "learning_rate": 0.002, "loss": 2.3339, "step": 244140 }, { "epoch": 0.9438156206027432, "grad_norm": 0.09908004850149155, "learning_rate": 0.002, "loss": 2.3308, "step": 244150 }, { "epoch": 0.9438542778061264, "grad_norm": 0.1112242117524147, "learning_rate": 0.002, "loss": 2.3252, "step": 244160 }, { "epoch": 0.9438929350095097, "grad_norm": 0.09297651797533035, "learning_rate": 0.002, "loss": 2.3347, "step": 244170 }, { "epoch": 0.9439315922128929, "grad_norm": 0.12331829965114594, "learning_rate": 0.002, "loss": 2.3578, "step": 244180 }, { "epoch": 0.9439702494162763, "grad_norm": 0.10122538357973099, "learning_rate": 0.002, "loss": 2.3339, "step": 244190 }, { "epoch": 0.9440089066196595, "grad_norm": 0.11608784645795822, "learning_rate": 0.002, "loss": 2.3351, "step": 244200 }, { "epoch": 0.9440475638230428, "grad_norm": 0.12176360189914703, "learning_rate": 0.002, "loss": 2.3324, "step": 244210 }, { "epoch": 0.944086221026426, "grad_norm": 0.11390931904315948, "learning_rate": 0.002, "loss": 2.3307, "step": 244220 }, { "epoch": 0.9441248782298094, "grad_norm": 0.10589968413114548, "learning_rate": 0.002, "loss": 2.322, "step": 244230 }, { "epoch": 0.9441635354331926, "grad_norm": 0.10728061944246292, "learning_rate": 0.002, "loss": 2.3256, "step": 244240 }, { "epoch": 0.9442021926365759, "grad_norm": 0.107032909989357, "learning_rate": 0.002, "loss": 2.3295, "step": 244250 }, { "epoch": 0.9442408498399592, "grad_norm": 0.09229406714439392, "learning_rate": 0.002, "loss": 2.3272, "step": 244260 }, { "epoch": 0.9442795070433424, "grad_norm": 0.09907346963882446, "learning_rate": 0.002, "loss": 2.3206, "step": 244270 }, { "epoch": 0.9443181642467258, "grad_norm": 0.10639627277851105, "learning_rate": 0.002, "loss": 2.3321, "step": 244280 }, { "epoch": 0.944356821450109, "grad_norm": 0.11334112286567688, "learning_rate": 0.002, "loss": 2.3392, "step": 244290 }, { "epoch": 0.9443954786534923, "grad_norm": 0.12863516807556152, "learning_rate": 0.002, "loss": 2.3491, "step": 244300 }, { "epoch": 0.9444341358568755, "grad_norm": 0.10524982959032059, "learning_rate": 0.002, "loss": 2.339, "step": 244310 }, { "epoch": 0.9444727930602589, "grad_norm": 0.10224920511245728, "learning_rate": 0.002, "loss": 2.3313, "step": 244320 }, { "epoch": 0.9445114502636421, "grad_norm": 0.11514956504106522, "learning_rate": 0.002, "loss": 2.3307, "step": 244330 }, { "epoch": 0.9445501074670254, "grad_norm": 0.10550584644079208, "learning_rate": 0.002, "loss": 2.3469, "step": 244340 }, { "epoch": 0.9445887646704086, "grad_norm": 0.10284145176410675, "learning_rate": 0.002, "loss": 2.3391, "step": 244350 }, { "epoch": 0.944627421873792, "grad_norm": 0.12884284555912018, "learning_rate": 0.002, "loss": 2.3375, "step": 244360 }, { "epoch": 0.9446660790771753, "grad_norm": 0.1060062125325203, "learning_rate": 0.002, "loss": 2.3394, "step": 244370 }, { "epoch": 0.9447047362805585, "grad_norm": 0.12581922113895416, "learning_rate": 0.002, "loss": 2.3319, "step": 244380 }, { "epoch": 0.9447433934839418, "grad_norm": 0.11938612908124924, "learning_rate": 0.002, "loss": 2.3359, "step": 244390 }, { "epoch": 0.9447820506873251, "grad_norm": 0.1125989630818367, "learning_rate": 0.002, "loss": 2.344, "step": 244400 }, { "epoch": 0.9448207078907084, "grad_norm": 0.11721620708703995, "learning_rate": 0.002, "loss": 2.3392, "step": 244410 }, { "epoch": 0.9448593650940916, "grad_norm": 0.09430734813213348, "learning_rate": 0.002, "loss": 2.3506, "step": 244420 }, { "epoch": 0.9448980222974749, "grad_norm": 0.09782515466213226, "learning_rate": 0.002, "loss": 2.3383, "step": 244430 }, { "epoch": 0.9449366795008582, "grad_norm": 0.09499527513980865, "learning_rate": 0.002, "loss": 2.3497, "step": 244440 }, { "epoch": 0.9449753367042415, "grad_norm": 0.15276183187961578, "learning_rate": 0.002, "loss": 2.3462, "step": 244450 }, { "epoch": 0.9450139939076247, "grad_norm": 0.10129997134208679, "learning_rate": 0.002, "loss": 2.3496, "step": 244460 }, { "epoch": 0.945052651111008, "grad_norm": 0.10709690302610397, "learning_rate": 0.002, "loss": 2.327, "step": 244470 }, { "epoch": 0.9450913083143913, "grad_norm": 0.09405800700187683, "learning_rate": 0.002, "loss": 2.3531, "step": 244480 }, { "epoch": 0.9451299655177746, "grad_norm": 0.1307350993156433, "learning_rate": 0.002, "loss": 2.3287, "step": 244490 }, { "epoch": 0.9451686227211579, "grad_norm": 0.09851517528295517, "learning_rate": 0.002, "loss": 2.323, "step": 244500 }, { "epoch": 0.9452072799245411, "grad_norm": 0.1036297008395195, "learning_rate": 0.002, "loss": 2.3373, "step": 244510 }, { "epoch": 0.9452459371279244, "grad_norm": 0.11205063760280609, "learning_rate": 0.002, "loss": 2.3455, "step": 244520 }, { "epoch": 0.9452845943313077, "grad_norm": 0.12469026446342468, "learning_rate": 0.002, "loss": 2.3495, "step": 244530 }, { "epoch": 0.945323251534691, "grad_norm": 0.11297151446342468, "learning_rate": 0.002, "loss": 2.3465, "step": 244540 }, { "epoch": 0.9453619087380742, "grad_norm": 0.13889361917972565, "learning_rate": 0.002, "loss": 2.3317, "step": 244550 }, { "epoch": 0.9454005659414575, "grad_norm": 0.10334096103906631, "learning_rate": 0.002, "loss": 2.3422, "step": 244560 }, { "epoch": 0.9454392231448409, "grad_norm": 0.0992535948753357, "learning_rate": 0.002, "loss": 2.3484, "step": 244570 }, { "epoch": 0.9454778803482241, "grad_norm": 0.09402686357498169, "learning_rate": 0.002, "loss": 2.3466, "step": 244580 }, { "epoch": 0.9455165375516074, "grad_norm": 0.14916792511940002, "learning_rate": 0.002, "loss": 2.3428, "step": 244590 }, { "epoch": 0.9455551947549906, "grad_norm": 0.10576571524143219, "learning_rate": 0.002, "loss": 2.3326, "step": 244600 }, { "epoch": 0.945593851958374, "grad_norm": 0.11637571454048157, "learning_rate": 0.002, "loss": 2.3329, "step": 244610 }, { "epoch": 0.9456325091617572, "grad_norm": 0.13626307249069214, "learning_rate": 0.002, "loss": 2.3235, "step": 244620 }, { "epoch": 0.9456711663651405, "grad_norm": 0.09241092205047607, "learning_rate": 0.002, "loss": 2.3402, "step": 244630 }, { "epoch": 0.9457098235685237, "grad_norm": 0.13166047632694244, "learning_rate": 0.002, "loss": 2.3389, "step": 244640 }, { "epoch": 0.945748480771907, "grad_norm": 0.11197197437286377, "learning_rate": 0.002, "loss": 2.3505, "step": 244650 }, { "epoch": 0.9457871379752903, "grad_norm": 0.09408937394618988, "learning_rate": 0.002, "loss": 2.3345, "step": 244660 }, { "epoch": 0.9458257951786736, "grad_norm": 0.09625637531280518, "learning_rate": 0.002, "loss": 2.3429, "step": 244670 }, { "epoch": 0.9458644523820569, "grad_norm": 0.3478195369243622, "learning_rate": 0.002, "loss": 2.3334, "step": 244680 }, { "epoch": 0.9459031095854401, "grad_norm": 0.10369176417589188, "learning_rate": 0.002, "loss": 2.3542, "step": 244690 }, { "epoch": 0.9459417667888235, "grad_norm": 0.09355360269546509, "learning_rate": 0.002, "loss": 2.3357, "step": 244700 }, { "epoch": 0.9459804239922067, "grad_norm": 0.10981659591197968, "learning_rate": 0.002, "loss": 2.3464, "step": 244710 }, { "epoch": 0.94601908119559, "grad_norm": 0.1039636954665184, "learning_rate": 0.002, "loss": 2.3381, "step": 244720 }, { "epoch": 0.9460577383989732, "grad_norm": 0.1111103966832161, "learning_rate": 0.002, "loss": 2.3364, "step": 244730 }, { "epoch": 0.9460963956023566, "grad_norm": 0.1148524358868599, "learning_rate": 0.002, "loss": 2.3473, "step": 244740 }, { "epoch": 0.9461350528057398, "grad_norm": 0.0972360372543335, "learning_rate": 0.002, "loss": 2.3396, "step": 244750 }, { "epoch": 0.9461737100091231, "grad_norm": 0.11069841682910919, "learning_rate": 0.002, "loss": 2.3368, "step": 244760 }, { "epoch": 0.9462123672125063, "grad_norm": 0.09903131425380707, "learning_rate": 0.002, "loss": 2.3293, "step": 244770 }, { "epoch": 0.9462510244158897, "grad_norm": 0.10690746456384659, "learning_rate": 0.002, "loss": 2.3399, "step": 244780 }, { "epoch": 0.946289681619273, "grad_norm": 0.12674789130687714, "learning_rate": 0.002, "loss": 2.3397, "step": 244790 }, { "epoch": 0.9463283388226562, "grad_norm": 0.10173221677541733, "learning_rate": 0.002, "loss": 2.3426, "step": 244800 }, { "epoch": 0.9463669960260395, "grad_norm": 0.12079965323209763, "learning_rate": 0.002, "loss": 2.3347, "step": 244810 }, { "epoch": 0.9464056532294227, "grad_norm": 0.10707973688840866, "learning_rate": 0.002, "loss": 2.3269, "step": 244820 }, { "epoch": 0.9464443104328061, "grad_norm": 0.0917050912976265, "learning_rate": 0.002, "loss": 2.3267, "step": 244830 }, { "epoch": 0.9464829676361893, "grad_norm": 0.10524480789899826, "learning_rate": 0.002, "loss": 2.34, "step": 244840 }, { "epoch": 0.9465216248395726, "grad_norm": 0.10164222121238708, "learning_rate": 0.002, "loss": 2.3261, "step": 244850 }, { "epoch": 0.9465602820429558, "grad_norm": 0.09384514391422272, "learning_rate": 0.002, "loss": 2.3378, "step": 244860 }, { "epoch": 0.9465989392463392, "grad_norm": 0.10381849110126495, "learning_rate": 0.002, "loss": 2.3619, "step": 244870 }, { "epoch": 0.9466375964497225, "grad_norm": 0.10301042348146439, "learning_rate": 0.002, "loss": 2.3379, "step": 244880 }, { "epoch": 0.9466762536531057, "grad_norm": 0.09503357857465744, "learning_rate": 0.002, "loss": 2.34, "step": 244890 }, { "epoch": 0.946714910856489, "grad_norm": 0.164928138256073, "learning_rate": 0.002, "loss": 2.333, "step": 244900 }, { "epoch": 0.9467535680598723, "grad_norm": 0.11334381252527237, "learning_rate": 0.002, "loss": 2.3479, "step": 244910 }, { "epoch": 0.9467922252632556, "grad_norm": 0.11500794440507889, "learning_rate": 0.002, "loss": 2.3278, "step": 244920 }, { "epoch": 0.9468308824666388, "grad_norm": 0.10959529131650925, "learning_rate": 0.002, "loss": 2.3402, "step": 244930 }, { "epoch": 0.9468695396700221, "grad_norm": 0.10082434117794037, "learning_rate": 0.002, "loss": 2.3377, "step": 244940 }, { "epoch": 0.9469081968734054, "grad_norm": 0.1229538545012474, "learning_rate": 0.002, "loss": 2.3448, "step": 244950 }, { "epoch": 0.9469468540767887, "grad_norm": 0.11257387697696686, "learning_rate": 0.002, "loss": 2.3373, "step": 244960 }, { "epoch": 0.9469855112801719, "grad_norm": 0.08525936305522919, "learning_rate": 0.002, "loss": 2.3379, "step": 244970 }, { "epoch": 0.9470241684835552, "grad_norm": 0.12079308927059174, "learning_rate": 0.002, "loss": 2.3453, "step": 244980 }, { "epoch": 0.9470628256869386, "grad_norm": 0.09743160009384155, "learning_rate": 0.002, "loss": 2.3365, "step": 244990 }, { "epoch": 0.9471014828903218, "grad_norm": 0.10047302395105362, "learning_rate": 0.002, "loss": 2.3453, "step": 245000 }, { "epoch": 0.9471401400937051, "grad_norm": 0.10854795575141907, "learning_rate": 0.002, "loss": 2.352, "step": 245010 }, { "epoch": 0.9471787972970883, "grad_norm": 0.11618287861347198, "learning_rate": 0.002, "loss": 2.3384, "step": 245020 }, { "epoch": 0.9472174545004716, "grad_norm": 0.10113832354545593, "learning_rate": 0.002, "loss": 2.3343, "step": 245030 }, { "epoch": 0.9472561117038549, "grad_norm": 0.2170974165201187, "learning_rate": 0.002, "loss": 2.3465, "step": 245040 }, { "epoch": 0.9472947689072382, "grad_norm": 0.10892549902200699, "learning_rate": 0.002, "loss": 2.3433, "step": 245050 }, { "epoch": 0.9473334261106214, "grad_norm": 0.09819786250591278, "learning_rate": 0.002, "loss": 2.3479, "step": 245060 }, { "epoch": 0.9473720833140047, "grad_norm": 0.12030044943094254, "learning_rate": 0.002, "loss": 2.3401, "step": 245070 }, { "epoch": 0.947410740517388, "grad_norm": 0.1042487770318985, "learning_rate": 0.002, "loss": 2.3352, "step": 245080 }, { "epoch": 0.9474493977207713, "grad_norm": 0.11309467256069183, "learning_rate": 0.002, "loss": 2.337, "step": 245090 }, { "epoch": 0.9474880549241546, "grad_norm": 0.10138697177171707, "learning_rate": 0.002, "loss": 2.3329, "step": 245100 }, { "epoch": 0.9475267121275378, "grad_norm": 0.09876588732004166, "learning_rate": 0.002, "loss": 2.3448, "step": 245110 }, { "epoch": 0.9475653693309212, "grad_norm": 0.11651773750782013, "learning_rate": 0.002, "loss": 2.3354, "step": 245120 }, { "epoch": 0.9476040265343044, "grad_norm": 0.11628615111112595, "learning_rate": 0.002, "loss": 2.3323, "step": 245130 }, { "epoch": 0.9476426837376877, "grad_norm": 0.10182247310876846, "learning_rate": 0.002, "loss": 2.3475, "step": 245140 }, { "epoch": 0.9476813409410709, "grad_norm": 0.09684959799051285, "learning_rate": 0.002, "loss": 2.3382, "step": 245150 }, { "epoch": 0.9477199981444543, "grad_norm": 0.10772944241762161, "learning_rate": 0.002, "loss": 2.342, "step": 245160 }, { "epoch": 0.9477586553478375, "grad_norm": 0.10510464757680893, "learning_rate": 0.002, "loss": 2.3478, "step": 245170 }, { "epoch": 0.9477973125512208, "grad_norm": 0.10853858292102814, "learning_rate": 0.002, "loss": 2.3546, "step": 245180 }, { "epoch": 0.947835969754604, "grad_norm": 0.10508356243371964, "learning_rate": 0.002, "loss": 2.3358, "step": 245190 }, { "epoch": 0.9478746269579873, "grad_norm": 0.10015291720628738, "learning_rate": 0.002, "loss": 2.3276, "step": 245200 }, { "epoch": 0.9479132841613707, "grad_norm": 0.10214115679264069, "learning_rate": 0.002, "loss": 2.3339, "step": 245210 }, { "epoch": 0.9479519413647539, "grad_norm": 0.11255674064159393, "learning_rate": 0.002, "loss": 2.3364, "step": 245220 }, { "epoch": 0.9479905985681372, "grad_norm": 0.10145910829305649, "learning_rate": 0.002, "loss": 2.349, "step": 245230 }, { "epoch": 0.9480292557715204, "grad_norm": 0.11434584110975266, "learning_rate": 0.002, "loss": 2.3633, "step": 245240 }, { "epoch": 0.9480679129749038, "grad_norm": 0.11106500774621964, "learning_rate": 0.002, "loss": 2.3351, "step": 245250 }, { "epoch": 0.948106570178287, "grad_norm": 0.09371323138475418, "learning_rate": 0.002, "loss": 2.3428, "step": 245260 }, { "epoch": 0.9481452273816703, "grad_norm": 0.10655788332223892, "learning_rate": 0.002, "loss": 2.3362, "step": 245270 }, { "epoch": 0.9481838845850535, "grad_norm": 0.10724660009145737, "learning_rate": 0.002, "loss": 2.3406, "step": 245280 }, { "epoch": 0.9482225417884369, "grad_norm": 0.10476400703191757, "learning_rate": 0.002, "loss": 2.3423, "step": 245290 }, { "epoch": 0.9482611989918202, "grad_norm": 0.10649838298559189, "learning_rate": 0.002, "loss": 2.3286, "step": 245300 }, { "epoch": 0.9482998561952034, "grad_norm": 0.10631929337978363, "learning_rate": 0.002, "loss": 2.3354, "step": 245310 }, { "epoch": 0.9483385133985867, "grad_norm": 0.11776553839445114, "learning_rate": 0.002, "loss": 2.344, "step": 245320 }, { "epoch": 0.94837717060197, "grad_norm": 0.11228649318218231, "learning_rate": 0.002, "loss": 2.336, "step": 245330 }, { "epoch": 0.9484158278053533, "grad_norm": 0.10142796486616135, "learning_rate": 0.002, "loss": 2.3376, "step": 245340 }, { "epoch": 0.9484544850087365, "grad_norm": 0.11260076612234116, "learning_rate": 0.002, "loss": 2.3331, "step": 245350 }, { "epoch": 0.9484931422121198, "grad_norm": 0.10552968829870224, "learning_rate": 0.002, "loss": 2.3458, "step": 245360 }, { "epoch": 0.948531799415503, "grad_norm": 0.08846011757850647, "learning_rate": 0.002, "loss": 2.3451, "step": 245370 }, { "epoch": 0.9485704566188864, "grad_norm": 0.11995434761047363, "learning_rate": 0.002, "loss": 2.3388, "step": 245380 }, { "epoch": 0.9486091138222696, "grad_norm": 0.10945376753807068, "learning_rate": 0.002, "loss": 2.3563, "step": 245390 }, { "epoch": 0.9486477710256529, "grad_norm": 0.10176488757133484, "learning_rate": 0.002, "loss": 2.3518, "step": 245400 }, { "epoch": 0.9486864282290361, "grad_norm": 0.10432841628789902, "learning_rate": 0.002, "loss": 2.3455, "step": 245410 }, { "epoch": 0.9487250854324195, "grad_norm": 0.10382282733917236, "learning_rate": 0.002, "loss": 2.3448, "step": 245420 }, { "epoch": 0.9487637426358028, "grad_norm": 0.09938489645719528, "learning_rate": 0.002, "loss": 2.3339, "step": 245430 }, { "epoch": 0.948802399839186, "grad_norm": 0.10734734684228897, "learning_rate": 0.002, "loss": 2.3433, "step": 245440 }, { "epoch": 0.9488410570425693, "grad_norm": 0.11344361305236816, "learning_rate": 0.002, "loss": 2.3329, "step": 245450 }, { "epoch": 0.9488797142459526, "grad_norm": 0.09626557677984238, "learning_rate": 0.002, "loss": 2.3321, "step": 245460 }, { "epoch": 0.9489183714493359, "grad_norm": 0.10190510004758835, "learning_rate": 0.002, "loss": 2.336, "step": 245470 }, { "epoch": 0.9489570286527191, "grad_norm": 0.10002933442592621, "learning_rate": 0.002, "loss": 2.3347, "step": 245480 }, { "epoch": 0.9489956858561024, "grad_norm": 0.09390109032392502, "learning_rate": 0.002, "loss": 2.3295, "step": 245490 }, { "epoch": 0.9490343430594858, "grad_norm": 0.1125788688659668, "learning_rate": 0.002, "loss": 2.3212, "step": 245500 }, { "epoch": 0.949073000262869, "grad_norm": 0.09098855406045914, "learning_rate": 0.002, "loss": 2.3527, "step": 245510 }, { "epoch": 0.9491116574662523, "grad_norm": 0.09397424012422562, "learning_rate": 0.002, "loss": 2.3477, "step": 245520 }, { "epoch": 0.9491503146696355, "grad_norm": 0.1099240854382515, "learning_rate": 0.002, "loss": 2.3543, "step": 245530 }, { "epoch": 0.9491889718730189, "grad_norm": 0.10843119025230408, "learning_rate": 0.002, "loss": 2.3405, "step": 245540 }, { "epoch": 0.9492276290764021, "grad_norm": 0.11342742294073105, "learning_rate": 0.002, "loss": 2.3459, "step": 245550 }, { "epoch": 0.9492662862797854, "grad_norm": 0.1284036487340927, "learning_rate": 0.002, "loss": 2.3443, "step": 245560 }, { "epoch": 0.9493049434831686, "grad_norm": 0.09022320806980133, "learning_rate": 0.002, "loss": 2.3328, "step": 245570 }, { "epoch": 0.9493436006865519, "grad_norm": 0.09618698060512543, "learning_rate": 0.002, "loss": 2.3324, "step": 245580 }, { "epoch": 0.9493822578899352, "grad_norm": 0.09820694476366043, "learning_rate": 0.002, "loss": 2.3415, "step": 245590 }, { "epoch": 0.9494209150933185, "grad_norm": 0.10224698483943939, "learning_rate": 0.002, "loss": 2.3379, "step": 245600 }, { "epoch": 0.9494595722967017, "grad_norm": 0.11915197968482971, "learning_rate": 0.002, "loss": 2.3257, "step": 245610 }, { "epoch": 0.949498229500085, "grad_norm": 0.11021997034549713, "learning_rate": 0.002, "loss": 2.344, "step": 245620 }, { "epoch": 0.9495368867034684, "grad_norm": 0.11132847517728806, "learning_rate": 0.002, "loss": 2.3365, "step": 245630 }, { "epoch": 0.9495755439068516, "grad_norm": 0.1295585036277771, "learning_rate": 0.002, "loss": 2.3454, "step": 245640 }, { "epoch": 0.9496142011102349, "grad_norm": 0.10676326602697372, "learning_rate": 0.002, "loss": 2.3375, "step": 245650 }, { "epoch": 0.9496528583136181, "grad_norm": 0.10247169435024261, "learning_rate": 0.002, "loss": 2.3383, "step": 245660 }, { "epoch": 0.9496915155170015, "grad_norm": 0.11274540424346924, "learning_rate": 0.002, "loss": 2.3369, "step": 245670 }, { "epoch": 0.9497301727203847, "grad_norm": 0.0945650115609169, "learning_rate": 0.002, "loss": 2.3392, "step": 245680 }, { "epoch": 0.949768829923768, "grad_norm": 0.09697515517473221, "learning_rate": 0.002, "loss": 2.3514, "step": 245690 }, { "epoch": 0.9498074871271512, "grad_norm": 0.10053026676177979, "learning_rate": 0.002, "loss": 2.3333, "step": 245700 }, { "epoch": 0.9498461443305346, "grad_norm": 0.10225459188222885, "learning_rate": 0.002, "loss": 2.3454, "step": 245710 }, { "epoch": 0.9498848015339179, "grad_norm": 0.10824661701917648, "learning_rate": 0.002, "loss": 2.3557, "step": 245720 }, { "epoch": 0.9499234587373011, "grad_norm": 0.09760218113660812, "learning_rate": 0.002, "loss": 2.3337, "step": 245730 }, { "epoch": 0.9499621159406844, "grad_norm": 0.09142498672008514, "learning_rate": 0.002, "loss": 2.3432, "step": 245740 }, { "epoch": 0.9500007731440676, "grad_norm": 0.11499843001365662, "learning_rate": 0.002, "loss": 2.3399, "step": 245750 }, { "epoch": 0.950039430347451, "grad_norm": 0.09203146398067474, "learning_rate": 0.002, "loss": 2.3362, "step": 245760 }, { "epoch": 0.9500780875508342, "grad_norm": 0.11725558340549469, "learning_rate": 0.002, "loss": 2.3397, "step": 245770 }, { "epoch": 0.9501167447542175, "grad_norm": 0.11182353645563126, "learning_rate": 0.002, "loss": 2.325, "step": 245780 }, { "epoch": 0.9501554019576007, "grad_norm": 0.09739293158054352, "learning_rate": 0.002, "loss": 2.3256, "step": 245790 }, { "epoch": 0.9501940591609841, "grad_norm": 0.11021154373884201, "learning_rate": 0.002, "loss": 2.3307, "step": 245800 }, { "epoch": 0.9502327163643673, "grad_norm": 0.0996776819229126, "learning_rate": 0.002, "loss": 2.336, "step": 245810 }, { "epoch": 0.9502713735677506, "grad_norm": 0.10235131531953812, "learning_rate": 0.002, "loss": 2.3472, "step": 245820 }, { "epoch": 0.9503100307711339, "grad_norm": 0.10729663819074631, "learning_rate": 0.002, "loss": 2.3305, "step": 245830 }, { "epoch": 0.9503486879745172, "grad_norm": 0.10996519029140472, "learning_rate": 0.002, "loss": 2.3443, "step": 245840 }, { "epoch": 0.9503873451779005, "grad_norm": 0.10459493100643158, "learning_rate": 0.002, "loss": 2.3462, "step": 245850 }, { "epoch": 0.9504260023812837, "grad_norm": 0.11086221784353256, "learning_rate": 0.002, "loss": 2.3412, "step": 245860 }, { "epoch": 0.950464659584667, "grad_norm": 0.0975433811545372, "learning_rate": 0.002, "loss": 2.336, "step": 245870 }, { "epoch": 0.9505033167880503, "grad_norm": 0.10561605542898178, "learning_rate": 0.002, "loss": 2.3257, "step": 245880 }, { "epoch": 0.9505419739914336, "grad_norm": 0.11538262665271759, "learning_rate": 0.002, "loss": 2.3293, "step": 245890 }, { "epoch": 0.9505806311948168, "grad_norm": 0.20161397755146027, "learning_rate": 0.002, "loss": 2.3555, "step": 245900 }, { "epoch": 0.9506192883982001, "grad_norm": 0.11669044941663742, "learning_rate": 0.002, "loss": 2.3396, "step": 245910 }, { "epoch": 0.9506579456015835, "grad_norm": 0.09776271134614944, "learning_rate": 0.002, "loss": 2.3372, "step": 245920 }, { "epoch": 0.9506966028049667, "grad_norm": 0.1042805090546608, "learning_rate": 0.002, "loss": 2.3218, "step": 245930 }, { "epoch": 0.95073526000835, "grad_norm": 0.15709929168224335, "learning_rate": 0.002, "loss": 2.3546, "step": 245940 }, { "epoch": 0.9507739172117332, "grad_norm": 0.11055158823728561, "learning_rate": 0.002, "loss": 2.3281, "step": 245950 }, { "epoch": 0.9508125744151165, "grad_norm": 0.12504521012306213, "learning_rate": 0.002, "loss": 2.3481, "step": 245960 }, { "epoch": 0.9508512316184998, "grad_norm": 0.09743013232946396, "learning_rate": 0.002, "loss": 2.3701, "step": 245970 }, { "epoch": 0.9508898888218831, "grad_norm": 0.11348041146993637, "learning_rate": 0.002, "loss": 2.3419, "step": 245980 }, { "epoch": 0.9509285460252663, "grad_norm": 0.0906619057059288, "learning_rate": 0.002, "loss": 2.326, "step": 245990 }, { "epoch": 0.9509672032286496, "grad_norm": 0.10841189324855804, "learning_rate": 0.002, "loss": 2.3346, "step": 246000 }, { "epoch": 0.951005860432033, "grad_norm": 0.09922589361667633, "learning_rate": 0.002, "loss": 2.3529, "step": 246010 }, { "epoch": 0.9510445176354162, "grad_norm": 0.11780747771263123, "learning_rate": 0.002, "loss": 2.3465, "step": 246020 }, { "epoch": 0.9510831748387994, "grad_norm": 0.11119019240140915, "learning_rate": 0.002, "loss": 2.3361, "step": 246030 }, { "epoch": 0.9511218320421827, "grad_norm": 0.09635666012763977, "learning_rate": 0.002, "loss": 2.3381, "step": 246040 }, { "epoch": 0.9511604892455661, "grad_norm": 0.10957963764667511, "learning_rate": 0.002, "loss": 2.3533, "step": 246050 }, { "epoch": 0.9511991464489493, "grad_norm": 0.10559819638729095, "learning_rate": 0.002, "loss": 2.3395, "step": 246060 }, { "epoch": 0.9512378036523326, "grad_norm": 0.12483032792806625, "learning_rate": 0.002, "loss": 2.3403, "step": 246070 }, { "epoch": 0.9512764608557158, "grad_norm": 0.09919989109039307, "learning_rate": 0.002, "loss": 2.3293, "step": 246080 }, { "epoch": 0.9513151180590992, "grad_norm": 0.14158926904201508, "learning_rate": 0.002, "loss": 2.3509, "step": 246090 }, { "epoch": 0.9513537752624824, "grad_norm": 0.09532709419727325, "learning_rate": 0.002, "loss": 2.3332, "step": 246100 }, { "epoch": 0.9513924324658657, "grad_norm": 0.11871691048145294, "learning_rate": 0.002, "loss": 2.3471, "step": 246110 }, { "epoch": 0.9514310896692489, "grad_norm": 0.10590434819459915, "learning_rate": 0.002, "loss": 2.3452, "step": 246120 }, { "epoch": 0.9514697468726322, "grad_norm": 0.09818745404481888, "learning_rate": 0.002, "loss": 2.345, "step": 246130 }, { "epoch": 0.9515084040760156, "grad_norm": 0.09181661903858185, "learning_rate": 0.002, "loss": 2.3346, "step": 246140 }, { "epoch": 0.9515470612793988, "grad_norm": 0.11111031472682953, "learning_rate": 0.002, "loss": 2.3546, "step": 246150 }, { "epoch": 0.9515857184827821, "grad_norm": 0.10768986493349075, "learning_rate": 0.002, "loss": 2.3417, "step": 246160 }, { "epoch": 0.9516243756861653, "grad_norm": 0.11306337267160416, "learning_rate": 0.002, "loss": 2.3212, "step": 246170 }, { "epoch": 0.9516630328895487, "grad_norm": 0.12405366450548172, "learning_rate": 0.002, "loss": 2.3328, "step": 246180 }, { "epoch": 0.9517016900929319, "grad_norm": 0.12463579326868057, "learning_rate": 0.002, "loss": 2.3535, "step": 246190 }, { "epoch": 0.9517403472963152, "grad_norm": 0.09911695122718811, "learning_rate": 0.002, "loss": 2.3351, "step": 246200 }, { "epoch": 0.9517790044996984, "grad_norm": 0.09321106225252151, "learning_rate": 0.002, "loss": 2.3441, "step": 246210 }, { "epoch": 0.9518176617030818, "grad_norm": 0.10201654583215714, "learning_rate": 0.002, "loss": 2.3316, "step": 246220 }, { "epoch": 0.951856318906465, "grad_norm": 0.11066223680973053, "learning_rate": 0.002, "loss": 2.3211, "step": 246230 }, { "epoch": 0.9518949761098483, "grad_norm": 0.10625241696834564, "learning_rate": 0.002, "loss": 2.3271, "step": 246240 }, { "epoch": 0.9519336333132316, "grad_norm": 0.10942903161048889, "learning_rate": 0.002, "loss": 2.3426, "step": 246250 }, { "epoch": 0.9519722905166149, "grad_norm": 0.08949451148509979, "learning_rate": 0.002, "loss": 2.3543, "step": 246260 }, { "epoch": 0.9520109477199982, "grad_norm": 0.10243234783411026, "learning_rate": 0.002, "loss": 2.3211, "step": 246270 }, { "epoch": 0.9520496049233814, "grad_norm": 0.09917983412742615, "learning_rate": 0.002, "loss": 2.3353, "step": 246280 }, { "epoch": 0.9520882621267647, "grad_norm": 0.37809163331985474, "learning_rate": 0.002, "loss": 2.3376, "step": 246290 }, { "epoch": 0.9521269193301479, "grad_norm": 0.09876684844493866, "learning_rate": 0.002, "loss": 2.3432, "step": 246300 }, { "epoch": 0.9521655765335313, "grad_norm": 0.09615564346313477, "learning_rate": 0.002, "loss": 2.3429, "step": 246310 }, { "epoch": 0.9522042337369145, "grad_norm": 0.09892557561397552, "learning_rate": 0.002, "loss": 2.3438, "step": 246320 }, { "epoch": 0.9522428909402978, "grad_norm": 0.12730903923511505, "learning_rate": 0.002, "loss": 2.3416, "step": 246330 }, { "epoch": 0.952281548143681, "grad_norm": 0.09999363869428635, "learning_rate": 0.002, "loss": 2.3298, "step": 246340 }, { "epoch": 0.9523202053470644, "grad_norm": 0.11508851498365402, "learning_rate": 0.002, "loss": 2.3443, "step": 246350 }, { "epoch": 0.9523588625504477, "grad_norm": 0.10577461868524551, "learning_rate": 0.002, "loss": 2.3362, "step": 246360 }, { "epoch": 0.9523975197538309, "grad_norm": 0.08935672789812088, "learning_rate": 0.002, "loss": 2.3216, "step": 246370 }, { "epoch": 0.9524361769572142, "grad_norm": 0.11016064137220383, "learning_rate": 0.002, "loss": 2.3462, "step": 246380 }, { "epoch": 0.9524748341605975, "grad_norm": 0.10751229524612427, "learning_rate": 0.002, "loss": 2.3396, "step": 246390 }, { "epoch": 0.9525134913639808, "grad_norm": 0.10001815110445023, "learning_rate": 0.002, "loss": 2.3404, "step": 246400 }, { "epoch": 0.952552148567364, "grad_norm": 0.10038241744041443, "learning_rate": 0.002, "loss": 2.3411, "step": 246410 }, { "epoch": 0.9525908057707473, "grad_norm": 0.10372153669595718, "learning_rate": 0.002, "loss": 2.3483, "step": 246420 }, { "epoch": 0.9526294629741306, "grad_norm": 0.10932927578687668, "learning_rate": 0.002, "loss": 2.3425, "step": 246430 }, { "epoch": 0.9526681201775139, "grad_norm": 0.13651612401008606, "learning_rate": 0.002, "loss": 2.3267, "step": 246440 }, { "epoch": 0.9527067773808972, "grad_norm": 0.09618911892175674, "learning_rate": 0.002, "loss": 2.343, "step": 246450 }, { "epoch": 0.9527454345842804, "grad_norm": 0.10881288349628448, "learning_rate": 0.002, "loss": 2.3385, "step": 246460 }, { "epoch": 0.9527840917876638, "grad_norm": 0.09795135259628296, "learning_rate": 0.002, "loss": 2.3333, "step": 246470 }, { "epoch": 0.952822748991047, "grad_norm": 0.12024227529764175, "learning_rate": 0.002, "loss": 2.3197, "step": 246480 }, { "epoch": 0.9528614061944303, "grad_norm": 0.10273991525173187, "learning_rate": 0.002, "loss": 2.3304, "step": 246490 }, { "epoch": 0.9529000633978135, "grad_norm": 0.08923361450433731, "learning_rate": 0.002, "loss": 2.3433, "step": 246500 }, { "epoch": 0.9529387206011968, "grad_norm": 0.1068204715847969, "learning_rate": 0.002, "loss": 2.336, "step": 246510 }, { "epoch": 0.9529773778045801, "grad_norm": 0.11845804005861282, "learning_rate": 0.002, "loss": 2.3502, "step": 246520 }, { "epoch": 0.9530160350079634, "grad_norm": 0.09708742052316666, "learning_rate": 0.002, "loss": 2.3441, "step": 246530 }, { "epoch": 0.9530546922113466, "grad_norm": 0.09450454264879227, "learning_rate": 0.002, "loss": 2.3536, "step": 246540 }, { "epoch": 0.9530933494147299, "grad_norm": 0.09565430879592896, "learning_rate": 0.002, "loss": 2.3334, "step": 246550 }, { "epoch": 0.9531320066181133, "grad_norm": 0.11724113672971725, "learning_rate": 0.002, "loss": 2.3426, "step": 246560 }, { "epoch": 0.9531706638214965, "grad_norm": 0.10473284125328064, "learning_rate": 0.002, "loss": 2.3348, "step": 246570 }, { "epoch": 0.9532093210248798, "grad_norm": 0.11020340025424957, "learning_rate": 0.002, "loss": 2.3395, "step": 246580 }, { "epoch": 0.953247978228263, "grad_norm": 0.09498760104179382, "learning_rate": 0.002, "loss": 2.3399, "step": 246590 }, { "epoch": 0.9532866354316464, "grad_norm": 0.09037910401821136, "learning_rate": 0.002, "loss": 2.3336, "step": 246600 }, { "epoch": 0.9533252926350296, "grad_norm": 0.09975023567676544, "learning_rate": 0.002, "loss": 2.3427, "step": 246610 }, { "epoch": 0.9533639498384129, "grad_norm": 0.1050228402018547, "learning_rate": 0.002, "loss": 2.3392, "step": 246620 }, { "epoch": 0.9534026070417961, "grad_norm": 0.10599356889724731, "learning_rate": 0.002, "loss": 2.3442, "step": 246630 }, { "epoch": 0.9534412642451795, "grad_norm": 0.10019141435623169, "learning_rate": 0.002, "loss": 2.325, "step": 246640 }, { "epoch": 0.9534799214485628, "grad_norm": 0.10532790422439575, "learning_rate": 0.002, "loss": 2.3443, "step": 246650 }, { "epoch": 0.953518578651946, "grad_norm": 0.11821125447750092, "learning_rate": 0.002, "loss": 2.3328, "step": 246660 }, { "epoch": 0.9535572358553293, "grad_norm": 0.10337822139263153, "learning_rate": 0.002, "loss": 2.3373, "step": 246670 }, { "epoch": 0.9535958930587125, "grad_norm": 0.13625840842723846, "learning_rate": 0.002, "loss": 2.3549, "step": 246680 }, { "epoch": 0.9536345502620959, "grad_norm": 0.11718141287565231, "learning_rate": 0.002, "loss": 2.3599, "step": 246690 }, { "epoch": 0.9536732074654791, "grad_norm": 0.10749223828315735, "learning_rate": 0.002, "loss": 2.3411, "step": 246700 }, { "epoch": 0.9537118646688624, "grad_norm": 0.13698039948940277, "learning_rate": 0.002, "loss": 2.3432, "step": 246710 }, { "epoch": 0.9537505218722456, "grad_norm": 0.10092923790216446, "learning_rate": 0.002, "loss": 2.3383, "step": 246720 }, { "epoch": 0.953789179075629, "grad_norm": 0.10021331161260605, "learning_rate": 0.002, "loss": 2.3504, "step": 246730 }, { "epoch": 0.9538278362790122, "grad_norm": 0.09901577979326248, "learning_rate": 0.002, "loss": 2.3402, "step": 246740 }, { "epoch": 0.9538664934823955, "grad_norm": 0.11905761808156967, "learning_rate": 0.002, "loss": 2.3208, "step": 246750 }, { "epoch": 0.9539051506857787, "grad_norm": 0.09991281479597092, "learning_rate": 0.002, "loss": 2.3313, "step": 246760 }, { "epoch": 0.9539438078891621, "grad_norm": 0.11390586197376251, "learning_rate": 0.002, "loss": 2.343, "step": 246770 }, { "epoch": 0.9539824650925454, "grad_norm": 0.09168101847171783, "learning_rate": 0.002, "loss": 2.3461, "step": 246780 }, { "epoch": 0.9540211222959286, "grad_norm": 0.09830158948898315, "learning_rate": 0.002, "loss": 2.3431, "step": 246790 }, { "epoch": 0.9540597794993119, "grad_norm": 0.11884202063083649, "learning_rate": 0.002, "loss": 2.3483, "step": 246800 }, { "epoch": 0.9540984367026952, "grad_norm": 0.08972768485546112, "learning_rate": 0.002, "loss": 2.3254, "step": 246810 }, { "epoch": 0.9541370939060785, "grad_norm": 0.10713957995176315, "learning_rate": 0.002, "loss": 2.3398, "step": 246820 }, { "epoch": 0.9541757511094617, "grad_norm": 0.09925784915685654, "learning_rate": 0.002, "loss": 2.3312, "step": 246830 }, { "epoch": 0.954214408312845, "grad_norm": 0.10352442413568497, "learning_rate": 0.002, "loss": 2.3489, "step": 246840 }, { "epoch": 0.9542530655162283, "grad_norm": 0.1348172277212143, "learning_rate": 0.002, "loss": 2.3353, "step": 246850 }, { "epoch": 0.9542917227196116, "grad_norm": 0.09868699312210083, "learning_rate": 0.002, "loss": 2.3364, "step": 246860 }, { "epoch": 0.9543303799229949, "grad_norm": 0.08707798272371292, "learning_rate": 0.002, "loss": 2.3366, "step": 246870 }, { "epoch": 0.9543690371263781, "grad_norm": 0.10370556265115738, "learning_rate": 0.002, "loss": 2.3386, "step": 246880 }, { "epoch": 0.9544076943297614, "grad_norm": 0.10752521455287933, "learning_rate": 0.002, "loss": 2.3396, "step": 246890 }, { "epoch": 0.9544463515331447, "grad_norm": 0.10657340288162231, "learning_rate": 0.002, "loss": 2.3277, "step": 246900 }, { "epoch": 0.954485008736528, "grad_norm": 0.09765563160181046, "learning_rate": 0.002, "loss": 2.3563, "step": 246910 }, { "epoch": 0.9545236659399112, "grad_norm": 0.11601200699806213, "learning_rate": 0.002, "loss": 2.3335, "step": 246920 }, { "epoch": 0.9545623231432945, "grad_norm": 0.10320843011140823, "learning_rate": 0.002, "loss": 2.3409, "step": 246930 }, { "epoch": 0.9546009803466778, "grad_norm": 0.09450047463178635, "learning_rate": 0.002, "loss": 2.3352, "step": 246940 }, { "epoch": 0.9546396375500611, "grad_norm": 0.11228545755147934, "learning_rate": 0.002, "loss": 2.338, "step": 246950 }, { "epoch": 0.9546782947534443, "grad_norm": 0.10331697016954422, "learning_rate": 0.002, "loss": 2.344, "step": 246960 }, { "epoch": 0.9547169519568276, "grad_norm": 0.10116118937730789, "learning_rate": 0.002, "loss": 2.3389, "step": 246970 }, { "epoch": 0.954755609160211, "grad_norm": 0.11346136033535004, "learning_rate": 0.002, "loss": 2.3524, "step": 246980 }, { "epoch": 0.9547942663635942, "grad_norm": 0.1188848614692688, "learning_rate": 0.002, "loss": 2.3419, "step": 246990 }, { "epoch": 0.9548329235669775, "grad_norm": 0.11899816989898682, "learning_rate": 0.002, "loss": 2.3447, "step": 247000 }, { "epoch": 0.9548715807703607, "grad_norm": 0.0926302894949913, "learning_rate": 0.002, "loss": 2.339, "step": 247010 }, { "epoch": 0.9549102379737441, "grad_norm": 0.11829515546560287, "learning_rate": 0.002, "loss": 2.3379, "step": 247020 }, { "epoch": 0.9549488951771273, "grad_norm": 0.10293518751859665, "learning_rate": 0.002, "loss": 2.3378, "step": 247030 }, { "epoch": 0.9549875523805106, "grad_norm": 0.1046319380402565, "learning_rate": 0.002, "loss": 2.3466, "step": 247040 }, { "epoch": 0.9550262095838938, "grad_norm": 0.11046954244375229, "learning_rate": 0.002, "loss": 2.3403, "step": 247050 }, { "epoch": 0.9550648667872771, "grad_norm": 0.10719576478004456, "learning_rate": 0.002, "loss": 2.3382, "step": 247060 }, { "epoch": 0.9551035239906605, "grad_norm": 0.1009703278541565, "learning_rate": 0.002, "loss": 2.3428, "step": 247070 }, { "epoch": 0.9551421811940437, "grad_norm": 0.09082961827516556, "learning_rate": 0.002, "loss": 2.3352, "step": 247080 }, { "epoch": 0.955180838397427, "grad_norm": 0.11020997166633606, "learning_rate": 0.002, "loss": 2.3343, "step": 247090 }, { "epoch": 0.9552194956008102, "grad_norm": 0.10537709295749664, "learning_rate": 0.002, "loss": 2.3267, "step": 247100 }, { "epoch": 0.9552581528041936, "grad_norm": 0.10682892054319382, "learning_rate": 0.002, "loss": 2.3446, "step": 247110 }, { "epoch": 0.9552968100075768, "grad_norm": 0.10063782334327698, "learning_rate": 0.002, "loss": 2.3541, "step": 247120 }, { "epoch": 0.9553354672109601, "grad_norm": 0.10315974056720734, "learning_rate": 0.002, "loss": 2.3296, "step": 247130 }, { "epoch": 0.9553741244143433, "grad_norm": 0.09505379945039749, "learning_rate": 0.002, "loss": 2.3454, "step": 247140 }, { "epoch": 0.9554127816177267, "grad_norm": 0.10029108077287674, "learning_rate": 0.002, "loss": 2.3379, "step": 247150 }, { "epoch": 0.9554514388211099, "grad_norm": 0.0997537150979042, "learning_rate": 0.002, "loss": 2.348, "step": 247160 }, { "epoch": 0.9554900960244932, "grad_norm": 0.10752159357070923, "learning_rate": 0.002, "loss": 2.3454, "step": 247170 }, { "epoch": 0.9555287532278764, "grad_norm": 0.10309893637895584, "learning_rate": 0.002, "loss": 2.3453, "step": 247180 }, { "epoch": 0.9555674104312598, "grad_norm": 0.10903715342283249, "learning_rate": 0.002, "loss": 2.3377, "step": 247190 }, { "epoch": 0.9556060676346431, "grad_norm": 0.09624198824167252, "learning_rate": 0.002, "loss": 2.3421, "step": 247200 }, { "epoch": 0.9556447248380263, "grad_norm": 0.11103501915931702, "learning_rate": 0.002, "loss": 2.3249, "step": 247210 }, { "epoch": 0.9556833820414096, "grad_norm": 0.11439616978168488, "learning_rate": 0.002, "loss": 2.3368, "step": 247220 }, { "epoch": 0.9557220392447928, "grad_norm": 0.14776815474033356, "learning_rate": 0.002, "loss": 2.3418, "step": 247230 }, { "epoch": 0.9557606964481762, "grad_norm": 0.09352473169565201, "learning_rate": 0.002, "loss": 2.3438, "step": 247240 }, { "epoch": 0.9557993536515594, "grad_norm": 0.10370460897684097, "learning_rate": 0.002, "loss": 2.333, "step": 247250 }, { "epoch": 0.9558380108549427, "grad_norm": 0.12870363891124725, "learning_rate": 0.002, "loss": 2.3309, "step": 247260 }, { "epoch": 0.9558766680583259, "grad_norm": 0.10398175567388535, "learning_rate": 0.002, "loss": 2.3349, "step": 247270 }, { "epoch": 0.9559153252617093, "grad_norm": 0.09815921634435654, "learning_rate": 0.002, "loss": 2.3309, "step": 247280 }, { "epoch": 0.9559539824650926, "grad_norm": 0.20846377313137054, "learning_rate": 0.002, "loss": 2.3493, "step": 247290 }, { "epoch": 0.9559926396684758, "grad_norm": 0.10216788202524185, "learning_rate": 0.002, "loss": 2.3349, "step": 247300 }, { "epoch": 0.9560312968718591, "grad_norm": 0.09926524013280869, "learning_rate": 0.002, "loss": 2.3425, "step": 247310 }, { "epoch": 0.9560699540752424, "grad_norm": 0.11557048559188843, "learning_rate": 0.002, "loss": 2.3503, "step": 247320 }, { "epoch": 0.9561086112786257, "grad_norm": 0.12979860603809357, "learning_rate": 0.002, "loss": 2.3512, "step": 247330 }, { "epoch": 0.9561472684820089, "grad_norm": 0.09846516698598862, "learning_rate": 0.002, "loss": 2.3276, "step": 247340 }, { "epoch": 0.9561859256853922, "grad_norm": 0.10615224391222, "learning_rate": 0.002, "loss": 2.3472, "step": 247350 }, { "epoch": 0.9562245828887755, "grad_norm": 0.1250700205564499, "learning_rate": 0.002, "loss": 2.3251, "step": 247360 }, { "epoch": 0.9562632400921588, "grad_norm": 0.0990319475531578, "learning_rate": 0.002, "loss": 2.3425, "step": 247370 }, { "epoch": 0.956301897295542, "grad_norm": 0.1047218069434166, "learning_rate": 0.002, "loss": 2.3198, "step": 247380 }, { "epoch": 0.9563405544989253, "grad_norm": 0.11436039954423904, "learning_rate": 0.002, "loss": 2.348, "step": 247390 }, { "epoch": 0.9563792117023087, "grad_norm": 0.2365836203098297, "learning_rate": 0.002, "loss": 2.3471, "step": 247400 }, { "epoch": 0.9564178689056919, "grad_norm": 0.09409108012914658, "learning_rate": 0.002, "loss": 2.336, "step": 247410 }, { "epoch": 0.9564565261090752, "grad_norm": 0.09443030506372452, "learning_rate": 0.002, "loss": 2.3407, "step": 247420 }, { "epoch": 0.9564951833124584, "grad_norm": 0.10896280407905579, "learning_rate": 0.002, "loss": 2.3305, "step": 247430 }, { "epoch": 0.9565338405158417, "grad_norm": 0.11036523431539536, "learning_rate": 0.002, "loss": 2.3234, "step": 247440 }, { "epoch": 0.956572497719225, "grad_norm": 0.09874240309000015, "learning_rate": 0.002, "loss": 2.3517, "step": 247450 }, { "epoch": 0.9566111549226083, "grad_norm": 0.11801275610923767, "learning_rate": 0.002, "loss": 2.3387, "step": 247460 }, { "epoch": 0.9566498121259915, "grad_norm": 0.09368408471345901, "learning_rate": 0.002, "loss": 2.3469, "step": 247470 }, { "epoch": 0.9566884693293748, "grad_norm": 0.11651654541492462, "learning_rate": 0.002, "loss": 2.3589, "step": 247480 }, { "epoch": 0.9567271265327582, "grad_norm": 0.09981834888458252, "learning_rate": 0.002, "loss": 2.335, "step": 247490 }, { "epoch": 0.9567657837361414, "grad_norm": 0.11089000105857849, "learning_rate": 0.002, "loss": 2.3369, "step": 247500 }, { "epoch": 0.9568044409395247, "grad_norm": 0.11288763582706451, "learning_rate": 0.002, "loss": 2.3496, "step": 247510 }, { "epoch": 0.9568430981429079, "grad_norm": 0.12972544133663177, "learning_rate": 0.002, "loss": 2.3278, "step": 247520 }, { "epoch": 0.9568817553462913, "grad_norm": 0.11114118248224258, "learning_rate": 0.002, "loss": 2.3433, "step": 247530 }, { "epoch": 0.9569204125496745, "grad_norm": 0.09732469916343689, "learning_rate": 0.002, "loss": 2.3318, "step": 247540 }, { "epoch": 0.9569590697530578, "grad_norm": 0.09808913618326187, "learning_rate": 0.002, "loss": 2.3278, "step": 247550 }, { "epoch": 0.956997726956441, "grad_norm": 0.12151769548654556, "learning_rate": 0.002, "loss": 2.3397, "step": 247560 }, { "epoch": 0.9570363841598244, "grad_norm": 0.10195376724004745, "learning_rate": 0.002, "loss": 2.343, "step": 247570 }, { "epoch": 0.9570750413632076, "grad_norm": 0.11638251692056656, "learning_rate": 0.002, "loss": 2.336, "step": 247580 }, { "epoch": 0.9571136985665909, "grad_norm": 0.09858639538288116, "learning_rate": 0.002, "loss": 2.3286, "step": 247590 }, { "epoch": 0.9571523557699742, "grad_norm": 0.09813068062067032, "learning_rate": 0.002, "loss": 2.3371, "step": 247600 }, { "epoch": 0.9571910129733574, "grad_norm": 0.1054849699139595, "learning_rate": 0.002, "loss": 2.3412, "step": 247610 }, { "epoch": 0.9572296701767408, "grad_norm": 0.11740526556968689, "learning_rate": 0.002, "loss": 2.3465, "step": 247620 }, { "epoch": 0.957268327380124, "grad_norm": 0.09919170290231705, "learning_rate": 0.002, "loss": 2.3361, "step": 247630 }, { "epoch": 0.9573069845835073, "grad_norm": 0.09892688691616058, "learning_rate": 0.002, "loss": 2.3451, "step": 247640 }, { "epoch": 0.9573456417868905, "grad_norm": 0.09781397879123688, "learning_rate": 0.002, "loss": 2.3314, "step": 247650 }, { "epoch": 0.9573842989902739, "grad_norm": 0.09282873570919037, "learning_rate": 0.002, "loss": 2.3175, "step": 247660 }, { "epoch": 0.9574229561936571, "grad_norm": 0.08740855753421783, "learning_rate": 0.002, "loss": 2.3298, "step": 247670 }, { "epoch": 0.9574616133970404, "grad_norm": 0.11887798458337784, "learning_rate": 0.002, "loss": 2.3167, "step": 247680 }, { "epoch": 0.9575002706004236, "grad_norm": 0.1058524027466774, "learning_rate": 0.002, "loss": 2.3315, "step": 247690 }, { "epoch": 0.957538927803807, "grad_norm": 0.0968957170844078, "learning_rate": 0.002, "loss": 2.3485, "step": 247700 }, { "epoch": 0.9575775850071903, "grad_norm": 0.1604377031326294, "learning_rate": 0.002, "loss": 2.3303, "step": 247710 }, { "epoch": 0.9576162422105735, "grad_norm": 0.12406475096940994, "learning_rate": 0.002, "loss": 2.3376, "step": 247720 }, { "epoch": 0.9576548994139568, "grad_norm": 0.12045074254274368, "learning_rate": 0.002, "loss": 2.3395, "step": 247730 }, { "epoch": 0.9576935566173401, "grad_norm": 0.0910901427268982, "learning_rate": 0.002, "loss": 2.3359, "step": 247740 }, { "epoch": 0.9577322138207234, "grad_norm": 0.11357443779706955, "learning_rate": 0.002, "loss": 2.3306, "step": 247750 }, { "epoch": 0.9577708710241066, "grad_norm": 0.09268621355295181, "learning_rate": 0.002, "loss": 2.3477, "step": 247760 }, { "epoch": 0.9578095282274899, "grad_norm": 0.12930162250995636, "learning_rate": 0.002, "loss": 2.3375, "step": 247770 }, { "epoch": 0.9578481854308731, "grad_norm": 0.10533638298511505, "learning_rate": 0.002, "loss": 2.3368, "step": 247780 }, { "epoch": 0.9578868426342565, "grad_norm": 0.1110549047589302, "learning_rate": 0.002, "loss": 2.3332, "step": 247790 }, { "epoch": 0.9579254998376397, "grad_norm": 0.11005035787820816, "learning_rate": 0.002, "loss": 2.3278, "step": 247800 }, { "epoch": 0.957964157041023, "grad_norm": 0.10834237188100815, "learning_rate": 0.002, "loss": 2.3417, "step": 247810 }, { "epoch": 0.9580028142444063, "grad_norm": 0.0905105397105217, "learning_rate": 0.002, "loss": 2.3376, "step": 247820 }, { "epoch": 0.9580414714477896, "grad_norm": 0.0982719212770462, "learning_rate": 0.002, "loss": 2.349, "step": 247830 }, { "epoch": 0.9580801286511729, "grad_norm": 0.0960426852107048, "learning_rate": 0.002, "loss": 2.3328, "step": 247840 }, { "epoch": 0.9581187858545561, "grad_norm": 0.0999850258231163, "learning_rate": 0.002, "loss": 2.3486, "step": 247850 }, { "epoch": 0.9581574430579394, "grad_norm": 0.11025793850421906, "learning_rate": 0.002, "loss": 2.3363, "step": 247860 }, { "epoch": 0.9581961002613227, "grad_norm": 0.08891669660806656, "learning_rate": 0.002, "loss": 2.338, "step": 247870 }, { "epoch": 0.958234757464706, "grad_norm": 0.10581798851490021, "learning_rate": 0.002, "loss": 2.3371, "step": 247880 }, { "epoch": 0.9582734146680892, "grad_norm": 0.11585883796215057, "learning_rate": 0.002, "loss": 2.3438, "step": 247890 }, { "epoch": 0.9583120718714725, "grad_norm": 0.09135353565216064, "learning_rate": 0.002, "loss": 2.3343, "step": 247900 }, { "epoch": 0.9583507290748559, "grad_norm": 0.10126357525587082, "learning_rate": 0.002, "loss": 2.352, "step": 247910 }, { "epoch": 0.9583893862782391, "grad_norm": 0.09940259158611298, "learning_rate": 0.002, "loss": 2.3285, "step": 247920 }, { "epoch": 0.9584280434816224, "grad_norm": 0.11455532908439636, "learning_rate": 0.002, "loss": 2.3449, "step": 247930 }, { "epoch": 0.9584667006850056, "grad_norm": 0.10387568175792694, "learning_rate": 0.002, "loss": 2.3358, "step": 247940 }, { "epoch": 0.958505357888389, "grad_norm": 0.10172967612743378, "learning_rate": 0.002, "loss": 2.3515, "step": 247950 }, { "epoch": 0.9585440150917722, "grad_norm": 0.11757977306842804, "learning_rate": 0.002, "loss": 2.3367, "step": 247960 }, { "epoch": 0.9585826722951555, "grad_norm": 0.1099797710776329, "learning_rate": 0.002, "loss": 2.3484, "step": 247970 }, { "epoch": 0.9586213294985387, "grad_norm": 0.12944968044757843, "learning_rate": 0.002, "loss": 2.3504, "step": 247980 }, { "epoch": 0.958659986701922, "grad_norm": 0.09926500171422958, "learning_rate": 0.002, "loss": 2.3433, "step": 247990 }, { "epoch": 0.9586986439053053, "grad_norm": 0.13880828022956848, "learning_rate": 0.002, "loss": 2.355, "step": 248000 }, { "epoch": 0.9587373011086886, "grad_norm": 0.10396645218133926, "learning_rate": 0.002, "loss": 2.3342, "step": 248010 }, { "epoch": 0.9587759583120719, "grad_norm": 0.1147075742483139, "learning_rate": 0.002, "loss": 2.3233, "step": 248020 }, { "epoch": 0.9588146155154551, "grad_norm": 0.11550748348236084, "learning_rate": 0.002, "loss": 2.3374, "step": 248030 }, { "epoch": 0.9588532727188385, "grad_norm": 0.10915172845125198, "learning_rate": 0.002, "loss": 2.3341, "step": 248040 }, { "epoch": 0.9588919299222217, "grad_norm": 0.09901692718267441, "learning_rate": 0.002, "loss": 2.3313, "step": 248050 }, { "epoch": 0.958930587125605, "grad_norm": 0.1246907114982605, "learning_rate": 0.002, "loss": 2.3492, "step": 248060 }, { "epoch": 0.9589692443289882, "grad_norm": 0.10774242877960205, "learning_rate": 0.002, "loss": 2.3336, "step": 248070 }, { "epoch": 0.9590079015323716, "grad_norm": 0.11088234186172485, "learning_rate": 0.002, "loss": 2.3361, "step": 248080 }, { "epoch": 0.9590465587357548, "grad_norm": 0.11602111160755157, "learning_rate": 0.002, "loss": 2.3545, "step": 248090 }, { "epoch": 0.9590852159391381, "grad_norm": 0.11179874837398529, "learning_rate": 0.002, "loss": 2.3408, "step": 248100 }, { "epoch": 0.9591238731425213, "grad_norm": 0.11069482564926147, "learning_rate": 0.002, "loss": 2.3276, "step": 248110 }, { "epoch": 0.9591625303459047, "grad_norm": 0.09340058267116547, "learning_rate": 0.002, "loss": 2.3333, "step": 248120 }, { "epoch": 0.959201187549288, "grad_norm": 0.12895047664642334, "learning_rate": 0.002, "loss": 2.3489, "step": 248130 }, { "epoch": 0.9592398447526712, "grad_norm": 0.1110064834356308, "learning_rate": 0.002, "loss": 2.3308, "step": 248140 }, { "epoch": 0.9592785019560545, "grad_norm": 0.10294454544782639, "learning_rate": 0.002, "loss": 2.3357, "step": 248150 }, { "epoch": 0.9593171591594377, "grad_norm": 0.133493110537529, "learning_rate": 0.002, "loss": 2.3345, "step": 248160 }, { "epoch": 0.9593558163628211, "grad_norm": 0.11024216562509537, "learning_rate": 0.002, "loss": 2.3286, "step": 248170 }, { "epoch": 0.9593944735662043, "grad_norm": 0.11153551936149597, "learning_rate": 0.002, "loss": 2.3397, "step": 248180 }, { "epoch": 0.9594331307695876, "grad_norm": 0.11546822637319565, "learning_rate": 0.002, "loss": 2.328, "step": 248190 }, { "epoch": 0.9594717879729708, "grad_norm": 0.10031704604625702, "learning_rate": 0.002, "loss": 2.3266, "step": 248200 }, { "epoch": 0.9595104451763542, "grad_norm": 0.10214558988809586, "learning_rate": 0.002, "loss": 2.3311, "step": 248210 }, { "epoch": 0.9595491023797375, "grad_norm": 0.11143733561038971, "learning_rate": 0.002, "loss": 2.324, "step": 248220 }, { "epoch": 0.9595877595831207, "grad_norm": 0.10043030232191086, "learning_rate": 0.002, "loss": 2.3253, "step": 248230 }, { "epoch": 0.959626416786504, "grad_norm": 0.1303481161594391, "learning_rate": 0.002, "loss": 2.3461, "step": 248240 }, { "epoch": 0.9596650739898873, "grad_norm": 0.10182737559080124, "learning_rate": 0.002, "loss": 2.3438, "step": 248250 }, { "epoch": 0.9597037311932706, "grad_norm": 0.10109949111938477, "learning_rate": 0.002, "loss": 2.3422, "step": 248260 }, { "epoch": 0.9597423883966538, "grad_norm": 0.10766161978244781, "learning_rate": 0.002, "loss": 2.3516, "step": 248270 }, { "epoch": 0.9597810456000371, "grad_norm": 0.09698101133108139, "learning_rate": 0.002, "loss": 2.3306, "step": 248280 }, { "epoch": 0.9598197028034204, "grad_norm": 0.12188798189163208, "learning_rate": 0.002, "loss": 2.3374, "step": 248290 }, { "epoch": 0.9598583600068037, "grad_norm": 0.10488150268793106, "learning_rate": 0.002, "loss": 2.3251, "step": 248300 }, { "epoch": 0.9598970172101869, "grad_norm": 0.09532138705253601, "learning_rate": 0.002, "loss": 2.3374, "step": 248310 }, { "epoch": 0.9599356744135702, "grad_norm": 0.11081784963607788, "learning_rate": 0.002, "loss": 2.3335, "step": 248320 }, { "epoch": 0.9599743316169536, "grad_norm": 0.09944086521863937, "learning_rate": 0.002, "loss": 2.339, "step": 248330 }, { "epoch": 0.9600129888203368, "grad_norm": 0.13181588053703308, "learning_rate": 0.002, "loss": 2.3439, "step": 248340 }, { "epoch": 0.9600516460237201, "grad_norm": 0.10362618416547775, "learning_rate": 0.002, "loss": 2.3455, "step": 248350 }, { "epoch": 0.9600903032271033, "grad_norm": 0.10724879801273346, "learning_rate": 0.002, "loss": 2.3294, "step": 248360 }, { "epoch": 0.9601289604304866, "grad_norm": 0.09819664061069489, "learning_rate": 0.002, "loss": 2.3332, "step": 248370 }, { "epoch": 0.9601676176338699, "grad_norm": 0.11974728852510452, "learning_rate": 0.002, "loss": 2.3393, "step": 248380 }, { "epoch": 0.9602062748372532, "grad_norm": 0.11084406077861786, "learning_rate": 0.002, "loss": 2.3443, "step": 248390 }, { "epoch": 0.9602449320406364, "grad_norm": 0.09880924224853516, "learning_rate": 0.002, "loss": 2.3218, "step": 248400 }, { "epoch": 0.9602835892440197, "grad_norm": 0.12196718156337738, "learning_rate": 0.002, "loss": 2.3371, "step": 248410 }, { "epoch": 0.960322246447403, "grad_norm": 0.12853261828422546, "learning_rate": 0.002, "loss": 2.3454, "step": 248420 }, { "epoch": 0.9603609036507863, "grad_norm": 0.10215596109628677, "learning_rate": 0.002, "loss": 2.333, "step": 248430 }, { "epoch": 0.9603995608541696, "grad_norm": 0.12762248516082764, "learning_rate": 0.002, "loss": 2.3413, "step": 248440 }, { "epoch": 0.9604382180575528, "grad_norm": 0.10123594850301743, "learning_rate": 0.002, "loss": 2.3341, "step": 248450 }, { "epoch": 0.9604768752609362, "grad_norm": 0.12691077589988708, "learning_rate": 0.002, "loss": 2.3401, "step": 248460 }, { "epoch": 0.9605155324643194, "grad_norm": 0.09701501578092575, "learning_rate": 0.002, "loss": 2.351, "step": 248470 }, { "epoch": 0.9605541896677027, "grad_norm": 0.12225864827632904, "learning_rate": 0.002, "loss": 2.3215, "step": 248480 }, { "epoch": 0.9605928468710859, "grad_norm": 0.10921717435121536, "learning_rate": 0.002, "loss": 2.3346, "step": 248490 }, { "epoch": 0.9606315040744693, "grad_norm": 0.11986319720745087, "learning_rate": 0.002, "loss": 2.3428, "step": 248500 }, { "epoch": 0.9606701612778525, "grad_norm": 0.09127210080623627, "learning_rate": 0.002, "loss": 2.3375, "step": 248510 }, { "epoch": 0.9607088184812358, "grad_norm": 0.13293032348155975, "learning_rate": 0.002, "loss": 2.3411, "step": 248520 }, { "epoch": 0.960747475684619, "grad_norm": 0.1585289090871811, "learning_rate": 0.002, "loss": 2.3358, "step": 248530 }, { "epoch": 0.9607861328880023, "grad_norm": 0.10031675547361374, "learning_rate": 0.002, "loss": 2.3412, "step": 248540 }, { "epoch": 0.9608247900913857, "grad_norm": 0.14133134484291077, "learning_rate": 0.002, "loss": 2.3486, "step": 248550 }, { "epoch": 0.9608634472947689, "grad_norm": 0.10595063120126724, "learning_rate": 0.002, "loss": 2.323, "step": 248560 }, { "epoch": 0.9609021044981522, "grad_norm": 0.10794010013341904, "learning_rate": 0.002, "loss": 2.3355, "step": 248570 }, { "epoch": 0.9609407617015354, "grad_norm": 0.09237299859523773, "learning_rate": 0.002, "loss": 2.3303, "step": 248580 }, { "epoch": 0.9609794189049188, "grad_norm": 0.10003355890512466, "learning_rate": 0.002, "loss": 2.3372, "step": 248590 }, { "epoch": 0.961018076108302, "grad_norm": 0.11430327594280243, "learning_rate": 0.002, "loss": 2.3366, "step": 248600 }, { "epoch": 0.9610567333116853, "grad_norm": 0.09757383912801743, "learning_rate": 0.002, "loss": 2.3274, "step": 248610 }, { "epoch": 0.9610953905150685, "grad_norm": 0.11522988975048065, "learning_rate": 0.002, "loss": 2.3489, "step": 248620 }, { "epoch": 0.9611340477184519, "grad_norm": 0.09410349279642105, "learning_rate": 0.002, "loss": 2.3394, "step": 248630 }, { "epoch": 0.9611727049218352, "grad_norm": 0.11336854845285416, "learning_rate": 0.002, "loss": 2.3429, "step": 248640 }, { "epoch": 0.9612113621252184, "grad_norm": 0.1023351326584816, "learning_rate": 0.002, "loss": 2.3422, "step": 248650 }, { "epoch": 0.9612500193286017, "grad_norm": 0.11214461922645569, "learning_rate": 0.002, "loss": 2.3326, "step": 248660 }, { "epoch": 0.961288676531985, "grad_norm": 0.11730778217315674, "learning_rate": 0.002, "loss": 2.3327, "step": 248670 }, { "epoch": 0.9613273337353683, "grad_norm": 0.09071174263954163, "learning_rate": 0.002, "loss": 2.3556, "step": 248680 }, { "epoch": 0.9613659909387515, "grad_norm": 0.11421388387680054, "learning_rate": 0.002, "loss": 2.3421, "step": 248690 }, { "epoch": 0.9614046481421348, "grad_norm": 0.11856932193040848, "learning_rate": 0.002, "loss": 2.3526, "step": 248700 }, { "epoch": 0.961443305345518, "grad_norm": 0.10843708366155624, "learning_rate": 0.002, "loss": 2.3537, "step": 248710 }, { "epoch": 0.9614819625489014, "grad_norm": 0.1020749881863594, "learning_rate": 0.002, "loss": 2.3286, "step": 248720 }, { "epoch": 0.9615206197522846, "grad_norm": 0.10943154245615005, "learning_rate": 0.002, "loss": 2.3388, "step": 248730 }, { "epoch": 0.9615592769556679, "grad_norm": 0.13843214511871338, "learning_rate": 0.002, "loss": 2.3379, "step": 248740 }, { "epoch": 0.9615979341590511, "grad_norm": 0.1045226976275444, "learning_rate": 0.002, "loss": 2.3315, "step": 248750 }, { "epoch": 0.9616365913624345, "grad_norm": 0.1176600456237793, "learning_rate": 0.002, "loss": 2.3566, "step": 248760 }, { "epoch": 0.9616752485658178, "grad_norm": 0.10141972452402115, "learning_rate": 0.002, "loss": 2.3309, "step": 248770 }, { "epoch": 0.961713905769201, "grad_norm": 0.09370206296443939, "learning_rate": 0.002, "loss": 2.335, "step": 248780 }, { "epoch": 0.9617525629725843, "grad_norm": 0.10429870337247849, "learning_rate": 0.002, "loss": 2.3316, "step": 248790 }, { "epoch": 0.9617912201759676, "grad_norm": 0.11017408221960068, "learning_rate": 0.002, "loss": 2.3441, "step": 248800 }, { "epoch": 0.9618298773793509, "grad_norm": 0.11044590920209885, "learning_rate": 0.002, "loss": 2.3388, "step": 248810 }, { "epoch": 0.9618685345827341, "grad_norm": 0.1153523325920105, "learning_rate": 0.002, "loss": 2.3464, "step": 248820 }, { "epoch": 0.9619071917861174, "grad_norm": 0.10669272392988205, "learning_rate": 0.002, "loss": 2.3468, "step": 248830 }, { "epoch": 0.9619458489895008, "grad_norm": 0.11303277313709259, "learning_rate": 0.002, "loss": 2.3332, "step": 248840 }, { "epoch": 0.961984506192884, "grad_norm": 0.0925389900803566, "learning_rate": 0.002, "loss": 2.3407, "step": 248850 }, { "epoch": 0.9620231633962673, "grad_norm": 0.10389024019241333, "learning_rate": 0.002, "loss": 2.3458, "step": 248860 }, { "epoch": 0.9620618205996505, "grad_norm": 0.10728508234024048, "learning_rate": 0.002, "loss": 2.3348, "step": 248870 }, { "epoch": 0.9621004778030339, "grad_norm": 0.12325850874185562, "learning_rate": 0.002, "loss": 2.3452, "step": 248880 }, { "epoch": 0.9621391350064171, "grad_norm": 0.10077033936977386, "learning_rate": 0.002, "loss": 2.336, "step": 248890 }, { "epoch": 0.9621777922098004, "grad_norm": 0.10189589112997055, "learning_rate": 0.002, "loss": 2.3161, "step": 248900 }, { "epoch": 0.9622164494131836, "grad_norm": 0.10807923972606659, "learning_rate": 0.002, "loss": 2.3363, "step": 248910 }, { "epoch": 0.9622551066165669, "grad_norm": 0.10375122725963593, "learning_rate": 0.002, "loss": 2.3399, "step": 248920 }, { "epoch": 0.9622937638199502, "grad_norm": 0.09079185873270035, "learning_rate": 0.002, "loss": 2.3469, "step": 248930 }, { "epoch": 0.9623324210233335, "grad_norm": 0.10723765939474106, "learning_rate": 0.002, "loss": 2.3464, "step": 248940 }, { "epoch": 0.9623710782267167, "grad_norm": 0.11098438501358032, "learning_rate": 0.002, "loss": 2.3393, "step": 248950 }, { "epoch": 0.9624097354301, "grad_norm": 0.10970813035964966, "learning_rate": 0.002, "loss": 2.3394, "step": 248960 }, { "epoch": 0.9624483926334834, "grad_norm": 0.10687640309333801, "learning_rate": 0.002, "loss": 2.3365, "step": 248970 }, { "epoch": 0.9624870498368666, "grad_norm": 0.1009809747338295, "learning_rate": 0.002, "loss": 2.3296, "step": 248980 }, { "epoch": 0.9625257070402499, "grad_norm": 0.12127617001533508, "learning_rate": 0.002, "loss": 2.3283, "step": 248990 }, { "epoch": 0.9625643642436331, "grad_norm": 0.09800001233816147, "learning_rate": 0.002, "loss": 2.3372, "step": 249000 }, { "epoch": 0.9626030214470165, "grad_norm": 0.09789685159921646, "learning_rate": 0.002, "loss": 2.3388, "step": 249010 }, { "epoch": 0.9626416786503997, "grad_norm": 0.11263931542634964, "learning_rate": 0.002, "loss": 2.3329, "step": 249020 }, { "epoch": 0.962680335853783, "grad_norm": 0.12075403332710266, "learning_rate": 0.002, "loss": 2.3414, "step": 249030 }, { "epoch": 0.9627189930571662, "grad_norm": 0.10094056278467178, "learning_rate": 0.002, "loss": 2.3316, "step": 249040 }, { "epoch": 0.9627576502605496, "grad_norm": 0.10163767635822296, "learning_rate": 0.002, "loss": 2.3416, "step": 249050 }, { "epoch": 0.9627963074639329, "grad_norm": 0.09169764816761017, "learning_rate": 0.002, "loss": 2.3472, "step": 249060 }, { "epoch": 0.9628349646673161, "grad_norm": 0.11688553541898727, "learning_rate": 0.002, "loss": 2.3431, "step": 249070 }, { "epoch": 0.9628736218706994, "grad_norm": 0.11535628885030746, "learning_rate": 0.002, "loss": 2.3449, "step": 249080 }, { "epoch": 0.9629122790740826, "grad_norm": 0.11258542537689209, "learning_rate": 0.002, "loss": 2.3407, "step": 249090 }, { "epoch": 0.962950936277466, "grad_norm": 0.11126603186130524, "learning_rate": 0.002, "loss": 2.3302, "step": 249100 }, { "epoch": 0.9629895934808492, "grad_norm": 0.11631768196821213, "learning_rate": 0.002, "loss": 2.3414, "step": 249110 }, { "epoch": 0.9630282506842325, "grad_norm": 0.12675493955612183, "learning_rate": 0.002, "loss": 2.3387, "step": 249120 }, { "epoch": 0.9630669078876157, "grad_norm": 0.11257590353488922, "learning_rate": 0.002, "loss": 2.3325, "step": 249130 }, { "epoch": 0.9631055650909991, "grad_norm": 0.09850809723138809, "learning_rate": 0.002, "loss": 2.338, "step": 249140 }, { "epoch": 0.9631442222943823, "grad_norm": 0.10485752671957016, "learning_rate": 0.002, "loss": 2.3426, "step": 249150 }, { "epoch": 0.9631828794977656, "grad_norm": 0.12969525158405304, "learning_rate": 0.002, "loss": 2.3322, "step": 249160 }, { "epoch": 0.9632215367011489, "grad_norm": 0.10446181148290634, "learning_rate": 0.002, "loss": 2.3262, "step": 249170 }, { "epoch": 0.9632601939045322, "grad_norm": 0.1030307188630104, "learning_rate": 0.002, "loss": 2.337, "step": 249180 }, { "epoch": 0.9632988511079155, "grad_norm": 0.10285484790802002, "learning_rate": 0.002, "loss": 2.3469, "step": 249190 }, { "epoch": 0.9633375083112987, "grad_norm": 0.12455843389034271, "learning_rate": 0.002, "loss": 2.3553, "step": 249200 }, { "epoch": 0.963376165514682, "grad_norm": 0.11602117121219635, "learning_rate": 0.002, "loss": 2.3364, "step": 249210 }, { "epoch": 0.9634148227180653, "grad_norm": 0.12856896221637726, "learning_rate": 0.002, "loss": 2.3376, "step": 249220 }, { "epoch": 0.9634534799214486, "grad_norm": 0.09774929285049438, "learning_rate": 0.002, "loss": 2.3292, "step": 249230 }, { "epoch": 0.9634921371248318, "grad_norm": 0.13181985914707184, "learning_rate": 0.002, "loss": 2.3338, "step": 249240 }, { "epoch": 0.9635307943282151, "grad_norm": 0.10472414642572403, "learning_rate": 0.002, "loss": 2.3173, "step": 249250 }, { "epoch": 0.9635694515315985, "grad_norm": 0.09871625900268555, "learning_rate": 0.002, "loss": 2.3308, "step": 249260 }, { "epoch": 0.9636081087349817, "grad_norm": 0.0966063141822815, "learning_rate": 0.002, "loss": 2.3313, "step": 249270 }, { "epoch": 0.963646765938365, "grad_norm": 0.104548379778862, "learning_rate": 0.002, "loss": 2.322, "step": 249280 }, { "epoch": 0.9636854231417482, "grad_norm": 0.09819988161325455, "learning_rate": 0.002, "loss": 2.3329, "step": 249290 }, { "epoch": 0.9637240803451315, "grad_norm": 0.10572008043527603, "learning_rate": 0.002, "loss": 2.357, "step": 249300 }, { "epoch": 0.9637627375485148, "grad_norm": 0.1110527440905571, "learning_rate": 0.002, "loss": 2.3249, "step": 249310 }, { "epoch": 0.9638013947518981, "grad_norm": 0.11289434880018234, "learning_rate": 0.002, "loss": 2.3339, "step": 249320 }, { "epoch": 0.9638400519552813, "grad_norm": 0.11714992672204971, "learning_rate": 0.002, "loss": 2.3283, "step": 249330 }, { "epoch": 0.9638787091586646, "grad_norm": 0.10628482699394226, "learning_rate": 0.002, "loss": 2.3469, "step": 249340 }, { "epoch": 0.963917366362048, "grad_norm": 0.13175849616527557, "learning_rate": 0.002, "loss": 2.3456, "step": 249350 }, { "epoch": 0.9639560235654312, "grad_norm": 0.1098552718758583, "learning_rate": 0.002, "loss": 2.3311, "step": 249360 }, { "epoch": 0.9639946807688144, "grad_norm": 0.10685157775878906, "learning_rate": 0.002, "loss": 2.3362, "step": 249370 }, { "epoch": 0.9640333379721977, "grad_norm": 0.09267696738243103, "learning_rate": 0.002, "loss": 2.3541, "step": 249380 }, { "epoch": 0.9640719951755811, "grad_norm": 0.09387141466140747, "learning_rate": 0.002, "loss": 2.3392, "step": 249390 }, { "epoch": 0.9641106523789643, "grad_norm": 0.13935497403144836, "learning_rate": 0.002, "loss": 2.3541, "step": 249400 }, { "epoch": 0.9641493095823476, "grad_norm": 0.10094621777534485, "learning_rate": 0.002, "loss": 2.3383, "step": 249410 }, { "epoch": 0.9641879667857308, "grad_norm": 0.1260949969291687, "learning_rate": 0.002, "loss": 2.3358, "step": 249420 }, { "epoch": 0.9642266239891142, "grad_norm": 0.11990845948457718, "learning_rate": 0.002, "loss": 2.367, "step": 249430 }, { "epoch": 0.9642652811924974, "grad_norm": 0.09575967490673065, "learning_rate": 0.002, "loss": 2.3437, "step": 249440 }, { "epoch": 0.9643039383958807, "grad_norm": 0.09636884927749634, "learning_rate": 0.002, "loss": 2.3253, "step": 249450 }, { "epoch": 0.9643425955992639, "grad_norm": 0.12748612463474274, "learning_rate": 0.002, "loss": 2.3396, "step": 249460 }, { "epoch": 0.9643812528026472, "grad_norm": 0.11946625262498856, "learning_rate": 0.002, "loss": 2.3439, "step": 249470 }, { "epoch": 0.9644199100060306, "grad_norm": 0.09697463363409042, "learning_rate": 0.002, "loss": 2.3463, "step": 249480 }, { "epoch": 0.9644585672094138, "grad_norm": 0.10112636536359787, "learning_rate": 0.002, "loss": 2.3307, "step": 249490 }, { "epoch": 0.9644972244127971, "grad_norm": 0.09348530322313309, "learning_rate": 0.002, "loss": 2.3284, "step": 249500 }, { "epoch": 0.9645358816161803, "grad_norm": 0.1333259642124176, "learning_rate": 0.002, "loss": 2.3597, "step": 249510 }, { "epoch": 0.9645745388195637, "grad_norm": 0.10638459026813507, "learning_rate": 0.002, "loss": 2.3495, "step": 249520 }, { "epoch": 0.9646131960229469, "grad_norm": 0.1124267429113388, "learning_rate": 0.002, "loss": 2.332, "step": 249530 }, { "epoch": 0.9646518532263302, "grad_norm": 0.0883001908659935, "learning_rate": 0.002, "loss": 2.3395, "step": 249540 }, { "epoch": 0.9646905104297134, "grad_norm": 0.100906603038311, "learning_rate": 0.002, "loss": 2.335, "step": 249550 }, { "epoch": 0.9647291676330968, "grad_norm": 0.1052638441324234, "learning_rate": 0.002, "loss": 2.3476, "step": 249560 }, { "epoch": 0.96476782483648, "grad_norm": 0.11165803670883179, "learning_rate": 0.002, "loss": 2.3481, "step": 249570 }, { "epoch": 0.9648064820398633, "grad_norm": 0.0883340910077095, "learning_rate": 0.002, "loss": 2.3335, "step": 249580 }, { "epoch": 0.9648451392432466, "grad_norm": 0.11354761570692062, "learning_rate": 0.002, "loss": 2.326, "step": 249590 }, { "epoch": 0.9648837964466299, "grad_norm": 0.11444555968046188, "learning_rate": 0.002, "loss": 2.3376, "step": 249600 }, { "epoch": 0.9649224536500132, "grad_norm": 0.14036472141742706, "learning_rate": 0.002, "loss": 2.3239, "step": 249610 }, { "epoch": 0.9649611108533964, "grad_norm": 0.12895837426185608, "learning_rate": 0.002, "loss": 2.3506, "step": 249620 }, { "epoch": 0.9649997680567797, "grad_norm": 0.11023510992527008, "learning_rate": 0.002, "loss": 2.3577, "step": 249630 }, { "epoch": 0.9650384252601629, "grad_norm": 0.12974929809570312, "learning_rate": 0.002, "loss": 2.3315, "step": 249640 }, { "epoch": 0.9650770824635463, "grad_norm": 0.10311168432235718, "learning_rate": 0.002, "loss": 2.336, "step": 249650 }, { "epoch": 0.9651157396669295, "grad_norm": 0.10950174182653427, "learning_rate": 0.002, "loss": 2.3388, "step": 249660 }, { "epoch": 0.9651543968703128, "grad_norm": 0.09690546244382858, "learning_rate": 0.002, "loss": 2.323, "step": 249670 }, { "epoch": 0.965193054073696, "grad_norm": 0.12773922085762024, "learning_rate": 0.002, "loss": 2.323, "step": 249680 }, { "epoch": 0.9652317112770794, "grad_norm": 0.09502622485160828, "learning_rate": 0.002, "loss": 2.327, "step": 249690 }, { "epoch": 0.9652703684804627, "grad_norm": 0.15543179214000702, "learning_rate": 0.002, "loss": 2.3385, "step": 249700 }, { "epoch": 0.9653090256838459, "grad_norm": 0.11124072968959808, "learning_rate": 0.002, "loss": 2.3407, "step": 249710 }, { "epoch": 0.9653476828872292, "grad_norm": 0.09724342077970505, "learning_rate": 0.002, "loss": 2.3412, "step": 249720 }, { "epoch": 0.9653863400906125, "grad_norm": 0.08673544228076935, "learning_rate": 0.002, "loss": 2.3321, "step": 249730 }, { "epoch": 0.9654249972939958, "grad_norm": 0.11834105849266052, "learning_rate": 0.002, "loss": 2.3516, "step": 249740 }, { "epoch": 0.965463654497379, "grad_norm": 0.0999530702829361, "learning_rate": 0.002, "loss": 2.3415, "step": 249750 }, { "epoch": 0.9655023117007623, "grad_norm": 0.10077276080846786, "learning_rate": 0.002, "loss": 2.3522, "step": 249760 }, { "epoch": 0.9655409689041456, "grad_norm": 0.1093694195151329, "learning_rate": 0.002, "loss": 2.3343, "step": 249770 }, { "epoch": 0.9655796261075289, "grad_norm": 0.10006707161664963, "learning_rate": 0.002, "loss": 2.3288, "step": 249780 }, { "epoch": 0.9656182833109122, "grad_norm": 0.1079149842262268, "learning_rate": 0.002, "loss": 2.3146, "step": 249790 }, { "epoch": 0.9656569405142954, "grad_norm": 0.10379232466220856, "learning_rate": 0.002, "loss": 2.3394, "step": 249800 }, { "epoch": 0.9656955977176788, "grad_norm": 0.09677669405937195, "learning_rate": 0.002, "loss": 2.3265, "step": 249810 }, { "epoch": 0.965734254921062, "grad_norm": 0.09964921325445175, "learning_rate": 0.002, "loss": 2.3396, "step": 249820 }, { "epoch": 0.9657729121244453, "grad_norm": 0.09625493735074997, "learning_rate": 0.002, "loss": 2.34, "step": 249830 }, { "epoch": 0.9658115693278285, "grad_norm": 0.09828057885169983, "learning_rate": 0.002, "loss": 2.3317, "step": 249840 }, { "epoch": 0.9658502265312118, "grad_norm": 0.10741102695465088, "learning_rate": 0.002, "loss": 2.3361, "step": 249850 }, { "epoch": 0.9658888837345951, "grad_norm": 0.12895941734313965, "learning_rate": 0.002, "loss": 2.3451, "step": 249860 }, { "epoch": 0.9659275409379784, "grad_norm": 0.08948575705289841, "learning_rate": 0.002, "loss": 2.3347, "step": 249870 }, { "epoch": 0.9659661981413616, "grad_norm": 0.10771122574806213, "learning_rate": 0.002, "loss": 2.3363, "step": 249880 }, { "epoch": 0.9660048553447449, "grad_norm": 0.113441102206707, "learning_rate": 0.002, "loss": 2.3402, "step": 249890 }, { "epoch": 0.9660435125481283, "grad_norm": 0.10112905502319336, "learning_rate": 0.002, "loss": 2.3614, "step": 249900 }, { "epoch": 0.9660821697515115, "grad_norm": 0.1073261946439743, "learning_rate": 0.002, "loss": 2.3268, "step": 249910 }, { "epoch": 0.9661208269548948, "grad_norm": 0.11671066284179688, "learning_rate": 0.002, "loss": 2.3396, "step": 249920 }, { "epoch": 0.966159484158278, "grad_norm": 0.12026667594909668, "learning_rate": 0.002, "loss": 2.356, "step": 249930 }, { "epoch": 0.9661981413616614, "grad_norm": 0.10516425967216492, "learning_rate": 0.002, "loss": 2.3418, "step": 249940 }, { "epoch": 0.9662367985650446, "grad_norm": 0.1196758821606636, "learning_rate": 0.002, "loss": 2.3442, "step": 249950 }, { "epoch": 0.9662754557684279, "grad_norm": 0.11494259536266327, "learning_rate": 0.002, "loss": 2.3335, "step": 249960 }, { "epoch": 0.9663141129718111, "grad_norm": 0.1059424877166748, "learning_rate": 0.002, "loss": 2.3505, "step": 249970 }, { "epoch": 0.9663527701751945, "grad_norm": 0.09678887575864792, "learning_rate": 0.002, "loss": 2.3461, "step": 249980 }, { "epoch": 0.9663914273785777, "grad_norm": 0.0959455743432045, "learning_rate": 0.002, "loss": 2.3261, "step": 249990 }, { "epoch": 0.966430084581961, "grad_norm": 0.10656458884477615, "learning_rate": 0.002, "loss": 2.3383, "step": 250000 }, { "epoch": 0.9664687417853443, "grad_norm": 0.09966926276683807, "learning_rate": 0.002, "loss": 2.3366, "step": 250010 }, { "epoch": 0.9665073989887275, "grad_norm": 0.10970748960971832, "learning_rate": 0.002, "loss": 2.3334, "step": 250020 }, { "epoch": 0.9665460561921109, "grad_norm": 0.09668340533971786, "learning_rate": 0.002, "loss": 2.3358, "step": 250030 }, { "epoch": 0.9665847133954941, "grad_norm": 0.13082824647426605, "learning_rate": 0.002, "loss": 2.3519, "step": 250040 }, { "epoch": 0.9666233705988774, "grad_norm": 0.13502457737922668, "learning_rate": 0.002, "loss": 2.3275, "step": 250050 }, { "epoch": 0.9666620278022606, "grad_norm": 0.088272824883461, "learning_rate": 0.002, "loss": 2.3399, "step": 250060 }, { "epoch": 0.966700685005644, "grad_norm": 0.09272816777229309, "learning_rate": 0.002, "loss": 2.336, "step": 250070 }, { "epoch": 0.9667393422090272, "grad_norm": 0.09801635891199112, "learning_rate": 0.002, "loss": 2.3358, "step": 250080 }, { "epoch": 0.9667779994124105, "grad_norm": 0.10866446048021317, "learning_rate": 0.002, "loss": 2.3245, "step": 250090 }, { "epoch": 0.9668166566157937, "grad_norm": 0.09416583180427551, "learning_rate": 0.002, "loss": 2.3449, "step": 250100 }, { "epoch": 0.9668553138191771, "grad_norm": 0.12312600016593933, "learning_rate": 0.002, "loss": 2.3406, "step": 250110 }, { "epoch": 0.9668939710225604, "grad_norm": 0.09534309059381485, "learning_rate": 0.002, "loss": 2.3371, "step": 250120 }, { "epoch": 0.9669326282259436, "grad_norm": 0.11319991201162338, "learning_rate": 0.002, "loss": 2.3524, "step": 250130 }, { "epoch": 0.9669712854293269, "grad_norm": 0.10820036381483078, "learning_rate": 0.002, "loss": 2.3449, "step": 250140 }, { "epoch": 0.9670099426327102, "grad_norm": 0.09464868903160095, "learning_rate": 0.002, "loss": 2.3364, "step": 250150 }, { "epoch": 0.9670485998360935, "grad_norm": 0.09593628346920013, "learning_rate": 0.002, "loss": 2.3549, "step": 250160 }, { "epoch": 0.9670872570394767, "grad_norm": 0.09920892864465714, "learning_rate": 0.002, "loss": 2.3392, "step": 250170 }, { "epoch": 0.96712591424286, "grad_norm": 0.10005899518728256, "learning_rate": 0.002, "loss": 2.3244, "step": 250180 }, { "epoch": 0.9671645714462432, "grad_norm": 0.11125802993774414, "learning_rate": 0.002, "loss": 2.3298, "step": 250190 }, { "epoch": 0.9672032286496266, "grad_norm": 0.09977997839450836, "learning_rate": 0.002, "loss": 2.3462, "step": 250200 }, { "epoch": 0.9672418858530099, "grad_norm": 0.1008404940366745, "learning_rate": 0.002, "loss": 2.3322, "step": 250210 }, { "epoch": 0.9672805430563931, "grad_norm": 0.1824817657470703, "learning_rate": 0.002, "loss": 2.3422, "step": 250220 }, { "epoch": 0.9673192002597764, "grad_norm": 0.11212398111820221, "learning_rate": 0.002, "loss": 2.3428, "step": 250230 }, { "epoch": 0.9673578574631597, "grad_norm": 0.10903162509202957, "learning_rate": 0.002, "loss": 2.3303, "step": 250240 }, { "epoch": 0.967396514666543, "grad_norm": 0.09185224771499634, "learning_rate": 0.002, "loss": 2.3308, "step": 250250 }, { "epoch": 0.9674351718699262, "grad_norm": 0.09937603771686554, "learning_rate": 0.002, "loss": 2.3333, "step": 250260 }, { "epoch": 0.9674738290733095, "grad_norm": 0.11099566519260406, "learning_rate": 0.002, "loss": 2.3309, "step": 250270 }, { "epoch": 0.9675124862766928, "grad_norm": 0.09390152990818024, "learning_rate": 0.002, "loss": 2.3401, "step": 250280 }, { "epoch": 0.9675511434800761, "grad_norm": 0.10876850038766861, "learning_rate": 0.002, "loss": 2.3378, "step": 250290 }, { "epoch": 0.9675898006834593, "grad_norm": 0.12110339850187302, "learning_rate": 0.002, "loss": 2.344, "step": 250300 }, { "epoch": 0.9676284578868426, "grad_norm": 0.10830269008874893, "learning_rate": 0.002, "loss": 2.3467, "step": 250310 }, { "epoch": 0.967667115090226, "grad_norm": 0.09625596553087234, "learning_rate": 0.002, "loss": 2.3413, "step": 250320 }, { "epoch": 0.9677057722936092, "grad_norm": 0.09408263117074966, "learning_rate": 0.002, "loss": 2.3346, "step": 250330 }, { "epoch": 0.9677444294969925, "grad_norm": 0.10909605026245117, "learning_rate": 0.002, "loss": 2.3367, "step": 250340 }, { "epoch": 0.9677830867003757, "grad_norm": 0.09753488004207611, "learning_rate": 0.002, "loss": 2.3475, "step": 250350 }, { "epoch": 0.9678217439037591, "grad_norm": 0.10700512677431107, "learning_rate": 0.002, "loss": 2.3453, "step": 250360 }, { "epoch": 0.9678604011071423, "grad_norm": 0.12809878587722778, "learning_rate": 0.002, "loss": 2.3401, "step": 250370 }, { "epoch": 0.9678990583105256, "grad_norm": 0.11302416771650314, "learning_rate": 0.002, "loss": 2.3404, "step": 250380 }, { "epoch": 0.9679377155139088, "grad_norm": 0.0878138393163681, "learning_rate": 0.002, "loss": 2.3355, "step": 250390 }, { "epoch": 0.9679763727172921, "grad_norm": 0.10237754881381989, "learning_rate": 0.002, "loss": 2.339, "step": 250400 }, { "epoch": 0.9680150299206755, "grad_norm": 0.11656080931425095, "learning_rate": 0.002, "loss": 2.3111, "step": 250410 }, { "epoch": 0.9680536871240587, "grad_norm": 0.11479943245649338, "learning_rate": 0.002, "loss": 2.3399, "step": 250420 }, { "epoch": 0.968092344327442, "grad_norm": 0.08994613587856293, "learning_rate": 0.002, "loss": 2.3394, "step": 250430 }, { "epoch": 0.9681310015308252, "grad_norm": 0.09658456593751907, "learning_rate": 0.002, "loss": 2.3405, "step": 250440 }, { "epoch": 0.9681696587342086, "grad_norm": 0.10116071999073029, "learning_rate": 0.002, "loss": 2.3402, "step": 250450 }, { "epoch": 0.9682083159375918, "grad_norm": 0.10644138604402542, "learning_rate": 0.002, "loss": 2.3238, "step": 250460 }, { "epoch": 0.9682469731409751, "grad_norm": 0.1095876395702362, "learning_rate": 0.002, "loss": 2.3315, "step": 250470 }, { "epoch": 0.9682856303443583, "grad_norm": 0.10697955638170242, "learning_rate": 0.002, "loss": 2.3466, "step": 250480 }, { "epoch": 0.9683242875477417, "grad_norm": 0.09780683368444443, "learning_rate": 0.002, "loss": 2.3218, "step": 250490 }, { "epoch": 0.9683629447511249, "grad_norm": 0.09849384427070618, "learning_rate": 0.002, "loss": 2.3417, "step": 250500 }, { "epoch": 0.9684016019545082, "grad_norm": 0.09895049780607224, "learning_rate": 0.002, "loss": 2.3411, "step": 250510 }, { "epoch": 0.9684402591578914, "grad_norm": 0.10031626373529434, "learning_rate": 0.002, "loss": 2.3352, "step": 250520 }, { "epoch": 0.9684789163612748, "grad_norm": 0.1167176216840744, "learning_rate": 0.002, "loss": 2.3321, "step": 250530 }, { "epoch": 0.9685175735646581, "grad_norm": 0.11091436445713043, "learning_rate": 0.002, "loss": 2.3308, "step": 250540 }, { "epoch": 0.9685562307680413, "grad_norm": 0.09511246532201767, "learning_rate": 0.002, "loss": 2.3399, "step": 250550 }, { "epoch": 0.9685948879714246, "grad_norm": 0.09637043625116348, "learning_rate": 0.002, "loss": 2.3372, "step": 250560 }, { "epoch": 0.9686335451748078, "grad_norm": 0.10570426285266876, "learning_rate": 0.002, "loss": 2.3315, "step": 250570 }, { "epoch": 0.9686722023781912, "grad_norm": 0.08653859794139862, "learning_rate": 0.002, "loss": 2.3398, "step": 250580 }, { "epoch": 0.9687108595815744, "grad_norm": 0.11135722696781158, "learning_rate": 0.002, "loss": 2.3329, "step": 250590 }, { "epoch": 0.9687495167849577, "grad_norm": 0.10831640660762787, "learning_rate": 0.002, "loss": 2.3503, "step": 250600 }, { "epoch": 0.9687881739883409, "grad_norm": 0.10700742900371552, "learning_rate": 0.002, "loss": 2.3371, "step": 250610 }, { "epoch": 0.9688268311917243, "grad_norm": 0.09919552505016327, "learning_rate": 0.002, "loss": 2.334, "step": 250620 }, { "epoch": 0.9688654883951076, "grad_norm": 0.10234495252370834, "learning_rate": 0.002, "loss": 2.339, "step": 250630 }, { "epoch": 0.9689041455984908, "grad_norm": 0.1608228236436844, "learning_rate": 0.002, "loss": 2.3313, "step": 250640 }, { "epoch": 0.9689428028018741, "grad_norm": 0.11038243770599365, "learning_rate": 0.002, "loss": 2.3325, "step": 250650 }, { "epoch": 0.9689814600052574, "grad_norm": 0.13908079266548157, "learning_rate": 0.002, "loss": 2.3489, "step": 250660 }, { "epoch": 0.9690201172086407, "grad_norm": 0.11118654906749725, "learning_rate": 0.002, "loss": 2.3234, "step": 250670 }, { "epoch": 0.9690587744120239, "grad_norm": 0.09143441915512085, "learning_rate": 0.002, "loss": 2.3378, "step": 250680 }, { "epoch": 0.9690974316154072, "grad_norm": 0.1052967831492424, "learning_rate": 0.002, "loss": 2.3253, "step": 250690 }, { "epoch": 0.9691360888187905, "grad_norm": 0.10974379628896713, "learning_rate": 0.002, "loss": 2.3319, "step": 250700 }, { "epoch": 0.9691747460221738, "grad_norm": 0.10102606564760208, "learning_rate": 0.002, "loss": 2.3456, "step": 250710 }, { "epoch": 0.969213403225557, "grad_norm": 0.10798647254705429, "learning_rate": 0.002, "loss": 2.3383, "step": 250720 }, { "epoch": 0.9692520604289403, "grad_norm": 0.10051941126585007, "learning_rate": 0.002, "loss": 2.3416, "step": 250730 }, { "epoch": 0.9692907176323237, "grad_norm": 0.10196227580308914, "learning_rate": 0.002, "loss": 2.3373, "step": 250740 }, { "epoch": 0.9693293748357069, "grad_norm": 0.11750812828540802, "learning_rate": 0.002, "loss": 2.3515, "step": 250750 }, { "epoch": 0.9693680320390902, "grad_norm": 0.11583947390317917, "learning_rate": 0.002, "loss": 2.3362, "step": 250760 }, { "epoch": 0.9694066892424734, "grad_norm": 0.10498038679361343, "learning_rate": 0.002, "loss": 2.3408, "step": 250770 }, { "epoch": 0.9694453464458567, "grad_norm": 0.10385581851005554, "learning_rate": 0.002, "loss": 2.3339, "step": 250780 }, { "epoch": 0.96948400364924, "grad_norm": 0.10725849866867065, "learning_rate": 0.002, "loss": 2.3369, "step": 250790 }, { "epoch": 0.9695226608526233, "grad_norm": 0.10024876892566681, "learning_rate": 0.002, "loss": 2.3438, "step": 250800 }, { "epoch": 0.9695613180560065, "grad_norm": 0.11648895591497421, "learning_rate": 0.002, "loss": 2.3203, "step": 250810 }, { "epoch": 0.9695999752593898, "grad_norm": 0.11542441695928574, "learning_rate": 0.002, "loss": 2.3369, "step": 250820 }, { "epoch": 0.9696386324627732, "grad_norm": 0.09200196713209152, "learning_rate": 0.002, "loss": 2.3482, "step": 250830 }, { "epoch": 0.9696772896661564, "grad_norm": 0.09737569838762283, "learning_rate": 0.002, "loss": 2.3564, "step": 250840 }, { "epoch": 0.9697159468695397, "grad_norm": 0.10656151175498962, "learning_rate": 0.002, "loss": 2.3405, "step": 250850 }, { "epoch": 0.9697546040729229, "grad_norm": 0.10151994228363037, "learning_rate": 0.002, "loss": 2.333, "step": 250860 }, { "epoch": 0.9697932612763063, "grad_norm": 0.09500584006309509, "learning_rate": 0.002, "loss": 2.3301, "step": 250870 }, { "epoch": 0.9698319184796895, "grad_norm": 0.10878094285726547, "learning_rate": 0.002, "loss": 2.3403, "step": 250880 }, { "epoch": 0.9698705756830728, "grad_norm": 0.12469379603862762, "learning_rate": 0.002, "loss": 2.3242, "step": 250890 }, { "epoch": 0.969909232886456, "grad_norm": 0.09622928500175476, "learning_rate": 0.002, "loss": 2.343, "step": 250900 }, { "epoch": 0.9699478900898394, "grad_norm": 0.10867168009281158, "learning_rate": 0.002, "loss": 2.3496, "step": 250910 }, { "epoch": 0.9699865472932226, "grad_norm": 0.10699144005775452, "learning_rate": 0.002, "loss": 2.3464, "step": 250920 }, { "epoch": 0.9700252044966059, "grad_norm": 0.11127132177352905, "learning_rate": 0.002, "loss": 2.3524, "step": 250930 }, { "epoch": 0.9700638616999891, "grad_norm": 0.09256624430418015, "learning_rate": 0.002, "loss": 2.3351, "step": 250940 }, { "epoch": 0.9701025189033724, "grad_norm": 0.10343354195356369, "learning_rate": 0.002, "loss": 2.3261, "step": 250950 }, { "epoch": 0.9701411761067558, "grad_norm": 0.1389748454093933, "learning_rate": 0.002, "loss": 2.3451, "step": 250960 }, { "epoch": 0.970179833310139, "grad_norm": 0.11664845794439316, "learning_rate": 0.002, "loss": 2.335, "step": 250970 }, { "epoch": 0.9702184905135223, "grad_norm": 0.11075331270694733, "learning_rate": 0.002, "loss": 2.3454, "step": 250980 }, { "epoch": 0.9702571477169055, "grad_norm": 0.10614953190088272, "learning_rate": 0.002, "loss": 2.3447, "step": 250990 }, { "epoch": 0.9702958049202889, "grad_norm": 0.08824150264263153, "learning_rate": 0.002, "loss": 2.3416, "step": 251000 }, { "epoch": 0.9703344621236721, "grad_norm": 0.10004694759845734, "learning_rate": 0.002, "loss": 2.3375, "step": 251010 }, { "epoch": 0.9703731193270554, "grad_norm": 0.12179727107286453, "learning_rate": 0.002, "loss": 2.3473, "step": 251020 }, { "epoch": 0.9704117765304386, "grad_norm": 0.11327700316905975, "learning_rate": 0.002, "loss": 2.3532, "step": 251030 }, { "epoch": 0.970450433733822, "grad_norm": 0.09601608663797379, "learning_rate": 0.002, "loss": 2.3376, "step": 251040 }, { "epoch": 0.9704890909372053, "grad_norm": 0.12900158762931824, "learning_rate": 0.002, "loss": 2.3156, "step": 251050 }, { "epoch": 0.9705277481405885, "grad_norm": 0.0974268764257431, "learning_rate": 0.002, "loss": 2.343, "step": 251060 }, { "epoch": 0.9705664053439718, "grad_norm": 0.12711255252361298, "learning_rate": 0.002, "loss": 2.3355, "step": 251070 }, { "epoch": 0.9706050625473551, "grad_norm": 0.1104290634393692, "learning_rate": 0.002, "loss": 2.3504, "step": 251080 }, { "epoch": 0.9706437197507384, "grad_norm": 0.11057502776384354, "learning_rate": 0.002, "loss": 2.3257, "step": 251090 }, { "epoch": 0.9706823769541216, "grad_norm": 0.12084945291280746, "learning_rate": 0.002, "loss": 2.351, "step": 251100 }, { "epoch": 0.9707210341575049, "grad_norm": 0.11005719751119614, "learning_rate": 0.002, "loss": 2.3372, "step": 251110 }, { "epoch": 0.9707596913608881, "grad_norm": 0.10544893890619278, "learning_rate": 0.002, "loss": 2.3393, "step": 251120 }, { "epoch": 0.9707983485642715, "grad_norm": 0.1027940958738327, "learning_rate": 0.002, "loss": 2.3354, "step": 251130 }, { "epoch": 0.9708370057676547, "grad_norm": 0.09388290345668793, "learning_rate": 0.002, "loss": 2.3327, "step": 251140 }, { "epoch": 0.970875662971038, "grad_norm": 0.15099789202213287, "learning_rate": 0.002, "loss": 2.3438, "step": 251150 }, { "epoch": 0.9709143201744213, "grad_norm": 0.11640970408916473, "learning_rate": 0.002, "loss": 2.3303, "step": 251160 }, { "epoch": 0.9709529773778046, "grad_norm": 0.10199052095413208, "learning_rate": 0.002, "loss": 2.336, "step": 251170 }, { "epoch": 0.9709916345811879, "grad_norm": 0.10691885650157928, "learning_rate": 0.002, "loss": 2.3393, "step": 251180 }, { "epoch": 0.9710302917845711, "grad_norm": 0.11014623939990997, "learning_rate": 0.002, "loss": 2.3478, "step": 251190 }, { "epoch": 0.9710689489879544, "grad_norm": 0.10485488176345825, "learning_rate": 0.002, "loss": 2.3408, "step": 251200 }, { "epoch": 0.9711076061913377, "grad_norm": 0.10701548308134079, "learning_rate": 0.002, "loss": 2.3485, "step": 251210 }, { "epoch": 0.971146263394721, "grad_norm": 0.10561732202768326, "learning_rate": 0.002, "loss": 2.3401, "step": 251220 }, { "epoch": 0.9711849205981042, "grad_norm": 0.11018940061330795, "learning_rate": 0.002, "loss": 2.3371, "step": 251230 }, { "epoch": 0.9712235778014875, "grad_norm": 0.1253640353679657, "learning_rate": 0.002, "loss": 2.3091, "step": 251240 }, { "epoch": 0.9712622350048709, "grad_norm": 0.10480239987373352, "learning_rate": 0.002, "loss": 2.3339, "step": 251250 }, { "epoch": 0.9713008922082541, "grad_norm": 0.10991910099983215, "learning_rate": 0.002, "loss": 2.3313, "step": 251260 }, { "epoch": 0.9713395494116374, "grad_norm": 0.10583469271659851, "learning_rate": 0.002, "loss": 2.335, "step": 251270 }, { "epoch": 0.9713782066150206, "grad_norm": 0.09267975389957428, "learning_rate": 0.002, "loss": 2.3337, "step": 251280 }, { "epoch": 0.971416863818404, "grad_norm": 0.10932402312755585, "learning_rate": 0.002, "loss": 2.331, "step": 251290 }, { "epoch": 0.9714555210217872, "grad_norm": 0.1009744331240654, "learning_rate": 0.002, "loss": 2.3511, "step": 251300 }, { "epoch": 0.9714941782251705, "grad_norm": 0.12196720391511917, "learning_rate": 0.002, "loss": 2.3204, "step": 251310 }, { "epoch": 0.9715328354285537, "grad_norm": 0.10302551090717316, "learning_rate": 0.002, "loss": 2.3489, "step": 251320 }, { "epoch": 0.971571492631937, "grad_norm": 0.10102386027574539, "learning_rate": 0.002, "loss": 2.3421, "step": 251330 }, { "epoch": 0.9716101498353203, "grad_norm": 0.08784213662147522, "learning_rate": 0.002, "loss": 2.3492, "step": 251340 }, { "epoch": 0.9716488070387036, "grad_norm": 0.10013749450445175, "learning_rate": 0.002, "loss": 2.3291, "step": 251350 }, { "epoch": 0.9716874642420869, "grad_norm": 0.1117543876171112, "learning_rate": 0.002, "loss": 2.3406, "step": 251360 }, { "epoch": 0.9717261214454701, "grad_norm": 0.10328752547502518, "learning_rate": 0.002, "loss": 2.3396, "step": 251370 }, { "epoch": 0.9717647786488535, "grad_norm": 0.0927998349070549, "learning_rate": 0.002, "loss": 2.346, "step": 251380 }, { "epoch": 0.9718034358522367, "grad_norm": 0.10032209008932114, "learning_rate": 0.002, "loss": 2.3257, "step": 251390 }, { "epoch": 0.97184209305562, "grad_norm": 0.10912362486124039, "learning_rate": 0.002, "loss": 2.3435, "step": 251400 }, { "epoch": 0.9718807502590032, "grad_norm": 0.11461266875267029, "learning_rate": 0.002, "loss": 2.3289, "step": 251410 }, { "epoch": 0.9719194074623866, "grad_norm": 0.12664832174777985, "learning_rate": 0.002, "loss": 2.3385, "step": 251420 }, { "epoch": 0.9719580646657698, "grad_norm": 0.14733320474624634, "learning_rate": 0.002, "loss": 2.3489, "step": 251430 }, { "epoch": 0.9719967218691531, "grad_norm": 0.11037435382604599, "learning_rate": 0.002, "loss": 2.3571, "step": 251440 }, { "epoch": 0.9720353790725363, "grad_norm": 0.10550345480442047, "learning_rate": 0.002, "loss": 2.3331, "step": 251450 }, { "epoch": 0.9720740362759197, "grad_norm": 0.11252820491790771, "learning_rate": 0.002, "loss": 2.3302, "step": 251460 }, { "epoch": 0.972112693479303, "grad_norm": 0.1101410761475563, "learning_rate": 0.002, "loss": 2.3475, "step": 251470 }, { "epoch": 0.9721513506826862, "grad_norm": 0.10230425000190735, "learning_rate": 0.002, "loss": 2.3529, "step": 251480 }, { "epoch": 0.9721900078860695, "grad_norm": 0.09026966243982315, "learning_rate": 0.002, "loss": 2.3417, "step": 251490 }, { "epoch": 0.9722286650894527, "grad_norm": 0.11959964781999588, "learning_rate": 0.002, "loss": 2.3428, "step": 251500 }, { "epoch": 0.9722673222928361, "grad_norm": 0.10887648910284042, "learning_rate": 0.002, "loss": 2.3559, "step": 251510 }, { "epoch": 0.9723059794962193, "grad_norm": 0.11334493011236191, "learning_rate": 0.002, "loss": 2.3505, "step": 251520 }, { "epoch": 0.9723446366996026, "grad_norm": 0.10531239211559296, "learning_rate": 0.002, "loss": 2.3373, "step": 251530 }, { "epoch": 0.9723832939029858, "grad_norm": 0.09486491978168488, "learning_rate": 0.002, "loss": 2.3275, "step": 251540 }, { "epoch": 0.9724219511063692, "grad_norm": 0.10903091728687286, "learning_rate": 0.002, "loss": 2.3275, "step": 251550 }, { "epoch": 0.9724606083097525, "grad_norm": 0.13235808908939362, "learning_rate": 0.002, "loss": 2.3415, "step": 251560 }, { "epoch": 0.9724992655131357, "grad_norm": 0.0974523052573204, "learning_rate": 0.002, "loss": 2.3394, "step": 251570 }, { "epoch": 0.972537922716519, "grad_norm": 0.1222124919295311, "learning_rate": 0.002, "loss": 2.3319, "step": 251580 }, { "epoch": 0.9725765799199023, "grad_norm": 0.09988315403461456, "learning_rate": 0.002, "loss": 2.348, "step": 251590 }, { "epoch": 0.9726152371232856, "grad_norm": 0.10034121572971344, "learning_rate": 0.002, "loss": 2.3351, "step": 251600 }, { "epoch": 0.9726538943266688, "grad_norm": 0.12333368510007858, "learning_rate": 0.002, "loss": 2.3486, "step": 251610 }, { "epoch": 0.9726925515300521, "grad_norm": 0.10110745579004288, "learning_rate": 0.002, "loss": 2.3445, "step": 251620 }, { "epoch": 0.9727312087334354, "grad_norm": 0.10473684966564178, "learning_rate": 0.002, "loss": 2.3318, "step": 251630 }, { "epoch": 0.9727698659368187, "grad_norm": 0.10513075441122055, "learning_rate": 0.002, "loss": 2.3356, "step": 251640 }, { "epoch": 0.9728085231402019, "grad_norm": 0.09620826691389084, "learning_rate": 0.002, "loss": 2.3433, "step": 251650 }, { "epoch": 0.9728471803435852, "grad_norm": 0.12485495954751968, "learning_rate": 0.002, "loss": 2.3352, "step": 251660 }, { "epoch": 0.9728858375469686, "grad_norm": 0.10933785885572433, "learning_rate": 0.002, "loss": 2.3485, "step": 251670 }, { "epoch": 0.9729244947503518, "grad_norm": 0.1092258170247078, "learning_rate": 0.002, "loss": 2.3506, "step": 251680 }, { "epoch": 0.9729631519537351, "grad_norm": 0.1023181825876236, "learning_rate": 0.002, "loss": 2.3512, "step": 251690 }, { "epoch": 0.9730018091571183, "grad_norm": 0.1524294763803482, "learning_rate": 0.002, "loss": 2.3436, "step": 251700 }, { "epoch": 0.9730404663605016, "grad_norm": 0.10203384608030319, "learning_rate": 0.002, "loss": 2.3445, "step": 251710 }, { "epoch": 0.9730791235638849, "grad_norm": 0.10223396122455597, "learning_rate": 0.002, "loss": 2.3427, "step": 251720 }, { "epoch": 0.9731177807672682, "grad_norm": 0.11579997092485428, "learning_rate": 0.002, "loss": 2.338, "step": 251730 }, { "epoch": 0.9731564379706514, "grad_norm": 0.09853821992874146, "learning_rate": 0.002, "loss": 2.3364, "step": 251740 }, { "epoch": 0.9731950951740347, "grad_norm": 0.10674675554037094, "learning_rate": 0.002, "loss": 2.3538, "step": 251750 }, { "epoch": 0.973233752377418, "grad_norm": 0.12423861771821976, "learning_rate": 0.002, "loss": 2.3352, "step": 251760 }, { "epoch": 0.9732724095808013, "grad_norm": 0.09702294319868088, "learning_rate": 0.002, "loss": 2.3336, "step": 251770 }, { "epoch": 0.9733110667841846, "grad_norm": 0.10267679393291473, "learning_rate": 0.002, "loss": 2.3441, "step": 251780 }, { "epoch": 0.9733497239875678, "grad_norm": 0.0920570120215416, "learning_rate": 0.002, "loss": 2.3523, "step": 251790 }, { "epoch": 0.9733883811909512, "grad_norm": 0.1116960197687149, "learning_rate": 0.002, "loss": 2.3407, "step": 251800 }, { "epoch": 0.9734270383943344, "grad_norm": 0.10197541862726212, "learning_rate": 0.002, "loss": 2.3483, "step": 251810 }, { "epoch": 0.9734656955977177, "grad_norm": 0.11533726006746292, "learning_rate": 0.002, "loss": 2.338, "step": 251820 }, { "epoch": 0.9735043528011009, "grad_norm": 0.09482559561729431, "learning_rate": 0.002, "loss": 2.3245, "step": 251830 }, { "epoch": 0.9735430100044843, "grad_norm": 0.09979456663131714, "learning_rate": 0.002, "loss": 2.3332, "step": 251840 }, { "epoch": 0.9735816672078675, "grad_norm": 0.11372046917676926, "learning_rate": 0.002, "loss": 2.3249, "step": 251850 }, { "epoch": 0.9736203244112508, "grad_norm": 0.11386499553918839, "learning_rate": 0.002, "loss": 2.3348, "step": 251860 }, { "epoch": 0.973658981614634, "grad_norm": 0.09593381732702255, "learning_rate": 0.002, "loss": 2.3565, "step": 251870 }, { "epoch": 0.9736976388180173, "grad_norm": 0.10269182920455933, "learning_rate": 0.002, "loss": 2.3319, "step": 251880 }, { "epoch": 0.9737362960214007, "grad_norm": 0.12904858589172363, "learning_rate": 0.002, "loss": 2.3494, "step": 251890 }, { "epoch": 0.9737749532247839, "grad_norm": 0.11794731020927429, "learning_rate": 0.002, "loss": 2.3391, "step": 251900 }, { "epoch": 0.9738136104281672, "grad_norm": 0.11491457372903824, "learning_rate": 0.002, "loss": 2.3546, "step": 251910 }, { "epoch": 0.9738522676315504, "grad_norm": 0.09406273812055588, "learning_rate": 0.002, "loss": 2.3288, "step": 251920 }, { "epoch": 0.9738909248349338, "grad_norm": 0.12121071666479111, "learning_rate": 0.002, "loss": 2.3236, "step": 251930 }, { "epoch": 0.973929582038317, "grad_norm": 0.12540626525878906, "learning_rate": 0.002, "loss": 2.3345, "step": 251940 }, { "epoch": 0.9739682392417003, "grad_norm": 0.09689386188983917, "learning_rate": 0.002, "loss": 2.3425, "step": 251950 }, { "epoch": 0.9740068964450835, "grad_norm": 0.1502695381641388, "learning_rate": 0.002, "loss": 2.3295, "step": 251960 }, { "epoch": 0.9740455536484669, "grad_norm": 0.0994613990187645, "learning_rate": 0.002, "loss": 2.3531, "step": 251970 }, { "epoch": 0.9740842108518502, "grad_norm": 0.10326496511697769, "learning_rate": 0.002, "loss": 2.3241, "step": 251980 }, { "epoch": 0.9741228680552334, "grad_norm": 0.09585017710924149, "learning_rate": 0.002, "loss": 2.3396, "step": 251990 }, { "epoch": 0.9741615252586167, "grad_norm": 0.09650801867246628, "learning_rate": 0.002, "loss": 2.3131, "step": 252000 }, { "epoch": 0.974200182462, "grad_norm": 0.11647900938987732, "learning_rate": 0.002, "loss": 2.3427, "step": 252010 }, { "epoch": 0.9742388396653833, "grad_norm": 0.12022756040096283, "learning_rate": 0.002, "loss": 2.3426, "step": 252020 }, { "epoch": 0.9742774968687665, "grad_norm": 0.11134672164916992, "learning_rate": 0.002, "loss": 2.3143, "step": 252030 }, { "epoch": 0.9743161540721498, "grad_norm": 0.09073596447706223, "learning_rate": 0.002, "loss": 2.3462, "step": 252040 }, { "epoch": 0.974354811275533, "grad_norm": 0.09896473586559296, "learning_rate": 0.002, "loss": 2.3366, "step": 252050 }, { "epoch": 0.9743934684789164, "grad_norm": 0.12702728807926178, "learning_rate": 0.002, "loss": 2.3333, "step": 252060 }, { "epoch": 0.9744321256822996, "grad_norm": 0.09636173397302628, "learning_rate": 0.002, "loss": 2.3444, "step": 252070 }, { "epoch": 0.9744707828856829, "grad_norm": 0.09168794006109238, "learning_rate": 0.002, "loss": 2.3435, "step": 252080 }, { "epoch": 0.9745094400890661, "grad_norm": 0.36897939443588257, "learning_rate": 0.002, "loss": 2.3411, "step": 252090 }, { "epoch": 0.9745480972924495, "grad_norm": 0.11859555542469025, "learning_rate": 0.002, "loss": 2.3563, "step": 252100 }, { "epoch": 0.9745867544958328, "grad_norm": 0.11081647127866745, "learning_rate": 0.002, "loss": 2.3329, "step": 252110 }, { "epoch": 0.974625411699216, "grad_norm": 0.1135776937007904, "learning_rate": 0.002, "loss": 2.3373, "step": 252120 }, { "epoch": 0.9746640689025993, "grad_norm": 0.09395918995141983, "learning_rate": 0.002, "loss": 2.3321, "step": 252130 }, { "epoch": 0.9747027261059826, "grad_norm": 0.11018582433462143, "learning_rate": 0.002, "loss": 2.3359, "step": 252140 }, { "epoch": 0.9747413833093659, "grad_norm": 0.09823351353406906, "learning_rate": 0.002, "loss": 2.3499, "step": 252150 }, { "epoch": 0.9747800405127491, "grad_norm": 0.10773885250091553, "learning_rate": 0.002, "loss": 2.3383, "step": 252160 }, { "epoch": 0.9748186977161324, "grad_norm": 0.1168753057718277, "learning_rate": 0.002, "loss": 2.3327, "step": 252170 }, { "epoch": 0.9748573549195158, "grad_norm": 0.18037018179893494, "learning_rate": 0.002, "loss": 2.3425, "step": 252180 }, { "epoch": 0.974896012122899, "grad_norm": 0.11588846147060394, "learning_rate": 0.002, "loss": 2.3415, "step": 252190 }, { "epoch": 0.9749346693262823, "grad_norm": 0.10509663075208664, "learning_rate": 0.002, "loss": 2.344, "step": 252200 }, { "epoch": 0.9749733265296655, "grad_norm": 0.10183600336313248, "learning_rate": 0.002, "loss": 2.3491, "step": 252210 }, { "epoch": 0.9750119837330489, "grad_norm": 0.10032964497804642, "learning_rate": 0.002, "loss": 2.3331, "step": 252220 }, { "epoch": 0.9750506409364321, "grad_norm": 0.11123006790876389, "learning_rate": 0.002, "loss": 2.3277, "step": 252230 }, { "epoch": 0.9750892981398154, "grad_norm": 0.09926677495241165, "learning_rate": 0.002, "loss": 2.3292, "step": 252240 }, { "epoch": 0.9751279553431986, "grad_norm": 0.1239267885684967, "learning_rate": 0.002, "loss": 2.3307, "step": 252250 }, { "epoch": 0.9751666125465819, "grad_norm": 0.08306685090065002, "learning_rate": 0.002, "loss": 2.3555, "step": 252260 }, { "epoch": 0.9752052697499652, "grad_norm": 0.11122148483991623, "learning_rate": 0.002, "loss": 2.3436, "step": 252270 }, { "epoch": 0.9752439269533485, "grad_norm": 0.10140670090913773, "learning_rate": 0.002, "loss": 2.3445, "step": 252280 }, { "epoch": 0.9752825841567317, "grad_norm": 0.09582722932100296, "learning_rate": 0.002, "loss": 2.336, "step": 252290 }, { "epoch": 0.975321241360115, "grad_norm": 0.1066557765007019, "learning_rate": 0.002, "loss": 2.3467, "step": 252300 }, { "epoch": 0.9753598985634984, "grad_norm": 0.09405253827571869, "learning_rate": 0.002, "loss": 2.3295, "step": 252310 }, { "epoch": 0.9753985557668816, "grad_norm": 0.10288607329130173, "learning_rate": 0.002, "loss": 2.3479, "step": 252320 }, { "epoch": 0.9754372129702649, "grad_norm": 0.0957462415099144, "learning_rate": 0.002, "loss": 2.3349, "step": 252330 }, { "epoch": 0.9754758701736481, "grad_norm": 0.10872723907232285, "learning_rate": 0.002, "loss": 2.3414, "step": 252340 }, { "epoch": 0.9755145273770315, "grad_norm": 0.11865616589784622, "learning_rate": 0.002, "loss": 2.3274, "step": 252350 }, { "epoch": 0.9755531845804147, "grad_norm": 0.09794749319553375, "learning_rate": 0.002, "loss": 2.3578, "step": 252360 }, { "epoch": 0.975591841783798, "grad_norm": 0.10518945753574371, "learning_rate": 0.002, "loss": 2.3494, "step": 252370 }, { "epoch": 0.9756304989871812, "grad_norm": 0.1064748764038086, "learning_rate": 0.002, "loss": 2.3364, "step": 252380 }, { "epoch": 0.9756691561905646, "grad_norm": 0.10496281087398529, "learning_rate": 0.002, "loss": 2.3518, "step": 252390 }, { "epoch": 0.9757078133939479, "grad_norm": 0.14388880133628845, "learning_rate": 0.002, "loss": 2.3382, "step": 252400 }, { "epoch": 0.9757464705973311, "grad_norm": 0.10157719254493713, "learning_rate": 0.002, "loss": 2.3316, "step": 252410 }, { "epoch": 0.9757851278007144, "grad_norm": 0.1069631278514862, "learning_rate": 0.002, "loss": 2.3165, "step": 252420 }, { "epoch": 0.9758237850040976, "grad_norm": 0.09011048823595047, "learning_rate": 0.002, "loss": 2.3359, "step": 252430 }, { "epoch": 0.975862442207481, "grad_norm": 0.10953779518604279, "learning_rate": 0.002, "loss": 2.333, "step": 252440 }, { "epoch": 0.9759010994108642, "grad_norm": 0.1189967542886734, "learning_rate": 0.002, "loss": 2.3454, "step": 252450 }, { "epoch": 0.9759397566142475, "grad_norm": 0.09564502537250519, "learning_rate": 0.002, "loss": 2.3429, "step": 252460 }, { "epoch": 0.9759784138176307, "grad_norm": 0.11470325291156769, "learning_rate": 0.002, "loss": 2.3364, "step": 252470 }, { "epoch": 0.9760170710210141, "grad_norm": 0.10929343849420547, "learning_rate": 0.002, "loss": 2.3489, "step": 252480 }, { "epoch": 0.9760557282243973, "grad_norm": 0.11872894316911697, "learning_rate": 0.002, "loss": 2.3422, "step": 252490 }, { "epoch": 0.9760943854277806, "grad_norm": 0.09920226037502289, "learning_rate": 0.002, "loss": 2.3397, "step": 252500 }, { "epoch": 0.9761330426311638, "grad_norm": 0.10016408562660217, "learning_rate": 0.002, "loss": 2.3527, "step": 252510 }, { "epoch": 0.9761716998345472, "grad_norm": 0.13373656570911407, "learning_rate": 0.002, "loss": 2.3272, "step": 252520 }, { "epoch": 0.9762103570379305, "grad_norm": 0.10544757544994354, "learning_rate": 0.002, "loss": 2.3406, "step": 252530 }, { "epoch": 0.9762490142413137, "grad_norm": 0.09542810916900635, "learning_rate": 0.002, "loss": 2.339, "step": 252540 }, { "epoch": 0.976287671444697, "grad_norm": 0.09550932794809341, "learning_rate": 0.002, "loss": 2.3287, "step": 252550 }, { "epoch": 0.9763263286480803, "grad_norm": 0.10764776170253754, "learning_rate": 0.002, "loss": 2.3513, "step": 252560 }, { "epoch": 0.9763649858514636, "grad_norm": 0.11706431955099106, "learning_rate": 0.002, "loss": 2.3259, "step": 252570 }, { "epoch": 0.9764036430548468, "grad_norm": 0.09157240390777588, "learning_rate": 0.002, "loss": 2.3432, "step": 252580 }, { "epoch": 0.9764423002582301, "grad_norm": 0.09784223139286041, "learning_rate": 0.002, "loss": 2.338, "step": 252590 }, { "epoch": 0.9764809574616135, "grad_norm": 0.10729385167360306, "learning_rate": 0.002, "loss": 2.3227, "step": 252600 }, { "epoch": 0.9765196146649967, "grad_norm": 0.11288652569055557, "learning_rate": 0.002, "loss": 2.3369, "step": 252610 }, { "epoch": 0.97655827186838, "grad_norm": 0.10649019479751587, "learning_rate": 0.002, "loss": 2.3288, "step": 252620 }, { "epoch": 0.9765969290717632, "grad_norm": 0.16062025725841522, "learning_rate": 0.002, "loss": 2.3173, "step": 252630 }, { "epoch": 0.9766355862751465, "grad_norm": 0.1069127693772316, "learning_rate": 0.002, "loss": 2.3409, "step": 252640 }, { "epoch": 0.9766742434785298, "grad_norm": 0.09770555049180984, "learning_rate": 0.002, "loss": 2.3521, "step": 252650 }, { "epoch": 0.9767129006819131, "grad_norm": 0.10639701783657074, "learning_rate": 0.002, "loss": 2.3408, "step": 252660 }, { "epoch": 0.9767515578852963, "grad_norm": 0.13143764436244965, "learning_rate": 0.002, "loss": 2.3369, "step": 252670 }, { "epoch": 0.9767902150886796, "grad_norm": 0.09408655762672424, "learning_rate": 0.002, "loss": 2.3356, "step": 252680 }, { "epoch": 0.9768288722920629, "grad_norm": 0.11855518072843552, "learning_rate": 0.002, "loss": 2.3453, "step": 252690 }, { "epoch": 0.9768675294954462, "grad_norm": 0.09519604593515396, "learning_rate": 0.002, "loss": 2.3537, "step": 252700 }, { "epoch": 0.9769061866988294, "grad_norm": 0.11027871817350388, "learning_rate": 0.002, "loss": 2.3452, "step": 252710 }, { "epoch": 0.9769448439022127, "grad_norm": 0.10735554248094559, "learning_rate": 0.002, "loss": 2.3427, "step": 252720 }, { "epoch": 0.9769835011055961, "grad_norm": 0.09857560694217682, "learning_rate": 0.002, "loss": 2.3327, "step": 252730 }, { "epoch": 0.9770221583089793, "grad_norm": 0.09966576844453812, "learning_rate": 0.002, "loss": 2.3311, "step": 252740 }, { "epoch": 0.9770608155123626, "grad_norm": 0.12802031636238098, "learning_rate": 0.002, "loss": 2.347, "step": 252750 }, { "epoch": 0.9770994727157458, "grad_norm": 0.11255703121423721, "learning_rate": 0.002, "loss": 2.3398, "step": 252760 }, { "epoch": 0.9771381299191292, "grad_norm": 0.10530129820108414, "learning_rate": 0.002, "loss": 2.3243, "step": 252770 }, { "epoch": 0.9771767871225124, "grad_norm": 0.10851936787366867, "learning_rate": 0.002, "loss": 2.3371, "step": 252780 }, { "epoch": 0.9772154443258957, "grad_norm": 0.09338781982660294, "learning_rate": 0.002, "loss": 2.3551, "step": 252790 }, { "epoch": 0.9772541015292789, "grad_norm": 0.10688374191522598, "learning_rate": 0.002, "loss": 2.3392, "step": 252800 }, { "epoch": 0.9772927587326622, "grad_norm": 0.10073661059141159, "learning_rate": 0.002, "loss": 2.3381, "step": 252810 }, { "epoch": 0.9773314159360456, "grad_norm": 0.10849092900753021, "learning_rate": 0.002, "loss": 2.3358, "step": 252820 }, { "epoch": 0.9773700731394288, "grad_norm": 0.10362280160188675, "learning_rate": 0.002, "loss": 2.3447, "step": 252830 }, { "epoch": 0.9774087303428121, "grad_norm": 0.10138023644685745, "learning_rate": 0.002, "loss": 2.3361, "step": 252840 }, { "epoch": 0.9774473875461953, "grad_norm": 0.12903210520744324, "learning_rate": 0.002, "loss": 2.3228, "step": 252850 }, { "epoch": 0.9774860447495787, "grad_norm": 0.11094959825277328, "learning_rate": 0.002, "loss": 2.3336, "step": 252860 }, { "epoch": 0.9775247019529619, "grad_norm": 0.10609157383441925, "learning_rate": 0.002, "loss": 2.3332, "step": 252870 }, { "epoch": 0.9775633591563452, "grad_norm": 0.1097332313656807, "learning_rate": 0.002, "loss": 2.3414, "step": 252880 }, { "epoch": 0.9776020163597284, "grad_norm": 0.11047670245170593, "learning_rate": 0.002, "loss": 2.3333, "step": 252890 }, { "epoch": 0.9776406735631118, "grad_norm": 0.1258823722600937, "learning_rate": 0.002, "loss": 2.3297, "step": 252900 }, { "epoch": 0.977679330766495, "grad_norm": 0.0985330194234848, "learning_rate": 0.002, "loss": 2.3367, "step": 252910 }, { "epoch": 0.9777179879698783, "grad_norm": 0.10980021208524704, "learning_rate": 0.002, "loss": 2.3265, "step": 252920 }, { "epoch": 0.9777566451732616, "grad_norm": 0.11983881890773773, "learning_rate": 0.002, "loss": 2.3424, "step": 252930 }, { "epoch": 0.9777953023766449, "grad_norm": 0.10731696337461472, "learning_rate": 0.002, "loss": 2.349, "step": 252940 }, { "epoch": 0.9778339595800282, "grad_norm": 0.10070104151964188, "learning_rate": 0.002, "loss": 2.3451, "step": 252950 }, { "epoch": 0.9778726167834114, "grad_norm": 0.11113781481981277, "learning_rate": 0.002, "loss": 2.3498, "step": 252960 }, { "epoch": 0.9779112739867947, "grad_norm": 0.09822292625904083, "learning_rate": 0.002, "loss": 2.3486, "step": 252970 }, { "epoch": 0.9779499311901779, "grad_norm": 0.101972296833992, "learning_rate": 0.002, "loss": 2.3359, "step": 252980 }, { "epoch": 0.9779885883935613, "grad_norm": 0.12044399976730347, "learning_rate": 0.002, "loss": 2.3407, "step": 252990 }, { "epoch": 0.9780272455969445, "grad_norm": 0.13268974423408508, "learning_rate": 0.002, "loss": 2.3403, "step": 253000 }, { "epoch": 0.9780659028003278, "grad_norm": 0.10569026321172714, "learning_rate": 0.002, "loss": 2.344, "step": 253010 }, { "epoch": 0.978104560003711, "grad_norm": 0.10113076865673065, "learning_rate": 0.002, "loss": 2.3414, "step": 253020 }, { "epoch": 0.9781432172070944, "grad_norm": 0.09855961054563522, "learning_rate": 0.002, "loss": 2.3334, "step": 253030 }, { "epoch": 0.9781818744104777, "grad_norm": 0.0978899747133255, "learning_rate": 0.002, "loss": 2.3191, "step": 253040 }, { "epoch": 0.9782205316138609, "grad_norm": 0.10016301274299622, "learning_rate": 0.002, "loss": 2.3402, "step": 253050 }, { "epoch": 0.9782591888172442, "grad_norm": 0.11159918457269669, "learning_rate": 0.002, "loss": 2.3485, "step": 253060 }, { "epoch": 0.9782978460206275, "grad_norm": 0.09567983448505402, "learning_rate": 0.002, "loss": 2.3442, "step": 253070 }, { "epoch": 0.9783365032240108, "grad_norm": 0.12280935794115067, "learning_rate": 0.002, "loss": 2.3318, "step": 253080 }, { "epoch": 0.978375160427394, "grad_norm": 0.11773037165403366, "learning_rate": 0.002, "loss": 2.3312, "step": 253090 }, { "epoch": 0.9784138176307773, "grad_norm": 0.11330371350049973, "learning_rate": 0.002, "loss": 2.3457, "step": 253100 }, { "epoch": 0.9784524748341606, "grad_norm": 0.1054389551281929, "learning_rate": 0.002, "loss": 2.3391, "step": 253110 }, { "epoch": 0.9784911320375439, "grad_norm": 0.10864686965942383, "learning_rate": 0.002, "loss": 2.3334, "step": 253120 }, { "epoch": 0.9785297892409272, "grad_norm": 0.09636160731315613, "learning_rate": 0.002, "loss": 2.3431, "step": 253130 }, { "epoch": 0.9785684464443104, "grad_norm": 0.11868980526924133, "learning_rate": 0.002, "loss": 2.351, "step": 253140 }, { "epoch": 0.9786071036476938, "grad_norm": 0.11431093513965607, "learning_rate": 0.002, "loss": 2.3539, "step": 253150 }, { "epoch": 0.978645760851077, "grad_norm": 0.0956217423081398, "learning_rate": 0.002, "loss": 2.3519, "step": 253160 }, { "epoch": 0.9786844180544603, "grad_norm": 0.11623834073543549, "learning_rate": 0.002, "loss": 2.3401, "step": 253170 }, { "epoch": 0.9787230752578435, "grad_norm": 0.10607807338237762, "learning_rate": 0.002, "loss": 2.3357, "step": 253180 }, { "epoch": 0.9787617324612268, "grad_norm": 0.10642440617084503, "learning_rate": 0.002, "loss": 2.3336, "step": 253190 }, { "epoch": 0.9788003896646101, "grad_norm": 0.11321180313825607, "learning_rate": 0.002, "loss": 2.3427, "step": 253200 }, { "epoch": 0.9788390468679934, "grad_norm": 0.1318560391664505, "learning_rate": 0.002, "loss": 2.3326, "step": 253210 }, { "epoch": 0.9788777040713766, "grad_norm": 0.10777322947978973, "learning_rate": 0.002, "loss": 2.3337, "step": 253220 }, { "epoch": 0.9789163612747599, "grad_norm": 0.10849911719560623, "learning_rate": 0.002, "loss": 2.324, "step": 253230 }, { "epoch": 0.9789550184781433, "grad_norm": 0.09989572316408157, "learning_rate": 0.002, "loss": 2.3291, "step": 253240 }, { "epoch": 0.9789936756815265, "grad_norm": 0.10110043734312057, "learning_rate": 0.002, "loss": 2.3516, "step": 253250 }, { "epoch": 0.9790323328849098, "grad_norm": 0.13030365109443665, "learning_rate": 0.002, "loss": 2.351, "step": 253260 }, { "epoch": 0.979070990088293, "grad_norm": 0.09885247051715851, "learning_rate": 0.002, "loss": 2.3337, "step": 253270 }, { "epoch": 0.9791096472916764, "grad_norm": 0.0909193828701973, "learning_rate": 0.002, "loss": 2.3335, "step": 253280 }, { "epoch": 0.9791483044950596, "grad_norm": 0.10934343189001083, "learning_rate": 0.002, "loss": 2.3533, "step": 253290 }, { "epoch": 0.9791869616984429, "grad_norm": 0.11860756576061249, "learning_rate": 0.002, "loss": 2.3301, "step": 253300 }, { "epoch": 0.9792256189018261, "grad_norm": 0.09530835598707199, "learning_rate": 0.002, "loss": 2.3306, "step": 253310 }, { "epoch": 0.9792642761052095, "grad_norm": 0.1112305223941803, "learning_rate": 0.002, "loss": 2.3392, "step": 253320 }, { "epoch": 0.9793029333085927, "grad_norm": 0.09348230808973312, "learning_rate": 0.002, "loss": 2.34, "step": 253330 }, { "epoch": 0.979341590511976, "grad_norm": 0.14365029335021973, "learning_rate": 0.002, "loss": 2.3357, "step": 253340 }, { "epoch": 0.9793802477153593, "grad_norm": 0.19712699949741364, "learning_rate": 0.002, "loss": 2.3465, "step": 253350 }, { "epoch": 0.9794189049187425, "grad_norm": 0.11766904592514038, "learning_rate": 0.002, "loss": 2.3299, "step": 253360 }, { "epoch": 0.9794575621221259, "grad_norm": 0.11091282218694687, "learning_rate": 0.002, "loss": 2.3314, "step": 253370 }, { "epoch": 0.9794962193255091, "grad_norm": 0.09966139495372772, "learning_rate": 0.002, "loss": 2.3382, "step": 253380 }, { "epoch": 0.9795348765288924, "grad_norm": 0.10028279572725296, "learning_rate": 0.002, "loss": 2.3288, "step": 253390 }, { "epoch": 0.9795735337322756, "grad_norm": 0.10175572335720062, "learning_rate": 0.002, "loss": 2.3435, "step": 253400 }, { "epoch": 0.979612190935659, "grad_norm": 0.12000759690999985, "learning_rate": 0.002, "loss": 2.3344, "step": 253410 }, { "epoch": 0.9796508481390422, "grad_norm": 0.10719025880098343, "learning_rate": 0.002, "loss": 2.3354, "step": 253420 }, { "epoch": 0.9796895053424255, "grad_norm": 0.10947318375110626, "learning_rate": 0.002, "loss": 2.3311, "step": 253430 }, { "epoch": 0.9797281625458087, "grad_norm": 0.09750618785619736, "learning_rate": 0.002, "loss": 2.3373, "step": 253440 }, { "epoch": 0.9797668197491921, "grad_norm": 0.12034052610397339, "learning_rate": 0.002, "loss": 2.3332, "step": 253450 }, { "epoch": 0.9798054769525754, "grad_norm": 0.10206147283315659, "learning_rate": 0.002, "loss": 2.3515, "step": 253460 }, { "epoch": 0.9798441341559586, "grad_norm": 0.10553114116191864, "learning_rate": 0.002, "loss": 2.3374, "step": 253470 }, { "epoch": 0.9798827913593419, "grad_norm": 0.10624590516090393, "learning_rate": 0.002, "loss": 2.3501, "step": 253480 }, { "epoch": 0.9799214485627252, "grad_norm": 0.09161072969436646, "learning_rate": 0.002, "loss": 2.3464, "step": 253490 }, { "epoch": 0.9799601057661085, "grad_norm": 0.09625286608934402, "learning_rate": 0.002, "loss": 2.3387, "step": 253500 }, { "epoch": 0.9799987629694917, "grad_norm": 0.10236509889364243, "learning_rate": 0.002, "loss": 2.3258, "step": 253510 }, { "epoch": 0.980037420172875, "grad_norm": 0.09792248904705048, "learning_rate": 0.002, "loss": 2.3454, "step": 253520 }, { "epoch": 0.9800760773762582, "grad_norm": 0.10096298903226852, "learning_rate": 0.002, "loss": 2.349, "step": 253530 }, { "epoch": 0.9801147345796416, "grad_norm": 0.10640841722488403, "learning_rate": 0.002, "loss": 2.325, "step": 253540 }, { "epoch": 0.9801533917830249, "grad_norm": 0.10135815292596817, "learning_rate": 0.002, "loss": 2.3313, "step": 253550 }, { "epoch": 0.9801920489864081, "grad_norm": 0.11531524360179901, "learning_rate": 0.002, "loss": 2.3315, "step": 253560 }, { "epoch": 0.9802307061897914, "grad_norm": 0.0986768901348114, "learning_rate": 0.002, "loss": 2.3304, "step": 253570 }, { "epoch": 0.9802693633931747, "grad_norm": 0.11826640367507935, "learning_rate": 0.002, "loss": 2.3376, "step": 253580 }, { "epoch": 0.980308020596558, "grad_norm": 0.11659346520900726, "learning_rate": 0.002, "loss": 2.3438, "step": 253590 }, { "epoch": 0.9803466777999412, "grad_norm": 0.08888128399848938, "learning_rate": 0.002, "loss": 2.3418, "step": 253600 }, { "epoch": 0.9803853350033245, "grad_norm": 0.11499710381031036, "learning_rate": 0.002, "loss": 2.3441, "step": 253610 }, { "epoch": 0.9804239922067078, "grad_norm": 0.11001802980899811, "learning_rate": 0.002, "loss": 2.3341, "step": 253620 }, { "epoch": 0.9804626494100911, "grad_norm": 0.33217594027519226, "learning_rate": 0.002, "loss": 2.321, "step": 253630 }, { "epoch": 0.9805013066134743, "grad_norm": 0.11237010359764099, "learning_rate": 0.002, "loss": 2.3548, "step": 253640 }, { "epoch": 0.9805399638168576, "grad_norm": 0.0909418836236, "learning_rate": 0.002, "loss": 2.3386, "step": 253650 }, { "epoch": 0.980578621020241, "grad_norm": 0.10539261251688004, "learning_rate": 0.002, "loss": 2.3327, "step": 253660 }, { "epoch": 0.9806172782236242, "grad_norm": 0.10350189357995987, "learning_rate": 0.002, "loss": 2.3442, "step": 253670 }, { "epoch": 0.9806559354270075, "grad_norm": 0.1357734650373459, "learning_rate": 0.002, "loss": 2.3428, "step": 253680 }, { "epoch": 0.9806945926303907, "grad_norm": 0.09479259699583054, "learning_rate": 0.002, "loss": 2.3401, "step": 253690 }, { "epoch": 0.9807332498337741, "grad_norm": 0.1024928092956543, "learning_rate": 0.002, "loss": 2.3352, "step": 253700 }, { "epoch": 0.9807719070371573, "grad_norm": 0.11998184770345688, "learning_rate": 0.002, "loss": 2.335, "step": 253710 }, { "epoch": 0.9808105642405406, "grad_norm": 0.09331895411014557, "learning_rate": 0.002, "loss": 2.3372, "step": 253720 }, { "epoch": 0.9808492214439238, "grad_norm": 0.11315015703439713, "learning_rate": 0.002, "loss": 2.3312, "step": 253730 }, { "epoch": 0.9808878786473071, "grad_norm": 0.09995149821043015, "learning_rate": 0.002, "loss": 2.3337, "step": 253740 }, { "epoch": 0.9809265358506905, "grad_norm": 0.1109841987490654, "learning_rate": 0.002, "loss": 2.32, "step": 253750 }, { "epoch": 0.9809651930540737, "grad_norm": 0.0948142409324646, "learning_rate": 0.002, "loss": 2.3379, "step": 253760 }, { "epoch": 0.981003850257457, "grad_norm": 0.10762964934110641, "learning_rate": 0.002, "loss": 2.3261, "step": 253770 }, { "epoch": 0.9810425074608402, "grad_norm": 0.11686155945062637, "learning_rate": 0.002, "loss": 2.3317, "step": 253780 }, { "epoch": 0.9810811646642236, "grad_norm": 0.1214323565363884, "learning_rate": 0.002, "loss": 2.3456, "step": 253790 }, { "epoch": 0.9811198218676068, "grad_norm": 0.10316342860460281, "learning_rate": 0.002, "loss": 2.3417, "step": 253800 }, { "epoch": 0.9811584790709901, "grad_norm": 0.10486052185297012, "learning_rate": 0.002, "loss": 2.3297, "step": 253810 }, { "epoch": 0.9811971362743733, "grad_norm": 0.11336401849985123, "learning_rate": 0.002, "loss": 2.3419, "step": 253820 }, { "epoch": 0.9812357934777567, "grad_norm": 0.1080748662352562, "learning_rate": 0.002, "loss": 2.3381, "step": 253830 }, { "epoch": 0.9812744506811399, "grad_norm": 0.12142869830131531, "learning_rate": 0.002, "loss": 2.3481, "step": 253840 }, { "epoch": 0.9813131078845232, "grad_norm": 0.1060624048113823, "learning_rate": 0.002, "loss": 2.3299, "step": 253850 }, { "epoch": 0.9813517650879064, "grad_norm": 0.10174284875392914, "learning_rate": 0.002, "loss": 2.3423, "step": 253860 }, { "epoch": 0.9813904222912898, "grad_norm": 0.10029471665620804, "learning_rate": 0.002, "loss": 2.3327, "step": 253870 }, { "epoch": 0.9814290794946731, "grad_norm": 0.108481265604496, "learning_rate": 0.002, "loss": 2.3364, "step": 253880 }, { "epoch": 0.9814677366980563, "grad_norm": 0.0938580110669136, "learning_rate": 0.002, "loss": 2.3364, "step": 253890 }, { "epoch": 0.9815063939014396, "grad_norm": 0.10277163237333298, "learning_rate": 0.002, "loss": 2.3534, "step": 253900 }, { "epoch": 0.9815450511048228, "grad_norm": 0.12451539933681488, "learning_rate": 0.002, "loss": 2.3191, "step": 253910 }, { "epoch": 0.9815837083082062, "grad_norm": 0.10546960681676865, "learning_rate": 0.002, "loss": 2.3343, "step": 253920 }, { "epoch": 0.9816223655115894, "grad_norm": 0.08655869215726852, "learning_rate": 0.002, "loss": 2.3333, "step": 253930 }, { "epoch": 0.9816610227149727, "grad_norm": 0.12251607328653336, "learning_rate": 0.002, "loss": 2.3271, "step": 253940 }, { "epoch": 0.9816996799183559, "grad_norm": 0.10581085830926895, "learning_rate": 0.002, "loss": 2.34, "step": 253950 }, { "epoch": 0.9817383371217393, "grad_norm": 0.12245898693799973, "learning_rate": 0.002, "loss": 2.331, "step": 253960 }, { "epoch": 0.9817769943251226, "grad_norm": 0.11692578345537186, "learning_rate": 0.002, "loss": 2.3494, "step": 253970 }, { "epoch": 0.9818156515285058, "grad_norm": 0.10638014227151871, "learning_rate": 0.002, "loss": 2.326, "step": 253980 }, { "epoch": 0.9818543087318891, "grad_norm": 0.11459168046712875, "learning_rate": 0.002, "loss": 2.3373, "step": 253990 }, { "epoch": 0.9818929659352724, "grad_norm": 0.11234749853610992, "learning_rate": 0.002, "loss": 2.3415, "step": 254000 }, { "epoch": 0.9819316231386557, "grad_norm": 0.10456020385026932, "learning_rate": 0.002, "loss": 2.334, "step": 254010 }, { "epoch": 0.9819702803420389, "grad_norm": 0.10688741505146027, "learning_rate": 0.002, "loss": 2.3214, "step": 254020 }, { "epoch": 0.9820089375454222, "grad_norm": 0.12189941108226776, "learning_rate": 0.002, "loss": 2.3391, "step": 254030 }, { "epoch": 0.9820475947488055, "grad_norm": 0.09207422286272049, "learning_rate": 0.002, "loss": 2.344, "step": 254040 }, { "epoch": 0.9820862519521888, "grad_norm": 0.09040502458810806, "learning_rate": 0.002, "loss": 2.3404, "step": 254050 }, { "epoch": 0.982124909155572, "grad_norm": 0.08706256747245789, "learning_rate": 0.002, "loss": 2.3352, "step": 254060 }, { "epoch": 0.9821635663589553, "grad_norm": 0.10764108598232269, "learning_rate": 0.002, "loss": 2.337, "step": 254070 }, { "epoch": 0.9822022235623387, "grad_norm": 0.09894927591085434, "learning_rate": 0.002, "loss": 2.3335, "step": 254080 }, { "epoch": 0.9822408807657219, "grad_norm": 0.09293508529663086, "learning_rate": 0.002, "loss": 2.3438, "step": 254090 }, { "epoch": 0.9822795379691052, "grad_norm": 0.11155544966459274, "learning_rate": 0.002, "loss": 2.3295, "step": 254100 }, { "epoch": 0.9823181951724884, "grad_norm": 0.08967789262533188, "learning_rate": 0.002, "loss": 2.3318, "step": 254110 }, { "epoch": 0.9823568523758717, "grad_norm": 0.13756652176380157, "learning_rate": 0.002, "loss": 2.3306, "step": 254120 }, { "epoch": 0.982395509579255, "grad_norm": 0.10222943872213364, "learning_rate": 0.002, "loss": 2.3518, "step": 254130 }, { "epoch": 0.9824341667826383, "grad_norm": 0.10353002697229385, "learning_rate": 0.002, "loss": 2.3357, "step": 254140 }, { "epoch": 0.9824728239860215, "grad_norm": 0.10684805363416672, "learning_rate": 0.002, "loss": 2.3457, "step": 254150 }, { "epoch": 0.9825114811894048, "grad_norm": 0.12317736446857452, "learning_rate": 0.002, "loss": 2.323, "step": 254160 }, { "epoch": 0.9825501383927882, "grad_norm": 0.09487446397542953, "learning_rate": 0.002, "loss": 2.3383, "step": 254170 }, { "epoch": 0.9825887955961714, "grad_norm": 0.09926487505435944, "learning_rate": 0.002, "loss": 2.3225, "step": 254180 }, { "epoch": 0.9826274527995547, "grad_norm": 0.10189937800168991, "learning_rate": 0.002, "loss": 2.3517, "step": 254190 }, { "epoch": 0.9826661100029379, "grad_norm": 0.10605458915233612, "learning_rate": 0.002, "loss": 2.3424, "step": 254200 }, { "epoch": 0.9827047672063213, "grad_norm": 0.09959662705659866, "learning_rate": 0.002, "loss": 2.3346, "step": 254210 }, { "epoch": 0.9827434244097045, "grad_norm": 0.08425378799438477, "learning_rate": 0.002, "loss": 2.3326, "step": 254220 }, { "epoch": 0.9827820816130878, "grad_norm": 0.1034320816397667, "learning_rate": 0.002, "loss": 2.338, "step": 254230 }, { "epoch": 0.982820738816471, "grad_norm": 0.11652296036481857, "learning_rate": 0.002, "loss": 2.3383, "step": 254240 }, { "epoch": 0.9828593960198544, "grad_norm": 0.09380397945642471, "learning_rate": 0.002, "loss": 2.3302, "step": 254250 }, { "epoch": 0.9828980532232376, "grad_norm": 0.124928779900074, "learning_rate": 0.002, "loss": 2.332, "step": 254260 }, { "epoch": 0.9829367104266209, "grad_norm": 0.10061852633953094, "learning_rate": 0.002, "loss": 2.3352, "step": 254270 }, { "epoch": 0.9829753676300041, "grad_norm": 0.10316528379917145, "learning_rate": 0.002, "loss": 2.3438, "step": 254280 }, { "epoch": 0.9830140248333874, "grad_norm": 0.09837278723716736, "learning_rate": 0.002, "loss": 2.3335, "step": 254290 }, { "epoch": 0.9830526820367708, "grad_norm": 0.09891007095575333, "learning_rate": 0.002, "loss": 2.3442, "step": 254300 }, { "epoch": 0.983091339240154, "grad_norm": 0.10915490239858627, "learning_rate": 0.002, "loss": 2.3249, "step": 254310 }, { "epoch": 0.9831299964435373, "grad_norm": 0.10277639329433441, "learning_rate": 0.002, "loss": 2.3427, "step": 254320 }, { "epoch": 0.9831686536469205, "grad_norm": 0.10569757968187332, "learning_rate": 0.002, "loss": 2.334, "step": 254330 }, { "epoch": 0.9832073108503039, "grad_norm": 0.10817868262529373, "learning_rate": 0.002, "loss": 2.3216, "step": 254340 }, { "epoch": 0.9832459680536871, "grad_norm": 0.10384424030780792, "learning_rate": 0.002, "loss": 2.3405, "step": 254350 }, { "epoch": 0.9832846252570704, "grad_norm": 0.09562724083662033, "learning_rate": 0.002, "loss": 2.3416, "step": 254360 }, { "epoch": 0.9833232824604536, "grad_norm": 0.10068861395120621, "learning_rate": 0.002, "loss": 2.3381, "step": 254370 }, { "epoch": 0.983361939663837, "grad_norm": 0.10070518404245377, "learning_rate": 0.002, "loss": 2.3346, "step": 254380 }, { "epoch": 0.9834005968672203, "grad_norm": 0.11541334539651871, "learning_rate": 0.002, "loss": 2.327, "step": 254390 }, { "epoch": 0.9834392540706035, "grad_norm": 0.098110631108284, "learning_rate": 0.002, "loss": 2.3352, "step": 254400 }, { "epoch": 0.9834779112739868, "grad_norm": 0.1020338162779808, "learning_rate": 0.002, "loss": 2.3426, "step": 254410 }, { "epoch": 0.9835165684773701, "grad_norm": 0.11386211216449738, "learning_rate": 0.002, "loss": 2.329, "step": 254420 }, { "epoch": 0.9835552256807534, "grad_norm": 0.11197617650032043, "learning_rate": 0.002, "loss": 2.3397, "step": 254430 }, { "epoch": 0.9835938828841366, "grad_norm": 0.11133823543787003, "learning_rate": 0.002, "loss": 2.3374, "step": 254440 }, { "epoch": 0.9836325400875199, "grad_norm": 0.11409388482570648, "learning_rate": 0.002, "loss": 2.3593, "step": 254450 }, { "epoch": 0.9836711972909031, "grad_norm": 0.09850602596998215, "learning_rate": 0.002, "loss": 2.329, "step": 254460 }, { "epoch": 0.9837098544942865, "grad_norm": 0.11488984525203705, "learning_rate": 0.002, "loss": 2.331, "step": 254470 }, { "epoch": 0.9837485116976697, "grad_norm": 0.09836746007204056, "learning_rate": 0.002, "loss": 2.3314, "step": 254480 }, { "epoch": 0.983787168901053, "grad_norm": 0.11667005717754364, "learning_rate": 0.002, "loss": 2.3435, "step": 254490 }, { "epoch": 0.9838258261044363, "grad_norm": 0.11494197696447372, "learning_rate": 0.002, "loss": 2.3488, "step": 254500 }, { "epoch": 0.9838644833078196, "grad_norm": 0.09573443979024887, "learning_rate": 0.002, "loss": 2.3276, "step": 254510 }, { "epoch": 0.9839031405112029, "grad_norm": 0.09925644844770432, "learning_rate": 0.002, "loss": 2.3324, "step": 254520 }, { "epoch": 0.9839417977145861, "grad_norm": 0.12344303727149963, "learning_rate": 0.002, "loss": 2.3465, "step": 254530 }, { "epoch": 0.9839804549179694, "grad_norm": 0.13273784518241882, "learning_rate": 0.002, "loss": 2.3499, "step": 254540 }, { "epoch": 0.9840191121213527, "grad_norm": 0.09237249195575714, "learning_rate": 0.002, "loss": 2.3311, "step": 254550 }, { "epoch": 0.984057769324736, "grad_norm": 0.10044827312231064, "learning_rate": 0.002, "loss": 2.3489, "step": 254560 }, { "epoch": 0.9840964265281192, "grad_norm": 0.1014859676361084, "learning_rate": 0.002, "loss": 2.3426, "step": 254570 }, { "epoch": 0.9841350837315025, "grad_norm": 0.09613264352083206, "learning_rate": 0.002, "loss": 2.3183, "step": 254580 }, { "epoch": 0.9841737409348859, "grad_norm": 0.15587982535362244, "learning_rate": 0.002, "loss": 2.3314, "step": 254590 }, { "epoch": 0.9842123981382691, "grad_norm": 0.09434341639280319, "learning_rate": 0.002, "loss": 2.359, "step": 254600 }, { "epoch": 0.9842510553416524, "grad_norm": 0.09690546989440918, "learning_rate": 0.002, "loss": 2.3356, "step": 254610 }, { "epoch": 0.9842897125450356, "grad_norm": 0.2871216833591461, "learning_rate": 0.002, "loss": 2.3304, "step": 254620 }, { "epoch": 0.984328369748419, "grad_norm": 0.12504823505878448, "learning_rate": 0.002, "loss": 2.3479, "step": 254630 }, { "epoch": 0.9843670269518022, "grad_norm": 0.09755125641822815, "learning_rate": 0.002, "loss": 2.3325, "step": 254640 }, { "epoch": 0.9844056841551855, "grad_norm": 0.09232793748378754, "learning_rate": 0.002, "loss": 2.3297, "step": 254650 }, { "epoch": 0.9844443413585687, "grad_norm": 0.09949223697185516, "learning_rate": 0.002, "loss": 2.3367, "step": 254660 }, { "epoch": 0.984482998561952, "grad_norm": 0.13219931721687317, "learning_rate": 0.002, "loss": 2.3374, "step": 254670 }, { "epoch": 0.9845216557653353, "grad_norm": 0.09665770083665848, "learning_rate": 0.002, "loss": 2.3363, "step": 254680 }, { "epoch": 0.9845603129687186, "grad_norm": 0.11133351922035217, "learning_rate": 0.002, "loss": 2.3493, "step": 254690 }, { "epoch": 0.9845989701721019, "grad_norm": 0.10648233443498611, "learning_rate": 0.002, "loss": 2.3408, "step": 254700 }, { "epoch": 0.9846376273754851, "grad_norm": 0.11144927889108658, "learning_rate": 0.002, "loss": 2.3331, "step": 254710 }, { "epoch": 0.9846762845788685, "grad_norm": 0.09630986303091049, "learning_rate": 0.002, "loss": 2.348, "step": 254720 }, { "epoch": 0.9847149417822517, "grad_norm": 0.11907469481229782, "learning_rate": 0.002, "loss": 2.3449, "step": 254730 }, { "epoch": 0.984753598985635, "grad_norm": 0.30501991510391235, "learning_rate": 0.002, "loss": 2.3497, "step": 254740 }, { "epoch": 0.9847922561890182, "grad_norm": 0.1148744747042656, "learning_rate": 0.002, "loss": 2.3284, "step": 254750 }, { "epoch": 0.9848309133924016, "grad_norm": 0.10225886106491089, "learning_rate": 0.002, "loss": 2.3222, "step": 254760 }, { "epoch": 0.9848695705957848, "grad_norm": 0.09008200466632843, "learning_rate": 0.002, "loss": 2.3512, "step": 254770 }, { "epoch": 0.9849082277991681, "grad_norm": 0.10727483779191971, "learning_rate": 0.002, "loss": 2.3423, "step": 254780 }, { "epoch": 0.9849468850025513, "grad_norm": 0.11305704712867737, "learning_rate": 0.002, "loss": 2.3271, "step": 254790 }, { "epoch": 0.9849855422059347, "grad_norm": 0.11197128891944885, "learning_rate": 0.002, "loss": 2.3362, "step": 254800 }, { "epoch": 0.985024199409318, "grad_norm": 0.09113840758800507, "learning_rate": 0.002, "loss": 2.3403, "step": 254810 }, { "epoch": 0.9850628566127012, "grad_norm": 0.0970667377114296, "learning_rate": 0.002, "loss": 2.3325, "step": 254820 }, { "epoch": 0.9851015138160845, "grad_norm": 0.09900952130556107, "learning_rate": 0.002, "loss": 2.3328, "step": 254830 }, { "epoch": 0.9851401710194677, "grad_norm": 0.11687123775482178, "learning_rate": 0.002, "loss": 2.343, "step": 254840 }, { "epoch": 0.9851788282228511, "grad_norm": 0.09309032559394836, "learning_rate": 0.002, "loss": 2.3335, "step": 254850 }, { "epoch": 0.9852174854262343, "grad_norm": 0.09475398063659668, "learning_rate": 0.002, "loss": 2.3394, "step": 254860 }, { "epoch": 0.9852561426296176, "grad_norm": 0.09487903118133545, "learning_rate": 0.002, "loss": 2.3464, "step": 254870 }, { "epoch": 0.9852947998330008, "grad_norm": 0.11891059577465057, "learning_rate": 0.002, "loss": 2.3204, "step": 254880 }, { "epoch": 0.9853334570363842, "grad_norm": 0.10448039323091507, "learning_rate": 0.002, "loss": 2.327, "step": 254890 }, { "epoch": 0.9853721142397674, "grad_norm": 0.14385157823562622, "learning_rate": 0.002, "loss": 2.3253, "step": 254900 }, { "epoch": 0.9854107714431507, "grad_norm": 0.10002294182777405, "learning_rate": 0.002, "loss": 2.3331, "step": 254910 }, { "epoch": 0.985449428646534, "grad_norm": 0.11528504639863968, "learning_rate": 0.002, "loss": 2.3416, "step": 254920 }, { "epoch": 0.9854880858499173, "grad_norm": 0.10999160259962082, "learning_rate": 0.002, "loss": 2.3267, "step": 254930 }, { "epoch": 0.9855267430533006, "grad_norm": 0.10249609500169754, "learning_rate": 0.002, "loss": 2.3553, "step": 254940 }, { "epoch": 0.9855654002566838, "grad_norm": 0.10020183771848679, "learning_rate": 0.002, "loss": 2.3344, "step": 254950 }, { "epoch": 0.9856040574600671, "grad_norm": 0.10220799595117569, "learning_rate": 0.002, "loss": 2.328, "step": 254960 }, { "epoch": 0.9856427146634504, "grad_norm": 0.12004160135984421, "learning_rate": 0.002, "loss": 2.3357, "step": 254970 }, { "epoch": 0.9856813718668337, "grad_norm": 0.09195999801158905, "learning_rate": 0.002, "loss": 2.3421, "step": 254980 }, { "epoch": 0.9857200290702169, "grad_norm": 0.09734586626291275, "learning_rate": 0.002, "loss": 2.3291, "step": 254990 }, { "epoch": 0.9857586862736002, "grad_norm": 0.11250615119934082, "learning_rate": 0.002, "loss": 2.342, "step": 255000 }, { "epoch": 0.9857973434769836, "grad_norm": 0.10436253994703293, "learning_rate": 0.002, "loss": 2.3411, "step": 255010 }, { "epoch": 0.9858360006803668, "grad_norm": 0.1116916686296463, "learning_rate": 0.002, "loss": 2.3438, "step": 255020 }, { "epoch": 0.9858746578837501, "grad_norm": 0.10826195776462555, "learning_rate": 0.002, "loss": 2.3396, "step": 255030 }, { "epoch": 0.9859133150871333, "grad_norm": 0.11349769681692123, "learning_rate": 0.002, "loss": 2.3432, "step": 255040 }, { "epoch": 0.9859519722905166, "grad_norm": 0.10054189711809158, "learning_rate": 0.002, "loss": 2.3323, "step": 255050 }, { "epoch": 0.9859906294938999, "grad_norm": 0.12647701799869537, "learning_rate": 0.002, "loss": 2.3248, "step": 255060 }, { "epoch": 0.9860292866972832, "grad_norm": 0.10084088146686554, "learning_rate": 0.002, "loss": 2.3518, "step": 255070 }, { "epoch": 0.9860679439006664, "grad_norm": 0.09174734354019165, "learning_rate": 0.002, "loss": 2.3252, "step": 255080 }, { "epoch": 0.9861066011040497, "grad_norm": 0.11622127145528793, "learning_rate": 0.002, "loss": 2.3413, "step": 255090 }, { "epoch": 0.986145258307433, "grad_norm": 0.12576478719711304, "learning_rate": 0.002, "loss": 2.3328, "step": 255100 }, { "epoch": 0.9861839155108163, "grad_norm": 0.09602378308773041, "learning_rate": 0.002, "loss": 2.3467, "step": 255110 }, { "epoch": 0.9862225727141996, "grad_norm": 0.115997813642025, "learning_rate": 0.002, "loss": 2.3318, "step": 255120 }, { "epoch": 0.9862612299175828, "grad_norm": 0.11473080515861511, "learning_rate": 0.002, "loss": 2.3494, "step": 255130 }, { "epoch": 0.9862998871209662, "grad_norm": 0.09651309251785278, "learning_rate": 0.002, "loss": 2.3321, "step": 255140 }, { "epoch": 0.9863385443243494, "grad_norm": 0.13024041056632996, "learning_rate": 0.002, "loss": 2.3505, "step": 255150 }, { "epoch": 0.9863772015277327, "grad_norm": 0.10573989152908325, "learning_rate": 0.002, "loss": 2.3311, "step": 255160 }, { "epoch": 0.9864158587311159, "grad_norm": 0.08838653564453125, "learning_rate": 0.002, "loss": 2.3426, "step": 255170 }, { "epoch": 0.9864545159344993, "grad_norm": 0.10317710041999817, "learning_rate": 0.002, "loss": 2.3459, "step": 255180 }, { "epoch": 0.9864931731378825, "grad_norm": 0.09811824560165405, "learning_rate": 0.002, "loss": 2.3307, "step": 255190 }, { "epoch": 0.9865318303412658, "grad_norm": 0.0969930961728096, "learning_rate": 0.002, "loss": 2.3464, "step": 255200 }, { "epoch": 0.986570487544649, "grad_norm": 0.11122286319732666, "learning_rate": 0.002, "loss": 2.3412, "step": 255210 }, { "epoch": 0.9866091447480323, "grad_norm": 0.11096516996622086, "learning_rate": 0.002, "loss": 2.3305, "step": 255220 }, { "epoch": 0.9866478019514157, "grad_norm": 0.11507048457860947, "learning_rate": 0.002, "loss": 2.3329, "step": 255230 }, { "epoch": 0.9866864591547989, "grad_norm": 0.09455648064613342, "learning_rate": 0.002, "loss": 2.3314, "step": 255240 }, { "epoch": 0.9867251163581822, "grad_norm": 0.1153191328048706, "learning_rate": 0.002, "loss": 2.3315, "step": 255250 }, { "epoch": 0.9867637735615654, "grad_norm": 0.10693097859621048, "learning_rate": 0.002, "loss": 2.3108, "step": 255260 }, { "epoch": 0.9868024307649488, "grad_norm": 0.10604626685380936, "learning_rate": 0.002, "loss": 2.3453, "step": 255270 }, { "epoch": 0.986841087968332, "grad_norm": 0.11790741235017776, "learning_rate": 0.002, "loss": 2.3423, "step": 255280 }, { "epoch": 0.9868797451717153, "grad_norm": 0.10942420363426208, "learning_rate": 0.002, "loss": 2.3372, "step": 255290 }, { "epoch": 0.9869184023750985, "grad_norm": 0.12136127054691315, "learning_rate": 0.002, "loss": 2.3318, "step": 255300 }, { "epoch": 0.9869570595784819, "grad_norm": 0.09772010147571564, "learning_rate": 0.002, "loss": 2.3371, "step": 255310 }, { "epoch": 0.9869957167818652, "grad_norm": 0.10985453426837921, "learning_rate": 0.002, "loss": 2.3426, "step": 255320 }, { "epoch": 0.9870343739852484, "grad_norm": 0.11444909125566483, "learning_rate": 0.002, "loss": 2.3347, "step": 255330 }, { "epoch": 0.9870730311886317, "grad_norm": 0.1355230212211609, "learning_rate": 0.002, "loss": 2.3468, "step": 255340 }, { "epoch": 0.987111688392015, "grad_norm": 0.09789875894784927, "learning_rate": 0.002, "loss": 2.3302, "step": 255350 }, { "epoch": 0.9871503455953983, "grad_norm": 0.10760276764631271, "learning_rate": 0.002, "loss": 2.3497, "step": 255360 }, { "epoch": 0.9871890027987815, "grad_norm": 0.10690094530582428, "learning_rate": 0.002, "loss": 2.3344, "step": 255370 }, { "epoch": 0.9872276600021648, "grad_norm": 0.10276754200458527, "learning_rate": 0.002, "loss": 2.3289, "step": 255380 }, { "epoch": 0.987266317205548, "grad_norm": 0.12542317807674408, "learning_rate": 0.002, "loss": 2.3385, "step": 255390 }, { "epoch": 0.9873049744089314, "grad_norm": 0.11923978477716446, "learning_rate": 0.002, "loss": 2.3548, "step": 255400 }, { "epoch": 0.9873436316123146, "grad_norm": 0.11488944292068481, "learning_rate": 0.002, "loss": 2.3336, "step": 255410 }, { "epoch": 0.9873822888156979, "grad_norm": 0.09592791646718979, "learning_rate": 0.002, "loss": 2.3296, "step": 255420 }, { "epoch": 0.9874209460190811, "grad_norm": 0.09236177057027817, "learning_rate": 0.002, "loss": 2.325, "step": 255430 }, { "epoch": 0.9874596032224645, "grad_norm": 0.09595596790313721, "learning_rate": 0.002, "loss": 2.3461, "step": 255440 }, { "epoch": 0.9874982604258478, "grad_norm": 0.15593257546424866, "learning_rate": 0.002, "loss": 2.3358, "step": 255450 }, { "epoch": 0.987536917629231, "grad_norm": 0.11803868412971497, "learning_rate": 0.002, "loss": 2.3489, "step": 255460 }, { "epoch": 0.9875755748326143, "grad_norm": 0.1036602035164833, "learning_rate": 0.002, "loss": 2.3269, "step": 255470 }, { "epoch": 0.9876142320359976, "grad_norm": 0.1197872906923294, "learning_rate": 0.002, "loss": 2.3158, "step": 255480 }, { "epoch": 0.9876528892393809, "grad_norm": 0.1072472557425499, "learning_rate": 0.002, "loss": 2.3437, "step": 255490 }, { "epoch": 0.9876915464427641, "grad_norm": 0.10736659169197083, "learning_rate": 0.002, "loss": 2.3252, "step": 255500 }, { "epoch": 0.9877302036461474, "grad_norm": 0.09682945162057877, "learning_rate": 0.002, "loss": 2.3192, "step": 255510 }, { "epoch": 0.9877688608495307, "grad_norm": 0.11917953938245773, "learning_rate": 0.002, "loss": 2.3452, "step": 255520 }, { "epoch": 0.987807518052914, "grad_norm": 0.10262294858694077, "learning_rate": 0.002, "loss": 2.3449, "step": 255530 }, { "epoch": 0.9878461752562973, "grad_norm": 0.10043973475694656, "learning_rate": 0.002, "loss": 2.3378, "step": 255540 }, { "epoch": 0.9878848324596805, "grad_norm": 0.10268932580947876, "learning_rate": 0.002, "loss": 2.3386, "step": 255550 }, { "epoch": 0.9879234896630639, "grad_norm": 0.10502326488494873, "learning_rate": 0.002, "loss": 2.3567, "step": 255560 }, { "epoch": 0.9879621468664471, "grad_norm": 0.11357767134904861, "learning_rate": 0.002, "loss": 2.331, "step": 255570 }, { "epoch": 0.9880008040698304, "grad_norm": 0.11702119559049606, "learning_rate": 0.002, "loss": 2.3355, "step": 255580 }, { "epoch": 0.9880394612732136, "grad_norm": 0.11545008420944214, "learning_rate": 0.002, "loss": 2.3138, "step": 255590 }, { "epoch": 0.9880781184765969, "grad_norm": 0.1151541993021965, "learning_rate": 0.002, "loss": 2.3384, "step": 255600 }, { "epoch": 0.9881167756799802, "grad_norm": 0.09862946718931198, "learning_rate": 0.002, "loss": 2.3577, "step": 255610 }, { "epoch": 0.9881554328833635, "grad_norm": 0.10757492482662201, "learning_rate": 0.002, "loss": 2.3322, "step": 255620 }, { "epoch": 0.9881940900867467, "grad_norm": 0.09744302183389664, "learning_rate": 0.002, "loss": 2.3455, "step": 255630 }, { "epoch": 0.98823274729013, "grad_norm": 0.11994163691997528, "learning_rate": 0.002, "loss": 2.3214, "step": 255640 }, { "epoch": 0.9882714044935134, "grad_norm": 0.09365137666463852, "learning_rate": 0.002, "loss": 2.3367, "step": 255650 }, { "epoch": 0.9883100616968966, "grad_norm": 0.09933947026729584, "learning_rate": 0.002, "loss": 2.3297, "step": 255660 }, { "epoch": 0.9883487189002799, "grad_norm": 0.11144649237394333, "learning_rate": 0.002, "loss": 2.3363, "step": 255670 }, { "epoch": 0.9883873761036631, "grad_norm": 0.11128731071949005, "learning_rate": 0.002, "loss": 2.3358, "step": 255680 }, { "epoch": 0.9884260333070465, "grad_norm": 0.09201089292764664, "learning_rate": 0.002, "loss": 2.3336, "step": 255690 }, { "epoch": 0.9884646905104297, "grad_norm": 0.11140462756156921, "learning_rate": 0.002, "loss": 2.3364, "step": 255700 }, { "epoch": 0.988503347713813, "grad_norm": 0.09715986996889114, "learning_rate": 0.002, "loss": 2.3273, "step": 255710 }, { "epoch": 0.9885420049171962, "grad_norm": 0.10743650048971176, "learning_rate": 0.002, "loss": 2.3417, "step": 255720 }, { "epoch": 0.9885806621205796, "grad_norm": 0.09190218895673752, "learning_rate": 0.002, "loss": 2.3136, "step": 255730 }, { "epoch": 0.9886193193239629, "grad_norm": 0.10867530852556229, "learning_rate": 0.002, "loss": 2.3507, "step": 255740 }, { "epoch": 0.9886579765273461, "grad_norm": 0.2617291510105133, "learning_rate": 0.002, "loss": 2.3352, "step": 255750 }, { "epoch": 0.9886966337307294, "grad_norm": 0.11458270251750946, "learning_rate": 0.002, "loss": 2.3431, "step": 255760 }, { "epoch": 0.9887352909341126, "grad_norm": 0.254932165145874, "learning_rate": 0.002, "loss": 2.4027, "step": 255770 }, { "epoch": 0.988773948137496, "grad_norm": 0.10507343709468842, "learning_rate": 0.002, "loss": 2.3572, "step": 255780 }, { "epoch": 0.9888126053408792, "grad_norm": 0.09200027585029602, "learning_rate": 0.002, "loss": 2.3393, "step": 255790 }, { "epoch": 0.9888512625442625, "grad_norm": 0.0957023873925209, "learning_rate": 0.002, "loss": 2.3371, "step": 255800 }, { "epoch": 0.9888899197476457, "grad_norm": 0.09974458813667297, "learning_rate": 0.002, "loss": 2.3386, "step": 255810 }, { "epoch": 0.9889285769510291, "grad_norm": 0.09365065395832062, "learning_rate": 0.002, "loss": 2.3267, "step": 255820 }, { "epoch": 0.9889672341544123, "grad_norm": 0.13055384159088135, "learning_rate": 0.002, "loss": 2.3293, "step": 255830 }, { "epoch": 0.9890058913577956, "grad_norm": 0.09168808907270432, "learning_rate": 0.002, "loss": 2.3295, "step": 255840 }, { "epoch": 0.9890445485611788, "grad_norm": 0.10091964900493622, "learning_rate": 0.002, "loss": 2.3479, "step": 255850 }, { "epoch": 0.9890832057645622, "grad_norm": 0.10866480320692062, "learning_rate": 0.002, "loss": 2.3467, "step": 255860 }, { "epoch": 0.9891218629679455, "grad_norm": 0.10408901423215866, "learning_rate": 0.002, "loss": 2.3363, "step": 255870 }, { "epoch": 0.9891605201713287, "grad_norm": 0.10550463944673538, "learning_rate": 0.002, "loss": 2.3345, "step": 255880 }, { "epoch": 0.989199177374712, "grad_norm": 0.09939587116241455, "learning_rate": 0.002, "loss": 2.3365, "step": 255890 }, { "epoch": 0.9892378345780953, "grad_norm": 0.11436154693365097, "learning_rate": 0.002, "loss": 2.3315, "step": 255900 }, { "epoch": 0.9892764917814786, "grad_norm": 0.10249326378107071, "learning_rate": 0.002, "loss": 2.3262, "step": 255910 }, { "epoch": 0.9893151489848618, "grad_norm": 0.15078838169574738, "learning_rate": 0.002, "loss": 2.3358, "step": 255920 }, { "epoch": 0.9893538061882451, "grad_norm": 0.10776419937610626, "learning_rate": 0.002, "loss": 2.3514, "step": 255930 }, { "epoch": 0.9893924633916283, "grad_norm": 0.10219588130712509, "learning_rate": 0.002, "loss": 2.3418, "step": 255940 }, { "epoch": 0.9894311205950117, "grad_norm": 0.10183137655258179, "learning_rate": 0.002, "loss": 2.3238, "step": 255950 }, { "epoch": 0.989469777798395, "grad_norm": 0.10559652000665665, "learning_rate": 0.002, "loss": 2.3246, "step": 255960 }, { "epoch": 0.9895084350017782, "grad_norm": 0.1247977614402771, "learning_rate": 0.002, "loss": 2.349, "step": 255970 }, { "epoch": 0.9895470922051615, "grad_norm": 0.10177358239889145, "learning_rate": 0.002, "loss": 2.3438, "step": 255980 }, { "epoch": 0.9895857494085448, "grad_norm": 0.11221577972173691, "learning_rate": 0.002, "loss": 2.3246, "step": 255990 }, { "epoch": 0.9896244066119281, "grad_norm": 0.11370981484651566, "learning_rate": 0.002, "loss": 2.3443, "step": 256000 }, { "epoch": 0.9896630638153113, "grad_norm": 0.11815855652093887, "learning_rate": 0.002, "loss": 2.3169, "step": 256010 }, { "epoch": 0.9897017210186946, "grad_norm": 0.09536910802125931, "learning_rate": 0.002, "loss": 2.339, "step": 256020 }, { "epoch": 0.9897403782220779, "grad_norm": 0.10650273412466049, "learning_rate": 0.002, "loss": 2.3299, "step": 256030 }, { "epoch": 0.9897790354254612, "grad_norm": 0.11287032067775726, "learning_rate": 0.002, "loss": 2.3299, "step": 256040 }, { "epoch": 0.9898176926288444, "grad_norm": 0.09238874912261963, "learning_rate": 0.002, "loss": 2.3174, "step": 256050 }, { "epoch": 0.9898563498322277, "grad_norm": 0.12202656269073486, "learning_rate": 0.002, "loss": 2.3304, "step": 256060 }, { "epoch": 0.9898950070356111, "grad_norm": 0.10122062265872955, "learning_rate": 0.002, "loss": 2.3349, "step": 256070 }, { "epoch": 0.9899336642389943, "grad_norm": 0.09519728273153305, "learning_rate": 0.002, "loss": 2.3445, "step": 256080 }, { "epoch": 0.9899723214423776, "grad_norm": 0.10702326148748398, "learning_rate": 0.002, "loss": 2.3419, "step": 256090 }, { "epoch": 0.9900109786457608, "grad_norm": 0.09455457329750061, "learning_rate": 0.002, "loss": 2.3391, "step": 256100 }, { "epoch": 0.9900496358491442, "grad_norm": 0.1183152049779892, "learning_rate": 0.002, "loss": 2.3545, "step": 256110 }, { "epoch": 0.9900882930525274, "grad_norm": 0.11568870395421982, "learning_rate": 0.002, "loss": 2.3383, "step": 256120 }, { "epoch": 0.9901269502559107, "grad_norm": 0.10681305825710297, "learning_rate": 0.002, "loss": 2.3411, "step": 256130 }, { "epoch": 0.9901656074592939, "grad_norm": 0.10074838250875473, "learning_rate": 0.002, "loss": 2.3414, "step": 256140 }, { "epoch": 0.9902042646626772, "grad_norm": 0.0929526686668396, "learning_rate": 0.002, "loss": 2.3388, "step": 256150 }, { "epoch": 0.9902429218660606, "grad_norm": 0.12454530596733093, "learning_rate": 0.002, "loss": 2.3439, "step": 256160 }, { "epoch": 0.9902815790694438, "grad_norm": 0.11118060350418091, "learning_rate": 0.002, "loss": 2.3545, "step": 256170 }, { "epoch": 0.9903202362728271, "grad_norm": 0.10692273825407028, "learning_rate": 0.002, "loss": 2.3504, "step": 256180 }, { "epoch": 0.9903588934762103, "grad_norm": 0.09192261099815369, "learning_rate": 0.002, "loss": 2.3316, "step": 256190 }, { "epoch": 0.9903975506795937, "grad_norm": 0.10425573587417603, "learning_rate": 0.002, "loss": 2.3366, "step": 256200 }, { "epoch": 0.9904362078829769, "grad_norm": 0.09590397030115128, "learning_rate": 0.002, "loss": 2.3256, "step": 256210 }, { "epoch": 0.9904748650863602, "grad_norm": 0.10007577389478683, "learning_rate": 0.002, "loss": 2.3267, "step": 256220 }, { "epoch": 0.9905135222897434, "grad_norm": 0.17764490842819214, "learning_rate": 0.002, "loss": 2.3362, "step": 256230 }, { "epoch": 0.9905521794931268, "grad_norm": 0.10875571519136429, "learning_rate": 0.002, "loss": 2.3301, "step": 256240 }, { "epoch": 0.99059083669651, "grad_norm": 0.10980333387851715, "learning_rate": 0.002, "loss": 2.3419, "step": 256250 }, { "epoch": 0.9906294938998933, "grad_norm": 0.09446674585342407, "learning_rate": 0.002, "loss": 2.3329, "step": 256260 }, { "epoch": 0.9906681511032766, "grad_norm": 0.1014462485909462, "learning_rate": 0.002, "loss": 2.3356, "step": 256270 }, { "epoch": 0.9907068083066599, "grad_norm": 0.10417579859495163, "learning_rate": 0.002, "loss": 2.3398, "step": 256280 }, { "epoch": 0.9907454655100432, "grad_norm": 0.22922813892364502, "learning_rate": 0.002, "loss": 2.3533, "step": 256290 }, { "epoch": 0.9907841227134264, "grad_norm": 0.10854848474264145, "learning_rate": 0.002, "loss": 2.3446, "step": 256300 }, { "epoch": 0.9908227799168097, "grad_norm": 0.11280990391969681, "learning_rate": 0.002, "loss": 2.3407, "step": 256310 }, { "epoch": 0.9908614371201929, "grad_norm": 0.11322092264890671, "learning_rate": 0.002, "loss": 2.347, "step": 256320 }, { "epoch": 0.9909000943235763, "grad_norm": 0.10999466478824615, "learning_rate": 0.002, "loss": 2.3517, "step": 256330 }, { "epoch": 0.9909387515269595, "grad_norm": 0.10721048712730408, "learning_rate": 0.002, "loss": 2.3211, "step": 256340 }, { "epoch": 0.9909774087303428, "grad_norm": 0.09981708973646164, "learning_rate": 0.002, "loss": 2.3473, "step": 256350 }, { "epoch": 0.991016065933726, "grad_norm": 0.0954086109995842, "learning_rate": 0.002, "loss": 2.3342, "step": 256360 }, { "epoch": 0.9910547231371094, "grad_norm": 0.4622000753879547, "learning_rate": 0.002, "loss": 2.3409, "step": 256370 }, { "epoch": 0.9910933803404927, "grad_norm": 0.1136743351817131, "learning_rate": 0.002, "loss": 2.3235, "step": 256380 }, { "epoch": 0.9911320375438759, "grad_norm": 0.14929740130901337, "learning_rate": 0.002, "loss": 2.3371, "step": 256390 }, { "epoch": 0.9911706947472592, "grad_norm": 0.10695463418960571, "learning_rate": 0.002, "loss": 2.3334, "step": 256400 }, { "epoch": 0.9912093519506425, "grad_norm": 0.10876549780368805, "learning_rate": 0.002, "loss": 2.3337, "step": 256410 }, { "epoch": 0.9912480091540258, "grad_norm": 0.2180985063314438, "learning_rate": 0.002, "loss": 2.3587, "step": 256420 }, { "epoch": 0.991286666357409, "grad_norm": 0.10567354410886765, "learning_rate": 0.002, "loss": 2.3456, "step": 256430 }, { "epoch": 0.9913253235607923, "grad_norm": 0.11088450998067856, "learning_rate": 0.002, "loss": 2.3375, "step": 256440 }, { "epoch": 0.9913639807641756, "grad_norm": 0.10130184143781662, "learning_rate": 0.002, "loss": 2.3444, "step": 256450 }, { "epoch": 0.9914026379675589, "grad_norm": 0.12747453153133392, "learning_rate": 0.002, "loss": 2.3363, "step": 256460 }, { "epoch": 0.9914412951709421, "grad_norm": 0.09747633337974548, "learning_rate": 0.002, "loss": 2.3468, "step": 256470 }, { "epoch": 0.9914799523743254, "grad_norm": 0.09843626618385315, "learning_rate": 0.002, "loss": 2.3364, "step": 256480 }, { "epoch": 0.9915186095777088, "grad_norm": 0.08928278088569641, "learning_rate": 0.002, "loss": 2.3161, "step": 256490 }, { "epoch": 0.991557266781092, "grad_norm": 0.10301223397254944, "learning_rate": 0.002, "loss": 2.3419, "step": 256500 }, { "epoch": 0.9915959239844753, "grad_norm": 0.12475394457578659, "learning_rate": 0.002, "loss": 2.3279, "step": 256510 }, { "epoch": 0.9916345811878585, "grad_norm": 0.09833080321550369, "learning_rate": 0.002, "loss": 2.344, "step": 256520 }, { "epoch": 0.9916732383912418, "grad_norm": 0.10509419441223145, "learning_rate": 0.002, "loss": 2.3201, "step": 256530 }, { "epoch": 0.9917118955946251, "grad_norm": 0.10112323611974716, "learning_rate": 0.002, "loss": 2.3336, "step": 256540 }, { "epoch": 0.9917505527980084, "grad_norm": 0.10240601748228073, "learning_rate": 0.002, "loss": 2.3389, "step": 256550 }, { "epoch": 0.9917892100013916, "grad_norm": 0.09574146568775177, "learning_rate": 0.002, "loss": 2.342, "step": 256560 }, { "epoch": 0.9918278672047749, "grad_norm": 0.10088356584310532, "learning_rate": 0.002, "loss": 2.3466, "step": 256570 }, { "epoch": 0.9918665244081583, "grad_norm": 0.1093062236905098, "learning_rate": 0.002, "loss": 2.3242, "step": 256580 }, { "epoch": 0.9919051816115415, "grad_norm": 0.09802067279815674, "learning_rate": 0.002, "loss": 2.3488, "step": 256590 }, { "epoch": 0.9919438388149248, "grad_norm": 0.10470987111330032, "learning_rate": 0.002, "loss": 2.3333, "step": 256600 }, { "epoch": 0.991982496018308, "grad_norm": 0.1036287471652031, "learning_rate": 0.002, "loss": 2.3434, "step": 256610 }, { "epoch": 0.9920211532216914, "grad_norm": 0.11956336349248886, "learning_rate": 0.002, "loss": 2.3494, "step": 256620 }, { "epoch": 0.9920598104250746, "grad_norm": 0.08456283807754517, "learning_rate": 0.002, "loss": 2.3422, "step": 256630 }, { "epoch": 0.9920984676284579, "grad_norm": 0.0958782359957695, "learning_rate": 0.002, "loss": 2.3427, "step": 256640 }, { "epoch": 0.9921371248318411, "grad_norm": 0.09258156269788742, "learning_rate": 0.002, "loss": 2.3427, "step": 256650 }, { "epoch": 0.9921757820352245, "grad_norm": 0.09780330955982208, "learning_rate": 0.002, "loss": 2.34, "step": 256660 }, { "epoch": 0.9922144392386077, "grad_norm": 0.17852285504341125, "learning_rate": 0.002, "loss": 2.3438, "step": 256670 }, { "epoch": 0.992253096441991, "grad_norm": 0.0965234786272049, "learning_rate": 0.002, "loss": 2.3347, "step": 256680 }, { "epoch": 0.9922917536453743, "grad_norm": 0.0944899246096611, "learning_rate": 0.002, "loss": 2.3473, "step": 256690 }, { "epoch": 0.9923304108487575, "grad_norm": 0.16657981276512146, "learning_rate": 0.002, "loss": 2.3359, "step": 256700 }, { "epoch": 0.9923690680521409, "grad_norm": 0.09872959554195404, "learning_rate": 0.002, "loss": 2.3305, "step": 256710 }, { "epoch": 0.9924077252555241, "grad_norm": 0.12205319851636887, "learning_rate": 0.002, "loss": 2.3368, "step": 256720 }, { "epoch": 0.9924463824589074, "grad_norm": 0.12135138362646103, "learning_rate": 0.002, "loss": 2.3497, "step": 256730 }, { "epoch": 0.9924850396622906, "grad_norm": 0.10703414678573608, "learning_rate": 0.002, "loss": 2.3292, "step": 256740 }, { "epoch": 0.992523696865674, "grad_norm": 0.12538368999958038, "learning_rate": 0.002, "loss": 2.3452, "step": 256750 }, { "epoch": 0.9925623540690572, "grad_norm": 0.10660772770643234, "learning_rate": 0.002, "loss": 2.3432, "step": 256760 }, { "epoch": 0.9926010112724405, "grad_norm": 0.13139641284942627, "learning_rate": 0.002, "loss": 2.3394, "step": 256770 }, { "epoch": 0.9926396684758237, "grad_norm": 0.12074771523475647, "learning_rate": 0.002, "loss": 2.3392, "step": 256780 }, { "epoch": 0.9926783256792071, "grad_norm": 0.10254442691802979, "learning_rate": 0.002, "loss": 2.3334, "step": 256790 }, { "epoch": 0.9927169828825904, "grad_norm": 0.1065768152475357, "learning_rate": 0.002, "loss": 2.3516, "step": 256800 }, { "epoch": 0.9927556400859736, "grad_norm": 0.09868630766868591, "learning_rate": 0.002, "loss": 2.3372, "step": 256810 }, { "epoch": 0.9927942972893569, "grad_norm": 0.10020441561937332, "learning_rate": 0.002, "loss": 2.3417, "step": 256820 }, { "epoch": 0.9928329544927402, "grad_norm": 0.10684265196323395, "learning_rate": 0.002, "loss": 2.3406, "step": 256830 }, { "epoch": 0.9928716116961235, "grad_norm": 0.11520854383707047, "learning_rate": 0.002, "loss": 2.3446, "step": 256840 }, { "epoch": 0.9929102688995067, "grad_norm": 0.11689134687185287, "learning_rate": 0.002, "loss": 2.3365, "step": 256850 }, { "epoch": 0.99294892610289, "grad_norm": 0.1267421841621399, "learning_rate": 0.002, "loss": 2.3496, "step": 256860 }, { "epoch": 0.9929875833062732, "grad_norm": 0.11704300343990326, "learning_rate": 0.002, "loss": 2.3378, "step": 256870 }, { "epoch": 0.9930262405096566, "grad_norm": 0.10719385743141174, "learning_rate": 0.002, "loss": 2.3549, "step": 256880 }, { "epoch": 0.9930648977130399, "grad_norm": 0.13121972978115082, "learning_rate": 0.002, "loss": 2.3328, "step": 256890 }, { "epoch": 0.9931035549164231, "grad_norm": 0.1221286877989769, "learning_rate": 0.002, "loss": 2.3455, "step": 256900 }, { "epoch": 0.9931422121198064, "grad_norm": 0.11407453566789627, "learning_rate": 0.002, "loss": 2.3312, "step": 256910 }, { "epoch": 0.9931808693231897, "grad_norm": 0.10373591631650925, "learning_rate": 0.002, "loss": 2.3589, "step": 256920 }, { "epoch": 0.993219526526573, "grad_norm": 0.10496452450752258, "learning_rate": 0.002, "loss": 2.3321, "step": 256930 }, { "epoch": 0.9932581837299562, "grad_norm": 0.10863498598337173, "learning_rate": 0.002, "loss": 2.3416, "step": 256940 }, { "epoch": 0.9932968409333395, "grad_norm": 0.10201474279165268, "learning_rate": 0.002, "loss": 2.3413, "step": 256950 }, { "epoch": 0.9933354981367228, "grad_norm": 0.09551186114549637, "learning_rate": 0.002, "loss": 2.3413, "step": 256960 }, { "epoch": 0.9933741553401061, "grad_norm": 0.14234507083892822, "learning_rate": 0.002, "loss": 2.3292, "step": 256970 }, { "epoch": 0.9934128125434893, "grad_norm": 0.15089447796344757, "learning_rate": 0.002, "loss": 2.3498, "step": 256980 }, { "epoch": 0.9934514697468726, "grad_norm": 0.11496597528457642, "learning_rate": 0.002, "loss": 2.3364, "step": 256990 }, { "epoch": 0.993490126950256, "grad_norm": 0.10370314121246338, "learning_rate": 0.002, "loss": 2.3332, "step": 257000 }, { "epoch": 0.9935287841536392, "grad_norm": 0.13056083023548126, "learning_rate": 0.002, "loss": 2.334, "step": 257010 }, { "epoch": 0.9935674413570225, "grad_norm": 0.10173903405666351, "learning_rate": 0.002, "loss": 2.3392, "step": 257020 }, { "epoch": 0.9936060985604057, "grad_norm": 0.1256549209356308, "learning_rate": 0.002, "loss": 2.3264, "step": 257030 }, { "epoch": 0.9936447557637891, "grad_norm": 0.10343006998300552, "learning_rate": 0.002, "loss": 2.3448, "step": 257040 }, { "epoch": 0.9936834129671723, "grad_norm": 0.1029481515288353, "learning_rate": 0.002, "loss": 2.327, "step": 257050 }, { "epoch": 0.9937220701705556, "grad_norm": 0.10993833094835281, "learning_rate": 0.002, "loss": 2.3526, "step": 257060 }, { "epoch": 0.9937607273739388, "grad_norm": 0.11532630026340485, "learning_rate": 0.002, "loss": 2.3407, "step": 257070 }, { "epoch": 0.9937993845773221, "grad_norm": 0.10593671351671219, "learning_rate": 0.002, "loss": 2.3299, "step": 257080 }, { "epoch": 0.9938380417807055, "grad_norm": 0.0915307104587555, "learning_rate": 0.002, "loss": 2.3417, "step": 257090 }, { "epoch": 0.9938766989840887, "grad_norm": 0.22633057832717896, "learning_rate": 0.002, "loss": 2.3209, "step": 257100 }, { "epoch": 0.993915356187472, "grad_norm": 0.16773845255374908, "learning_rate": 0.002, "loss": 2.3272, "step": 257110 }, { "epoch": 0.9939540133908552, "grad_norm": 0.116732157766819, "learning_rate": 0.002, "loss": 2.3293, "step": 257120 }, { "epoch": 0.9939926705942386, "grad_norm": 0.11831034719944, "learning_rate": 0.002, "loss": 2.3085, "step": 257130 }, { "epoch": 0.9940313277976218, "grad_norm": 0.09759490191936493, "learning_rate": 0.002, "loss": 2.3315, "step": 257140 }, { "epoch": 0.9940699850010051, "grad_norm": 0.11319709569215775, "learning_rate": 0.002, "loss": 2.3494, "step": 257150 }, { "epoch": 0.9941086422043883, "grad_norm": 0.10495991259813309, "learning_rate": 0.002, "loss": 2.3188, "step": 257160 }, { "epoch": 0.9941472994077717, "grad_norm": 0.10347853600978851, "learning_rate": 0.002, "loss": 2.3446, "step": 257170 }, { "epoch": 0.9941859566111549, "grad_norm": 0.11005652695894241, "learning_rate": 0.002, "loss": 2.3321, "step": 257180 }, { "epoch": 0.9942246138145382, "grad_norm": 0.1012812927365303, "learning_rate": 0.002, "loss": 2.3269, "step": 257190 }, { "epoch": 0.9942632710179214, "grad_norm": 0.12478403002023697, "learning_rate": 0.002, "loss": 2.3486, "step": 257200 }, { "epoch": 0.9943019282213048, "grad_norm": 0.09518378973007202, "learning_rate": 0.002, "loss": 2.333, "step": 257210 }, { "epoch": 0.9943405854246881, "grad_norm": 0.1230977401137352, "learning_rate": 0.002, "loss": 2.3351, "step": 257220 }, { "epoch": 0.9943792426280713, "grad_norm": 0.10036604851484299, "learning_rate": 0.002, "loss": 2.3267, "step": 257230 }, { "epoch": 0.9944178998314546, "grad_norm": 0.13655316829681396, "learning_rate": 0.002, "loss": 2.3503, "step": 257240 }, { "epoch": 0.9944565570348378, "grad_norm": 0.10106083005666733, "learning_rate": 0.002, "loss": 2.3465, "step": 257250 }, { "epoch": 0.9944952142382212, "grad_norm": 0.28169330954551697, "learning_rate": 0.002, "loss": 2.3467, "step": 257260 }, { "epoch": 0.9945338714416044, "grad_norm": 0.1256679892539978, "learning_rate": 0.002, "loss": 2.3311, "step": 257270 }, { "epoch": 0.9945725286449877, "grad_norm": 0.10080401599407196, "learning_rate": 0.002, "loss": 2.3322, "step": 257280 }, { "epoch": 0.9946111858483709, "grad_norm": 0.09396111220121384, "learning_rate": 0.002, "loss": 2.3368, "step": 257290 }, { "epoch": 0.9946498430517543, "grad_norm": 0.1262788027524948, "learning_rate": 0.002, "loss": 2.324, "step": 257300 }, { "epoch": 0.9946885002551376, "grad_norm": 0.12998312711715698, "learning_rate": 0.002, "loss": 2.3416, "step": 257310 }, { "epoch": 0.9947271574585208, "grad_norm": 0.10756916552782059, "learning_rate": 0.002, "loss": 2.3429, "step": 257320 }, { "epoch": 0.9947658146619041, "grad_norm": 0.10928453505039215, "learning_rate": 0.002, "loss": 2.3314, "step": 257330 }, { "epoch": 0.9948044718652874, "grad_norm": 0.1297762244939804, "learning_rate": 0.002, "loss": 2.3366, "step": 257340 }, { "epoch": 0.9948431290686707, "grad_norm": 0.11227850615978241, "learning_rate": 0.002, "loss": 2.3361, "step": 257350 }, { "epoch": 0.9948817862720539, "grad_norm": 0.10288013517856598, "learning_rate": 0.002, "loss": 2.3289, "step": 257360 }, { "epoch": 0.9949204434754372, "grad_norm": 0.08381501585245132, "learning_rate": 0.002, "loss": 2.3657, "step": 257370 }, { "epoch": 0.9949591006788205, "grad_norm": 0.13382337987422943, "learning_rate": 0.002, "loss": 2.3324, "step": 257380 }, { "epoch": 0.9949977578822038, "grad_norm": 0.09629426896572113, "learning_rate": 0.002, "loss": 2.3392, "step": 257390 }, { "epoch": 0.995036415085587, "grad_norm": 0.10021605342626572, "learning_rate": 0.002, "loss": 2.3359, "step": 257400 }, { "epoch": 0.9950750722889703, "grad_norm": 0.10886742919683456, "learning_rate": 0.002, "loss": 2.3231, "step": 257410 }, { "epoch": 0.9951137294923537, "grad_norm": 0.42458826303482056, "learning_rate": 0.002, "loss": 2.3399, "step": 257420 }, { "epoch": 0.9951523866957369, "grad_norm": 0.11212582141160965, "learning_rate": 0.002, "loss": 2.3442, "step": 257430 }, { "epoch": 0.9951910438991202, "grad_norm": 0.09543899446725845, "learning_rate": 0.002, "loss": 2.3373, "step": 257440 }, { "epoch": 0.9952297011025034, "grad_norm": 0.1293252855539322, "learning_rate": 0.002, "loss": 2.3306, "step": 257450 }, { "epoch": 0.9952683583058867, "grad_norm": 0.11018099635839462, "learning_rate": 0.002, "loss": 2.3338, "step": 257460 }, { "epoch": 0.99530701550927, "grad_norm": 0.09845305234193802, "learning_rate": 0.002, "loss": 2.3338, "step": 257470 }, { "epoch": 0.9953456727126533, "grad_norm": 0.09646732360124588, "learning_rate": 0.002, "loss": 2.3416, "step": 257480 }, { "epoch": 0.9953843299160365, "grad_norm": 0.11497314274311066, "learning_rate": 0.002, "loss": 2.3377, "step": 257490 }, { "epoch": 0.9954229871194198, "grad_norm": 0.11051710695028305, "learning_rate": 0.002, "loss": 2.3164, "step": 257500 }, { "epoch": 0.9954616443228032, "grad_norm": 0.12046661227941513, "learning_rate": 0.002, "loss": 2.3361, "step": 257510 }, { "epoch": 0.9955003015261864, "grad_norm": 0.10718783736228943, "learning_rate": 0.002, "loss": 2.3411, "step": 257520 }, { "epoch": 0.9955389587295697, "grad_norm": 0.18187186121940613, "learning_rate": 0.002, "loss": 2.3361, "step": 257530 }, { "epoch": 0.9955776159329529, "grad_norm": 0.11338195949792862, "learning_rate": 0.002, "loss": 2.3363, "step": 257540 }, { "epoch": 0.9956162731363363, "grad_norm": 0.10064757615327835, "learning_rate": 0.002, "loss": 2.3405, "step": 257550 }, { "epoch": 0.9956549303397195, "grad_norm": 0.11266288161277771, "learning_rate": 0.002, "loss": 2.3493, "step": 257560 }, { "epoch": 0.9956935875431028, "grad_norm": 0.12426048517227173, "learning_rate": 0.002, "loss": 2.3302, "step": 257570 }, { "epoch": 0.995732244746486, "grad_norm": 0.10261692106723785, "learning_rate": 0.002, "loss": 2.3324, "step": 257580 }, { "epoch": 0.9957709019498694, "grad_norm": 0.12305546551942825, "learning_rate": 0.002, "loss": 2.3422, "step": 257590 }, { "epoch": 0.9958095591532526, "grad_norm": 0.1324758678674698, "learning_rate": 0.002, "loss": 2.3431, "step": 257600 }, { "epoch": 0.9958482163566359, "grad_norm": 0.13306169211864471, "learning_rate": 0.002, "loss": 2.3339, "step": 257610 }, { "epoch": 0.9958868735600191, "grad_norm": 0.09178532660007477, "learning_rate": 0.002, "loss": 2.3299, "step": 257620 }, { "epoch": 0.9959255307634024, "grad_norm": 0.13694193959236145, "learning_rate": 0.002, "loss": 2.3375, "step": 257630 }, { "epoch": 0.9959641879667858, "grad_norm": 0.09993527829647064, "learning_rate": 0.002, "loss": 2.3369, "step": 257640 }, { "epoch": 0.996002845170169, "grad_norm": 0.1072765439748764, "learning_rate": 0.002, "loss": 2.3528, "step": 257650 }, { "epoch": 0.9960415023735523, "grad_norm": 0.1019691601395607, "learning_rate": 0.002, "loss": 2.3324, "step": 257660 }, { "epoch": 0.9960801595769355, "grad_norm": 0.129447802901268, "learning_rate": 0.002, "loss": 2.3426, "step": 257670 }, { "epoch": 0.9961188167803189, "grad_norm": 0.0991927981376648, "learning_rate": 0.002, "loss": 2.3393, "step": 257680 }, { "epoch": 0.9961574739837021, "grad_norm": 0.10918325185775757, "learning_rate": 0.002, "loss": 2.3402, "step": 257690 }, { "epoch": 0.9961961311870854, "grad_norm": 0.12434779852628708, "learning_rate": 0.002, "loss": 2.3432, "step": 257700 }, { "epoch": 0.9962347883904686, "grad_norm": 0.09883172065019608, "learning_rate": 0.002, "loss": 2.3366, "step": 257710 }, { "epoch": 0.996273445593852, "grad_norm": 0.10056095570325851, "learning_rate": 0.002, "loss": 2.336, "step": 257720 }, { "epoch": 0.9963121027972353, "grad_norm": 0.114386647939682, "learning_rate": 0.002, "loss": 2.3329, "step": 257730 }, { "epoch": 0.9963507600006185, "grad_norm": 0.3775371015071869, "learning_rate": 0.002, "loss": 2.3227, "step": 257740 }, { "epoch": 0.9963894172040018, "grad_norm": 0.1052108183503151, "learning_rate": 0.002, "loss": 2.3306, "step": 257750 }, { "epoch": 0.9964280744073851, "grad_norm": 0.12209247797727585, "learning_rate": 0.002, "loss": 2.3382, "step": 257760 }, { "epoch": 0.9964667316107684, "grad_norm": 0.10437662899494171, "learning_rate": 0.002, "loss": 2.3376, "step": 257770 }, { "epoch": 0.9965053888141516, "grad_norm": 0.10273770987987518, "learning_rate": 0.002, "loss": 2.3351, "step": 257780 }, { "epoch": 0.9965440460175349, "grad_norm": 0.12768568098545074, "learning_rate": 0.002, "loss": 2.3513, "step": 257790 }, { "epoch": 0.9965827032209181, "grad_norm": 0.10613568872213364, "learning_rate": 0.002, "loss": 2.3439, "step": 257800 }, { "epoch": 0.9966213604243015, "grad_norm": 0.09213811159133911, "learning_rate": 0.002, "loss": 2.3294, "step": 257810 }, { "epoch": 0.9966600176276847, "grad_norm": 0.10028841346502304, "learning_rate": 0.002, "loss": 2.3328, "step": 257820 }, { "epoch": 0.996698674831068, "grad_norm": 0.12576673924922943, "learning_rate": 0.002, "loss": 2.3354, "step": 257830 }, { "epoch": 0.9967373320344513, "grad_norm": 0.09395378828048706, "learning_rate": 0.002, "loss": 2.3349, "step": 257840 }, { "epoch": 0.9967759892378346, "grad_norm": 0.105169877409935, "learning_rate": 0.002, "loss": 2.3366, "step": 257850 }, { "epoch": 0.9968146464412179, "grad_norm": 0.10482071340084076, "learning_rate": 0.002, "loss": 2.3194, "step": 257860 }, { "epoch": 0.9968533036446011, "grad_norm": 0.11198008805513382, "learning_rate": 0.002, "loss": 2.3437, "step": 257870 }, { "epoch": 0.9968919608479844, "grad_norm": 0.09476982802152634, "learning_rate": 0.002, "loss": 2.3335, "step": 257880 }, { "epoch": 0.9969306180513677, "grad_norm": 0.18312402069568634, "learning_rate": 0.002, "loss": 2.3358, "step": 257890 }, { "epoch": 0.996969275254751, "grad_norm": 0.1149301528930664, "learning_rate": 0.002, "loss": 2.3236, "step": 257900 }, { "epoch": 0.9970079324581342, "grad_norm": 0.10394993424415588, "learning_rate": 0.002, "loss": 2.3378, "step": 257910 }, { "epoch": 0.9970465896615175, "grad_norm": 0.12158011645078659, "learning_rate": 0.002, "loss": 2.3364, "step": 257920 }, { "epoch": 0.9970852468649009, "grad_norm": 0.116344153881073, "learning_rate": 0.002, "loss": 2.3426, "step": 257930 }, { "epoch": 0.9971239040682841, "grad_norm": 0.1335514634847641, "learning_rate": 0.002, "loss": 2.3281, "step": 257940 }, { "epoch": 0.9971625612716674, "grad_norm": 0.1063564270734787, "learning_rate": 0.002, "loss": 2.3315, "step": 257950 }, { "epoch": 0.9972012184750506, "grad_norm": 0.11535150557756424, "learning_rate": 0.002, "loss": 2.3366, "step": 257960 }, { "epoch": 0.997239875678434, "grad_norm": 0.12290379405021667, "learning_rate": 0.002, "loss": 2.3392, "step": 257970 }, { "epoch": 0.9972785328818172, "grad_norm": 0.09599297493696213, "learning_rate": 0.002, "loss": 2.3377, "step": 257980 }, { "epoch": 0.9973171900852005, "grad_norm": 0.12629227340221405, "learning_rate": 0.002, "loss": 2.3379, "step": 257990 }, { "epoch": 0.9973558472885837, "grad_norm": 0.10190927237272263, "learning_rate": 0.002, "loss": 2.3407, "step": 258000 }, { "epoch": 0.997394504491967, "grad_norm": 0.09525350481271744, "learning_rate": 0.002, "loss": 2.3456, "step": 258010 }, { "epoch": 0.9974331616953503, "grad_norm": 0.14495059847831726, "learning_rate": 0.002, "loss": 2.3513, "step": 258020 }, { "epoch": 0.9974718188987336, "grad_norm": 0.10044555366039276, "learning_rate": 0.002, "loss": 2.3361, "step": 258030 }, { "epoch": 0.9975104761021168, "grad_norm": 0.10116809606552124, "learning_rate": 0.002, "loss": 2.3335, "step": 258040 }, { "epoch": 0.9975491333055001, "grad_norm": 0.11794326454401016, "learning_rate": 0.002, "loss": 2.3607, "step": 258050 }, { "epoch": 0.9975877905088835, "grad_norm": 0.10003200173377991, "learning_rate": 0.002, "loss": 2.3263, "step": 258060 }, { "epoch": 0.9976264477122667, "grad_norm": 0.11192117631435394, "learning_rate": 0.002, "loss": 2.3337, "step": 258070 }, { "epoch": 0.99766510491565, "grad_norm": 0.1364094614982605, "learning_rate": 0.002, "loss": 2.3401, "step": 258080 }, { "epoch": 0.9977037621190332, "grad_norm": 0.11394291371107101, "learning_rate": 0.002, "loss": 2.3341, "step": 258090 }, { "epoch": 0.9977424193224166, "grad_norm": 0.11761811375617981, "learning_rate": 0.002, "loss": 2.3344, "step": 258100 }, { "epoch": 0.9977810765257998, "grad_norm": 0.10144181549549103, "learning_rate": 0.002, "loss": 2.3313, "step": 258110 }, { "epoch": 0.9978197337291831, "grad_norm": 0.09599994122982025, "learning_rate": 0.002, "loss": 2.3266, "step": 258120 }, { "epoch": 0.9978583909325663, "grad_norm": 0.09742031246423721, "learning_rate": 0.002, "loss": 2.3416, "step": 258130 }, { "epoch": 0.9978970481359497, "grad_norm": 0.09783077985048294, "learning_rate": 0.002, "loss": 2.3459, "step": 258140 }, { "epoch": 0.997935705339333, "grad_norm": 0.09153680503368378, "learning_rate": 0.002, "loss": 2.3466, "step": 258150 }, { "epoch": 0.9979743625427162, "grad_norm": 0.126260906457901, "learning_rate": 0.002, "loss": 2.3321, "step": 258160 }, { "epoch": 0.9980130197460995, "grad_norm": 0.11532141268253326, "learning_rate": 0.002, "loss": 2.3595, "step": 258170 }, { "epoch": 0.9980516769494827, "grad_norm": 0.09739511460065842, "learning_rate": 0.002, "loss": 2.336, "step": 258180 }, { "epoch": 0.9980903341528661, "grad_norm": 0.10290549695491791, "learning_rate": 0.002, "loss": 2.3195, "step": 258190 }, { "epoch": 0.9981289913562493, "grad_norm": 0.11158601194620132, "learning_rate": 0.002, "loss": 2.3313, "step": 258200 }, { "epoch": 0.9981676485596326, "grad_norm": 0.14581574499607086, "learning_rate": 0.002, "loss": 2.3363, "step": 258210 }, { "epoch": 0.9982063057630158, "grad_norm": 0.13148726522922516, "learning_rate": 0.002, "loss": 2.3313, "step": 258220 }, { "epoch": 0.9982449629663992, "grad_norm": 0.10082720220088959, "learning_rate": 0.002, "loss": 2.3411, "step": 258230 }, { "epoch": 0.9982836201697824, "grad_norm": 0.10381853580474854, "learning_rate": 0.002, "loss": 2.3308, "step": 258240 }, { "epoch": 0.9983222773731657, "grad_norm": 0.10770632326602936, "learning_rate": 0.002, "loss": 2.3429, "step": 258250 }, { "epoch": 0.998360934576549, "grad_norm": 0.11605487763881683, "learning_rate": 0.002, "loss": 2.3419, "step": 258260 }, { "epoch": 0.9983995917799323, "grad_norm": 0.10654358565807343, "learning_rate": 0.002, "loss": 2.3275, "step": 258270 }, { "epoch": 0.9984382489833156, "grad_norm": 0.09872237592935562, "learning_rate": 0.002, "loss": 2.3506, "step": 258280 }, { "epoch": 0.9984769061866988, "grad_norm": 0.13964754343032837, "learning_rate": 0.002, "loss": 2.3479, "step": 258290 }, { "epoch": 0.9985155633900821, "grad_norm": 0.09526875615119934, "learning_rate": 0.002, "loss": 2.3491, "step": 258300 }, { "epoch": 0.9985542205934654, "grad_norm": 0.14932149648666382, "learning_rate": 0.002, "loss": 2.3358, "step": 258310 }, { "epoch": 0.9985928777968487, "grad_norm": 0.11767978221178055, "learning_rate": 0.002, "loss": 2.3447, "step": 258320 }, { "epoch": 0.9986315350002319, "grad_norm": 0.10140397399663925, "learning_rate": 0.002, "loss": 2.3381, "step": 258330 }, { "epoch": 0.9986701922036152, "grad_norm": 0.09302809834480286, "learning_rate": 0.002, "loss": 2.3397, "step": 258340 }, { "epoch": 0.9987088494069986, "grad_norm": 0.09450498968362808, "learning_rate": 0.002, "loss": 2.3337, "step": 258350 }, { "epoch": 0.9987475066103818, "grad_norm": 0.12323355674743652, "learning_rate": 0.002, "loss": 2.3207, "step": 258360 }, { "epoch": 0.9987861638137651, "grad_norm": 0.1029418408870697, "learning_rate": 0.002, "loss": 2.3319, "step": 258370 }, { "epoch": 0.9988248210171483, "grad_norm": 0.09626761823892593, "learning_rate": 0.002, "loss": 2.3289, "step": 258380 }, { "epoch": 0.9988634782205316, "grad_norm": 0.1211252510547638, "learning_rate": 0.002, "loss": 2.3302, "step": 258390 }, { "epoch": 0.9989021354239149, "grad_norm": 0.1048799678683281, "learning_rate": 0.002, "loss": 2.3446, "step": 258400 }, { "epoch": 0.9989407926272982, "grad_norm": 0.11532396078109741, "learning_rate": 0.002, "loss": 2.334, "step": 258410 }, { "epoch": 0.9989794498306814, "grad_norm": 0.10050155967473984, "learning_rate": 0.002, "loss": 2.33, "step": 258420 }, { "epoch": 0.9990181070340647, "grad_norm": 0.10442036390304565, "learning_rate": 0.002, "loss": 2.3418, "step": 258430 }, { "epoch": 0.999056764237448, "grad_norm": 0.12391044199466705, "learning_rate": 0.002, "loss": 2.3393, "step": 258440 }, { "epoch": 0.9990954214408313, "grad_norm": 0.11113929003477097, "learning_rate": 0.002, "loss": 2.3435, "step": 258450 }, { "epoch": 0.9991340786442146, "grad_norm": 0.1350424885749817, "learning_rate": 0.002, "loss": 2.3489, "step": 258460 }, { "epoch": 0.9991727358475978, "grad_norm": 0.10169805586338043, "learning_rate": 0.002, "loss": 2.3352, "step": 258470 }, { "epoch": 0.9992113930509812, "grad_norm": 0.0997602790594101, "learning_rate": 0.002, "loss": 2.3235, "step": 258480 }, { "epoch": 0.9992500502543644, "grad_norm": 0.10066650807857513, "learning_rate": 0.002, "loss": 2.342, "step": 258490 }, { "epoch": 0.9992887074577477, "grad_norm": 0.09506989270448685, "learning_rate": 0.002, "loss": 2.3345, "step": 258500 }, { "epoch": 0.9993273646611309, "grad_norm": 0.1080443412065506, "learning_rate": 0.002, "loss": 2.339, "step": 258510 }, { "epoch": 0.9993660218645143, "grad_norm": 0.09656565636396408, "learning_rate": 0.002, "loss": 2.3225, "step": 258520 }, { "epoch": 0.9994046790678975, "grad_norm": 0.15320773422718048, "learning_rate": 0.002, "loss": 2.3358, "step": 258530 }, { "epoch": 0.9994433362712808, "grad_norm": 0.09774304926395416, "learning_rate": 0.002, "loss": 2.3502, "step": 258540 }, { "epoch": 0.999481993474664, "grad_norm": 0.10777298361063004, "learning_rate": 0.002, "loss": 2.347, "step": 258550 }, { "epoch": 0.9995206506780473, "grad_norm": 0.139897882938385, "learning_rate": 0.002, "loss": 2.3426, "step": 258560 }, { "epoch": 0.9995593078814307, "grad_norm": 0.10244569927453995, "learning_rate": 0.002, "loss": 2.3323, "step": 258570 }, { "epoch": 0.9995979650848139, "grad_norm": 0.12334243953227997, "learning_rate": 0.002, "loss": 2.3382, "step": 258580 }, { "epoch": 0.9996366222881972, "grad_norm": 0.10992878675460815, "learning_rate": 0.002, "loss": 2.3343, "step": 258590 }, { "epoch": 0.9996752794915804, "grad_norm": 0.099768728017807, "learning_rate": 0.002, "loss": 2.3277, "step": 258600 }, { "epoch": 0.9997139366949638, "grad_norm": 0.11886005103588104, "learning_rate": 0.002, "loss": 2.3457, "step": 258610 }, { "epoch": 0.999752593898347, "grad_norm": 0.10944291204214096, "learning_rate": 0.002, "loss": 2.3385, "step": 258620 }, { "epoch": 0.9997912511017303, "grad_norm": 0.1227748841047287, "learning_rate": 0.002, "loss": 2.3373, "step": 258630 }, { "epoch": 0.9998299083051135, "grad_norm": 0.12421202659606934, "learning_rate": 0.002, "loss": 2.3496, "step": 258640 }, { "epoch": 0.9998685655084969, "grad_norm": 0.10084223002195358, "learning_rate": 0.002, "loss": 2.3438, "step": 258650 }, { "epoch": 0.9999072227118802, "grad_norm": 0.10667677968740463, "learning_rate": 0.002, "loss": 2.3414, "step": 258660 }, { "epoch": 0.9999458799152634, "grad_norm": 0.10123629122972488, "learning_rate": 0.002, "loss": 2.3319, "step": 258670 }, { "epoch": 0.9999845371186467, "grad_norm": 0.10575992614030838, "learning_rate": 0.002, "loss": 2.3476, "step": 258680 }, { "epoch": 1.00002319432203, "grad_norm": 0.10037583857774734, "learning_rate": 0.002, "loss": 2.3196, "step": 258690 }, { "epoch": 1.0000618515254132, "grad_norm": 0.09392248094081879, "learning_rate": 0.002, "loss": 2.3379, "step": 258700 }, { "epoch": 1.0001005087287966, "grad_norm": 0.09586737304925919, "learning_rate": 0.002, "loss": 2.3291, "step": 258710 }, { "epoch": 1.0001391659321799, "grad_norm": 0.11475492268800735, "learning_rate": 0.002, "loss": 2.3304, "step": 258720 }, { "epoch": 1.0001778231355631, "grad_norm": 0.12680473923683167, "learning_rate": 0.002, "loss": 2.3469, "step": 258730 }, { "epoch": 1.0002164803389464, "grad_norm": 0.0987839326262474, "learning_rate": 0.002, "loss": 2.3208, "step": 258740 }, { "epoch": 1.0002551375423296, "grad_norm": 0.10396572202444077, "learning_rate": 0.002, "loss": 2.323, "step": 258750 }, { "epoch": 1.000293794745713, "grad_norm": 0.1215694472193718, "learning_rate": 0.002, "loss": 2.3292, "step": 258760 }, { "epoch": 1.0003324519490961, "grad_norm": 0.11470723897218704, "learning_rate": 0.002, "loss": 2.3381, "step": 258770 }, { "epoch": 1.0003711091524794, "grad_norm": 0.1048058271408081, "learning_rate": 0.002, "loss": 2.3416, "step": 258780 }, { "epoch": 1.0004097663558627, "grad_norm": 0.10398370772600174, "learning_rate": 0.002, "loss": 2.3439, "step": 258790 }, { "epoch": 1.0004484235592461, "grad_norm": 0.0954103097319603, "learning_rate": 0.002, "loss": 2.3239, "step": 258800 }, { "epoch": 1.0004870807626294, "grad_norm": 0.10868433117866516, "learning_rate": 0.002, "loss": 2.3435, "step": 258810 }, { "epoch": 1.0005257379660126, "grad_norm": 0.11799705773591995, "learning_rate": 0.002, "loss": 2.3426, "step": 258820 }, { "epoch": 1.0005643951693959, "grad_norm": 0.10871309787034988, "learning_rate": 0.002, "loss": 2.3359, "step": 258830 }, { "epoch": 1.0006030523727791, "grad_norm": 0.12025268375873566, "learning_rate": 0.002, "loss": 2.3504, "step": 258840 }, { "epoch": 1.0006417095761624, "grad_norm": 0.08687405288219452, "learning_rate": 0.002, "loss": 2.3284, "step": 258850 }, { "epoch": 1.0006803667795456, "grad_norm": 0.10406842082738876, "learning_rate": 0.002, "loss": 2.3272, "step": 258860 }, { "epoch": 1.000719023982929, "grad_norm": 0.1017787903547287, "learning_rate": 0.002, "loss": 2.3448, "step": 258870 }, { "epoch": 1.0007576811863124, "grad_norm": 0.10971411317586899, "learning_rate": 0.002, "loss": 2.347, "step": 258880 }, { "epoch": 1.0007963383896956, "grad_norm": 0.1039801836013794, "learning_rate": 0.002, "loss": 2.3389, "step": 258890 }, { "epoch": 1.0008349955930789, "grad_norm": 0.10778878629207611, "learning_rate": 0.002, "loss": 2.325, "step": 258900 }, { "epoch": 1.0008736527964621, "grad_norm": 0.12391100823879242, "learning_rate": 0.002, "loss": 2.3326, "step": 258910 }, { "epoch": 1.0009123099998454, "grad_norm": 0.09689292311668396, "learning_rate": 0.002, "loss": 2.3457, "step": 258920 }, { "epoch": 1.0009509672032286, "grad_norm": 0.09480072557926178, "learning_rate": 0.002, "loss": 2.3427, "step": 258930 }, { "epoch": 1.0009896244066119, "grad_norm": 0.11858639866113663, "learning_rate": 0.002, "loss": 2.3412, "step": 258940 }, { "epoch": 1.0010282816099951, "grad_norm": 0.11604545265436172, "learning_rate": 0.002, "loss": 2.3246, "step": 258950 }, { "epoch": 1.0010669388133784, "grad_norm": 0.10790597647428513, "learning_rate": 0.002, "loss": 2.3479, "step": 258960 }, { "epoch": 1.0011055960167619, "grad_norm": 0.10130016505718231, "learning_rate": 0.002, "loss": 2.3312, "step": 258970 }, { "epoch": 1.001144253220145, "grad_norm": 0.12981268763542175, "learning_rate": 0.002, "loss": 2.3512, "step": 258980 }, { "epoch": 1.0011829104235284, "grad_norm": 0.1396598368883133, "learning_rate": 0.002, "loss": 2.3582, "step": 258990 }, { "epoch": 1.0012215676269116, "grad_norm": 0.11186467111110687, "learning_rate": 0.002, "loss": 2.3516, "step": 259000 }, { "epoch": 1.0012602248302949, "grad_norm": 0.09468837082386017, "learning_rate": 0.002, "loss": 2.3475, "step": 259010 }, { "epoch": 1.0012988820336781, "grad_norm": 0.10041584819555283, "learning_rate": 0.002, "loss": 2.3491, "step": 259020 }, { "epoch": 1.0013375392370614, "grad_norm": 0.1317100077867508, "learning_rate": 0.002, "loss": 2.3257, "step": 259030 }, { "epoch": 1.0013761964404446, "grad_norm": 0.11795981228351593, "learning_rate": 0.002, "loss": 2.3421, "step": 259040 }, { "epoch": 1.001414853643828, "grad_norm": 0.09735662490129471, "learning_rate": 0.002, "loss": 2.3332, "step": 259050 }, { "epoch": 1.0014535108472113, "grad_norm": 0.12275316566228867, "learning_rate": 0.002, "loss": 2.3427, "step": 259060 }, { "epoch": 1.0014921680505946, "grad_norm": 0.10865990072488785, "learning_rate": 0.002, "loss": 2.3442, "step": 259070 }, { "epoch": 1.0015308252539779, "grad_norm": 0.1025635376572609, "learning_rate": 0.002, "loss": 2.354, "step": 259080 }, { "epoch": 1.001569482457361, "grad_norm": 0.10392650216817856, "learning_rate": 0.002, "loss": 2.3383, "step": 259090 }, { "epoch": 1.0016081396607444, "grad_norm": 0.10056158155202866, "learning_rate": 0.002, "loss": 2.3131, "step": 259100 }, { "epoch": 1.0016467968641276, "grad_norm": 0.10843063145875931, "learning_rate": 0.002, "loss": 2.3414, "step": 259110 }, { "epoch": 1.0016854540675109, "grad_norm": 0.09778635203838348, "learning_rate": 0.002, "loss": 2.337, "step": 259120 }, { "epoch": 1.0017241112708941, "grad_norm": 0.09872616827487946, "learning_rate": 0.002, "loss": 2.3377, "step": 259130 }, { "epoch": 1.0017627684742776, "grad_norm": 0.09871741384267807, "learning_rate": 0.002, "loss": 2.337, "step": 259140 }, { "epoch": 1.0018014256776608, "grad_norm": 0.10286332666873932, "learning_rate": 0.002, "loss": 2.3292, "step": 259150 }, { "epoch": 1.001840082881044, "grad_norm": 0.09847522526979446, "learning_rate": 0.002, "loss": 2.3393, "step": 259160 }, { "epoch": 1.0018787400844273, "grad_norm": 0.10867820680141449, "learning_rate": 0.002, "loss": 2.3432, "step": 259170 }, { "epoch": 1.0019173972878106, "grad_norm": 0.10226430743932724, "learning_rate": 0.002, "loss": 2.3358, "step": 259180 }, { "epoch": 1.0019560544911938, "grad_norm": 0.09926961362361908, "learning_rate": 0.002, "loss": 2.3379, "step": 259190 }, { "epoch": 1.001994711694577, "grad_norm": 0.10008293390274048, "learning_rate": 0.002, "loss": 2.3491, "step": 259200 }, { "epoch": 1.0020333688979604, "grad_norm": 0.10106901824474335, "learning_rate": 0.002, "loss": 2.3453, "step": 259210 }, { "epoch": 1.0020720261013438, "grad_norm": 0.1036130040884018, "learning_rate": 0.002, "loss": 2.3398, "step": 259220 }, { "epoch": 1.002110683304727, "grad_norm": 0.10225784033536911, "learning_rate": 0.002, "loss": 2.334, "step": 259230 }, { "epoch": 1.0021493405081103, "grad_norm": 0.08691437542438507, "learning_rate": 0.002, "loss": 2.3268, "step": 259240 }, { "epoch": 1.0021879977114936, "grad_norm": 0.09674886614084244, "learning_rate": 0.002, "loss": 2.3279, "step": 259250 }, { "epoch": 1.0022266549148768, "grad_norm": 0.15050897002220154, "learning_rate": 0.002, "loss": 2.3241, "step": 259260 }, { "epoch": 1.00226531211826, "grad_norm": 0.17266719043254852, "learning_rate": 0.002, "loss": 2.3408, "step": 259270 }, { "epoch": 1.0023039693216433, "grad_norm": 0.09772799909114838, "learning_rate": 0.002, "loss": 2.3363, "step": 259280 }, { "epoch": 1.0023426265250266, "grad_norm": 0.09189768880605698, "learning_rate": 0.002, "loss": 2.3476, "step": 259290 }, { "epoch": 1.0023812837284098, "grad_norm": 0.10858861356973648, "learning_rate": 0.002, "loss": 2.3408, "step": 259300 }, { "epoch": 1.0024199409317933, "grad_norm": 0.12308477610349655, "learning_rate": 0.002, "loss": 2.3437, "step": 259310 }, { "epoch": 1.0024585981351766, "grad_norm": 0.14335885643959045, "learning_rate": 0.002, "loss": 2.3334, "step": 259320 }, { "epoch": 1.0024972553385598, "grad_norm": 0.09966814517974854, "learning_rate": 0.002, "loss": 2.3395, "step": 259330 }, { "epoch": 1.002535912541943, "grad_norm": 0.09803139418363571, "learning_rate": 0.002, "loss": 2.3358, "step": 259340 }, { "epoch": 1.0025745697453263, "grad_norm": 0.14151626825332642, "learning_rate": 0.002, "loss": 2.3378, "step": 259350 }, { "epoch": 1.0026132269487096, "grad_norm": 0.11047150939702988, "learning_rate": 0.002, "loss": 2.3444, "step": 259360 }, { "epoch": 1.0026518841520928, "grad_norm": 0.09904196858406067, "learning_rate": 0.002, "loss": 2.3222, "step": 259370 }, { "epoch": 1.002690541355476, "grad_norm": 0.10631304979324341, "learning_rate": 0.002, "loss": 2.3441, "step": 259380 }, { "epoch": 1.0027291985588596, "grad_norm": 0.1100512444972992, "learning_rate": 0.002, "loss": 2.328, "step": 259390 }, { "epoch": 1.0027678557622428, "grad_norm": 0.11194992810487747, "learning_rate": 0.002, "loss": 2.3229, "step": 259400 }, { "epoch": 1.002806512965626, "grad_norm": 0.12639199197292328, "learning_rate": 0.002, "loss": 2.3443, "step": 259410 }, { "epoch": 1.0028451701690093, "grad_norm": 0.12143406271934509, "learning_rate": 0.002, "loss": 2.3486, "step": 259420 }, { "epoch": 1.0028838273723926, "grad_norm": 0.10168673098087311, "learning_rate": 0.002, "loss": 2.3332, "step": 259430 }, { "epoch": 1.0029224845757758, "grad_norm": 0.11748029291629791, "learning_rate": 0.002, "loss": 2.3496, "step": 259440 }, { "epoch": 1.002961141779159, "grad_norm": 0.10045807808637619, "learning_rate": 0.002, "loss": 2.3357, "step": 259450 }, { "epoch": 1.0029997989825423, "grad_norm": 0.09811057150363922, "learning_rate": 0.002, "loss": 2.3486, "step": 259460 }, { "epoch": 1.0030384561859256, "grad_norm": 0.11925587058067322, "learning_rate": 0.002, "loss": 2.333, "step": 259470 }, { "epoch": 1.003077113389309, "grad_norm": 0.12439731508493423, "learning_rate": 0.002, "loss": 2.3271, "step": 259480 }, { "epoch": 1.0031157705926923, "grad_norm": 0.12637203931808472, "learning_rate": 0.002, "loss": 2.3248, "step": 259490 }, { "epoch": 1.0031544277960756, "grad_norm": 0.10005202889442444, "learning_rate": 0.002, "loss": 2.3391, "step": 259500 }, { "epoch": 1.0031930849994588, "grad_norm": 0.10126623511314392, "learning_rate": 0.002, "loss": 2.3467, "step": 259510 }, { "epoch": 1.003231742202842, "grad_norm": 0.12067006528377533, "learning_rate": 0.002, "loss": 2.3413, "step": 259520 }, { "epoch": 1.0032703994062253, "grad_norm": 0.11962800472974777, "learning_rate": 0.002, "loss": 2.3282, "step": 259530 }, { "epoch": 1.0033090566096086, "grad_norm": 0.09837370365858078, "learning_rate": 0.002, "loss": 2.3202, "step": 259540 }, { "epoch": 1.0033477138129918, "grad_norm": 0.11886280030012131, "learning_rate": 0.002, "loss": 2.3518, "step": 259550 }, { "epoch": 1.0033863710163753, "grad_norm": 0.09033774584531784, "learning_rate": 0.002, "loss": 2.3267, "step": 259560 }, { "epoch": 1.0034250282197585, "grad_norm": 0.10335791856050491, "learning_rate": 0.002, "loss": 2.3463, "step": 259570 }, { "epoch": 1.0034636854231418, "grad_norm": 0.09330695122480392, "learning_rate": 0.002, "loss": 2.338, "step": 259580 }, { "epoch": 1.003502342626525, "grad_norm": 0.12462907284498215, "learning_rate": 0.002, "loss": 2.3496, "step": 259590 }, { "epoch": 1.0035409998299083, "grad_norm": 0.09722462296485901, "learning_rate": 0.002, "loss": 2.3356, "step": 259600 }, { "epoch": 1.0035796570332916, "grad_norm": 0.11706508696079254, "learning_rate": 0.002, "loss": 2.3436, "step": 259610 }, { "epoch": 1.0036183142366748, "grad_norm": 0.09267829358577728, "learning_rate": 0.002, "loss": 2.3444, "step": 259620 }, { "epoch": 1.003656971440058, "grad_norm": 0.08400312811136246, "learning_rate": 0.002, "loss": 2.3491, "step": 259630 }, { "epoch": 1.0036956286434415, "grad_norm": 0.1081552729010582, "learning_rate": 0.002, "loss": 2.3326, "step": 259640 }, { "epoch": 1.0037342858468248, "grad_norm": 0.10565217584371567, "learning_rate": 0.002, "loss": 2.3539, "step": 259650 }, { "epoch": 1.003772943050208, "grad_norm": 0.09283998608589172, "learning_rate": 0.002, "loss": 2.3444, "step": 259660 }, { "epoch": 1.0038116002535913, "grad_norm": 0.12594252824783325, "learning_rate": 0.002, "loss": 2.3358, "step": 259670 }, { "epoch": 1.0038502574569745, "grad_norm": 0.10351262241601944, "learning_rate": 0.002, "loss": 2.3419, "step": 259680 }, { "epoch": 1.0038889146603578, "grad_norm": 0.09758039563894272, "learning_rate": 0.002, "loss": 2.3393, "step": 259690 }, { "epoch": 1.003927571863741, "grad_norm": 0.11524217575788498, "learning_rate": 0.002, "loss": 2.3264, "step": 259700 }, { "epoch": 1.0039662290671243, "grad_norm": 0.08860163390636444, "learning_rate": 0.002, "loss": 2.3458, "step": 259710 }, { "epoch": 1.0040048862705075, "grad_norm": 0.09536821395158768, "learning_rate": 0.002, "loss": 2.3443, "step": 259720 }, { "epoch": 1.004043543473891, "grad_norm": 0.11455459147691727, "learning_rate": 0.002, "loss": 2.3369, "step": 259730 }, { "epoch": 1.0040822006772743, "grad_norm": 0.12582819163799286, "learning_rate": 0.002, "loss": 2.3503, "step": 259740 }, { "epoch": 1.0041208578806575, "grad_norm": 0.11551017314195633, "learning_rate": 0.002, "loss": 2.3276, "step": 259750 }, { "epoch": 1.0041595150840408, "grad_norm": 0.11985752731561661, "learning_rate": 0.002, "loss": 2.3551, "step": 259760 }, { "epoch": 1.004198172287424, "grad_norm": 0.09666941314935684, "learning_rate": 0.002, "loss": 2.3374, "step": 259770 }, { "epoch": 1.0042368294908073, "grad_norm": 0.11292658746242523, "learning_rate": 0.002, "loss": 2.3434, "step": 259780 }, { "epoch": 1.0042754866941905, "grad_norm": 0.10751032084226608, "learning_rate": 0.002, "loss": 2.3193, "step": 259790 }, { "epoch": 1.0043141438975738, "grad_norm": 0.09483782202005386, "learning_rate": 0.002, "loss": 2.3404, "step": 259800 }, { "epoch": 1.0043528011009573, "grad_norm": 0.10770095884799957, "learning_rate": 0.002, "loss": 2.3464, "step": 259810 }, { "epoch": 1.0043914583043405, "grad_norm": 0.10307003557682037, "learning_rate": 0.002, "loss": 2.3311, "step": 259820 }, { "epoch": 1.0044301155077238, "grad_norm": 0.1362147182226181, "learning_rate": 0.002, "loss": 2.3349, "step": 259830 }, { "epoch": 1.004468772711107, "grad_norm": 0.11033639311790466, "learning_rate": 0.002, "loss": 2.3319, "step": 259840 }, { "epoch": 1.0045074299144903, "grad_norm": 0.09604737907648087, "learning_rate": 0.002, "loss": 2.316, "step": 259850 }, { "epoch": 1.0045460871178735, "grad_norm": 0.10238654911518097, "learning_rate": 0.002, "loss": 2.3434, "step": 259860 }, { "epoch": 1.0045847443212568, "grad_norm": 0.10481748729944229, "learning_rate": 0.002, "loss": 2.3315, "step": 259870 }, { "epoch": 1.00462340152464, "grad_norm": 0.11596349626779556, "learning_rate": 0.002, "loss": 2.3308, "step": 259880 }, { "epoch": 1.0046620587280233, "grad_norm": 0.13182884454727173, "learning_rate": 0.002, "loss": 2.344, "step": 259890 }, { "epoch": 1.0047007159314068, "grad_norm": 0.11005352437496185, "learning_rate": 0.002, "loss": 2.3298, "step": 259900 }, { "epoch": 1.00473937313479, "grad_norm": 0.09401649236679077, "learning_rate": 0.002, "loss": 2.3335, "step": 259910 }, { "epoch": 1.0047780303381733, "grad_norm": 0.10460685193538666, "learning_rate": 0.002, "loss": 2.3265, "step": 259920 }, { "epoch": 1.0048166875415565, "grad_norm": 0.10680817812681198, "learning_rate": 0.002, "loss": 2.3567, "step": 259930 }, { "epoch": 1.0048553447449398, "grad_norm": 0.17173266410827637, "learning_rate": 0.002, "loss": 2.3272, "step": 259940 }, { "epoch": 1.004894001948323, "grad_norm": 0.08766689896583557, "learning_rate": 0.002, "loss": 2.3331, "step": 259950 }, { "epoch": 1.0049326591517063, "grad_norm": 0.11202475428581238, "learning_rate": 0.002, "loss": 2.3285, "step": 259960 }, { "epoch": 1.0049713163550895, "grad_norm": 0.10411037504673004, "learning_rate": 0.002, "loss": 2.3287, "step": 259970 }, { "epoch": 1.005009973558473, "grad_norm": 0.12385007739067078, "learning_rate": 0.002, "loss": 2.3516, "step": 259980 }, { "epoch": 1.0050486307618562, "grad_norm": 0.10399666428565979, "learning_rate": 0.002, "loss": 2.3293, "step": 259990 }, { "epoch": 1.0050872879652395, "grad_norm": 0.10041926056146622, "learning_rate": 0.002, "loss": 2.337, "step": 260000 }, { "epoch": 1.0051259451686227, "grad_norm": 0.11362820863723755, "learning_rate": 0.002, "loss": 2.3385, "step": 260010 }, { "epoch": 1.005164602372006, "grad_norm": 0.09621165692806244, "learning_rate": 0.002, "loss": 2.343, "step": 260020 }, { "epoch": 1.0052032595753893, "grad_norm": 0.09594116359949112, "learning_rate": 0.002, "loss": 2.3375, "step": 260030 }, { "epoch": 1.0052419167787725, "grad_norm": 0.12816180288791656, "learning_rate": 0.002, "loss": 2.3347, "step": 260040 }, { "epoch": 1.0052805739821558, "grad_norm": 0.09673391282558441, "learning_rate": 0.002, "loss": 2.3331, "step": 260050 }, { "epoch": 1.005319231185539, "grad_norm": 0.10709298402070999, "learning_rate": 0.002, "loss": 2.3409, "step": 260060 }, { "epoch": 1.0053578883889225, "grad_norm": 0.10160128772258759, "learning_rate": 0.002, "loss": 2.34, "step": 260070 }, { "epoch": 1.0053965455923057, "grad_norm": 0.09211761504411697, "learning_rate": 0.002, "loss": 2.3372, "step": 260080 }, { "epoch": 1.005435202795689, "grad_norm": 0.10863563418388367, "learning_rate": 0.002, "loss": 2.348, "step": 260090 }, { "epoch": 1.0054738599990722, "grad_norm": 0.1012507751584053, "learning_rate": 0.002, "loss": 2.3412, "step": 260100 }, { "epoch": 1.0055125172024555, "grad_norm": 0.10079700499773026, "learning_rate": 0.002, "loss": 2.3236, "step": 260110 }, { "epoch": 1.0055511744058387, "grad_norm": 0.11268491297960281, "learning_rate": 0.002, "loss": 2.331, "step": 260120 }, { "epoch": 1.005589831609222, "grad_norm": 0.10046052187681198, "learning_rate": 0.002, "loss": 2.3356, "step": 260130 }, { "epoch": 1.0056284888126052, "grad_norm": 0.12722858786582947, "learning_rate": 0.002, "loss": 2.337, "step": 260140 }, { "epoch": 1.0056671460159887, "grad_norm": 0.11442416161298752, "learning_rate": 0.002, "loss": 2.359, "step": 260150 }, { "epoch": 1.005705803219372, "grad_norm": 0.09657229483127594, "learning_rate": 0.002, "loss": 2.3451, "step": 260160 }, { "epoch": 1.0057444604227552, "grad_norm": 0.11371700465679169, "learning_rate": 0.002, "loss": 2.3355, "step": 260170 }, { "epoch": 1.0057831176261385, "grad_norm": 0.11046253144741058, "learning_rate": 0.002, "loss": 2.3602, "step": 260180 }, { "epoch": 1.0058217748295217, "grad_norm": 0.10766205191612244, "learning_rate": 0.002, "loss": 2.3247, "step": 260190 }, { "epoch": 1.005860432032905, "grad_norm": 0.1016693189740181, "learning_rate": 0.002, "loss": 2.3316, "step": 260200 }, { "epoch": 1.0058990892362882, "grad_norm": 0.11253286153078079, "learning_rate": 0.002, "loss": 2.3257, "step": 260210 }, { "epoch": 1.0059377464396715, "grad_norm": 0.1164514496922493, "learning_rate": 0.002, "loss": 2.3331, "step": 260220 }, { "epoch": 1.0059764036430547, "grad_norm": 0.1074240505695343, "learning_rate": 0.002, "loss": 2.3391, "step": 260230 }, { "epoch": 1.0060150608464382, "grad_norm": 0.09927522391080856, "learning_rate": 0.002, "loss": 2.3402, "step": 260240 }, { "epoch": 1.0060537180498215, "grad_norm": 0.11250677704811096, "learning_rate": 0.002, "loss": 2.3369, "step": 260250 }, { "epoch": 1.0060923752532047, "grad_norm": 0.1204577088356018, "learning_rate": 0.002, "loss": 2.3345, "step": 260260 }, { "epoch": 1.006131032456588, "grad_norm": 0.10001301020383835, "learning_rate": 0.002, "loss": 2.345, "step": 260270 }, { "epoch": 1.0061696896599712, "grad_norm": 0.12086349725723267, "learning_rate": 0.002, "loss": 2.3139, "step": 260280 }, { "epoch": 1.0062083468633545, "grad_norm": 0.11192157119512558, "learning_rate": 0.002, "loss": 2.3207, "step": 260290 }, { "epoch": 1.0062470040667377, "grad_norm": 0.1001996248960495, "learning_rate": 0.002, "loss": 2.3406, "step": 260300 }, { "epoch": 1.006285661270121, "grad_norm": 0.10504887998104095, "learning_rate": 0.002, "loss": 2.3292, "step": 260310 }, { "epoch": 1.0063243184735045, "grad_norm": 0.1161704808473587, "learning_rate": 0.002, "loss": 2.3373, "step": 260320 }, { "epoch": 1.0063629756768877, "grad_norm": 0.10507213324308395, "learning_rate": 0.002, "loss": 2.3256, "step": 260330 }, { "epoch": 1.006401632880271, "grad_norm": 0.09793032705783844, "learning_rate": 0.002, "loss": 2.3366, "step": 260340 }, { "epoch": 1.0064402900836542, "grad_norm": 0.11236788332462311, "learning_rate": 0.002, "loss": 2.3556, "step": 260350 }, { "epoch": 1.0064789472870375, "grad_norm": 0.10597644746303558, "learning_rate": 0.002, "loss": 2.3276, "step": 260360 }, { "epoch": 1.0065176044904207, "grad_norm": 0.11315464228391647, "learning_rate": 0.002, "loss": 2.3342, "step": 260370 }, { "epoch": 1.006556261693804, "grad_norm": 0.1039297804236412, "learning_rate": 0.002, "loss": 2.3355, "step": 260380 }, { "epoch": 1.0065949188971872, "grad_norm": 0.09415727108716965, "learning_rate": 0.002, "loss": 2.3304, "step": 260390 }, { "epoch": 1.0066335761005705, "grad_norm": 0.11566637456417084, "learning_rate": 0.002, "loss": 2.3275, "step": 260400 }, { "epoch": 1.006672233303954, "grad_norm": 0.1017017811536789, "learning_rate": 0.002, "loss": 2.3398, "step": 260410 }, { "epoch": 1.0067108905073372, "grad_norm": 0.13970237970352173, "learning_rate": 0.002, "loss": 2.34, "step": 260420 }, { "epoch": 1.0067495477107204, "grad_norm": 0.11136095970869064, "learning_rate": 0.002, "loss": 2.3342, "step": 260430 }, { "epoch": 1.0067882049141037, "grad_norm": 0.10176298767328262, "learning_rate": 0.002, "loss": 2.3338, "step": 260440 }, { "epoch": 1.006826862117487, "grad_norm": 0.08913923054933548, "learning_rate": 0.002, "loss": 2.336, "step": 260450 }, { "epoch": 1.0068655193208702, "grad_norm": 0.10855156928300858, "learning_rate": 0.002, "loss": 2.3371, "step": 260460 }, { "epoch": 1.0069041765242535, "grad_norm": 0.11180077493190765, "learning_rate": 0.002, "loss": 2.338, "step": 260470 }, { "epoch": 1.0069428337276367, "grad_norm": 0.10207480937242508, "learning_rate": 0.002, "loss": 2.3582, "step": 260480 }, { "epoch": 1.0069814909310202, "grad_norm": 0.08677595853805542, "learning_rate": 0.002, "loss": 2.3395, "step": 260490 }, { "epoch": 1.0070201481344034, "grad_norm": 0.13296319544315338, "learning_rate": 0.002, "loss": 2.3366, "step": 260500 }, { "epoch": 1.0070588053377867, "grad_norm": 0.08792676031589508, "learning_rate": 0.002, "loss": 2.34, "step": 260510 }, { "epoch": 1.00709746254117, "grad_norm": 0.09438177943229675, "learning_rate": 0.002, "loss": 2.348, "step": 260520 }, { "epoch": 1.0071361197445532, "grad_norm": 0.1061626598238945, "learning_rate": 0.002, "loss": 2.3389, "step": 260530 }, { "epoch": 1.0071747769479364, "grad_norm": 0.10752874612808228, "learning_rate": 0.002, "loss": 2.3253, "step": 260540 }, { "epoch": 1.0072134341513197, "grad_norm": 0.12163309752941132, "learning_rate": 0.002, "loss": 2.3459, "step": 260550 }, { "epoch": 1.007252091354703, "grad_norm": 0.13164447247982025, "learning_rate": 0.002, "loss": 2.3425, "step": 260560 }, { "epoch": 1.0072907485580864, "grad_norm": 0.09046389162540436, "learning_rate": 0.002, "loss": 2.3523, "step": 260570 }, { "epoch": 1.0073294057614697, "grad_norm": 0.10954193770885468, "learning_rate": 0.002, "loss": 2.3301, "step": 260580 }, { "epoch": 1.007368062964853, "grad_norm": 0.1164688989520073, "learning_rate": 0.002, "loss": 2.3279, "step": 260590 }, { "epoch": 1.0074067201682362, "grad_norm": 0.10950072854757309, "learning_rate": 0.002, "loss": 2.3278, "step": 260600 }, { "epoch": 1.0074453773716194, "grad_norm": 0.09672633558511734, "learning_rate": 0.002, "loss": 2.3299, "step": 260610 }, { "epoch": 1.0074840345750027, "grad_norm": 0.10479829460382462, "learning_rate": 0.002, "loss": 2.3497, "step": 260620 }, { "epoch": 1.007522691778386, "grad_norm": 0.09484247863292694, "learning_rate": 0.002, "loss": 2.3417, "step": 260630 }, { "epoch": 1.0075613489817692, "grad_norm": 0.09945641458034515, "learning_rate": 0.002, "loss": 2.3376, "step": 260640 }, { "epoch": 1.0076000061851524, "grad_norm": 0.11775811016559601, "learning_rate": 0.002, "loss": 2.331, "step": 260650 }, { "epoch": 1.007638663388536, "grad_norm": 0.09762194007635117, "learning_rate": 0.002, "loss": 2.3327, "step": 260660 }, { "epoch": 1.0076773205919192, "grad_norm": 0.11502894759178162, "learning_rate": 0.002, "loss": 2.3368, "step": 260670 }, { "epoch": 1.0077159777953024, "grad_norm": 0.10429210960865021, "learning_rate": 0.002, "loss": 2.3519, "step": 260680 }, { "epoch": 1.0077546349986857, "grad_norm": 0.11102215945720673, "learning_rate": 0.002, "loss": 2.3351, "step": 260690 }, { "epoch": 1.007793292202069, "grad_norm": 0.10406568646430969, "learning_rate": 0.002, "loss": 2.3525, "step": 260700 }, { "epoch": 1.0078319494054522, "grad_norm": 0.0996653363108635, "learning_rate": 0.002, "loss": 2.3494, "step": 260710 }, { "epoch": 1.0078706066088354, "grad_norm": 0.10210791230201721, "learning_rate": 0.002, "loss": 2.3415, "step": 260720 }, { "epoch": 1.0079092638122187, "grad_norm": 0.10391468554735184, "learning_rate": 0.002, "loss": 2.3337, "step": 260730 }, { "epoch": 1.0079479210156022, "grad_norm": 0.09697168320417404, "learning_rate": 0.002, "loss": 2.3371, "step": 260740 }, { "epoch": 1.0079865782189854, "grad_norm": 0.10640589147806168, "learning_rate": 0.002, "loss": 2.3311, "step": 260750 }, { "epoch": 1.0080252354223687, "grad_norm": 0.12679150700569153, "learning_rate": 0.002, "loss": 2.3309, "step": 260760 }, { "epoch": 1.008063892625752, "grad_norm": 0.08968043327331543, "learning_rate": 0.002, "loss": 2.3297, "step": 260770 }, { "epoch": 1.0081025498291352, "grad_norm": 0.11260325461626053, "learning_rate": 0.002, "loss": 2.3437, "step": 260780 }, { "epoch": 1.0081412070325184, "grad_norm": 0.09950599074363708, "learning_rate": 0.002, "loss": 2.331, "step": 260790 }, { "epoch": 1.0081798642359017, "grad_norm": 0.10537993907928467, "learning_rate": 0.002, "loss": 2.3293, "step": 260800 }, { "epoch": 1.008218521439285, "grad_norm": 0.10507483780384064, "learning_rate": 0.002, "loss": 2.3558, "step": 260810 }, { "epoch": 1.0082571786426682, "grad_norm": 0.09803906828165054, "learning_rate": 0.002, "loss": 2.345, "step": 260820 }, { "epoch": 1.0082958358460516, "grad_norm": 0.10181978344917297, "learning_rate": 0.002, "loss": 2.3252, "step": 260830 }, { "epoch": 1.008334493049435, "grad_norm": 0.10805722326040268, "learning_rate": 0.002, "loss": 2.3288, "step": 260840 }, { "epoch": 1.0083731502528182, "grad_norm": 0.2941838204860687, "learning_rate": 0.002, "loss": 2.3394, "step": 260850 }, { "epoch": 1.0084118074562014, "grad_norm": 0.10703596472740173, "learning_rate": 0.002, "loss": 2.3396, "step": 260860 }, { "epoch": 1.0084504646595847, "grad_norm": 0.10350608825683594, "learning_rate": 0.002, "loss": 2.3368, "step": 260870 }, { "epoch": 1.008489121862968, "grad_norm": 0.09086059778928757, "learning_rate": 0.002, "loss": 2.3325, "step": 260880 }, { "epoch": 1.0085277790663512, "grad_norm": 0.09934677183628082, "learning_rate": 0.002, "loss": 2.3358, "step": 260890 }, { "epoch": 1.0085664362697344, "grad_norm": 0.10361968725919724, "learning_rate": 0.002, "loss": 2.3386, "step": 260900 }, { "epoch": 1.0086050934731179, "grad_norm": 0.10142628848552704, "learning_rate": 0.002, "loss": 2.345, "step": 260910 }, { "epoch": 1.0086437506765011, "grad_norm": 0.10995934903621674, "learning_rate": 0.002, "loss": 2.3223, "step": 260920 }, { "epoch": 1.0086824078798844, "grad_norm": 0.10714113712310791, "learning_rate": 0.002, "loss": 2.3448, "step": 260930 }, { "epoch": 1.0087210650832676, "grad_norm": 0.09014647454023361, "learning_rate": 0.002, "loss": 2.3388, "step": 260940 }, { "epoch": 1.008759722286651, "grad_norm": 0.10555219650268555, "learning_rate": 0.002, "loss": 2.3397, "step": 260950 }, { "epoch": 1.0087983794900341, "grad_norm": 0.10361549258232117, "learning_rate": 0.002, "loss": 2.326, "step": 260960 }, { "epoch": 1.0088370366934174, "grad_norm": 0.10561522841453552, "learning_rate": 0.002, "loss": 2.3472, "step": 260970 }, { "epoch": 1.0088756938968007, "grad_norm": 0.09323658794164658, "learning_rate": 0.002, "loss": 2.3259, "step": 260980 }, { "epoch": 1.008914351100184, "grad_norm": 0.11476288735866547, "learning_rate": 0.002, "loss": 2.3388, "step": 260990 }, { "epoch": 1.0089530083035674, "grad_norm": 0.11546418070793152, "learning_rate": 0.002, "loss": 2.3257, "step": 261000 }, { "epoch": 1.0089916655069506, "grad_norm": 0.10268153995275497, "learning_rate": 0.002, "loss": 2.3519, "step": 261010 }, { "epoch": 1.0090303227103339, "grad_norm": 0.10294366627931595, "learning_rate": 0.002, "loss": 2.3433, "step": 261020 }, { "epoch": 1.0090689799137171, "grad_norm": 0.09683190286159515, "learning_rate": 0.002, "loss": 2.344, "step": 261030 }, { "epoch": 1.0091076371171004, "grad_norm": 0.098160021007061, "learning_rate": 0.002, "loss": 2.3415, "step": 261040 }, { "epoch": 1.0091462943204836, "grad_norm": 0.10156324505805969, "learning_rate": 0.002, "loss": 2.3352, "step": 261050 }, { "epoch": 1.009184951523867, "grad_norm": 0.10965844243764877, "learning_rate": 0.002, "loss": 2.343, "step": 261060 }, { "epoch": 1.0092236087272501, "grad_norm": 0.1027294471859932, "learning_rate": 0.002, "loss": 2.3452, "step": 261070 }, { "epoch": 1.0092622659306336, "grad_norm": 0.0865500271320343, "learning_rate": 0.002, "loss": 2.3319, "step": 261080 }, { "epoch": 1.0093009231340169, "grad_norm": 0.13334614038467407, "learning_rate": 0.002, "loss": 2.3242, "step": 261090 }, { "epoch": 1.0093395803374001, "grad_norm": 0.11170141398906708, "learning_rate": 0.002, "loss": 2.3408, "step": 261100 }, { "epoch": 1.0093782375407834, "grad_norm": 0.1030377596616745, "learning_rate": 0.002, "loss": 2.3355, "step": 261110 }, { "epoch": 1.0094168947441666, "grad_norm": 0.11204218864440918, "learning_rate": 0.002, "loss": 2.344, "step": 261120 }, { "epoch": 1.0094555519475499, "grad_norm": 0.10371985286474228, "learning_rate": 0.002, "loss": 2.3325, "step": 261130 }, { "epoch": 1.0094942091509331, "grad_norm": 0.10569999366998672, "learning_rate": 0.002, "loss": 2.3387, "step": 261140 }, { "epoch": 1.0095328663543164, "grad_norm": 0.10509838908910751, "learning_rate": 0.002, "loss": 2.3257, "step": 261150 }, { "epoch": 1.0095715235576996, "grad_norm": 0.11228816211223602, "learning_rate": 0.002, "loss": 2.3293, "step": 261160 }, { "epoch": 1.009610180761083, "grad_norm": 0.11870377510786057, "learning_rate": 0.002, "loss": 2.3374, "step": 261170 }, { "epoch": 1.0096488379644664, "grad_norm": 0.11041771620512009, "learning_rate": 0.002, "loss": 2.3452, "step": 261180 }, { "epoch": 1.0096874951678496, "grad_norm": 0.09817316383123398, "learning_rate": 0.002, "loss": 2.3426, "step": 261190 }, { "epoch": 1.0097261523712329, "grad_norm": 0.10386999696493149, "learning_rate": 0.002, "loss": 2.3352, "step": 261200 }, { "epoch": 1.0097648095746161, "grad_norm": 0.09739833325147629, "learning_rate": 0.002, "loss": 2.3416, "step": 261210 }, { "epoch": 1.0098034667779994, "grad_norm": 0.10959730297327042, "learning_rate": 0.002, "loss": 2.331, "step": 261220 }, { "epoch": 1.0098421239813826, "grad_norm": 0.11688784509897232, "learning_rate": 0.002, "loss": 2.3497, "step": 261230 }, { "epoch": 1.0098807811847659, "grad_norm": 0.1169438585639, "learning_rate": 0.002, "loss": 2.3376, "step": 261240 }, { "epoch": 1.0099194383881493, "grad_norm": 0.1012745201587677, "learning_rate": 0.002, "loss": 2.33, "step": 261250 }, { "epoch": 1.0099580955915326, "grad_norm": 0.09531766176223755, "learning_rate": 0.002, "loss": 2.3393, "step": 261260 }, { "epoch": 1.0099967527949159, "grad_norm": 0.10129254311323166, "learning_rate": 0.002, "loss": 2.3356, "step": 261270 }, { "epoch": 1.010035409998299, "grad_norm": 0.10263433307409286, "learning_rate": 0.002, "loss": 2.3279, "step": 261280 }, { "epoch": 1.0100740672016824, "grad_norm": 0.09622534364461899, "learning_rate": 0.002, "loss": 2.3553, "step": 261290 }, { "epoch": 1.0101127244050656, "grad_norm": 0.10295294225215912, "learning_rate": 0.002, "loss": 2.3254, "step": 261300 }, { "epoch": 1.0101513816084489, "grad_norm": 0.0946008712053299, "learning_rate": 0.002, "loss": 2.3436, "step": 261310 }, { "epoch": 1.0101900388118321, "grad_norm": 0.10893122851848602, "learning_rate": 0.002, "loss": 2.338, "step": 261320 }, { "epoch": 1.0102286960152154, "grad_norm": 0.09725047647953033, "learning_rate": 0.002, "loss": 2.3507, "step": 261330 }, { "epoch": 1.0102673532185988, "grad_norm": 0.10373464226722717, "learning_rate": 0.002, "loss": 2.3191, "step": 261340 }, { "epoch": 1.010306010421982, "grad_norm": 0.4837754964828491, "learning_rate": 0.002, "loss": 2.3376, "step": 261350 }, { "epoch": 1.0103446676253653, "grad_norm": 0.14557918906211853, "learning_rate": 0.002, "loss": 2.3505, "step": 261360 }, { "epoch": 1.0103833248287486, "grad_norm": 0.13766615092754364, "learning_rate": 0.002, "loss": 2.3487, "step": 261370 }, { "epoch": 1.0104219820321318, "grad_norm": 0.09187277406454086, "learning_rate": 0.002, "loss": 2.3432, "step": 261380 }, { "epoch": 1.010460639235515, "grad_norm": 0.10806089639663696, "learning_rate": 0.002, "loss": 2.3491, "step": 261390 }, { "epoch": 1.0104992964388984, "grad_norm": 0.1004478856921196, "learning_rate": 0.002, "loss": 2.3457, "step": 261400 }, { "epoch": 1.0105379536422816, "grad_norm": 0.1010182648897171, "learning_rate": 0.002, "loss": 2.3285, "step": 261410 }, { "epoch": 1.010576610845665, "grad_norm": 0.2028651237487793, "learning_rate": 0.002, "loss": 2.3351, "step": 261420 }, { "epoch": 1.0106152680490483, "grad_norm": 0.090111143887043, "learning_rate": 0.002, "loss": 2.3288, "step": 261430 }, { "epoch": 1.0106539252524316, "grad_norm": 0.10565896332263947, "learning_rate": 0.002, "loss": 2.3294, "step": 261440 }, { "epoch": 1.0106925824558148, "grad_norm": 0.1016477420926094, "learning_rate": 0.002, "loss": 2.3336, "step": 261450 }, { "epoch": 1.010731239659198, "grad_norm": 0.09837300330400467, "learning_rate": 0.002, "loss": 2.3408, "step": 261460 }, { "epoch": 1.0107698968625813, "grad_norm": 0.10926554352045059, "learning_rate": 0.002, "loss": 2.3214, "step": 261470 }, { "epoch": 1.0108085540659646, "grad_norm": 0.10980924963951111, "learning_rate": 0.002, "loss": 2.3383, "step": 261480 }, { "epoch": 1.0108472112693478, "grad_norm": 0.10417355597019196, "learning_rate": 0.002, "loss": 2.3371, "step": 261490 }, { "epoch": 1.010885868472731, "grad_norm": 0.09010007232427597, "learning_rate": 0.002, "loss": 2.3374, "step": 261500 }, { "epoch": 1.0109245256761146, "grad_norm": 0.14487899839878082, "learning_rate": 0.002, "loss": 2.3434, "step": 261510 }, { "epoch": 1.0109631828794978, "grad_norm": 0.10193762928247452, "learning_rate": 0.002, "loss": 2.35, "step": 261520 }, { "epoch": 1.011001840082881, "grad_norm": 0.09536083042621613, "learning_rate": 0.002, "loss": 2.3451, "step": 261530 }, { "epoch": 1.0110404972862643, "grad_norm": 0.10808293521404266, "learning_rate": 0.002, "loss": 2.3316, "step": 261540 }, { "epoch": 1.0110791544896476, "grad_norm": 0.10147858411073685, "learning_rate": 0.002, "loss": 2.3405, "step": 261550 }, { "epoch": 1.0111178116930308, "grad_norm": 0.14660899341106415, "learning_rate": 0.002, "loss": 2.3353, "step": 261560 }, { "epoch": 1.011156468896414, "grad_norm": 0.09802790731191635, "learning_rate": 0.002, "loss": 2.3396, "step": 261570 }, { "epoch": 1.0111951260997973, "grad_norm": 0.09951503574848175, "learning_rate": 0.002, "loss": 2.3361, "step": 261580 }, { "epoch": 1.0112337833031808, "grad_norm": 0.12643538415431976, "learning_rate": 0.002, "loss": 2.3426, "step": 261590 }, { "epoch": 1.011272440506564, "grad_norm": 0.1129726767539978, "learning_rate": 0.002, "loss": 2.3334, "step": 261600 }, { "epoch": 1.0113110977099473, "grad_norm": 0.0968206450343132, "learning_rate": 0.002, "loss": 2.3489, "step": 261610 }, { "epoch": 1.0113497549133306, "grad_norm": 0.11443905532360077, "learning_rate": 0.002, "loss": 2.3329, "step": 261620 }, { "epoch": 1.0113884121167138, "grad_norm": 0.12192221730947495, "learning_rate": 0.002, "loss": 2.3388, "step": 261630 }, { "epoch": 1.011427069320097, "grad_norm": 0.10034405440092087, "learning_rate": 0.002, "loss": 2.3409, "step": 261640 }, { "epoch": 1.0114657265234803, "grad_norm": 0.10083135962486267, "learning_rate": 0.002, "loss": 2.3489, "step": 261650 }, { "epoch": 1.0115043837268636, "grad_norm": 0.12016676366329193, "learning_rate": 0.002, "loss": 2.3347, "step": 261660 }, { "epoch": 1.011543040930247, "grad_norm": 0.09202968329191208, "learning_rate": 0.002, "loss": 2.3409, "step": 261670 }, { "epoch": 1.0115816981336303, "grad_norm": 0.09863679111003876, "learning_rate": 0.002, "loss": 2.3597, "step": 261680 }, { "epoch": 1.0116203553370136, "grad_norm": 0.11649753898382187, "learning_rate": 0.002, "loss": 2.3318, "step": 261690 }, { "epoch": 1.0116590125403968, "grad_norm": 0.10803171992301941, "learning_rate": 0.002, "loss": 2.3486, "step": 261700 }, { "epoch": 1.01169766974378, "grad_norm": 0.1016639918088913, "learning_rate": 0.002, "loss": 2.3505, "step": 261710 }, { "epoch": 1.0117363269471633, "grad_norm": 0.0989474281668663, "learning_rate": 0.002, "loss": 2.3406, "step": 261720 }, { "epoch": 1.0117749841505466, "grad_norm": 0.09995424002408981, "learning_rate": 0.002, "loss": 2.3344, "step": 261730 }, { "epoch": 1.0118136413539298, "grad_norm": 0.10237865895032883, "learning_rate": 0.002, "loss": 2.3341, "step": 261740 }, { "epoch": 1.011852298557313, "grad_norm": 0.10621260851621628, "learning_rate": 0.002, "loss": 2.3205, "step": 261750 }, { "epoch": 1.0118909557606965, "grad_norm": 0.1114802435040474, "learning_rate": 0.002, "loss": 2.3308, "step": 261760 }, { "epoch": 1.0119296129640798, "grad_norm": 0.1145864799618721, "learning_rate": 0.002, "loss": 2.3417, "step": 261770 }, { "epoch": 1.011968270167463, "grad_norm": 0.10577882826328278, "learning_rate": 0.002, "loss": 2.349, "step": 261780 }, { "epoch": 1.0120069273708463, "grad_norm": 0.10166753083467484, "learning_rate": 0.002, "loss": 2.3388, "step": 261790 }, { "epoch": 1.0120455845742296, "grad_norm": 0.1139196902513504, "learning_rate": 0.002, "loss": 2.3304, "step": 261800 }, { "epoch": 1.0120842417776128, "grad_norm": 0.0951429158449173, "learning_rate": 0.002, "loss": 2.3427, "step": 261810 }, { "epoch": 1.012122898980996, "grad_norm": 0.19654862582683563, "learning_rate": 0.002, "loss": 2.3547, "step": 261820 }, { "epoch": 1.0121615561843793, "grad_norm": 0.09855328500270844, "learning_rate": 0.002, "loss": 2.3356, "step": 261830 }, { "epoch": 1.0122002133877628, "grad_norm": 0.10782697051763535, "learning_rate": 0.002, "loss": 2.3358, "step": 261840 }, { "epoch": 1.012238870591146, "grad_norm": 0.11967076361179352, "learning_rate": 0.002, "loss": 2.3422, "step": 261850 }, { "epoch": 1.0122775277945293, "grad_norm": 0.089723140001297, "learning_rate": 0.002, "loss": 2.3264, "step": 261860 }, { "epoch": 1.0123161849979125, "grad_norm": 0.09120474755764008, "learning_rate": 0.002, "loss": 2.3292, "step": 261870 }, { "epoch": 1.0123548422012958, "grad_norm": 0.09240848571062088, "learning_rate": 0.002, "loss": 2.326, "step": 261880 }, { "epoch": 1.012393499404679, "grad_norm": 0.10871592164039612, "learning_rate": 0.002, "loss": 2.342, "step": 261890 }, { "epoch": 1.0124321566080623, "grad_norm": 0.11382023990154266, "learning_rate": 0.002, "loss": 2.3554, "step": 261900 }, { "epoch": 1.0124708138114455, "grad_norm": 0.1023283526301384, "learning_rate": 0.002, "loss": 2.338, "step": 261910 }, { "epoch": 1.0125094710148288, "grad_norm": 0.1051417887210846, "learning_rate": 0.002, "loss": 2.3436, "step": 261920 }, { "epoch": 1.0125481282182123, "grad_norm": 0.1110028624534607, "learning_rate": 0.002, "loss": 2.3289, "step": 261930 }, { "epoch": 1.0125867854215955, "grad_norm": 0.09721856564283371, "learning_rate": 0.002, "loss": 2.3265, "step": 261940 }, { "epoch": 1.0126254426249788, "grad_norm": 0.1076245903968811, "learning_rate": 0.002, "loss": 2.3366, "step": 261950 }, { "epoch": 1.012664099828362, "grad_norm": 0.09990669041872025, "learning_rate": 0.002, "loss": 2.3326, "step": 261960 }, { "epoch": 1.0127027570317453, "grad_norm": 0.14250290393829346, "learning_rate": 0.002, "loss": 2.3521, "step": 261970 }, { "epoch": 1.0127414142351285, "grad_norm": 0.10246880352497101, "learning_rate": 0.002, "loss": 2.327, "step": 261980 }, { "epoch": 1.0127800714385118, "grad_norm": 0.09257150441408157, "learning_rate": 0.002, "loss": 2.3513, "step": 261990 }, { "epoch": 1.012818728641895, "grad_norm": 0.10628538578748703, "learning_rate": 0.002, "loss": 2.3352, "step": 262000 }, { "epoch": 1.0128573858452785, "grad_norm": 0.08506090193986893, "learning_rate": 0.002, "loss": 2.3292, "step": 262010 }, { "epoch": 1.0128960430486618, "grad_norm": 0.1248772144317627, "learning_rate": 0.002, "loss": 2.3312, "step": 262020 }, { "epoch": 1.012934700252045, "grad_norm": 0.10065978765487671, "learning_rate": 0.002, "loss": 2.3472, "step": 262030 }, { "epoch": 1.0129733574554283, "grad_norm": 0.09866821765899658, "learning_rate": 0.002, "loss": 2.3479, "step": 262040 }, { "epoch": 1.0130120146588115, "grad_norm": 0.10731372237205505, "learning_rate": 0.002, "loss": 2.3346, "step": 262050 }, { "epoch": 1.0130506718621948, "grad_norm": 0.10754463076591492, "learning_rate": 0.002, "loss": 2.3427, "step": 262060 }, { "epoch": 1.013089329065578, "grad_norm": 0.09564726054668427, "learning_rate": 0.002, "loss": 2.3333, "step": 262070 }, { "epoch": 1.0131279862689613, "grad_norm": 0.10373189300298691, "learning_rate": 0.002, "loss": 2.3299, "step": 262080 }, { "epoch": 1.0131666434723445, "grad_norm": 0.09389154613018036, "learning_rate": 0.002, "loss": 2.34, "step": 262090 }, { "epoch": 1.013205300675728, "grad_norm": 0.1045018807053566, "learning_rate": 0.002, "loss": 2.3462, "step": 262100 }, { "epoch": 1.0132439578791113, "grad_norm": 0.10585897415876389, "learning_rate": 0.002, "loss": 2.341, "step": 262110 }, { "epoch": 1.0132826150824945, "grad_norm": 0.09949793666601181, "learning_rate": 0.002, "loss": 2.3515, "step": 262120 }, { "epoch": 1.0133212722858778, "grad_norm": 0.14061234891414642, "learning_rate": 0.002, "loss": 2.3303, "step": 262130 }, { "epoch": 1.013359929489261, "grad_norm": 0.0932084396481514, "learning_rate": 0.002, "loss": 2.3499, "step": 262140 }, { "epoch": 1.0133985866926443, "grad_norm": 0.12497919052839279, "learning_rate": 0.002, "loss": 2.3263, "step": 262150 }, { "epoch": 1.0134372438960275, "grad_norm": 0.11568643897771835, "learning_rate": 0.002, "loss": 2.3354, "step": 262160 }, { "epoch": 1.0134759010994108, "grad_norm": 0.10818599164485931, "learning_rate": 0.002, "loss": 2.3307, "step": 262170 }, { "epoch": 1.0135145583027942, "grad_norm": 0.09462061524391174, "learning_rate": 0.002, "loss": 2.3377, "step": 262180 }, { "epoch": 1.0135532155061775, "grad_norm": 0.1294683963060379, "learning_rate": 0.002, "loss": 2.3539, "step": 262190 }, { "epoch": 1.0135918727095607, "grad_norm": 0.0871722474694252, "learning_rate": 0.002, "loss": 2.328, "step": 262200 }, { "epoch": 1.013630529912944, "grad_norm": 0.10529077798128128, "learning_rate": 0.002, "loss": 2.3493, "step": 262210 }, { "epoch": 1.0136691871163273, "grad_norm": 0.10764256864786148, "learning_rate": 0.002, "loss": 2.3456, "step": 262220 }, { "epoch": 1.0137078443197105, "grad_norm": 0.11791141331195831, "learning_rate": 0.002, "loss": 2.3359, "step": 262230 }, { "epoch": 1.0137465015230938, "grad_norm": 0.13781200349330902, "learning_rate": 0.002, "loss": 2.3457, "step": 262240 }, { "epoch": 1.013785158726477, "grad_norm": 0.13462291657924652, "learning_rate": 0.002, "loss": 2.3383, "step": 262250 }, { "epoch": 1.0138238159298603, "grad_norm": 0.0965033695101738, "learning_rate": 0.002, "loss": 2.3213, "step": 262260 }, { "epoch": 1.0138624731332437, "grad_norm": 0.08449462056159973, "learning_rate": 0.002, "loss": 2.3388, "step": 262270 }, { "epoch": 1.013901130336627, "grad_norm": 0.10011344403028488, "learning_rate": 0.002, "loss": 2.3378, "step": 262280 }, { "epoch": 1.0139397875400102, "grad_norm": 0.1139802634716034, "learning_rate": 0.002, "loss": 2.3448, "step": 262290 }, { "epoch": 1.0139784447433935, "grad_norm": 0.11183168739080429, "learning_rate": 0.002, "loss": 2.3406, "step": 262300 }, { "epoch": 1.0140171019467767, "grad_norm": 0.09366361796855927, "learning_rate": 0.002, "loss": 2.345, "step": 262310 }, { "epoch": 1.01405575915016, "grad_norm": 0.11254223436117172, "learning_rate": 0.002, "loss": 2.3358, "step": 262320 }, { "epoch": 1.0140944163535432, "grad_norm": 0.10272639989852905, "learning_rate": 0.002, "loss": 2.3437, "step": 262330 }, { "epoch": 1.0141330735569265, "grad_norm": 0.11101660132408142, "learning_rate": 0.002, "loss": 2.3429, "step": 262340 }, { "epoch": 1.01417173076031, "grad_norm": 0.1064949706196785, "learning_rate": 0.002, "loss": 2.3312, "step": 262350 }, { "epoch": 1.0142103879636932, "grad_norm": 0.12453845888376236, "learning_rate": 0.002, "loss": 2.3291, "step": 262360 }, { "epoch": 1.0142490451670765, "grad_norm": 0.129965141415596, "learning_rate": 0.002, "loss": 2.3468, "step": 262370 }, { "epoch": 1.0142877023704597, "grad_norm": 0.10267557948827744, "learning_rate": 0.002, "loss": 2.3228, "step": 262380 }, { "epoch": 1.014326359573843, "grad_norm": 0.10027313977479935, "learning_rate": 0.002, "loss": 2.353, "step": 262390 }, { "epoch": 1.0143650167772262, "grad_norm": 0.09122640639543533, "learning_rate": 0.002, "loss": 2.3421, "step": 262400 }, { "epoch": 1.0144036739806095, "grad_norm": 0.11867644637823105, "learning_rate": 0.002, "loss": 2.3345, "step": 262410 }, { "epoch": 1.0144423311839927, "grad_norm": 0.11216876655817032, "learning_rate": 0.002, "loss": 2.3311, "step": 262420 }, { "epoch": 1.0144809883873762, "grad_norm": 0.10928855091333389, "learning_rate": 0.002, "loss": 2.3237, "step": 262430 }, { "epoch": 1.0145196455907595, "grad_norm": 0.09858279675245285, "learning_rate": 0.002, "loss": 2.3473, "step": 262440 }, { "epoch": 1.0145583027941427, "grad_norm": 0.09995760768651962, "learning_rate": 0.002, "loss": 2.333, "step": 262450 }, { "epoch": 1.014596959997526, "grad_norm": 0.12695086002349854, "learning_rate": 0.002, "loss": 2.3367, "step": 262460 }, { "epoch": 1.0146356172009092, "grad_norm": 0.08780010044574738, "learning_rate": 0.002, "loss": 2.3448, "step": 262470 }, { "epoch": 1.0146742744042925, "grad_norm": 0.10888690501451492, "learning_rate": 0.002, "loss": 2.335, "step": 262480 }, { "epoch": 1.0147129316076757, "grad_norm": 0.0955919399857521, "learning_rate": 0.002, "loss": 2.3338, "step": 262490 }, { "epoch": 1.014751588811059, "grad_norm": 0.10459954291582108, "learning_rate": 0.002, "loss": 2.3344, "step": 262500 }, { "epoch": 1.0147902460144422, "grad_norm": 0.10299643874168396, "learning_rate": 0.002, "loss": 2.3213, "step": 262510 }, { "epoch": 1.0148289032178257, "grad_norm": 0.10566985607147217, "learning_rate": 0.002, "loss": 2.3249, "step": 262520 }, { "epoch": 1.014867560421209, "grad_norm": 0.10248414427042007, "learning_rate": 0.002, "loss": 2.3281, "step": 262530 }, { "epoch": 1.0149062176245922, "grad_norm": 0.09281566739082336, "learning_rate": 0.002, "loss": 2.3469, "step": 262540 }, { "epoch": 1.0149448748279755, "grad_norm": 0.10866890102624893, "learning_rate": 0.002, "loss": 2.3335, "step": 262550 }, { "epoch": 1.0149835320313587, "grad_norm": 0.10817757993936539, "learning_rate": 0.002, "loss": 2.3351, "step": 262560 }, { "epoch": 1.015022189234742, "grad_norm": 0.10576806217432022, "learning_rate": 0.002, "loss": 2.346, "step": 262570 }, { "epoch": 1.0150608464381252, "grad_norm": 0.10843439400196075, "learning_rate": 0.002, "loss": 2.3492, "step": 262580 }, { "epoch": 1.0150995036415085, "grad_norm": 0.09107603132724762, "learning_rate": 0.002, "loss": 2.3299, "step": 262590 }, { "epoch": 1.015138160844892, "grad_norm": 0.10346271842718124, "learning_rate": 0.002, "loss": 2.3333, "step": 262600 }, { "epoch": 1.0151768180482752, "grad_norm": 0.10407814383506775, "learning_rate": 0.002, "loss": 2.337, "step": 262610 }, { "epoch": 1.0152154752516585, "grad_norm": 0.09561081975698471, "learning_rate": 0.002, "loss": 2.3452, "step": 262620 }, { "epoch": 1.0152541324550417, "grad_norm": 0.09386394917964935, "learning_rate": 0.002, "loss": 2.341, "step": 262630 }, { "epoch": 1.015292789658425, "grad_norm": 0.13108910620212555, "learning_rate": 0.002, "loss": 2.3406, "step": 262640 }, { "epoch": 1.0153314468618082, "grad_norm": 0.1079326868057251, "learning_rate": 0.002, "loss": 2.3387, "step": 262650 }, { "epoch": 1.0153701040651915, "grad_norm": 0.12318453937768936, "learning_rate": 0.002, "loss": 2.3442, "step": 262660 }, { "epoch": 1.0154087612685747, "grad_norm": 0.10267659276723862, "learning_rate": 0.002, "loss": 2.3379, "step": 262670 }, { "epoch": 1.015447418471958, "grad_norm": 0.10497715324163437, "learning_rate": 0.002, "loss": 2.358, "step": 262680 }, { "epoch": 1.0154860756753414, "grad_norm": 0.1006128340959549, "learning_rate": 0.002, "loss": 2.3326, "step": 262690 }, { "epoch": 1.0155247328787247, "grad_norm": 0.10568059980869293, "learning_rate": 0.002, "loss": 2.3461, "step": 262700 }, { "epoch": 1.015563390082108, "grad_norm": 0.09955970197916031, "learning_rate": 0.002, "loss": 2.3254, "step": 262710 }, { "epoch": 1.0156020472854912, "grad_norm": 0.10910652577877045, "learning_rate": 0.002, "loss": 2.3377, "step": 262720 }, { "epoch": 1.0156407044888744, "grad_norm": 0.11949540674686432, "learning_rate": 0.002, "loss": 2.3278, "step": 262730 }, { "epoch": 1.0156793616922577, "grad_norm": 0.1253557801246643, "learning_rate": 0.002, "loss": 2.3258, "step": 262740 }, { "epoch": 1.015718018895641, "grad_norm": 0.11321611702442169, "learning_rate": 0.002, "loss": 2.3453, "step": 262750 }, { "epoch": 1.0157566760990242, "grad_norm": 0.0908082127571106, "learning_rate": 0.002, "loss": 2.3261, "step": 262760 }, { "epoch": 1.0157953333024077, "grad_norm": 0.1201312318444252, "learning_rate": 0.002, "loss": 2.3471, "step": 262770 }, { "epoch": 1.015833990505791, "grad_norm": 0.10379394143819809, "learning_rate": 0.002, "loss": 2.3521, "step": 262780 }, { "epoch": 1.0158726477091742, "grad_norm": 0.10312970727682114, "learning_rate": 0.002, "loss": 2.3436, "step": 262790 }, { "epoch": 1.0159113049125574, "grad_norm": 0.1026928648352623, "learning_rate": 0.002, "loss": 2.3336, "step": 262800 }, { "epoch": 1.0159499621159407, "grad_norm": 0.10575632005929947, "learning_rate": 0.002, "loss": 2.3277, "step": 262810 }, { "epoch": 1.015988619319324, "grad_norm": 0.11193332076072693, "learning_rate": 0.002, "loss": 2.3396, "step": 262820 }, { "epoch": 1.0160272765227072, "grad_norm": 0.11323326081037521, "learning_rate": 0.002, "loss": 2.3255, "step": 262830 }, { "epoch": 1.0160659337260904, "grad_norm": 0.13184423744678497, "learning_rate": 0.002, "loss": 2.3588, "step": 262840 }, { "epoch": 1.0161045909294737, "grad_norm": 0.10372630506753922, "learning_rate": 0.002, "loss": 2.3502, "step": 262850 }, { "epoch": 1.0161432481328572, "grad_norm": 0.08913901448249817, "learning_rate": 0.002, "loss": 2.3301, "step": 262860 }, { "epoch": 1.0161819053362404, "grad_norm": 0.10794923454523087, "learning_rate": 0.002, "loss": 2.3338, "step": 262870 }, { "epoch": 1.0162205625396237, "grad_norm": 0.09986615180969238, "learning_rate": 0.002, "loss": 2.3382, "step": 262880 }, { "epoch": 1.016259219743007, "grad_norm": 0.10644876956939697, "learning_rate": 0.002, "loss": 2.3391, "step": 262890 }, { "epoch": 1.0162978769463902, "grad_norm": 0.11530308425426483, "learning_rate": 0.002, "loss": 2.3456, "step": 262900 }, { "epoch": 1.0163365341497734, "grad_norm": 0.1062793955206871, "learning_rate": 0.002, "loss": 2.3369, "step": 262910 }, { "epoch": 1.0163751913531567, "grad_norm": 0.10342157632112503, "learning_rate": 0.002, "loss": 2.3407, "step": 262920 }, { "epoch": 1.01641384855654, "grad_norm": 0.09291832149028778, "learning_rate": 0.002, "loss": 2.3504, "step": 262930 }, { "epoch": 1.0164525057599234, "grad_norm": 0.09074714034795761, "learning_rate": 0.002, "loss": 2.3306, "step": 262940 }, { "epoch": 1.0164911629633067, "grad_norm": 0.1163024976849556, "learning_rate": 0.002, "loss": 2.3458, "step": 262950 }, { "epoch": 1.01652982016669, "grad_norm": 0.12023130804300308, "learning_rate": 0.002, "loss": 2.3446, "step": 262960 }, { "epoch": 1.0165684773700732, "grad_norm": 0.12284992635250092, "learning_rate": 0.002, "loss": 2.3349, "step": 262970 }, { "epoch": 1.0166071345734564, "grad_norm": 0.09862682223320007, "learning_rate": 0.002, "loss": 2.3464, "step": 262980 }, { "epoch": 1.0166457917768397, "grad_norm": 0.09955253452062607, "learning_rate": 0.002, "loss": 2.341, "step": 262990 }, { "epoch": 1.016684448980223, "grad_norm": 0.1211884468793869, "learning_rate": 0.002, "loss": 2.3467, "step": 263000 }, { "epoch": 1.0167231061836062, "grad_norm": 0.11086461693048477, "learning_rate": 0.002, "loss": 2.3426, "step": 263010 }, { "epoch": 1.0167617633869894, "grad_norm": 0.09472446143627167, "learning_rate": 0.002, "loss": 2.3481, "step": 263020 }, { "epoch": 1.016800420590373, "grad_norm": 0.11582635343074799, "learning_rate": 0.002, "loss": 2.3453, "step": 263030 }, { "epoch": 1.0168390777937562, "grad_norm": 0.09361955523490906, "learning_rate": 0.002, "loss": 2.3407, "step": 263040 }, { "epoch": 1.0168777349971394, "grad_norm": 0.11129572242498398, "learning_rate": 0.002, "loss": 2.3362, "step": 263050 }, { "epoch": 1.0169163922005227, "grad_norm": 0.10749483853578568, "learning_rate": 0.002, "loss": 2.3297, "step": 263060 }, { "epoch": 1.016955049403906, "grad_norm": 0.10947995632886887, "learning_rate": 0.002, "loss": 2.339, "step": 263070 }, { "epoch": 1.0169937066072892, "grad_norm": 0.09116199612617493, "learning_rate": 0.002, "loss": 2.3445, "step": 263080 }, { "epoch": 1.0170323638106724, "grad_norm": 0.11516207456588745, "learning_rate": 0.002, "loss": 2.3229, "step": 263090 }, { "epoch": 1.0170710210140557, "grad_norm": 0.10246528685092926, "learning_rate": 0.002, "loss": 2.3286, "step": 263100 }, { "epoch": 1.0171096782174391, "grad_norm": 0.10460419207811356, "learning_rate": 0.002, "loss": 2.3457, "step": 263110 }, { "epoch": 1.0171483354208224, "grad_norm": 0.10269016772508621, "learning_rate": 0.002, "loss": 2.3439, "step": 263120 }, { "epoch": 1.0171869926242056, "grad_norm": 0.0924811139702797, "learning_rate": 0.002, "loss": 2.3284, "step": 263130 }, { "epoch": 1.017225649827589, "grad_norm": 0.10778999328613281, "learning_rate": 0.002, "loss": 2.3363, "step": 263140 }, { "epoch": 1.0172643070309721, "grad_norm": 0.10321035981178284, "learning_rate": 0.002, "loss": 2.3301, "step": 263150 }, { "epoch": 1.0173029642343554, "grad_norm": 0.10112766176462173, "learning_rate": 0.002, "loss": 2.3263, "step": 263160 }, { "epoch": 1.0173416214377387, "grad_norm": 0.08981072902679443, "learning_rate": 0.002, "loss": 2.3401, "step": 263170 }, { "epoch": 1.017380278641122, "grad_norm": 0.1108328253030777, "learning_rate": 0.002, "loss": 2.3335, "step": 263180 }, { "epoch": 1.0174189358445052, "grad_norm": 0.17304444313049316, "learning_rate": 0.002, "loss": 2.3458, "step": 263190 }, { "epoch": 1.0174575930478886, "grad_norm": 0.13185708224773407, "learning_rate": 0.002, "loss": 2.3421, "step": 263200 }, { "epoch": 1.0174962502512719, "grad_norm": 0.12104866653680801, "learning_rate": 0.002, "loss": 2.334, "step": 263210 }, { "epoch": 1.0175349074546551, "grad_norm": 0.10302011668682098, "learning_rate": 0.002, "loss": 2.3496, "step": 263220 }, { "epoch": 1.0175735646580384, "grad_norm": 0.09544021636247635, "learning_rate": 0.002, "loss": 2.3316, "step": 263230 }, { "epoch": 1.0176122218614216, "grad_norm": 0.10438983142375946, "learning_rate": 0.002, "loss": 2.3272, "step": 263240 }, { "epoch": 1.017650879064805, "grad_norm": 0.341610312461853, "learning_rate": 0.002, "loss": 2.343, "step": 263250 }, { "epoch": 1.0176895362681881, "grad_norm": 0.2851675748825073, "learning_rate": 0.002, "loss": 2.3384, "step": 263260 }, { "epoch": 1.0177281934715714, "grad_norm": 0.40568825602531433, "learning_rate": 0.002, "loss": 2.3302, "step": 263270 }, { "epoch": 1.0177668506749549, "grad_norm": 0.10089606791734695, "learning_rate": 0.002, "loss": 2.3417, "step": 263280 }, { "epoch": 1.0178055078783381, "grad_norm": 0.10405469685792923, "learning_rate": 0.002, "loss": 2.3495, "step": 263290 }, { "epoch": 1.0178441650817214, "grad_norm": 0.1027701199054718, "learning_rate": 0.002, "loss": 2.3377, "step": 263300 }, { "epoch": 1.0178828222851046, "grad_norm": 0.09409894049167633, "learning_rate": 0.002, "loss": 2.3381, "step": 263310 }, { "epoch": 1.0179214794884879, "grad_norm": 0.12788711488246918, "learning_rate": 0.002, "loss": 2.3453, "step": 263320 }, { "epoch": 1.0179601366918711, "grad_norm": 0.12433791160583496, "learning_rate": 0.002, "loss": 2.3244, "step": 263330 }, { "epoch": 1.0179987938952544, "grad_norm": 0.0981535017490387, "learning_rate": 0.002, "loss": 2.3255, "step": 263340 }, { "epoch": 1.0180374510986376, "grad_norm": 0.1006656065583229, "learning_rate": 0.002, "loss": 2.3309, "step": 263350 }, { "epoch": 1.0180761083020209, "grad_norm": 0.12272592633962631, "learning_rate": 0.002, "loss": 2.3181, "step": 263360 }, { "epoch": 1.0181147655054044, "grad_norm": 0.10499448329210281, "learning_rate": 0.002, "loss": 2.3477, "step": 263370 }, { "epoch": 1.0181534227087876, "grad_norm": 0.09188871830701828, "learning_rate": 0.002, "loss": 2.3425, "step": 263380 }, { "epoch": 1.0181920799121709, "grad_norm": 0.10030457377433777, "learning_rate": 0.002, "loss": 2.3376, "step": 263390 }, { "epoch": 1.0182307371155541, "grad_norm": 0.11879902333021164, "learning_rate": 0.002, "loss": 2.3422, "step": 263400 }, { "epoch": 1.0182693943189374, "grad_norm": 0.09785284847021103, "learning_rate": 0.002, "loss": 2.3535, "step": 263410 }, { "epoch": 1.0183080515223206, "grad_norm": 0.10970498621463776, "learning_rate": 0.002, "loss": 2.3496, "step": 263420 }, { "epoch": 1.0183467087257039, "grad_norm": 0.10424668341875076, "learning_rate": 0.002, "loss": 2.3294, "step": 263430 }, { "epoch": 1.0183853659290871, "grad_norm": 0.09910289198160172, "learning_rate": 0.002, "loss": 2.3554, "step": 263440 }, { "epoch": 1.0184240231324706, "grad_norm": 0.09556056559085846, "learning_rate": 0.002, "loss": 2.3448, "step": 263450 }, { "epoch": 1.0184626803358539, "grad_norm": 0.09163806587457657, "learning_rate": 0.002, "loss": 2.3388, "step": 263460 }, { "epoch": 1.018501337539237, "grad_norm": 0.11754380911588669, "learning_rate": 0.002, "loss": 2.3355, "step": 263470 }, { "epoch": 1.0185399947426204, "grad_norm": 0.10090960562229156, "learning_rate": 0.002, "loss": 2.3419, "step": 263480 }, { "epoch": 1.0185786519460036, "grad_norm": 0.10696564614772797, "learning_rate": 0.002, "loss": 2.3503, "step": 263490 }, { "epoch": 1.0186173091493869, "grad_norm": 0.11893752962350845, "learning_rate": 0.002, "loss": 2.3482, "step": 263500 }, { "epoch": 1.0186559663527701, "grad_norm": 0.12111511826515198, "learning_rate": 0.002, "loss": 2.3446, "step": 263510 }, { "epoch": 1.0186946235561534, "grad_norm": 0.10253813862800598, "learning_rate": 0.002, "loss": 2.3393, "step": 263520 }, { "epoch": 1.0187332807595368, "grad_norm": 0.09811771661043167, "learning_rate": 0.002, "loss": 2.3524, "step": 263530 }, { "epoch": 1.01877193796292, "grad_norm": 0.09001462906599045, "learning_rate": 0.002, "loss": 2.3262, "step": 263540 }, { "epoch": 1.0188105951663033, "grad_norm": 0.11998558044433594, "learning_rate": 0.002, "loss": 2.3263, "step": 263550 }, { "epoch": 1.0188492523696866, "grad_norm": 0.11388974636793137, "learning_rate": 0.002, "loss": 2.3253, "step": 263560 }, { "epoch": 1.0188879095730698, "grad_norm": 0.10066687315702438, "learning_rate": 0.002, "loss": 2.3468, "step": 263570 }, { "epoch": 1.018926566776453, "grad_norm": 0.09237883985042572, "learning_rate": 0.002, "loss": 2.3387, "step": 263580 }, { "epoch": 1.0189652239798364, "grad_norm": 0.1033594161272049, "learning_rate": 0.002, "loss": 2.3381, "step": 263590 }, { "epoch": 1.0190038811832196, "grad_norm": 0.12771809101104736, "learning_rate": 0.002, "loss": 2.3289, "step": 263600 }, { "epoch": 1.0190425383866029, "grad_norm": 0.10033108294010162, "learning_rate": 0.002, "loss": 2.3369, "step": 263610 }, { "epoch": 1.0190811955899863, "grad_norm": 0.09493099898099899, "learning_rate": 0.002, "loss": 2.3469, "step": 263620 }, { "epoch": 1.0191198527933696, "grad_norm": 0.10437005013227463, "learning_rate": 0.002, "loss": 2.3272, "step": 263630 }, { "epoch": 1.0191585099967528, "grad_norm": 0.10589440166950226, "learning_rate": 0.002, "loss": 2.3334, "step": 263640 }, { "epoch": 1.019197167200136, "grad_norm": 0.12936919927597046, "learning_rate": 0.002, "loss": 2.3383, "step": 263650 }, { "epoch": 1.0192358244035193, "grad_norm": 0.09363681823015213, "learning_rate": 0.002, "loss": 2.3483, "step": 263660 }, { "epoch": 1.0192744816069026, "grad_norm": 0.122968889772892, "learning_rate": 0.002, "loss": 2.3386, "step": 263670 }, { "epoch": 1.0193131388102858, "grad_norm": 0.09942426532506943, "learning_rate": 0.002, "loss": 2.3265, "step": 263680 }, { "epoch": 1.019351796013669, "grad_norm": 0.09376782178878784, "learning_rate": 0.002, "loss": 2.3411, "step": 263690 }, { "epoch": 1.0193904532170526, "grad_norm": 0.08810889720916748, "learning_rate": 0.002, "loss": 2.3291, "step": 263700 }, { "epoch": 1.0194291104204358, "grad_norm": 0.11189023405313492, "learning_rate": 0.002, "loss": 2.3359, "step": 263710 }, { "epoch": 1.019467767623819, "grad_norm": 0.10335662215948105, "learning_rate": 0.002, "loss": 2.3429, "step": 263720 }, { "epoch": 1.0195064248272023, "grad_norm": 0.10368465632200241, "learning_rate": 0.002, "loss": 2.3407, "step": 263730 }, { "epoch": 1.0195450820305856, "grad_norm": 0.10068806260824203, "learning_rate": 0.002, "loss": 2.3446, "step": 263740 }, { "epoch": 1.0195837392339688, "grad_norm": 0.10646512359380722, "learning_rate": 0.002, "loss": 2.3308, "step": 263750 }, { "epoch": 1.019622396437352, "grad_norm": 0.09691212326288223, "learning_rate": 0.002, "loss": 2.3447, "step": 263760 }, { "epoch": 1.0196610536407353, "grad_norm": 0.11735524982213974, "learning_rate": 0.002, "loss": 2.3519, "step": 263770 }, { "epoch": 1.0196997108441186, "grad_norm": 0.09249353408813477, "learning_rate": 0.002, "loss": 2.3494, "step": 263780 }, { "epoch": 1.019738368047502, "grad_norm": 0.10997557640075684, "learning_rate": 0.002, "loss": 2.3356, "step": 263790 }, { "epoch": 1.0197770252508853, "grad_norm": 0.11306982487440109, "learning_rate": 0.002, "loss": 2.3411, "step": 263800 }, { "epoch": 1.0198156824542686, "grad_norm": 0.11018241196870804, "learning_rate": 0.002, "loss": 2.3477, "step": 263810 }, { "epoch": 1.0198543396576518, "grad_norm": 0.12374841421842575, "learning_rate": 0.002, "loss": 2.3394, "step": 263820 }, { "epoch": 1.019892996861035, "grad_norm": 0.10849356651306152, "learning_rate": 0.002, "loss": 2.3384, "step": 263830 }, { "epoch": 1.0199316540644183, "grad_norm": 0.09676603972911835, "learning_rate": 0.002, "loss": 2.3328, "step": 263840 }, { "epoch": 1.0199703112678016, "grad_norm": 0.11422029882669449, "learning_rate": 0.002, "loss": 2.3294, "step": 263850 }, { "epoch": 1.0200089684711848, "grad_norm": 0.10637599229812622, "learning_rate": 0.002, "loss": 2.334, "step": 263860 }, { "epoch": 1.0200476256745683, "grad_norm": 0.10892908275127411, "learning_rate": 0.002, "loss": 2.3478, "step": 263870 }, { "epoch": 1.0200862828779516, "grad_norm": 0.0975891724228859, "learning_rate": 0.002, "loss": 2.3364, "step": 263880 }, { "epoch": 1.0201249400813348, "grad_norm": 0.10563914477825165, "learning_rate": 0.002, "loss": 2.3348, "step": 263890 }, { "epoch": 1.020163597284718, "grad_norm": 0.13005097210407257, "learning_rate": 0.002, "loss": 2.3429, "step": 263900 }, { "epoch": 1.0202022544881013, "grad_norm": 0.10534238070249557, "learning_rate": 0.002, "loss": 2.3524, "step": 263910 }, { "epoch": 1.0202409116914846, "grad_norm": 0.11824945360422134, "learning_rate": 0.002, "loss": 2.3413, "step": 263920 }, { "epoch": 1.0202795688948678, "grad_norm": 0.1261029839515686, "learning_rate": 0.002, "loss": 2.3285, "step": 263930 }, { "epoch": 1.020318226098251, "grad_norm": 0.10610140115022659, "learning_rate": 0.002, "loss": 2.3405, "step": 263940 }, { "epoch": 1.0203568833016343, "grad_norm": 0.09728217124938965, "learning_rate": 0.002, "loss": 2.3391, "step": 263950 }, { "epoch": 1.0203955405050178, "grad_norm": 0.11157053709030151, "learning_rate": 0.002, "loss": 2.3224, "step": 263960 }, { "epoch": 1.020434197708401, "grad_norm": 0.10309412330389023, "learning_rate": 0.002, "loss": 2.3403, "step": 263970 }, { "epoch": 1.0204728549117843, "grad_norm": 0.1188223734498024, "learning_rate": 0.002, "loss": 2.3363, "step": 263980 }, { "epoch": 1.0205115121151676, "grad_norm": 0.10410062223672867, "learning_rate": 0.002, "loss": 2.3285, "step": 263990 }, { "epoch": 1.0205501693185508, "grad_norm": 0.12492463737726212, "learning_rate": 0.002, "loss": 2.345, "step": 264000 }, { "epoch": 1.020588826521934, "grad_norm": 0.12025908380746841, "learning_rate": 0.002, "loss": 2.3467, "step": 264010 }, { "epoch": 1.0206274837253173, "grad_norm": 0.10721295326948166, "learning_rate": 0.002, "loss": 2.3291, "step": 264020 }, { "epoch": 1.0206661409287006, "grad_norm": 0.0982886552810669, "learning_rate": 0.002, "loss": 2.3511, "step": 264030 }, { "epoch": 1.020704798132084, "grad_norm": 0.091331847012043, "learning_rate": 0.002, "loss": 2.3374, "step": 264040 }, { "epoch": 1.0207434553354673, "grad_norm": 0.08860914409160614, "learning_rate": 0.002, "loss": 2.3398, "step": 264050 }, { "epoch": 1.0207821125388505, "grad_norm": 0.10097967088222504, "learning_rate": 0.002, "loss": 2.3142, "step": 264060 }, { "epoch": 1.0208207697422338, "grad_norm": 0.09905195981264114, "learning_rate": 0.002, "loss": 2.3354, "step": 264070 }, { "epoch": 1.020859426945617, "grad_norm": 0.09901610761880875, "learning_rate": 0.002, "loss": 2.3518, "step": 264080 }, { "epoch": 1.0208980841490003, "grad_norm": 0.11977200210094452, "learning_rate": 0.002, "loss": 2.3406, "step": 264090 }, { "epoch": 1.0209367413523835, "grad_norm": 0.10924248397350311, "learning_rate": 0.002, "loss": 2.3407, "step": 264100 }, { "epoch": 1.0209753985557668, "grad_norm": 0.12424816191196442, "learning_rate": 0.002, "loss": 2.3395, "step": 264110 }, { "epoch": 1.02101405575915, "grad_norm": 0.10222361236810684, "learning_rate": 0.002, "loss": 2.3374, "step": 264120 }, { "epoch": 1.0210527129625335, "grad_norm": 0.09633691608905792, "learning_rate": 0.002, "loss": 2.3238, "step": 264130 }, { "epoch": 1.0210913701659168, "grad_norm": 0.1274564415216446, "learning_rate": 0.002, "loss": 2.3423, "step": 264140 }, { "epoch": 1.0211300273693, "grad_norm": 0.09376756846904755, "learning_rate": 0.002, "loss": 2.3287, "step": 264150 }, { "epoch": 1.0211686845726833, "grad_norm": 0.10471328347921371, "learning_rate": 0.002, "loss": 2.3337, "step": 264160 }, { "epoch": 1.0212073417760665, "grad_norm": 0.1049564778804779, "learning_rate": 0.002, "loss": 2.3411, "step": 264170 }, { "epoch": 1.0212459989794498, "grad_norm": 0.12720157206058502, "learning_rate": 0.002, "loss": 2.3405, "step": 264180 }, { "epoch": 1.021284656182833, "grad_norm": 0.10102929174900055, "learning_rate": 0.002, "loss": 2.3458, "step": 264190 }, { "epoch": 1.0213233133862163, "grad_norm": 0.09525764733552933, "learning_rate": 0.002, "loss": 2.33, "step": 264200 }, { "epoch": 1.0213619705895998, "grad_norm": 0.10542795062065125, "learning_rate": 0.002, "loss": 2.3539, "step": 264210 }, { "epoch": 1.021400627792983, "grad_norm": 0.09321821480989456, "learning_rate": 0.002, "loss": 2.3307, "step": 264220 }, { "epoch": 1.0214392849963663, "grad_norm": 0.09394440799951553, "learning_rate": 0.002, "loss": 2.326, "step": 264230 }, { "epoch": 1.0214779421997495, "grad_norm": 0.10297603905200958, "learning_rate": 0.002, "loss": 2.3297, "step": 264240 }, { "epoch": 1.0215165994031328, "grad_norm": 0.11685995757579803, "learning_rate": 0.002, "loss": 2.3469, "step": 264250 }, { "epoch": 1.021555256606516, "grad_norm": 0.09865585714578629, "learning_rate": 0.002, "loss": 2.342, "step": 264260 }, { "epoch": 1.0215939138098993, "grad_norm": 0.09554792940616608, "learning_rate": 0.002, "loss": 2.3329, "step": 264270 }, { "epoch": 1.0216325710132825, "grad_norm": 0.10560063272714615, "learning_rate": 0.002, "loss": 2.3473, "step": 264280 }, { "epoch": 1.021671228216666, "grad_norm": 0.13222475349903107, "learning_rate": 0.002, "loss": 2.3372, "step": 264290 }, { "epoch": 1.0217098854200493, "grad_norm": 0.09948401898145676, "learning_rate": 0.002, "loss": 2.335, "step": 264300 }, { "epoch": 1.0217485426234325, "grad_norm": 0.09130743891000748, "learning_rate": 0.002, "loss": 2.3422, "step": 264310 }, { "epoch": 1.0217871998268158, "grad_norm": 0.12201028317213058, "learning_rate": 0.002, "loss": 2.3383, "step": 264320 }, { "epoch": 1.021825857030199, "grad_norm": 0.0886576846241951, "learning_rate": 0.002, "loss": 2.3322, "step": 264330 }, { "epoch": 1.0218645142335823, "grad_norm": 0.10200691223144531, "learning_rate": 0.002, "loss": 2.3263, "step": 264340 }, { "epoch": 1.0219031714369655, "grad_norm": 0.10952626913785934, "learning_rate": 0.002, "loss": 2.3375, "step": 264350 }, { "epoch": 1.0219418286403488, "grad_norm": 0.12101206928491592, "learning_rate": 0.002, "loss": 2.3353, "step": 264360 }, { "epoch": 1.021980485843732, "grad_norm": 0.1118728369474411, "learning_rate": 0.002, "loss": 2.3446, "step": 264370 }, { "epoch": 1.0220191430471155, "grad_norm": 0.08517512679100037, "learning_rate": 0.002, "loss": 2.3429, "step": 264380 }, { "epoch": 1.0220578002504987, "grad_norm": 0.11177657544612885, "learning_rate": 0.002, "loss": 2.3236, "step": 264390 }, { "epoch": 1.022096457453882, "grad_norm": 0.09982944279909134, "learning_rate": 0.002, "loss": 2.346, "step": 264400 }, { "epoch": 1.0221351146572653, "grad_norm": 0.09357168525457382, "learning_rate": 0.002, "loss": 2.3485, "step": 264410 }, { "epoch": 1.0221737718606485, "grad_norm": 0.11061622947454453, "learning_rate": 0.002, "loss": 2.3319, "step": 264420 }, { "epoch": 1.0222124290640318, "grad_norm": 0.09883379191160202, "learning_rate": 0.002, "loss": 2.3409, "step": 264430 }, { "epoch": 1.022251086267415, "grad_norm": 0.09657022356987, "learning_rate": 0.002, "loss": 2.3355, "step": 264440 }, { "epoch": 1.0222897434707983, "grad_norm": 0.08338421583175659, "learning_rate": 0.002, "loss": 2.3398, "step": 264450 }, { "epoch": 1.0223284006741817, "grad_norm": 0.15151934325695038, "learning_rate": 0.002, "loss": 2.3351, "step": 264460 }, { "epoch": 1.022367057877565, "grad_norm": 0.11291121691465378, "learning_rate": 0.002, "loss": 2.3397, "step": 264470 }, { "epoch": 1.0224057150809482, "grad_norm": 0.09912551939487457, "learning_rate": 0.002, "loss": 2.3272, "step": 264480 }, { "epoch": 1.0224443722843315, "grad_norm": 0.1053970605134964, "learning_rate": 0.002, "loss": 2.3126, "step": 264490 }, { "epoch": 1.0224830294877147, "grad_norm": 0.11392966657876968, "learning_rate": 0.002, "loss": 2.3486, "step": 264500 }, { "epoch": 1.022521686691098, "grad_norm": 0.10310008376836777, "learning_rate": 0.002, "loss": 2.3343, "step": 264510 }, { "epoch": 1.0225603438944812, "grad_norm": 0.1299499273300171, "learning_rate": 0.002, "loss": 2.3348, "step": 264520 }, { "epoch": 1.0225990010978645, "grad_norm": 0.08739864081144333, "learning_rate": 0.002, "loss": 2.3485, "step": 264530 }, { "epoch": 1.0226376583012478, "grad_norm": 0.11028322577476501, "learning_rate": 0.002, "loss": 2.3353, "step": 264540 }, { "epoch": 1.0226763155046312, "grad_norm": 0.0937427282333374, "learning_rate": 0.002, "loss": 2.3652, "step": 264550 }, { "epoch": 1.0227149727080145, "grad_norm": 0.11551933735609055, "learning_rate": 0.002, "loss": 2.3248, "step": 264560 }, { "epoch": 1.0227536299113977, "grad_norm": 0.12646719813346863, "learning_rate": 0.002, "loss": 2.3308, "step": 264570 }, { "epoch": 1.022792287114781, "grad_norm": 0.09259982407093048, "learning_rate": 0.002, "loss": 2.337, "step": 264580 }, { "epoch": 1.0228309443181642, "grad_norm": 0.19581899046897888, "learning_rate": 0.002, "loss": 2.3349, "step": 264590 }, { "epoch": 1.0228696015215475, "grad_norm": 0.09492383897304535, "learning_rate": 0.002, "loss": 2.3423, "step": 264600 }, { "epoch": 1.0229082587249307, "grad_norm": 0.10611309111118317, "learning_rate": 0.002, "loss": 2.3336, "step": 264610 }, { "epoch": 1.022946915928314, "grad_norm": 0.09840042144060135, "learning_rate": 0.002, "loss": 2.3358, "step": 264620 }, { "epoch": 1.0229855731316975, "grad_norm": 0.10329700261354446, "learning_rate": 0.002, "loss": 2.3372, "step": 264630 }, { "epoch": 1.0230242303350807, "grad_norm": 0.12647895514965057, "learning_rate": 0.002, "loss": 2.3343, "step": 264640 }, { "epoch": 1.023062887538464, "grad_norm": 0.09441398084163666, "learning_rate": 0.002, "loss": 2.337, "step": 264650 }, { "epoch": 1.0231015447418472, "grad_norm": 0.08766429871320724, "learning_rate": 0.002, "loss": 2.3471, "step": 264660 }, { "epoch": 1.0231402019452305, "grad_norm": 0.3329355716705322, "learning_rate": 0.002, "loss": 2.3434, "step": 264670 }, { "epoch": 1.0231788591486137, "grad_norm": 0.11475207656621933, "learning_rate": 0.002, "loss": 2.3461, "step": 264680 }, { "epoch": 1.023217516351997, "grad_norm": 0.09671181440353394, "learning_rate": 0.002, "loss": 2.3272, "step": 264690 }, { "epoch": 1.0232561735553802, "grad_norm": 0.09747885167598724, "learning_rate": 0.002, "loss": 2.329, "step": 264700 }, { "epoch": 1.0232948307587635, "grad_norm": 0.10627662390470505, "learning_rate": 0.002, "loss": 2.3377, "step": 264710 }, { "epoch": 1.023333487962147, "grad_norm": 0.11692077666521072, "learning_rate": 0.002, "loss": 2.3249, "step": 264720 }, { "epoch": 1.0233721451655302, "grad_norm": 0.1147197037935257, "learning_rate": 0.002, "loss": 2.3361, "step": 264730 }, { "epoch": 1.0234108023689135, "grad_norm": 0.09349524974822998, "learning_rate": 0.002, "loss": 2.3414, "step": 264740 }, { "epoch": 1.0234494595722967, "grad_norm": 0.09220931679010391, "learning_rate": 0.002, "loss": 2.3432, "step": 264750 }, { "epoch": 1.02348811677568, "grad_norm": 0.11551348865032196, "learning_rate": 0.002, "loss": 2.3365, "step": 264760 }, { "epoch": 1.0235267739790632, "grad_norm": 0.09737013280391693, "learning_rate": 0.002, "loss": 2.3435, "step": 264770 }, { "epoch": 1.0235654311824465, "grad_norm": 0.1254809945821762, "learning_rate": 0.002, "loss": 2.3425, "step": 264780 }, { "epoch": 1.0236040883858297, "grad_norm": 0.10588192939758301, "learning_rate": 0.002, "loss": 2.3494, "step": 264790 }, { "epoch": 1.0236427455892132, "grad_norm": 0.11644317954778671, "learning_rate": 0.002, "loss": 2.3574, "step": 264800 }, { "epoch": 1.0236814027925965, "grad_norm": 0.16818253695964813, "learning_rate": 0.002, "loss": 2.3422, "step": 264810 }, { "epoch": 1.0237200599959797, "grad_norm": 0.10291523486375809, "learning_rate": 0.002, "loss": 2.3279, "step": 264820 }, { "epoch": 1.023758717199363, "grad_norm": 0.12808586657047272, "learning_rate": 0.002, "loss": 2.345, "step": 264830 }, { "epoch": 1.0237973744027462, "grad_norm": 0.10281138122081757, "learning_rate": 0.002, "loss": 2.3417, "step": 264840 }, { "epoch": 1.0238360316061295, "grad_norm": 0.11928651481866837, "learning_rate": 0.002, "loss": 2.3434, "step": 264850 }, { "epoch": 1.0238746888095127, "grad_norm": 0.09416400641202927, "learning_rate": 0.002, "loss": 2.3221, "step": 264860 }, { "epoch": 1.023913346012896, "grad_norm": 0.10814538598060608, "learning_rate": 0.002, "loss": 2.3357, "step": 264870 }, { "epoch": 1.0239520032162792, "grad_norm": 0.10571702569723129, "learning_rate": 0.002, "loss": 2.3346, "step": 264880 }, { "epoch": 1.0239906604196627, "grad_norm": 0.1148458868265152, "learning_rate": 0.002, "loss": 2.3324, "step": 264890 }, { "epoch": 1.024029317623046, "grad_norm": 0.10211604833602905, "learning_rate": 0.002, "loss": 2.3404, "step": 264900 }, { "epoch": 1.0240679748264292, "grad_norm": 0.10494969040155411, "learning_rate": 0.002, "loss": 2.3292, "step": 264910 }, { "epoch": 1.0241066320298124, "grad_norm": 0.1249437928199768, "learning_rate": 0.002, "loss": 2.3322, "step": 264920 }, { "epoch": 1.0241452892331957, "grad_norm": 0.13109232485294342, "learning_rate": 0.002, "loss": 2.3356, "step": 264930 }, { "epoch": 1.024183946436579, "grad_norm": 0.0910966545343399, "learning_rate": 0.002, "loss": 2.3307, "step": 264940 }, { "epoch": 1.0242226036399622, "grad_norm": 0.10233844816684723, "learning_rate": 0.002, "loss": 2.3451, "step": 264950 }, { "epoch": 1.0242612608433455, "grad_norm": 0.10650918632745743, "learning_rate": 0.002, "loss": 2.329, "step": 264960 }, { "epoch": 1.024299918046729, "grad_norm": 0.13375632464885712, "learning_rate": 0.002, "loss": 2.3421, "step": 264970 }, { "epoch": 1.0243385752501122, "grad_norm": 0.09612470120191574, "learning_rate": 0.002, "loss": 2.3344, "step": 264980 }, { "epoch": 1.0243772324534954, "grad_norm": 0.12482687085866928, "learning_rate": 0.002, "loss": 2.323, "step": 264990 }, { "epoch": 1.0244158896568787, "grad_norm": 0.09300002455711365, "learning_rate": 0.002, "loss": 2.3277, "step": 265000 }, { "epoch": 1.024454546860262, "grad_norm": 0.10589489340782166, "learning_rate": 0.002, "loss": 2.3401, "step": 265010 }, { "epoch": 1.0244932040636452, "grad_norm": 0.10188213735818863, "learning_rate": 0.002, "loss": 2.3361, "step": 265020 }, { "epoch": 1.0245318612670284, "grad_norm": 0.1173492819070816, "learning_rate": 0.002, "loss": 2.3351, "step": 265030 }, { "epoch": 1.0245705184704117, "grad_norm": 0.09907522052526474, "learning_rate": 0.002, "loss": 2.346, "step": 265040 }, { "epoch": 1.024609175673795, "grad_norm": 0.1274970918893814, "learning_rate": 0.002, "loss": 2.3356, "step": 265050 }, { "epoch": 1.0246478328771784, "grad_norm": 0.14310483634471893, "learning_rate": 0.002, "loss": 2.3501, "step": 265060 }, { "epoch": 1.0246864900805617, "grad_norm": 0.10303078591823578, "learning_rate": 0.002, "loss": 2.3478, "step": 265070 }, { "epoch": 1.024725147283945, "grad_norm": 0.122581347823143, "learning_rate": 0.002, "loss": 2.3417, "step": 265080 }, { "epoch": 1.0247638044873282, "grad_norm": 0.0957183688879013, "learning_rate": 0.002, "loss": 2.3405, "step": 265090 }, { "epoch": 1.0248024616907114, "grad_norm": 0.09310661256313324, "learning_rate": 0.002, "loss": 2.3326, "step": 265100 }, { "epoch": 1.0248411188940947, "grad_norm": 0.1101442500948906, "learning_rate": 0.002, "loss": 2.3462, "step": 265110 }, { "epoch": 1.024879776097478, "grad_norm": 0.09229353815317154, "learning_rate": 0.002, "loss": 2.3341, "step": 265120 }, { "epoch": 1.0249184333008612, "grad_norm": 0.12286297231912613, "learning_rate": 0.002, "loss": 2.3266, "step": 265130 }, { "epoch": 1.0249570905042447, "grad_norm": 0.11856617778539658, "learning_rate": 0.002, "loss": 2.3336, "step": 265140 }, { "epoch": 1.024995747707628, "grad_norm": 0.10073439031839371, "learning_rate": 0.002, "loss": 2.3484, "step": 265150 }, { "epoch": 1.0250344049110112, "grad_norm": 0.12509649991989136, "learning_rate": 0.002, "loss": 2.3406, "step": 265160 }, { "epoch": 1.0250730621143944, "grad_norm": 0.09993588179349899, "learning_rate": 0.002, "loss": 2.3391, "step": 265170 }, { "epoch": 1.0251117193177777, "grad_norm": 0.14093492925167084, "learning_rate": 0.002, "loss": 2.3393, "step": 265180 }, { "epoch": 1.025150376521161, "grad_norm": 0.10278402268886566, "learning_rate": 0.002, "loss": 2.3248, "step": 265190 }, { "epoch": 1.0251890337245442, "grad_norm": 0.12884950637817383, "learning_rate": 0.002, "loss": 2.3386, "step": 265200 }, { "epoch": 1.0252276909279274, "grad_norm": 0.09993290901184082, "learning_rate": 0.002, "loss": 2.3263, "step": 265210 }, { "epoch": 1.0252663481313107, "grad_norm": 0.1100497916340828, "learning_rate": 0.002, "loss": 2.3289, "step": 265220 }, { "epoch": 1.0253050053346942, "grad_norm": 0.11035642772912979, "learning_rate": 0.002, "loss": 2.3445, "step": 265230 }, { "epoch": 1.0253436625380774, "grad_norm": 0.10592029988765717, "learning_rate": 0.002, "loss": 2.3429, "step": 265240 }, { "epoch": 1.0253823197414607, "grad_norm": 0.10400436818599701, "learning_rate": 0.002, "loss": 2.3348, "step": 265250 }, { "epoch": 1.025420976944844, "grad_norm": 0.11346562951803207, "learning_rate": 0.002, "loss": 2.3374, "step": 265260 }, { "epoch": 1.0254596341482272, "grad_norm": 0.12035985291004181, "learning_rate": 0.002, "loss": 2.3317, "step": 265270 }, { "epoch": 1.0254982913516104, "grad_norm": 0.1068260669708252, "learning_rate": 0.002, "loss": 2.3252, "step": 265280 }, { "epoch": 1.0255369485549937, "grad_norm": 0.09974275529384613, "learning_rate": 0.002, "loss": 2.3252, "step": 265290 }, { "epoch": 1.025575605758377, "grad_norm": 0.14729996025562286, "learning_rate": 0.002, "loss": 2.3333, "step": 265300 }, { "epoch": 1.0256142629617604, "grad_norm": 0.11352241039276123, "learning_rate": 0.002, "loss": 2.3425, "step": 265310 }, { "epoch": 1.0256529201651436, "grad_norm": 0.09459713101387024, "learning_rate": 0.002, "loss": 2.3444, "step": 265320 }, { "epoch": 1.025691577368527, "grad_norm": 0.10980570316314697, "learning_rate": 0.002, "loss": 2.3442, "step": 265330 }, { "epoch": 1.0257302345719101, "grad_norm": 0.12916293740272522, "learning_rate": 0.002, "loss": 2.3313, "step": 265340 }, { "epoch": 1.0257688917752934, "grad_norm": 0.11712485551834106, "learning_rate": 0.002, "loss": 2.3349, "step": 265350 }, { "epoch": 1.0258075489786767, "grad_norm": 0.09614215046167374, "learning_rate": 0.002, "loss": 2.3296, "step": 265360 }, { "epoch": 1.02584620618206, "grad_norm": 0.1279195100069046, "learning_rate": 0.002, "loss": 2.3476, "step": 265370 }, { "epoch": 1.0258848633854432, "grad_norm": 0.10815688222646713, "learning_rate": 0.002, "loss": 2.339, "step": 265380 }, { "epoch": 1.0259235205888264, "grad_norm": 0.10971692949533463, "learning_rate": 0.002, "loss": 2.3478, "step": 265390 }, { "epoch": 1.0259621777922099, "grad_norm": 0.09672383219003677, "learning_rate": 0.002, "loss": 2.3389, "step": 265400 }, { "epoch": 1.0260008349955931, "grad_norm": 0.21441972255706787, "learning_rate": 0.002, "loss": 2.3289, "step": 265410 }, { "epoch": 1.0260394921989764, "grad_norm": 0.10706193000078201, "learning_rate": 0.002, "loss": 2.3318, "step": 265420 }, { "epoch": 1.0260781494023596, "grad_norm": 0.09036446362733841, "learning_rate": 0.002, "loss": 2.3418, "step": 265430 }, { "epoch": 1.026116806605743, "grad_norm": 0.09791683405637741, "learning_rate": 0.002, "loss": 2.3402, "step": 265440 }, { "epoch": 1.0261554638091261, "grad_norm": 0.09223045408725739, "learning_rate": 0.002, "loss": 2.3546, "step": 265450 }, { "epoch": 1.0261941210125094, "grad_norm": 0.09813971072435379, "learning_rate": 0.002, "loss": 2.3371, "step": 265460 }, { "epoch": 1.0262327782158926, "grad_norm": 0.09205476194620132, "learning_rate": 0.002, "loss": 2.3276, "step": 265470 }, { "epoch": 1.0262714354192761, "grad_norm": 0.10239709168672562, "learning_rate": 0.002, "loss": 2.3421, "step": 265480 }, { "epoch": 1.0263100926226594, "grad_norm": 0.10483129322528839, "learning_rate": 0.002, "loss": 2.325, "step": 265490 }, { "epoch": 1.0263487498260426, "grad_norm": 0.10401138663291931, "learning_rate": 0.002, "loss": 2.3215, "step": 265500 }, { "epoch": 1.0263874070294259, "grad_norm": 0.1368025243282318, "learning_rate": 0.002, "loss": 2.3444, "step": 265510 }, { "epoch": 1.0264260642328091, "grad_norm": 0.11704922467470169, "learning_rate": 0.002, "loss": 2.3278, "step": 265520 }, { "epoch": 1.0264647214361924, "grad_norm": 0.11174594610929489, "learning_rate": 0.002, "loss": 2.3301, "step": 265530 }, { "epoch": 1.0265033786395756, "grad_norm": 0.10312842577695847, "learning_rate": 0.002, "loss": 2.3381, "step": 265540 }, { "epoch": 1.0265420358429589, "grad_norm": 0.10455312579870224, "learning_rate": 0.002, "loss": 2.338, "step": 265550 }, { "epoch": 1.0265806930463424, "grad_norm": 0.09767155349254608, "learning_rate": 0.002, "loss": 2.3275, "step": 265560 }, { "epoch": 1.0266193502497256, "grad_norm": 0.10167326033115387, "learning_rate": 0.002, "loss": 2.3372, "step": 265570 }, { "epoch": 1.0266580074531089, "grad_norm": 0.12255837023258209, "learning_rate": 0.002, "loss": 2.3228, "step": 265580 }, { "epoch": 1.0266966646564921, "grad_norm": 0.10716545581817627, "learning_rate": 0.002, "loss": 2.3368, "step": 265590 }, { "epoch": 1.0267353218598754, "grad_norm": 0.10378210246562958, "learning_rate": 0.002, "loss": 2.3303, "step": 265600 }, { "epoch": 1.0267739790632586, "grad_norm": 0.09660319238901138, "learning_rate": 0.002, "loss": 2.3298, "step": 265610 }, { "epoch": 1.0268126362666419, "grad_norm": 0.10769104957580566, "learning_rate": 0.002, "loss": 2.3433, "step": 265620 }, { "epoch": 1.0268512934700251, "grad_norm": 0.09757567197084427, "learning_rate": 0.002, "loss": 2.3302, "step": 265630 }, { "epoch": 1.0268899506734084, "grad_norm": 0.11647003144025803, "learning_rate": 0.002, "loss": 2.3409, "step": 265640 }, { "epoch": 1.0269286078767919, "grad_norm": 0.10532090067863464, "learning_rate": 0.002, "loss": 2.3396, "step": 265650 }, { "epoch": 1.026967265080175, "grad_norm": 0.11361627280712128, "learning_rate": 0.002, "loss": 2.3448, "step": 265660 }, { "epoch": 1.0270059222835584, "grad_norm": 0.09371964633464813, "learning_rate": 0.002, "loss": 2.3409, "step": 265670 }, { "epoch": 1.0270445794869416, "grad_norm": 0.09509408473968506, "learning_rate": 0.002, "loss": 2.3321, "step": 265680 }, { "epoch": 1.0270832366903249, "grad_norm": 0.11384610086679459, "learning_rate": 0.002, "loss": 2.3424, "step": 265690 }, { "epoch": 1.0271218938937081, "grad_norm": 0.10529225319623947, "learning_rate": 0.002, "loss": 2.3281, "step": 265700 }, { "epoch": 1.0271605510970914, "grad_norm": 0.1519152969121933, "learning_rate": 0.002, "loss": 2.3271, "step": 265710 }, { "epoch": 1.0271992083004746, "grad_norm": 0.10594785213470459, "learning_rate": 0.002, "loss": 2.3315, "step": 265720 }, { "epoch": 1.027237865503858, "grad_norm": 0.10499383509159088, "learning_rate": 0.002, "loss": 2.3429, "step": 265730 }, { "epoch": 1.0272765227072413, "grad_norm": 0.08883464336395264, "learning_rate": 0.002, "loss": 2.3412, "step": 265740 }, { "epoch": 1.0273151799106246, "grad_norm": 0.09568743407726288, "learning_rate": 0.002, "loss": 2.3213, "step": 265750 }, { "epoch": 1.0273538371140079, "grad_norm": 0.11473828554153442, "learning_rate": 0.002, "loss": 2.3377, "step": 265760 }, { "epoch": 1.027392494317391, "grad_norm": 0.11438367515802383, "learning_rate": 0.002, "loss": 2.3291, "step": 265770 }, { "epoch": 1.0274311515207744, "grad_norm": 0.12594550848007202, "learning_rate": 0.002, "loss": 2.3281, "step": 265780 }, { "epoch": 1.0274698087241576, "grad_norm": 0.16199305653572083, "learning_rate": 0.002, "loss": 2.334, "step": 265790 }, { "epoch": 1.0275084659275409, "grad_norm": 0.10604202747344971, "learning_rate": 0.002, "loss": 2.3329, "step": 265800 }, { "epoch": 1.0275471231309241, "grad_norm": 0.12076123803853989, "learning_rate": 0.002, "loss": 2.3512, "step": 265810 }, { "epoch": 1.0275857803343076, "grad_norm": 0.10725189000368118, "learning_rate": 0.002, "loss": 2.3379, "step": 265820 }, { "epoch": 1.0276244375376908, "grad_norm": 0.10174400359392166, "learning_rate": 0.002, "loss": 2.3328, "step": 265830 }, { "epoch": 1.027663094741074, "grad_norm": 0.10425542294979095, "learning_rate": 0.002, "loss": 2.3265, "step": 265840 }, { "epoch": 1.0277017519444573, "grad_norm": 0.091111920773983, "learning_rate": 0.002, "loss": 2.3441, "step": 265850 }, { "epoch": 1.0277404091478406, "grad_norm": 0.11550716310739517, "learning_rate": 0.002, "loss": 2.3375, "step": 265860 }, { "epoch": 1.0277790663512238, "grad_norm": 0.12211589515209198, "learning_rate": 0.002, "loss": 2.3309, "step": 265870 }, { "epoch": 1.027817723554607, "grad_norm": 0.09479061514139175, "learning_rate": 0.002, "loss": 2.3368, "step": 265880 }, { "epoch": 1.0278563807579904, "grad_norm": 0.17094501852989197, "learning_rate": 0.002, "loss": 2.3421, "step": 265890 }, { "epoch": 1.0278950379613738, "grad_norm": 0.11208199709653854, "learning_rate": 0.002, "loss": 2.3288, "step": 265900 }, { "epoch": 1.027933695164757, "grad_norm": 0.10838782042264938, "learning_rate": 0.002, "loss": 2.3416, "step": 265910 }, { "epoch": 1.0279723523681403, "grad_norm": 0.11082717031240463, "learning_rate": 0.002, "loss": 2.3332, "step": 265920 }, { "epoch": 1.0280110095715236, "grad_norm": 0.10645514726638794, "learning_rate": 0.002, "loss": 2.344, "step": 265930 }, { "epoch": 1.0280496667749068, "grad_norm": 0.0926586240530014, "learning_rate": 0.002, "loss": 2.328, "step": 265940 }, { "epoch": 1.02808832397829, "grad_norm": 0.10408720374107361, "learning_rate": 0.002, "loss": 2.3298, "step": 265950 }, { "epoch": 1.0281269811816733, "grad_norm": 0.09145691990852356, "learning_rate": 0.002, "loss": 2.3308, "step": 265960 }, { "epoch": 1.0281656383850566, "grad_norm": 0.09304022043943405, "learning_rate": 0.002, "loss": 2.3328, "step": 265970 }, { "epoch": 1.0282042955884398, "grad_norm": 0.10211073607206345, "learning_rate": 0.002, "loss": 2.3623, "step": 265980 }, { "epoch": 1.0282429527918233, "grad_norm": 0.10932856053113937, "learning_rate": 0.002, "loss": 2.3296, "step": 265990 }, { "epoch": 1.0282816099952066, "grad_norm": 0.1237453743815422, "learning_rate": 0.002, "loss": 2.3327, "step": 266000 }, { "epoch": 1.0283202671985898, "grad_norm": 0.09511668980121613, "learning_rate": 0.002, "loss": 2.338, "step": 266010 }, { "epoch": 1.028358924401973, "grad_norm": 0.10278719663619995, "learning_rate": 0.002, "loss": 2.323, "step": 266020 }, { "epoch": 1.0283975816053563, "grad_norm": 0.1019146591424942, "learning_rate": 0.002, "loss": 2.3532, "step": 266030 }, { "epoch": 1.0284362388087396, "grad_norm": 0.10987775772809982, "learning_rate": 0.002, "loss": 2.3477, "step": 266040 }, { "epoch": 1.0284748960121228, "grad_norm": 0.11195337027311325, "learning_rate": 0.002, "loss": 2.3455, "step": 266050 }, { "epoch": 1.028513553215506, "grad_norm": 0.11950255185365677, "learning_rate": 0.002, "loss": 2.3382, "step": 266060 }, { "epoch": 1.0285522104188896, "grad_norm": 0.1296747326850891, "learning_rate": 0.002, "loss": 2.3441, "step": 266070 }, { "epoch": 1.0285908676222728, "grad_norm": 0.09848557412624359, "learning_rate": 0.002, "loss": 2.3239, "step": 266080 }, { "epoch": 1.028629524825656, "grad_norm": 0.09893085807561874, "learning_rate": 0.002, "loss": 2.3485, "step": 266090 }, { "epoch": 1.0286681820290393, "grad_norm": 0.09465838223695755, "learning_rate": 0.002, "loss": 2.3362, "step": 266100 }, { "epoch": 1.0287068392324226, "grad_norm": 0.09347520768642426, "learning_rate": 0.002, "loss": 2.3321, "step": 266110 }, { "epoch": 1.0287454964358058, "grad_norm": 0.11662112921476364, "learning_rate": 0.002, "loss": 2.3482, "step": 266120 }, { "epoch": 1.028784153639189, "grad_norm": 0.1154218465089798, "learning_rate": 0.002, "loss": 2.3347, "step": 266130 }, { "epoch": 1.0288228108425723, "grad_norm": 0.08964736014604568, "learning_rate": 0.002, "loss": 2.3349, "step": 266140 }, { "epoch": 1.0288614680459558, "grad_norm": 0.11205974221229553, "learning_rate": 0.002, "loss": 2.3308, "step": 266150 }, { "epoch": 1.028900125249339, "grad_norm": 0.10498930513858795, "learning_rate": 0.002, "loss": 2.3422, "step": 266160 }, { "epoch": 1.0289387824527223, "grad_norm": 0.1054898351430893, "learning_rate": 0.002, "loss": 2.3293, "step": 266170 }, { "epoch": 1.0289774396561056, "grad_norm": 0.11319541186094284, "learning_rate": 0.002, "loss": 2.3494, "step": 266180 }, { "epoch": 1.0290160968594888, "grad_norm": 0.10512629896402359, "learning_rate": 0.002, "loss": 2.3289, "step": 266190 }, { "epoch": 1.029054754062872, "grad_norm": 0.11395827680826187, "learning_rate": 0.002, "loss": 2.3262, "step": 266200 }, { "epoch": 1.0290934112662553, "grad_norm": 0.11131727695465088, "learning_rate": 0.002, "loss": 2.3332, "step": 266210 }, { "epoch": 1.0291320684696386, "grad_norm": 0.12667366862297058, "learning_rate": 0.002, "loss": 2.3365, "step": 266220 }, { "epoch": 1.0291707256730218, "grad_norm": 0.10688159614801407, "learning_rate": 0.002, "loss": 2.3376, "step": 266230 }, { "epoch": 1.0292093828764053, "grad_norm": 0.10538176447153091, "learning_rate": 0.002, "loss": 2.3336, "step": 266240 }, { "epoch": 1.0292480400797885, "grad_norm": 0.1173299103975296, "learning_rate": 0.002, "loss": 2.3422, "step": 266250 }, { "epoch": 1.0292866972831718, "grad_norm": 0.11167753487825394, "learning_rate": 0.002, "loss": 2.3337, "step": 266260 }, { "epoch": 1.029325354486555, "grad_norm": 0.09789314866065979, "learning_rate": 0.002, "loss": 2.3336, "step": 266270 }, { "epoch": 1.0293640116899383, "grad_norm": 0.11912738531827927, "learning_rate": 0.002, "loss": 2.3351, "step": 266280 }, { "epoch": 1.0294026688933215, "grad_norm": 0.10356725007295609, "learning_rate": 0.002, "loss": 2.3167, "step": 266290 }, { "epoch": 1.0294413260967048, "grad_norm": 0.11253173649311066, "learning_rate": 0.002, "loss": 2.3409, "step": 266300 }, { "epoch": 1.029479983300088, "grad_norm": 0.09901507943868637, "learning_rate": 0.002, "loss": 2.3311, "step": 266310 }, { "epoch": 1.0295186405034715, "grad_norm": 0.11068379133939743, "learning_rate": 0.002, "loss": 2.3199, "step": 266320 }, { "epoch": 1.0295572977068548, "grad_norm": 0.09040426462888718, "learning_rate": 0.002, "loss": 2.337, "step": 266330 }, { "epoch": 1.029595954910238, "grad_norm": 0.10851092636585236, "learning_rate": 0.002, "loss": 2.3308, "step": 266340 }, { "epoch": 1.0296346121136213, "grad_norm": 0.139420285820961, "learning_rate": 0.002, "loss": 2.3518, "step": 266350 }, { "epoch": 1.0296732693170045, "grad_norm": 0.12576444447040558, "learning_rate": 0.002, "loss": 2.3249, "step": 266360 }, { "epoch": 1.0297119265203878, "grad_norm": 0.08843178302049637, "learning_rate": 0.002, "loss": 2.3484, "step": 266370 }, { "epoch": 1.029750583723771, "grad_norm": 0.10211913287639618, "learning_rate": 0.002, "loss": 2.3409, "step": 266380 }, { "epoch": 1.0297892409271543, "grad_norm": 0.11484231054782867, "learning_rate": 0.002, "loss": 2.3386, "step": 266390 }, { "epoch": 1.0298278981305375, "grad_norm": 0.13214653730392456, "learning_rate": 0.002, "loss": 2.3315, "step": 266400 }, { "epoch": 1.029866555333921, "grad_norm": 0.11222288012504578, "learning_rate": 0.002, "loss": 2.3466, "step": 266410 }, { "epoch": 1.0299052125373043, "grad_norm": 0.10335548967123032, "learning_rate": 0.002, "loss": 2.3357, "step": 266420 }, { "epoch": 1.0299438697406875, "grad_norm": 0.0991593673825264, "learning_rate": 0.002, "loss": 2.3288, "step": 266430 }, { "epoch": 1.0299825269440708, "grad_norm": 0.09273135662078857, "learning_rate": 0.002, "loss": 2.3402, "step": 266440 }, { "epoch": 1.030021184147454, "grad_norm": 0.1204661950469017, "learning_rate": 0.002, "loss": 2.3344, "step": 266450 }, { "epoch": 1.0300598413508373, "grad_norm": 0.10633353143930435, "learning_rate": 0.002, "loss": 2.3565, "step": 266460 }, { "epoch": 1.0300984985542205, "grad_norm": 0.09771783649921417, "learning_rate": 0.002, "loss": 2.3353, "step": 266470 }, { "epoch": 1.0301371557576038, "grad_norm": 0.11200734972953796, "learning_rate": 0.002, "loss": 2.3311, "step": 266480 }, { "epoch": 1.0301758129609873, "grad_norm": 0.08626644313335419, "learning_rate": 0.002, "loss": 2.3485, "step": 266490 }, { "epoch": 1.0302144701643705, "grad_norm": 0.11899591237306595, "learning_rate": 0.002, "loss": 2.3432, "step": 266500 }, { "epoch": 1.0302531273677538, "grad_norm": 0.10089579224586487, "learning_rate": 0.002, "loss": 2.3358, "step": 266510 }, { "epoch": 1.030291784571137, "grad_norm": 0.1076754629611969, "learning_rate": 0.002, "loss": 2.3312, "step": 266520 }, { "epoch": 1.0303304417745203, "grad_norm": 0.11318105459213257, "learning_rate": 0.002, "loss": 2.3541, "step": 266530 }, { "epoch": 1.0303690989779035, "grad_norm": 0.10871787369251251, "learning_rate": 0.002, "loss": 2.328, "step": 266540 }, { "epoch": 1.0304077561812868, "grad_norm": 0.09567765146493912, "learning_rate": 0.002, "loss": 2.3484, "step": 266550 }, { "epoch": 1.03044641338467, "grad_norm": 0.13377155363559723, "learning_rate": 0.002, "loss": 2.3361, "step": 266560 }, { "epoch": 1.0304850705880533, "grad_norm": 0.10268377512693405, "learning_rate": 0.002, "loss": 2.3375, "step": 266570 }, { "epoch": 1.0305237277914367, "grad_norm": 0.12028588354587555, "learning_rate": 0.002, "loss": 2.3362, "step": 266580 }, { "epoch": 1.03056238499482, "grad_norm": 0.1063971221446991, "learning_rate": 0.002, "loss": 2.3364, "step": 266590 }, { "epoch": 1.0306010421982033, "grad_norm": 0.12419982999563217, "learning_rate": 0.002, "loss": 2.3346, "step": 266600 }, { "epoch": 1.0306396994015865, "grad_norm": 0.11007034033536911, "learning_rate": 0.002, "loss": 2.3473, "step": 266610 }, { "epoch": 1.0306783566049698, "grad_norm": 0.10347622632980347, "learning_rate": 0.002, "loss": 2.3271, "step": 266620 }, { "epoch": 1.030717013808353, "grad_norm": 0.10151407867670059, "learning_rate": 0.002, "loss": 2.3374, "step": 266630 }, { "epoch": 1.0307556710117363, "grad_norm": 0.1266144961118698, "learning_rate": 0.002, "loss": 2.3328, "step": 266640 }, { "epoch": 1.0307943282151195, "grad_norm": 0.10101339221000671, "learning_rate": 0.002, "loss": 2.3403, "step": 266650 }, { "epoch": 1.030832985418503, "grad_norm": 0.11579057574272156, "learning_rate": 0.002, "loss": 2.3284, "step": 266660 }, { "epoch": 1.0308716426218862, "grad_norm": 0.10573630034923553, "learning_rate": 0.002, "loss": 2.3298, "step": 266670 }, { "epoch": 1.0309102998252695, "grad_norm": 0.1263340413570404, "learning_rate": 0.002, "loss": 2.328, "step": 266680 }, { "epoch": 1.0309489570286527, "grad_norm": 0.09813540428876877, "learning_rate": 0.002, "loss": 2.3174, "step": 266690 }, { "epoch": 1.030987614232036, "grad_norm": 0.10262829065322876, "learning_rate": 0.002, "loss": 2.3408, "step": 266700 }, { "epoch": 1.0310262714354193, "grad_norm": 0.1933591216802597, "learning_rate": 0.002, "loss": 2.3303, "step": 266710 }, { "epoch": 1.0310649286388025, "grad_norm": 0.11691686511039734, "learning_rate": 0.002, "loss": 2.3461, "step": 266720 }, { "epoch": 1.0311035858421858, "grad_norm": 0.10910283774137497, "learning_rate": 0.002, "loss": 2.3205, "step": 266730 }, { "epoch": 1.031142243045569, "grad_norm": 0.09778497368097305, "learning_rate": 0.002, "loss": 2.352, "step": 266740 }, { "epoch": 1.0311809002489525, "grad_norm": 0.12451784312725067, "learning_rate": 0.002, "loss": 2.3498, "step": 266750 }, { "epoch": 1.0312195574523357, "grad_norm": 0.10196122527122498, "learning_rate": 0.002, "loss": 2.3234, "step": 266760 }, { "epoch": 1.031258214655719, "grad_norm": 0.10703619569540024, "learning_rate": 0.002, "loss": 2.336, "step": 266770 }, { "epoch": 1.0312968718591022, "grad_norm": 0.1130760908126831, "learning_rate": 0.002, "loss": 2.3387, "step": 266780 }, { "epoch": 1.0313355290624855, "grad_norm": 0.12371481209993362, "learning_rate": 0.002, "loss": 2.3541, "step": 266790 }, { "epoch": 1.0313741862658687, "grad_norm": 0.11786756664514542, "learning_rate": 0.002, "loss": 2.3379, "step": 266800 }, { "epoch": 1.031412843469252, "grad_norm": 0.11346852779388428, "learning_rate": 0.002, "loss": 2.3403, "step": 266810 }, { "epoch": 1.0314515006726352, "grad_norm": 0.09816392511129379, "learning_rate": 0.002, "loss": 2.3412, "step": 266820 }, { "epoch": 1.0314901578760187, "grad_norm": 0.09450547397136688, "learning_rate": 0.002, "loss": 2.3424, "step": 266830 }, { "epoch": 1.031528815079402, "grad_norm": 0.1281440556049347, "learning_rate": 0.002, "loss": 2.3352, "step": 266840 }, { "epoch": 1.0315674722827852, "grad_norm": 0.10921774804592133, "learning_rate": 0.002, "loss": 2.3338, "step": 266850 }, { "epoch": 1.0316061294861685, "grad_norm": 0.12215537577867508, "learning_rate": 0.002, "loss": 2.3384, "step": 266860 }, { "epoch": 1.0316447866895517, "grad_norm": 0.09862463921308517, "learning_rate": 0.002, "loss": 2.3402, "step": 266870 }, { "epoch": 1.031683443892935, "grad_norm": 0.30553138256073, "learning_rate": 0.002, "loss": 2.3513, "step": 266880 }, { "epoch": 1.0317221010963182, "grad_norm": 0.10710947215557098, "learning_rate": 0.002, "loss": 2.3382, "step": 266890 }, { "epoch": 1.0317607582997015, "grad_norm": 0.09189295023679733, "learning_rate": 0.002, "loss": 2.3463, "step": 266900 }, { "epoch": 1.0317994155030847, "grad_norm": 0.10326312482357025, "learning_rate": 0.002, "loss": 2.3393, "step": 266910 }, { "epoch": 1.0318380727064682, "grad_norm": 0.10674124211072922, "learning_rate": 0.002, "loss": 2.3438, "step": 266920 }, { "epoch": 1.0318767299098515, "grad_norm": 0.11148896813392639, "learning_rate": 0.002, "loss": 2.3386, "step": 266930 }, { "epoch": 1.0319153871132347, "grad_norm": 0.10347460955381393, "learning_rate": 0.002, "loss": 2.3409, "step": 266940 }, { "epoch": 1.031954044316618, "grad_norm": 0.09875006228685379, "learning_rate": 0.002, "loss": 2.3371, "step": 266950 }, { "epoch": 1.0319927015200012, "grad_norm": 0.129219651222229, "learning_rate": 0.002, "loss": 2.319, "step": 266960 }, { "epoch": 1.0320313587233845, "grad_norm": 0.11608944833278656, "learning_rate": 0.002, "loss": 2.3202, "step": 266970 }, { "epoch": 1.0320700159267677, "grad_norm": 0.1069372147321701, "learning_rate": 0.002, "loss": 2.3313, "step": 266980 }, { "epoch": 1.032108673130151, "grad_norm": 0.08900687843561172, "learning_rate": 0.002, "loss": 2.3249, "step": 266990 }, { "epoch": 1.0321473303335345, "grad_norm": 0.11886018514633179, "learning_rate": 0.002, "loss": 2.3501, "step": 267000 }, { "epoch": 1.0321859875369177, "grad_norm": 0.08972641080617905, "learning_rate": 0.002, "loss": 2.3273, "step": 267010 }, { "epoch": 1.032224644740301, "grad_norm": 0.10585705190896988, "learning_rate": 0.002, "loss": 2.3511, "step": 267020 }, { "epoch": 1.0322633019436842, "grad_norm": 0.10356702655553818, "learning_rate": 0.002, "loss": 2.332, "step": 267030 }, { "epoch": 1.0323019591470675, "grad_norm": 0.09595673531293869, "learning_rate": 0.002, "loss": 2.3342, "step": 267040 }, { "epoch": 1.0323406163504507, "grad_norm": 0.10113181918859482, "learning_rate": 0.002, "loss": 2.326, "step": 267050 }, { "epoch": 1.032379273553834, "grad_norm": 0.11091704666614532, "learning_rate": 0.002, "loss": 2.3422, "step": 267060 }, { "epoch": 1.0324179307572172, "grad_norm": 0.10351219028234482, "learning_rate": 0.002, "loss": 2.3426, "step": 267070 }, { "epoch": 1.0324565879606005, "grad_norm": 0.12491386383771896, "learning_rate": 0.002, "loss": 2.3272, "step": 267080 }, { "epoch": 1.032495245163984, "grad_norm": 0.10733221471309662, "learning_rate": 0.002, "loss": 2.3477, "step": 267090 }, { "epoch": 1.0325339023673672, "grad_norm": 0.10156510770320892, "learning_rate": 0.002, "loss": 2.3382, "step": 267100 }, { "epoch": 1.0325725595707504, "grad_norm": 0.10368265956640244, "learning_rate": 0.002, "loss": 2.3353, "step": 267110 }, { "epoch": 1.0326112167741337, "grad_norm": 0.10400926321744919, "learning_rate": 0.002, "loss": 2.3309, "step": 267120 }, { "epoch": 1.032649873977517, "grad_norm": 0.1019158810377121, "learning_rate": 0.002, "loss": 2.3368, "step": 267130 }, { "epoch": 1.0326885311809002, "grad_norm": 0.11867933720350266, "learning_rate": 0.002, "loss": 2.3339, "step": 267140 }, { "epoch": 1.0327271883842835, "grad_norm": 0.09400109946727753, "learning_rate": 0.002, "loss": 2.3361, "step": 267150 }, { "epoch": 1.0327658455876667, "grad_norm": 0.10336810350418091, "learning_rate": 0.002, "loss": 2.3297, "step": 267160 }, { "epoch": 1.0328045027910502, "grad_norm": 0.11142666637897491, "learning_rate": 0.002, "loss": 2.3645, "step": 267170 }, { "epoch": 1.0328431599944334, "grad_norm": 0.13898703455924988, "learning_rate": 0.002, "loss": 2.3335, "step": 267180 }, { "epoch": 1.0328818171978167, "grad_norm": 0.09304480254650116, "learning_rate": 0.002, "loss": 2.3418, "step": 267190 }, { "epoch": 1.0329204744012, "grad_norm": 0.11063506454229355, "learning_rate": 0.002, "loss": 2.3522, "step": 267200 }, { "epoch": 1.0329591316045832, "grad_norm": 0.09229174256324768, "learning_rate": 0.002, "loss": 2.3357, "step": 267210 }, { "epoch": 1.0329977888079664, "grad_norm": 0.09612909704446793, "learning_rate": 0.002, "loss": 2.3407, "step": 267220 }, { "epoch": 1.0330364460113497, "grad_norm": 0.11524896323680878, "learning_rate": 0.002, "loss": 2.3292, "step": 267230 }, { "epoch": 1.033075103214733, "grad_norm": 0.10607946664094925, "learning_rate": 0.002, "loss": 2.3367, "step": 267240 }, { "epoch": 1.0331137604181162, "grad_norm": 0.09210071712732315, "learning_rate": 0.002, "loss": 2.3335, "step": 267250 }, { "epoch": 1.0331524176214997, "grad_norm": 0.11773253232240677, "learning_rate": 0.002, "loss": 2.3422, "step": 267260 }, { "epoch": 1.033191074824883, "grad_norm": 0.10352618247270584, "learning_rate": 0.002, "loss": 2.3402, "step": 267270 }, { "epoch": 1.0332297320282662, "grad_norm": 0.1059403195977211, "learning_rate": 0.002, "loss": 2.3505, "step": 267280 }, { "epoch": 1.0332683892316494, "grad_norm": 0.09368275105953217, "learning_rate": 0.002, "loss": 2.3381, "step": 267290 }, { "epoch": 1.0333070464350327, "grad_norm": 0.1071181371808052, "learning_rate": 0.002, "loss": 2.3417, "step": 267300 }, { "epoch": 1.033345703638416, "grad_norm": 0.14391173422336578, "learning_rate": 0.002, "loss": 2.3405, "step": 267310 }, { "epoch": 1.0333843608417992, "grad_norm": 0.0961400493979454, "learning_rate": 0.002, "loss": 2.327, "step": 267320 }, { "epoch": 1.0334230180451824, "grad_norm": 0.09671345353126526, "learning_rate": 0.002, "loss": 2.3431, "step": 267330 }, { "epoch": 1.033461675248566, "grad_norm": 0.13067911565303802, "learning_rate": 0.002, "loss": 2.3266, "step": 267340 }, { "epoch": 1.0335003324519492, "grad_norm": 0.11352329701185226, "learning_rate": 0.002, "loss": 2.342, "step": 267350 }, { "epoch": 1.0335389896553324, "grad_norm": 0.12045929580926895, "learning_rate": 0.002, "loss": 2.3399, "step": 267360 }, { "epoch": 1.0335776468587157, "grad_norm": 0.09980279952287674, "learning_rate": 0.002, "loss": 2.3327, "step": 267370 }, { "epoch": 1.033616304062099, "grad_norm": 0.10935485363006592, "learning_rate": 0.002, "loss": 2.3419, "step": 267380 }, { "epoch": 1.0336549612654822, "grad_norm": 0.12776319682598114, "learning_rate": 0.002, "loss": 2.3344, "step": 267390 }, { "epoch": 1.0336936184688654, "grad_norm": 0.09210210293531418, "learning_rate": 0.002, "loss": 2.3353, "step": 267400 }, { "epoch": 1.0337322756722487, "grad_norm": 0.09530036896467209, "learning_rate": 0.002, "loss": 2.3418, "step": 267410 }, { "epoch": 1.033770932875632, "grad_norm": 0.12296126782894135, "learning_rate": 0.002, "loss": 2.3237, "step": 267420 }, { "epoch": 1.0338095900790154, "grad_norm": 0.11421650648117065, "learning_rate": 0.002, "loss": 2.3197, "step": 267430 }, { "epoch": 1.0338482472823987, "grad_norm": 0.10140615701675415, "learning_rate": 0.002, "loss": 2.3382, "step": 267440 }, { "epoch": 1.033886904485782, "grad_norm": 0.10139352083206177, "learning_rate": 0.002, "loss": 2.3332, "step": 267450 }, { "epoch": 1.0339255616891652, "grad_norm": 0.09594139456748962, "learning_rate": 0.002, "loss": 2.3308, "step": 267460 }, { "epoch": 1.0339642188925484, "grad_norm": 0.09860043227672577, "learning_rate": 0.002, "loss": 2.3363, "step": 267470 }, { "epoch": 1.0340028760959317, "grad_norm": 0.10134763270616531, "learning_rate": 0.002, "loss": 2.3398, "step": 267480 }, { "epoch": 1.034041533299315, "grad_norm": 0.1237870305776596, "learning_rate": 0.002, "loss": 2.3262, "step": 267490 }, { "epoch": 1.0340801905026982, "grad_norm": 0.08835762739181519, "learning_rate": 0.002, "loss": 2.3342, "step": 267500 }, { "epoch": 1.0341188477060816, "grad_norm": 0.09981214255094528, "learning_rate": 0.002, "loss": 2.3312, "step": 267510 }, { "epoch": 1.034157504909465, "grad_norm": 0.10252374410629272, "learning_rate": 0.002, "loss": 2.3356, "step": 267520 }, { "epoch": 1.0341961621128481, "grad_norm": 0.1146230399608612, "learning_rate": 0.002, "loss": 2.3385, "step": 267530 }, { "epoch": 1.0342348193162314, "grad_norm": 0.09919324517250061, "learning_rate": 0.002, "loss": 2.3346, "step": 267540 }, { "epoch": 1.0342734765196147, "grad_norm": 0.10665534436702728, "learning_rate": 0.002, "loss": 2.3269, "step": 267550 }, { "epoch": 1.034312133722998, "grad_norm": 0.11877695471048355, "learning_rate": 0.002, "loss": 2.3284, "step": 267560 }, { "epoch": 1.0343507909263812, "grad_norm": 0.11180389672517776, "learning_rate": 0.002, "loss": 2.327, "step": 267570 }, { "epoch": 1.0343894481297644, "grad_norm": 0.10479209572076797, "learning_rate": 0.002, "loss": 2.351, "step": 267580 }, { "epoch": 1.0344281053331479, "grad_norm": 0.12032832950353622, "learning_rate": 0.002, "loss": 2.3426, "step": 267590 }, { "epoch": 1.0344667625365311, "grad_norm": 0.10242997854948044, "learning_rate": 0.002, "loss": 2.3367, "step": 267600 }, { "epoch": 1.0345054197399144, "grad_norm": 0.12337980419397354, "learning_rate": 0.002, "loss": 2.3456, "step": 267610 }, { "epoch": 1.0345440769432976, "grad_norm": 0.1257912516593933, "learning_rate": 0.002, "loss": 2.3308, "step": 267620 }, { "epoch": 1.034582734146681, "grad_norm": 0.09377360343933105, "learning_rate": 0.002, "loss": 2.3324, "step": 267630 }, { "epoch": 1.0346213913500641, "grad_norm": 0.1034860759973526, "learning_rate": 0.002, "loss": 2.3362, "step": 267640 }, { "epoch": 1.0346600485534474, "grad_norm": 0.11057710647583008, "learning_rate": 0.002, "loss": 2.3484, "step": 267650 }, { "epoch": 1.0346987057568307, "grad_norm": 0.11116885393857956, "learning_rate": 0.002, "loss": 2.3375, "step": 267660 }, { "epoch": 1.034737362960214, "grad_norm": 0.1062694787979126, "learning_rate": 0.002, "loss": 2.3356, "step": 267670 }, { "epoch": 1.0347760201635974, "grad_norm": 0.10646285116672516, "learning_rate": 0.002, "loss": 2.3336, "step": 267680 }, { "epoch": 1.0348146773669806, "grad_norm": 0.0952630415558815, "learning_rate": 0.002, "loss": 2.3218, "step": 267690 }, { "epoch": 1.0348533345703639, "grad_norm": 0.12547989189624786, "learning_rate": 0.002, "loss": 2.3469, "step": 267700 }, { "epoch": 1.0348919917737471, "grad_norm": 0.09910428524017334, "learning_rate": 0.002, "loss": 2.3454, "step": 267710 }, { "epoch": 1.0349306489771304, "grad_norm": 0.09709003567695618, "learning_rate": 0.002, "loss": 2.3377, "step": 267720 }, { "epoch": 1.0349693061805136, "grad_norm": 0.08974325656890869, "learning_rate": 0.002, "loss": 2.3332, "step": 267730 }, { "epoch": 1.035007963383897, "grad_norm": 0.09742540866136551, "learning_rate": 0.002, "loss": 2.3412, "step": 267740 }, { "epoch": 1.0350466205872801, "grad_norm": 0.1201322078704834, "learning_rate": 0.002, "loss": 2.325, "step": 267750 }, { "epoch": 1.0350852777906636, "grad_norm": 0.11878392845392227, "learning_rate": 0.002, "loss": 2.3432, "step": 267760 }, { "epoch": 1.0351239349940469, "grad_norm": 0.09439965337514877, "learning_rate": 0.002, "loss": 2.3335, "step": 267770 }, { "epoch": 1.0351625921974301, "grad_norm": 0.09864119440317154, "learning_rate": 0.002, "loss": 2.3377, "step": 267780 }, { "epoch": 1.0352012494008134, "grad_norm": 0.10157059133052826, "learning_rate": 0.002, "loss": 2.3457, "step": 267790 }, { "epoch": 1.0352399066041966, "grad_norm": 0.0926649197936058, "learning_rate": 0.002, "loss": 2.3533, "step": 267800 }, { "epoch": 1.0352785638075799, "grad_norm": 0.0927080512046814, "learning_rate": 0.002, "loss": 2.3195, "step": 267810 }, { "epoch": 1.0353172210109631, "grad_norm": 0.10915268212556839, "learning_rate": 0.002, "loss": 2.3291, "step": 267820 }, { "epoch": 1.0353558782143464, "grad_norm": 0.11072403192520142, "learning_rate": 0.002, "loss": 2.341, "step": 267830 }, { "epoch": 1.0353945354177296, "grad_norm": 0.1306406855583191, "learning_rate": 0.002, "loss": 2.3426, "step": 267840 }, { "epoch": 1.035433192621113, "grad_norm": 0.09433692693710327, "learning_rate": 0.002, "loss": 2.3315, "step": 267850 }, { "epoch": 1.0354718498244964, "grad_norm": 0.1059279516339302, "learning_rate": 0.002, "loss": 2.3328, "step": 267860 }, { "epoch": 1.0355105070278796, "grad_norm": 0.10944026708602905, "learning_rate": 0.002, "loss": 2.3452, "step": 267870 }, { "epoch": 1.0355491642312629, "grad_norm": 0.12454365193843842, "learning_rate": 0.002, "loss": 2.331, "step": 267880 }, { "epoch": 1.0355878214346461, "grad_norm": 0.11097417026758194, "learning_rate": 0.002, "loss": 2.3198, "step": 267890 }, { "epoch": 1.0356264786380294, "grad_norm": 0.13012464344501495, "learning_rate": 0.002, "loss": 2.3339, "step": 267900 }, { "epoch": 1.0356651358414126, "grad_norm": 0.10675875097513199, "learning_rate": 0.002, "loss": 2.3433, "step": 267910 }, { "epoch": 1.0357037930447959, "grad_norm": 0.10206145793199539, "learning_rate": 0.002, "loss": 2.3491, "step": 267920 }, { "epoch": 1.0357424502481793, "grad_norm": 0.11607825756072998, "learning_rate": 0.002, "loss": 2.3405, "step": 267930 }, { "epoch": 1.0357811074515626, "grad_norm": 0.11091554164886475, "learning_rate": 0.002, "loss": 2.335, "step": 267940 }, { "epoch": 1.0358197646549459, "grad_norm": 0.09264187514781952, "learning_rate": 0.002, "loss": 2.3391, "step": 267950 }, { "epoch": 1.035858421858329, "grad_norm": 0.09681179374456406, "learning_rate": 0.002, "loss": 2.3382, "step": 267960 }, { "epoch": 1.0358970790617124, "grad_norm": 0.1079772561788559, "learning_rate": 0.002, "loss": 2.3355, "step": 267970 }, { "epoch": 1.0359357362650956, "grad_norm": 0.1027929037809372, "learning_rate": 0.002, "loss": 2.3311, "step": 267980 }, { "epoch": 1.0359743934684789, "grad_norm": 0.12011872231960297, "learning_rate": 0.002, "loss": 2.34, "step": 267990 }, { "epoch": 1.0360130506718621, "grad_norm": 0.1037503257393837, "learning_rate": 0.002, "loss": 2.3278, "step": 268000 }, { "epoch": 1.0360517078752456, "grad_norm": 0.11351514607667923, "learning_rate": 0.002, "loss": 2.3404, "step": 268010 }, { "epoch": 1.0360903650786288, "grad_norm": 0.12285458296537399, "learning_rate": 0.002, "loss": 2.3403, "step": 268020 }, { "epoch": 1.036129022282012, "grad_norm": 0.11562503129243851, "learning_rate": 0.002, "loss": 2.3378, "step": 268030 }, { "epoch": 1.0361676794853953, "grad_norm": 0.10960686206817627, "learning_rate": 0.002, "loss": 2.3391, "step": 268040 }, { "epoch": 1.0362063366887786, "grad_norm": 0.0934433564543724, "learning_rate": 0.002, "loss": 2.3397, "step": 268050 }, { "epoch": 1.0362449938921618, "grad_norm": 0.11285009235143661, "learning_rate": 0.002, "loss": 2.3371, "step": 268060 }, { "epoch": 1.036283651095545, "grad_norm": 0.09301579743623734, "learning_rate": 0.002, "loss": 2.3403, "step": 268070 }, { "epoch": 1.0363223082989284, "grad_norm": 0.09829026460647583, "learning_rate": 0.002, "loss": 2.3328, "step": 268080 }, { "epoch": 1.0363609655023116, "grad_norm": 0.12021401524543762, "learning_rate": 0.002, "loss": 2.3216, "step": 268090 }, { "epoch": 1.036399622705695, "grad_norm": 0.12203650176525116, "learning_rate": 0.002, "loss": 2.3306, "step": 268100 }, { "epoch": 1.0364382799090783, "grad_norm": 0.09933951497077942, "learning_rate": 0.002, "loss": 2.3347, "step": 268110 }, { "epoch": 1.0364769371124616, "grad_norm": 0.1088651716709137, "learning_rate": 0.002, "loss": 2.3251, "step": 268120 }, { "epoch": 1.0365155943158448, "grad_norm": 0.09777121245861053, "learning_rate": 0.002, "loss": 2.3329, "step": 268130 }, { "epoch": 1.036554251519228, "grad_norm": 0.11730097979307175, "learning_rate": 0.002, "loss": 2.3258, "step": 268140 }, { "epoch": 1.0365929087226113, "grad_norm": 0.10772929340600967, "learning_rate": 0.002, "loss": 2.3368, "step": 268150 }, { "epoch": 1.0366315659259946, "grad_norm": 0.10943850874900818, "learning_rate": 0.002, "loss": 2.327, "step": 268160 }, { "epoch": 1.0366702231293778, "grad_norm": 0.10009908676147461, "learning_rate": 0.002, "loss": 2.3359, "step": 268170 }, { "epoch": 1.0367088803327613, "grad_norm": 0.1684296876192093, "learning_rate": 0.002, "loss": 2.3387, "step": 268180 }, { "epoch": 1.0367475375361446, "grad_norm": 0.11017098277807236, "learning_rate": 0.002, "loss": 2.3432, "step": 268190 }, { "epoch": 1.0367861947395278, "grad_norm": 0.14785271883010864, "learning_rate": 0.002, "loss": 2.3376, "step": 268200 }, { "epoch": 1.036824851942911, "grad_norm": 0.12890072166919708, "learning_rate": 0.002, "loss": 2.3512, "step": 268210 }, { "epoch": 1.0368635091462943, "grad_norm": 0.12205611169338226, "learning_rate": 0.002, "loss": 2.342, "step": 268220 }, { "epoch": 1.0369021663496776, "grad_norm": 0.10493602603673935, "learning_rate": 0.002, "loss": 2.3298, "step": 268230 }, { "epoch": 1.0369408235530608, "grad_norm": 0.09823311865329742, "learning_rate": 0.002, "loss": 2.3326, "step": 268240 }, { "epoch": 1.036979480756444, "grad_norm": 0.10882266610860825, "learning_rate": 0.002, "loss": 2.3459, "step": 268250 }, { "epoch": 1.0370181379598273, "grad_norm": 0.12620101869106293, "learning_rate": 0.002, "loss": 2.3487, "step": 268260 }, { "epoch": 1.0370567951632108, "grad_norm": 0.11481660604476929, "learning_rate": 0.002, "loss": 2.3195, "step": 268270 }, { "epoch": 1.037095452366594, "grad_norm": 0.12165917456150055, "learning_rate": 0.002, "loss": 2.3358, "step": 268280 }, { "epoch": 1.0371341095699773, "grad_norm": 0.09268329292535782, "learning_rate": 0.002, "loss": 2.3303, "step": 268290 }, { "epoch": 1.0371727667733606, "grad_norm": 0.11170244961977005, "learning_rate": 0.002, "loss": 2.333, "step": 268300 }, { "epoch": 1.0372114239767438, "grad_norm": 0.09712453186511993, "learning_rate": 0.002, "loss": 2.3439, "step": 268310 }, { "epoch": 1.037250081180127, "grad_norm": 0.11297538876533508, "learning_rate": 0.002, "loss": 2.3386, "step": 268320 }, { "epoch": 1.0372887383835103, "grad_norm": 0.10871253907680511, "learning_rate": 0.002, "loss": 2.3287, "step": 268330 }, { "epoch": 1.0373273955868936, "grad_norm": 0.11911670118570328, "learning_rate": 0.002, "loss": 2.335, "step": 268340 }, { "epoch": 1.037366052790277, "grad_norm": 0.09114481508731842, "learning_rate": 0.002, "loss": 2.3553, "step": 268350 }, { "epoch": 1.0374047099936603, "grad_norm": 0.09637667238712311, "learning_rate": 0.002, "loss": 2.3306, "step": 268360 }, { "epoch": 1.0374433671970436, "grad_norm": 0.10665112733840942, "learning_rate": 0.002, "loss": 2.3363, "step": 268370 }, { "epoch": 1.0374820244004268, "grad_norm": 0.11053548753261566, "learning_rate": 0.002, "loss": 2.3431, "step": 268380 }, { "epoch": 1.03752068160381, "grad_norm": 0.12966634333133698, "learning_rate": 0.002, "loss": 2.339, "step": 268390 }, { "epoch": 1.0375593388071933, "grad_norm": 0.12053807824850082, "learning_rate": 0.002, "loss": 2.3542, "step": 268400 }, { "epoch": 1.0375979960105766, "grad_norm": 0.09861911833286285, "learning_rate": 0.002, "loss": 2.3326, "step": 268410 }, { "epoch": 1.0376366532139598, "grad_norm": 0.14827530086040497, "learning_rate": 0.002, "loss": 2.3457, "step": 268420 }, { "epoch": 1.037675310417343, "grad_norm": 0.10329598933458328, "learning_rate": 0.002, "loss": 2.3253, "step": 268430 }, { "epoch": 1.0377139676207265, "grad_norm": 0.09511538594961166, "learning_rate": 0.002, "loss": 2.3403, "step": 268440 }, { "epoch": 1.0377526248241098, "grad_norm": 0.12171187996864319, "learning_rate": 0.002, "loss": 2.3336, "step": 268450 }, { "epoch": 1.037791282027493, "grad_norm": 0.09742235392332077, "learning_rate": 0.002, "loss": 2.341, "step": 268460 }, { "epoch": 1.0378299392308763, "grad_norm": 0.1101989820599556, "learning_rate": 0.002, "loss": 2.3379, "step": 268470 }, { "epoch": 1.0378685964342595, "grad_norm": 0.11267191171646118, "learning_rate": 0.002, "loss": 2.3165, "step": 268480 }, { "epoch": 1.0379072536376428, "grad_norm": 0.09148108959197998, "learning_rate": 0.002, "loss": 2.3349, "step": 268490 }, { "epoch": 1.037945910841026, "grad_norm": 0.10221163928508759, "learning_rate": 0.002, "loss": 2.3141, "step": 268500 }, { "epoch": 1.0379845680444093, "grad_norm": 0.1174292266368866, "learning_rate": 0.002, "loss": 2.333, "step": 268510 }, { "epoch": 1.0380232252477928, "grad_norm": 0.09777496010065079, "learning_rate": 0.002, "loss": 2.3306, "step": 268520 }, { "epoch": 1.038061882451176, "grad_norm": 0.1312064230442047, "learning_rate": 0.002, "loss": 2.3192, "step": 268530 }, { "epoch": 1.0381005396545593, "grad_norm": 0.11722839623689651, "learning_rate": 0.002, "loss": 2.3302, "step": 268540 }, { "epoch": 1.0381391968579425, "grad_norm": 0.09800203889608383, "learning_rate": 0.002, "loss": 2.3492, "step": 268550 }, { "epoch": 1.0381778540613258, "grad_norm": 0.09853968769311905, "learning_rate": 0.002, "loss": 2.3375, "step": 268560 }, { "epoch": 1.038216511264709, "grad_norm": 0.1263112872838974, "learning_rate": 0.002, "loss": 2.3388, "step": 268570 }, { "epoch": 1.0382551684680923, "grad_norm": 0.1005529835820198, "learning_rate": 0.002, "loss": 2.3484, "step": 268580 }, { "epoch": 1.0382938256714755, "grad_norm": 0.11430198699235916, "learning_rate": 0.002, "loss": 2.3268, "step": 268590 }, { "epoch": 1.0383324828748588, "grad_norm": 0.12453008443117142, "learning_rate": 0.002, "loss": 2.3389, "step": 268600 }, { "epoch": 1.0383711400782423, "grad_norm": 0.09846989065408707, "learning_rate": 0.002, "loss": 2.3527, "step": 268610 }, { "epoch": 1.0384097972816255, "grad_norm": 0.11411017179489136, "learning_rate": 0.002, "loss": 2.3282, "step": 268620 }, { "epoch": 1.0384484544850088, "grad_norm": 0.10042417049407959, "learning_rate": 0.002, "loss": 2.3521, "step": 268630 }, { "epoch": 1.038487111688392, "grad_norm": 0.10524924844503403, "learning_rate": 0.002, "loss": 2.3325, "step": 268640 }, { "epoch": 1.0385257688917753, "grad_norm": 0.11033865064382553, "learning_rate": 0.002, "loss": 2.3283, "step": 268650 }, { "epoch": 1.0385644260951585, "grad_norm": 0.1116894781589508, "learning_rate": 0.002, "loss": 2.3525, "step": 268660 }, { "epoch": 1.0386030832985418, "grad_norm": 0.10501192510128021, "learning_rate": 0.002, "loss": 2.3478, "step": 268670 }, { "epoch": 1.038641740501925, "grad_norm": 0.10203655809164047, "learning_rate": 0.002, "loss": 2.3371, "step": 268680 }, { "epoch": 1.0386803977053085, "grad_norm": 0.09671195596456528, "learning_rate": 0.002, "loss": 2.3514, "step": 268690 }, { "epoch": 1.0387190549086918, "grad_norm": 0.11293695122003555, "learning_rate": 0.002, "loss": 2.3262, "step": 268700 }, { "epoch": 1.038757712112075, "grad_norm": 0.10207619518041611, "learning_rate": 0.002, "loss": 2.3301, "step": 268710 }, { "epoch": 1.0387963693154583, "grad_norm": 0.10498257726430893, "learning_rate": 0.002, "loss": 2.3309, "step": 268720 }, { "epoch": 1.0388350265188415, "grad_norm": 0.08968181908130646, "learning_rate": 0.002, "loss": 2.3386, "step": 268730 }, { "epoch": 1.0388736837222248, "grad_norm": 0.11083420366048813, "learning_rate": 0.002, "loss": 2.3215, "step": 268740 }, { "epoch": 1.038912340925608, "grad_norm": 0.09098499268293381, "learning_rate": 0.002, "loss": 2.3392, "step": 268750 }, { "epoch": 1.0389509981289913, "grad_norm": 0.14875374734401703, "learning_rate": 0.002, "loss": 2.3094, "step": 268760 }, { "epoch": 1.0389896553323745, "grad_norm": 0.10430346429347992, "learning_rate": 0.002, "loss": 2.3259, "step": 268770 }, { "epoch": 1.039028312535758, "grad_norm": 0.09911978244781494, "learning_rate": 0.002, "loss": 2.3413, "step": 268780 }, { "epoch": 1.0390669697391413, "grad_norm": 0.10024331510066986, "learning_rate": 0.002, "loss": 2.3464, "step": 268790 }, { "epoch": 1.0391056269425245, "grad_norm": 0.0970604345202446, "learning_rate": 0.002, "loss": 2.3361, "step": 268800 }, { "epoch": 1.0391442841459078, "grad_norm": 0.09608924388885498, "learning_rate": 0.002, "loss": 2.3313, "step": 268810 }, { "epoch": 1.039182941349291, "grad_norm": 0.1133587583899498, "learning_rate": 0.002, "loss": 2.3475, "step": 268820 }, { "epoch": 1.0392215985526743, "grad_norm": 0.09642942249774933, "learning_rate": 0.002, "loss": 2.3252, "step": 268830 }, { "epoch": 1.0392602557560575, "grad_norm": 0.10437743365764618, "learning_rate": 0.002, "loss": 2.3242, "step": 268840 }, { "epoch": 1.0392989129594408, "grad_norm": 0.1008305475115776, "learning_rate": 0.002, "loss": 2.3342, "step": 268850 }, { "epoch": 1.0393375701628242, "grad_norm": 0.10197924822568893, "learning_rate": 0.002, "loss": 2.3342, "step": 268860 }, { "epoch": 1.0393762273662075, "grad_norm": 0.113011933863163, "learning_rate": 0.002, "loss": 2.3352, "step": 268870 }, { "epoch": 1.0394148845695907, "grad_norm": 0.10384301096200943, "learning_rate": 0.002, "loss": 2.3368, "step": 268880 }, { "epoch": 1.039453541772974, "grad_norm": 0.11199820786714554, "learning_rate": 0.002, "loss": 2.3396, "step": 268890 }, { "epoch": 1.0394921989763573, "grad_norm": 0.09927856922149658, "learning_rate": 0.002, "loss": 2.3434, "step": 268900 }, { "epoch": 1.0395308561797405, "grad_norm": 0.0970437228679657, "learning_rate": 0.002, "loss": 2.3408, "step": 268910 }, { "epoch": 1.0395695133831238, "grad_norm": 0.10486312955617905, "learning_rate": 0.002, "loss": 2.3289, "step": 268920 }, { "epoch": 1.039608170586507, "grad_norm": 0.10456685721874237, "learning_rate": 0.002, "loss": 2.3482, "step": 268930 }, { "epoch": 1.0396468277898903, "grad_norm": 0.10691561549901962, "learning_rate": 0.002, "loss": 2.3415, "step": 268940 }, { "epoch": 1.0396854849932737, "grad_norm": 0.09602787345647812, "learning_rate": 0.002, "loss": 2.353, "step": 268950 }, { "epoch": 1.039724142196657, "grad_norm": 0.10299375653266907, "learning_rate": 0.002, "loss": 2.3362, "step": 268960 }, { "epoch": 1.0397627994000402, "grad_norm": 0.10255037248134613, "learning_rate": 0.002, "loss": 2.3595, "step": 268970 }, { "epoch": 1.0398014566034235, "grad_norm": 0.1187383309006691, "learning_rate": 0.002, "loss": 2.351, "step": 268980 }, { "epoch": 1.0398401138068067, "grad_norm": 0.10880286991596222, "learning_rate": 0.002, "loss": 2.3388, "step": 268990 }, { "epoch": 1.03987877101019, "grad_norm": 0.11786052584648132, "learning_rate": 0.002, "loss": 2.3439, "step": 269000 }, { "epoch": 1.0399174282135732, "grad_norm": 0.11997605115175247, "learning_rate": 0.002, "loss": 2.3302, "step": 269010 }, { "epoch": 1.0399560854169565, "grad_norm": 0.1119430735707283, "learning_rate": 0.002, "loss": 2.3614, "step": 269020 }, { "epoch": 1.03999474262034, "grad_norm": 0.10212235152721405, "learning_rate": 0.002, "loss": 2.3251, "step": 269030 }, { "epoch": 1.0400333998237232, "grad_norm": 0.13267627358436584, "learning_rate": 0.002, "loss": 2.3347, "step": 269040 }, { "epoch": 1.0400720570271065, "grad_norm": 0.0917455404996872, "learning_rate": 0.002, "loss": 2.3281, "step": 269050 }, { "epoch": 1.0401107142304897, "grad_norm": 0.11168273538351059, "learning_rate": 0.002, "loss": 2.3421, "step": 269060 }, { "epoch": 1.040149371433873, "grad_norm": 0.10595767199993134, "learning_rate": 0.002, "loss": 2.3448, "step": 269070 }, { "epoch": 1.0401880286372562, "grad_norm": 0.15403233468532562, "learning_rate": 0.002, "loss": 2.3443, "step": 269080 }, { "epoch": 1.0402266858406395, "grad_norm": 0.10600115358829498, "learning_rate": 0.002, "loss": 2.3332, "step": 269090 }, { "epoch": 1.0402653430440227, "grad_norm": 0.11918964236974716, "learning_rate": 0.002, "loss": 2.3373, "step": 269100 }, { "epoch": 1.040304000247406, "grad_norm": 0.10387977957725525, "learning_rate": 0.002, "loss": 2.3382, "step": 269110 }, { "epoch": 1.0403426574507895, "grad_norm": 0.09495062381029129, "learning_rate": 0.002, "loss": 2.3464, "step": 269120 }, { "epoch": 1.0403813146541727, "grad_norm": 0.12769466638565063, "learning_rate": 0.002, "loss": 2.3466, "step": 269130 }, { "epoch": 1.040419971857556, "grad_norm": 0.08914276212453842, "learning_rate": 0.002, "loss": 2.3275, "step": 269140 }, { "epoch": 1.0404586290609392, "grad_norm": 0.11335669457912445, "learning_rate": 0.002, "loss": 2.3366, "step": 269150 }, { "epoch": 1.0404972862643225, "grad_norm": 0.10183510929346085, "learning_rate": 0.002, "loss": 2.3398, "step": 269160 }, { "epoch": 1.0405359434677057, "grad_norm": 0.12850168347358704, "learning_rate": 0.002, "loss": 2.3494, "step": 269170 }, { "epoch": 1.040574600671089, "grad_norm": 0.12186004221439362, "learning_rate": 0.002, "loss": 2.3312, "step": 269180 }, { "epoch": 1.0406132578744722, "grad_norm": 0.09551847726106644, "learning_rate": 0.002, "loss": 2.3436, "step": 269190 }, { "epoch": 1.0406519150778557, "grad_norm": 0.09610810875892639, "learning_rate": 0.002, "loss": 2.3217, "step": 269200 }, { "epoch": 1.040690572281239, "grad_norm": 0.10870260745286942, "learning_rate": 0.002, "loss": 2.3419, "step": 269210 }, { "epoch": 1.0407292294846222, "grad_norm": 0.08997610211372375, "learning_rate": 0.002, "loss": 2.3365, "step": 269220 }, { "epoch": 1.0407678866880055, "grad_norm": 0.12256580591201782, "learning_rate": 0.002, "loss": 2.327, "step": 269230 }, { "epoch": 1.0408065438913887, "grad_norm": 0.09302297234535217, "learning_rate": 0.002, "loss": 2.3369, "step": 269240 }, { "epoch": 1.040845201094772, "grad_norm": 0.12043951451778412, "learning_rate": 0.002, "loss": 2.3623, "step": 269250 }, { "epoch": 1.0408838582981552, "grad_norm": 0.10426823794841766, "learning_rate": 0.002, "loss": 2.3401, "step": 269260 }, { "epoch": 1.0409225155015385, "grad_norm": 0.13067109882831573, "learning_rate": 0.002, "loss": 2.3222, "step": 269270 }, { "epoch": 1.0409611727049217, "grad_norm": 0.08865130692720413, "learning_rate": 0.002, "loss": 2.3226, "step": 269280 }, { "epoch": 1.0409998299083052, "grad_norm": 0.16649721562862396, "learning_rate": 0.002, "loss": 2.3412, "step": 269290 }, { "epoch": 1.0410384871116884, "grad_norm": 0.09243342280387878, "learning_rate": 0.002, "loss": 2.3436, "step": 269300 }, { "epoch": 1.0410771443150717, "grad_norm": 0.14059138298034668, "learning_rate": 0.002, "loss": 2.3455, "step": 269310 }, { "epoch": 1.041115801518455, "grad_norm": 0.11420193314552307, "learning_rate": 0.002, "loss": 2.3354, "step": 269320 }, { "epoch": 1.0411544587218382, "grad_norm": 0.11908956617116928, "learning_rate": 0.002, "loss": 2.3141, "step": 269330 }, { "epoch": 1.0411931159252215, "grad_norm": 0.11710888892412186, "learning_rate": 0.002, "loss": 2.3332, "step": 269340 }, { "epoch": 1.0412317731286047, "grad_norm": 0.10175123810768127, "learning_rate": 0.002, "loss": 2.3296, "step": 269350 }, { "epoch": 1.041270430331988, "grad_norm": 0.17233812808990479, "learning_rate": 0.002, "loss": 2.3454, "step": 269360 }, { "epoch": 1.0413090875353714, "grad_norm": 0.09387019276618958, "learning_rate": 0.002, "loss": 2.3389, "step": 269370 }, { "epoch": 1.0413477447387547, "grad_norm": 0.10078072547912598, "learning_rate": 0.002, "loss": 2.3183, "step": 269380 }, { "epoch": 1.041386401942138, "grad_norm": 0.09858350455760956, "learning_rate": 0.002, "loss": 2.3307, "step": 269390 }, { "epoch": 1.0414250591455212, "grad_norm": 0.09058808535337448, "learning_rate": 0.002, "loss": 2.3273, "step": 269400 }, { "epoch": 1.0414637163489044, "grad_norm": 0.10851434618234634, "learning_rate": 0.002, "loss": 2.3257, "step": 269410 }, { "epoch": 1.0415023735522877, "grad_norm": 0.13273876905441284, "learning_rate": 0.002, "loss": 2.3297, "step": 269420 }, { "epoch": 1.041541030755671, "grad_norm": 0.0915561392903328, "learning_rate": 0.002, "loss": 2.3172, "step": 269430 }, { "epoch": 1.0415796879590542, "grad_norm": 0.09703870862722397, "learning_rate": 0.002, "loss": 2.3372, "step": 269440 }, { "epoch": 1.0416183451624377, "grad_norm": 0.10767421126365662, "learning_rate": 0.002, "loss": 2.3416, "step": 269450 }, { "epoch": 1.041657002365821, "grad_norm": 0.09860119223594666, "learning_rate": 0.002, "loss": 2.324, "step": 269460 }, { "epoch": 1.0416956595692042, "grad_norm": 0.10003165155649185, "learning_rate": 0.002, "loss": 2.3235, "step": 269470 }, { "epoch": 1.0417343167725874, "grad_norm": 0.152597114443779, "learning_rate": 0.002, "loss": 2.3492, "step": 269480 }, { "epoch": 1.0417729739759707, "grad_norm": 0.10329585522413254, "learning_rate": 0.002, "loss": 2.3165, "step": 269490 }, { "epoch": 1.041811631179354, "grad_norm": 0.1033838540315628, "learning_rate": 0.002, "loss": 2.3351, "step": 269500 }, { "epoch": 1.0418502883827372, "grad_norm": 0.1202467754483223, "learning_rate": 0.002, "loss": 2.3399, "step": 269510 }, { "epoch": 1.0418889455861204, "grad_norm": 0.0996757373213768, "learning_rate": 0.002, "loss": 2.3374, "step": 269520 }, { "epoch": 1.0419276027895037, "grad_norm": 0.09567472338676453, "learning_rate": 0.002, "loss": 2.3352, "step": 269530 }, { "epoch": 1.0419662599928872, "grad_norm": 0.1093759760260582, "learning_rate": 0.002, "loss": 2.3426, "step": 269540 }, { "epoch": 1.0420049171962704, "grad_norm": 0.101670041680336, "learning_rate": 0.002, "loss": 2.3362, "step": 269550 }, { "epoch": 1.0420435743996537, "grad_norm": 0.10272194445133209, "learning_rate": 0.002, "loss": 2.3187, "step": 269560 }, { "epoch": 1.042082231603037, "grad_norm": 0.11370494216680527, "learning_rate": 0.002, "loss": 2.3354, "step": 269570 }, { "epoch": 1.0421208888064202, "grad_norm": 0.11007405072450638, "learning_rate": 0.002, "loss": 2.335, "step": 269580 }, { "epoch": 1.0421595460098034, "grad_norm": 0.11395785212516785, "learning_rate": 0.002, "loss": 2.3498, "step": 269590 }, { "epoch": 1.0421982032131867, "grad_norm": 0.09112124145030975, "learning_rate": 0.002, "loss": 2.3614, "step": 269600 }, { "epoch": 1.04223686041657, "grad_norm": 0.09444142878055573, "learning_rate": 0.002, "loss": 2.338, "step": 269610 }, { "epoch": 1.0422755176199534, "grad_norm": 0.11114094406366348, "learning_rate": 0.002, "loss": 2.3453, "step": 269620 }, { "epoch": 1.0423141748233367, "grad_norm": 0.10980421304702759, "learning_rate": 0.002, "loss": 2.3261, "step": 269630 }, { "epoch": 1.04235283202672, "grad_norm": 0.1392664611339569, "learning_rate": 0.002, "loss": 2.3401, "step": 269640 }, { "epoch": 1.0423914892301032, "grad_norm": 0.1121130958199501, "learning_rate": 0.002, "loss": 2.3372, "step": 269650 }, { "epoch": 1.0424301464334864, "grad_norm": 0.10922688990831375, "learning_rate": 0.002, "loss": 2.3307, "step": 269660 }, { "epoch": 1.0424688036368697, "grad_norm": 0.10173194855451584, "learning_rate": 0.002, "loss": 2.3383, "step": 269670 }, { "epoch": 1.042507460840253, "grad_norm": 0.10193554311990738, "learning_rate": 0.002, "loss": 2.3498, "step": 269680 }, { "epoch": 1.0425461180436362, "grad_norm": 0.12612669169902802, "learning_rate": 0.002, "loss": 2.3323, "step": 269690 }, { "epoch": 1.0425847752470194, "grad_norm": 0.0927710235118866, "learning_rate": 0.002, "loss": 2.3347, "step": 269700 }, { "epoch": 1.042623432450403, "grad_norm": 0.11005710810422897, "learning_rate": 0.002, "loss": 2.3579, "step": 269710 }, { "epoch": 1.0426620896537862, "grad_norm": 0.1214102953672409, "learning_rate": 0.002, "loss": 2.333, "step": 269720 }, { "epoch": 1.0427007468571694, "grad_norm": 0.11738359928131104, "learning_rate": 0.002, "loss": 2.3364, "step": 269730 }, { "epoch": 1.0427394040605527, "grad_norm": 0.11865021288394928, "learning_rate": 0.002, "loss": 2.3335, "step": 269740 }, { "epoch": 1.042778061263936, "grad_norm": 0.0988292321562767, "learning_rate": 0.002, "loss": 2.3334, "step": 269750 }, { "epoch": 1.0428167184673192, "grad_norm": 0.09761074930429459, "learning_rate": 0.002, "loss": 2.3368, "step": 269760 }, { "epoch": 1.0428553756707024, "grad_norm": 0.12573882937431335, "learning_rate": 0.002, "loss": 2.3271, "step": 269770 }, { "epoch": 1.0428940328740857, "grad_norm": 0.11221905052661896, "learning_rate": 0.002, "loss": 2.3407, "step": 269780 }, { "epoch": 1.0429326900774691, "grad_norm": 0.12006543576717377, "learning_rate": 0.002, "loss": 2.3407, "step": 269790 }, { "epoch": 1.0429713472808524, "grad_norm": 0.09778149425983429, "learning_rate": 0.002, "loss": 2.3394, "step": 269800 }, { "epoch": 1.0430100044842356, "grad_norm": 0.08791869878768921, "learning_rate": 0.002, "loss": 2.3383, "step": 269810 }, { "epoch": 1.043048661687619, "grad_norm": 0.11982083320617676, "learning_rate": 0.002, "loss": 2.3427, "step": 269820 }, { "epoch": 1.0430873188910021, "grad_norm": 0.10641071200370789, "learning_rate": 0.002, "loss": 2.3419, "step": 269830 }, { "epoch": 1.0431259760943854, "grad_norm": 0.10933824628591537, "learning_rate": 0.002, "loss": 2.3378, "step": 269840 }, { "epoch": 1.0431646332977687, "grad_norm": 0.09866813570261002, "learning_rate": 0.002, "loss": 2.3237, "step": 269850 }, { "epoch": 1.043203290501152, "grad_norm": 0.09645739197731018, "learning_rate": 0.002, "loss": 2.3433, "step": 269860 }, { "epoch": 1.0432419477045354, "grad_norm": 0.10199181735515594, "learning_rate": 0.002, "loss": 2.3433, "step": 269870 }, { "epoch": 1.0432806049079186, "grad_norm": 0.10536587238311768, "learning_rate": 0.002, "loss": 2.3364, "step": 269880 }, { "epoch": 1.0433192621113019, "grad_norm": 0.11171141266822815, "learning_rate": 0.002, "loss": 2.341, "step": 269890 }, { "epoch": 1.0433579193146851, "grad_norm": 0.09901822358369827, "learning_rate": 0.002, "loss": 2.3343, "step": 269900 }, { "epoch": 1.0433965765180684, "grad_norm": 0.09818220883607864, "learning_rate": 0.002, "loss": 2.3514, "step": 269910 }, { "epoch": 1.0434352337214516, "grad_norm": 0.10862912237644196, "learning_rate": 0.002, "loss": 2.3335, "step": 269920 }, { "epoch": 1.043473890924835, "grad_norm": 0.10346197336912155, "learning_rate": 0.002, "loss": 2.3258, "step": 269930 }, { "epoch": 1.0435125481282181, "grad_norm": 0.12310784310102463, "learning_rate": 0.002, "loss": 2.3162, "step": 269940 }, { "epoch": 1.0435512053316014, "grad_norm": 0.11298996955156326, "learning_rate": 0.002, "loss": 2.3596, "step": 269950 }, { "epoch": 1.0435898625349849, "grad_norm": 0.11274628341197968, "learning_rate": 0.002, "loss": 2.3398, "step": 269960 }, { "epoch": 1.0436285197383681, "grad_norm": 0.13122490048408508, "learning_rate": 0.002, "loss": 2.3342, "step": 269970 }, { "epoch": 1.0436671769417514, "grad_norm": 0.10723333805799484, "learning_rate": 0.002, "loss": 2.3203, "step": 269980 }, { "epoch": 1.0437058341451346, "grad_norm": 0.09092576801776886, "learning_rate": 0.002, "loss": 2.3546, "step": 269990 }, { "epoch": 1.0437444913485179, "grad_norm": 0.11523299664258957, "learning_rate": 0.002, "loss": 2.3305, "step": 270000 }, { "epoch": 1.0437831485519011, "grad_norm": 0.10284633189439774, "learning_rate": 0.002, "loss": 2.3226, "step": 270010 }, { "epoch": 1.0438218057552844, "grad_norm": 0.09350597113370895, "learning_rate": 0.002, "loss": 2.3332, "step": 270020 }, { "epoch": 1.0438604629586676, "grad_norm": 0.10208304971456528, "learning_rate": 0.002, "loss": 2.3278, "step": 270030 }, { "epoch": 1.043899120162051, "grad_norm": 0.1192924901843071, "learning_rate": 0.002, "loss": 2.3137, "step": 270040 }, { "epoch": 1.0439377773654344, "grad_norm": 0.10541343688964844, "learning_rate": 0.002, "loss": 2.3391, "step": 270050 }, { "epoch": 1.0439764345688176, "grad_norm": 0.10207168012857437, "learning_rate": 0.002, "loss": 2.3433, "step": 270060 }, { "epoch": 1.0440150917722009, "grad_norm": 0.09780795872211456, "learning_rate": 0.002, "loss": 2.3537, "step": 270070 }, { "epoch": 1.0440537489755841, "grad_norm": 0.10557345300912857, "learning_rate": 0.002, "loss": 2.3481, "step": 270080 }, { "epoch": 1.0440924061789674, "grad_norm": 0.11492825299501419, "learning_rate": 0.002, "loss": 2.3438, "step": 270090 }, { "epoch": 1.0441310633823506, "grad_norm": 0.1282857358455658, "learning_rate": 0.002, "loss": 2.3402, "step": 270100 }, { "epoch": 1.0441697205857339, "grad_norm": 0.09961901605129242, "learning_rate": 0.002, "loss": 2.3343, "step": 270110 }, { "epoch": 1.0442083777891171, "grad_norm": 0.09308364242315292, "learning_rate": 0.002, "loss": 2.3508, "step": 270120 }, { "epoch": 1.0442470349925006, "grad_norm": 0.08572640269994736, "learning_rate": 0.002, "loss": 2.3363, "step": 270130 }, { "epoch": 1.0442856921958839, "grad_norm": 0.08826728910207748, "learning_rate": 0.002, "loss": 2.3338, "step": 270140 }, { "epoch": 1.044324349399267, "grad_norm": 0.12766070663928986, "learning_rate": 0.002, "loss": 2.3426, "step": 270150 }, { "epoch": 1.0443630066026504, "grad_norm": 0.1239904910326004, "learning_rate": 0.002, "loss": 2.3395, "step": 270160 }, { "epoch": 1.0444016638060336, "grad_norm": 0.10418543219566345, "learning_rate": 0.002, "loss": 2.3463, "step": 270170 }, { "epoch": 1.0444403210094169, "grad_norm": 0.09708482027053833, "learning_rate": 0.002, "loss": 2.348, "step": 270180 }, { "epoch": 1.0444789782128001, "grad_norm": 0.10282531380653381, "learning_rate": 0.002, "loss": 2.3249, "step": 270190 }, { "epoch": 1.0445176354161834, "grad_norm": 0.1008395329117775, "learning_rate": 0.002, "loss": 2.322, "step": 270200 }, { "epoch": 1.0445562926195668, "grad_norm": 0.117550790309906, "learning_rate": 0.002, "loss": 2.3302, "step": 270210 }, { "epoch": 1.04459494982295, "grad_norm": 0.11662206053733826, "learning_rate": 0.002, "loss": 2.3374, "step": 270220 }, { "epoch": 1.0446336070263333, "grad_norm": 0.09839627146720886, "learning_rate": 0.002, "loss": 2.3416, "step": 270230 }, { "epoch": 1.0446722642297166, "grad_norm": 0.10696960985660553, "learning_rate": 0.002, "loss": 2.3508, "step": 270240 }, { "epoch": 1.0447109214330998, "grad_norm": 0.10411490499973297, "learning_rate": 0.002, "loss": 2.3453, "step": 270250 }, { "epoch": 1.044749578636483, "grad_norm": 0.10055858641862869, "learning_rate": 0.002, "loss": 2.3436, "step": 270260 }, { "epoch": 1.0447882358398664, "grad_norm": 0.10897232592105865, "learning_rate": 0.002, "loss": 2.3288, "step": 270270 }, { "epoch": 1.0448268930432496, "grad_norm": 0.12357335537672043, "learning_rate": 0.002, "loss": 2.3311, "step": 270280 }, { "epoch": 1.0448655502466329, "grad_norm": 0.10149303078651428, "learning_rate": 0.002, "loss": 2.3427, "step": 270290 }, { "epoch": 1.0449042074500163, "grad_norm": 0.09007223695516586, "learning_rate": 0.002, "loss": 2.343, "step": 270300 }, { "epoch": 1.0449428646533996, "grad_norm": 0.09397382289171219, "learning_rate": 0.002, "loss": 2.33, "step": 270310 }, { "epoch": 1.0449815218567828, "grad_norm": 0.11514809727668762, "learning_rate": 0.002, "loss": 2.3379, "step": 270320 }, { "epoch": 1.045020179060166, "grad_norm": 0.10506939142942429, "learning_rate": 0.002, "loss": 2.3211, "step": 270330 }, { "epoch": 1.0450588362635493, "grad_norm": 0.10700733959674835, "learning_rate": 0.002, "loss": 2.3298, "step": 270340 }, { "epoch": 1.0450974934669326, "grad_norm": 0.10480210930109024, "learning_rate": 0.002, "loss": 2.3442, "step": 270350 }, { "epoch": 1.0451361506703158, "grad_norm": 0.11787199229001999, "learning_rate": 0.002, "loss": 2.3414, "step": 270360 }, { "epoch": 1.045174807873699, "grad_norm": 0.09324786067008972, "learning_rate": 0.002, "loss": 2.3278, "step": 270370 }, { "epoch": 1.0452134650770826, "grad_norm": 0.10228384286165237, "learning_rate": 0.002, "loss": 2.3372, "step": 270380 }, { "epoch": 1.0452521222804658, "grad_norm": 0.11133359372615814, "learning_rate": 0.002, "loss": 2.3377, "step": 270390 }, { "epoch": 1.045290779483849, "grad_norm": 0.10327267646789551, "learning_rate": 0.002, "loss": 2.3471, "step": 270400 }, { "epoch": 1.0453294366872323, "grad_norm": 0.09917426854372025, "learning_rate": 0.002, "loss": 2.346, "step": 270410 }, { "epoch": 1.0453680938906156, "grad_norm": 0.114399753510952, "learning_rate": 0.002, "loss": 2.329, "step": 270420 }, { "epoch": 1.0454067510939988, "grad_norm": 0.10123814642429352, "learning_rate": 0.002, "loss": 2.3504, "step": 270430 }, { "epoch": 1.045445408297382, "grad_norm": 0.08698641508817673, "learning_rate": 0.002, "loss": 2.3255, "step": 270440 }, { "epoch": 1.0454840655007653, "grad_norm": 0.11683791130781174, "learning_rate": 0.002, "loss": 2.3531, "step": 270450 }, { "epoch": 1.0455227227041486, "grad_norm": 0.13808204233646393, "learning_rate": 0.002, "loss": 2.3443, "step": 270460 }, { "epoch": 1.045561379907532, "grad_norm": 0.11392877250909805, "learning_rate": 0.002, "loss": 2.3638, "step": 270470 }, { "epoch": 1.0456000371109153, "grad_norm": 0.09933704882860184, "learning_rate": 0.002, "loss": 2.3413, "step": 270480 }, { "epoch": 1.0456386943142986, "grad_norm": 0.10919066518545151, "learning_rate": 0.002, "loss": 2.3322, "step": 270490 }, { "epoch": 1.0456773515176818, "grad_norm": 0.10833816230297089, "learning_rate": 0.002, "loss": 2.316, "step": 270500 }, { "epoch": 1.045716008721065, "grad_norm": 0.1263931691646576, "learning_rate": 0.002, "loss": 2.3376, "step": 270510 }, { "epoch": 1.0457546659244483, "grad_norm": 0.11145421117544174, "learning_rate": 0.002, "loss": 2.3409, "step": 270520 }, { "epoch": 1.0457933231278316, "grad_norm": 0.10142406821250916, "learning_rate": 0.002, "loss": 2.3334, "step": 270530 }, { "epoch": 1.0458319803312148, "grad_norm": 0.11987996101379395, "learning_rate": 0.002, "loss": 2.3321, "step": 270540 }, { "epoch": 1.0458706375345983, "grad_norm": 0.10346390306949615, "learning_rate": 0.002, "loss": 2.3342, "step": 270550 }, { "epoch": 1.0459092947379816, "grad_norm": 0.11690594255924225, "learning_rate": 0.002, "loss": 2.3365, "step": 270560 }, { "epoch": 1.0459479519413648, "grad_norm": 0.10042134672403336, "learning_rate": 0.002, "loss": 2.3294, "step": 270570 }, { "epoch": 1.045986609144748, "grad_norm": 0.10842636227607727, "learning_rate": 0.002, "loss": 2.34, "step": 270580 }, { "epoch": 1.0460252663481313, "grad_norm": 0.12476224452257156, "learning_rate": 0.002, "loss": 2.3474, "step": 270590 }, { "epoch": 1.0460639235515146, "grad_norm": 0.11158926784992218, "learning_rate": 0.002, "loss": 2.342, "step": 270600 }, { "epoch": 1.0461025807548978, "grad_norm": 0.09732253104448318, "learning_rate": 0.002, "loss": 2.3366, "step": 270610 }, { "epoch": 1.046141237958281, "grad_norm": 0.10630382597446442, "learning_rate": 0.002, "loss": 2.3402, "step": 270620 }, { "epoch": 1.0461798951616643, "grad_norm": 0.10902847349643707, "learning_rate": 0.002, "loss": 2.3482, "step": 270630 }, { "epoch": 1.0462185523650478, "grad_norm": 0.09461641311645508, "learning_rate": 0.002, "loss": 2.3405, "step": 270640 }, { "epoch": 1.046257209568431, "grad_norm": 0.10040676593780518, "learning_rate": 0.002, "loss": 2.3401, "step": 270650 }, { "epoch": 1.0462958667718143, "grad_norm": 0.1279398798942566, "learning_rate": 0.002, "loss": 2.3377, "step": 270660 }, { "epoch": 1.0463345239751976, "grad_norm": 0.08386406302452087, "learning_rate": 0.002, "loss": 2.333, "step": 270670 }, { "epoch": 1.0463731811785808, "grad_norm": 0.09664589166641235, "learning_rate": 0.002, "loss": 2.3387, "step": 270680 }, { "epoch": 1.046411838381964, "grad_norm": 0.09919747710227966, "learning_rate": 0.002, "loss": 2.3218, "step": 270690 }, { "epoch": 1.0464504955853473, "grad_norm": 0.11124635487794876, "learning_rate": 0.002, "loss": 2.3451, "step": 270700 }, { "epoch": 1.0464891527887306, "grad_norm": 0.09929902851581573, "learning_rate": 0.002, "loss": 2.3289, "step": 270710 }, { "epoch": 1.046527809992114, "grad_norm": 0.11733206361532211, "learning_rate": 0.002, "loss": 2.3386, "step": 270720 }, { "epoch": 1.0465664671954973, "grad_norm": 0.09919170290231705, "learning_rate": 0.002, "loss": 2.3281, "step": 270730 }, { "epoch": 1.0466051243988805, "grad_norm": 0.09976255893707275, "learning_rate": 0.002, "loss": 2.3504, "step": 270740 }, { "epoch": 1.0466437816022638, "grad_norm": 0.09964817017316818, "learning_rate": 0.002, "loss": 2.3262, "step": 270750 }, { "epoch": 1.046682438805647, "grad_norm": 0.08482794463634491, "learning_rate": 0.002, "loss": 2.3305, "step": 270760 }, { "epoch": 1.0467210960090303, "grad_norm": 0.12965020537376404, "learning_rate": 0.002, "loss": 2.3345, "step": 270770 }, { "epoch": 1.0467597532124135, "grad_norm": 0.09799892455339432, "learning_rate": 0.002, "loss": 2.3433, "step": 270780 }, { "epoch": 1.0467984104157968, "grad_norm": 0.12030211836099625, "learning_rate": 0.002, "loss": 2.3169, "step": 270790 }, { "epoch": 1.04683706761918, "grad_norm": 0.13467024266719818, "learning_rate": 0.002, "loss": 2.3445, "step": 270800 }, { "epoch": 1.0468757248225635, "grad_norm": 0.08914663642644882, "learning_rate": 0.002, "loss": 2.3179, "step": 270810 }, { "epoch": 1.0469143820259468, "grad_norm": 0.09452791512012482, "learning_rate": 0.002, "loss": 2.3394, "step": 270820 }, { "epoch": 1.04695303922933, "grad_norm": 0.11398884654045105, "learning_rate": 0.002, "loss": 2.3231, "step": 270830 }, { "epoch": 1.0469916964327133, "grad_norm": 0.9981908798217773, "learning_rate": 0.002, "loss": 2.3318, "step": 270840 }, { "epoch": 1.0470303536360965, "grad_norm": 0.11038073152303696, "learning_rate": 0.002, "loss": 2.328, "step": 270850 }, { "epoch": 1.0470690108394798, "grad_norm": 0.10655945539474487, "learning_rate": 0.002, "loss": 2.3499, "step": 270860 }, { "epoch": 1.047107668042863, "grad_norm": 0.1006389707326889, "learning_rate": 0.002, "loss": 2.3255, "step": 270870 }, { "epoch": 1.0471463252462463, "grad_norm": 0.095772385597229, "learning_rate": 0.002, "loss": 2.3487, "step": 270880 }, { "epoch": 1.0471849824496298, "grad_norm": 0.1254042536020279, "learning_rate": 0.002, "loss": 2.3411, "step": 270890 }, { "epoch": 1.047223639653013, "grad_norm": 0.09856385737657547, "learning_rate": 0.002, "loss": 2.3422, "step": 270900 }, { "epoch": 1.0472622968563963, "grad_norm": 0.1072583720088005, "learning_rate": 0.002, "loss": 2.3362, "step": 270910 }, { "epoch": 1.0473009540597795, "grad_norm": 0.09270235896110535, "learning_rate": 0.002, "loss": 2.3532, "step": 270920 }, { "epoch": 1.0473396112631628, "grad_norm": 0.12417781352996826, "learning_rate": 0.002, "loss": 2.3343, "step": 270930 }, { "epoch": 1.047378268466546, "grad_norm": 0.11933035403490067, "learning_rate": 0.002, "loss": 2.3421, "step": 270940 }, { "epoch": 1.0474169256699293, "grad_norm": 0.09769248962402344, "learning_rate": 0.002, "loss": 2.3373, "step": 270950 }, { "epoch": 1.0474555828733125, "grad_norm": 0.10664346814155579, "learning_rate": 0.002, "loss": 2.3265, "step": 270960 }, { "epoch": 1.0474942400766958, "grad_norm": 0.12095224112272263, "learning_rate": 0.002, "loss": 2.3271, "step": 270970 }, { "epoch": 1.0475328972800793, "grad_norm": 0.10238323360681534, "learning_rate": 0.002, "loss": 2.3373, "step": 270980 }, { "epoch": 1.0475715544834625, "grad_norm": 0.11612109839916229, "learning_rate": 0.002, "loss": 2.3301, "step": 270990 }, { "epoch": 1.0476102116868458, "grad_norm": 0.10432593524456024, "learning_rate": 0.002, "loss": 2.3427, "step": 271000 }, { "epoch": 1.047648868890229, "grad_norm": 0.09392142295837402, "learning_rate": 0.002, "loss": 2.3576, "step": 271010 }, { "epoch": 1.0476875260936123, "grad_norm": 0.08814319223165512, "learning_rate": 0.002, "loss": 2.3368, "step": 271020 }, { "epoch": 1.0477261832969955, "grad_norm": 0.1037273108959198, "learning_rate": 0.002, "loss": 2.3464, "step": 271030 }, { "epoch": 1.0477648405003788, "grad_norm": 0.10812348872423172, "learning_rate": 0.002, "loss": 2.3336, "step": 271040 }, { "epoch": 1.047803497703762, "grad_norm": 0.1279609352350235, "learning_rate": 0.002, "loss": 2.3466, "step": 271050 }, { "epoch": 1.0478421549071455, "grad_norm": 0.0913081243634224, "learning_rate": 0.002, "loss": 2.3385, "step": 271060 }, { "epoch": 1.0478808121105287, "grad_norm": 0.11548645049333572, "learning_rate": 0.002, "loss": 2.3405, "step": 271070 }, { "epoch": 1.047919469313912, "grad_norm": 0.13509738445281982, "learning_rate": 0.002, "loss": 2.3319, "step": 271080 }, { "epoch": 1.0479581265172953, "grad_norm": 0.11165101826190948, "learning_rate": 0.002, "loss": 2.3478, "step": 271090 }, { "epoch": 1.0479967837206785, "grad_norm": 0.10891595482826233, "learning_rate": 0.002, "loss": 2.3447, "step": 271100 }, { "epoch": 1.0480354409240618, "grad_norm": 0.09405533224344254, "learning_rate": 0.002, "loss": 2.3351, "step": 271110 }, { "epoch": 1.048074098127445, "grad_norm": 0.09612392634153366, "learning_rate": 0.002, "loss": 2.3416, "step": 271120 }, { "epoch": 1.0481127553308283, "grad_norm": 0.0851694643497467, "learning_rate": 0.002, "loss": 2.339, "step": 271130 }, { "epoch": 1.0481514125342115, "grad_norm": 0.1357465535402298, "learning_rate": 0.002, "loss": 2.3524, "step": 271140 }, { "epoch": 1.048190069737595, "grad_norm": 0.10865718126296997, "learning_rate": 0.002, "loss": 2.3611, "step": 271150 }, { "epoch": 1.0482287269409782, "grad_norm": 0.09514014422893524, "learning_rate": 0.002, "loss": 2.3339, "step": 271160 }, { "epoch": 1.0482673841443615, "grad_norm": 0.09729630500078201, "learning_rate": 0.002, "loss": 2.3509, "step": 271170 }, { "epoch": 1.0483060413477447, "grad_norm": 0.10377205163240433, "learning_rate": 0.002, "loss": 2.32, "step": 271180 }, { "epoch": 1.048344698551128, "grad_norm": 0.11447707563638687, "learning_rate": 0.002, "loss": 2.3344, "step": 271190 }, { "epoch": 1.0483833557545112, "grad_norm": 0.09187958389520645, "learning_rate": 0.002, "loss": 2.3303, "step": 271200 }, { "epoch": 1.0484220129578945, "grad_norm": 0.11575819551944733, "learning_rate": 0.002, "loss": 2.3407, "step": 271210 }, { "epoch": 1.0484606701612778, "grad_norm": 0.14068150520324707, "learning_rate": 0.002, "loss": 2.328, "step": 271220 }, { "epoch": 1.0484993273646612, "grad_norm": 0.10853346437215805, "learning_rate": 0.002, "loss": 2.3356, "step": 271230 }, { "epoch": 1.0485379845680445, "grad_norm": 0.09836877137422562, "learning_rate": 0.002, "loss": 2.3231, "step": 271240 }, { "epoch": 1.0485766417714277, "grad_norm": 0.11626017093658447, "learning_rate": 0.002, "loss": 2.3579, "step": 271250 }, { "epoch": 1.048615298974811, "grad_norm": 0.09962525218725204, "learning_rate": 0.002, "loss": 2.3447, "step": 271260 }, { "epoch": 1.0486539561781942, "grad_norm": 0.10745836049318314, "learning_rate": 0.002, "loss": 2.3261, "step": 271270 }, { "epoch": 1.0486926133815775, "grad_norm": 0.12288927286863327, "learning_rate": 0.002, "loss": 2.3299, "step": 271280 }, { "epoch": 1.0487312705849607, "grad_norm": 0.10615988075733185, "learning_rate": 0.002, "loss": 2.3361, "step": 271290 }, { "epoch": 1.048769927788344, "grad_norm": 0.11295727640390396, "learning_rate": 0.002, "loss": 2.3345, "step": 271300 }, { "epoch": 1.0488085849917272, "grad_norm": 0.09330426156520844, "learning_rate": 0.002, "loss": 2.3282, "step": 271310 }, { "epoch": 1.0488472421951107, "grad_norm": 0.1008860170841217, "learning_rate": 0.002, "loss": 2.3415, "step": 271320 }, { "epoch": 1.048885899398494, "grad_norm": 0.12525293231010437, "learning_rate": 0.002, "loss": 2.3307, "step": 271330 }, { "epoch": 1.0489245566018772, "grad_norm": 0.09700564295053482, "learning_rate": 0.002, "loss": 2.3388, "step": 271340 }, { "epoch": 1.0489632138052605, "grad_norm": 0.10422427207231522, "learning_rate": 0.002, "loss": 2.3259, "step": 271350 }, { "epoch": 1.0490018710086437, "grad_norm": 0.10832743346691132, "learning_rate": 0.002, "loss": 2.3473, "step": 271360 }, { "epoch": 1.049040528212027, "grad_norm": 0.1054845005273819, "learning_rate": 0.002, "loss": 2.3319, "step": 271370 }, { "epoch": 1.0490791854154102, "grad_norm": 0.09151072055101395, "learning_rate": 0.002, "loss": 2.3416, "step": 271380 }, { "epoch": 1.0491178426187935, "grad_norm": 0.10409058630466461, "learning_rate": 0.002, "loss": 2.3314, "step": 271390 }, { "epoch": 1.049156499822177, "grad_norm": 0.09544691443443298, "learning_rate": 0.002, "loss": 2.3455, "step": 271400 }, { "epoch": 1.0491951570255602, "grad_norm": 0.09302859753370285, "learning_rate": 0.002, "loss": 2.3373, "step": 271410 }, { "epoch": 1.0492338142289435, "grad_norm": 0.10048230737447739, "learning_rate": 0.002, "loss": 2.3382, "step": 271420 }, { "epoch": 1.0492724714323267, "grad_norm": 0.10445299744606018, "learning_rate": 0.002, "loss": 2.3331, "step": 271430 }, { "epoch": 1.04931112863571, "grad_norm": 0.12577003240585327, "learning_rate": 0.002, "loss": 2.3389, "step": 271440 }, { "epoch": 1.0493497858390932, "grad_norm": 0.11436501145362854, "learning_rate": 0.002, "loss": 2.3519, "step": 271450 }, { "epoch": 1.0493884430424765, "grad_norm": 0.1273229718208313, "learning_rate": 0.002, "loss": 2.3464, "step": 271460 }, { "epoch": 1.0494271002458597, "grad_norm": 0.11267320066690445, "learning_rate": 0.002, "loss": 2.3175, "step": 271470 }, { "epoch": 1.0494657574492432, "grad_norm": 0.09845497459173203, "learning_rate": 0.002, "loss": 2.3189, "step": 271480 }, { "epoch": 1.0495044146526264, "grad_norm": 0.10332329571247101, "learning_rate": 0.002, "loss": 2.3379, "step": 271490 }, { "epoch": 1.0495430718560097, "grad_norm": 0.09913578629493713, "learning_rate": 0.002, "loss": 2.3398, "step": 271500 }, { "epoch": 1.049581729059393, "grad_norm": 0.11222704499959946, "learning_rate": 0.002, "loss": 2.3507, "step": 271510 }, { "epoch": 1.0496203862627762, "grad_norm": 0.13589952886104584, "learning_rate": 0.002, "loss": 2.3358, "step": 271520 }, { "epoch": 1.0496590434661595, "grad_norm": 0.09837301075458527, "learning_rate": 0.002, "loss": 2.3443, "step": 271530 }, { "epoch": 1.0496977006695427, "grad_norm": 0.10624351352453232, "learning_rate": 0.002, "loss": 2.3306, "step": 271540 }, { "epoch": 1.049736357872926, "grad_norm": 0.11693647503852844, "learning_rate": 0.002, "loss": 2.3349, "step": 271550 }, { "epoch": 1.0497750150763092, "grad_norm": 0.09240395575761795, "learning_rate": 0.002, "loss": 2.3362, "step": 271560 }, { "epoch": 1.0498136722796927, "grad_norm": 0.09947966039180756, "learning_rate": 0.002, "loss": 2.3376, "step": 271570 }, { "epoch": 1.049852329483076, "grad_norm": 0.10705848783254623, "learning_rate": 0.002, "loss": 2.3479, "step": 271580 }, { "epoch": 1.0498909866864592, "grad_norm": 0.1160568818449974, "learning_rate": 0.002, "loss": 2.3341, "step": 271590 }, { "epoch": 1.0499296438898424, "grad_norm": 0.10577414929866791, "learning_rate": 0.002, "loss": 2.3413, "step": 271600 }, { "epoch": 1.0499683010932257, "grad_norm": 0.09413161128759384, "learning_rate": 0.002, "loss": 2.3245, "step": 271610 }, { "epoch": 1.050006958296609, "grad_norm": 0.10215846449136734, "learning_rate": 0.002, "loss": 2.3368, "step": 271620 }, { "epoch": 1.0500456154999922, "grad_norm": 0.1168697401881218, "learning_rate": 0.002, "loss": 2.3388, "step": 271630 }, { "epoch": 1.0500842727033755, "grad_norm": 0.09911565482616425, "learning_rate": 0.002, "loss": 2.3387, "step": 271640 }, { "epoch": 1.050122929906759, "grad_norm": 0.09280645102262497, "learning_rate": 0.002, "loss": 2.3365, "step": 271650 }, { "epoch": 1.0501615871101422, "grad_norm": 0.0974106714129448, "learning_rate": 0.002, "loss": 2.3296, "step": 271660 }, { "epoch": 1.0502002443135254, "grad_norm": 0.10810904204845428, "learning_rate": 0.002, "loss": 2.3399, "step": 271670 }, { "epoch": 1.0502389015169087, "grad_norm": 0.10441002994775772, "learning_rate": 0.002, "loss": 2.3243, "step": 271680 }, { "epoch": 1.050277558720292, "grad_norm": 0.11806993186473846, "learning_rate": 0.002, "loss": 2.3464, "step": 271690 }, { "epoch": 1.0503162159236752, "grad_norm": 0.10563837736845016, "learning_rate": 0.002, "loss": 2.3499, "step": 271700 }, { "epoch": 1.0503548731270584, "grad_norm": 0.10502533614635468, "learning_rate": 0.002, "loss": 2.349, "step": 271710 }, { "epoch": 1.0503935303304417, "grad_norm": 0.10693574696779251, "learning_rate": 0.002, "loss": 2.3368, "step": 271720 }, { "epoch": 1.050432187533825, "grad_norm": 0.09122729301452637, "learning_rate": 0.002, "loss": 2.3158, "step": 271730 }, { "epoch": 1.0504708447372084, "grad_norm": 0.11178099364042282, "learning_rate": 0.002, "loss": 2.3423, "step": 271740 }, { "epoch": 1.0505095019405917, "grad_norm": 0.11062789708375931, "learning_rate": 0.002, "loss": 2.3325, "step": 271750 }, { "epoch": 1.050548159143975, "grad_norm": 0.09843680262565613, "learning_rate": 0.002, "loss": 2.3417, "step": 271760 }, { "epoch": 1.0505868163473582, "grad_norm": 0.1031612977385521, "learning_rate": 0.002, "loss": 2.348, "step": 271770 }, { "epoch": 1.0506254735507414, "grad_norm": 0.12016744911670685, "learning_rate": 0.002, "loss": 2.3318, "step": 271780 }, { "epoch": 1.0506641307541247, "grad_norm": 0.11719794571399689, "learning_rate": 0.002, "loss": 2.3255, "step": 271790 }, { "epoch": 1.050702787957508, "grad_norm": 0.09663298726081848, "learning_rate": 0.002, "loss": 2.3241, "step": 271800 }, { "epoch": 1.0507414451608912, "grad_norm": 0.12161391973495483, "learning_rate": 0.002, "loss": 2.3175, "step": 271810 }, { "epoch": 1.0507801023642747, "grad_norm": 0.12131401151418686, "learning_rate": 0.002, "loss": 2.3515, "step": 271820 }, { "epoch": 1.050818759567658, "grad_norm": 0.0994456559419632, "learning_rate": 0.002, "loss": 2.3268, "step": 271830 }, { "epoch": 1.0508574167710412, "grad_norm": 0.09268373996019363, "learning_rate": 0.002, "loss": 2.33, "step": 271840 }, { "epoch": 1.0508960739744244, "grad_norm": 0.09143264591693878, "learning_rate": 0.002, "loss": 2.3293, "step": 271850 }, { "epoch": 1.0509347311778077, "grad_norm": 0.10940791666507721, "learning_rate": 0.002, "loss": 2.3389, "step": 271860 }, { "epoch": 1.050973388381191, "grad_norm": 0.14378370344638824, "learning_rate": 0.002, "loss": 2.3522, "step": 271870 }, { "epoch": 1.0510120455845742, "grad_norm": 0.09318773448467255, "learning_rate": 0.002, "loss": 2.333, "step": 271880 }, { "epoch": 1.0510507027879574, "grad_norm": 0.11682747304439545, "learning_rate": 0.002, "loss": 2.344, "step": 271890 }, { "epoch": 1.051089359991341, "grad_norm": 0.10547391325235367, "learning_rate": 0.002, "loss": 2.3401, "step": 271900 }, { "epoch": 1.0511280171947242, "grad_norm": 0.09252913296222687, "learning_rate": 0.002, "loss": 2.3498, "step": 271910 }, { "epoch": 1.0511666743981074, "grad_norm": 0.12861141562461853, "learning_rate": 0.002, "loss": 2.333, "step": 271920 }, { "epoch": 1.0512053316014907, "grad_norm": 0.09657534211874008, "learning_rate": 0.002, "loss": 2.3535, "step": 271930 }, { "epoch": 1.051243988804874, "grad_norm": 0.10140632092952728, "learning_rate": 0.002, "loss": 2.3352, "step": 271940 }, { "epoch": 1.0512826460082572, "grad_norm": 0.10031257569789886, "learning_rate": 0.002, "loss": 2.3207, "step": 271950 }, { "epoch": 1.0513213032116404, "grad_norm": 0.10987421125173569, "learning_rate": 0.002, "loss": 2.3298, "step": 271960 }, { "epoch": 1.0513599604150237, "grad_norm": 0.10414572060108185, "learning_rate": 0.002, "loss": 2.3431, "step": 271970 }, { "epoch": 1.051398617618407, "grad_norm": 0.12101298570632935, "learning_rate": 0.002, "loss": 2.3517, "step": 271980 }, { "epoch": 1.0514372748217904, "grad_norm": 0.09400589764118195, "learning_rate": 0.002, "loss": 2.3319, "step": 271990 }, { "epoch": 1.0514759320251736, "grad_norm": 0.10872562229633331, "learning_rate": 0.002, "loss": 2.3225, "step": 272000 }, { "epoch": 1.051514589228557, "grad_norm": 0.11514881998300552, "learning_rate": 0.002, "loss": 2.3464, "step": 272010 }, { "epoch": 1.0515532464319401, "grad_norm": 0.11787637323141098, "learning_rate": 0.002, "loss": 2.3232, "step": 272020 }, { "epoch": 1.0515919036353234, "grad_norm": 0.12149105221033096, "learning_rate": 0.002, "loss": 2.3357, "step": 272030 }, { "epoch": 1.0516305608387067, "grad_norm": 0.10233311355113983, "learning_rate": 0.002, "loss": 2.3338, "step": 272040 }, { "epoch": 1.05166921804209, "grad_norm": 0.11979241669178009, "learning_rate": 0.002, "loss": 2.3277, "step": 272050 }, { "epoch": 1.0517078752454732, "grad_norm": 0.10569998621940613, "learning_rate": 0.002, "loss": 2.3385, "step": 272060 }, { "epoch": 1.0517465324488566, "grad_norm": 0.09993526339530945, "learning_rate": 0.002, "loss": 2.3346, "step": 272070 }, { "epoch": 1.0517851896522399, "grad_norm": 0.11679543554782867, "learning_rate": 0.002, "loss": 2.3321, "step": 272080 }, { "epoch": 1.0518238468556231, "grad_norm": 0.09242987632751465, "learning_rate": 0.002, "loss": 2.3323, "step": 272090 }, { "epoch": 1.0518625040590064, "grad_norm": 0.0978369191288948, "learning_rate": 0.002, "loss": 2.3412, "step": 272100 }, { "epoch": 1.0519011612623896, "grad_norm": 0.10136271268129349, "learning_rate": 0.002, "loss": 2.3512, "step": 272110 }, { "epoch": 1.051939818465773, "grad_norm": 0.10128985345363617, "learning_rate": 0.002, "loss": 2.3421, "step": 272120 }, { "epoch": 1.0519784756691561, "grad_norm": 0.0996379554271698, "learning_rate": 0.002, "loss": 2.3458, "step": 272130 }, { "epoch": 1.0520171328725394, "grad_norm": 0.09816455841064453, "learning_rate": 0.002, "loss": 2.3259, "step": 272140 }, { "epoch": 1.0520557900759226, "grad_norm": 0.13871274888515472, "learning_rate": 0.002, "loss": 2.3319, "step": 272150 }, { "epoch": 1.0520944472793061, "grad_norm": 0.10068456828594208, "learning_rate": 0.002, "loss": 2.339, "step": 272160 }, { "epoch": 1.0521331044826894, "grad_norm": 0.10515620559453964, "learning_rate": 0.002, "loss": 2.316, "step": 272170 }, { "epoch": 1.0521717616860726, "grad_norm": 0.11310466378927231, "learning_rate": 0.002, "loss": 2.3322, "step": 272180 }, { "epoch": 1.0522104188894559, "grad_norm": 0.10717508941888809, "learning_rate": 0.002, "loss": 2.3369, "step": 272190 }, { "epoch": 1.0522490760928391, "grad_norm": 0.10464346408843994, "learning_rate": 0.002, "loss": 2.3448, "step": 272200 }, { "epoch": 1.0522877332962224, "grad_norm": 0.10172848403453827, "learning_rate": 0.002, "loss": 2.3425, "step": 272210 }, { "epoch": 1.0523263904996056, "grad_norm": 0.10452727228403091, "learning_rate": 0.002, "loss": 2.3317, "step": 272220 }, { "epoch": 1.0523650477029889, "grad_norm": 0.15429966151714325, "learning_rate": 0.002, "loss": 2.3429, "step": 272230 }, { "epoch": 1.0524037049063724, "grad_norm": 0.09801948070526123, "learning_rate": 0.002, "loss": 2.3336, "step": 272240 }, { "epoch": 1.0524423621097556, "grad_norm": 0.09865277260541916, "learning_rate": 0.002, "loss": 2.3284, "step": 272250 }, { "epoch": 1.0524810193131389, "grad_norm": 0.11756690591573715, "learning_rate": 0.002, "loss": 2.3488, "step": 272260 }, { "epoch": 1.0525196765165221, "grad_norm": 0.11615061014890671, "learning_rate": 0.002, "loss": 2.3319, "step": 272270 }, { "epoch": 1.0525583337199054, "grad_norm": 0.1278165876865387, "learning_rate": 0.002, "loss": 2.3503, "step": 272280 }, { "epoch": 1.0525969909232886, "grad_norm": 0.10149218887090683, "learning_rate": 0.002, "loss": 2.3318, "step": 272290 }, { "epoch": 1.0526356481266719, "grad_norm": 0.1761365681886673, "learning_rate": 0.002, "loss": 2.3327, "step": 272300 }, { "epoch": 1.0526743053300551, "grad_norm": 0.09013681858778, "learning_rate": 0.002, "loss": 2.3324, "step": 272310 }, { "epoch": 1.0527129625334384, "grad_norm": 0.10677545517683029, "learning_rate": 0.002, "loss": 2.335, "step": 272320 }, { "epoch": 1.0527516197368219, "grad_norm": 0.11683090031147003, "learning_rate": 0.002, "loss": 2.3395, "step": 272330 }, { "epoch": 1.052790276940205, "grad_norm": 0.10520520806312561, "learning_rate": 0.002, "loss": 2.3577, "step": 272340 }, { "epoch": 1.0528289341435884, "grad_norm": 0.10680434852838516, "learning_rate": 0.002, "loss": 2.3434, "step": 272350 }, { "epoch": 1.0528675913469716, "grad_norm": 0.12004678696393967, "learning_rate": 0.002, "loss": 2.3245, "step": 272360 }, { "epoch": 1.0529062485503549, "grad_norm": 0.0969165787100792, "learning_rate": 0.002, "loss": 2.3594, "step": 272370 }, { "epoch": 1.0529449057537381, "grad_norm": 0.17345726490020752, "learning_rate": 0.002, "loss": 2.3457, "step": 272380 }, { "epoch": 1.0529835629571214, "grad_norm": 0.10361625254154205, "learning_rate": 0.002, "loss": 2.3353, "step": 272390 }, { "epoch": 1.0530222201605046, "grad_norm": 0.1054096519947052, "learning_rate": 0.002, "loss": 2.3417, "step": 272400 }, { "epoch": 1.053060877363888, "grad_norm": 0.09320559352636337, "learning_rate": 0.002, "loss": 2.3273, "step": 272410 }, { "epoch": 1.0530995345672713, "grad_norm": 0.0882129818201065, "learning_rate": 0.002, "loss": 2.3424, "step": 272420 }, { "epoch": 1.0531381917706546, "grad_norm": 0.12000753730535507, "learning_rate": 0.002, "loss": 2.335, "step": 272430 }, { "epoch": 1.0531768489740378, "grad_norm": 0.11470437794923782, "learning_rate": 0.002, "loss": 2.3365, "step": 272440 }, { "epoch": 1.053215506177421, "grad_norm": 0.11474449932575226, "learning_rate": 0.002, "loss": 2.3475, "step": 272450 }, { "epoch": 1.0532541633808044, "grad_norm": 0.10656072199344635, "learning_rate": 0.002, "loss": 2.3264, "step": 272460 }, { "epoch": 1.0532928205841876, "grad_norm": 0.10870281606912613, "learning_rate": 0.002, "loss": 2.3308, "step": 272470 }, { "epoch": 1.0533314777875709, "grad_norm": 0.09416912496089935, "learning_rate": 0.002, "loss": 2.3276, "step": 272480 }, { "epoch": 1.053370134990954, "grad_norm": 0.11438991129398346, "learning_rate": 0.002, "loss": 2.3397, "step": 272490 }, { "epoch": 1.0534087921943376, "grad_norm": 0.0996420681476593, "learning_rate": 0.002, "loss": 2.3481, "step": 272500 }, { "epoch": 1.0534474493977208, "grad_norm": 0.11108607798814774, "learning_rate": 0.002, "loss": 2.3307, "step": 272510 }, { "epoch": 1.053486106601104, "grad_norm": 0.09957139939069748, "learning_rate": 0.002, "loss": 2.3265, "step": 272520 }, { "epoch": 1.0535247638044873, "grad_norm": 0.11952586472034454, "learning_rate": 0.002, "loss": 2.3265, "step": 272530 }, { "epoch": 1.0535634210078706, "grad_norm": 0.22855331003665924, "learning_rate": 0.002, "loss": 2.3306, "step": 272540 }, { "epoch": 1.0536020782112538, "grad_norm": 0.10648853331804276, "learning_rate": 0.002, "loss": 2.3465, "step": 272550 }, { "epoch": 1.053640735414637, "grad_norm": 0.10383137315511703, "learning_rate": 0.002, "loss": 2.3265, "step": 272560 }, { "epoch": 1.0536793926180203, "grad_norm": 0.10470456629991531, "learning_rate": 0.002, "loss": 2.3378, "step": 272570 }, { "epoch": 1.0537180498214038, "grad_norm": 0.09900840371847153, "learning_rate": 0.002, "loss": 2.3306, "step": 272580 }, { "epoch": 1.053756707024787, "grad_norm": 0.09683623909950256, "learning_rate": 0.002, "loss": 2.3145, "step": 272590 }, { "epoch": 1.0537953642281703, "grad_norm": 0.09178218990564346, "learning_rate": 0.002, "loss": 2.3443, "step": 272600 }, { "epoch": 1.0538340214315536, "grad_norm": 0.10908043384552002, "learning_rate": 0.002, "loss": 2.324, "step": 272610 }, { "epoch": 1.0538726786349368, "grad_norm": 0.1315956562757492, "learning_rate": 0.002, "loss": 2.3316, "step": 272620 }, { "epoch": 1.05391133583832, "grad_norm": 0.10674086958169937, "learning_rate": 0.002, "loss": 2.3456, "step": 272630 }, { "epoch": 1.0539499930417033, "grad_norm": 0.10858672112226486, "learning_rate": 0.002, "loss": 2.3395, "step": 272640 }, { "epoch": 1.0539886502450866, "grad_norm": 0.0922374576330185, "learning_rate": 0.002, "loss": 2.341, "step": 272650 }, { "epoch": 1.0540273074484698, "grad_norm": 0.10420143604278564, "learning_rate": 0.002, "loss": 2.3288, "step": 272660 }, { "epoch": 1.0540659646518533, "grad_norm": 0.10324354469776154, "learning_rate": 0.002, "loss": 2.3516, "step": 272670 }, { "epoch": 1.0541046218552366, "grad_norm": 0.12462551146745682, "learning_rate": 0.002, "loss": 2.3369, "step": 272680 }, { "epoch": 1.0541432790586198, "grad_norm": 0.10152140259742737, "learning_rate": 0.002, "loss": 2.3361, "step": 272690 }, { "epoch": 1.054181936262003, "grad_norm": 0.1041497215628624, "learning_rate": 0.002, "loss": 2.333, "step": 272700 }, { "epoch": 1.0542205934653863, "grad_norm": 0.10439198464155197, "learning_rate": 0.002, "loss": 2.3359, "step": 272710 }, { "epoch": 1.0542592506687696, "grad_norm": 0.09257284551858902, "learning_rate": 0.002, "loss": 2.3292, "step": 272720 }, { "epoch": 1.0542979078721528, "grad_norm": 0.11677654087543488, "learning_rate": 0.002, "loss": 2.3444, "step": 272730 }, { "epoch": 1.054336565075536, "grad_norm": 0.12653188407421112, "learning_rate": 0.002, "loss": 2.3369, "step": 272740 }, { "epoch": 1.0543752222789196, "grad_norm": 0.10007433593273163, "learning_rate": 0.002, "loss": 2.3388, "step": 272750 }, { "epoch": 1.0544138794823028, "grad_norm": 0.10211939364671707, "learning_rate": 0.002, "loss": 2.3586, "step": 272760 }, { "epoch": 1.054452536685686, "grad_norm": 0.09609940648078918, "learning_rate": 0.002, "loss": 2.3514, "step": 272770 }, { "epoch": 1.0544911938890693, "grad_norm": 0.11618515849113464, "learning_rate": 0.002, "loss": 2.3342, "step": 272780 }, { "epoch": 1.0545298510924526, "grad_norm": 0.10952292382717133, "learning_rate": 0.002, "loss": 2.3263, "step": 272790 }, { "epoch": 1.0545685082958358, "grad_norm": 0.11988136917352676, "learning_rate": 0.002, "loss": 2.3414, "step": 272800 }, { "epoch": 1.054607165499219, "grad_norm": 0.09441050887107849, "learning_rate": 0.002, "loss": 2.338, "step": 272810 }, { "epoch": 1.0546458227026023, "grad_norm": 0.11049690842628479, "learning_rate": 0.002, "loss": 2.3153, "step": 272820 }, { "epoch": 1.0546844799059856, "grad_norm": 0.10169093310832977, "learning_rate": 0.002, "loss": 2.3453, "step": 272830 }, { "epoch": 1.054723137109369, "grad_norm": 0.09456352889537811, "learning_rate": 0.002, "loss": 2.3296, "step": 272840 }, { "epoch": 1.0547617943127523, "grad_norm": 0.09587942808866501, "learning_rate": 0.002, "loss": 2.3337, "step": 272850 }, { "epoch": 1.0548004515161356, "grad_norm": 0.10359784215688705, "learning_rate": 0.002, "loss": 2.3143, "step": 272860 }, { "epoch": 1.0548391087195188, "grad_norm": 0.11548492312431335, "learning_rate": 0.002, "loss": 2.3279, "step": 272870 }, { "epoch": 1.054877765922902, "grad_norm": 0.11817555874586105, "learning_rate": 0.002, "loss": 2.3268, "step": 272880 }, { "epoch": 1.0549164231262853, "grad_norm": 0.1032852828502655, "learning_rate": 0.002, "loss": 2.3346, "step": 272890 }, { "epoch": 1.0549550803296686, "grad_norm": 0.08856106549501419, "learning_rate": 0.002, "loss": 2.3218, "step": 272900 }, { "epoch": 1.0549937375330518, "grad_norm": 0.10905643552541733, "learning_rate": 0.002, "loss": 2.3384, "step": 272910 }, { "epoch": 1.0550323947364353, "grad_norm": 0.11078491061925888, "learning_rate": 0.002, "loss": 2.3374, "step": 272920 }, { "epoch": 1.0550710519398185, "grad_norm": 0.09490056335926056, "learning_rate": 0.002, "loss": 2.3384, "step": 272930 }, { "epoch": 1.0551097091432018, "grad_norm": 0.09796487540006638, "learning_rate": 0.002, "loss": 2.3344, "step": 272940 }, { "epoch": 1.055148366346585, "grad_norm": 0.10905220359563828, "learning_rate": 0.002, "loss": 2.3373, "step": 272950 }, { "epoch": 1.0551870235499683, "grad_norm": 0.09488983452320099, "learning_rate": 0.002, "loss": 2.3206, "step": 272960 }, { "epoch": 1.0552256807533515, "grad_norm": 0.09224867075681686, "learning_rate": 0.002, "loss": 2.3396, "step": 272970 }, { "epoch": 1.0552643379567348, "grad_norm": 0.1428574025630951, "learning_rate": 0.002, "loss": 2.3349, "step": 272980 }, { "epoch": 1.055302995160118, "grad_norm": 0.11071029305458069, "learning_rate": 0.002, "loss": 2.3384, "step": 272990 }, { "epoch": 1.0553416523635013, "grad_norm": 0.0985381081700325, "learning_rate": 0.002, "loss": 2.3456, "step": 273000 }, { "epoch": 1.0553803095668848, "grad_norm": 0.10195576399564743, "learning_rate": 0.002, "loss": 2.3426, "step": 273010 }, { "epoch": 1.055418966770268, "grad_norm": 0.12107205390930176, "learning_rate": 0.002, "loss": 2.3272, "step": 273020 }, { "epoch": 1.0554576239736513, "grad_norm": 0.0928567424416542, "learning_rate": 0.002, "loss": 2.327, "step": 273030 }, { "epoch": 1.0554962811770345, "grad_norm": 0.10422313958406448, "learning_rate": 0.002, "loss": 2.3241, "step": 273040 }, { "epoch": 1.0555349383804178, "grad_norm": 0.1095958948135376, "learning_rate": 0.002, "loss": 2.3405, "step": 273050 }, { "epoch": 1.055573595583801, "grad_norm": 0.10636148601770401, "learning_rate": 0.002, "loss": 2.3362, "step": 273060 }, { "epoch": 1.0556122527871843, "grad_norm": 0.12696272134780884, "learning_rate": 0.002, "loss": 2.3445, "step": 273070 }, { "epoch": 1.0556509099905675, "grad_norm": 0.09934327751398087, "learning_rate": 0.002, "loss": 2.34, "step": 273080 }, { "epoch": 1.055689567193951, "grad_norm": 0.09703146666288376, "learning_rate": 0.002, "loss": 2.3352, "step": 273090 }, { "epoch": 1.0557282243973343, "grad_norm": 0.11481180787086487, "learning_rate": 0.002, "loss": 2.325, "step": 273100 }, { "epoch": 1.0557668816007175, "grad_norm": 0.11768441647291183, "learning_rate": 0.002, "loss": 2.3334, "step": 273110 }, { "epoch": 1.0558055388041008, "grad_norm": 0.1007218062877655, "learning_rate": 0.002, "loss": 2.3316, "step": 273120 }, { "epoch": 1.055844196007484, "grad_norm": 0.11573312431573868, "learning_rate": 0.002, "loss": 2.3428, "step": 273130 }, { "epoch": 1.0558828532108673, "grad_norm": 0.0907236784696579, "learning_rate": 0.002, "loss": 2.3285, "step": 273140 }, { "epoch": 1.0559215104142505, "grad_norm": 0.0962614119052887, "learning_rate": 0.002, "loss": 2.3369, "step": 273150 }, { "epoch": 1.0559601676176338, "grad_norm": 0.09105678647756577, "learning_rate": 0.002, "loss": 2.3262, "step": 273160 }, { "epoch": 1.055998824821017, "grad_norm": 0.09578508883714676, "learning_rate": 0.002, "loss": 2.3232, "step": 273170 }, { "epoch": 1.0560374820244005, "grad_norm": 0.09904925525188446, "learning_rate": 0.002, "loss": 2.3372, "step": 273180 }, { "epoch": 1.0560761392277838, "grad_norm": 0.09582720696926117, "learning_rate": 0.002, "loss": 2.3343, "step": 273190 }, { "epoch": 1.056114796431167, "grad_norm": 0.11303769797086716, "learning_rate": 0.002, "loss": 2.343, "step": 273200 }, { "epoch": 1.0561534536345503, "grad_norm": 0.10091835260391235, "learning_rate": 0.002, "loss": 2.3269, "step": 273210 }, { "epoch": 1.0561921108379335, "grad_norm": 0.10374817997217178, "learning_rate": 0.002, "loss": 2.3385, "step": 273220 }, { "epoch": 1.0562307680413168, "grad_norm": 0.14822401106357574, "learning_rate": 0.002, "loss": 2.3394, "step": 273230 }, { "epoch": 1.0562694252447, "grad_norm": 0.08519788831472397, "learning_rate": 0.002, "loss": 2.3417, "step": 273240 }, { "epoch": 1.0563080824480833, "grad_norm": 0.10144971311092377, "learning_rate": 0.002, "loss": 2.3415, "step": 273250 }, { "epoch": 1.0563467396514667, "grad_norm": 0.13993439078330994, "learning_rate": 0.002, "loss": 2.3379, "step": 273260 }, { "epoch": 1.05638539685485, "grad_norm": 0.10113970935344696, "learning_rate": 0.002, "loss": 2.3445, "step": 273270 }, { "epoch": 1.0564240540582333, "grad_norm": 0.10195260494947433, "learning_rate": 0.002, "loss": 2.3294, "step": 273280 }, { "epoch": 1.0564627112616165, "grad_norm": 0.10974332690238953, "learning_rate": 0.002, "loss": 2.3477, "step": 273290 }, { "epoch": 1.0565013684649998, "grad_norm": 0.09610135853290558, "learning_rate": 0.002, "loss": 2.3345, "step": 273300 }, { "epoch": 1.056540025668383, "grad_norm": 0.10756216943264008, "learning_rate": 0.002, "loss": 2.3274, "step": 273310 }, { "epoch": 1.0565786828717663, "grad_norm": 0.09635698795318604, "learning_rate": 0.002, "loss": 2.341, "step": 273320 }, { "epoch": 1.0566173400751495, "grad_norm": 0.11100976914167404, "learning_rate": 0.002, "loss": 2.3273, "step": 273330 }, { "epoch": 1.056655997278533, "grad_norm": 0.093922458589077, "learning_rate": 0.002, "loss": 2.3479, "step": 273340 }, { "epoch": 1.0566946544819162, "grad_norm": 0.0890863686800003, "learning_rate": 0.002, "loss": 2.3252, "step": 273350 }, { "epoch": 1.0567333116852995, "grad_norm": 0.11728120595216751, "learning_rate": 0.002, "loss": 2.3307, "step": 273360 }, { "epoch": 1.0567719688886827, "grad_norm": 0.12343502789735794, "learning_rate": 0.002, "loss": 2.3299, "step": 273370 }, { "epoch": 1.056810626092066, "grad_norm": 0.112137570977211, "learning_rate": 0.002, "loss": 2.3298, "step": 273380 }, { "epoch": 1.0568492832954492, "grad_norm": 0.10779105126857758, "learning_rate": 0.002, "loss": 2.3237, "step": 273390 }, { "epoch": 1.0568879404988325, "grad_norm": 0.0963764414191246, "learning_rate": 0.002, "loss": 2.3309, "step": 273400 }, { "epoch": 1.0569265977022158, "grad_norm": 0.09978457540273666, "learning_rate": 0.002, "loss": 2.3524, "step": 273410 }, { "epoch": 1.056965254905599, "grad_norm": 0.11221887916326523, "learning_rate": 0.002, "loss": 2.3213, "step": 273420 }, { "epoch": 1.0570039121089825, "grad_norm": 0.12512801587581635, "learning_rate": 0.002, "loss": 2.3401, "step": 273430 }, { "epoch": 1.0570425693123657, "grad_norm": 0.11520524322986603, "learning_rate": 0.002, "loss": 2.34, "step": 273440 }, { "epoch": 1.057081226515749, "grad_norm": 0.09933692216873169, "learning_rate": 0.002, "loss": 2.3274, "step": 273450 }, { "epoch": 1.0571198837191322, "grad_norm": 0.09837357699871063, "learning_rate": 0.002, "loss": 2.3347, "step": 273460 }, { "epoch": 1.0571585409225155, "grad_norm": 0.09668579697608948, "learning_rate": 0.002, "loss": 2.3429, "step": 273470 }, { "epoch": 1.0571971981258987, "grad_norm": 0.10653632134199142, "learning_rate": 0.002, "loss": 2.319, "step": 273480 }, { "epoch": 1.057235855329282, "grad_norm": 0.1118902936577797, "learning_rate": 0.002, "loss": 2.3329, "step": 273490 }, { "epoch": 1.0572745125326652, "grad_norm": 0.11374566704034805, "learning_rate": 0.002, "loss": 2.3422, "step": 273500 }, { "epoch": 1.0573131697360487, "grad_norm": 0.09796427935361862, "learning_rate": 0.002, "loss": 2.3424, "step": 273510 }, { "epoch": 1.057351826939432, "grad_norm": 0.10345358401536942, "learning_rate": 0.002, "loss": 2.3414, "step": 273520 }, { "epoch": 1.0573904841428152, "grad_norm": 0.12856706976890564, "learning_rate": 0.002, "loss": 2.3497, "step": 273530 }, { "epoch": 1.0574291413461985, "grad_norm": 0.10860460251569748, "learning_rate": 0.002, "loss": 2.3414, "step": 273540 }, { "epoch": 1.0574677985495817, "grad_norm": 0.09792128205299377, "learning_rate": 0.002, "loss": 2.3165, "step": 273550 }, { "epoch": 1.057506455752965, "grad_norm": 0.10655204206705093, "learning_rate": 0.002, "loss": 2.3269, "step": 273560 }, { "epoch": 1.0575451129563482, "grad_norm": 0.10076314955949783, "learning_rate": 0.002, "loss": 2.3435, "step": 273570 }, { "epoch": 1.0575837701597315, "grad_norm": 0.0985211580991745, "learning_rate": 0.002, "loss": 2.3417, "step": 273580 }, { "epoch": 1.0576224273631147, "grad_norm": 0.09232288599014282, "learning_rate": 0.002, "loss": 2.3457, "step": 273590 }, { "epoch": 1.0576610845664982, "grad_norm": 0.10147222131490707, "learning_rate": 0.002, "loss": 2.3281, "step": 273600 }, { "epoch": 1.0576997417698815, "grad_norm": 0.11548271775245667, "learning_rate": 0.002, "loss": 2.3362, "step": 273610 }, { "epoch": 1.0577383989732647, "grad_norm": 0.10642686486244202, "learning_rate": 0.002, "loss": 2.3393, "step": 273620 }, { "epoch": 1.057777056176648, "grad_norm": 0.10518043488264084, "learning_rate": 0.002, "loss": 2.3226, "step": 273630 }, { "epoch": 1.0578157133800312, "grad_norm": 0.10492758452892303, "learning_rate": 0.002, "loss": 2.3087, "step": 273640 }, { "epoch": 1.0578543705834145, "grad_norm": 0.10234058648347855, "learning_rate": 0.002, "loss": 2.3447, "step": 273650 }, { "epoch": 1.0578930277867977, "grad_norm": 0.09478472173213959, "learning_rate": 0.002, "loss": 2.3317, "step": 273660 }, { "epoch": 1.057931684990181, "grad_norm": 0.10300685465335846, "learning_rate": 0.002, "loss": 2.3439, "step": 273670 }, { "epoch": 1.0579703421935645, "grad_norm": 0.09842425584793091, "learning_rate": 0.002, "loss": 2.3261, "step": 273680 }, { "epoch": 1.0580089993969477, "grad_norm": 0.11219187080860138, "learning_rate": 0.002, "loss": 2.3379, "step": 273690 }, { "epoch": 1.058047656600331, "grad_norm": 0.11639194190502167, "learning_rate": 0.002, "loss": 2.3449, "step": 273700 }, { "epoch": 1.0580863138037142, "grad_norm": 0.1035163626074791, "learning_rate": 0.002, "loss": 2.3366, "step": 273710 }, { "epoch": 1.0581249710070975, "grad_norm": 0.11216370761394501, "learning_rate": 0.002, "loss": 2.3327, "step": 273720 }, { "epoch": 1.0581636282104807, "grad_norm": 0.12635914981365204, "learning_rate": 0.002, "loss": 2.3308, "step": 273730 }, { "epoch": 1.058202285413864, "grad_norm": 0.10730802267789841, "learning_rate": 0.002, "loss": 2.328, "step": 273740 }, { "epoch": 1.0582409426172472, "grad_norm": 0.13340860605239868, "learning_rate": 0.002, "loss": 2.3273, "step": 273750 }, { "epoch": 1.0582795998206307, "grad_norm": 0.11904604732990265, "learning_rate": 0.002, "loss": 2.3363, "step": 273760 }, { "epoch": 1.058318257024014, "grad_norm": 0.09762408584356308, "learning_rate": 0.002, "loss": 2.3307, "step": 273770 }, { "epoch": 1.0583569142273972, "grad_norm": 0.09792578965425491, "learning_rate": 0.002, "loss": 2.3294, "step": 273780 }, { "epoch": 1.0583955714307804, "grad_norm": 0.09640190005302429, "learning_rate": 0.002, "loss": 2.3424, "step": 273790 }, { "epoch": 1.0584342286341637, "grad_norm": 0.10028199106454849, "learning_rate": 0.002, "loss": 2.3368, "step": 273800 }, { "epoch": 1.058472885837547, "grad_norm": 0.09049591422080994, "learning_rate": 0.002, "loss": 2.3391, "step": 273810 }, { "epoch": 1.0585115430409302, "grad_norm": 0.10669384151697159, "learning_rate": 0.002, "loss": 2.3393, "step": 273820 }, { "epoch": 1.0585502002443135, "grad_norm": 0.11356060951948166, "learning_rate": 0.002, "loss": 2.3499, "step": 273830 }, { "epoch": 1.0585888574476967, "grad_norm": 0.09744501858949661, "learning_rate": 0.002, "loss": 2.3418, "step": 273840 }, { "epoch": 1.0586275146510802, "grad_norm": 0.13544464111328125, "learning_rate": 0.002, "loss": 2.322, "step": 273850 }, { "epoch": 1.0586661718544634, "grad_norm": 0.11132447421550751, "learning_rate": 0.002, "loss": 2.3373, "step": 273860 }, { "epoch": 1.0587048290578467, "grad_norm": 0.11165434122085571, "learning_rate": 0.002, "loss": 2.3438, "step": 273870 }, { "epoch": 1.05874348626123, "grad_norm": 0.09332270920276642, "learning_rate": 0.002, "loss": 2.3425, "step": 273880 }, { "epoch": 1.0587821434646132, "grad_norm": 0.10789580643177032, "learning_rate": 0.002, "loss": 2.3331, "step": 273890 }, { "epoch": 1.0588208006679964, "grad_norm": 0.10863988101482391, "learning_rate": 0.002, "loss": 2.3543, "step": 273900 }, { "epoch": 1.0588594578713797, "grad_norm": 0.11976487934589386, "learning_rate": 0.002, "loss": 2.337, "step": 273910 }, { "epoch": 1.058898115074763, "grad_norm": 0.1331743597984314, "learning_rate": 0.002, "loss": 2.344, "step": 273920 }, { "epoch": 1.0589367722781464, "grad_norm": 0.09090913832187653, "learning_rate": 0.002, "loss": 2.341, "step": 273930 }, { "epoch": 1.0589754294815297, "grad_norm": 0.10430900752544403, "learning_rate": 0.002, "loss": 2.3191, "step": 273940 }, { "epoch": 1.059014086684913, "grad_norm": 0.08921810984611511, "learning_rate": 0.002, "loss": 2.3384, "step": 273950 }, { "epoch": 1.0590527438882962, "grad_norm": 0.10172532498836517, "learning_rate": 0.002, "loss": 2.3312, "step": 273960 }, { "epoch": 1.0590914010916794, "grad_norm": 0.11895634233951569, "learning_rate": 0.002, "loss": 2.3368, "step": 273970 }, { "epoch": 1.0591300582950627, "grad_norm": 0.12765932083129883, "learning_rate": 0.002, "loss": 2.3675, "step": 273980 }, { "epoch": 1.059168715498446, "grad_norm": 0.09840631484985352, "learning_rate": 0.002, "loss": 2.3306, "step": 273990 }, { "epoch": 1.0592073727018292, "grad_norm": 0.1438383013010025, "learning_rate": 0.002, "loss": 2.3195, "step": 274000 }, { "epoch": 1.0592460299052124, "grad_norm": 0.09646342694759369, "learning_rate": 0.002, "loss": 2.3222, "step": 274010 }, { "epoch": 1.059284687108596, "grad_norm": 0.10129228979349136, "learning_rate": 0.002, "loss": 2.341, "step": 274020 }, { "epoch": 1.0593233443119792, "grad_norm": 0.13618502020835876, "learning_rate": 0.002, "loss": 2.3318, "step": 274030 }, { "epoch": 1.0593620015153624, "grad_norm": 0.11484713852405548, "learning_rate": 0.002, "loss": 2.3246, "step": 274040 }, { "epoch": 1.0594006587187457, "grad_norm": 0.09245438128709793, "learning_rate": 0.002, "loss": 2.3501, "step": 274050 }, { "epoch": 1.059439315922129, "grad_norm": 0.0981937125325203, "learning_rate": 0.002, "loss": 2.3401, "step": 274060 }, { "epoch": 1.0594779731255122, "grad_norm": 0.13430973887443542, "learning_rate": 0.002, "loss": 2.3409, "step": 274070 }, { "epoch": 1.0595166303288954, "grad_norm": 0.10338521003723145, "learning_rate": 0.002, "loss": 2.3225, "step": 274080 }, { "epoch": 1.0595552875322787, "grad_norm": 0.09139207750558853, "learning_rate": 0.002, "loss": 2.3344, "step": 274090 }, { "epoch": 1.0595939447356622, "grad_norm": 0.10357996821403503, "learning_rate": 0.002, "loss": 2.3412, "step": 274100 }, { "epoch": 1.0596326019390454, "grad_norm": 0.1256159543991089, "learning_rate": 0.002, "loss": 2.3352, "step": 274110 }, { "epoch": 1.0596712591424287, "grad_norm": 0.10506030172109604, "learning_rate": 0.002, "loss": 2.341, "step": 274120 }, { "epoch": 1.059709916345812, "grad_norm": 0.16365769505500793, "learning_rate": 0.002, "loss": 2.3396, "step": 274130 }, { "epoch": 1.0597485735491952, "grad_norm": 0.10342303663492203, "learning_rate": 0.002, "loss": 2.3258, "step": 274140 }, { "epoch": 1.0597872307525784, "grad_norm": 0.10078897327184677, "learning_rate": 0.002, "loss": 2.3311, "step": 274150 }, { "epoch": 1.0598258879559617, "grad_norm": 0.14313046634197235, "learning_rate": 0.002, "loss": 2.3443, "step": 274160 }, { "epoch": 1.059864545159345, "grad_norm": 0.11433429270982742, "learning_rate": 0.002, "loss": 2.3324, "step": 274170 }, { "epoch": 1.0599032023627282, "grad_norm": 0.11010809987783432, "learning_rate": 0.002, "loss": 2.3406, "step": 274180 }, { "epoch": 1.0599418595661116, "grad_norm": 0.09769423305988312, "learning_rate": 0.002, "loss": 2.3381, "step": 274190 }, { "epoch": 1.059980516769495, "grad_norm": 0.13408128917217255, "learning_rate": 0.002, "loss": 2.351, "step": 274200 }, { "epoch": 1.0600191739728781, "grad_norm": 0.10010013729333878, "learning_rate": 0.002, "loss": 2.3308, "step": 274210 }, { "epoch": 1.0600578311762614, "grad_norm": 0.09443888068199158, "learning_rate": 0.002, "loss": 2.3428, "step": 274220 }, { "epoch": 1.0600964883796447, "grad_norm": 0.10674253851175308, "learning_rate": 0.002, "loss": 2.3406, "step": 274230 }, { "epoch": 1.060135145583028, "grad_norm": 0.09683874249458313, "learning_rate": 0.002, "loss": 2.3435, "step": 274240 }, { "epoch": 1.0601738027864112, "grad_norm": 0.11076508462429047, "learning_rate": 0.002, "loss": 2.331, "step": 274250 }, { "epoch": 1.0602124599897944, "grad_norm": 0.12387531995773315, "learning_rate": 0.002, "loss": 2.3414, "step": 274260 }, { "epoch": 1.0602511171931779, "grad_norm": 0.10102979093790054, "learning_rate": 0.002, "loss": 2.3311, "step": 274270 }, { "epoch": 1.0602897743965611, "grad_norm": 0.09049500524997711, "learning_rate": 0.002, "loss": 2.3361, "step": 274280 }, { "epoch": 1.0603284315999444, "grad_norm": 0.10448785871267319, "learning_rate": 0.002, "loss": 2.3344, "step": 274290 }, { "epoch": 1.0603670888033276, "grad_norm": 0.12147627770900726, "learning_rate": 0.002, "loss": 2.3416, "step": 274300 }, { "epoch": 1.060405746006711, "grad_norm": 0.11697643250226974, "learning_rate": 0.002, "loss": 2.3406, "step": 274310 }, { "epoch": 1.0604444032100941, "grad_norm": 0.09097470343112946, "learning_rate": 0.002, "loss": 2.3221, "step": 274320 }, { "epoch": 1.0604830604134774, "grad_norm": 0.11141699552536011, "learning_rate": 0.002, "loss": 2.3364, "step": 274330 }, { "epoch": 1.0605217176168606, "grad_norm": 0.11854037642478943, "learning_rate": 0.002, "loss": 2.3385, "step": 274340 }, { "epoch": 1.060560374820244, "grad_norm": 0.10185335576534271, "learning_rate": 0.002, "loss": 2.3511, "step": 274350 }, { "epoch": 1.0605990320236274, "grad_norm": 0.10460841655731201, "learning_rate": 0.002, "loss": 2.3314, "step": 274360 }, { "epoch": 1.0606376892270106, "grad_norm": 0.10948032140731812, "learning_rate": 0.002, "loss": 2.328, "step": 274370 }, { "epoch": 1.0606763464303939, "grad_norm": 0.14033067226409912, "learning_rate": 0.002, "loss": 2.3427, "step": 274380 }, { "epoch": 1.0607150036337771, "grad_norm": 0.10076010227203369, "learning_rate": 0.002, "loss": 2.348, "step": 274390 }, { "epoch": 1.0607536608371604, "grad_norm": 0.09487166255712509, "learning_rate": 0.002, "loss": 2.3178, "step": 274400 }, { "epoch": 1.0607923180405436, "grad_norm": 0.113301582634449, "learning_rate": 0.002, "loss": 2.3388, "step": 274410 }, { "epoch": 1.0608309752439269, "grad_norm": 0.2377205640077591, "learning_rate": 0.002, "loss": 2.3285, "step": 274420 }, { "epoch": 1.0608696324473101, "grad_norm": 0.11768513172864914, "learning_rate": 0.002, "loss": 2.3277, "step": 274430 }, { "epoch": 1.0609082896506936, "grad_norm": 0.09380123764276505, "learning_rate": 0.002, "loss": 2.325, "step": 274440 }, { "epoch": 1.0609469468540769, "grad_norm": 0.10629244893789291, "learning_rate": 0.002, "loss": 2.3316, "step": 274450 }, { "epoch": 1.0609856040574601, "grad_norm": 0.11308005452156067, "learning_rate": 0.002, "loss": 2.3391, "step": 274460 }, { "epoch": 1.0610242612608434, "grad_norm": 0.11594302952289581, "learning_rate": 0.002, "loss": 2.3184, "step": 274470 }, { "epoch": 1.0610629184642266, "grad_norm": 0.11383994668722153, "learning_rate": 0.002, "loss": 2.3272, "step": 274480 }, { "epoch": 1.0611015756676099, "grad_norm": 0.10329388827085495, "learning_rate": 0.002, "loss": 2.3342, "step": 274490 }, { "epoch": 1.0611402328709931, "grad_norm": 0.11112868785858154, "learning_rate": 0.002, "loss": 2.3474, "step": 274500 }, { "epoch": 1.0611788900743764, "grad_norm": 0.10998545587062836, "learning_rate": 0.002, "loss": 2.3361, "step": 274510 }, { "epoch": 1.0612175472777596, "grad_norm": 0.09963172674179077, "learning_rate": 0.002, "loss": 2.3458, "step": 274520 }, { "epoch": 1.061256204481143, "grad_norm": 0.11295843869447708, "learning_rate": 0.002, "loss": 2.3437, "step": 274530 }, { "epoch": 1.0612948616845264, "grad_norm": 0.10796195268630981, "learning_rate": 0.002, "loss": 2.3171, "step": 274540 }, { "epoch": 1.0613335188879096, "grad_norm": 0.14060470461845398, "learning_rate": 0.002, "loss": 2.3236, "step": 274550 }, { "epoch": 1.0613721760912929, "grad_norm": 0.09685548394918442, "learning_rate": 0.002, "loss": 2.3391, "step": 274560 }, { "epoch": 1.0614108332946761, "grad_norm": 0.105289526283741, "learning_rate": 0.002, "loss": 2.3324, "step": 274570 }, { "epoch": 1.0614494904980594, "grad_norm": 0.11777745932340622, "learning_rate": 0.002, "loss": 2.3224, "step": 274580 }, { "epoch": 1.0614881477014426, "grad_norm": 0.11257560551166534, "learning_rate": 0.002, "loss": 2.3267, "step": 274590 }, { "epoch": 1.0615268049048259, "grad_norm": 0.11551818251609802, "learning_rate": 0.002, "loss": 2.3511, "step": 274600 }, { "epoch": 1.0615654621082093, "grad_norm": 0.10022301971912384, "learning_rate": 0.002, "loss": 2.3489, "step": 274610 }, { "epoch": 1.0616041193115926, "grad_norm": 0.10636663436889648, "learning_rate": 0.002, "loss": 2.3385, "step": 274620 }, { "epoch": 1.0616427765149758, "grad_norm": 0.09114213287830353, "learning_rate": 0.002, "loss": 2.3249, "step": 274630 }, { "epoch": 1.061681433718359, "grad_norm": 0.09075837582349777, "learning_rate": 0.002, "loss": 2.3339, "step": 274640 }, { "epoch": 1.0617200909217424, "grad_norm": 0.1268124133348465, "learning_rate": 0.002, "loss": 2.3326, "step": 274650 }, { "epoch": 1.0617587481251256, "grad_norm": 0.08955233544111252, "learning_rate": 0.002, "loss": 2.3459, "step": 274660 }, { "epoch": 1.0617974053285089, "grad_norm": 0.09312479943037033, "learning_rate": 0.002, "loss": 2.3303, "step": 274670 }, { "epoch": 1.061836062531892, "grad_norm": 0.10527783632278442, "learning_rate": 0.002, "loss": 2.3412, "step": 274680 }, { "epoch": 1.0618747197352754, "grad_norm": 0.09795436263084412, "learning_rate": 0.002, "loss": 2.3376, "step": 274690 }, { "epoch": 1.0619133769386588, "grad_norm": 0.11870022118091583, "learning_rate": 0.002, "loss": 2.3311, "step": 274700 }, { "epoch": 1.061952034142042, "grad_norm": 0.10405212640762329, "learning_rate": 0.002, "loss": 2.3373, "step": 274710 }, { "epoch": 1.0619906913454253, "grad_norm": 0.11261092126369476, "learning_rate": 0.002, "loss": 2.3414, "step": 274720 }, { "epoch": 1.0620293485488086, "grad_norm": 0.11202120780944824, "learning_rate": 0.002, "loss": 2.3414, "step": 274730 }, { "epoch": 1.0620680057521918, "grad_norm": 0.1228702962398529, "learning_rate": 0.002, "loss": 2.3487, "step": 274740 }, { "epoch": 1.062106662955575, "grad_norm": 0.10662976652383804, "learning_rate": 0.002, "loss": 2.3223, "step": 274750 }, { "epoch": 1.0621453201589584, "grad_norm": 0.10735514760017395, "learning_rate": 0.002, "loss": 2.3361, "step": 274760 }, { "epoch": 1.0621839773623416, "grad_norm": 0.09139461070299149, "learning_rate": 0.002, "loss": 2.335, "step": 274770 }, { "epoch": 1.062222634565725, "grad_norm": 0.11393001675605774, "learning_rate": 0.002, "loss": 2.3387, "step": 274780 }, { "epoch": 1.0622612917691083, "grad_norm": 0.1101556345820427, "learning_rate": 0.002, "loss": 2.3412, "step": 274790 }, { "epoch": 1.0622999489724916, "grad_norm": 0.09900113195180893, "learning_rate": 0.002, "loss": 2.3304, "step": 274800 }, { "epoch": 1.0623386061758748, "grad_norm": 0.1438235342502594, "learning_rate": 0.002, "loss": 2.3284, "step": 274810 }, { "epoch": 1.062377263379258, "grad_norm": 0.10571251809597015, "learning_rate": 0.002, "loss": 2.3291, "step": 274820 }, { "epoch": 1.0624159205826413, "grad_norm": 0.10640508681535721, "learning_rate": 0.002, "loss": 2.3448, "step": 274830 }, { "epoch": 1.0624545777860246, "grad_norm": 0.09718748182058334, "learning_rate": 0.002, "loss": 2.3355, "step": 274840 }, { "epoch": 1.0624932349894078, "grad_norm": 0.11124593019485474, "learning_rate": 0.002, "loss": 2.3214, "step": 274850 }, { "epoch": 1.062531892192791, "grad_norm": 0.11500213295221329, "learning_rate": 0.002, "loss": 2.3179, "step": 274860 }, { "epoch": 1.0625705493961746, "grad_norm": 0.0943385511636734, "learning_rate": 0.002, "loss": 2.3275, "step": 274870 }, { "epoch": 1.0626092065995578, "grad_norm": 0.1425260305404663, "learning_rate": 0.002, "loss": 2.3328, "step": 274880 }, { "epoch": 1.062647863802941, "grad_norm": 0.10338190197944641, "learning_rate": 0.002, "loss": 2.3383, "step": 274890 }, { "epoch": 1.0626865210063243, "grad_norm": 0.10322872549295425, "learning_rate": 0.002, "loss": 2.3314, "step": 274900 }, { "epoch": 1.0627251782097076, "grad_norm": 0.1208379790186882, "learning_rate": 0.002, "loss": 2.3392, "step": 274910 }, { "epoch": 1.0627638354130908, "grad_norm": 0.1032503992319107, "learning_rate": 0.002, "loss": 2.3268, "step": 274920 }, { "epoch": 1.062802492616474, "grad_norm": 0.10165394842624664, "learning_rate": 0.002, "loss": 2.3366, "step": 274930 }, { "epoch": 1.0628411498198573, "grad_norm": 0.09076716005802155, "learning_rate": 0.002, "loss": 2.3282, "step": 274940 }, { "epoch": 1.0628798070232408, "grad_norm": 0.10745106637477875, "learning_rate": 0.002, "loss": 2.3127, "step": 274950 }, { "epoch": 1.062918464226624, "grad_norm": 0.1258857101202011, "learning_rate": 0.002, "loss": 2.3299, "step": 274960 }, { "epoch": 1.0629571214300073, "grad_norm": 0.10054262727499008, "learning_rate": 0.002, "loss": 2.3331, "step": 274970 }, { "epoch": 1.0629957786333906, "grad_norm": 0.11825623363256454, "learning_rate": 0.002, "loss": 2.3514, "step": 274980 }, { "epoch": 1.0630344358367738, "grad_norm": 0.09283744543790817, "learning_rate": 0.002, "loss": 2.3143, "step": 274990 }, { "epoch": 1.063073093040157, "grad_norm": 0.11862016469240189, "learning_rate": 0.002, "loss": 2.3469, "step": 275000 }, { "epoch": 1.0631117502435403, "grad_norm": 0.09345857799053192, "learning_rate": 0.002, "loss": 2.3331, "step": 275010 }, { "epoch": 1.0631504074469236, "grad_norm": 0.0908292606472969, "learning_rate": 0.002, "loss": 2.3366, "step": 275020 }, { "epoch": 1.0631890646503068, "grad_norm": 0.10507423430681229, "learning_rate": 0.002, "loss": 2.3187, "step": 275030 }, { "epoch": 1.0632277218536903, "grad_norm": 0.114536352455616, "learning_rate": 0.002, "loss": 2.3297, "step": 275040 }, { "epoch": 1.0632663790570736, "grad_norm": 0.09364758431911469, "learning_rate": 0.002, "loss": 2.3284, "step": 275050 }, { "epoch": 1.0633050362604568, "grad_norm": 0.11448070406913757, "learning_rate": 0.002, "loss": 2.3454, "step": 275060 }, { "epoch": 1.06334369346384, "grad_norm": 0.09450113028287888, "learning_rate": 0.002, "loss": 2.3426, "step": 275070 }, { "epoch": 1.0633823506672233, "grad_norm": 1.2738556861877441, "learning_rate": 0.002, "loss": 2.3204, "step": 275080 }, { "epoch": 1.0634210078706066, "grad_norm": 0.11922585964202881, "learning_rate": 0.002, "loss": 2.3313, "step": 275090 }, { "epoch": 1.0634596650739898, "grad_norm": 0.10912361741065979, "learning_rate": 0.002, "loss": 2.3386, "step": 275100 }, { "epoch": 1.063498322277373, "grad_norm": 0.10273371636867523, "learning_rate": 0.002, "loss": 2.3365, "step": 275110 }, { "epoch": 1.0635369794807565, "grad_norm": 0.10637617111206055, "learning_rate": 0.002, "loss": 2.3313, "step": 275120 }, { "epoch": 1.0635756366841398, "grad_norm": 0.11244622617959976, "learning_rate": 0.002, "loss": 2.3335, "step": 275130 }, { "epoch": 1.063614293887523, "grad_norm": 0.10040455311536789, "learning_rate": 0.002, "loss": 2.3423, "step": 275140 }, { "epoch": 1.0636529510909063, "grad_norm": 0.13314113020896912, "learning_rate": 0.002, "loss": 2.3363, "step": 275150 }, { "epoch": 1.0636916082942895, "grad_norm": 0.10646568983793259, "learning_rate": 0.002, "loss": 2.3364, "step": 275160 }, { "epoch": 1.0637302654976728, "grad_norm": 0.10095401108264923, "learning_rate": 0.002, "loss": 2.32, "step": 275170 }, { "epoch": 1.063768922701056, "grad_norm": 0.0974881574511528, "learning_rate": 0.002, "loss": 2.3467, "step": 275180 }, { "epoch": 1.0638075799044393, "grad_norm": 0.0964067354798317, "learning_rate": 0.002, "loss": 2.3291, "step": 275190 }, { "epoch": 1.0638462371078226, "grad_norm": 0.1156691312789917, "learning_rate": 0.002, "loss": 2.359, "step": 275200 }, { "epoch": 1.063884894311206, "grad_norm": 0.11896368861198425, "learning_rate": 0.002, "loss": 2.3378, "step": 275210 }, { "epoch": 1.0639235515145893, "grad_norm": 0.11752109229564667, "learning_rate": 0.002, "loss": 2.3426, "step": 275220 }, { "epoch": 1.0639622087179725, "grad_norm": 0.11022363603115082, "learning_rate": 0.002, "loss": 2.3337, "step": 275230 }, { "epoch": 1.0640008659213558, "grad_norm": 0.09471013396978378, "learning_rate": 0.002, "loss": 2.349, "step": 275240 }, { "epoch": 1.064039523124739, "grad_norm": 0.10919588059186935, "learning_rate": 0.002, "loss": 2.3358, "step": 275250 }, { "epoch": 1.0640781803281223, "grad_norm": 0.11964140087366104, "learning_rate": 0.002, "loss": 2.3394, "step": 275260 }, { "epoch": 1.0641168375315055, "grad_norm": 0.11185113340616226, "learning_rate": 0.002, "loss": 2.3395, "step": 275270 }, { "epoch": 1.0641554947348888, "grad_norm": 0.14997375011444092, "learning_rate": 0.002, "loss": 2.3326, "step": 275280 }, { "epoch": 1.0641941519382723, "grad_norm": 0.11473798006772995, "learning_rate": 0.002, "loss": 2.3379, "step": 275290 }, { "epoch": 1.0642328091416555, "grad_norm": 0.1455710083246231, "learning_rate": 0.002, "loss": 2.3404, "step": 275300 }, { "epoch": 1.0642714663450388, "grad_norm": 0.10797169804573059, "learning_rate": 0.002, "loss": 2.3555, "step": 275310 }, { "epoch": 1.064310123548422, "grad_norm": 0.10607920587062836, "learning_rate": 0.002, "loss": 2.3463, "step": 275320 }, { "epoch": 1.0643487807518053, "grad_norm": 0.10385407507419586, "learning_rate": 0.002, "loss": 2.3419, "step": 275330 }, { "epoch": 1.0643874379551885, "grad_norm": 0.15248897671699524, "learning_rate": 0.002, "loss": 2.3272, "step": 275340 }, { "epoch": 1.0644260951585718, "grad_norm": 0.10435408353805542, "learning_rate": 0.002, "loss": 2.3395, "step": 275350 }, { "epoch": 1.064464752361955, "grad_norm": 0.1083153784275055, "learning_rate": 0.002, "loss": 2.3199, "step": 275360 }, { "epoch": 1.0645034095653383, "grad_norm": 0.10544626414775848, "learning_rate": 0.002, "loss": 2.3445, "step": 275370 }, { "epoch": 1.0645420667687218, "grad_norm": 0.11898812651634216, "learning_rate": 0.002, "loss": 2.3318, "step": 275380 }, { "epoch": 1.064580723972105, "grad_norm": 0.09902945905923843, "learning_rate": 0.002, "loss": 2.3448, "step": 275390 }, { "epoch": 1.0646193811754883, "grad_norm": 0.1146288812160492, "learning_rate": 0.002, "loss": 2.3357, "step": 275400 }, { "epoch": 1.0646580383788715, "grad_norm": 0.12026480585336685, "learning_rate": 0.002, "loss": 2.3391, "step": 275410 }, { "epoch": 1.0646966955822548, "grad_norm": 0.4733019471168518, "learning_rate": 0.002, "loss": 2.3443, "step": 275420 }, { "epoch": 1.064735352785638, "grad_norm": 0.11432819813489914, "learning_rate": 0.002, "loss": 2.3478, "step": 275430 }, { "epoch": 1.0647740099890213, "grad_norm": 0.11281923949718475, "learning_rate": 0.002, "loss": 2.3331, "step": 275440 }, { "epoch": 1.0648126671924047, "grad_norm": 0.11215028911828995, "learning_rate": 0.002, "loss": 2.3318, "step": 275450 }, { "epoch": 1.064851324395788, "grad_norm": 0.10150204598903656, "learning_rate": 0.002, "loss": 2.3431, "step": 275460 }, { "epoch": 1.0648899815991713, "grad_norm": 0.09803950786590576, "learning_rate": 0.002, "loss": 2.3411, "step": 275470 }, { "epoch": 1.0649286388025545, "grad_norm": 0.10600266605615616, "learning_rate": 0.002, "loss": 2.3176, "step": 275480 }, { "epoch": 1.0649672960059378, "grad_norm": 0.1063169315457344, "learning_rate": 0.002, "loss": 2.3431, "step": 275490 }, { "epoch": 1.065005953209321, "grad_norm": 0.08664971590042114, "learning_rate": 0.002, "loss": 2.3268, "step": 275500 }, { "epoch": 1.0650446104127043, "grad_norm": 0.1225690245628357, "learning_rate": 0.002, "loss": 2.3423, "step": 275510 }, { "epoch": 1.0650832676160875, "grad_norm": 0.11429854482412338, "learning_rate": 0.002, "loss": 2.3463, "step": 275520 }, { "epoch": 1.0651219248194708, "grad_norm": 0.11013131588697433, "learning_rate": 0.002, "loss": 2.3502, "step": 275530 }, { "epoch": 1.0651605820228542, "grad_norm": 0.09335508197546005, "learning_rate": 0.002, "loss": 2.3106, "step": 275540 }, { "epoch": 1.0651992392262375, "grad_norm": 0.11192896962165833, "learning_rate": 0.002, "loss": 2.3438, "step": 275550 }, { "epoch": 1.0652378964296207, "grad_norm": 0.09795791655778885, "learning_rate": 0.002, "loss": 2.3401, "step": 275560 }, { "epoch": 1.065276553633004, "grad_norm": 0.11031638830900192, "learning_rate": 0.002, "loss": 2.3455, "step": 275570 }, { "epoch": 1.0653152108363872, "grad_norm": 0.11539456248283386, "learning_rate": 0.002, "loss": 2.3404, "step": 275580 }, { "epoch": 1.0653538680397705, "grad_norm": 0.130585178732872, "learning_rate": 0.002, "loss": 2.3169, "step": 275590 }, { "epoch": 1.0653925252431538, "grad_norm": 0.10120509564876556, "learning_rate": 0.002, "loss": 2.3396, "step": 275600 }, { "epoch": 1.065431182446537, "grad_norm": 0.09373080730438232, "learning_rate": 0.002, "loss": 2.3326, "step": 275610 }, { "epoch": 1.0654698396499205, "grad_norm": 0.09474208205938339, "learning_rate": 0.002, "loss": 2.3337, "step": 275620 }, { "epoch": 1.0655084968533037, "grad_norm": 0.12688371539115906, "learning_rate": 0.002, "loss": 2.3447, "step": 275630 }, { "epoch": 1.065547154056687, "grad_norm": 0.13162842392921448, "learning_rate": 0.002, "loss": 2.3372, "step": 275640 }, { "epoch": 1.0655858112600702, "grad_norm": 0.10550396889448166, "learning_rate": 0.002, "loss": 2.3476, "step": 275650 }, { "epoch": 1.0656244684634535, "grad_norm": 0.10006772726774216, "learning_rate": 0.002, "loss": 2.3204, "step": 275660 }, { "epoch": 1.0656631256668367, "grad_norm": 0.11802761256694794, "learning_rate": 0.002, "loss": 2.3237, "step": 275670 }, { "epoch": 1.06570178287022, "grad_norm": 0.11319725215435028, "learning_rate": 0.002, "loss": 2.3507, "step": 275680 }, { "epoch": 1.0657404400736032, "grad_norm": 0.10054709017276764, "learning_rate": 0.002, "loss": 2.3382, "step": 275690 }, { "epoch": 1.0657790972769865, "grad_norm": 0.11254961788654327, "learning_rate": 0.002, "loss": 2.3465, "step": 275700 }, { "epoch": 1.06581775448037, "grad_norm": 0.09680163115262985, "learning_rate": 0.002, "loss": 2.3401, "step": 275710 }, { "epoch": 1.0658564116837532, "grad_norm": 0.10819884389638901, "learning_rate": 0.002, "loss": 2.3312, "step": 275720 }, { "epoch": 1.0658950688871365, "grad_norm": 0.11223629862070084, "learning_rate": 0.002, "loss": 2.3424, "step": 275730 }, { "epoch": 1.0659337260905197, "grad_norm": 0.1306917816400528, "learning_rate": 0.002, "loss": 2.3411, "step": 275740 }, { "epoch": 1.065972383293903, "grad_norm": 0.08868222683668137, "learning_rate": 0.002, "loss": 2.349, "step": 275750 }, { "epoch": 1.0660110404972862, "grad_norm": 0.10296325385570526, "learning_rate": 0.002, "loss": 2.3286, "step": 275760 }, { "epoch": 1.0660496977006695, "grad_norm": 0.10850217193365097, "learning_rate": 0.002, "loss": 2.3391, "step": 275770 }, { "epoch": 1.0660883549040527, "grad_norm": 0.11120878159999847, "learning_rate": 0.002, "loss": 2.3469, "step": 275780 }, { "epoch": 1.0661270121074362, "grad_norm": 0.093369260430336, "learning_rate": 0.002, "loss": 2.3347, "step": 275790 }, { "epoch": 1.0661656693108195, "grad_norm": 0.1033010482788086, "learning_rate": 0.002, "loss": 2.3417, "step": 275800 }, { "epoch": 1.0662043265142027, "grad_norm": 0.10789181292057037, "learning_rate": 0.002, "loss": 2.346, "step": 275810 }, { "epoch": 1.066242983717586, "grad_norm": 0.09879382699728012, "learning_rate": 0.002, "loss": 2.3464, "step": 275820 }, { "epoch": 1.0662816409209692, "grad_norm": 0.10098763555288315, "learning_rate": 0.002, "loss": 2.3408, "step": 275830 }, { "epoch": 1.0663202981243525, "grad_norm": 0.10814845561981201, "learning_rate": 0.002, "loss": 2.3305, "step": 275840 }, { "epoch": 1.0663589553277357, "grad_norm": 0.11707577854394913, "learning_rate": 0.002, "loss": 2.3257, "step": 275850 }, { "epoch": 1.066397612531119, "grad_norm": 0.10472466051578522, "learning_rate": 0.002, "loss": 2.3419, "step": 275860 }, { "epoch": 1.0664362697345022, "grad_norm": 0.11059992760419846, "learning_rate": 0.002, "loss": 2.3433, "step": 275870 }, { "epoch": 1.0664749269378857, "grad_norm": 0.114679716527462, "learning_rate": 0.002, "loss": 2.3432, "step": 275880 }, { "epoch": 1.066513584141269, "grad_norm": 0.09872438758611679, "learning_rate": 0.002, "loss": 2.3317, "step": 275890 }, { "epoch": 1.0665522413446522, "grad_norm": 0.10187365114688873, "learning_rate": 0.002, "loss": 2.3427, "step": 275900 }, { "epoch": 1.0665908985480355, "grad_norm": 0.13637171685695648, "learning_rate": 0.002, "loss": 2.318, "step": 275910 }, { "epoch": 1.0666295557514187, "grad_norm": 0.09725106507539749, "learning_rate": 0.002, "loss": 2.3159, "step": 275920 }, { "epoch": 1.066668212954802, "grad_norm": 0.12772177159786224, "learning_rate": 0.002, "loss": 2.3601, "step": 275930 }, { "epoch": 1.0667068701581852, "grad_norm": 0.10485053062438965, "learning_rate": 0.002, "loss": 2.3329, "step": 275940 }, { "epoch": 1.0667455273615685, "grad_norm": 0.10371177643537521, "learning_rate": 0.002, "loss": 2.3331, "step": 275950 }, { "epoch": 1.066784184564952, "grad_norm": 0.09366276115179062, "learning_rate": 0.002, "loss": 2.3378, "step": 275960 }, { "epoch": 1.0668228417683352, "grad_norm": 0.09731849282979965, "learning_rate": 0.002, "loss": 2.3432, "step": 275970 }, { "epoch": 1.0668614989717184, "grad_norm": 0.09356456249952316, "learning_rate": 0.002, "loss": 2.3431, "step": 275980 }, { "epoch": 1.0669001561751017, "grad_norm": 0.1003258153796196, "learning_rate": 0.002, "loss": 2.3318, "step": 275990 }, { "epoch": 1.066938813378485, "grad_norm": 0.13170093297958374, "learning_rate": 0.002, "loss": 2.3379, "step": 276000 }, { "epoch": 1.0669774705818682, "grad_norm": 0.09846068918704987, "learning_rate": 0.002, "loss": 2.3351, "step": 276010 }, { "epoch": 1.0670161277852515, "grad_norm": 0.12985065579414368, "learning_rate": 0.002, "loss": 2.3266, "step": 276020 }, { "epoch": 1.0670547849886347, "grad_norm": 0.11032890528440475, "learning_rate": 0.002, "loss": 2.3461, "step": 276030 }, { "epoch": 1.067093442192018, "grad_norm": 0.10538095980882645, "learning_rate": 0.002, "loss": 2.3413, "step": 276040 }, { "epoch": 1.0671320993954014, "grad_norm": 0.10463589429855347, "learning_rate": 0.002, "loss": 2.3273, "step": 276050 }, { "epoch": 1.0671707565987847, "grad_norm": 0.11432314664125443, "learning_rate": 0.002, "loss": 2.3323, "step": 276060 }, { "epoch": 1.067209413802168, "grad_norm": 0.11011043936014175, "learning_rate": 0.002, "loss": 2.3451, "step": 276070 }, { "epoch": 1.0672480710055512, "grad_norm": 0.08819034695625305, "learning_rate": 0.002, "loss": 2.3357, "step": 276080 }, { "epoch": 1.0672867282089344, "grad_norm": 0.09931288659572601, "learning_rate": 0.002, "loss": 2.3394, "step": 276090 }, { "epoch": 1.0673253854123177, "grad_norm": 0.09332785755395889, "learning_rate": 0.002, "loss": 2.3321, "step": 276100 }, { "epoch": 1.067364042615701, "grad_norm": 0.10240274667739868, "learning_rate": 0.002, "loss": 2.3328, "step": 276110 }, { "epoch": 1.0674026998190842, "grad_norm": 0.09615787118673325, "learning_rate": 0.002, "loss": 2.338, "step": 276120 }, { "epoch": 1.0674413570224677, "grad_norm": 0.11123973876237869, "learning_rate": 0.002, "loss": 2.3249, "step": 276130 }, { "epoch": 1.067480014225851, "grad_norm": 0.11293863505125046, "learning_rate": 0.002, "loss": 2.3414, "step": 276140 }, { "epoch": 1.0675186714292342, "grad_norm": 0.12291529029607773, "learning_rate": 0.002, "loss": 2.3276, "step": 276150 }, { "epoch": 1.0675573286326174, "grad_norm": 0.10829268395900726, "learning_rate": 0.002, "loss": 2.3267, "step": 276160 }, { "epoch": 1.0675959858360007, "grad_norm": 0.09903152287006378, "learning_rate": 0.002, "loss": 2.3412, "step": 276170 }, { "epoch": 1.067634643039384, "grad_norm": 0.10907009243965149, "learning_rate": 0.002, "loss": 2.3474, "step": 276180 }, { "epoch": 1.0676733002427672, "grad_norm": 0.09899713099002838, "learning_rate": 0.002, "loss": 2.3328, "step": 276190 }, { "epoch": 1.0677119574461504, "grad_norm": 0.12115491181612015, "learning_rate": 0.002, "loss": 2.3433, "step": 276200 }, { "epoch": 1.0677506146495337, "grad_norm": 0.090184785425663, "learning_rate": 0.002, "loss": 2.3361, "step": 276210 }, { "epoch": 1.0677892718529172, "grad_norm": 0.10801292955875397, "learning_rate": 0.002, "loss": 2.3528, "step": 276220 }, { "epoch": 1.0678279290563004, "grad_norm": 0.09343937039375305, "learning_rate": 0.002, "loss": 2.3302, "step": 276230 }, { "epoch": 1.0678665862596837, "grad_norm": 0.10986505448818207, "learning_rate": 0.002, "loss": 2.3333, "step": 276240 }, { "epoch": 1.067905243463067, "grad_norm": 0.11481022089719772, "learning_rate": 0.002, "loss": 2.328, "step": 276250 }, { "epoch": 1.0679439006664502, "grad_norm": 0.12644775211811066, "learning_rate": 0.002, "loss": 2.3405, "step": 276260 }, { "epoch": 1.0679825578698334, "grad_norm": 0.11024738848209381, "learning_rate": 0.002, "loss": 2.346, "step": 276270 }, { "epoch": 1.0680212150732167, "grad_norm": 0.0977511778473854, "learning_rate": 0.002, "loss": 2.349, "step": 276280 }, { "epoch": 1.0680598722766, "grad_norm": 0.09417412430047989, "learning_rate": 0.002, "loss": 2.3323, "step": 276290 }, { "epoch": 1.0680985294799834, "grad_norm": 0.10336937010288239, "learning_rate": 0.002, "loss": 2.3377, "step": 276300 }, { "epoch": 1.0681371866833667, "grad_norm": 0.5236482620239258, "learning_rate": 0.002, "loss": 2.3313, "step": 276310 }, { "epoch": 1.06817584388675, "grad_norm": 0.0997222289443016, "learning_rate": 0.002, "loss": 2.3448, "step": 276320 }, { "epoch": 1.0682145010901332, "grad_norm": 0.10604297369718552, "learning_rate": 0.002, "loss": 2.3495, "step": 276330 }, { "epoch": 1.0682531582935164, "grad_norm": 0.10758452862501144, "learning_rate": 0.002, "loss": 2.3476, "step": 276340 }, { "epoch": 1.0682918154968997, "grad_norm": 0.11882123351097107, "learning_rate": 0.002, "loss": 2.3464, "step": 276350 }, { "epoch": 1.068330472700283, "grad_norm": 0.09060604125261307, "learning_rate": 0.002, "loss": 2.3405, "step": 276360 }, { "epoch": 1.0683691299036662, "grad_norm": 0.8458299040794373, "learning_rate": 0.002, "loss": 2.3711, "step": 276370 }, { "epoch": 1.0684077871070494, "grad_norm": 0.1441306173801422, "learning_rate": 0.002, "loss": 2.3462, "step": 276380 }, { "epoch": 1.068446444310433, "grad_norm": 0.11246976256370544, "learning_rate": 0.002, "loss": 2.3359, "step": 276390 }, { "epoch": 1.0684851015138161, "grad_norm": 0.09019514173269272, "learning_rate": 0.002, "loss": 2.3511, "step": 276400 }, { "epoch": 1.0685237587171994, "grad_norm": 0.10581713914871216, "learning_rate": 0.002, "loss": 2.3349, "step": 276410 }, { "epoch": 1.0685624159205827, "grad_norm": 0.10104959458112717, "learning_rate": 0.002, "loss": 2.3276, "step": 276420 }, { "epoch": 1.068601073123966, "grad_norm": 0.12771807610988617, "learning_rate": 0.002, "loss": 2.3492, "step": 276430 }, { "epoch": 1.0686397303273492, "grad_norm": 0.13741634786128998, "learning_rate": 0.002, "loss": 2.3333, "step": 276440 }, { "epoch": 1.0686783875307324, "grad_norm": 0.09425902366638184, "learning_rate": 0.002, "loss": 2.3357, "step": 276450 }, { "epoch": 1.0687170447341157, "grad_norm": 0.11592981219291687, "learning_rate": 0.002, "loss": 2.322, "step": 276460 }, { "epoch": 1.0687557019374991, "grad_norm": 0.09700343012809753, "learning_rate": 0.002, "loss": 2.3328, "step": 276470 }, { "epoch": 1.0687943591408824, "grad_norm": 0.11174482107162476, "learning_rate": 0.002, "loss": 2.3319, "step": 276480 }, { "epoch": 1.0688330163442656, "grad_norm": 0.11250240355730057, "learning_rate": 0.002, "loss": 2.337, "step": 276490 }, { "epoch": 1.068871673547649, "grad_norm": 0.1096969023346901, "learning_rate": 0.002, "loss": 2.3242, "step": 276500 }, { "epoch": 1.0689103307510321, "grad_norm": 0.0991690382361412, "learning_rate": 0.002, "loss": 2.3308, "step": 276510 }, { "epoch": 1.0689489879544154, "grad_norm": 0.10980476438999176, "learning_rate": 0.002, "loss": 2.3349, "step": 276520 }, { "epoch": 1.0689876451577986, "grad_norm": 0.10511131584644318, "learning_rate": 0.002, "loss": 2.3374, "step": 276530 }, { "epoch": 1.069026302361182, "grad_norm": 0.10734134167432785, "learning_rate": 0.002, "loss": 2.3282, "step": 276540 }, { "epoch": 1.0690649595645652, "grad_norm": 0.10478593409061432, "learning_rate": 0.002, "loss": 2.3315, "step": 276550 }, { "epoch": 1.0691036167679486, "grad_norm": 0.10274649411439896, "learning_rate": 0.002, "loss": 2.3402, "step": 276560 }, { "epoch": 1.0691422739713319, "grad_norm": 0.09574639052152634, "learning_rate": 0.002, "loss": 2.3311, "step": 276570 }, { "epoch": 1.0691809311747151, "grad_norm": 0.10025924444198608, "learning_rate": 0.002, "loss": 2.347, "step": 276580 }, { "epoch": 1.0692195883780984, "grad_norm": 0.09721271693706512, "learning_rate": 0.002, "loss": 2.3359, "step": 276590 }, { "epoch": 1.0692582455814816, "grad_norm": 0.10139498859643936, "learning_rate": 0.002, "loss": 2.336, "step": 276600 }, { "epoch": 1.0692969027848649, "grad_norm": 0.09656251966953278, "learning_rate": 0.002, "loss": 2.3355, "step": 276610 }, { "epoch": 1.0693355599882481, "grad_norm": 0.12592603266239166, "learning_rate": 0.002, "loss": 2.3315, "step": 276620 }, { "epoch": 1.0693742171916314, "grad_norm": 0.1186826229095459, "learning_rate": 0.002, "loss": 2.3249, "step": 276630 }, { "epoch": 1.0694128743950149, "grad_norm": 0.12251210957765579, "learning_rate": 0.002, "loss": 2.3344, "step": 276640 }, { "epoch": 1.0694515315983981, "grad_norm": 0.08772478997707367, "learning_rate": 0.002, "loss": 2.3456, "step": 276650 }, { "epoch": 1.0694901888017814, "grad_norm": 0.11454391479492188, "learning_rate": 0.002, "loss": 2.3358, "step": 276660 }, { "epoch": 1.0695288460051646, "grad_norm": 0.10247767716646194, "learning_rate": 0.002, "loss": 2.3476, "step": 276670 }, { "epoch": 1.0695675032085479, "grad_norm": 0.11828066408634186, "learning_rate": 0.002, "loss": 2.3339, "step": 276680 }, { "epoch": 1.0696061604119311, "grad_norm": 0.11170656979084015, "learning_rate": 0.002, "loss": 2.3325, "step": 276690 }, { "epoch": 1.0696448176153144, "grad_norm": 0.11362114548683167, "learning_rate": 0.002, "loss": 2.3539, "step": 276700 }, { "epoch": 1.0696834748186976, "grad_norm": 0.10755255818367004, "learning_rate": 0.002, "loss": 2.3335, "step": 276710 }, { "epoch": 1.0697221320220809, "grad_norm": 0.08972054719924927, "learning_rate": 0.002, "loss": 2.3429, "step": 276720 }, { "epoch": 1.0697607892254644, "grad_norm": 0.1052330732345581, "learning_rate": 0.002, "loss": 2.3343, "step": 276730 }, { "epoch": 1.0697994464288476, "grad_norm": 0.09455091506242752, "learning_rate": 0.002, "loss": 2.3336, "step": 276740 }, { "epoch": 1.0698381036322309, "grad_norm": 0.09553559124469757, "learning_rate": 0.002, "loss": 2.3381, "step": 276750 }, { "epoch": 1.0698767608356141, "grad_norm": 0.11882331967353821, "learning_rate": 0.002, "loss": 2.346, "step": 276760 }, { "epoch": 1.0699154180389974, "grad_norm": 0.09439017623662949, "learning_rate": 0.002, "loss": 2.3367, "step": 276770 }, { "epoch": 1.0699540752423806, "grad_norm": 0.10479924082756042, "learning_rate": 0.002, "loss": 2.3403, "step": 276780 }, { "epoch": 1.0699927324457639, "grad_norm": 0.10761447995901108, "learning_rate": 0.002, "loss": 2.3507, "step": 276790 }, { "epoch": 1.0700313896491471, "grad_norm": 0.10427835583686829, "learning_rate": 0.002, "loss": 2.3465, "step": 276800 }, { "epoch": 1.0700700468525306, "grad_norm": 0.10986112803220749, "learning_rate": 0.002, "loss": 2.3332, "step": 276810 }, { "epoch": 1.0701087040559139, "grad_norm": 0.11656492203474045, "learning_rate": 0.002, "loss": 2.3329, "step": 276820 }, { "epoch": 1.070147361259297, "grad_norm": 0.11677859723567963, "learning_rate": 0.002, "loss": 2.346, "step": 276830 }, { "epoch": 1.0701860184626804, "grad_norm": 0.10161464661359787, "learning_rate": 0.002, "loss": 2.3282, "step": 276840 }, { "epoch": 1.0702246756660636, "grad_norm": 0.11238999664783478, "learning_rate": 0.002, "loss": 2.3356, "step": 276850 }, { "epoch": 1.0702633328694469, "grad_norm": 0.10180098563432693, "learning_rate": 0.002, "loss": 2.3457, "step": 276860 }, { "epoch": 1.0703019900728301, "grad_norm": 0.09605445712804794, "learning_rate": 0.002, "loss": 2.3316, "step": 276870 }, { "epoch": 1.0703406472762134, "grad_norm": 0.09548425674438477, "learning_rate": 0.002, "loss": 2.338, "step": 276880 }, { "epoch": 1.0703793044795966, "grad_norm": 0.1020766943693161, "learning_rate": 0.002, "loss": 2.3454, "step": 276890 }, { "epoch": 1.07041796168298, "grad_norm": 0.11540831625461578, "learning_rate": 0.002, "loss": 2.3384, "step": 276900 }, { "epoch": 1.0704566188863633, "grad_norm": 0.10443674027919769, "learning_rate": 0.002, "loss": 2.3344, "step": 276910 }, { "epoch": 1.0704952760897466, "grad_norm": 0.1141781434416771, "learning_rate": 0.002, "loss": 2.334, "step": 276920 }, { "epoch": 1.0705339332931298, "grad_norm": 0.11112864315509796, "learning_rate": 0.002, "loss": 2.3352, "step": 276930 }, { "epoch": 1.070572590496513, "grad_norm": 0.0906246155500412, "learning_rate": 0.002, "loss": 2.3344, "step": 276940 }, { "epoch": 1.0706112476998964, "grad_norm": 0.10664301365613937, "learning_rate": 0.002, "loss": 2.3247, "step": 276950 }, { "epoch": 1.0706499049032796, "grad_norm": 0.12030696123838425, "learning_rate": 0.002, "loss": 2.3362, "step": 276960 }, { "epoch": 1.0706885621066629, "grad_norm": 0.09777712821960449, "learning_rate": 0.002, "loss": 2.3391, "step": 276970 }, { "epoch": 1.0707272193100463, "grad_norm": 0.10626299679279327, "learning_rate": 0.002, "loss": 2.3511, "step": 276980 }, { "epoch": 1.0707658765134296, "grad_norm": 0.11611949652433395, "learning_rate": 0.002, "loss": 2.3377, "step": 276990 }, { "epoch": 1.0708045337168128, "grad_norm": 0.09844252467155457, "learning_rate": 0.002, "loss": 2.3329, "step": 277000 }, { "epoch": 1.070843190920196, "grad_norm": 0.10335033386945724, "learning_rate": 0.002, "loss": 2.3285, "step": 277010 }, { "epoch": 1.0708818481235793, "grad_norm": 0.10708364844322205, "learning_rate": 0.002, "loss": 2.3386, "step": 277020 }, { "epoch": 1.0709205053269626, "grad_norm": 0.10955175757408142, "learning_rate": 0.002, "loss": 2.3403, "step": 277030 }, { "epoch": 1.0709591625303458, "grad_norm": 0.10562895983457565, "learning_rate": 0.002, "loss": 2.3415, "step": 277040 }, { "epoch": 1.070997819733729, "grad_norm": 0.10760527104139328, "learning_rate": 0.002, "loss": 2.3391, "step": 277050 }, { "epoch": 1.0710364769371123, "grad_norm": 0.08946993947029114, "learning_rate": 0.002, "loss": 2.3331, "step": 277060 }, { "epoch": 1.0710751341404958, "grad_norm": 0.09455015510320663, "learning_rate": 0.002, "loss": 2.3269, "step": 277070 }, { "epoch": 1.071113791343879, "grad_norm": 0.1026642695069313, "learning_rate": 0.002, "loss": 2.34, "step": 277080 }, { "epoch": 1.0711524485472623, "grad_norm": 0.09760484099388123, "learning_rate": 0.002, "loss": 2.3282, "step": 277090 }, { "epoch": 1.0711911057506456, "grad_norm": 0.12051312625408173, "learning_rate": 0.002, "loss": 2.3491, "step": 277100 }, { "epoch": 1.0712297629540288, "grad_norm": 0.10644880682229996, "learning_rate": 0.002, "loss": 2.3401, "step": 277110 }, { "epoch": 1.071268420157412, "grad_norm": 0.11814972013235092, "learning_rate": 0.002, "loss": 2.3365, "step": 277120 }, { "epoch": 1.0713070773607953, "grad_norm": 0.09355330467224121, "learning_rate": 0.002, "loss": 2.3592, "step": 277130 }, { "epoch": 1.0713457345641786, "grad_norm": 0.09649022668600082, "learning_rate": 0.002, "loss": 2.3284, "step": 277140 }, { "epoch": 1.071384391767562, "grad_norm": 0.10202883183956146, "learning_rate": 0.002, "loss": 2.3503, "step": 277150 }, { "epoch": 1.0714230489709453, "grad_norm": 0.10278471559286118, "learning_rate": 0.002, "loss": 2.3317, "step": 277160 }, { "epoch": 1.0714617061743286, "grad_norm": 0.12783558666706085, "learning_rate": 0.002, "loss": 2.3311, "step": 277170 }, { "epoch": 1.0715003633777118, "grad_norm": 0.09995948523283005, "learning_rate": 0.002, "loss": 2.3403, "step": 277180 }, { "epoch": 1.071539020581095, "grad_norm": 0.11257115751504898, "learning_rate": 0.002, "loss": 2.3402, "step": 277190 }, { "epoch": 1.0715776777844783, "grad_norm": 0.11943518370389938, "learning_rate": 0.002, "loss": 2.3272, "step": 277200 }, { "epoch": 1.0716163349878616, "grad_norm": 0.09310497343540192, "learning_rate": 0.002, "loss": 2.3367, "step": 277210 }, { "epoch": 1.0716549921912448, "grad_norm": 0.10629613697528839, "learning_rate": 0.002, "loss": 2.3243, "step": 277220 }, { "epoch": 1.071693649394628, "grad_norm": 0.1634482592344284, "learning_rate": 0.002, "loss": 2.3241, "step": 277230 }, { "epoch": 1.0717323065980116, "grad_norm": 0.1426219344139099, "learning_rate": 0.002, "loss": 2.3361, "step": 277240 }, { "epoch": 1.0717709638013948, "grad_norm": 0.09856656938791275, "learning_rate": 0.002, "loss": 2.3265, "step": 277250 }, { "epoch": 1.071809621004778, "grad_norm": 0.11183300614356995, "learning_rate": 0.002, "loss": 2.3495, "step": 277260 }, { "epoch": 1.0718482782081613, "grad_norm": 0.16957798600196838, "learning_rate": 0.002, "loss": 2.334, "step": 277270 }, { "epoch": 1.0718869354115446, "grad_norm": 0.2407703846693039, "learning_rate": 0.002, "loss": 2.342, "step": 277280 }, { "epoch": 1.0719255926149278, "grad_norm": 0.11991997063159943, "learning_rate": 0.002, "loss": 2.3468, "step": 277290 }, { "epoch": 1.071964249818311, "grad_norm": 0.09134931117296219, "learning_rate": 0.002, "loss": 2.3357, "step": 277300 }, { "epoch": 1.0720029070216945, "grad_norm": 0.11510300636291504, "learning_rate": 0.002, "loss": 2.3402, "step": 277310 }, { "epoch": 1.0720415642250778, "grad_norm": 0.09177210181951523, "learning_rate": 0.002, "loss": 2.3419, "step": 277320 }, { "epoch": 1.072080221428461, "grad_norm": 0.10126060247421265, "learning_rate": 0.002, "loss": 2.3358, "step": 277330 }, { "epoch": 1.0721188786318443, "grad_norm": 0.11494698375463486, "learning_rate": 0.002, "loss": 2.339, "step": 277340 }, { "epoch": 1.0721575358352275, "grad_norm": 0.11380572617053986, "learning_rate": 0.002, "loss": 2.3455, "step": 277350 }, { "epoch": 1.0721961930386108, "grad_norm": 0.10477299988269806, "learning_rate": 0.002, "loss": 2.333, "step": 277360 }, { "epoch": 1.072234850241994, "grad_norm": 0.1232742890715599, "learning_rate": 0.002, "loss": 2.3481, "step": 277370 }, { "epoch": 1.0722735074453773, "grad_norm": 0.1035618707537651, "learning_rate": 0.002, "loss": 2.3429, "step": 277380 }, { "epoch": 1.0723121646487606, "grad_norm": 0.1024998351931572, "learning_rate": 0.002, "loss": 2.3396, "step": 277390 }, { "epoch": 1.072350821852144, "grad_norm": 0.09486802667379379, "learning_rate": 0.002, "loss": 2.3255, "step": 277400 }, { "epoch": 1.0723894790555273, "grad_norm": 0.11042434722185135, "learning_rate": 0.002, "loss": 2.3334, "step": 277410 }, { "epoch": 1.0724281362589105, "grad_norm": 0.09347493946552277, "learning_rate": 0.002, "loss": 2.3385, "step": 277420 }, { "epoch": 1.0724667934622938, "grad_norm": 0.09824241697788239, "learning_rate": 0.002, "loss": 2.336, "step": 277430 }, { "epoch": 1.072505450665677, "grad_norm": 0.10214419662952423, "learning_rate": 0.002, "loss": 2.3385, "step": 277440 }, { "epoch": 1.0725441078690603, "grad_norm": 0.09179384261369705, "learning_rate": 0.002, "loss": 2.3312, "step": 277450 }, { "epoch": 1.0725827650724435, "grad_norm": 0.11253994703292847, "learning_rate": 0.002, "loss": 2.3164, "step": 277460 }, { "epoch": 1.0726214222758268, "grad_norm": 0.09997082501649857, "learning_rate": 0.002, "loss": 2.348, "step": 277470 }, { "epoch": 1.0726600794792103, "grad_norm": 0.09391092509031296, "learning_rate": 0.002, "loss": 2.3258, "step": 277480 }, { "epoch": 1.0726987366825935, "grad_norm": 0.1391114741563797, "learning_rate": 0.002, "loss": 2.3383, "step": 277490 }, { "epoch": 1.0727373938859768, "grad_norm": 0.10522706061601639, "learning_rate": 0.002, "loss": 2.3326, "step": 277500 }, { "epoch": 1.07277605108936, "grad_norm": 0.1032128557562828, "learning_rate": 0.002, "loss": 2.3307, "step": 277510 }, { "epoch": 1.0728147082927433, "grad_norm": 0.12092791497707367, "learning_rate": 0.002, "loss": 2.3213, "step": 277520 }, { "epoch": 1.0728533654961265, "grad_norm": 0.1165420338511467, "learning_rate": 0.002, "loss": 2.348, "step": 277530 }, { "epoch": 1.0728920226995098, "grad_norm": 0.10182865709066391, "learning_rate": 0.002, "loss": 2.3352, "step": 277540 }, { "epoch": 1.072930679902893, "grad_norm": 0.10662321746349335, "learning_rate": 0.002, "loss": 2.3226, "step": 277550 }, { "epoch": 1.0729693371062763, "grad_norm": 0.09967391192913055, "learning_rate": 0.002, "loss": 2.3358, "step": 277560 }, { "epoch": 1.0730079943096598, "grad_norm": 0.08776018023490906, "learning_rate": 0.002, "loss": 2.3279, "step": 277570 }, { "epoch": 1.073046651513043, "grad_norm": 0.09252668172121048, "learning_rate": 0.002, "loss": 2.3316, "step": 277580 }, { "epoch": 1.0730853087164263, "grad_norm": 0.09517596662044525, "learning_rate": 0.002, "loss": 2.3364, "step": 277590 }, { "epoch": 1.0731239659198095, "grad_norm": 0.12813012301921844, "learning_rate": 0.002, "loss": 2.3422, "step": 277600 }, { "epoch": 1.0731626231231928, "grad_norm": 0.09909941256046295, "learning_rate": 0.002, "loss": 2.3401, "step": 277610 }, { "epoch": 1.073201280326576, "grad_norm": 0.10072305053472519, "learning_rate": 0.002, "loss": 2.3545, "step": 277620 }, { "epoch": 1.0732399375299593, "grad_norm": 0.18498556315898895, "learning_rate": 0.002, "loss": 2.3415, "step": 277630 }, { "epoch": 1.0732785947333425, "grad_norm": 0.10503578931093216, "learning_rate": 0.002, "loss": 2.3329, "step": 277640 }, { "epoch": 1.073317251936726, "grad_norm": 0.10219155251979828, "learning_rate": 0.002, "loss": 2.3376, "step": 277650 }, { "epoch": 1.0733559091401093, "grad_norm": 0.1031673327088356, "learning_rate": 0.002, "loss": 2.3344, "step": 277660 }, { "epoch": 1.0733945663434925, "grad_norm": 0.10291045904159546, "learning_rate": 0.002, "loss": 2.3404, "step": 277670 }, { "epoch": 1.0734332235468758, "grad_norm": 0.10479222238063812, "learning_rate": 0.002, "loss": 2.3411, "step": 277680 }, { "epoch": 1.073471880750259, "grad_norm": 0.11077100038528442, "learning_rate": 0.002, "loss": 2.3477, "step": 277690 }, { "epoch": 1.0735105379536423, "grad_norm": 0.10235881060361862, "learning_rate": 0.002, "loss": 2.3278, "step": 277700 }, { "epoch": 1.0735491951570255, "grad_norm": 0.10528463870286942, "learning_rate": 0.002, "loss": 2.3184, "step": 277710 }, { "epoch": 1.0735878523604088, "grad_norm": 0.11103863269090652, "learning_rate": 0.002, "loss": 2.3276, "step": 277720 }, { "epoch": 1.073626509563792, "grad_norm": 0.11000587791204453, "learning_rate": 0.002, "loss": 2.3308, "step": 277730 }, { "epoch": 1.0736651667671755, "grad_norm": 0.1038767471909523, "learning_rate": 0.002, "loss": 2.3261, "step": 277740 }, { "epoch": 1.0737038239705587, "grad_norm": 0.10358269512653351, "learning_rate": 0.002, "loss": 2.3251, "step": 277750 }, { "epoch": 1.073742481173942, "grad_norm": 0.12331962585449219, "learning_rate": 0.002, "loss": 2.3356, "step": 277760 }, { "epoch": 1.0737811383773253, "grad_norm": 0.10131274908781052, "learning_rate": 0.002, "loss": 2.337, "step": 277770 }, { "epoch": 1.0738197955807085, "grad_norm": 0.10317271202802658, "learning_rate": 0.002, "loss": 2.3308, "step": 277780 }, { "epoch": 1.0738584527840918, "grad_norm": 0.10653816163539886, "learning_rate": 0.002, "loss": 2.3571, "step": 277790 }, { "epoch": 1.073897109987475, "grad_norm": 0.09283388406038284, "learning_rate": 0.002, "loss": 2.3429, "step": 277800 }, { "epoch": 1.0739357671908583, "grad_norm": 0.08693074434995651, "learning_rate": 0.002, "loss": 2.3472, "step": 277810 }, { "epoch": 1.0739744243942417, "grad_norm": 0.10669288039207458, "learning_rate": 0.002, "loss": 2.3402, "step": 277820 }, { "epoch": 1.074013081597625, "grad_norm": 0.11283266544342041, "learning_rate": 0.002, "loss": 2.3356, "step": 277830 }, { "epoch": 1.0740517388010082, "grad_norm": 0.10510426014661789, "learning_rate": 0.002, "loss": 2.3311, "step": 277840 }, { "epoch": 1.0740903960043915, "grad_norm": 0.09539424628019333, "learning_rate": 0.002, "loss": 2.3353, "step": 277850 }, { "epoch": 1.0741290532077747, "grad_norm": 0.10931488126516342, "learning_rate": 0.002, "loss": 2.3311, "step": 277860 }, { "epoch": 1.074167710411158, "grad_norm": 0.1264454573392868, "learning_rate": 0.002, "loss": 2.3261, "step": 277870 }, { "epoch": 1.0742063676145412, "grad_norm": 0.09714031964540482, "learning_rate": 0.002, "loss": 2.3254, "step": 277880 }, { "epoch": 1.0742450248179245, "grad_norm": 0.08943731337785721, "learning_rate": 0.002, "loss": 2.3416, "step": 277890 }, { "epoch": 1.0742836820213078, "grad_norm": 0.09937284141778946, "learning_rate": 0.002, "loss": 2.3183, "step": 277900 }, { "epoch": 1.0743223392246912, "grad_norm": 0.0941234901547432, "learning_rate": 0.002, "loss": 2.3338, "step": 277910 }, { "epoch": 1.0743609964280745, "grad_norm": 0.11058460175991058, "learning_rate": 0.002, "loss": 2.3282, "step": 277920 }, { "epoch": 1.0743996536314577, "grad_norm": 0.10181267559528351, "learning_rate": 0.002, "loss": 2.3436, "step": 277930 }, { "epoch": 1.074438310834841, "grad_norm": 0.11269213259220123, "learning_rate": 0.002, "loss": 2.3285, "step": 277940 }, { "epoch": 1.0744769680382242, "grad_norm": 0.1178668662905693, "learning_rate": 0.002, "loss": 2.3305, "step": 277950 }, { "epoch": 1.0745156252416075, "grad_norm": 0.10448413342237473, "learning_rate": 0.002, "loss": 2.3249, "step": 277960 }, { "epoch": 1.0745542824449907, "grad_norm": 0.0995522066950798, "learning_rate": 0.002, "loss": 2.3401, "step": 277970 }, { "epoch": 1.074592939648374, "grad_norm": 0.10711553692817688, "learning_rate": 0.002, "loss": 2.3432, "step": 277980 }, { "epoch": 1.0746315968517575, "grad_norm": 0.12523172795772552, "learning_rate": 0.002, "loss": 2.3462, "step": 277990 }, { "epoch": 1.0746702540551407, "grad_norm": 0.09922770410776138, "learning_rate": 0.002, "loss": 2.3205, "step": 278000 }, { "epoch": 1.074708911258524, "grad_norm": 0.09114769101142883, "learning_rate": 0.002, "loss": 2.322, "step": 278010 }, { "epoch": 1.0747475684619072, "grad_norm": 0.11168397963047028, "learning_rate": 0.002, "loss": 2.355, "step": 278020 }, { "epoch": 1.0747862256652905, "grad_norm": 0.12937746942043304, "learning_rate": 0.002, "loss": 2.3585, "step": 278030 }, { "epoch": 1.0748248828686737, "grad_norm": 0.10723833739757538, "learning_rate": 0.002, "loss": 2.3408, "step": 278040 }, { "epoch": 1.074863540072057, "grad_norm": 0.1325829029083252, "learning_rate": 0.002, "loss": 2.329, "step": 278050 }, { "epoch": 1.0749021972754402, "grad_norm": 0.10723695904016495, "learning_rate": 0.002, "loss": 2.3291, "step": 278060 }, { "epoch": 1.0749408544788235, "grad_norm": 0.09187041223049164, "learning_rate": 0.002, "loss": 2.3443, "step": 278070 }, { "epoch": 1.074979511682207, "grad_norm": 0.1059562936425209, "learning_rate": 0.002, "loss": 2.3327, "step": 278080 }, { "epoch": 1.0750181688855902, "grad_norm": 0.10393907129764557, "learning_rate": 0.002, "loss": 2.3395, "step": 278090 }, { "epoch": 1.0750568260889735, "grad_norm": 0.11185088008642197, "learning_rate": 0.002, "loss": 2.3457, "step": 278100 }, { "epoch": 1.0750954832923567, "grad_norm": 0.10708445310592651, "learning_rate": 0.002, "loss": 2.3358, "step": 278110 }, { "epoch": 1.07513414049574, "grad_norm": 0.10166383534669876, "learning_rate": 0.002, "loss": 2.3373, "step": 278120 }, { "epoch": 1.0751727976991232, "grad_norm": 0.1215904951095581, "learning_rate": 0.002, "loss": 2.3113, "step": 278130 }, { "epoch": 1.0752114549025065, "grad_norm": 0.11430537700653076, "learning_rate": 0.002, "loss": 2.352, "step": 278140 }, { "epoch": 1.0752501121058897, "grad_norm": 0.09518372267484665, "learning_rate": 0.002, "loss": 2.3368, "step": 278150 }, { "epoch": 1.0752887693092732, "grad_norm": 0.11318797618150711, "learning_rate": 0.002, "loss": 2.322, "step": 278160 }, { "epoch": 1.0753274265126564, "grad_norm": 0.0915597602725029, "learning_rate": 0.002, "loss": 2.3195, "step": 278170 }, { "epoch": 1.0753660837160397, "grad_norm": 0.1077663004398346, "learning_rate": 0.002, "loss": 2.3409, "step": 278180 }, { "epoch": 1.075404740919423, "grad_norm": 0.09900057315826416, "learning_rate": 0.002, "loss": 2.3519, "step": 278190 }, { "epoch": 1.0754433981228062, "grad_norm": 0.12153781205415726, "learning_rate": 0.002, "loss": 2.3238, "step": 278200 }, { "epoch": 1.0754820553261895, "grad_norm": 0.10533221065998077, "learning_rate": 0.002, "loss": 2.3191, "step": 278210 }, { "epoch": 1.0755207125295727, "grad_norm": 0.10090882331132889, "learning_rate": 0.002, "loss": 2.3428, "step": 278220 }, { "epoch": 1.075559369732956, "grad_norm": 0.09460879117250443, "learning_rate": 0.002, "loss": 2.3408, "step": 278230 }, { "epoch": 1.0755980269363392, "grad_norm": 0.11132773756980896, "learning_rate": 0.002, "loss": 2.3394, "step": 278240 }, { "epoch": 1.0756366841397227, "grad_norm": 0.1132529079914093, "learning_rate": 0.002, "loss": 2.3373, "step": 278250 }, { "epoch": 1.075675341343106, "grad_norm": 0.09819561243057251, "learning_rate": 0.002, "loss": 2.3279, "step": 278260 }, { "epoch": 1.0757139985464892, "grad_norm": 0.11191277205944061, "learning_rate": 0.002, "loss": 2.3418, "step": 278270 }, { "epoch": 1.0757526557498724, "grad_norm": 0.10459837317466736, "learning_rate": 0.002, "loss": 2.3446, "step": 278280 }, { "epoch": 1.0757913129532557, "grad_norm": 0.09608481824398041, "learning_rate": 0.002, "loss": 2.3392, "step": 278290 }, { "epoch": 1.075829970156639, "grad_norm": 0.10903522372245789, "learning_rate": 0.002, "loss": 2.355, "step": 278300 }, { "epoch": 1.0758686273600222, "grad_norm": 0.11461076885461807, "learning_rate": 0.002, "loss": 2.3277, "step": 278310 }, { "epoch": 1.0759072845634055, "grad_norm": 0.12711842358112335, "learning_rate": 0.002, "loss": 2.3247, "step": 278320 }, { "epoch": 1.075945941766789, "grad_norm": 0.11210744082927704, "learning_rate": 0.002, "loss": 2.3312, "step": 278330 }, { "epoch": 1.0759845989701722, "grad_norm": 0.09837070852518082, "learning_rate": 0.002, "loss": 2.3484, "step": 278340 }, { "epoch": 1.0760232561735554, "grad_norm": 0.12900109589099884, "learning_rate": 0.002, "loss": 2.3309, "step": 278350 }, { "epoch": 1.0760619133769387, "grad_norm": 0.10696274787187576, "learning_rate": 0.002, "loss": 2.3264, "step": 278360 }, { "epoch": 1.076100570580322, "grad_norm": 0.09934203326702118, "learning_rate": 0.002, "loss": 2.3294, "step": 278370 }, { "epoch": 1.0761392277837052, "grad_norm": 0.10716903209686279, "learning_rate": 0.002, "loss": 2.3204, "step": 278380 }, { "epoch": 1.0761778849870884, "grad_norm": 0.09976894408464432, "learning_rate": 0.002, "loss": 2.3367, "step": 278390 }, { "epoch": 1.0762165421904717, "grad_norm": 0.1285540908575058, "learning_rate": 0.002, "loss": 2.3427, "step": 278400 }, { "epoch": 1.076255199393855, "grad_norm": 0.09722083061933517, "learning_rate": 0.002, "loss": 2.3487, "step": 278410 }, { "epoch": 1.0762938565972384, "grad_norm": 0.10156688094139099, "learning_rate": 0.002, "loss": 2.326, "step": 278420 }, { "epoch": 1.0763325138006217, "grad_norm": 0.15742437541484833, "learning_rate": 0.002, "loss": 2.3486, "step": 278430 }, { "epoch": 1.076371171004005, "grad_norm": 0.11908959597349167, "learning_rate": 0.002, "loss": 2.3379, "step": 278440 }, { "epoch": 1.0764098282073882, "grad_norm": 0.17446118593215942, "learning_rate": 0.002, "loss": 2.3318, "step": 278450 }, { "epoch": 1.0764484854107714, "grad_norm": 0.10834288597106934, "learning_rate": 0.002, "loss": 2.3262, "step": 278460 }, { "epoch": 1.0764871426141547, "grad_norm": 0.09273096174001694, "learning_rate": 0.002, "loss": 2.3402, "step": 278470 }, { "epoch": 1.076525799817538, "grad_norm": 0.10428832471370697, "learning_rate": 0.002, "loss": 2.3335, "step": 278480 }, { "epoch": 1.0765644570209212, "grad_norm": 0.10714827477931976, "learning_rate": 0.002, "loss": 2.3403, "step": 278490 }, { "epoch": 1.0766031142243047, "grad_norm": 0.09862946718931198, "learning_rate": 0.002, "loss": 2.3276, "step": 278500 }, { "epoch": 1.076641771427688, "grad_norm": 0.0923583135008812, "learning_rate": 0.002, "loss": 2.3419, "step": 278510 }, { "epoch": 1.0766804286310712, "grad_norm": 0.12656864523887634, "learning_rate": 0.002, "loss": 2.3304, "step": 278520 }, { "epoch": 1.0767190858344544, "grad_norm": 0.09982597082853317, "learning_rate": 0.002, "loss": 2.3477, "step": 278530 }, { "epoch": 1.0767577430378377, "grad_norm": 0.09666809439659119, "learning_rate": 0.002, "loss": 2.3163, "step": 278540 }, { "epoch": 1.076796400241221, "grad_norm": 0.10843471437692642, "learning_rate": 0.002, "loss": 2.3157, "step": 278550 }, { "epoch": 1.0768350574446042, "grad_norm": 0.10855158418416977, "learning_rate": 0.002, "loss": 2.3352, "step": 278560 }, { "epoch": 1.0768737146479874, "grad_norm": 0.11012399196624756, "learning_rate": 0.002, "loss": 2.3469, "step": 278570 }, { "epoch": 1.0769123718513707, "grad_norm": 0.1115560233592987, "learning_rate": 0.002, "loss": 2.3351, "step": 278580 }, { "epoch": 1.0769510290547541, "grad_norm": 0.09982690215110779, "learning_rate": 0.002, "loss": 2.3483, "step": 278590 }, { "epoch": 1.0769896862581374, "grad_norm": 0.10116339474916458, "learning_rate": 0.002, "loss": 2.3526, "step": 278600 }, { "epoch": 1.0770283434615207, "grad_norm": 0.09681201726198196, "learning_rate": 0.002, "loss": 2.3432, "step": 278610 }, { "epoch": 1.077067000664904, "grad_norm": 0.15509819984436035, "learning_rate": 0.002, "loss": 2.3198, "step": 278620 }, { "epoch": 1.0771056578682872, "grad_norm": 0.10198183357715607, "learning_rate": 0.002, "loss": 2.3285, "step": 278630 }, { "epoch": 1.0771443150716704, "grad_norm": 0.09804889559745789, "learning_rate": 0.002, "loss": 2.3297, "step": 278640 }, { "epoch": 1.0771829722750537, "grad_norm": 0.10165011882781982, "learning_rate": 0.002, "loss": 2.3386, "step": 278650 }, { "epoch": 1.077221629478437, "grad_norm": 0.11190210282802582, "learning_rate": 0.002, "loss": 2.3285, "step": 278660 }, { "epoch": 1.0772602866818204, "grad_norm": 0.09899290651082993, "learning_rate": 0.002, "loss": 2.3198, "step": 278670 }, { "epoch": 1.0772989438852036, "grad_norm": 0.09202511608600616, "learning_rate": 0.002, "loss": 2.3087, "step": 278680 }, { "epoch": 1.077337601088587, "grad_norm": 0.1107383444905281, "learning_rate": 0.002, "loss": 2.3404, "step": 278690 }, { "epoch": 1.0773762582919701, "grad_norm": 0.10847750306129456, "learning_rate": 0.002, "loss": 2.3344, "step": 278700 }, { "epoch": 1.0774149154953534, "grad_norm": 0.1263248175382614, "learning_rate": 0.002, "loss": 2.3275, "step": 278710 }, { "epoch": 1.0774535726987367, "grad_norm": 0.1095813661813736, "learning_rate": 0.002, "loss": 2.3175, "step": 278720 }, { "epoch": 1.07749222990212, "grad_norm": 0.10310844331979752, "learning_rate": 0.002, "loss": 2.3354, "step": 278730 }, { "epoch": 1.0775308871055032, "grad_norm": 0.11807134002447128, "learning_rate": 0.002, "loss": 2.327, "step": 278740 }, { "epoch": 1.0775695443088864, "grad_norm": 0.09761965274810791, "learning_rate": 0.002, "loss": 2.3437, "step": 278750 }, { "epoch": 1.0776082015122699, "grad_norm": 0.118479885160923, "learning_rate": 0.002, "loss": 2.3335, "step": 278760 }, { "epoch": 1.0776468587156531, "grad_norm": 0.10395986586809158, "learning_rate": 0.002, "loss": 2.3406, "step": 278770 }, { "epoch": 1.0776855159190364, "grad_norm": 0.09768043458461761, "learning_rate": 0.002, "loss": 2.3205, "step": 278780 }, { "epoch": 1.0777241731224196, "grad_norm": 0.11547421663999557, "learning_rate": 0.002, "loss": 2.3347, "step": 278790 }, { "epoch": 1.077762830325803, "grad_norm": 0.13452501595020294, "learning_rate": 0.002, "loss": 2.3331, "step": 278800 }, { "epoch": 1.0778014875291861, "grad_norm": 0.1132790595293045, "learning_rate": 0.002, "loss": 2.3555, "step": 278810 }, { "epoch": 1.0778401447325694, "grad_norm": 0.10164622217416763, "learning_rate": 0.002, "loss": 2.3237, "step": 278820 }, { "epoch": 1.0778788019359526, "grad_norm": 0.11321406066417694, "learning_rate": 0.002, "loss": 2.3469, "step": 278830 }, { "epoch": 1.0779174591393361, "grad_norm": 0.12079408764839172, "learning_rate": 0.002, "loss": 2.3354, "step": 278840 }, { "epoch": 1.0779561163427194, "grad_norm": 0.10696251690387726, "learning_rate": 0.002, "loss": 2.3289, "step": 278850 }, { "epoch": 1.0779947735461026, "grad_norm": 0.10298726707696915, "learning_rate": 0.002, "loss": 2.3399, "step": 278860 }, { "epoch": 1.0780334307494859, "grad_norm": 0.11193158477544785, "learning_rate": 0.002, "loss": 2.3373, "step": 278870 }, { "epoch": 1.0780720879528691, "grad_norm": 0.09726030379533768, "learning_rate": 0.002, "loss": 2.323, "step": 278880 }, { "epoch": 1.0781107451562524, "grad_norm": 0.0914492979645729, "learning_rate": 0.002, "loss": 2.3365, "step": 278890 }, { "epoch": 1.0781494023596356, "grad_norm": 0.10119852423667908, "learning_rate": 0.002, "loss": 2.3189, "step": 278900 }, { "epoch": 1.0781880595630189, "grad_norm": 0.10391935706138611, "learning_rate": 0.002, "loss": 2.3511, "step": 278910 }, { "epoch": 1.0782267167664021, "grad_norm": 0.110045425593853, "learning_rate": 0.002, "loss": 2.3329, "step": 278920 }, { "epoch": 1.0782653739697856, "grad_norm": 0.10475011169910431, "learning_rate": 0.002, "loss": 2.3444, "step": 278930 }, { "epoch": 1.0783040311731689, "grad_norm": 0.09570727497339249, "learning_rate": 0.002, "loss": 2.3348, "step": 278940 }, { "epoch": 1.0783426883765521, "grad_norm": 0.10496969521045685, "learning_rate": 0.002, "loss": 2.3276, "step": 278950 }, { "epoch": 1.0783813455799354, "grad_norm": 0.10734902322292328, "learning_rate": 0.002, "loss": 2.3399, "step": 278960 }, { "epoch": 1.0784200027833186, "grad_norm": 0.11177080869674683, "learning_rate": 0.002, "loss": 2.3469, "step": 278970 }, { "epoch": 1.0784586599867019, "grad_norm": 0.12020063400268555, "learning_rate": 0.002, "loss": 2.3422, "step": 278980 }, { "epoch": 1.0784973171900851, "grad_norm": 0.09938409179449081, "learning_rate": 0.002, "loss": 2.3379, "step": 278990 }, { "epoch": 1.0785359743934684, "grad_norm": 0.10291053354740143, "learning_rate": 0.002, "loss": 2.3601, "step": 279000 }, { "epoch": 1.0785746315968519, "grad_norm": 0.10316527634859085, "learning_rate": 0.002, "loss": 2.3308, "step": 279010 }, { "epoch": 1.078613288800235, "grad_norm": 0.12356262654066086, "learning_rate": 0.002, "loss": 2.3294, "step": 279020 }, { "epoch": 1.0786519460036184, "grad_norm": 0.10434281080961227, "learning_rate": 0.002, "loss": 2.3312, "step": 279030 }, { "epoch": 1.0786906032070016, "grad_norm": 0.09368117153644562, "learning_rate": 0.002, "loss": 2.3337, "step": 279040 }, { "epoch": 1.0787292604103849, "grad_norm": 0.09900877624750137, "learning_rate": 0.002, "loss": 2.3394, "step": 279050 }, { "epoch": 1.0787679176137681, "grad_norm": 0.10736262798309326, "learning_rate": 0.002, "loss": 2.3315, "step": 279060 }, { "epoch": 1.0788065748171514, "grad_norm": 0.08920001238584518, "learning_rate": 0.002, "loss": 2.3413, "step": 279070 }, { "epoch": 1.0788452320205346, "grad_norm": 0.1025761291384697, "learning_rate": 0.002, "loss": 2.3377, "step": 279080 }, { "epoch": 1.0788838892239179, "grad_norm": 0.12492604553699493, "learning_rate": 0.002, "loss": 2.3398, "step": 279090 }, { "epoch": 1.0789225464273013, "grad_norm": 0.12873578071594238, "learning_rate": 0.002, "loss": 2.3321, "step": 279100 }, { "epoch": 1.0789612036306846, "grad_norm": 0.11375364661216736, "learning_rate": 0.002, "loss": 2.341, "step": 279110 }, { "epoch": 1.0789998608340678, "grad_norm": 0.12128067016601562, "learning_rate": 0.002, "loss": 2.3452, "step": 279120 }, { "epoch": 1.079038518037451, "grad_norm": 0.10374868661165237, "learning_rate": 0.002, "loss": 2.3442, "step": 279130 }, { "epoch": 1.0790771752408344, "grad_norm": 0.10124023258686066, "learning_rate": 0.002, "loss": 2.3307, "step": 279140 }, { "epoch": 1.0791158324442176, "grad_norm": 0.13224327564239502, "learning_rate": 0.002, "loss": 2.3411, "step": 279150 }, { "epoch": 1.0791544896476009, "grad_norm": 0.10030703991651535, "learning_rate": 0.002, "loss": 2.3468, "step": 279160 }, { "epoch": 1.079193146850984, "grad_norm": 0.12761624157428741, "learning_rate": 0.002, "loss": 2.3203, "step": 279170 }, { "epoch": 1.0792318040543676, "grad_norm": 0.10995358228683472, "learning_rate": 0.002, "loss": 2.3205, "step": 279180 }, { "epoch": 1.0792704612577508, "grad_norm": 0.10545569658279419, "learning_rate": 0.002, "loss": 2.3393, "step": 279190 }, { "epoch": 1.079309118461134, "grad_norm": 0.09752767533063889, "learning_rate": 0.002, "loss": 2.3302, "step": 279200 }, { "epoch": 1.0793477756645173, "grad_norm": 0.09510950744152069, "learning_rate": 0.002, "loss": 2.3331, "step": 279210 }, { "epoch": 1.0793864328679006, "grad_norm": 0.10693711042404175, "learning_rate": 0.002, "loss": 2.3394, "step": 279220 }, { "epoch": 1.0794250900712838, "grad_norm": 0.09939834475517273, "learning_rate": 0.002, "loss": 2.3336, "step": 279230 }, { "epoch": 1.079463747274667, "grad_norm": 0.11319193243980408, "learning_rate": 0.002, "loss": 2.3337, "step": 279240 }, { "epoch": 1.0795024044780503, "grad_norm": 0.08854492753744125, "learning_rate": 0.002, "loss": 2.3409, "step": 279250 }, { "epoch": 1.0795410616814338, "grad_norm": 0.10071007162332535, "learning_rate": 0.002, "loss": 2.3409, "step": 279260 }, { "epoch": 1.079579718884817, "grad_norm": 0.10331925004720688, "learning_rate": 0.002, "loss": 2.3366, "step": 279270 }, { "epoch": 1.0796183760882003, "grad_norm": 0.09988714009523392, "learning_rate": 0.002, "loss": 2.3352, "step": 279280 }, { "epoch": 1.0796570332915836, "grad_norm": 0.11048047989606857, "learning_rate": 0.002, "loss": 2.3449, "step": 279290 }, { "epoch": 1.0796956904949668, "grad_norm": 0.0965648666024208, "learning_rate": 0.002, "loss": 2.337, "step": 279300 }, { "epoch": 1.07973434769835, "grad_norm": 0.08937753736972809, "learning_rate": 0.002, "loss": 2.3348, "step": 279310 }, { "epoch": 1.0797730049017333, "grad_norm": 0.13992008566856384, "learning_rate": 0.002, "loss": 2.346, "step": 279320 }, { "epoch": 1.0798116621051166, "grad_norm": 0.09776229411363602, "learning_rate": 0.002, "loss": 2.3173, "step": 279330 }, { "epoch": 1.0798503193085, "grad_norm": 0.11652453243732452, "learning_rate": 0.002, "loss": 2.3306, "step": 279340 }, { "epoch": 1.0798889765118833, "grad_norm": 0.3779715597629547, "learning_rate": 0.002, "loss": 2.351, "step": 279350 }, { "epoch": 1.0799276337152666, "grad_norm": 0.24565216898918152, "learning_rate": 0.002, "loss": 2.3535, "step": 279360 }, { "epoch": 1.0799662909186498, "grad_norm": 0.1088612824678421, "learning_rate": 0.002, "loss": 2.3527, "step": 279370 }, { "epoch": 1.080004948122033, "grad_norm": 0.10705023258924484, "learning_rate": 0.002, "loss": 2.3299, "step": 279380 }, { "epoch": 1.0800436053254163, "grad_norm": 0.10938013345003128, "learning_rate": 0.002, "loss": 2.3278, "step": 279390 }, { "epoch": 1.0800822625287996, "grad_norm": 0.09505293518304825, "learning_rate": 0.002, "loss": 2.3513, "step": 279400 }, { "epoch": 1.0801209197321828, "grad_norm": 0.10040805488824844, "learning_rate": 0.002, "loss": 2.3379, "step": 279410 }, { "epoch": 1.080159576935566, "grad_norm": 0.10634848475456238, "learning_rate": 0.002, "loss": 2.3371, "step": 279420 }, { "epoch": 1.0801982341389496, "grad_norm": 0.1252964287996292, "learning_rate": 0.002, "loss": 2.322, "step": 279430 }, { "epoch": 1.0802368913423328, "grad_norm": 0.08823366463184357, "learning_rate": 0.002, "loss": 2.3301, "step": 279440 }, { "epoch": 1.080275548545716, "grad_norm": 0.11154643446207047, "learning_rate": 0.002, "loss": 2.3357, "step": 279450 }, { "epoch": 1.0803142057490993, "grad_norm": 0.10544943064451218, "learning_rate": 0.002, "loss": 2.3327, "step": 279460 }, { "epoch": 1.0803528629524826, "grad_norm": 0.13477879762649536, "learning_rate": 0.002, "loss": 2.3202, "step": 279470 }, { "epoch": 1.0803915201558658, "grad_norm": 0.1104428619146347, "learning_rate": 0.002, "loss": 2.3397, "step": 279480 }, { "epoch": 1.080430177359249, "grad_norm": 0.0944984182715416, "learning_rate": 0.002, "loss": 2.3366, "step": 279490 }, { "epoch": 1.0804688345626323, "grad_norm": 0.1066855788230896, "learning_rate": 0.002, "loss": 2.3402, "step": 279500 }, { "epoch": 1.0805074917660158, "grad_norm": 0.10000107437372208, "learning_rate": 0.002, "loss": 2.3358, "step": 279510 }, { "epoch": 1.080546148969399, "grad_norm": 0.09573391079902649, "learning_rate": 0.002, "loss": 2.3334, "step": 279520 }, { "epoch": 1.0805848061727823, "grad_norm": 0.12505954504013062, "learning_rate": 0.002, "loss": 2.3396, "step": 279530 }, { "epoch": 1.0806234633761655, "grad_norm": 0.10334816575050354, "learning_rate": 0.002, "loss": 2.3292, "step": 279540 }, { "epoch": 1.0806621205795488, "grad_norm": 0.09821376204490662, "learning_rate": 0.002, "loss": 2.3348, "step": 279550 }, { "epoch": 1.080700777782932, "grad_norm": 0.09971542656421661, "learning_rate": 0.002, "loss": 2.3459, "step": 279560 }, { "epoch": 1.0807394349863153, "grad_norm": 0.11195044964551926, "learning_rate": 0.002, "loss": 2.3274, "step": 279570 }, { "epoch": 1.0807780921896986, "grad_norm": 0.11056491732597351, "learning_rate": 0.002, "loss": 2.3445, "step": 279580 }, { "epoch": 1.0808167493930818, "grad_norm": 0.09117507189512253, "learning_rate": 0.002, "loss": 2.3431, "step": 279590 }, { "epoch": 1.0808554065964653, "grad_norm": 0.10031966120004654, "learning_rate": 0.002, "loss": 2.3293, "step": 279600 }, { "epoch": 1.0808940637998485, "grad_norm": 0.09302040189504623, "learning_rate": 0.002, "loss": 2.3374, "step": 279610 }, { "epoch": 1.0809327210032318, "grad_norm": 0.1287396401166916, "learning_rate": 0.002, "loss": 2.3321, "step": 279620 }, { "epoch": 1.080971378206615, "grad_norm": 0.1282958984375, "learning_rate": 0.002, "loss": 2.3318, "step": 279630 }, { "epoch": 1.0810100354099983, "grad_norm": 0.09668225795030594, "learning_rate": 0.002, "loss": 2.3327, "step": 279640 }, { "epoch": 1.0810486926133815, "grad_norm": 0.12607425451278687, "learning_rate": 0.002, "loss": 2.3315, "step": 279650 }, { "epoch": 1.0810873498167648, "grad_norm": 0.10190489888191223, "learning_rate": 0.002, "loss": 2.3454, "step": 279660 }, { "epoch": 1.081126007020148, "grad_norm": 0.11293112486600876, "learning_rate": 0.002, "loss": 2.3333, "step": 279670 }, { "epoch": 1.0811646642235315, "grad_norm": 0.11115790903568268, "learning_rate": 0.002, "loss": 2.3427, "step": 279680 }, { "epoch": 1.0812033214269148, "grad_norm": 0.10267027467489243, "learning_rate": 0.002, "loss": 2.3164, "step": 279690 }, { "epoch": 1.081241978630298, "grad_norm": 0.10083723813295364, "learning_rate": 0.002, "loss": 2.3397, "step": 279700 }, { "epoch": 1.0812806358336813, "grad_norm": 0.09403855353593826, "learning_rate": 0.002, "loss": 2.3416, "step": 279710 }, { "epoch": 1.0813192930370645, "grad_norm": 0.0995630994439125, "learning_rate": 0.002, "loss": 2.3305, "step": 279720 }, { "epoch": 1.0813579502404478, "grad_norm": 0.11248226463794708, "learning_rate": 0.002, "loss": 2.3498, "step": 279730 }, { "epoch": 1.081396607443831, "grad_norm": 0.11201859265565872, "learning_rate": 0.002, "loss": 2.3275, "step": 279740 }, { "epoch": 1.0814352646472143, "grad_norm": 0.08905567973852158, "learning_rate": 0.002, "loss": 2.3501, "step": 279750 }, { "epoch": 1.0814739218505975, "grad_norm": 0.09506616741418839, "learning_rate": 0.002, "loss": 2.3288, "step": 279760 }, { "epoch": 1.081512579053981, "grad_norm": 0.09160199016332626, "learning_rate": 0.002, "loss": 2.3311, "step": 279770 }, { "epoch": 1.0815512362573643, "grad_norm": 0.12843123078346252, "learning_rate": 0.002, "loss": 2.3423, "step": 279780 }, { "epoch": 1.0815898934607475, "grad_norm": 0.12316249310970306, "learning_rate": 0.002, "loss": 2.3358, "step": 279790 }, { "epoch": 1.0816285506641308, "grad_norm": 0.11130484938621521, "learning_rate": 0.002, "loss": 2.3531, "step": 279800 }, { "epoch": 1.081667207867514, "grad_norm": 0.10220065712928772, "learning_rate": 0.002, "loss": 2.3338, "step": 279810 }, { "epoch": 1.0817058650708973, "grad_norm": 0.11963673681020737, "learning_rate": 0.002, "loss": 2.3205, "step": 279820 }, { "epoch": 1.0817445222742805, "grad_norm": 0.1078323945403099, "learning_rate": 0.002, "loss": 2.352, "step": 279830 }, { "epoch": 1.0817831794776638, "grad_norm": 0.10187908262014389, "learning_rate": 0.002, "loss": 2.3523, "step": 279840 }, { "epoch": 1.0818218366810473, "grad_norm": 0.10245156288146973, "learning_rate": 0.002, "loss": 2.3365, "step": 279850 }, { "epoch": 1.0818604938844305, "grad_norm": 0.10593272000551224, "learning_rate": 0.002, "loss": 2.3432, "step": 279860 }, { "epoch": 1.0818991510878138, "grad_norm": 0.11321359872817993, "learning_rate": 0.002, "loss": 2.3303, "step": 279870 }, { "epoch": 1.081937808291197, "grad_norm": 0.11144161224365234, "learning_rate": 0.002, "loss": 2.3404, "step": 279880 }, { "epoch": 1.0819764654945803, "grad_norm": 0.10398919135332108, "learning_rate": 0.002, "loss": 2.3256, "step": 279890 }, { "epoch": 1.0820151226979635, "grad_norm": 0.10995649546384811, "learning_rate": 0.002, "loss": 2.3243, "step": 279900 }, { "epoch": 1.0820537799013468, "grad_norm": 0.1734006255865097, "learning_rate": 0.002, "loss": 2.3413, "step": 279910 }, { "epoch": 1.08209243710473, "grad_norm": 0.09477046877145767, "learning_rate": 0.002, "loss": 2.3199, "step": 279920 }, { "epoch": 1.0821310943081133, "grad_norm": 0.11546581983566284, "learning_rate": 0.002, "loss": 2.338, "step": 279930 }, { "epoch": 1.0821697515114967, "grad_norm": 0.10040906816720963, "learning_rate": 0.002, "loss": 2.3437, "step": 279940 }, { "epoch": 1.08220840871488, "grad_norm": 0.09605206549167633, "learning_rate": 0.002, "loss": 2.3419, "step": 279950 }, { "epoch": 1.0822470659182633, "grad_norm": 0.12556235492229462, "learning_rate": 0.002, "loss": 2.3379, "step": 279960 }, { "epoch": 1.0822857231216465, "grad_norm": 0.10956462472677231, "learning_rate": 0.002, "loss": 2.3293, "step": 279970 }, { "epoch": 1.0823243803250298, "grad_norm": 0.10256555676460266, "learning_rate": 0.002, "loss": 2.3456, "step": 279980 }, { "epoch": 1.082363037528413, "grad_norm": 0.10385078191757202, "learning_rate": 0.002, "loss": 2.325, "step": 279990 }, { "epoch": 1.0824016947317963, "grad_norm": 0.11647751182317734, "learning_rate": 0.002, "loss": 2.33, "step": 280000 }, { "epoch": 1.0824403519351795, "grad_norm": 0.10998006910085678, "learning_rate": 0.002, "loss": 2.3317, "step": 280010 }, { "epoch": 1.082479009138563, "grad_norm": 0.09821558743715286, "learning_rate": 0.002, "loss": 2.3238, "step": 280020 }, { "epoch": 1.0825176663419462, "grad_norm": 0.10078676044940948, "learning_rate": 0.002, "loss": 2.3318, "step": 280030 }, { "epoch": 1.0825563235453295, "grad_norm": 0.10271522402763367, "learning_rate": 0.002, "loss": 2.3457, "step": 280040 }, { "epoch": 1.0825949807487127, "grad_norm": 0.09726682305335999, "learning_rate": 0.002, "loss": 2.3468, "step": 280050 }, { "epoch": 1.082633637952096, "grad_norm": 0.09116078913211823, "learning_rate": 0.002, "loss": 2.3395, "step": 280060 }, { "epoch": 1.0826722951554792, "grad_norm": 0.10180582851171494, "learning_rate": 0.002, "loss": 2.3398, "step": 280070 }, { "epoch": 1.0827109523588625, "grad_norm": 0.10629677027463913, "learning_rate": 0.002, "loss": 2.3354, "step": 280080 }, { "epoch": 1.0827496095622458, "grad_norm": 0.1072465181350708, "learning_rate": 0.002, "loss": 2.3503, "step": 280090 }, { "epoch": 1.082788266765629, "grad_norm": 0.10737710446119308, "learning_rate": 0.002, "loss": 2.338, "step": 280100 }, { "epoch": 1.0828269239690125, "grad_norm": 0.10657298564910889, "learning_rate": 0.002, "loss": 2.3304, "step": 280110 }, { "epoch": 1.0828655811723957, "grad_norm": 0.12205453217029572, "learning_rate": 0.002, "loss": 2.3257, "step": 280120 }, { "epoch": 1.082904238375779, "grad_norm": 0.11014603823423386, "learning_rate": 0.002, "loss": 2.3445, "step": 280130 }, { "epoch": 1.0829428955791622, "grad_norm": 0.261272132396698, "learning_rate": 0.002, "loss": 2.3294, "step": 280140 }, { "epoch": 1.0829815527825455, "grad_norm": 0.10859159380197525, "learning_rate": 0.002, "loss": 2.3258, "step": 280150 }, { "epoch": 1.0830202099859287, "grad_norm": 0.11122594028711319, "learning_rate": 0.002, "loss": 2.3433, "step": 280160 }, { "epoch": 1.083058867189312, "grad_norm": 0.11559804528951645, "learning_rate": 0.002, "loss": 2.3195, "step": 280170 }, { "epoch": 1.0830975243926952, "grad_norm": 0.0992172583937645, "learning_rate": 0.002, "loss": 2.3536, "step": 280180 }, { "epoch": 1.0831361815960787, "grad_norm": 0.12179453670978546, "learning_rate": 0.002, "loss": 2.3322, "step": 280190 }, { "epoch": 1.083174838799462, "grad_norm": 0.10722031444311142, "learning_rate": 0.002, "loss": 2.3158, "step": 280200 }, { "epoch": 1.0832134960028452, "grad_norm": 0.09454517811536789, "learning_rate": 0.002, "loss": 2.3416, "step": 280210 }, { "epoch": 1.0832521532062285, "grad_norm": 0.11187735199928284, "learning_rate": 0.002, "loss": 2.341, "step": 280220 }, { "epoch": 1.0832908104096117, "grad_norm": 0.13172794878482819, "learning_rate": 0.002, "loss": 2.3378, "step": 280230 }, { "epoch": 1.083329467612995, "grad_norm": 0.10378477722406387, "learning_rate": 0.002, "loss": 2.3385, "step": 280240 }, { "epoch": 1.0833681248163782, "grad_norm": 0.10433642566204071, "learning_rate": 0.002, "loss": 2.3307, "step": 280250 }, { "epoch": 1.0834067820197615, "grad_norm": 0.10065358877182007, "learning_rate": 0.002, "loss": 2.3367, "step": 280260 }, { "epoch": 1.0834454392231447, "grad_norm": 0.09691182523965836, "learning_rate": 0.002, "loss": 2.3381, "step": 280270 }, { "epoch": 1.0834840964265282, "grad_norm": 0.10980410128831863, "learning_rate": 0.002, "loss": 2.3435, "step": 280280 }, { "epoch": 1.0835227536299115, "grad_norm": 0.11095762252807617, "learning_rate": 0.002, "loss": 2.34, "step": 280290 }, { "epoch": 1.0835614108332947, "grad_norm": 0.09890416264533997, "learning_rate": 0.002, "loss": 2.3312, "step": 280300 }, { "epoch": 1.083600068036678, "grad_norm": 0.13483504951000214, "learning_rate": 0.002, "loss": 2.3469, "step": 280310 }, { "epoch": 1.0836387252400612, "grad_norm": 0.10112215578556061, "learning_rate": 0.002, "loss": 2.3176, "step": 280320 }, { "epoch": 1.0836773824434445, "grad_norm": 0.1662277728319168, "learning_rate": 0.002, "loss": 2.3432, "step": 280330 }, { "epoch": 1.0837160396468277, "grad_norm": 0.1037745475769043, "learning_rate": 0.002, "loss": 2.3421, "step": 280340 }, { "epoch": 1.083754696850211, "grad_norm": 0.11243646591901779, "learning_rate": 0.002, "loss": 2.3465, "step": 280350 }, { "epoch": 1.0837933540535944, "grad_norm": 0.1472591906785965, "learning_rate": 0.002, "loss": 2.3207, "step": 280360 }, { "epoch": 1.0838320112569777, "grad_norm": 0.092743881046772, "learning_rate": 0.002, "loss": 2.3446, "step": 280370 }, { "epoch": 1.083870668460361, "grad_norm": 0.10754603147506714, "learning_rate": 0.002, "loss": 2.3405, "step": 280380 }, { "epoch": 1.0839093256637442, "grad_norm": 0.160426527261734, "learning_rate": 0.002, "loss": 2.3409, "step": 280390 }, { "epoch": 1.0839479828671275, "grad_norm": 0.11159920692443848, "learning_rate": 0.002, "loss": 2.3302, "step": 280400 }, { "epoch": 1.0839866400705107, "grad_norm": 0.10037899762392044, "learning_rate": 0.002, "loss": 2.349, "step": 280410 }, { "epoch": 1.084025297273894, "grad_norm": 0.1004137396812439, "learning_rate": 0.002, "loss": 2.3317, "step": 280420 }, { "epoch": 1.0840639544772772, "grad_norm": 0.10265239328145981, "learning_rate": 0.002, "loss": 2.3372, "step": 280430 }, { "epoch": 1.0841026116806605, "grad_norm": 0.10469073057174683, "learning_rate": 0.002, "loss": 2.344, "step": 280440 }, { "epoch": 1.084141268884044, "grad_norm": 0.10935996472835541, "learning_rate": 0.002, "loss": 2.3443, "step": 280450 }, { "epoch": 1.0841799260874272, "grad_norm": 0.09472064673900604, "learning_rate": 0.002, "loss": 2.3408, "step": 280460 }, { "epoch": 1.0842185832908104, "grad_norm": 0.16456358134746552, "learning_rate": 0.002, "loss": 2.3367, "step": 280470 }, { "epoch": 1.0842572404941937, "grad_norm": 0.10961253941059113, "learning_rate": 0.002, "loss": 2.3308, "step": 280480 }, { "epoch": 1.084295897697577, "grad_norm": 0.12051593512296677, "learning_rate": 0.002, "loss": 2.3337, "step": 280490 }, { "epoch": 1.0843345549009602, "grad_norm": 0.10590102523565292, "learning_rate": 0.002, "loss": 2.3433, "step": 280500 }, { "epoch": 1.0843732121043435, "grad_norm": 0.08928204327821732, "learning_rate": 0.002, "loss": 2.3416, "step": 280510 }, { "epoch": 1.0844118693077267, "grad_norm": 0.11208263784646988, "learning_rate": 0.002, "loss": 2.3453, "step": 280520 }, { "epoch": 1.0844505265111102, "grad_norm": 0.115069180727005, "learning_rate": 0.002, "loss": 2.3347, "step": 280530 }, { "epoch": 1.0844891837144934, "grad_norm": 0.10561076551675797, "learning_rate": 0.002, "loss": 2.3305, "step": 280540 }, { "epoch": 1.0845278409178767, "grad_norm": 0.12431753426790237, "learning_rate": 0.002, "loss": 2.3426, "step": 280550 }, { "epoch": 1.08456649812126, "grad_norm": 0.09250196069478989, "learning_rate": 0.002, "loss": 2.3272, "step": 280560 }, { "epoch": 1.0846051553246432, "grad_norm": 0.10803579539060593, "learning_rate": 0.002, "loss": 2.3242, "step": 280570 }, { "epoch": 1.0846438125280264, "grad_norm": 0.10429500788450241, "learning_rate": 0.002, "loss": 2.3385, "step": 280580 }, { "epoch": 1.0846824697314097, "grad_norm": 0.09534043818712234, "learning_rate": 0.002, "loss": 2.3322, "step": 280590 }, { "epoch": 1.084721126934793, "grad_norm": 0.10288545489311218, "learning_rate": 0.002, "loss": 2.3366, "step": 280600 }, { "epoch": 1.0847597841381762, "grad_norm": 0.12578828632831573, "learning_rate": 0.002, "loss": 2.3457, "step": 280610 }, { "epoch": 1.0847984413415597, "grad_norm": 0.1013014167547226, "learning_rate": 0.002, "loss": 2.3506, "step": 280620 }, { "epoch": 1.084837098544943, "grad_norm": 0.10470990836620331, "learning_rate": 0.002, "loss": 2.341, "step": 280630 }, { "epoch": 1.0848757557483262, "grad_norm": 0.10608610510826111, "learning_rate": 0.002, "loss": 2.3307, "step": 280640 }, { "epoch": 1.0849144129517094, "grad_norm": 0.1138029396533966, "learning_rate": 0.002, "loss": 2.3311, "step": 280650 }, { "epoch": 1.0849530701550927, "grad_norm": 0.13247482478618622, "learning_rate": 0.002, "loss": 2.3284, "step": 280660 }, { "epoch": 1.084991727358476, "grad_norm": 0.09688462316989899, "learning_rate": 0.002, "loss": 2.3293, "step": 280670 }, { "epoch": 1.0850303845618592, "grad_norm": 0.10027146339416504, "learning_rate": 0.002, "loss": 2.3463, "step": 280680 }, { "epoch": 1.0850690417652424, "grad_norm": 0.09682323783636093, "learning_rate": 0.002, "loss": 2.3425, "step": 280690 }, { "epoch": 1.085107698968626, "grad_norm": 0.0977088138461113, "learning_rate": 0.002, "loss": 2.3543, "step": 280700 }, { "epoch": 1.0851463561720092, "grad_norm": 0.11356858164072037, "learning_rate": 0.002, "loss": 2.3279, "step": 280710 }, { "epoch": 1.0851850133753924, "grad_norm": 0.10534996539354324, "learning_rate": 0.002, "loss": 2.3477, "step": 280720 }, { "epoch": 1.0852236705787757, "grad_norm": 0.14064523577690125, "learning_rate": 0.002, "loss": 2.334, "step": 280730 }, { "epoch": 1.085262327782159, "grad_norm": 0.108973428606987, "learning_rate": 0.002, "loss": 2.3312, "step": 280740 }, { "epoch": 1.0853009849855422, "grad_norm": 0.10470664501190186, "learning_rate": 0.002, "loss": 2.3313, "step": 280750 }, { "epoch": 1.0853396421889254, "grad_norm": 0.10709752142429352, "learning_rate": 0.002, "loss": 2.3328, "step": 280760 }, { "epoch": 1.0853782993923087, "grad_norm": 0.1092754602432251, "learning_rate": 0.002, "loss": 2.3512, "step": 280770 }, { "epoch": 1.085416956595692, "grad_norm": 0.11230559647083282, "learning_rate": 0.002, "loss": 2.3417, "step": 280780 }, { "epoch": 1.0854556137990754, "grad_norm": 0.09335718303918839, "learning_rate": 0.002, "loss": 2.347, "step": 280790 }, { "epoch": 1.0854942710024587, "grad_norm": 0.1012687161564827, "learning_rate": 0.002, "loss": 2.3291, "step": 280800 }, { "epoch": 1.085532928205842, "grad_norm": 0.09886537492275238, "learning_rate": 0.002, "loss": 2.3377, "step": 280810 }, { "epoch": 1.0855715854092252, "grad_norm": 0.10019920766353607, "learning_rate": 0.002, "loss": 2.3315, "step": 280820 }, { "epoch": 1.0856102426126084, "grad_norm": 0.12431039661169052, "learning_rate": 0.002, "loss": 2.3437, "step": 280830 }, { "epoch": 1.0856488998159917, "grad_norm": 0.1461450308561325, "learning_rate": 0.002, "loss": 2.34, "step": 280840 }, { "epoch": 1.085687557019375, "grad_norm": 0.0981675386428833, "learning_rate": 0.002, "loss": 2.3365, "step": 280850 }, { "epoch": 1.0857262142227582, "grad_norm": 0.09918622672557831, "learning_rate": 0.002, "loss": 2.3374, "step": 280860 }, { "epoch": 1.0857648714261416, "grad_norm": 0.0869884192943573, "learning_rate": 0.002, "loss": 2.329, "step": 280870 }, { "epoch": 1.085803528629525, "grad_norm": 0.10177048295736313, "learning_rate": 0.002, "loss": 2.3397, "step": 280880 }, { "epoch": 1.0858421858329081, "grad_norm": 0.12214681506156921, "learning_rate": 0.002, "loss": 2.3417, "step": 280890 }, { "epoch": 1.0858808430362914, "grad_norm": 0.10338126868009567, "learning_rate": 0.002, "loss": 2.3388, "step": 280900 }, { "epoch": 1.0859195002396747, "grad_norm": 0.09947532415390015, "learning_rate": 0.002, "loss": 2.3456, "step": 280910 }, { "epoch": 1.085958157443058, "grad_norm": 0.11874710768461227, "learning_rate": 0.002, "loss": 2.3422, "step": 280920 }, { "epoch": 1.0859968146464412, "grad_norm": 0.10474177449941635, "learning_rate": 0.002, "loss": 2.3389, "step": 280930 }, { "epoch": 1.0860354718498244, "grad_norm": 0.10702405124902725, "learning_rate": 0.002, "loss": 2.3256, "step": 280940 }, { "epoch": 1.0860741290532077, "grad_norm": 0.11294613033533096, "learning_rate": 0.002, "loss": 2.3487, "step": 280950 }, { "epoch": 1.0861127862565911, "grad_norm": 0.11382812261581421, "learning_rate": 0.002, "loss": 2.3445, "step": 280960 }, { "epoch": 1.0861514434599744, "grad_norm": 0.10009848326444626, "learning_rate": 0.002, "loss": 2.3274, "step": 280970 }, { "epoch": 1.0861901006633576, "grad_norm": 0.09343680739402771, "learning_rate": 0.002, "loss": 2.3344, "step": 280980 }, { "epoch": 1.086228757866741, "grad_norm": 0.11350204795598984, "learning_rate": 0.002, "loss": 2.3328, "step": 280990 }, { "epoch": 1.0862674150701241, "grad_norm": 0.11360235512256622, "learning_rate": 0.002, "loss": 2.3492, "step": 281000 }, { "epoch": 1.0863060722735074, "grad_norm": 0.0953127071261406, "learning_rate": 0.002, "loss": 2.3314, "step": 281010 }, { "epoch": 1.0863447294768906, "grad_norm": 0.10144411772489548, "learning_rate": 0.002, "loss": 2.3364, "step": 281020 }, { "epoch": 1.086383386680274, "grad_norm": 0.1047971174120903, "learning_rate": 0.002, "loss": 2.3432, "step": 281030 }, { "epoch": 1.0864220438836574, "grad_norm": 0.11271246522665024, "learning_rate": 0.002, "loss": 2.3444, "step": 281040 }, { "epoch": 1.0864607010870406, "grad_norm": 0.1031818762421608, "learning_rate": 0.002, "loss": 2.3374, "step": 281050 }, { "epoch": 1.0864993582904239, "grad_norm": 0.10849371552467346, "learning_rate": 0.002, "loss": 2.3531, "step": 281060 }, { "epoch": 1.0865380154938071, "grad_norm": 0.10233020782470703, "learning_rate": 0.002, "loss": 2.3162, "step": 281070 }, { "epoch": 1.0865766726971904, "grad_norm": 0.10694386065006256, "learning_rate": 0.002, "loss": 2.3358, "step": 281080 }, { "epoch": 1.0866153299005736, "grad_norm": 0.11533127725124359, "learning_rate": 0.002, "loss": 2.3401, "step": 281090 }, { "epoch": 1.0866539871039569, "grad_norm": 0.11579183489084244, "learning_rate": 0.002, "loss": 2.3455, "step": 281100 }, { "epoch": 1.0866926443073401, "grad_norm": 0.12135932594537735, "learning_rate": 0.002, "loss": 2.3318, "step": 281110 }, { "epoch": 1.0867313015107234, "grad_norm": 0.08877555280923843, "learning_rate": 0.002, "loss": 2.3382, "step": 281120 }, { "epoch": 1.0867699587141069, "grad_norm": 0.12630535662174225, "learning_rate": 0.002, "loss": 2.3489, "step": 281130 }, { "epoch": 1.0868086159174901, "grad_norm": 0.09699002653360367, "learning_rate": 0.002, "loss": 2.321, "step": 281140 }, { "epoch": 1.0868472731208734, "grad_norm": 0.1044619083404541, "learning_rate": 0.002, "loss": 2.3625, "step": 281150 }, { "epoch": 1.0868859303242566, "grad_norm": 0.10375456511974335, "learning_rate": 0.002, "loss": 2.3305, "step": 281160 }, { "epoch": 1.0869245875276399, "grad_norm": 0.10800100117921829, "learning_rate": 0.002, "loss": 2.3378, "step": 281170 }, { "epoch": 1.0869632447310231, "grad_norm": 0.11725430935621262, "learning_rate": 0.002, "loss": 2.3395, "step": 281180 }, { "epoch": 1.0870019019344064, "grad_norm": 0.11250857263803482, "learning_rate": 0.002, "loss": 2.3362, "step": 281190 }, { "epoch": 1.0870405591377899, "grad_norm": 0.11485522985458374, "learning_rate": 0.002, "loss": 2.333, "step": 281200 }, { "epoch": 1.087079216341173, "grad_norm": 0.09864205867052078, "learning_rate": 0.002, "loss": 2.3427, "step": 281210 }, { "epoch": 1.0871178735445564, "grad_norm": 0.12152174115180969, "learning_rate": 0.002, "loss": 2.3319, "step": 281220 }, { "epoch": 1.0871565307479396, "grad_norm": 0.1147390827536583, "learning_rate": 0.002, "loss": 2.3334, "step": 281230 }, { "epoch": 1.0871951879513229, "grad_norm": 0.11349363625049591, "learning_rate": 0.002, "loss": 2.3404, "step": 281240 }, { "epoch": 1.0872338451547061, "grad_norm": 0.09549278020858765, "learning_rate": 0.002, "loss": 2.3382, "step": 281250 }, { "epoch": 1.0872725023580894, "grad_norm": 0.10776299983263016, "learning_rate": 0.002, "loss": 2.3332, "step": 281260 }, { "epoch": 1.0873111595614726, "grad_norm": 0.09684637188911438, "learning_rate": 0.002, "loss": 2.3352, "step": 281270 }, { "epoch": 1.0873498167648559, "grad_norm": 0.10151070356369019, "learning_rate": 0.002, "loss": 2.356, "step": 281280 }, { "epoch": 1.0873884739682393, "grad_norm": 0.10506577789783478, "learning_rate": 0.002, "loss": 2.3496, "step": 281290 }, { "epoch": 1.0874271311716226, "grad_norm": 0.10208103805780411, "learning_rate": 0.002, "loss": 2.3291, "step": 281300 }, { "epoch": 1.0874657883750058, "grad_norm": 0.1129027009010315, "learning_rate": 0.002, "loss": 2.3409, "step": 281310 }, { "epoch": 1.087504445578389, "grad_norm": 0.09576416015625, "learning_rate": 0.002, "loss": 2.3437, "step": 281320 }, { "epoch": 1.0875431027817724, "grad_norm": 0.10639072954654694, "learning_rate": 0.002, "loss": 2.3149, "step": 281330 }, { "epoch": 1.0875817599851556, "grad_norm": 0.10176964104175568, "learning_rate": 0.002, "loss": 2.3295, "step": 281340 }, { "epoch": 1.0876204171885389, "grad_norm": 0.09694419801235199, "learning_rate": 0.002, "loss": 2.3288, "step": 281350 }, { "epoch": 1.087659074391922, "grad_norm": 0.1049187034368515, "learning_rate": 0.002, "loss": 2.3288, "step": 281360 }, { "epoch": 1.0876977315953056, "grad_norm": 0.10904958844184875, "learning_rate": 0.002, "loss": 2.3297, "step": 281370 }, { "epoch": 1.0877363887986888, "grad_norm": 0.10921395570039749, "learning_rate": 0.002, "loss": 2.3404, "step": 281380 }, { "epoch": 1.087775046002072, "grad_norm": 0.11888540536165237, "learning_rate": 0.002, "loss": 2.3361, "step": 281390 }, { "epoch": 1.0878137032054553, "grad_norm": 0.1021900326013565, "learning_rate": 0.002, "loss": 2.3472, "step": 281400 }, { "epoch": 1.0878523604088386, "grad_norm": 0.13177058100700378, "learning_rate": 0.002, "loss": 2.3374, "step": 281410 }, { "epoch": 1.0878910176122218, "grad_norm": 0.10043606162071228, "learning_rate": 0.002, "loss": 2.343, "step": 281420 }, { "epoch": 1.087929674815605, "grad_norm": 0.11822836846113205, "learning_rate": 0.002, "loss": 2.3285, "step": 281430 }, { "epoch": 1.0879683320189883, "grad_norm": 0.12574489414691925, "learning_rate": 0.002, "loss": 2.3454, "step": 281440 }, { "epoch": 1.0880069892223716, "grad_norm": 0.1164129227399826, "learning_rate": 0.002, "loss": 2.3317, "step": 281450 }, { "epoch": 1.088045646425755, "grad_norm": 0.10353794693946838, "learning_rate": 0.002, "loss": 2.352, "step": 281460 }, { "epoch": 1.0880843036291383, "grad_norm": 0.10000605136156082, "learning_rate": 0.002, "loss": 2.3437, "step": 281470 }, { "epoch": 1.0881229608325216, "grad_norm": 0.12134052067995071, "learning_rate": 0.002, "loss": 2.3438, "step": 281480 }, { "epoch": 1.0881616180359048, "grad_norm": 0.11231108754873276, "learning_rate": 0.002, "loss": 2.333, "step": 281490 }, { "epoch": 1.088200275239288, "grad_norm": 0.10282287746667862, "learning_rate": 0.002, "loss": 2.3225, "step": 281500 }, { "epoch": 1.0882389324426713, "grad_norm": 0.09950326383113861, "learning_rate": 0.002, "loss": 2.3522, "step": 281510 }, { "epoch": 1.0882775896460546, "grad_norm": 0.09664419293403625, "learning_rate": 0.002, "loss": 2.3274, "step": 281520 }, { "epoch": 1.0883162468494378, "grad_norm": 0.10877744108438492, "learning_rate": 0.002, "loss": 2.3421, "step": 281530 }, { "epoch": 1.0883549040528213, "grad_norm": 0.1068381667137146, "learning_rate": 0.002, "loss": 2.3297, "step": 281540 }, { "epoch": 1.0883935612562046, "grad_norm": 0.12714095413684845, "learning_rate": 0.002, "loss": 2.3253, "step": 281550 }, { "epoch": 1.0884322184595878, "grad_norm": 0.11153408885002136, "learning_rate": 0.002, "loss": 2.3423, "step": 281560 }, { "epoch": 1.088470875662971, "grad_norm": 0.0963999554514885, "learning_rate": 0.002, "loss": 2.3392, "step": 281570 }, { "epoch": 1.0885095328663543, "grad_norm": 0.09225306659936905, "learning_rate": 0.002, "loss": 2.3423, "step": 281580 }, { "epoch": 1.0885481900697376, "grad_norm": 0.08588103950023651, "learning_rate": 0.002, "loss": 2.3536, "step": 281590 }, { "epoch": 1.0885868472731208, "grad_norm": 0.1292261779308319, "learning_rate": 0.002, "loss": 2.3523, "step": 281600 }, { "epoch": 1.088625504476504, "grad_norm": 0.09912700951099396, "learning_rate": 0.002, "loss": 2.3427, "step": 281610 }, { "epoch": 1.0886641616798873, "grad_norm": 0.08940158784389496, "learning_rate": 0.002, "loss": 2.3305, "step": 281620 }, { "epoch": 1.0887028188832708, "grad_norm": 0.12613654136657715, "learning_rate": 0.002, "loss": 2.3422, "step": 281630 }, { "epoch": 1.088741476086654, "grad_norm": 0.10662415623664856, "learning_rate": 0.002, "loss": 2.3279, "step": 281640 }, { "epoch": 1.0887801332900373, "grad_norm": 0.10559440404176712, "learning_rate": 0.002, "loss": 2.3366, "step": 281650 }, { "epoch": 1.0888187904934206, "grad_norm": 0.10085096955299377, "learning_rate": 0.002, "loss": 2.3462, "step": 281660 }, { "epoch": 1.0888574476968038, "grad_norm": 0.10817304253578186, "learning_rate": 0.002, "loss": 2.3193, "step": 281670 }, { "epoch": 1.088896104900187, "grad_norm": 0.09820788353681564, "learning_rate": 0.002, "loss": 2.3386, "step": 281680 }, { "epoch": 1.0889347621035703, "grad_norm": 0.1037544384598732, "learning_rate": 0.002, "loss": 2.3372, "step": 281690 }, { "epoch": 1.0889734193069536, "grad_norm": 0.11510848253965378, "learning_rate": 0.002, "loss": 2.3309, "step": 281700 }, { "epoch": 1.089012076510337, "grad_norm": 0.0945771336555481, "learning_rate": 0.002, "loss": 2.3324, "step": 281710 }, { "epoch": 1.0890507337137203, "grad_norm": 0.08993828296661377, "learning_rate": 0.002, "loss": 2.3519, "step": 281720 }, { "epoch": 1.0890893909171036, "grad_norm": 0.11030997335910797, "learning_rate": 0.002, "loss": 2.3468, "step": 281730 }, { "epoch": 1.0891280481204868, "grad_norm": 0.0994657352566719, "learning_rate": 0.002, "loss": 2.3403, "step": 281740 }, { "epoch": 1.08916670532387, "grad_norm": 0.10831445455551147, "learning_rate": 0.002, "loss": 2.3342, "step": 281750 }, { "epoch": 1.0892053625272533, "grad_norm": 0.11834604293107986, "learning_rate": 0.002, "loss": 2.3411, "step": 281760 }, { "epoch": 1.0892440197306366, "grad_norm": 0.09943471103906631, "learning_rate": 0.002, "loss": 2.3397, "step": 281770 }, { "epoch": 1.0892826769340198, "grad_norm": 0.09234745800495148, "learning_rate": 0.002, "loss": 2.3356, "step": 281780 }, { "epoch": 1.089321334137403, "grad_norm": 0.10414262861013412, "learning_rate": 0.002, "loss": 2.3361, "step": 281790 }, { "epoch": 1.0893599913407865, "grad_norm": 0.11926162987947464, "learning_rate": 0.002, "loss": 2.3198, "step": 281800 }, { "epoch": 1.0893986485441698, "grad_norm": 0.10056176781654358, "learning_rate": 0.002, "loss": 2.3271, "step": 281810 }, { "epoch": 1.089437305747553, "grad_norm": 0.10224136710166931, "learning_rate": 0.002, "loss": 2.336, "step": 281820 }, { "epoch": 1.0894759629509363, "grad_norm": 0.09794122725725174, "learning_rate": 0.002, "loss": 2.3379, "step": 281830 }, { "epoch": 1.0895146201543195, "grad_norm": 0.1126994714140892, "learning_rate": 0.002, "loss": 2.332, "step": 281840 }, { "epoch": 1.0895532773577028, "grad_norm": 0.09958003461360931, "learning_rate": 0.002, "loss": 2.3367, "step": 281850 }, { "epoch": 1.089591934561086, "grad_norm": 0.10423572361469269, "learning_rate": 0.002, "loss": 2.3305, "step": 281860 }, { "epoch": 1.0896305917644693, "grad_norm": 0.10851602256298065, "learning_rate": 0.002, "loss": 2.3153, "step": 281870 }, { "epoch": 1.0896692489678528, "grad_norm": 0.1614455133676529, "learning_rate": 0.002, "loss": 2.336, "step": 281880 }, { "epoch": 1.089707906171236, "grad_norm": 0.10868556052446365, "learning_rate": 0.002, "loss": 2.3204, "step": 281890 }, { "epoch": 1.0897465633746193, "grad_norm": 0.1033228188753128, "learning_rate": 0.002, "loss": 2.3346, "step": 281900 }, { "epoch": 1.0897852205780025, "grad_norm": 0.10506334900856018, "learning_rate": 0.002, "loss": 2.35, "step": 281910 }, { "epoch": 1.0898238777813858, "grad_norm": 0.10521656274795532, "learning_rate": 0.002, "loss": 2.3332, "step": 281920 }, { "epoch": 1.089862534984769, "grad_norm": 0.10547047853469849, "learning_rate": 0.002, "loss": 2.3438, "step": 281930 }, { "epoch": 1.0899011921881523, "grad_norm": 0.11806923151016235, "learning_rate": 0.002, "loss": 2.3296, "step": 281940 }, { "epoch": 1.0899398493915355, "grad_norm": 0.10370279103517532, "learning_rate": 0.002, "loss": 2.3472, "step": 281950 }, { "epoch": 1.0899785065949188, "grad_norm": 0.1059202328324318, "learning_rate": 0.002, "loss": 2.3347, "step": 281960 }, { "epoch": 1.0900171637983023, "grad_norm": 0.09515325725078583, "learning_rate": 0.002, "loss": 2.3372, "step": 281970 }, { "epoch": 1.0900558210016855, "grad_norm": 0.13132987916469574, "learning_rate": 0.002, "loss": 2.3394, "step": 281980 }, { "epoch": 1.0900944782050688, "grad_norm": 0.14355821907520294, "learning_rate": 0.002, "loss": 2.3344, "step": 281990 }, { "epoch": 1.090133135408452, "grad_norm": 0.10220211744308472, "learning_rate": 0.002, "loss": 2.3364, "step": 282000 }, { "epoch": 1.0901717926118353, "grad_norm": 0.09334767609834671, "learning_rate": 0.002, "loss": 2.3292, "step": 282010 }, { "epoch": 1.0902104498152185, "grad_norm": 0.12202389538288116, "learning_rate": 0.002, "loss": 2.3304, "step": 282020 }, { "epoch": 1.0902491070186018, "grad_norm": 0.12086988985538483, "learning_rate": 0.002, "loss": 2.337, "step": 282030 }, { "epoch": 1.090287764221985, "grad_norm": 0.11102332174777985, "learning_rate": 0.002, "loss": 2.3564, "step": 282040 }, { "epoch": 1.0903264214253685, "grad_norm": 0.1081869900226593, "learning_rate": 0.002, "loss": 2.3317, "step": 282050 }, { "epoch": 1.0903650786287518, "grad_norm": 0.09771132469177246, "learning_rate": 0.002, "loss": 2.3316, "step": 282060 }, { "epoch": 1.090403735832135, "grad_norm": 0.1110697090625763, "learning_rate": 0.002, "loss": 2.3449, "step": 282070 }, { "epoch": 1.0904423930355183, "grad_norm": 0.11461424827575684, "learning_rate": 0.002, "loss": 2.3412, "step": 282080 }, { "epoch": 1.0904810502389015, "grad_norm": 0.1096828505396843, "learning_rate": 0.002, "loss": 2.3427, "step": 282090 }, { "epoch": 1.0905197074422848, "grad_norm": 0.10947354882955551, "learning_rate": 0.002, "loss": 2.3383, "step": 282100 }, { "epoch": 1.090558364645668, "grad_norm": 0.09177210927009583, "learning_rate": 0.002, "loss": 2.3302, "step": 282110 }, { "epoch": 1.0905970218490513, "grad_norm": 0.10618321597576141, "learning_rate": 0.002, "loss": 2.3542, "step": 282120 }, { "epoch": 1.0906356790524345, "grad_norm": 0.10912807285785675, "learning_rate": 0.002, "loss": 2.3327, "step": 282130 }, { "epoch": 1.090674336255818, "grad_norm": 0.12251587957143784, "learning_rate": 0.002, "loss": 2.3272, "step": 282140 }, { "epoch": 1.0907129934592013, "grad_norm": 0.09408677369356155, "learning_rate": 0.002, "loss": 2.3363, "step": 282150 }, { "epoch": 1.0907516506625845, "grad_norm": 0.11050599068403244, "learning_rate": 0.002, "loss": 2.3315, "step": 282160 }, { "epoch": 1.0907903078659678, "grad_norm": 0.11192735284566879, "learning_rate": 0.002, "loss": 2.3333, "step": 282170 }, { "epoch": 1.090828965069351, "grad_norm": 0.09625013172626495, "learning_rate": 0.002, "loss": 2.3286, "step": 282180 }, { "epoch": 1.0908676222727343, "grad_norm": 0.12688453495502472, "learning_rate": 0.002, "loss": 2.3413, "step": 282190 }, { "epoch": 1.0909062794761175, "grad_norm": 0.09461814165115356, "learning_rate": 0.002, "loss": 2.3226, "step": 282200 }, { "epoch": 1.0909449366795008, "grad_norm": 0.08834907412528992, "learning_rate": 0.002, "loss": 2.3414, "step": 282210 }, { "epoch": 1.0909835938828842, "grad_norm": 0.10458917915821075, "learning_rate": 0.002, "loss": 2.3423, "step": 282220 }, { "epoch": 1.0910222510862675, "grad_norm": 0.1085110753774643, "learning_rate": 0.002, "loss": 2.3374, "step": 282230 }, { "epoch": 1.0910609082896507, "grad_norm": 0.0959525927901268, "learning_rate": 0.002, "loss": 2.332, "step": 282240 }, { "epoch": 1.091099565493034, "grad_norm": 0.09756126999855042, "learning_rate": 0.002, "loss": 2.3293, "step": 282250 }, { "epoch": 1.0911382226964172, "grad_norm": 0.11785954982042313, "learning_rate": 0.002, "loss": 2.3497, "step": 282260 }, { "epoch": 1.0911768798998005, "grad_norm": 0.09997006505727768, "learning_rate": 0.002, "loss": 2.333, "step": 282270 }, { "epoch": 1.0912155371031838, "grad_norm": 0.10260170698165894, "learning_rate": 0.002, "loss": 2.3474, "step": 282280 }, { "epoch": 1.091254194306567, "grad_norm": 0.11056840419769287, "learning_rate": 0.002, "loss": 2.3358, "step": 282290 }, { "epoch": 1.0912928515099503, "grad_norm": 0.08586467802524567, "learning_rate": 0.002, "loss": 2.3286, "step": 282300 }, { "epoch": 1.0913315087133337, "grad_norm": 0.09769351780414581, "learning_rate": 0.002, "loss": 2.3149, "step": 282310 }, { "epoch": 1.091370165916717, "grad_norm": 0.09916350245475769, "learning_rate": 0.002, "loss": 2.3354, "step": 282320 }, { "epoch": 1.0914088231201002, "grad_norm": 0.14137159287929535, "learning_rate": 0.002, "loss": 2.3372, "step": 282330 }, { "epoch": 1.0914474803234835, "grad_norm": 0.10548698902130127, "learning_rate": 0.002, "loss": 2.326, "step": 282340 }, { "epoch": 1.0914861375268667, "grad_norm": 0.1067422479391098, "learning_rate": 0.002, "loss": 2.3247, "step": 282350 }, { "epoch": 1.09152479473025, "grad_norm": 0.09344740211963654, "learning_rate": 0.002, "loss": 2.3307, "step": 282360 }, { "epoch": 1.0915634519336332, "grad_norm": 0.1000688299536705, "learning_rate": 0.002, "loss": 2.3551, "step": 282370 }, { "epoch": 1.0916021091370165, "grad_norm": 0.10962257534265518, "learning_rate": 0.002, "loss": 2.3374, "step": 282380 }, { "epoch": 1.0916407663404, "grad_norm": 0.10238444805145264, "learning_rate": 0.002, "loss": 2.3377, "step": 282390 }, { "epoch": 1.0916794235437832, "grad_norm": 0.09976116567850113, "learning_rate": 0.002, "loss": 2.341, "step": 282400 }, { "epoch": 1.0917180807471665, "grad_norm": 0.10841300338506699, "learning_rate": 0.002, "loss": 2.3373, "step": 282410 }, { "epoch": 1.0917567379505497, "grad_norm": 0.13784433901309967, "learning_rate": 0.002, "loss": 2.3317, "step": 282420 }, { "epoch": 1.091795395153933, "grad_norm": 0.10153056681156158, "learning_rate": 0.002, "loss": 2.3397, "step": 282430 }, { "epoch": 1.0918340523573162, "grad_norm": 0.0959775373339653, "learning_rate": 0.002, "loss": 2.324, "step": 282440 }, { "epoch": 1.0918727095606995, "grad_norm": 0.11032044887542725, "learning_rate": 0.002, "loss": 2.3347, "step": 282450 }, { "epoch": 1.0919113667640827, "grad_norm": 0.08857570588588715, "learning_rate": 0.002, "loss": 2.3376, "step": 282460 }, { "epoch": 1.091950023967466, "grad_norm": 0.10108328610658646, "learning_rate": 0.002, "loss": 2.3335, "step": 282470 }, { "epoch": 1.0919886811708495, "grad_norm": 0.10817533731460571, "learning_rate": 0.002, "loss": 2.3359, "step": 282480 }, { "epoch": 1.0920273383742327, "grad_norm": 0.10717236250638962, "learning_rate": 0.002, "loss": 2.3334, "step": 282490 }, { "epoch": 1.092065995577616, "grad_norm": 0.10586033016443253, "learning_rate": 0.002, "loss": 2.3429, "step": 282500 }, { "epoch": 1.0921046527809992, "grad_norm": 0.10583069175481796, "learning_rate": 0.002, "loss": 2.3504, "step": 282510 }, { "epoch": 1.0921433099843825, "grad_norm": 0.09219476580619812, "learning_rate": 0.002, "loss": 2.3269, "step": 282520 }, { "epoch": 1.0921819671877657, "grad_norm": 0.10465552657842636, "learning_rate": 0.002, "loss": 2.3298, "step": 282530 }, { "epoch": 1.092220624391149, "grad_norm": 0.10524141043424606, "learning_rate": 0.002, "loss": 2.3223, "step": 282540 }, { "epoch": 1.0922592815945322, "grad_norm": 0.09466832876205444, "learning_rate": 0.002, "loss": 2.3574, "step": 282550 }, { "epoch": 1.0922979387979157, "grad_norm": 0.11899077147245407, "learning_rate": 0.002, "loss": 2.3371, "step": 282560 }, { "epoch": 1.092336596001299, "grad_norm": 0.11153442412614822, "learning_rate": 0.002, "loss": 2.3449, "step": 282570 }, { "epoch": 1.0923752532046822, "grad_norm": 0.1049443930387497, "learning_rate": 0.002, "loss": 2.3447, "step": 282580 }, { "epoch": 1.0924139104080655, "grad_norm": 0.11074576526880264, "learning_rate": 0.002, "loss": 2.3314, "step": 282590 }, { "epoch": 1.0924525676114487, "grad_norm": 0.10997039824724197, "learning_rate": 0.002, "loss": 2.3355, "step": 282600 }, { "epoch": 1.092491224814832, "grad_norm": 0.11313261091709137, "learning_rate": 0.002, "loss": 2.3427, "step": 282610 }, { "epoch": 1.0925298820182152, "grad_norm": 0.09633355587720871, "learning_rate": 0.002, "loss": 2.3523, "step": 282620 }, { "epoch": 1.0925685392215985, "grad_norm": 0.09769116342067719, "learning_rate": 0.002, "loss": 2.3271, "step": 282630 }, { "epoch": 1.0926071964249817, "grad_norm": 0.10594171285629272, "learning_rate": 0.002, "loss": 2.3399, "step": 282640 }, { "epoch": 1.0926458536283652, "grad_norm": 0.09079165756702423, "learning_rate": 0.002, "loss": 2.327, "step": 282650 }, { "epoch": 1.0926845108317484, "grad_norm": 0.10845083743333817, "learning_rate": 0.002, "loss": 2.3153, "step": 282660 }, { "epoch": 1.0927231680351317, "grad_norm": 0.09342625737190247, "learning_rate": 0.002, "loss": 2.3152, "step": 282670 }, { "epoch": 1.092761825238515, "grad_norm": 0.09645923227071762, "learning_rate": 0.002, "loss": 2.351, "step": 282680 }, { "epoch": 1.0928004824418982, "grad_norm": 0.10519500821828842, "learning_rate": 0.002, "loss": 2.3303, "step": 282690 }, { "epoch": 1.0928391396452815, "grad_norm": 0.10223992168903351, "learning_rate": 0.002, "loss": 2.3279, "step": 282700 }, { "epoch": 1.0928777968486647, "grad_norm": 0.10256168246269226, "learning_rate": 0.002, "loss": 2.3284, "step": 282710 }, { "epoch": 1.092916454052048, "grad_norm": 0.11982840299606323, "learning_rate": 0.002, "loss": 2.3272, "step": 282720 }, { "epoch": 1.0929551112554314, "grad_norm": 0.1197512075304985, "learning_rate": 0.002, "loss": 2.3389, "step": 282730 }, { "epoch": 1.0929937684588147, "grad_norm": 0.1088634803891182, "learning_rate": 0.002, "loss": 2.3309, "step": 282740 }, { "epoch": 1.093032425662198, "grad_norm": 0.1055222898721695, "learning_rate": 0.002, "loss": 2.3462, "step": 282750 }, { "epoch": 1.0930710828655812, "grad_norm": 0.10009009391069412, "learning_rate": 0.002, "loss": 2.3227, "step": 282760 }, { "epoch": 1.0931097400689644, "grad_norm": 0.09391296654939651, "learning_rate": 0.002, "loss": 2.3321, "step": 282770 }, { "epoch": 1.0931483972723477, "grad_norm": 0.11217600107192993, "learning_rate": 0.002, "loss": 2.3392, "step": 282780 }, { "epoch": 1.093187054475731, "grad_norm": 0.13397294282913208, "learning_rate": 0.002, "loss": 2.3368, "step": 282790 }, { "epoch": 1.0932257116791142, "grad_norm": 0.09961531311273575, "learning_rate": 0.002, "loss": 2.3265, "step": 282800 }, { "epoch": 1.0932643688824975, "grad_norm": 0.09814653545618057, "learning_rate": 0.002, "loss": 2.3276, "step": 282810 }, { "epoch": 1.093303026085881, "grad_norm": 0.12853848934173584, "learning_rate": 0.002, "loss": 2.3461, "step": 282820 }, { "epoch": 1.0933416832892642, "grad_norm": 0.10931167006492615, "learning_rate": 0.002, "loss": 2.3336, "step": 282830 }, { "epoch": 1.0933803404926474, "grad_norm": 0.11069802194833755, "learning_rate": 0.002, "loss": 2.3498, "step": 282840 }, { "epoch": 1.0934189976960307, "grad_norm": 0.09355204552412033, "learning_rate": 0.002, "loss": 2.3439, "step": 282850 }, { "epoch": 1.093457654899414, "grad_norm": 0.09880665689706802, "learning_rate": 0.002, "loss": 2.3517, "step": 282860 }, { "epoch": 1.0934963121027972, "grad_norm": 0.09627640247344971, "learning_rate": 0.002, "loss": 2.3397, "step": 282870 }, { "epoch": 1.0935349693061804, "grad_norm": 0.11856389045715332, "learning_rate": 0.002, "loss": 2.3225, "step": 282880 }, { "epoch": 1.0935736265095637, "grad_norm": 0.09591594338417053, "learning_rate": 0.002, "loss": 2.3273, "step": 282890 }, { "epoch": 1.0936122837129472, "grad_norm": 0.10306254774332047, "learning_rate": 0.002, "loss": 2.3212, "step": 282900 }, { "epoch": 1.0936509409163304, "grad_norm": 0.11224587261676788, "learning_rate": 0.002, "loss": 2.3206, "step": 282910 }, { "epoch": 1.0936895981197137, "grad_norm": 0.10437082499265671, "learning_rate": 0.002, "loss": 2.3223, "step": 282920 }, { "epoch": 1.093728255323097, "grad_norm": 0.09855062514543533, "learning_rate": 0.002, "loss": 2.336, "step": 282930 }, { "epoch": 1.0937669125264802, "grad_norm": 0.09843055158853531, "learning_rate": 0.002, "loss": 2.3487, "step": 282940 }, { "epoch": 1.0938055697298634, "grad_norm": 0.09790752083063126, "learning_rate": 0.002, "loss": 2.3495, "step": 282950 }, { "epoch": 1.0938442269332467, "grad_norm": 0.10575347393751144, "learning_rate": 0.002, "loss": 2.3359, "step": 282960 }, { "epoch": 1.09388288413663, "grad_norm": 0.12459485232830048, "learning_rate": 0.002, "loss": 2.3509, "step": 282970 }, { "epoch": 1.0939215413400132, "grad_norm": 0.11834020912647247, "learning_rate": 0.002, "loss": 2.3436, "step": 282980 }, { "epoch": 1.0939601985433967, "grad_norm": 0.11081720888614655, "learning_rate": 0.002, "loss": 2.3525, "step": 282990 }, { "epoch": 1.09399885574678, "grad_norm": 0.11029912531375885, "learning_rate": 0.002, "loss": 2.3283, "step": 283000 }, { "epoch": 1.0940375129501632, "grad_norm": 0.12420996278524399, "learning_rate": 0.002, "loss": 2.3203, "step": 283010 }, { "epoch": 1.0940761701535464, "grad_norm": 0.09996948391199112, "learning_rate": 0.002, "loss": 2.3366, "step": 283020 }, { "epoch": 1.0941148273569297, "grad_norm": 0.11960510909557343, "learning_rate": 0.002, "loss": 2.3371, "step": 283030 }, { "epoch": 1.094153484560313, "grad_norm": 0.10635130107402802, "learning_rate": 0.002, "loss": 2.3455, "step": 283040 }, { "epoch": 1.0941921417636962, "grad_norm": 0.14550523459911346, "learning_rate": 0.002, "loss": 2.3278, "step": 283050 }, { "epoch": 1.0942307989670796, "grad_norm": 0.09189361333847046, "learning_rate": 0.002, "loss": 2.3347, "step": 283060 }, { "epoch": 1.094269456170463, "grad_norm": 0.12127846479415894, "learning_rate": 0.002, "loss": 2.3416, "step": 283070 }, { "epoch": 1.0943081133738461, "grad_norm": 0.10634903609752655, "learning_rate": 0.002, "loss": 2.3428, "step": 283080 }, { "epoch": 1.0943467705772294, "grad_norm": 0.0875033587217331, "learning_rate": 0.002, "loss": 2.3224, "step": 283090 }, { "epoch": 1.0943854277806127, "grad_norm": 0.10889355093240738, "learning_rate": 0.002, "loss": 2.3436, "step": 283100 }, { "epoch": 1.094424084983996, "grad_norm": 0.11864157766103745, "learning_rate": 0.002, "loss": 2.3325, "step": 283110 }, { "epoch": 1.0944627421873792, "grad_norm": 0.09440959244966507, "learning_rate": 0.002, "loss": 2.3377, "step": 283120 }, { "epoch": 1.0945013993907624, "grad_norm": 0.10142505168914795, "learning_rate": 0.002, "loss": 2.3394, "step": 283130 }, { "epoch": 1.0945400565941457, "grad_norm": 0.095737025141716, "learning_rate": 0.002, "loss": 2.3398, "step": 283140 }, { "epoch": 1.0945787137975291, "grad_norm": 0.10328061878681183, "learning_rate": 0.002, "loss": 2.3403, "step": 283150 }, { "epoch": 1.0946173710009124, "grad_norm": 0.11649847775697708, "learning_rate": 0.002, "loss": 2.3332, "step": 283160 }, { "epoch": 1.0946560282042956, "grad_norm": 0.10745200514793396, "learning_rate": 0.002, "loss": 2.3288, "step": 283170 }, { "epoch": 1.094694685407679, "grad_norm": 0.10119928419589996, "learning_rate": 0.002, "loss": 2.3345, "step": 283180 }, { "epoch": 1.0947333426110621, "grad_norm": 0.102198526263237, "learning_rate": 0.002, "loss": 2.338, "step": 283190 }, { "epoch": 1.0947719998144454, "grad_norm": 0.10957236588001251, "learning_rate": 0.002, "loss": 2.3489, "step": 283200 }, { "epoch": 1.0948106570178286, "grad_norm": 0.09801110625267029, "learning_rate": 0.002, "loss": 2.3369, "step": 283210 }, { "epoch": 1.094849314221212, "grad_norm": 0.1274397224187851, "learning_rate": 0.002, "loss": 2.3429, "step": 283220 }, { "epoch": 1.0948879714245954, "grad_norm": 0.08464301377534866, "learning_rate": 0.002, "loss": 2.3491, "step": 283230 }, { "epoch": 1.0949266286279786, "grad_norm": 0.095365509390831, "learning_rate": 0.002, "loss": 2.3404, "step": 283240 }, { "epoch": 1.0949652858313619, "grad_norm": 0.09696754068136215, "learning_rate": 0.002, "loss": 2.3368, "step": 283250 }, { "epoch": 1.0950039430347451, "grad_norm": 0.10455650091171265, "learning_rate": 0.002, "loss": 2.3373, "step": 283260 }, { "epoch": 1.0950426002381284, "grad_norm": 0.08448006212711334, "learning_rate": 0.002, "loss": 2.3406, "step": 283270 }, { "epoch": 1.0950812574415116, "grad_norm": 0.10643161088228226, "learning_rate": 0.002, "loss": 2.3437, "step": 283280 }, { "epoch": 1.0951199146448949, "grad_norm": 0.1184401661157608, "learning_rate": 0.002, "loss": 2.3403, "step": 283290 }, { "epoch": 1.0951585718482781, "grad_norm": 0.10242758691310883, "learning_rate": 0.002, "loss": 2.3256, "step": 283300 }, { "epoch": 1.0951972290516614, "grad_norm": 0.09504953771829605, "learning_rate": 0.002, "loss": 2.3435, "step": 283310 }, { "epoch": 1.0952358862550449, "grad_norm": 0.0992271676659584, "learning_rate": 0.002, "loss": 2.3325, "step": 283320 }, { "epoch": 1.0952745434584281, "grad_norm": 0.0895295962691307, "learning_rate": 0.002, "loss": 2.3287, "step": 283330 }, { "epoch": 1.0953132006618114, "grad_norm": 0.09662551432847977, "learning_rate": 0.002, "loss": 2.3165, "step": 283340 }, { "epoch": 1.0953518578651946, "grad_norm": 0.10198549926280975, "learning_rate": 0.002, "loss": 2.329, "step": 283350 }, { "epoch": 1.0953905150685779, "grad_norm": 0.11558642238378525, "learning_rate": 0.002, "loss": 2.3401, "step": 283360 }, { "epoch": 1.0954291722719611, "grad_norm": 0.10037699341773987, "learning_rate": 0.002, "loss": 2.347, "step": 283370 }, { "epoch": 1.0954678294753444, "grad_norm": 0.10268152505159378, "learning_rate": 0.002, "loss": 2.3322, "step": 283380 }, { "epoch": 1.0955064866787276, "grad_norm": 0.08938860893249512, "learning_rate": 0.002, "loss": 2.3201, "step": 283390 }, { "epoch": 1.095545143882111, "grad_norm": 0.10257000476121902, "learning_rate": 0.002, "loss": 2.3305, "step": 283400 }, { "epoch": 1.0955838010854944, "grad_norm": 0.10406453162431717, "learning_rate": 0.002, "loss": 2.3381, "step": 283410 }, { "epoch": 1.0956224582888776, "grad_norm": 0.09739171713590622, "learning_rate": 0.002, "loss": 2.3128, "step": 283420 }, { "epoch": 1.0956611154922609, "grad_norm": 0.10173606127500534, "learning_rate": 0.002, "loss": 2.3301, "step": 283430 }, { "epoch": 1.0956997726956441, "grad_norm": 0.10406725853681564, "learning_rate": 0.002, "loss": 2.3345, "step": 283440 }, { "epoch": 1.0957384298990274, "grad_norm": 0.11148425191640854, "learning_rate": 0.002, "loss": 2.3362, "step": 283450 }, { "epoch": 1.0957770871024106, "grad_norm": 0.13934779167175293, "learning_rate": 0.002, "loss": 2.336, "step": 283460 }, { "epoch": 1.0958157443057939, "grad_norm": 0.11460437625646591, "learning_rate": 0.002, "loss": 2.3463, "step": 283470 }, { "epoch": 1.0958544015091771, "grad_norm": 0.10291483998298645, "learning_rate": 0.002, "loss": 2.3241, "step": 283480 }, { "epoch": 1.0958930587125606, "grad_norm": 0.1973194181919098, "learning_rate": 0.002, "loss": 2.3347, "step": 283490 }, { "epoch": 1.0959317159159438, "grad_norm": 0.09999255836009979, "learning_rate": 0.002, "loss": 2.3284, "step": 283500 }, { "epoch": 1.095970373119327, "grad_norm": 0.09152089804410934, "learning_rate": 0.002, "loss": 2.3576, "step": 283510 }, { "epoch": 1.0960090303227104, "grad_norm": 0.10186208039522171, "learning_rate": 0.002, "loss": 2.3372, "step": 283520 }, { "epoch": 1.0960476875260936, "grad_norm": 0.09846815466880798, "learning_rate": 0.002, "loss": 2.3374, "step": 283530 }, { "epoch": 1.0960863447294769, "grad_norm": 0.1407434195280075, "learning_rate": 0.002, "loss": 2.3544, "step": 283540 }, { "epoch": 1.09612500193286, "grad_norm": 0.10553040355443954, "learning_rate": 0.002, "loss": 2.3337, "step": 283550 }, { "epoch": 1.0961636591362434, "grad_norm": 0.10117257386445999, "learning_rate": 0.002, "loss": 2.3537, "step": 283560 }, { "epoch": 1.0962023163396268, "grad_norm": 0.10986392199993134, "learning_rate": 0.002, "loss": 2.3527, "step": 283570 }, { "epoch": 1.09624097354301, "grad_norm": 0.09731899946928024, "learning_rate": 0.002, "loss": 2.3192, "step": 283580 }, { "epoch": 1.0962796307463933, "grad_norm": 0.10193801671266556, "learning_rate": 0.002, "loss": 2.3275, "step": 283590 }, { "epoch": 1.0963182879497766, "grad_norm": 0.10751131922006607, "learning_rate": 0.002, "loss": 2.3397, "step": 283600 }, { "epoch": 1.0963569451531598, "grad_norm": 0.10056516528129578, "learning_rate": 0.002, "loss": 2.3245, "step": 283610 }, { "epoch": 1.096395602356543, "grad_norm": 0.08993778377771378, "learning_rate": 0.002, "loss": 2.3269, "step": 283620 }, { "epoch": 1.0964342595599263, "grad_norm": 0.1073274165391922, "learning_rate": 0.002, "loss": 2.3401, "step": 283630 }, { "epoch": 1.0964729167633096, "grad_norm": 0.13375040888786316, "learning_rate": 0.002, "loss": 2.3334, "step": 283640 }, { "epoch": 1.0965115739666929, "grad_norm": 0.11124551296234131, "learning_rate": 0.002, "loss": 2.3328, "step": 283650 }, { "epoch": 1.0965502311700763, "grad_norm": 0.09685186296701431, "learning_rate": 0.002, "loss": 2.3302, "step": 283660 }, { "epoch": 1.0965888883734596, "grad_norm": 0.10944927483797073, "learning_rate": 0.002, "loss": 2.3376, "step": 283670 }, { "epoch": 1.0966275455768428, "grad_norm": 0.12088717520236969, "learning_rate": 0.002, "loss": 2.33, "step": 283680 }, { "epoch": 1.096666202780226, "grad_norm": 0.09783034771680832, "learning_rate": 0.002, "loss": 2.3282, "step": 283690 }, { "epoch": 1.0967048599836093, "grad_norm": 0.09728636592626572, "learning_rate": 0.002, "loss": 2.3339, "step": 283700 }, { "epoch": 1.0967435171869926, "grad_norm": 0.11335409432649612, "learning_rate": 0.002, "loss": 2.3328, "step": 283710 }, { "epoch": 1.0967821743903758, "grad_norm": 0.13359227776527405, "learning_rate": 0.002, "loss": 2.3337, "step": 283720 }, { "epoch": 1.096820831593759, "grad_norm": 0.12501873075962067, "learning_rate": 0.002, "loss": 2.3351, "step": 283730 }, { "epoch": 1.0968594887971426, "grad_norm": 0.10961022973060608, "learning_rate": 0.002, "loss": 2.3396, "step": 283740 }, { "epoch": 1.0968981460005258, "grad_norm": 0.10510764271020889, "learning_rate": 0.002, "loss": 2.3354, "step": 283750 }, { "epoch": 1.096936803203909, "grad_norm": 0.26109975576400757, "learning_rate": 0.002, "loss": 2.3365, "step": 283760 }, { "epoch": 1.0969754604072923, "grad_norm": 0.1035788282752037, "learning_rate": 0.002, "loss": 2.3453, "step": 283770 }, { "epoch": 1.0970141176106756, "grad_norm": 0.14167214930057526, "learning_rate": 0.002, "loss": 2.3424, "step": 283780 }, { "epoch": 1.0970527748140588, "grad_norm": 0.09841074794530869, "learning_rate": 0.002, "loss": 2.3415, "step": 283790 }, { "epoch": 1.097091432017442, "grad_norm": 0.10851003229618073, "learning_rate": 0.002, "loss": 2.3346, "step": 283800 }, { "epoch": 1.0971300892208253, "grad_norm": 0.09611783176660538, "learning_rate": 0.002, "loss": 2.3328, "step": 283810 }, { "epoch": 1.0971687464242086, "grad_norm": 0.09295455366373062, "learning_rate": 0.002, "loss": 2.3353, "step": 283820 }, { "epoch": 1.097207403627592, "grad_norm": 0.12964671850204468, "learning_rate": 0.002, "loss": 2.336, "step": 283830 }, { "epoch": 1.0972460608309753, "grad_norm": 0.09662358462810516, "learning_rate": 0.002, "loss": 2.338, "step": 283840 }, { "epoch": 1.0972847180343586, "grad_norm": 0.10117034614086151, "learning_rate": 0.002, "loss": 2.3268, "step": 283850 }, { "epoch": 1.0973233752377418, "grad_norm": 0.10756699740886688, "learning_rate": 0.002, "loss": 2.3418, "step": 283860 }, { "epoch": 1.097362032441125, "grad_norm": 0.1286441534757614, "learning_rate": 0.002, "loss": 2.3326, "step": 283870 }, { "epoch": 1.0974006896445083, "grad_norm": 0.1278308480978012, "learning_rate": 0.002, "loss": 2.3377, "step": 283880 }, { "epoch": 1.0974393468478916, "grad_norm": 0.09650461375713348, "learning_rate": 0.002, "loss": 2.3343, "step": 283890 }, { "epoch": 1.0974780040512748, "grad_norm": 0.10499653965234756, "learning_rate": 0.002, "loss": 2.3444, "step": 283900 }, { "epoch": 1.0975166612546583, "grad_norm": 0.12353569269180298, "learning_rate": 0.002, "loss": 2.3329, "step": 283910 }, { "epoch": 1.0975553184580416, "grad_norm": 0.11407862603664398, "learning_rate": 0.002, "loss": 2.332, "step": 283920 }, { "epoch": 1.0975939756614248, "grad_norm": 0.11378373950719833, "learning_rate": 0.002, "loss": 2.337, "step": 283930 }, { "epoch": 1.097632632864808, "grad_norm": 0.09341233223676682, "learning_rate": 0.002, "loss": 2.3406, "step": 283940 }, { "epoch": 1.0976712900681913, "grad_norm": 0.1038387194275856, "learning_rate": 0.002, "loss": 2.3328, "step": 283950 }, { "epoch": 1.0977099472715746, "grad_norm": 0.10045725107192993, "learning_rate": 0.002, "loss": 2.34, "step": 283960 }, { "epoch": 1.0977486044749578, "grad_norm": 0.11936244368553162, "learning_rate": 0.002, "loss": 2.3356, "step": 283970 }, { "epoch": 1.097787261678341, "grad_norm": 0.12233182042837143, "learning_rate": 0.002, "loss": 2.3378, "step": 283980 }, { "epoch": 1.0978259188817243, "grad_norm": 0.10054139047861099, "learning_rate": 0.002, "loss": 2.3289, "step": 283990 }, { "epoch": 1.0978645760851078, "grad_norm": 0.11717696487903595, "learning_rate": 0.002, "loss": 2.3452, "step": 284000 }, { "epoch": 1.097903233288491, "grad_norm": 0.11514868587255478, "learning_rate": 0.002, "loss": 2.3435, "step": 284010 }, { "epoch": 1.0979418904918743, "grad_norm": 0.10135221481323242, "learning_rate": 0.002, "loss": 2.3422, "step": 284020 }, { "epoch": 1.0979805476952575, "grad_norm": 0.11172813177108765, "learning_rate": 0.002, "loss": 2.3289, "step": 284030 }, { "epoch": 1.0980192048986408, "grad_norm": 0.13704566657543182, "learning_rate": 0.002, "loss": 2.3297, "step": 284040 }, { "epoch": 1.098057862102024, "grad_norm": 0.1290348768234253, "learning_rate": 0.002, "loss": 2.3397, "step": 284050 }, { "epoch": 1.0980965193054073, "grad_norm": 0.10477565228939056, "learning_rate": 0.002, "loss": 2.3304, "step": 284060 }, { "epoch": 1.0981351765087906, "grad_norm": 0.0943203717470169, "learning_rate": 0.002, "loss": 2.3405, "step": 284070 }, { "epoch": 1.098173833712174, "grad_norm": 0.11145610362291336, "learning_rate": 0.002, "loss": 2.3316, "step": 284080 }, { "epoch": 1.0982124909155573, "grad_norm": 0.09936654567718506, "learning_rate": 0.002, "loss": 2.3355, "step": 284090 }, { "epoch": 1.0982511481189405, "grad_norm": 0.09357469528913498, "learning_rate": 0.002, "loss": 2.3251, "step": 284100 }, { "epoch": 1.0982898053223238, "grad_norm": 0.11341989040374756, "learning_rate": 0.002, "loss": 2.3313, "step": 284110 }, { "epoch": 1.098328462525707, "grad_norm": 0.11302851140499115, "learning_rate": 0.002, "loss": 2.338, "step": 284120 }, { "epoch": 1.0983671197290903, "grad_norm": 0.11179227381944656, "learning_rate": 0.002, "loss": 2.3402, "step": 284130 }, { "epoch": 1.0984057769324735, "grad_norm": 0.10397867113351822, "learning_rate": 0.002, "loss": 2.3326, "step": 284140 }, { "epoch": 1.0984444341358568, "grad_norm": 0.10592572391033173, "learning_rate": 0.002, "loss": 2.3366, "step": 284150 }, { "epoch": 1.09848309133924, "grad_norm": 0.1135161817073822, "learning_rate": 0.002, "loss": 2.3421, "step": 284160 }, { "epoch": 1.0985217485426235, "grad_norm": 0.11015531420707703, "learning_rate": 0.002, "loss": 2.332, "step": 284170 }, { "epoch": 1.0985604057460068, "grad_norm": 0.1006748378276825, "learning_rate": 0.002, "loss": 2.3221, "step": 284180 }, { "epoch": 1.09859906294939, "grad_norm": 0.10288757085800171, "learning_rate": 0.002, "loss": 2.3292, "step": 284190 }, { "epoch": 1.0986377201527733, "grad_norm": 0.11791105568408966, "learning_rate": 0.002, "loss": 2.3515, "step": 284200 }, { "epoch": 1.0986763773561565, "grad_norm": 0.1182391345500946, "learning_rate": 0.002, "loss": 2.3557, "step": 284210 }, { "epoch": 1.0987150345595398, "grad_norm": 0.10134794563055038, "learning_rate": 0.002, "loss": 2.3477, "step": 284220 }, { "epoch": 1.098753691762923, "grad_norm": 0.10538896173238754, "learning_rate": 0.002, "loss": 2.3335, "step": 284230 }, { "epoch": 1.0987923489663063, "grad_norm": 0.11533650755882263, "learning_rate": 0.002, "loss": 2.3355, "step": 284240 }, { "epoch": 1.0988310061696898, "grad_norm": 0.10452184826135635, "learning_rate": 0.002, "loss": 2.3429, "step": 284250 }, { "epoch": 1.098869663373073, "grad_norm": 0.10536464303731918, "learning_rate": 0.002, "loss": 2.3473, "step": 284260 }, { "epoch": 1.0989083205764563, "grad_norm": 0.09663750976324081, "learning_rate": 0.002, "loss": 2.3162, "step": 284270 }, { "epoch": 1.0989469777798395, "grad_norm": 0.10678671300411224, "learning_rate": 0.002, "loss": 2.3359, "step": 284280 }, { "epoch": 1.0989856349832228, "grad_norm": 0.11602350324392319, "learning_rate": 0.002, "loss": 2.3327, "step": 284290 }, { "epoch": 1.099024292186606, "grad_norm": 0.09180068224668503, "learning_rate": 0.002, "loss": 2.3376, "step": 284300 }, { "epoch": 1.0990629493899893, "grad_norm": 0.1271265298128128, "learning_rate": 0.002, "loss": 2.3472, "step": 284310 }, { "epoch": 1.0991016065933725, "grad_norm": 0.12973609566688538, "learning_rate": 0.002, "loss": 2.3151, "step": 284320 }, { "epoch": 1.0991402637967558, "grad_norm": 0.10404635965824127, "learning_rate": 0.002, "loss": 2.3421, "step": 284330 }, { "epoch": 1.0991789210001393, "grad_norm": 0.10287823528051376, "learning_rate": 0.002, "loss": 2.3403, "step": 284340 }, { "epoch": 1.0992175782035225, "grad_norm": 0.11792373657226562, "learning_rate": 0.002, "loss": 2.3142, "step": 284350 }, { "epoch": 1.0992562354069058, "grad_norm": 0.10006173700094223, "learning_rate": 0.002, "loss": 2.3161, "step": 284360 }, { "epoch": 1.099294892610289, "grad_norm": 0.10750042647123337, "learning_rate": 0.002, "loss": 2.3454, "step": 284370 }, { "epoch": 1.0993335498136723, "grad_norm": 0.10897422581911087, "learning_rate": 0.002, "loss": 2.3271, "step": 284380 }, { "epoch": 1.0993722070170555, "grad_norm": 0.11446550488471985, "learning_rate": 0.002, "loss": 2.3456, "step": 284390 }, { "epoch": 1.0994108642204388, "grad_norm": 0.10375721007585526, "learning_rate": 0.002, "loss": 2.3374, "step": 284400 }, { "epoch": 1.099449521423822, "grad_norm": 0.10286027938127518, "learning_rate": 0.002, "loss": 2.3448, "step": 284410 }, { "epoch": 1.0994881786272055, "grad_norm": 0.09958858788013458, "learning_rate": 0.002, "loss": 2.3166, "step": 284420 }, { "epoch": 1.0995268358305887, "grad_norm": 0.11231593787670135, "learning_rate": 0.002, "loss": 2.3249, "step": 284430 }, { "epoch": 1.099565493033972, "grad_norm": 0.0965258777141571, "learning_rate": 0.002, "loss": 2.3151, "step": 284440 }, { "epoch": 1.0996041502373552, "grad_norm": 0.12888574600219727, "learning_rate": 0.002, "loss": 2.3443, "step": 284450 }, { "epoch": 1.0996428074407385, "grad_norm": 0.10070165991783142, "learning_rate": 0.002, "loss": 2.3403, "step": 284460 }, { "epoch": 1.0996814646441218, "grad_norm": 0.10629080981016159, "learning_rate": 0.002, "loss": 2.3284, "step": 284470 }, { "epoch": 1.099720121847505, "grad_norm": 0.10859460383653641, "learning_rate": 0.002, "loss": 2.3381, "step": 284480 }, { "epoch": 1.0997587790508883, "grad_norm": 0.1041555181145668, "learning_rate": 0.002, "loss": 2.3354, "step": 284490 }, { "epoch": 1.0997974362542715, "grad_norm": 0.1404908150434494, "learning_rate": 0.002, "loss": 2.3445, "step": 284500 }, { "epoch": 1.099836093457655, "grad_norm": 0.10661852359771729, "learning_rate": 0.002, "loss": 2.3554, "step": 284510 }, { "epoch": 1.0998747506610382, "grad_norm": 0.09594159573316574, "learning_rate": 0.002, "loss": 2.3383, "step": 284520 }, { "epoch": 1.0999134078644215, "grad_norm": 0.10165949165821075, "learning_rate": 0.002, "loss": 2.3255, "step": 284530 }, { "epoch": 1.0999520650678047, "grad_norm": 0.10713981091976166, "learning_rate": 0.002, "loss": 2.3405, "step": 284540 }, { "epoch": 1.099990722271188, "grad_norm": 0.1109931617975235, "learning_rate": 0.002, "loss": 2.3176, "step": 284550 }, { "epoch": 1.1000293794745712, "grad_norm": 0.09798242896795273, "learning_rate": 0.002, "loss": 2.3504, "step": 284560 }, { "epoch": 1.1000680366779545, "grad_norm": 0.11462962627410889, "learning_rate": 0.002, "loss": 2.328, "step": 284570 }, { "epoch": 1.1001066938813377, "grad_norm": 0.10036194324493408, "learning_rate": 0.002, "loss": 2.3325, "step": 284580 }, { "epoch": 1.1001453510847212, "grad_norm": 0.0940588042140007, "learning_rate": 0.002, "loss": 2.3331, "step": 284590 }, { "epoch": 1.1001840082881045, "grad_norm": 0.10205405950546265, "learning_rate": 0.002, "loss": 2.327, "step": 284600 }, { "epoch": 1.1002226654914877, "grad_norm": 0.09708960354328156, "learning_rate": 0.002, "loss": 2.3313, "step": 284610 }, { "epoch": 1.100261322694871, "grad_norm": 0.09543051570653915, "learning_rate": 0.002, "loss": 2.3504, "step": 284620 }, { "epoch": 1.1002999798982542, "grad_norm": 0.11247290670871735, "learning_rate": 0.002, "loss": 2.3444, "step": 284630 }, { "epoch": 1.1003386371016375, "grad_norm": 0.10105713456869125, "learning_rate": 0.002, "loss": 2.3336, "step": 284640 }, { "epoch": 1.1003772943050207, "grad_norm": 0.09401721507310867, "learning_rate": 0.002, "loss": 2.3197, "step": 284650 }, { "epoch": 1.100415951508404, "grad_norm": 0.11368951946496964, "learning_rate": 0.002, "loss": 2.336, "step": 284660 }, { "epoch": 1.1004546087117872, "grad_norm": 0.09477970004081726, "learning_rate": 0.002, "loss": 2.3406, "step": 284670 }, { "epoch": 1.1004932659151707, "grad_norm": 0.10316511243581772, "learning_rate": 0.002, "loss": 2.3148, "step": 284680 }, { "epoch": 1.100531923118554, "grad_norm": 0.09212981164455414, "learning_rate": 0.002, "loss": 2.3316, "step": 284690 }, { "epoch": 1.1005705803219372, "grad_norm": 0.106972336769104, "learning_rate": 0.002, "loss": 2.3459, "step": 284700 }, { "epoch": 1.1006092375253205, "grad_norm": 0.093800850212574, "learning_rate": 0.002, "loss": 2.3475, "step": 284710 }, { "epoch": 1.1006478947287037, "grad_norm": 0.1049603745341301, "learning_rate": 0.002, "loss": 2.3486, "step": 284720 }, { "epoch": 1.100686551932087, "grad_norm": 0.09647928178310394, "learning_rate": 0.002, "loss": 2.3274, "step": 284730 }, { "epoch": 1.1007252091354702, "grad_norm": 0.10955628752708435, "learning_rate": 0.002, "loss": 2.3314, "step": 284740 }, { "epoch": 1.1007638663388535, "grad_norm": 0.10220403969287872, "learning_rate": 0.002, "loss": 2.3183, "step": 284750 }, { "epoch": 1.100802523542237, "grad_norm": 0.09642922133207321, "learning_rate": 0.002, "loss": 2.3429, "step": 284760 }, { "epoch": 1.1008411807456202, "grad_norm": 0.10072393715381622, "learning_rate": 0.002, "loss": 2.3276, "step": 284770 }, { "epoch": 1.1008798379490035, "grad_norm": 0.11426430195569992, "learning_rate": 0.002, "loss": 2.3381, "step": 284780 }, { "epoch": 1.1009184951523867, "grad_norm": 0.10772670805454254, "learning_rate": 0.002, "loss": 2.3251, "step": 284790 }, { "epoch": 1.10095715235577, "grad_norm": 0.14087091386318207, "learning_rate": 0.002, "loss": 2.3239, "step": 284800 }, { "epoch": 1.1009958095591532, "grad_norm": 0.10744642466306686, "learning_rate": 0.002, "loss": 2.3341, "step": 284810 }, { "epoch": 1.1010344667625365, "grad_norm": 0.09577398002147675, "learning_rate": 0.002, "loss": 2.3266, "step": 284820 }, { "epoch": 1.1010731239659197, "grad_norm": 0.09843907505273819, "learning_rate": 0.002, "loss": 2.3364, "step": 284830 }, { "epoch": 1.101111781169303, "grad_norm": 0.10781969875097275, "learning_rate": 0.002, "loss": 2.339, "step": 284840 }, { "epoch": 1.1011504383726864, "grad_norm": 0.10822036117315292, "learning_rate": 0.002, "loss": 2.3296, "step": 284850 }, { "epoch": 1.1011890955760697, "grad_norm": 0.11118780076503754, "learning_rate": 0.002, "loss": 2.3328, "step": 284860 }, { "epoch": 1.101227752779453, "grad_norm": 0.13004349172115326, "learning_rate": 0.002, "loss": 2.3407, "step": 284870 }, { "epoch": 1.1012664099828362, "grad_norm": 0.1018972173333168, "learning_rate": 0.002, "loss": 2.327, "step": 284880 }, { "epoch": 1.1013050671862195, "grad_norm": 0.1026332899928093, "learning_rate": 0.002, "loss": 2.34, "step": 284890 }, { "epoch": 1.1013437243896027, "grad_norm": 0.09754814952611923, "learning_rate": 0.002, "loss": 2.3382, "step": 284900 }, { "epoch": 1.101382381592986, "grad_norm": 0.11591901630163193, "learning_rate": 0.002, "loss": 2.3452, "step": 284910 }, { "epoch": 1.1014210387963692, "grad_norm": 0.1105770617723465, "learning_rate": 0.002, "loss": 2.3248, "step": 284920 }, { "epoch": 1.1014596959997527, "grad_norm": 0.11102866381406784, "learning_rate": 0.002, "loss": 2.3354, "step": 284930 }, { "epoch": 1.101498353203136, "grad_norm": 0.10752061009407043, "learning_rate": 0.002, "loss": 2.3327, "step": 284940 }, { "epoch": 1.1015370104065192, "grad_norm": 0.11592989414930344, "learning_rate": 0.002, "loss": 2.3382, "step": 284950 }, { "epoch": 1.1015756676099024, "grad_norm": 0.09817852824926376, "learning_rate": 0.002, "loss": 2.3369, "step": 284960 }, { "epoch": 1.1016143248132857, "grad_norm": 0.1352873593568802, "learning_rate": 0.002, "loss": 2.3436, "step": 284970 }, { "epoch": 1.101652982016669, "grad_norm": 0.10397396981716156, "learning_rate": 0.002, "loss": 2.3299, "step": 284980 }, { "epoch": 1.1016916392200522, "grad_norm": 0.1036774218082428, "learning_rate": 0.002, "loss": 2.3364, "step": 284990 }, { "epoch": 1.1017302964234355, "grad_norm": 0.10826187580823898, "learning_rate": 0.002, "loss": 2.338, "step": 285000 }, { "epoch": 1.101768953626819, "grad_norm": 0.11108260601758957, "learning_rate": 0.002, "loss": 2.3443, "step": 285010 }, { "epoch": 1.1018076108302022, "grad_norm": 0.10326624661684036, "learning_rate": 0.002, "loss": 2.3378, "step": 285020 }, { "epoch": 1.1018462680335854, "grad_norm": 0.11157377809286118, "learning_rate": 0.002, "loss": 2.3234, "step": 285030 }, { "epoch": 1.1018849252369687, "grad_norm": 0.10896344482898712, "learning_rate": 0.002, "loss": 2.3217, "step": 285040 }, { "epoch": 1.101923582440352, "grad_norm": 0.11324446648359299, "learning_rate": 0.002, "loss": 2.3463, "step": 285050 }, { "epoch": 1.1019622396437352, "grad_norm": 0.11675186455249786, "learning_rate": 0.002, "loss": 2.3442, "step": 285060 }, { "epoch": 1.1020008968471184, "grad_norm": 0.40514060854911804, "learning_rate": 0.002, "loss": 2.3221, "step": 285070 }, { "epoch": 1.1020395540505017, "grad_norm": 0.10736236721277237, "learning_rate": 0.002, "loss": 2.3319, "step": 285080 }, { "epoch": 1.1020782112538852, "grad_norm": 0.1119396984577179, "learning_rate": 0.002, "loss": 2.3311, "step": 285090 }, { "epoch": 1.1021168684572684, "grad_norm": 0.10223685950040817, "learning_rate": 0.002, "loss": 2.3388, "step": 285100 }, { "epoch": 1.1021555256606517, "grad_norm": 0.11727441847324371, "learning_rate": 0.002, "loss": 2.3331, "step": 285110 }, { "epoch": 1.102194182864035, "grad_norm": 0.10487577319145203, "learning_rate": 0.002, "loss": 2.3423, "step": 285120 }, { "epoch": 1.1022328400674182, "grad_norm": 0.10361069440841675, "learning_rate": 0.002, "loss": 2.3246, "step": 285130 }, { "epoch": 1.1022714972708014, "grad_norm": 0.1039617732167244, "learning_rate": 0.002, "loss": 2.3436, "step": 285140 }, { "epoch": 1.1023101544741847, "grad_norm": 0.1025695726275444, "learning_rate": 0.002, "loss": 2.3262, "step": 285150 }, { "epoch": 1.102348811677568, "grad_norm": 0.13202959299087524, "learning_rate": 0.002, "loss": 2.3407, "step": 285160 }, { "epoch": 1.1023874688809512, "grad_norm": 0.10830264538526535, "learning_rate": 0.002, "loss": 2.3441, "step": 285170 }, { "epoch": 1.1024261260843347, "grad_norm": 0.08623657375574112, "learning_rate": 0.002, "loss": 2.3426, "step": 285180 }, { "epoch": 1.102464783287718, "grad_norm": 0.1080300509929657, "learning_rate": 0.002, "loss": 2.3457, "step": 285190 }, { "epoch": 1.1025034404911012, "grad_norm": 0.10596869140863419, "learning_rate": 0.002, "loss": 2.3176, "step": 285200 }, { "epoch": 1.1025420976944844, "grad_norm": 0.1066625639796257, "learning_rate": 0.002, "loss": 2.3485, "step": 285210 }, { "epoch": 1.1025807548978677, "grad_norm": 0.11383505910634995, "learning_rate": 0.002, "loss": 2.3261, "step": 285220 }, { "epoch": 1.102619412101251, "grad_norm": 0.148569256067276, "learning_rate": 0.002, "loss": 2.3301, "step": 285230 }, { "epoch": 1.1026580693046342, "grad_norm": 0.1070183590054512, "learning_rate": 0.002, "loss": 2.3387, "step": 285240 }, { "epoch": 1.1026967265080174, "grad_norm": 0.1264624297618866, "learning_rate": 0.002, "loss": 2.3316, "step": 285250 }, { "epoch": 1.102735383711401, "grad_norm": 0.10144893825054169, "learning_rate": 0.002, "loss": 2.3272, "step": 285260 }, { "epoch": 1.1027740409147841, "grad_norm": 0.11170484870672226, "learning_rate": 0.002, "loss": 2.3306, "step": 285270 }, { "epoch": 1.1028126981181674, "grad_norm": 0.10286292433738708, "learning_rate": 0.002, "loss": 2.3394, "step": 285280 }, { "epoch": 1.1028513553215507, "grad_norm": 0.10856574028730392, "learning_rate": 0.002, "loss": 2.3579, "step": 285290 }, { "epoch": 1.102890012524934, "grad_norm": 0.09397318214178085, "learning_rate": 0.002, "loss": 2.3483, "step": 285300 }, { "epoch": 1.1029286697283172, "grad_norm": 0.133082315325737, "learning_rate": 0.002, "loss": 2.3402, "step": 285310 }, { "epoch": 1.1029673269317004, "grad_norm": 0.1034841313958168, "learning_rate": 0.002, "loss": 2.3167, "step": 285320 }, { "epoch": 1.1030059841350837, "grad_norm": 0.10122717171907425, "learning_rate": 0.002, "loss": 2.333, "step": 285330 }, { "epoch": 1.103044641338467, "grad_norm": 0.09922084212303162, "learning_rate": 0.002, "loss": 2.3383, "step": 285340 }, { "epoch": 1.1030832985418504, "grad_norm": 0.10910443961620331, "learning_rate": 0.002, "loss": 2.3367, "step": 285350 }, { "epoch": 1.1031219557452336, "grad_norm": 0.13533568382263184, "learning_rate": 0.002, "loss": 2.3397, "step": 285360 }, { "epoch": 1.103160612948617, "grad_norm": 0.09651536494493484, "learning_rate": 0.002, "loss": 2.3365, "step": 285370 }, { "epoch": 1.1031992701520001, "grad_norm": 0.0913858413696289, "learning_rate": 0.002, "loss": 2.3367, "step": 285380 }, { "epoch": 1.1032379273553834, "grad_norm": 0.13411878049373627, "learning_rate": 0.002, "loss": 2.3332, "step": 285390 }, { "epoch": 1.1032765845587666, "grad_norm": 0.10474178194999695, "learning_rate": 0.002, "loss": 2.3399, "step": 285400 }, { "epoch": 1.10331524176215, "grad_norm": 0.11313706636428833, "learning_rate": 0.002, "loss": 2.3332, "step": 285410 }, { "epoch": 1.1033538989655332, "grad_norm": 0.11625546962022781, "learning_rate": 0.002, "loss": 2.3273, "step": 285420 }, { "epoch": 1.1033925561689166, "grad_norm": 0.10761766880750656, "learning_rate": 0.002, "loss": 2.3382, "step": 285430 }, { "epoch": 1.1034312133722999, "grad_norm": 0.1149444431066513, "learning_rate": 0.002, "loss": 2.3322, "step": 285440 }, { "epoch": 1.1034698705756831, "grad_norm": 0.10345727950334549, "learning_rate": 0.002, "loss": 2.33, "step": 285450 }, { "epoch": 1.1035085277790664, "grad_norm": 0.10542980581521988, "learning_rate": 0.002, "loss": 2.3331, "step": 285460 }, { "epoch": 1.1035471849824496, "grad_norm": 0.12415088713169098, "learning_rate": 0.002, "loss": 2.3355, "step": 285470 }, { "epoch": 1.1035858421858329, "grad_norm": 0.09921295195817947, "learning_rate": 0.002, "loss": 2.3177, "step": 285480 }, { "epoch": 1.1036244993892161, "grad_norm": 0.10105586051940918, "learning_rate": 0.002, "loss": 2.3357, "step": 285490 }, { "epoch": 1.1036631565925994, "grad_norm": 0.13473346829414368, "learning_rate": 0.002, "loss": 2.3202, "step": 285500 }, { "epoch": 1.1037018137959826, "grad_norm": 0.11584562808275223, "learning_rate": 0.002, "loss": 2.3387, "step": 285510 }, { "epoch": 1.1037404709993661, "grad_norm": 0.09862792491912842, "learning_rate": 0.002, "loss": 2.3301, "step": 285520 }, { "epoch": 1.1037791282027494, "grad_norm": 0.08858393132686615, "learning_rate": 0.002, "loss": 2.3379, "step": 285530 }, { "epoch": 1.1038177854061326, "grad_norm": 0.1095161959528923, "learning_rate": 0.002, "loss": 2.3322, "step": 285540 }, { "epoch": 1.1038564426095159, "grad_norm": 0.10374027490615845, "learning_rate": 0.002, "loss": 2.3293, "step": 285550 }, { "epoch": 1.1038950998128991, "grad_norm": 0.10438241809606552, "learning_rate": 0.002, "loss": 2.3444, "step": 285560 }, { "epoch": 1.1039337570162824, "grad_norm": 0.09749176353216171, "learning_rate": 0.002, "loss": 2.3451, "step": 285570 }, { "epoch": 1.1039724142196656, "grad_norm": 0.11155575513839722, "learning_rate": 0.002, "loss": 2.3409, "step": 285580 }, { "epoch": 1.1040110714230489, "grad_norm": 0.13335822522640228, "learning_rate": 0.002, "loss": 2.344, "step": 285590 }, { "epoch": 1.1040497286264324, "grad_norm": 0.08708234131336212, "learning_rate": 0.002, "loss": 2.3355, "step": 285600 }, { "epoch": 1.1040883858298156, "grad_norm": 0.09702277183532715, "learning_rate": 0.002, "loss": 2.3376, "step": 285610 }, { "epoch": 1.1041270430331989, "grad_norm": 0.10341376811265945, "learning_rate": 0.002, "loss": 2.3408, "step": 285620 }, { "epoch": 1.1041657002365821, "grad_norm": 0.09655317664146423, "learning_rate": 0.002, "loss": 2.3283, "step": 285630 }, { "epoch": 1.1042043574399654, "grad_norm": 0.09124155342578888, "learning_rate": 0.002, "loss": 2.3403, "step": 285640 }, { "epoch": 1.1042430146433486, "grad_norm": 0.10956746339797974, "learning_rate": 0.002, "loss": 2.3363, "step": 285650 }, { "epoch": 1.1042816718467319, "grad_norm": 0.10166729986667633, "learning_rate": 0.002, "loss": 2.3429, "step": 285660 }, { "epoch": 1.1043203290501151, "grad_norm": 0.14822007715702057, "learning_rate": 0.002, "loss": 2.323, "step": 285670 }, { "epoch": 1.1043589862534984, "grad_norm": 0.09753800928592682, "learning_rate": 0.002, "loss": 2.342, "step": 285680 }, { "epoch": 1.1043976434568819, "grad_norm": 0.10967541486024857, "learning_rate": 0.002, "loss": 2.3224, "step": 285690 }, { "epoch": 1.104436300660265, "grad_norm": 0.10217584669589996, "learning_rate": 0.002, "loss": 2.3345, "step": 285700 }, { "epoch": 1.1044749578636484, "grad_norm": 0.10899412631988525, "learning_rate": 0.002, "loss": 2.3341, "step": 285710 }, { "epoch": 1.1045136150670316, "grad_norm": 0.0949438065290451, "learning_rate": 0.002, "loss": 2.3448, "step": 285720 }, { "epoch": 1.1045522722704149, "grad_norm": 0.10611841827630997, "learning_rate": 0.002, "loss": 2.3279, "step": 285730 }, { "epoch": 1.104590929473798, "grad_norm": 0.1734570562839508, "learning_rate": 0.002, "loss": 2.3287, "step": 285740 }, { "epoch": 1.1046295866771814, "grad_norm": 0.12010052055120468, "learning_rate": 0.002, "loss": 2.3229, "step": 285750 }, { "epoch": 1.1046682438805646, "grad_norm": 0.10046463459730148, "learning_rate": 0.002, "loss": 2.3199, "step": 285760 }, { "epoch": 1.104706901083948, "grad_norm": 0.14872701466083527, "learning_rate": 0.002, "loss": 2.3407, "step": 285770 }, { "epoch": 1.1047455582873313, "grad_norm": 0.09705018252134323, "learning_rate": 0.002, "loss": 2.3431, "step": 285780 }, { "epoch": 1.1047842154907146, "grad_norm": 0.11013457924127579, "learning_rate": 0.002, "loss": 2.329, "step": 285790 }, { "epoch": 1.1048228726940978, "grad_norm": 0.09360580146312714, "learning_rate": 0.002, "loss": 2.329, "step": 285800 }, { "epoch": 1.104861529897481, "grad_norm": 0.09901615977287292, "learning_rate": 0.002, "loss": 2.3291, "step": 285810 }, { "epoch": 1.1049001871008644, "grad_norm": 0.09901581704616547, "learning_rate": 0.002, "loss": 2.3382, "step": 285820 }, { "epoch": 1.1049388443042476, "grad_norm": 0.11049401015043259, "learning_rate": 0.002, "loss": 2.3185, "step": 285830 }, { "epoch": 1.1049775015076309, "grad_norm": 0.09339258819818497, "learning_rate": 0.002, "loss": 2.334, "step": 285840 }, { "epoch": 1.105016158711014, "grad_norm": 0.08979971706867218, "learning_rate": 0.002, "loss": 2.3576, "step": 285850 }, { "epoch": 1.1050548159143976, "grad_norm": 0.0979980006814003, "learning_rate": 0.002, "loss": 2.3347, "step": 285860 }, { "epoch": 1.1050934731177808, "grad_norm": 0.11645156890153885, "learning_rate": 0.002, "loss": 2.3387, "step": 285870 }, { "epoch": 1.105132130321164, "grad_norm": 0.10296425223350525, "learning_rate": 0.002, "loss": 2.3416, "step": 285880 }, { "epoch": 1.1051707875245473, "grad_norm": 0.10710039734840393, "learning_rate": 0.002, "loss": 2.3444, "step": 285890 }, { "epoch": 1.1052094447279306, "grad_norm": 0.10021751374006271, "learning_rate": 0.002, "loss": 2.3365, "step": 285900 }, { "epoch": 1.1052481019313138, "grad_norm": 0.12587647140026093, "learning_rate": 0.002, "loss": 2.3378, "step": 285910 }, { "epoch": 1.105286759134697, "grad_norm": 0.10164447128772736, "learning_rate": 0.002, "loss": 2.3329, "step": 285920 }, { "epoch": 1.1053254163380803, "grad_norm": 0.094952292740345, "learning_rate": 0.002, "loss": 2.3279, "step": 285930 }, { "epoch": 1.1053640735414638, "grad_norm": 0.12019840627908707, "learning_rate": 0.002, "loss": 2.3437, "step": 285940 }, { "epoch": 1.105402730744847, "grad_norm": 0.12803016602993011, "learning_rate": 0.002, "loss": 2.33, "step": 285950 }, { "epoch": 1.1054413879482303, "grad_norm": 0.11855556070804596, "learning_rate": 0.002, "loss": 2.3358, "step": 285960 }, { "epoch": 1.1054800451516136, "grad_norm": 0.0914088562130928, "learning_rate": 0.002, "loss": 2.3454, "step": 285970 }, { "epoch": 1.1055187023549968, "grad_norm": 0.10145406424999237, "learning_rate": 0.002, "loss": 2.3306, "step": 285980 }, { "epoch": 1.10555735955838, "grad_norm": 0.1333356648683548, "learning_rate": 0.002, "loss": 2.32, "step": 285990 }, { "epoch": 1.1055960167617633, "grad_norm": 0.12203472852706909, "learning_rate": 0.002, "loss": 2.3367, "step": 286000 }, { "epoch": 1.1056346739651466, "grad_norm": 0.11965616047382355, "learning_rate": 0.002, "loss": 2.3295, "step": 286010 }, { "epoch": 1.1056733311685298, "grad_norm": 0.09209474176168442, "learning_rate": 0.002, "loss": 2.3407, "step": 286020 }, { "epoch": 1.1057119883719133, "grad_norm": 0.0947568342089653, "learning_rate": 0.002, "loss": 2.3324, "step": 286030 }, { "epoch": 1.1057506455752966, "grad_norm": 0.11331100016832352, "learning_rate": 0.002, "loss": 2.3321, "step": 286040 }, { "epoch": 1.1057893027786798, "grad_norm": 0.09801577776670456, "learning_rate": 0.002, "loss": 2.3308, "step": 286050 }, { "epoch": 1.105827959982063, "grad_norm": 0.09460748732089996, "learning_rate": 0.002, "loss": 2.3275, "step": 286060 }, { "epoch": 1.1058666171854463, "grad_norm": 0.10627475380897522, "learning_rate": 0.002, "loss": 2.3277, "step": 286070 }, { "epoch": 1.1059052743888296, "grad_norm": 0.10834797471761703, "learning_rate": 0.002, "loss": 2.3345, "step": 286080 }, { "epoch": 1.1059439315922128, "grad_norm": 0.1028374433517456, "learning_rate": 0.002, "loss": 2.3294, "step": 286090 }, { "epoch": 1.105982588795596, "grad_norm": 0.130677729845047, "learning_rate": 0.002, "loss": 2.3419, "step": 286100 }, { "epoch": 1.1060212459989796, "grad_norm": 0.09175800532102585, "learning_rate": 0.002, "loss": 2.3228, "step": 286110 }, { "epoch": 1.1060599032023628, "grad_norm": 0.1281922161579132, "learning_rate": 0.002, "loss": 2.3406, "step": 286120 }, { "epoch": 1.106098560405746, "grad_norm": 0.10538359731435776, "learning_rate": 0.002, "loss": 2.3538, "step": 286130 }, { "epoch": 1.1061372176091293, "grad_norm": 0.1097087562084198, "learning_rate": 0.002, "loss": 2.3411, "step": 286140 }, { "epoch": 1.1061758748125126, "grad_norm": 0.09895108640193939, "learning_rate": 0.002, "loss": 2.3361, "step": 286150 }, { "epoch": 1.1062145320158958, "grad_norm": 0.11682061851024628, "learning_rate": 0.002, "loss": 2.3253, "step": 286160 }, { "epoch": 1.106253189219279, "grad_norm": 0.09236068278551102, "learning_rate": 0.002, "loss": 2.3283, "step": 286170 }, { "epoch": 1.1062918464226623, "grad_norm": 0.1044696792960167, "learning_rate": 0.002, "loss": 2.3382, "step": 286180 }, { "epoch": 1.1063305036260456, "grad_norm": 0.10135937482118607, "learning_rate": 0.002, "loss": 2.3326, "step": 286190 }, { "epoch": 1.106369160829429, "grad_norm": 0.09955133497714996, "learning_rate": 0.002, "loss": 2.3399, "step": 286200 }, { "epoch": 1.1064078180328123, "grad_norm": 0.11692699790000916, "learning_rate": 0.002, "loss": 2.3371, "step": 286210 }, { "epoch": 1.1064464752361955, "grad_norm": 0.0946819931268692, "learning_rate": 0.002, "loss": 2.3399, "step": 286220 }, { "epoch": 1.1064851324395788, "grad_norm": 0.10226627439260483, "learning_rate": 0.002, "loss": 2.3444, "step": 286230 }, { "epoch": 1.106523789642962, "grad_norm": 0.10247496515512466, "learning_rate": 0.002, "loss": 2.3374, "step": 286240 }, { "epoch": 1.1065624468463453, "grad_norm": 0.10509783029556274, "learning_rate": 0.002, "loss": 2.3266, "step": 286250 }, { "epoch": 1.1066011040497286, "grad_norm": 0.11241637170314789, "learning_rate": 0.002, "loss": 2.3329, "step": 286260 }, { "epoch": 1.1066397612531118, "grad_norm": 0.16422449052333832, "learning_rate": 0.002, "loss": 2.3329, "step": 286270 }, { "epoch": 1.1066784184564953, "grad_norm": 0.11735934764146805, "learning_rate": 0.002, "loss": 2.3512, "step": 286280 }, { "epoch": 1.1067170756598785, "grad_norm": 0.09991582483053207, "learning_rate": 0.002, "loss": 2.3182, "step": 286290 }, { "epoch": 1.1067557328632618, "grad_norm": 0.09954442828893661, "learning_rate": 0.002, "loss": 2.326, "step": 286300 }, { "epoch": 1.106794390066645, "grad_norm": 0.10756408423185349, "learning_rate": 0.002, "loss": 2.3304, "step": 286310 }, { "epoch": 1.1068330472700283, "grad_norm": 0.10762269049882889, "learning_rate": 0.002, "loss": 2.3455, "step": 286320 }, { "epoch": 1.1068717044734115, "grad_norm": 0.1275537610054016, "learning_rate": 0.002, "loss": 2.3364, "step": 286330 }, { "epoch": 1.1069103616767948, "grad_norm": 0.10587187111377716, "learning_rate": 0.002, "loss": 2.3102, "step": 286340 }, { "epoch": 1.106949018880178, "grad_norm": 0.10666114091873169, "learning_rate": 0.002, "loss": 2.3202, "step": 286350 }, { "epoch": 1.1069876760835613, "grad_norm": 0.11461252719163895, "learning_rate": 0.002, "loss": 2.3375, "step": 286360 }, { "epoch": 1.1070263332869448, "grad_norm": 0.13306303322315216, "learning_rate": 0.002, "loss": 2.3397, "step": 286370 }, { "epoch": 1.107064990490328, "grad_norm": 0.10906513780355453, "learning_rate": 0.002, "loss": 2.3535, "step": 286380 }, { "epoch": 1.1071036476937113, "grad_norm": 0.10084366053342819, "learning_rate": 0.002, "loss": 2.339, "step": 286390 }, { "epoch": 1.1071423048970945, "grad_norm": 0.1198674812912941, "learning_rate": 0.002, "loss": 2.3278, "step": 286400 }, { "epoch": 1.1071809621004778, "grad_norm": 0.09597591310739517, "learning_rate": 0.002, "loss": 2.334, "step": 286410 }, { "epoch": 1.107219619303861, "grad_norm": 0.12501464784145355, "learning_rate": 0.002, "loss": 2.3481, "step": 286420 }, { "epoch": 1.1072582765072443, "grad_norm": 0.10793590545654297, "learning_rate": 0.002, "loss": 2.3519, "step": 286430 }, { "epoch": 1.1072969337106275, "grad_norm": 0.09429333359003067, "learning_rate": 0.002, "loss": 2.328, "step": 286440 }, { "epoch": 1.107335590914011, "grad_norm": 0.09729223698377609, "learning_rate": 0.002, "loss": 2.3374, "step": 286450 }, { "epoch": 1.1073742481173943, "grad_norm": 0.09174039214849472, "learning_rate": 0.002, "loss": 2.3372, "step": 286460 }, { "epoch": 1.1074129053207775, "grad_norm": 0.10584468394517899, "learning_rate": 0.002, "loss": 2.3458, "step": 286470 }, { "epoch": 1.1074515625241608, "grad_norm": 0.10441362112760544, "learning_rate": 0.002, "loss": 2.3192, "step": 286480 }, { "epoch": 1.107490219727544, "grad_norm": 0.09315589815378189, "learning_rate": 0.002, "loss": 2.3401, "step": 286490 }, { "epoch": 1.1075288769309273, "grad_norm": 0.08834546059370041, "learning_rate": 0.002, "loss": 2.3247, "step": 286500 }, { "epoch": 1.1075675341343105, "grad_norm": 0.10272207856178284, "learning_rate": 0.002, "loss": 2.3236, "step": 286510 }, { "epoch": 1.1076061913376938, "grad_norm": 0.15574143826961517, "learning_rate": 0.002, "loss": 2.3308, "step": 286520 }, { "epoch": 1.107644848541077, "grad_norm": 0.09827958792448044, "learning_rate": 0.002, "loss": 2.3434, "step": 286530 }, { "epoch": 1.1076835057444605, "grad_norm": 0.10926888883113861, "learning_rate": 0.002, "loss": 2.3285, "step": 286540 }, { "epoch": 1.1077221629478438, "grad_norm": 0.10553573817014694, "learning_rate": 0.002, "loss": 2.3188, "step": 286550 }, { "epoch": 1.107760820151227, "grad_norm": 0.1055358499288559, "learning_rate": 0.002, "loss": 2.3279, "step": 286560 }, { "epoch": 1.1077994773546103, "grad_norm": 0.11329042911529541, "learning_rate": 0.002, "loss": 2.3422, "step": 286570 }, { "epoch": 1.1078381345579935, "grad_norm": 0.09704306721687317, "learning_rate": 0.002, "loss": 2.3355, "step": 286580 }, { "epoch": 1.1078767917613768, "grad_norm": 0.10216046124696732, "learning_rate": 0.002, "loss": 2.3249, "step": 286590 }, { "epoch": 1.10791544896476, "grad_norm": 0.10295818001031876, "learning_rate": 0.002, "loss": 2.3345, "step": 286600 }, { "epoch": 1.1079541061681433, "grad_norm": 0.14099784195423126, "learning_rate": 0.002, "loss": 2.3286, "step": 286610 }, { "epoch": 1.1079927633715267, "grad_norm": 0.10787280648946762, "learning_rate": 0.002, "loss": 2.324, "step": 286620 }, { "epoch": 1.10803142057491, "grad_norm": 0.10273903608322144, "learning_rate": 0.002, "loss": 2.3351, "step": 286630 }, { "epoch": 1.1080700777782932, "grad_norm": 0.09775515645742416, "learning_rate": 0.002, "loss": 2.3366, "step": 286640 }, { "epoch": 1.1081087349816765, "grad_norm": 0.11061781644821167, "learning_rate": 0.002, "loss": 2.3551, "step": 286650 }, { "epoch": 1.1081473921850598, "grad_norm": 0.12733496725559235, "learning_rate": 0.002, "loss": 2.3465, "step": 286660 }, { "epoch": 1.108186049388443, "grad_norm": 0.1342155486345291, "learning_rate": 0.002, "loss": 2.3301, "step": 286670 }, { "epoch": 1.1082247065918263, "grad_norm": 0.09409618377685547, "learning_rate": 0.002, "loss": 2.3298, "step": 286680 }, { "epoch": 1.1082633637952095, "grad_norm": 0.10852969437837601, "learning_rate": 0.002, "loss": 2.3471, "step": 286690 }, { "epoch": 1.1083020209985928, "grad_norm": 0.09696200489997864, "learning_rate": 0.002, "loss": 2.3256, "step": 286700 }, { "epoch": 1.1083406782019762, "grad_norm": 0.09392782300710678, "learning_rate": 0.002, "loss": 2.3521, "step": 286710 }, { "epoch": 1.1083793354053595, "grad_norm": 0.11639557033777237, "learning_rate": 0.002, "loss": 2.3281, "step": 286720 }, { "epoch": 1.1084179926087427, "grad_norm": 0.11509211361408234, "learning_rate": 0.002, "loss": 2.3371, "step": 286730 }, { "epoch": 1.108456649812126, "grad_norm": 0.09146511554718018, "learning_rate": 0.002, "loss": 2.3351, "step": 286740 }, { "epoch": 1.1084953070155092, "grad_norm": 0.11421307176351547, "learning_rate": 0.002, "loss": 2.3445, "step": 286750 }, { "epoch": 1.1085339642188925, "grad_norm": 0.10285865515470505, "learning_rate": 0.002, "loss": 2.3387, "step": 286760 }, { "epoch": 1.1085726214222758, "grad_norm": 0.10659022629261017, "learning_rate": 0.002, "loss": 2.3303, "step": 286770 }, { "epoch": 1.108611278625659, "grad_norm": 0.0977044478058815, "learning_rate": 0.002, "loss": 2.3371, "step": 286780 }, { "epoch": 1.1086499358290425, "grad_norm": 0.11840414255857468, "learning_rate": 0.002, "loss": 2.3473, "step": 286790 }, { "epoch": 1.1086885930324257, "grad_norm": 0.09854122251272202, "learning_rate": 0.002, "loss": 2.3353, "step": 286800 }, { "epoch": 1.108727250235809, "grad_norm": 0.10114125907421112, "learning_rate": 0.002, "loss": 2.3252, "step": 286810 }, { "epoch": 1.1087659074391922, "grad_norm": 0.10757792741060257, "learning_rate": 0.002, "loss": 2.3464, "step": 286820 }, { "epoch": 1.1088045646425755, "grad_norm": 0.1165936067700386, "learning_rate": 0.002, "loss": 2.3269, "step": 286830 }, { "epoch": 1.1088432218459587, "grad_norm": 0.10642943531274796, "learning_rate": 0.002, "loss": 2.3559, "step": 286840 }, { "epoch": 1.108881879049342, "grad_norm": 0.19730453193187714, "learning_rate": 0.002, "loss": 2.3468, "step": 286850 }, { "epoch": 1.1089205362527252, "grad_norm": 0.10663972049951553, "learning_rate": 0.002, "loss": 2.3288, "step": 286860 }, { "epoch": 1.1089591934561085, "grad_norm": 0.09699961543083191, "learning_rate": 0.002, "loss": 2.3362, "step": 286870 }, { "epoch": 1.108997850659492, "grad_norm": 0.09914124757051468, "learning_rate": 0.002, "loss": 2.3395, "step": 286880 }, { "epoch": 1.1090365078628752, "grad_norm": 0.09312078356742859, "learning_rate": 0.002, "loss": 2.3382, "step": 286890 }, { "epoch": 1.1090751650662585, "grad_norm": 0.11414426565170288, "learning_rate": 0.002, "loss": 2.3427, "step": 286900 }, { "epoch": 1.1091138222696417, "grad_norm": 0.1020946130156517, "learning_rate": 0.002, "loss": 2.3462, "step": 286910 }, { "epoch": 1.109152479473025, "grad_norm": 0.10086391866207123, "learning_rate": 0.002, "loss": 2.3148, "step": 286920 }, { "epoch": 1.1091911366764082, "grad_norm": 0.10533702373504639, "learning_rate": 0.002, "loss": 2.3336, "step": 286930 }, { "epoch": 1.1092297938797915, "grad_norm": 0.09942932426929474, "learning_rate": 0.002, "loss": 2.3298, "step": 286940 }, { "epoch": 1.109268451083175, "grad_norm": 0.0986490547657013, "learning_rate": 0.002, "loss": 2.3372, "step": 286950 }, { "epoch": 1.1093071082865582, "grad_norm": 0.10131828486919403, "learning_rate": 0.002, "loss": 2.338, "step": 286960 }, { "epoch": 1.1093457654899415, "grad_norm": 0.10056748241186142, "learning_rate": 0.002, "loss": 2.3384, "step": 286970 }, { "epoch": 1.1093844226933247, "grad_norm": 0.12299492210149765, "learning_rate": 0.002, "loss": 2.3442, "step": 286980 }, { "epoch": 1.109423079896708, "grad_norm": 0.09333730489015579, "learning_rate": 0.002, "loss": 2.3212, "step": 286990 }, { "epoch": 1.1094617371000912, "grad_norm": 0.10080965608358383, "learning_rate": 0.002, "loss": 2.3415, "step": 287000 }, { "epoch": 1.1095003943034745, "grad_norm": 0.09615743905305862, "learning_rate": 0.002, "loss": 2.331, "step": 287010 }, { "epoch": 1.1095390515068577, "grad_norm": 0.10049605369567871, "learning_rate": 0.002, "loss": 2.3369, "step": 287020 }, { "epoch": 1.109577708710241, "grad_norm": 0.10170258581638336, "learning_rate": 0.002, "loss": 2.3478, "step": 287030 }, { "epoch": 1.1096163659136244, "grad_norm": 0.09737537056207657, "learning_rate": 0.002, "loss": 2.337, "step": 287040 }, { "epoch": 1.1096550231170077, "grad_norm": 0.12915514409542084, "learning_rate": 0.002, "loss": 2.3185, "step": 287050 }, { "epoch": 1.109693680320391, "grad_norm": 0.12582509219646454, "learning_rate": 0.002, "loss": 2.3429, "step": 287060 }, { "epoch": 1.1097323375237742, "grad_norm": 0.6325317025184631, "learning_rate": 0.002, "loss": 2.3406, "step": 287070 }, { "epoch": 1.1097709947271575, "grad_norm": 0.09826202690601349, "learning_rate": 0.002, "loss": 2.3233, "step": 287080 }, { "epoch": 1.1098096519305407, "grad_norm": 0.10656698793172836, "learning_rate": 0.002, "loss": 2.3397, "step": 287090 }, { "epoch": 1.109848309133924, "grad_norm": 0.10194915533065796, "learning_rate": 0.002, "loss": 2.3339, "step": 287100 }, { "epoch": 1.1098869663373072, "grad_norm": 0.10883122682571411, "learning_rate": 0.002, "loss": 2.3496, "step": 287110 }, { "epoch": 1.1099256235406907, "grad_norm": 0.09845606237649918, "learning_rate": 0.002, "loss": 2.3325, "step": 287120 }, { "epoch": 1.109964280744074, "grad_norm": 0.11428073793649673, "learning_rate": 0.002, "loss": 2.3375, "step": 287130 }, { "epoch": 1.1100029379474572, "grad_norm": 0.1102704256772995, "learning_rate": 0.002, "loss": 2.3385, "step": 287140 }, { "epoch": 1.1100415951508404, "grad_norm": 0.11757481098175049, "learning_rate": 0.002, "loss": 2.3226, "step": 287150 }, { "epoch": 1.1100802523542237, "grad_norm": 0.1087174043059349, "learning_rate": 0.002, "loss": 2.3301, "step": 287160 }, { "epoch": 1.110118909557607, "grad_norm": 0.09554096311330795, "learning_rate": 0.002, "loss": 2.3306, "step": 287170 }, { "epoch": 1.1101575667609902, "grad_norm": 0.13887332379817963, "learning_rate": 0.002, "loss": 2.3394, "step": 287180 }, { "epoch": 1.1101962239643735, "grad_norm": 0.11097030341625214, "learning_rate": 0.002, "loss": 2.3267, "step": 287190 }, { "epoch": 1.1102348811677567, "grad_norm": 0.09805213660001755, "learning_rate": 0.002, "loss": 2.3273, "step": 287200 }, { "epoch": 1.1102735383711402, "grad_norm": 0.10877746343612671, "learning_rate": 0.002, "loss": 2.3203, "step": 287210 }, { "epoch": 1.1103121955745234, "grad_norm": 0.09850569069385529, "learning_rate": 0.002, "loss": 2.3314, "step": 287220 }, { "epoch": 1.1103508527779067, "grad_norm": 0.10622706264257431, "learning_rate": 0.002, "loss": 2.3366, "step": 287230 }, { "epoch": 1.11038950998129, "grad_norm": 0.10253924876451492, "learning_rate": 0.002, "loss": 2.3363, "step": 287240 }, { "epoch": 1.1104281671846732, "grad_norm": 0.10554666072130203, "learning_rate": 0.002, "loss": 2.3277, "step": 287250 }, { "epoch": 1.1104668243880564, "grad_norm": 0.1024741381406784, "learning_rate": 0.002, "loss": 2.325, "step": 287260 }, { "epoch": 1.1105054815914397, "grad_norm": 0.12398812174797058, "learning_rate": 0.002, "loss": 2.3252, "step": 287270 }, { "epoch": 1.110544138794823, "grad_norm": 0.09940765053033829, "learning_rate": 0.002, "loss": 2.3365, "step": 287280 }, { "epoch": 1.1105827959982064, "grad_norm": 0.09944300353527069, "learning_rate": 0.002, "loss": 2.3304, "step": 287290 }, { "epoch": 1.1106214532015897, "grad_norm": 0.10488318651914597, "learning_rate": 0.002, "loss": 2.3378, "step": 287300 }, { "epoch": 1.110660110404973, "grad_norm": 0.11721440404653549, "learning_rate": 0.002, "loss": 2.3381, "step": 287310 }, { "epoch": 1.1106987676083562, "grad_norm": 0.10939953476190567, "learning_rate": 0.002, "loss": 2.3372, "step": 287320 }, { "epoch": 1.1107374248117394, "grad_norm": 0.0925823301076889, "learning_rate": 0.002, "loss": 2.3403, "step": 287330 }, { "epoch": 1.1107760820151227, "grad_norm": 0.08701872080564499, "learning_rate": 0.002, "loss": 2.34, "step": 287340 }, { "epoch": 1.110814739218506, "grad_norm": 0.10976503044366837, "learning_rate": 0.002, "loss": 2.3352, "step": 287350 }, { "epoch": 1.1108533964218892, "grad_norm": 0.1309625506401062, "learning_rate": 0.002, "loss": 2.3542, "step": 287360 }, { "epoch": 1.1108920536252724, "grad_norm": 0.09811020642518997, "learning_rate": 0.002, "loss": 2.3394, "step": 287370 }, { "epoch": 1.110930710828656, "grad_norm": 0.10170017927885056, "learning_rate": 0.002, "loss": 2.3378, "step": 287380 }, { "epoch": 1.1109693680320392, "grad_norm": 0.09560930728912354, "learning_rate": 0.002, "loss": 2.3405, "step": 287390 }, { "epoch": 1.1110080252354224, "grad_norm": 0.11622206121683121, "learning_rate": 0.002, "loss": 2.3325, "step": 287400 }, { "epoch": 1.1110466824388057, "grad_norm": 0.10059899091720581, "learning_rate": 0.002, "loss": 2.3264, "step": 287410 }, { "epoch": 1.111085339642189, "grad_norm": 0.0940239429473877, "learning_rate": 0.002, "loss": 2.3382, "step": 287420 }, { "epoch": 1.1111239968455722, "grad_norm": 0.13035322725772858, "learning_rate": 0.002, "loss": 2.341, "step": 287430 }, { "epoch": 1.1111626540489554, "grad_norm": 0.11438962817192078, "learning_rate": 0.002, "loss": 2.3322, "step": 287440 }, { "epoch": 1.1112013112523387, "grad_norm": 0.08248197287321091, "learning_rate": 0.002, "loss": 2.3414, "step": 287450 }, { "epoch": 1.1112399684557221, "grad_norm": 0.10871239006519318, "learning_rate": 0.002, "loss": 2.3363, "step": 287460 }, { "epoch": 1.1112786256591054, "grad_norm": 0.1135963648557663, "learning_rate": 0.002, "loss": 2.3303, "step": 287470 }, { "epoch": 1.1113172828624887, "grad_norm": 0.12674102187156677, "learning_rate": 0.002, "loss": 2.3244, "step": 287480 }, { "epoch": 1.111355940065872, "grad_norm": 0.10381345450878143, "learning_rate": 0.002, "loss": 2.3295, "step": 287490 }, { "epoch": 1.1113945972692552, "grad_norm": 0.10797566920518875, "learning_rate": 0.002, "loss": 2.3341, "step": 287500 }, { "epoch": 1.1114332544726384, "grad_norm": 0.09637517482042313, "learning_rate": 0.002, "loss": 2.3387, "step": 287510 }, { "epoch": 1.1114719116760217, "grad_norm": 0.09481417387723923, "learning_rate": 0.002, "loss": 2.35, "step": 287520 }, { "epoch": 1.111510568879405, "grad_norm": 0.11646459251642227, "learning_rate": 0.002, "loss": 2.3319, "step": 287530 }, { "epoch": 1.1115492260827882, "grad_norm": 0.10901686549186707, "learning_rate": 0.002, "loss": 2.3284, "step": 287540 }, { "epoch": 1.1115878832861716, "grad_norm": 0.0974387377500534, "learning_rate": 0.002, "loss": 2.3459, "step": 287550 }, { "epoch": 1.111626540489555, "grad_norm": 0.12838047742843628, "learning_rate": 0.002, "loss": 2.3265, "step": 287560 }, { "epoch": 1.1116651976929381, "grad_norm": 0.2370108962059021, "learning_rate": 0.002, "loss": 2.3322, "step": 287570 }, { "epoch": 1.1117038548963214, "grad_norm": 0.27904149889945984, "learning_rate": 0.002, "loss": 2.333, "step": 287580 }, { "epoch": 1.1117425120997046, "grad_norm": 0.13837091624736786, "learning_rate": 0.002, "loss": 2.3364, "step": 287590 }, { "epoch": 1.111781169303088, "grad_norm": 0.10299310088157654, "learning_rate": 0.002, "loss": 2.338, "step": 287600 }, { "epoch": 1.1118198265064712, "grad_norm": 0.10843832790851593, "learning_rate": 0.002, "loss": 2.3235, "step": 287610 }, { "epoch": 1.1118584837098544, "grad_norm": 0.11448937654495239, "learning_rate": 0.002, "loss": 2.3183, "step": 287620 }, { "epoch": 1.1118971409132379, "grad_norm": 0.10186512768268585, "learning_rate": 0.002, "loss": 2.3443, "step": 287630 }, { "epoch": 1.1119357981166211, "grad_norm": 0.12241533398628235, "learning_rate": 0.002, "loss": 2.3316, "step": 287640 }, { "epoch": 1.1119744553200044, "grad_norm": 0.11468174308538437, "learning_rate": 0.002, "loss": 2.3472, "step": 287650 }, { "epoch": 1.1120131125233876, "grad_norm": 0.11049031466245651, "learning_rate": 0.002, "loss": 2.3523, "step": 287660 }, { "epoch": 1.112051769726771, "grad_norm": 0.10353521257638931, "learning_rate": 0.002, "loss": 2.3352, "step": 287670 }, { "epoch": 1.1120904269301541, "grad_norm": 0.11140059679746628, "learning_rate": 0.002, "loss": 2.3352, "step": 287680 }, { "epoch": 1.1121290841335374, "grad_norm": 0.09088698029518127, "learning_rate": 0.002, "loss": 2.3289, "step": 287690 }, { "epoch": 1.1121677413369206, "grad_norm": 0.12379549443721771, "learning_rate": 0.002, "loss": 2.3436, "step": 287700 }, { "epoch": 1.112206398540304, "grad_norm": 0.11208084970712662, "learning_rate": 0.002, "loss": 2.3351, "step": 287710 }, { "epoch": 1.1122450557436874, "grad_norm": 0.10558558255434036, "learning_rate": 0.002, "loss": 2.3353, "step": 287720 }, { "epoch": 1.1122837129470706, "grad_norm": 0.10928310453891754, "learning_rate": 0.002, "loss": 2.3519, "step": 287730 }, { "epoch": 1.1123223701504539, "grad_norm": 0.12338408082723618, "learning_rate": 0.002, "loss": 2.3347, "step": 287740 }, { "epoch": 1.1123610273538371, "grad_norm": 0.16096697747707367, "learning_rate": 0.002, "loss": 2.3326, "step": 287750 }, { "epoch": 1.1123996845572204, "grad_norm": 0.10622543096542358, "learning_rate": 0.002, "loss": 2.3377, "step": 287760 }, { "epoch": 1.1124383417606036, "grad_norm": 0.12482261657714844, "learning_rate": 0.002, "loss": 2.3395, "step": 287770 }, { "epoch": 1.1124769989639869, "grad_norm": 0.09466690570116043, "learning_rate": 0.002, "loss": 2.3228, "step": 287780 }, { "epoch": 1.1125156561673701, "grad_norm": 0.1149539053440094, "learning_rate": 0.002, "loss": 2.3351, "step": 287790 }, { "epoch": 1.1125543133707536, "grad_norm": 0.10789894312620163, "learning_rate": 0.002, "loss": 2.3246, "step": 287800 }, { "epoch": 1.1125929705741369, "grad_norm": 0.11976473778486252, "learning_rate": 0.002, "loss": 2.305, "step": 287810 }, { "epoch": 1.1126316277775201, "grad_norm": 0.09605703502893448, "learning_rate": 0.002, "loss": 2.3415, "step": 287820 }, { "epoch": 1.1126702849809034, "grad_norm": 0.0961158275604248, "learning_rate": 0.002, "loss": 2.3416, "step": 287830 }, { "epoch": 1.1127089421842866, "grad_norm": 0.11573757231235504, "learning_rate": 0.002, "loss": 2.3458, "step": 287840 }, { "epoch": 1.1127475993876699, "grad_norm": 0.10903476923704147, "learning_rate": 0.002, "loss": 2.3273, "step": 287850 }, { "epoch": 1.1127862565910531, "grad_norm": 0.14516030251979828, "learning_rate": 0.002, "loss": 2.3482, "step": 287860 }, { "epoch": 1.1128249137944364, "grad_norm": 0.10482911020517349, "learning_rate": 0.002, "loss": 2.3295, "step": 287870 }, { "epoch": 1.1128635709978196, "grad_norm": 0.09693562984466553, "learning_rate": 0.002, "loss": 2.3262, "step": 287880 }, { "epoch": 1.112902228201203, "grad_norm": 0.10743766278028488, "learning_rate": 0.002, "loss": 2.3291, "step": 287890 }, { "epoch": 1.1129408854045864, "grad_norm": 0.11316178739070892, "learning_rate": 0.002, "loss": 2.3435, "step": 287900 }, { "epoch": 1.1129795426079696, "grad_norm": 0.11615628749132156, "learning_rate": 0.002, "loss": 2.3407, "step": 287910 }, { "epoch": 1.1130181998113529, "grad_norm": 0.09100455045700073, "learning_rate": 0.002, "loss": 2.351, "step": 287920 }, { "epoch": 1.1130568570147361, "grad_norm": 0.10525690019130707, "learning_rate": 0.002, "loss": 2.3486, "step": 287930 }, { "epoch": 1.1130955142181194, "grad_norm": 0.10136166214942932, "learning_rate": 0.002, "loss": 2.3386, "step": 287940 }, { "epoch": 1.1131341714215026, "grad_norm": 0.1073971837759018, "learning_rate": 0.002, "loss": 2.3412, "step": 287950 }, { "epoch": 1.1131728286248859, "grad_norm": 0.11636967957019806, "learning_rate": 0.002, "loss": 2.3437, "step": 287960 }, { "epoch": 1.1132114858282693, "grad_norm": 0.10656551271677017, "learning_rate": 0.002, "loss": 2.3307, "step": 287970 }, { "epoch": 1.1132501430316526, "grad_norm": 0.11081990599632263, "learning_rate": 0.002, "loss": 2.3336, "step": 287980 }, { "epoch": 1.1132888002350358, "grad_norm": 0.11542163044214249, "learning_rate": 0.002, "loss": 2.3241, "step": 287990 }, { "epoch": 1.113327457438419, "grad_norm": 0.09533045440912247, "learning_rate": 0.002, "loss": 2.3261, "step": 288000 }, { "epoch": 1.1133661146418024, "grad_norm": 0.09483199566602707, "learning_rate": 0.002, "loss": 2.3295, "step": 288010 }, { "epoch": 1.1134047718451856, "grad_norm": 0.114728644490242, "learning_rate": 0.002, "loss": 2.324, "step": 288020 }, { "epoch": 1.1134434290485689, "grad_norm": 0.11476054787635803, "learning_rate": 0.002, "loss": 2.3437, "step": 288030 }, { "epoch": 1.113482086251952, "grad_norm": 0.10087236016988754, "learning_rate": 0.002, "loss": 2.3285, "step": 288040 }, { "epoch": 1.1135207434553354, "grad_norm": 0.10353070497512817, "learning_rate": 0.002, "loss": 2.3397, "step": 288050 }, { "epoch": 1.1135594006587188, "grad_norm": 0.1241433322429657, "learning_rate": 0.002, "loss": 2.3393, "step": 288060 }, { "epoch": 1.113598057862102, "grad_norm": 0.09710206091403961, "learning_rate": 0.002, "loss": 2.342, "step": 288070 }, { "epoch": 1.1136367150654853, "grad_norm": 0.11962775886058807, "learning_rate": 0.002, "loss": 2.3395, "step": 288080 }, { "epoch": 1.1136753722688686, "grad_norm": 0.1040009930729866, "learning_rate": 0.002, "loss": 2.344, "step": 288090 }, { "epoch": 1.1137140294722518, "grad_norm": 0.13171091675758362, "learning_rate": 0.002, "loss": 2.3348, "step": 288100 }, { "epoch": 1.113752686675635, "grad_norm": 0.09785674512386322, "learning_rate": 0.002, "loss": 2.3223, "step": 288110 }, { "epoch": 1.1137913438790183, "grad_norm": 0.10102818161249161, "learning_rate": 0.002, "loss": 2.3364, "step": 288120 }, { "epoch": 1.1138300010824016, "grad_norm": 0.10420387238264084, "learning_rate": 0.002, "loss": 2.3511, "step": 288130 }, { "epoch": 1.113868658285785, "grad_norm": 0.10302894562482834, "learning_rate": 0.002, "loss": 2.3373, "step": 288140 }, { "epoch": 1.1139073154891683, "grad_norm": 0.1063724160194397, "learning_rate": 0.002, "loss": 2.3382, "step": 288150 }, { "epoch": 1.1139459726925516, "grad_norm": 0.09987162053585052, "learning_rate": 0.002, "loss": 2.3383, "step": 288160 }, { "epoch": 1.1139846298959348, "grad_norm": 0.10557962954044342, "learning_rate": 0.002, "loss": 2.3327, "step": 288170 }, { "epoch": 1.114023287099318, "grad_norm": 0.10160104185342789, "learning_rate": 0.002, "loss": 2.3219, "step": 288180 }, { "epoch": 1.1140619443027013, "grad_norm": 0.0960058942437172, "learning_rate": 0.002, "loss": 2.3451, "step": 288190 }, { "epoch": 1.1141006015060846, "grad_norm": 0.09926880151033401, "learning_rate": 0.002, "loss": 2.3364, "step": 288200 }, { "epoch": 1.1141392587094678, "grad_norm": 0.10220940411090851, "learning_rate": 0.002, "loss": 2.3481, "step": 288210 }, { "epoch": 1.114177915912851, "grad_norm": 0.11922407895326614, "learning_rate": 0.002, "loss": 2.3467, "step": 288220 }, { "epoch": 1.1142165731162346, "grad_norm": 0.10637892037630081, "learning_rate": 0.002, "loss": 2.3328, "step": 288230 }, { "epoch": 1.1142552303196178, "grad_norm": 0.0881463959813118, "learning_rate": 0.002, "loss": 2.3224, "step": 288240 }, { "epoch": 1.114293887523001, "grad_norm": 0.10221447050571442, "learning_rate": 0.002, "loss": 2.3461, "step": 288250 }, { "epoch": 1.1143325447263843, "grad_norm": 0.12899447977542877, "learning_rate": 0.002, "loss": 2.3435, "step": 288260 }, { "epoch": 1.1143712019297676, "grad_norm": 0.12827207148075104, "learning_rate": 0.002, "loss": 2.3333, "step": 288270 }, { "epoch": 1.1144098591331508, "grad_norm": 0.10625769942998886, "learning_rate": 0.002, "loss": 2.3471, "step": 288280 }, { "epoch": 1.114448516336534, "grad_norm": 0.09953627735376358, "learning_rate": 0.002, "loss": 2.3454, "step": 288290 }, { "epoch": 1.1144871735399173, "grad_norm": 0.11532321572303772, "learning_rate": 0.002, "loss": 2.3366, "step": 288300 }, { "epoch": 1.1145258307433008, "grad_norm": 0.10999681055545807, "learning_rate": 0.002, "loss": 2.3384, "step": 288310 }, { "epoch": 1.114564487946684, "grad_norm": 0.13596197962760925, "learning_rate": 0.002, "loss": 2.3496, "step": 288320 }, { "epoch": 1.1146031451500673, "grad_norm": 0.10923830419778824, "learning_rate": 0.002, "loss": 2.3275, "step": 288330 }, { "epoch": 1.1146418023534506, "grad_norm": 0.10871526598930359, "learning_rate": 0.002, "loss": 2.3394, "step": 288340 }, { "epoch": 1.1146804595568338, "grad_norm": 0.12883463501930237, "learning_rate": 0.002, "loss": 2.339, "step": 288350 }, { "epoch": 1.114719116760217, "grad_norm": 0.11219342797994614, "learning_rate": 0.002, "loss": 2.3521, "step": 288360 }, { "epoch": 1.1147577739636003, "grad_norm": 0.10813488811254501, "learning_rate": 0.002, "loss": 2.3274, "step": 288370 }, { "epoch": 1.1147964311669836, "grad_norm": 0.12277258187532425, "learning_rate": 0.002, "loss": 2.3329, "step": 288380 }, { "epoch": 1.1148350883703668, "grad_norm": 0.09526970237493515, "learning_rate": 0.002, "loss": 2.3499, "step": 288390 }, { "epoch": 1.1148737455737503, "grad_norm": 0.09309110045433044, "learning_rate": 0.002, "loss": 2.3449, "step": 288400 }, { "epoch": 1.1149124027771335, "grad_norm": 0.1104973778128624, "learning_rate": 0.002, "loss": 2.3416, "step": 288410 }, { "epoch": 1.1149510599805168, "grad_norm": 0.09410668164491653, "learning_rate": 0.002, "loss": 2.3345, "step": 288420 }, { "epoch": 1.1149897171839, "grad_norm": 0.0946255549788475, "learning_rate": 0.002, "loss": 2.3525, "step": 288430 }, { "epoch": 1.1150283743872833, "grad_norm": 0.10899773985147476, "learning_rate": 0.002, "loss": 2.3348, "step": 288440 }, { "epoch": 1.1150670315906666, "grad_norm": 0.1305028200149536, "learning_rate": 0.002, "loss": 2.342, "step": 288450 }, { "epoch": 1.1151056887940498, "grad_norm": 0.11407119035720825, "learning_rate": 0.002, "loss": 2.3342, "step": 288460 }, { "epoch": 1.115144345997433, "grad_norm": 0.10499758273363113, "learning_rate": 0.002, "loss": 2.3271, "step": 288470 }, { "epoch": 1.1151830032008165, "grad_norm": 0.11272277683019638, "learning_rate": 0.002, "loss": 2.339, "step": 288480 }, { "epoch": 1.1152216604041998, "grad_norm": 0.10035009682178497, "learning_rate": 0.002, "loss": 2.3351, "step": 288490 }, { "epoch": 1.115260317607583, "grad_norm": 0.10695807635784149, "learning_rate": 0.002, "loss": 2.3391, "step": 288500 }, { "epoch": 1.1152989748109663, "grad_norm": 0.10339631140232086, "learning_rate": 0.002, "loss": 2.3137, "step": 288510 }, { "epoch": 1.1153376320143495, "grad_norm": 0.09223701804876328, "learning_rate": 0.002, "loss": 2.3355, "step": 288520 }, { "epoch": 1.1153762892177328, "grad_norm": 0.1100502610206604, "learning_rate": 0.002, "loss": 2.3327, "step": 288530 }, { "epoch": 1.115414946421116, "grad_norm": 0.1022772565484047, "learning_rate": 0.002, "loss": 2.3308, "step": 288540 }, { "epoch": 1.1154536036244993, "grad_norm": 0.09872733801603317, "learning_rate": 0.002, "loss": 2.3409, "step": 288550 }, { "epoch": 1.1154922608278826, "grad_norm": 0.10790759325027466, "learning_rate": 0.002, "loss": 2.32, "step": 288560 }, { "epoch": 1.115530918031266, "grad_norm": 0.11124914139509201, "learning_rate": 0.002, "loss": 2.3369, "step": 288570 }, { "epoch": 1.1155695752346493, "grad_norm": 0.11812636256217957, "learning_rate": 0.002, "loss": 2.3396, "step": 288580 }, { "epoch": 1.1156082324380325, "grad_norm": 0.12386882305145264, "learning_rate": 0.002, "loss": 2.3478, "step": 288590 }, { "epoch": 1.1156468896414158, "grad_norm": 0.10007230192422867, "learning_rate": 0.002, "loss": 2.3337, "step": 288600 }, { "epoch": 1.115685546844799, "grad_norm": 0.11115007102489471, "learning_rate": 0.002, "loss": 2.338, "step": 288610 }, { "epoch": 1.1157242040481823, "grad_norm": 0.11161840707063675, "learning_rate": 0.002, "loss": 2.3402, "step": 288620 }, { "epoch": 1.1157628612515655, "grad_norm": 0.10194922238588333, "learning_rate": 0.002, "loss": 2.3481, "step": 288630 }, { "epoch": 1.1158015184549488, "grad_norm": 0.11764828115701675, "learning_rate": 0.002, "loss": 2.3374, "step": 288640 }, { "epoch": 1.1158401756583323, "grad_norm": 0.1009848415851593, "learning_rate": 0.002, "loss": 2.3326, "step": 288650 }, { "epoch": 1.1158788328617155, "grad_norm": 0.14312507212162018, "learning_rate": 0.002, "loss": 2.3264, "step": 288660 }, { "epoch": 1.1159174900650988, "grad_norm": 0.1186060905456543, "learning_rate": 0.002, "loss": 2.3397, "step": 288670 }, { "epoch": 1.115956147268482, "grad_norm": 0.10075625777244568, "learning_rate": 0.002, "loss": 2.3412, "step": 288680 }, { "epoch": 1.1159948044718653, "grad_norm": 0.5035440325737, "learning_rate": 0.002, "loss": 2.3331, "step": 288690 }, { "epoch": 1.1160334616752485, "grad_norm": 0.11094953864812851, "learning_rate": 0.002, "loss": 2.3428, "step": 288700 }, { "epoch": 1.1160721188786318, "grad_norm": 0.08963083475828171, "learning_rate": 0.002, "loss": 2.3446, "step": 288710 }, { "epoch": 1.116110776082015, "grad_norm": 0.11137071996927261, "learning_rate": 0.002, "loss": 2.3292, "step": 288720 }, { "epoch": 1.1161494332853983, "grad_norm": 0.09859320521354675, "learning_rate": 0.002, "loss": 2.3324, "step": 288730 }, { "epoch": 1.1161880904887818, "grad_norm": 0.10735145956277847, "learning_rate": 0.002, "loss": 2.3264, "step": 288740 }, { "epoch": 1.116226747692165, "grad_norm": 0.10665154457092285, "learning_rate": 0.002, "loss": 2.3323, "step": 288750 }, { "epoch": 1.1162654048955483, "grad_norm": 0.09725417196750641, "learning_rate": 0.002, "loss": 2.3262, "step": 288760 }, { "epoch": 1.1163040620989315, "grad_norm": 0.11257147789001465, "learning_rate": 0.002, "loss": 2.3348, "step": 288770 }, { "epoch": 1.1163427193023148, "grad_norm": 0.12395235151052475, "learning_rate": 0.002, "loss": 2.3374, "step": 288780 }, { "epoch": 1.116381376505698, "grad_norm": 0.09967972338199615, "learning_rate": 0.002, "loss": 2.338, "step": 288790 }, { "epoch": 1.1164200337090813, "grad_norm": 0.10118284821510315, "learning_rate": 0.002, "loss": 2.3283, "step": 288800 }, { "epoch": 1.1164586909124647, "grad_norm": 0.10583081096410751, "learning_rate": 0.002, "loss": 2.3446, "step": 288810 }, { "epoch": 1.116497348115848, "grad_norm": 0.1105639636516571, "learning_rate": 0.002, "loss": 2.3414, "step": 288820 }, { "epoch": 1.1165360053192313, "grad_norm": 0.10586509108543396, "learning_rate": 0.002, "loss": 2.3254, "step": 288830 }, { "epoch": 1.1165746625226145, "grad_norm": 0.10803575068712234, "learning_rate": 0.002, "loss": 2.3241, "step": 288840 }, { "epoch": 1.1166133197259978, "grad_norm": 0.0964178740978241, "learning_rate": 0.002, "loss": 2.3413, "step": 288850 }, { "epoch": 1.116651976929381, "grad_norm": 0.11801418662071228, "learning_rate": 0.002, "loss": 2.3275, "step": 288860 }, { "epoch": 1.1166906341327643, "grad_norm": 0.10151393711566925, "learning_rate": 0.002, "loss": 2.3314, "step": 288870 }, { "epoch": 1.1167292913361475, "grad_norm": 0.0936862900853157, "learning_rate": 0.002, "loss": 2.3269, "step": 288880 }, { "epoch": 1.1167679485395308, "grad_norm": 0.11406335979700089, "learning_rate": 0.002, "loss": 2.3389, "step": 288890 }, { "epoch": 1.1168066057429142, "grad_norm": 0.1116715669631958, "learning_rate": 0.002, "loss": 2.3174, "step": 288900 }, { "epoch": 1.1168452629462975, "grad_norm": 0.10459590703248978, "learning_rate": 0.002, "loss": 2.352, "step": 288910 }, { "epoch": 1.1168839201496807, "grad_norm": 0.11433672904968262, "learning_rate": 0.002, "loss": 2.3454, "step": 288920 }, { "epoch": 1.116922577353064, "grad_norm": 0.09518077969551086, "learning_rate": 0.002, "loss": 2.3365, "step": 288930 }, { "epoch": 1.1169612345564472, "grad_norm": 0.11603035777807236, "learning_rate": 0.002, "loss": 2.3432, "step": 288940 }, { "epoch": 1.1169998917598305, "grad_norm": 0.11251766979694366, "learning_rate": 0.002, "loss": 2.3447, "step": 288950 }, { "epoch": 1.1170385489632138, "grad_norm": 0.09062660485506058, "learning_rate": 0.002, "loss": 2.329, "step": 288960 }, { "epoch": 1.117077206166597, "grad_norm": 0.1127210482954979, "learning_rate": 0.002, "loss": 2.325, "step": 288970 }, { "epoch": 1.1171158633699805, "grad_norm": 0.1010056659579277, "learning_rate": 0.002, "loss": 2.3475, "step": 288980 }, { "epoch": 1.1171545205733637, "grad_norm": 0.09560646116733551, "learning_rate": 0.002, "loss": 2.3243, "step": 288990 }, { "epoch": 1.117193177776747, "grad_norm": 0.12088503688573837, "learning_rate": 0.002, "loss": 2.3427, "step": 289000 }, { "epoch": 1.1172318349801302, "grad_norm": 0.1045701801776886, "learning_rate": 0.002, "loss": 2.3437, "step": 289010 }, { "epoch": 1.1172704921835135, "grad_norm": 0.10648337751626968, "learning_rate": 0.002, "loss": 2.3446, "step": 289020 }, { "epoch": 1.1173091493868967, "grad_norm": 0.10930231958627701, "learning_rate": 0.002, "loss": 2.3289, "step": 289030 }, { "epoch": 1.11734780659028, "grad_norm": 0.11172724515199661, "learning_rate": 0.002, "loss": 2.3305, "step": 289040 }, { "epoch": 1.1173864637936632, "grad_norm": 0.10152576118707657, "learning_rate": 0.002, "loss": 2.3342, "step": 289050 }, { "epoch": 1.1174251209970465, "grad_norm": 0.14760203659534454, "learning_rate": 0.002, "loss": 2.358, "step": 289060 }, { "epoch": 1.11746377820043, "grad_norm": 0.08888938277959824, "learning_rate": 0.002, "loss": 2.3426, "step": 289070 }, { "epoch": 1.1175024354038132, "grad_norm": 0.09549479931592941, "learning_rate": 0.002, "loss": 2.3332, "step": 289080 }, { "epoch": 1.1175410926071965, "grad_norm": 0.11979442834854126, "learning_rate": 0.002, "loss": 2.3382, "step": 289090 }, { "epoch": 1.1175797498105797, "grad_norm": 0.1341460645198822, "learning_rate": 0.002, "loss": 2.3282, "step": 289100 }, { "epoch": 1.117618407013963, "grad_norm": 0.11074167490005493, "learning_rate": 0.002, "loss": 2.3383, "step": 289110 }, { "epoch": 1.1176570642173462, "grad_norm": 0.10645493119955063, "learning_rate": 0.002, "loss": 2.3354, "step": 289120 }, { "epoch": 1.1176957214207295, "grad_norm": 0.10313550382852554, "learning_rate": 0.002, "loss": 2.3393, "step": 289130 }, { "epoch": 1.1177343786241127, "grad_norm": 0.1280437409877777, "learning_rate": 0.002, "loss": 2.3316, "step": 289140 }, { "epoch": 1.1177730358274962, "grad_norm": 0.10288901627063751, "learning_rate": 0.002, "loss": 2.3386, "step": 289150 }, { "epoch": 1.1178116930308795, "grad_norm": 0.12505581974983215, "learning_rate": 0.002, "loss": 2.3383, "step": 289160 }, { "epoch": 1.1178503502342627, "grad_norm": 0.09792499244213104, "learning_rate": 0.002, "loss": 2.3281, "step": 289170 }, { "epoch": 1.117889007437646, "grad_norm": 0.10279227793216705, "learning_rate": 0.002, "loss": 2.3319, "step": 289180 }, { "epoch": 1.1179276646410292, "grad_norm": 0.10783105343580246, "learning_rate": 0.002, "loss": 2.3441, "step": 289190 }, { "epoch": 1.1179663218444125, "grad_norm": 0.09635636955499649, "learning_rate": 0.002, "loss": 2.3406, "step": 289200 }, { "epoch": 1.1180049790477957, "grad_norm": 0.10871084779500961, "learning_rate": 0.002, "loss": 2.3425, "step": 289210 }, { "epoch": 1.118043636251179, "grad_norm": 0.11938074976205826, "learning_rate": 0.002, "loss": 2.344, "step": 289220 }, { "epoch": 1.1180822934545622, "grad_norm": 0.12168192863464355, "learning_rate": 0.002, "loss": 2.3501, "step": 289230 }, { "epoch": 1.1181209506579457, "grad_norm": 0.09364253282546997, "learning_rate": 0.002, "loss": 2.3442, "step": 289240 }, { "epoch": 1.118159607861329, "grad_norm": 0.0911431536078453, "learning_rate": 0.002, "loss": 2.3419, "step": 289250 }, { "epoch": 1.1181982650647122, "grad_norm": 0.10865811258554459, "learning_rate": 0.002, "loss": 2.3296, "step": 289260 }, { "epoch": 1.1182369222680955, "grad_norm": 0.13914142549037933, "learning_rate": 0.002, "loss": 2.3305, "step": 289270 }, { "epoch": 1.1182755794714787, "grad_norm": 0.11465007066726685, "learning_rate": 0.002, "loss": 2.3631, "step": 289280 }, { "epoch": 1.118314236674862, "grad_norm": 0.12156081199645996, "learning_rate": 0.002, "loss": 2.3552, "step": 289290 }, { "epoch": 1.1183528938782452, "grad_norm": 0.10695220530033112, "learning_rate": 0.002, "loss": 2.3377, "step": 289300 }, { "epoch": 1.1183915510816285, "grad_norm": 0.10531539469957352, "learning_rate": 0.002, "loss": 2.3406, "step": 289310 }, { "epoch": 1.118430208285012, "grad_norm": 0.09656589478254318, "learning_rate": 0.002, "loss": 2.3197, "step": 289320 }, { "epoch": 1.1184688654883952, "grad_norm": 0.10795163363218307, "learning_rate": 0.002, "loss": 2.3362, "step": 289330 }, { "epoch": 1.1185075226917784, "grad_norm": 0.10545086860656738, "learning_rate": 0.002, "loss": 2.3299, "step": 289340 }, { "epoch": 1.1185461798951617, "grad_norm": 0.10557561367750168, "learning_rate": 0.002, "loss": 2.3262, "step": 289350 }, { "epoch": 1.118584837098545, "grad_norm": 0.11986632645130157, "learning_rate": 0.002, "loss": 2.325, "step": 289360 }, { "epoch": 1.1186234943019282, "grad_norm": 0.12289483100175858, "learning_rate": 0.002, "loss": 2.3242, "step": 289370 }, { "epoch": 1.1186621515053115, "grad_norm": 0.1210024505853653, "learning_rate": 0.002, "loss": 2.3328, "step": 289380 }, { "epoch": 1.1187008087086947, "grad_norm": 0.10017368942499161, "learning_rate": 0.002, "loss": 2.3259, "step": 289390 }, { "epoch": 1.118739465912078, "grad_norm": 0.09998578578233719, "learning_rate": 0.002, "loss": 2.3478, "step": 289400 }, { "epoch": 1.1187781231154614, "grad_norm": 0.09792304039001465, "learning_rate": 0.002, "loss": 2.3416, "step": 289410 }, { "epoch": 1.1188167803188447, "grad_norm": 0.09898721426725388, "learning_rate": 0.002, "loss": 2.3385, "step": 289420 }, { "epoch": 1.118855437522228, "grad_norm": 0.15388897061347961, "learning_rate": 0.002, "loss": 2.336, "step": 289430 }, { "epoch": 1.1188940947256112, "grad_norm": 0.11085615307092667, "learning_rate": 0.002, "loss": 2.3398, "step": 289440 }, { "epoch": 1.1189327519289944, "grad_norm": 0.11091116815805435, "learning_rate": 0.002, "loss": 2.3246, "step": 289450 }, { "epoch": 1.1189714091323777, "grad_norm": 0.11337563395500183, "learning_rate": 0.002, "loss": 2.3266, "step": 289460 }, { "epoch": 1.119010066335761, "grad_norm": 0.12272960692644119, "learning_rate": 0.002, "loss": 2.3414, "step": 289470 }, { "epoch": 1.1190487235391442, "grad_norm": 0.09862526506185532, "learning_rate": 0.002, "loss": 2.3153, "step": 289480 }, { "epoch": 1.1190873807425277, "grad_norm": 0.11311447620391846, "learning_rate": 0.002, "loss": 2.3319, "step": 289490 }, { "epoch": 1.119126037945911, "grad_norm": 0.10457554459571838, "learning_rate": 0.002, "loss": 2.3306, "step": 289500 }, { "epoch": 1.1191646951492942, "grad_norm": 0.11485693603754044, "learning_rate": 0.002, "loss": 2.3374, "step": 289510 }, { "epoch": 1.1192033523526774, "grad_norm": 0.11195486038923264, "learning_rate": 0.002, "loss": 2.3541, "step": 289520 }, { "epoch": 1.1192420095560607, "grad_norm": 0.10413413494825363, "learning_rate": 0.002, "loss": 2.3304, "step": 289530 }, { "epoch": 1.119280666759444, "grad_norm": 0.11456961184740067, "learning_rate": 0.002, "loss": 2.3337, "step": 289540 }, { "epoch": 1.1193193239628272, "grad_norm": 0.1050366684794426, "learning_rate": 0.002, "loss": 2.3496, "step": 289550 }, { "epoch": 1.1193579811662104, "grad_norm": 0.08729544281959534, "learning_rate": 0.002, "loss": 2.3331, "step": 289560 }, { "epoch": 1.1193966383695937, "grad_norm": 0.15180206298828125, "learning_rate": 0.002, "loss": 2.3376, "step": 289570 }, { "epoch": 1.1194352955729772, "grad_norm": 0.11148621886968613, "learning_rate": 0.002, "loss": 2.3447, "step": 289580 }, { "epoch": 1.1194739527763604, "grad_norm": 0.11813264340162277, "learning_rate": 0.002, "loss": 2.352, "step": 289590 }, { "epoch": 1.1195126099797437, "grad_norm": 0.11504266411066055, "learning_rate": 0.002, "loss": 2.3664, "step": 289600 }, { "epoch": 1.119551267183127, "grad_norm": 0.10125447064638138, "learning_rate": 0.002, "loss": 2.3428, "step": 289610 }, { "epoch": 1.1195899243865102, "grad_norm": 0.11368642747402191, "learning_rate": 0.002, "loss": 2.3384, "step": 289620 }, { "epoch": 1.1196285815898934, "grad_norm": 0.10616200417280197, "learning_rate": 0.002, "loss": 2.3325, "step": 289630 }, { "epoch": 1.1196672387932767, "grad_norm": 0.08843506127595901, "learning_rate": 0.002, "loss": 2.3143, "step": 289640 }, { "epoch": 1.11970589599666, "grad_norm": 0.12057457864284515, "learning_rate": 0.002, "loss": 2.3311, "step": 289650 }, { "epoch": 1.1197445532000434, "grad_norm": 0.1454624980688095, "learning_rate": 0.002, "loss": 2.331, "step": 289660 }, { "epoch": 1.1197832104034267, "grad_norm": 0.11299949139356613, "learning_rate": 0.002, "loss": 2.3418, "step": 289670 }, { "epoch": 1.11982186760681, "grad_norm": 0.11968392133712769, "learning_rate": 0.002, "loss": 2.3333, "step": 289680 }, { "epoch": 1.1198605248101932, "grad_norm": 0.12136146426200867, "learning_rate": 0.002, "loss": 2.345, "step": 289690 }, { "epoch": 1.1198991820135764, "grad_norm": 0.09192265570163727, "learning_rate": 0.002, "loss": 2.3418, "step": 289700 }, { "epoch": 1.1199378392169597, "grad_norm": 0.12679505348205566, "learning_rate": 0.002, "loss": 2.3405, "step": 289710 }, { "epoch": 1.119976496420343, "grad_norm": 0.1034976989030838, "learning_rate": 0.002, "loss": 2.3408, "step": 289720 }, { "epoch": 1.1200151536237262, "grad_norm": 0.08856305480003357, "learning_rate": 0.002, "loss": 2.3336, "step": 289730 }, { "epoch": 1.1200538108271094, "grad_norm": 0.10530061274766922, "learning_rate": 0.002, "loss": 2.3244, "step": 289740 }, { "epoch": 1.120092468030493, "grad_norm": 0.09523380547761917, "learning_rate": 0.002, "loss": 2.3372, "step": 289750 }, { "epoch": 1.1201311252338761, "grad_norm": 0.13746485114097595, "learning_rate": 0.002, "loss": 2.3377, "step": 289760 }, { "epoch": 1.1201697824372594, "grad_norm": 0.09969547390937805, "learning_rate": 0.002, "loss": 2.349, "step": 289770 }, { "epoch": 1.1202084396406427, "grad_norm": 0.09542639553546906, "learning_rate": 0.002, "loss": 2.3277, "step": 289780 }, { "epoch": 1.120247096844026, "grad_norm": 0.13123148679733276, "learning_rate": 0.002, "loss": 2.3269, "step": 289790 }, { "epoch": 1.1202857540474092, "grad_norm": 0.09224528819322586, "learning_rate": 0.002, "loss": 2.3386, "step": 289800 }, { "epoch": 1.1203244112507924, "grad_norm": 0.10775623470544815, "learning_rate": 0.002, "loss": 2.3293, "step": 289810 }, { "epoch": 1.1203630684541757, "grad_norm": 0.11774013191461563, "learning_rate": 0.002, "loss": 2.3379, "step": 289820 }, { "epoch": 1.1204017256575591, "grad_norm": 0.16113704442977905, "learning_rate": 0.002, "loss": 2.3448, "step": 289830 }, { "epoch": 1.1204403828609424, "grad_norm": 0.08824397623538971, "learning_rate": 0.002, "loss": 2.3129, "step": 289840 }, { "epoch": 1.1204790400643256, "grad_norm": 0.09775109589099884, "learning_rate": 0.002, "loss": 2.343, "step": 289850 }, { "epoch": 1.120517697267709, "grad_norm": 0.12996326386928558, "learning_rate": 0.002, "loss": 2.3408, "step": 289860 }, { "epoch": 1.1205563544710921, "grad_norm": 0.0891638770699501, "learning_rate": 0.002, "loss": 2.3392, "step": 289870 }, { "epoch": 1.1205950116744754, "grad_norm": 0.10942413657903671, "learning_rate": 0.002, "loss": 2.3435, "step": 289880 }, { "epoch": 1.1206336688778586, "grad_norm": 0.14186042547225952, "learning_rate": 0.002, "loss": 2.3303, "step": 289890 }, { "epoch": 1.120672326081242, "grad_norm": 0.09692166745662689, "learning_rate": 0.002, "loss": 2.3379, "step": 289900 }, { "epoch": 1.1207109832846252, "grad_norm": 0.1097760796546936, "learning_rate": 0.002, "loss": 2.3524, "step": 289910 }, { "epoch": 1.1207496404880086, "grad_norm": 0.1349506974220276, "learning_rate": 0.002, "loss": 2.3325, "step": 289920 }, { "epoch": 1.1207882976913919, "grad_norm": 0.10145654529333115, "learning_rate": 0.002, "loss": 2.3366, "step": 289930 }, { "epoch": 1.1208269548947751, "grad_norm": 0.10704918950796127, "learning_rate": 0.002, "loss": 2.3431, "step": 289940 }, { "epoch": 1.1208656120981584, "grad_norm": 0.10984811186790466, "learning_rate": 0.002, "loss": 2.3329, "step": 289950 }, { "epoch": 1.1209042693015416, "grad_norm": 0.09716042876243591, "learning_rate": 0.002, "loss": 2.3292, "step": 289960 }, { "epoch": 1.1209429265049249, "grad_norm": 0.10437486320734024, "learning_rate": 0.002, "loss": 2.3491, "step": 289970 }, { "epoch": 1.1209815837083081, "grad_norm": 0.11297482252120972, "learning_rate": 0.002, "loss": 2.3516, "step": 289980 }, { "epoch": 1.1210202409116914, "grad_norm": 0.09970725327730179, "learning_rate": 0.002, "loss": 2.3379, "step": 289990 }, { "epoch": 1.1210588981150749, "grad_norm": 0.09292304515838623, "learning_rate": 0.002, "loss": 2.3323, "step": 290000 }, { "epoch": 1.1210975553184581, "grad_norm": 0.09936968982219696, "learning_rate": 0.002, "loss": 2.3289, "step": 290010 }, { "epoch": 1.1211362125218414, "grad_norm": 0.145578533411026, "learning_rate": 0.002, "loss": 2.327, "step": 290020 }, { "epoch": 1.1211748697252246, "grad_norm": 0.11210444569587708, "learning_rate": 0.002, "loss": 2.3327, "step": 290030 }, { "epoch": 1.1212135269286079, "grad_norm": 0.09582260251045227, "learning_rate": 0.002, "loss": 2.3241, "step": 290040 }, { "epoch": 1.1212521841319911, "grad_norm": 0.10336029529571533, "learning_rate": 0.002, "loss": 2.3407, "step": 290050 }, { "epoch": 1.1212908413353744, "grad_norm": 0.17279626429080963, "learning_rate": 0.002, "loss": 2.3496, "step": 290060 }, { "epoch": 1.1213294985387576, "grad_norm": 0.10698701441287994, "learning_rate": 0.002, "loss": 2.346, "step": 290070 }, { "epoch": 1.1213681557421409, "grad_norm": 0.10927004367113113, "learning_rate": 0.002, "loss": 2.3364, "step": 290080 }, { "epoch": 1.1214068129455244, "grad_norm": 0.09581339359283447, "learning_rate": 0.002, "loss": 2.3373, "step": 290090 }, { "epoch": 1.1214454701489076, "grad_norm": 0.10079756379127502, "learning_rate": 0.002, "loss": 2.3395, "step": 290100 }, { "epoch": 1.1214841273522909, "grad_norm": 0.11049097031354904, "learning_rate": 0.002, "loss": 2.3282, "step": 290110 }, { "epoch": 1.1215227845556741, "grad_norm": 0.09853102266788483, "learning_rate": 0.002, "loss": 2.3309, "step": 290120 }, { "epoch": 1.1215614417590574, "grad_norm": 0.0913471132516861, "learning_rate": 0.002, "loss": 2.3292, "step": 290130 }, { "epoch": 1.1216000989624406, "grad_norm": 0.09522217512130737, "learning_rate": 0.002, "loss": 2.3477, "step": 290140 }, { "epoch": 1.1216387561658239, "grad_norm": 0.1073422059416771, "learning_rate": 0.002, "loss": 2.3368, "step": 290150 }, { "epoch": 1.1216774133692071, "grad_norm": 0.13044096529483795, "learning_rate": 0.002, "loss": 2.343, "step": 290160 }, { "epoch": 1.1217160705725906, "grad_norm": 0.10940760374069214, "learning_rate": 0.002, "loss": 2.3386, "step": 290170 }, { "epoch": 1.1217547277759738, "grad_norm": 0.09089474380016327, "learning_rate": 0.002, "loss": 2.3344, "step": 290180 }, { "epoch": 1.121793384979357, "grad_norm": 0.09686536341905594, "learning_rate": 0.002, "loss": 2.332, "step": 290190 }, { "epoch": 1.1218320421827404, "grad_norm": 0.10884518176317215, "learning_rate": 0.002, "loss": 2.3334, "step": 290200 }, { "epoch": 1.1218706993861236, "grad_norm": 0.1591804176568985, "learning_rate": 0.002, "loss": 2.3443, "step": 290210 }, { "epoch": 1.1219093565895069, "grad_norm": 0.10680185258388519, "learning_rate": 0.002, "loss": 2.3445, "step": 290220 }, { "epoch": 1.12194801379289, "grad_norm": 0.10056709498167038, "learning_rate": 0.002, "loss": 2.3506, "step": 290230 }, { "epoch": 1.1219866709962734, "grad_norm": 0.0994231328368187, "learning_rate": 0.002, "loss": 2.3446, "step": 290240 }, { "epoch": 1.1220253281996566, "grad_norm": 0.09971922636032104, "learning_rate": 0.002, "loss": 2.3268, "step": 290250 }, { "epoch": 1.12206398540304, "grad_norm": 0.10332693159580231, "learning_rate": 0.002, "loss": 2.3245, "step": 290260 }, { "epoch": 1.1221026426064233, "grad_norm": 0.11420253664255142, "learning_rate": 0.002, "loss": 2.3277, "step": 290270 }, { "epoch": 1.1221412998098066, "grad_norm": 0.09633500128984451, "learning_rate": 0.002, "loss": 2.3277, "step": 290280 }, { "epoch": 1.1221799570131898, "grad_norm": 0.0984421819448471, "learning_rate": 0.002, "loss": 2.3344, "step": 290290 }, { "epoch": 1.122218614216573, "grad_norm": 0.11268351227045059, "learning_rate": 0.002, "loss": 2.3597, "step": 290300 }, { "epoch": 1.1222572714199563, "grad_norm": 0.1098770722746849, "learning_rate": 0.002, "loss": 2.327, "step": 290310 }, { "epoch": 1.1222959286233396, "grad_norm": 0.10104798525571823, "learning_rate": 0.002, "loss": 2.3317, "step": 290320 }, { "epoch": 1.1223345858267229, "grad_norm": 0.1057971939444542, "learning_rate": 0.002, "loss": 2.3299, "step": 290330 }, { "epoch": 1.1223732430301063, "grad_norm": 0.09403365850448608, "learning_rate": 0.002, "loss": 2.3494, "step": 290340 }, { "epoch": 1.1224119002334896, "grad_norm": 0.10322824120521545, "learning_rate": 0.002, "loss": 2.3454, "step": 290350 }, { "epoch": 1.1224505574368728, "grad_norm": 0.10056138783693314, "learning_rate": 0.002, "loss": 2.3216, "step": 290360 }, { "epoch": 1.122489214640256, "grad_norm": 0.10949905216693878, "learning_rate": 0.002, "loss": 2.3255, "step": 290370 }, { "epoch": 1.1225278718436393, "grad_norm": 0.10780070722103119, "learning_rate": 0.002, "loss": 2.3412, "step": 290380 }, { "epoch": 1.1225665290470226, "grad_norm": 0.12997755408287048, "learning_rate": 0.002, "loss": 2.341, "step": 290390 }, { "epoch": 1.1226051862504058, "grad_norm": 0.09493321180343628, "learning_rate": 0.002, "loss": 2.3201, "step": 290400 }, { "epoch": 1.122643843453789, "grad_norm": 0.09830449521541595, "learning_rate": 0.002, "loss": 2.3324, "step": 290410 }, { "epoch": 1.1226825006571723, "grad_norm": 0.1011434867978096, "learning_rate": 0.002, "loss": 2.3445, "step": 290420 }, { "epoch": 1.1227211578605558, "grad_norm": 0.09244860708713531, "learning_rate": 0.002, "loss": 2.3517, "step": 290430 }, { "epoch": 1.122759815063939, "grad_norm": 0.09907836467027664, "learning_rate": 0.002, "loss": 2.3263, "step": 290440 }, { "epoch": 1.1227984722673223, "grad_norm": 0.1255566030740738, "learning_rate": 0.002, "loss": 2.3287, "step": 290450 }, { "epoch": 1.1228371294707056, "grad_norm": 0.08803509920835495, "learning_rate": 0.002, "loss": 2.323, "step": 290460 }, { "epoch": 1.1228757866740888, "grad_norm": 0.1252477765083313, "learning_rate": 0.002, "loss": 2.3477, "step": 290470 }, { "epoch": 1.122914443877472, "grad_norm": 0.09476697444915771, "learning_rate": 0.002, "loss": 2.3388, "step": 290480 }, { "epoch": 1.1229531010808553, "grad_norm": 0.11161638051271439, "learning_rate": 0.002, "loss": 2.3575, "step": 290490 }, { "epoch": 1.1229917582842386, "grad_norm": 0.1074063628911972, "learning_rate": 0.002, "loss": 2.3239, "step": 290500 }, { "epoch": 1.123030415487622, "grad_norm": 0.10495628416538239, "learning_rate": 0.002, "loss": 2.3371, "step": 290510 }, { "epoch": 1.1230690726910053, "grad_norm": 0.12283427268266678, "learning_rate": 0.002, "loss": 2.3371, "step": 290520 }, { "epoch": 1.1231077298943886, "grad_norm": 0.11145826429128647, "learning_rate": 0.002, "loss": 2.3482, "step": 290530 }, { "epoch": 1.1231463870977718, "grad_norm": 0.13109144568443298, "learning_rate": 0.002, "loss": 2.3569, "step": 290540 }, { "epoch": 1.123185044301155, "grad_norm": 0.1077161654829979, "learning_rate": 0.002, "loss": 2.331, "step": 290550 }, { "epoch": 1.1232237015045383, "grad_norm": 0.09395164251327515, "learning_rate": 0.002, "loss": 2.3371, "step": 290560 }, { "epoch": 1.1232623587079216, "grad_norm": 0.10779568552970886, "learning_rate": 0.002, "loss": 2.3327, "step": 290570 }, { "epoch": 1.1233010159113048, "grad_norm": 0.10034944862127304, "learning_rate": 0.002, "loss": 2.3418, "step": 290580 }, { "epoch": 1.123339673114688, "grad_norm": 0.09337441623210907, "learning_rate": 0.002, "loss": 2.3442, "step": 290590 }, { "epoch": 1.1233783303180715, "grad_norm": 0.0911400094628334, "learning_rate": 0.002, "loss": 2.3397, "step": 290600 }, { "epoch": 1.1234169875214548, "grad_norm": 0.12469466775655746, "learning_rate": 0.002, "loss": 2.3419, "step": 290610 }, { "epoch": 1.123455644724838, "grad_norm": 0.11057650297880173, "learning_rate": 0.002, "loss": 2.3399, "step": 290620 }, { "epoch": 1.1234943019282213, "grad_norm": 0.09163448214530945, "learning_rate": 0.002, "loss": 2.3334, "step": 290630 }, { "epoch": 1.1235329591316046, "grad_norm": 0.10629388689994812, "learning_rate": 0.002, "loss": 2.3417, "step": 290640 }, { "epoch": 1.1235716163349878, "grad_norm": 0.13407862186431885, "learning_rate": 0.002, "loss": 2.3292, "step": 290650 }, { "epoch": 1.123610273538371, "grad_norm": 0.11412046104669571, "learning_rate": 0.002, "loss": 2.337, "step": 290660 }, { "epoch": 1.1236489307417543, "grad_norm": 0.1208498477935791, "learning_rate": 0.002, "loss": 2.3558, "step": 290670 }, { "epoch": 1.1236875879451378, "grad_norm": 0.1076798141002655, "learning_rate": 0.002, "loss": 2.3378, "step": 290680 }, { "epoch": 1.123726245148521, "grad_norm": 0.12939608097076416, "learning_rate": 0.002, "loss": 2.3426, "step": 290690 }, { "epoch": 1.1237649023519043, "grad_norm": 0.10172143578529358, "learning_rate": 0.002, "loss": 2.3308, "step": 290700 }, { "epoch": 1.1238035595552875, "grad_norm": 0.1000906303524971, "learning_rate": 0.002, "loss": 2.3201, "step": 290710 }, { "epoch": 1.1238422167586708, "grad_norm": 0.09072905033826828, "learning_rate": 0.002, "loss": 2.3424, "step": 290720 }, { "epoch": 1.123880873962054, "grad_norm": 0.10595636069774628, "learning_rate": 0.002, "loss": 2.3421, "step": 290730 }, { "epoch": 1.1239195311654373, "grad_norm": 0.11296257376670837, "learning_rate": 0.002, "loss": 2.3492, "step": 290740 }, { "epoch": 1.1239581883688206, "grad_norm": 0.11922469735145569, "learning_rate": 0.002, "loss": 2.3304, "step": 290750 }, { "epoch": 1.1239968455722038, "grad_norm": 0.10842528939247131, "learning_rate": 0.002, "loss": 2.3276, "step": 290760 }, { "epoch": 1.1240355027755873, "grad_norm": 0.11093436181545258, "learning_rate": 0.002, "loss": 2.346, "step": 290770 }, { "epoch": 1.1240741599789705, "grad_norm": 0.10337400436401367, "learning_rate": 0.002, "loss": 2.3575, "step": 290780 }, { "epoch": 1.1241128171823538, "grad_norm": 0.11542078852653503, "learning_rate": 0.002, "loss": 2.3384, "step": 290790 }, { "epoch": 1.124151474385737, "grad_norm": 0.10995732247829437, "learning_rate": 0.002, "loss": 2.3528, "step": 290800 }, { "epoch": 1.1241901315891203, "grad_norm": 0.09430845826864243, "learning_rate": 0.002, "loss": 2.3249, "step": 290810 }, { "epoch": 1.1242287887925035, "grad_norm": 0.1120004877448082, "learning_rate": 0.002, "loss": 2.3346, "step": 290820 }, { "epoch": 1.1242674459958868, "grad_norm": 0.107681505382061, "learning_rate": 0.002, "loss": 2.3216, "step": 290830 }, { "epoch": 1.1243061031992703, "grad_norm": 0.11644504219293594, "learning_rate": 0.002, "loss": 2.3547, "step": 290840 }, { "epoch": 1.1243447604026535, "grad_norm": 0.101902537047863, "learning_rate": 0.002, "loss": 2.3307, "step": 290850 }, { "epoch": 1.1243834176060368, "grad_norm": 0.10107220709323883, "learning_rate": 0.002, "loss": 2.3319, "step": 290860 }, { "epoch": 1.12442207480942, "grad_norm": 0.11538407951593399, "learning_rate": 0.002, "loss": 2.3437, "step": 290870 }, { "epoch": 1.1244607320128033, "grad_norm": 0.1046702191233635, "learning_rate": 0.002, "loss": 2.3367, "step": 290880 }, { "epoch": 1.1244993892161865, "grad_norm": 0.13055744767189026, "learning_rate": 0.002, "loss": 2.3279, "step": 290890 }, { "epoch": 1.1245380464195698, "grad_norm": 0.11719472706317902, "learning_rate": 0.002, "loss": 2.3439, "step": 290900 }, { "epoch": 1.124576703622953, "grad_norm": 0.10073716193437576, "learning_rate": 0.002, "loss": 2.3368, "step": 290910 }, { "epoch": 1.1246153608263363, "grad_norm": 0.12202101200819016, "learning_rate": 0.002, "loss": 2.3412, "step": 290920 }, { "epoch": 1.1246540180297198, "grad_norm": 0.10007891803979874, "learning_rate": 0.002, "loss": 2.34, "step": 290930 }, { "epoch": 1.124692675233103, "grad_norm": 0.09889797866344452, "learning_rate": 0.002, "loss": 2.3303, "step": 290940 }, { "epoch": 1.1247313324364863, "grad_norm": 0.10457437485456467, "learning_rate": 0.002, "loss": 2.3439, "step": 290950 }, { "epoch": 1.1247699896398695, "grad_norm": 0.10528910905122757, "learning_rate": 0.002, "loss": 2.3257, "step": 290960 }, { "epoch": 1.1248086468432528, "grad_norm": 0.9624165296554565, "learning_rate": 0.002, "loss": 2.3185, "step": 290970 }, { "epoch": 1.124847304046636, "grad_norm": 0.14060012996196747, "learning_rate": 0.002, "loss": 2.3605, "step": 290980 }, { "epoch": 1.1248859612500193, "grad_norm": 0.10160759091377258, "learning_rate": 0.002, "loss": 2.3387, "step": 290990 }, { "epoch": 1.1249246184534025, "grad_norm": 0.09019457548856735, "learning_rate": 0.002, "loss": 2.3371, "step": 291000 }, { "epoch": 1.124963275656786, "grad_norm": 0.09293685853481293, "learning_rate": 0.002, "loss": 2.3453, "step": 291010 }, { "epoch": 1.1250019328601693, "grad_norm": 0.09362807124853134, "learning_rate": 0.002, "loss": 2.3391, "step": 291020 }, { "epoch": 1.1250405900635525, "grad_norm": 0.09911637008190155, "learning_rate": 0.002, "loss": 2.3326, "step": 291030 }, { "epoch": 1.1250792472669358, "grad_norm": 0.09766800701618195, "learning_rate": 0.002, "loss": 2.3401, "step": 291040 }, { "epoch": 1.125117904470319, "grad_norm": 0.09793352335691452, "learning_rate": 0.002, "loss": 2.3236, "step": 291050 }, { "epoch": 1.1251565616737023, "grad_norm": 0.11317580938339233, "learning_rate": 0.002, "loss": 2.3399, "step": 291060 }, { "epoch": 1.1251952188770855, "grad_norm": 0.0960904210805893, "learning_rate": 0.002, "loss": 2.3505, "step": 291070 }, { "epoch": 1.1252338760804688, "grad_norm": 0.1038578525185585, "learning_rate": 0.002, "loss": 2.3384, "step": 291080 }, { "epoch": 1.125272533283852, "grad_norm": 0.10069358348846436, "learning_rate": 0.002, "loss": 2.3435, "step": 291090 }, { "epoch": 1.1253111904872353, "grad_norm": 0.10367664694786072, "learning_rate": 0.002, "loss": 2.3176, "step": 291100 }, { "epoch": 1.1253498476906187, "grad_norm": 0.1242256611585617, "learning_rate": 0.002, "loss": 2.3369, "step": 291110 }, { "epoch": 1.125388504894002, "grad_norm": 0.09930729866027832, "learning_rate": 0.002, "loss": 2.3337, "step": 291120 }, { "epoch": 1.1254271620973852, "grad_norm": 0.12044630199670792, "learning_rate": 0.002, "loss": 2.3326, "step": 291130 }, { "epoch": 1.1254658193007685, "grad_norm": 0.12741614878177643, "learning_rate": 0.002, "loss": 2.3102, "step": 291140 }, { "epoch": 1.1255044765041518, "grad_norm": 0.0974070355296135, "learning_rate": 0.002, "loss": 2.3409, "step": 291150 }, { "epoch": 1.125543133707535, "grad_norm": 0.1045672744512558, "learning_rate": 0.002, "loss": 2.3477, "step": 291160 }, { "epoch": 1.1255817909109183, "grad_norm": 0.11492281407117844, "learning_rate": 0.002, "loss": 2.3421, "step": 291170 }, { "epoch": 1.1256204481143017, "grad_norm": 0.09922520816326141, "learning_rate": 0.002, "loss": 2.36, "step": 291180 }, { "epoch": 1.125659105317685, "grad_norm": 0.09569668769836426, "learning_rate": 0.002, "loss": 2.3393, "step": 291190 }, { "epoch": 1.1256977625210682, "grad_norm": 0.10939368605613708, "learning_rate": 0.002, "loss": 2.3407, "step": 291200 }, { "epoch": 1.1257364197244515, "grad_norm": 0.1325932741165161, "learning_rate": 0.002, "loss": 2.339, "step": 291210 }, { "epoch": 1.1257750769278347, "grad_norm": 0.0986105427145958, "learning_rate": 0.002, "loss": 2.3372, "step": 291220 }, { "epoch": 1.125813734131218, "grad_norm": 0.08473266661167145, "learning_rate": 0.002, "loss": 2.3328, "step": 291230 }, { "epoch": 1.1258523913346012, "grad_norm": 0.12451715022325516, "learning_rate": 0.002, "loss": 2.3551, "step": 291240 }, { "epoch": 1.1258910485379845, "grad_norm": 0.10106877237558365, "learning_rate": 0.002, "loss": 2.3482, "step": 291250 }, { "epoch": 1.1259297057413677, "grad_norm": 0.112881600856781, "learning_rate": 0.002, "loss": 2.3388, "step": 291260 }, { "epoch": 1.125968362944751, "grad_norm": 0.10713592171669006, "learning_rate": 0.002, "loss": 2.3422, "step": 291270 }, { "epoch": 1.1260070201481345, "grad_norm": 0.11898888647556305, "learning_rate": 0.002, "loss": 2.3485, "step": 291280 }, { "epoch": 1.1260456773515177, "grad_norm": 0.11185794323682785, "learning_rate": 0.002, "loss": 2.3528, "step": 291290 }, { "epoch": 1.126084334554901, "grad_norm": 0.10397381335496902, "learning_rate": 0.002, "loss": 2.339, "step": 291300 }, { "epoch": 1.1261229917582842, "grad_norm": 0.1448792964220047, "learning_rate": 0.002, "loss": 2.3387, "step": 291310 }, { "epoch": 1.1261616489616675, "grad_norm": 0.10253781825304031, "learning_rate": 0.002, "loss": 2.3411, "step": 291320 }, { "epoch": 1.1262003061650507, "grad_norm": 0.1232328712940216, "learning_rate": 0.002, "loss": 2.32, "step": 291330 }, { "epoch": 1.126238963368434, "grad_norm": 0.10629157721996307, "learning_rate": 0.002, "loss": 2.3279, "step": 291340 }, { "epoch": 1.1262776205718175, "grad_norm": 0.09541931748390198, "learning_rate": 0.002, "loss": 2.3466, "step": 291350 }, { "epoch": 1.1263162777752007, "grad_norm": 0.1253042221069336, "learning_rate": 0.002, "loss": 2.3327, "step": 291360 }, { "epoch": 1.126354934978584, "grad_norm": 0.10665005445480347, "learning_rate": 0.002, "loss": 2.3221, "step": 291370 }, { "epoch": 1.1263935921819672, "grad_norm": 0.11243661493062973, "learning_rate": 0.002, "loss": 2.3358, "step": 291380 }, { "epoch": 1.1264322493853505, "grad_norm": 0.11163606494665146, "learning_rate": 0.002, "loss": 2.3243, "step": 291390 }, { "epoch": 1.1264709065887337, "grad_norm": 0.13230501115322113, "learning_rate": 0.002, "loss": 2.3381, "step": 291400 }, { "epoch": 1.126509563792117, "grad_norm": 0.09509193152189255, "learning_rate": 0.002, "loss": 2.3432, "step": 291410 }, { "epoch": 1.1265482209955002, "grad_norm": 0.08772409707307816, "learning_rate": 0.002, "loss": 2.342, "step": 291420 }, { "epoch": 1.1265868781988835, "grad_norm": 0.11761298775672913, "learning_rate": 0.002, "loss": 2.3491, "step": 291430 }, { "epoch": 1.126625535402267, "grad_norm": 0.11829674988985062, "learning_rate": 0.002, "loss": 2.3439, "step": 291440 }, { "epoch": 1.1266641926056502, "grad_norm": 0.11085585504770279, "learning_rate": 0.002, "loss": 2.3354, "step": 291450 }, { "epoch": 1.1267028498090335, "grad_norm": 0.11346784234046936, "learning_rate": 0.002, "loss": 2.3312, "step": 291460 }, { "epoch": 1.1267415070124167, "grad_norm": 0.09546907246112823, "learning_rate": 0.002, "loss": 2.3359, "step": 291470 }, { "epoch": 1.1267801642158, "grad_norm": 0.09734796732664108, "learning_rate": 0.002, "loss": 2.3411, "step": 291480 }, { "epoch": 1.1268188214191832, "grad_norm": 0.11880217492580414, "learning_rate": 0.002, "loss": 2.3309, "step": 291490 }, { "epoch": 1.1268574786225665, "grad_norm": 0.09316064417362213, "learning_rate": 0.002, "loss": 2.3446, "step": 291500 }, { "epoch": 1.1268961358259497, "grad_norm": 0.1146712377667427, "learning_rate": 0.002, "loss": 2.3324, "step": 291510 }, { "epoch": 1.1269347930293332, "grad_norm": 0.10328290611505508, "learning_rate": 0.002, "loss": 2.3385, "step": 291520 }, { "epoch": 1.1269734502327164, "grad_norm": 0.10888945311307907, "learning_rate": 0.002, "loss": 2.3482, "step": 291530 }, { "epoch": 1.1270121074360997, "grad_norm": 0.10232578217983246, "learning_rate": 0.002, "loss": 2.3445, "step": 291540 }, { "epoch": 1.127050764639483, "grad_norm": 0.1258421689271927, "learning_rate": 0.002, "loss": 2.3254, "step": 291550 }, { "epoch": 1.1270894218428662, "grad_norm": 0.11606115847826004, "learning_rate": 0.002, "loss": 2.3404, "step": 291560 }, { "epoch": 1.1271280790462495, "grad_norm": 0.10178054124116898, "learning_rate": 0.002, "loss": 2.3395, "step": 291570 }, { "epoch": 1.1271667362496327, "grad_norm": 0.11443013697862625, "learning_rate": 0.002, "loss": 2.3438, "step": 291580 }, { "epoch": 1.127205393453016, "grad_norm": 0.10536891967058182, "learning_rate": 0.002, "loss": 2.3224, "step": 291590 }, { "epoch": 1.1272440506563992, "grad_norm": 0.10703325271606445, "learning_rate": 0.002, "loss": 2.3339, "step": 291600 }, { "epoch": 1.1272827078597827, "grad_norm": 0.10174395889043808, "learning_rate": 0.002, "loss": 2.3137, "step": 291610 }, { "epoch": 1.127321365063166, "grad_norm": 0.09756621718406677, "learning_rate": 0.002, "loss": 2.3427, "step": 291620 }, { "epoch": 1.1273600222665492, "grad_norm": 0.1081177219748497, "learning_rate": 0.002, "loss": 2.3404, "step": 291630 }, { "epoch": 1.1273986794699324, "grad_norm": 0.09706825017929077, "learning_rate": 0.002, "loss": 2.3335, "step": 291640 }, { "epoch": 1.1274373366733157, "grad_norm": 0.09932103008031845, "learning_rate": 0.002, "loss": 2.3439, "step": 291650 }, { "epoch": 1.127475993876699, "grad_norm": 0.11982505768537521, "learning_rate": 0.002, "loss": 2.3472, "step": 291660 }, { "epoch": 1.1275146510800822, "grad_norm": 0.10125798732042313, "learning_rate": 0.002, "loss": 2.3285, "step": 291670 }, { "epoch": 1.1275533082834654, "grad_norm": 0.10993444174528122, "learning_rate": 0.002, "loss": 2.3312, "step": 291680 }, { "epoch": 1.127591965486849, "grad_norm": 0.10214158892631531, "learning_rate": 0.002, "loss": 2.3329, "step": 291690 }, { "epoch": 1.1276306226902322, "grad_norm": 0.09644022583961487, "learning_rate": 0.002, "loss": 2.3408, "step": 291700 }, { "epoch": 1.1276692798936154, "grad_norm": 0.09081563353538513, "learning_rate": 0.002, "loss": 2.3189, "step": 291710 }, { "epoch": 1.1277079370969987, "grad_norm": 0.09693627059459686, "learning_rate": 0.002, "loss": 2.3357, "step": 291720 }, { "epoch": 1.127746594300382, "grad_norm": 0.11326735466718674, "learning_rate": 0.002, "loss": 2.3365, "step": 291730 }, { "epoch": 1.1277852515037652, "grad_norm": 0.09546099603176117, "learning_rate": 0.002, "loss": 2.342, "step": 291740 }, { "epoch": 1.1278239087071484, "grad_norm": 0.11614954471588135, "learning_rate": 0.002, "loss": 2.3243, "step": 291750 }, { "epoch": 1.1278625659105317, "grad_norm": 0.11270710825920105, "learning_rate": 0.002, "loss": 2.3305, "step": 291760 }, { "epoch": 1.127901223113915, "grad_norm": 0.10175687819719315, "learning_rate": 0.002, "loss": 2.3365, "step": 291770 }, { "epoch": 1.1279398803172984, "grad_norm": 0.10057220607995987, "learning_rate": 0.002, "loss": 2.3286, "step": 291780 }, { "epoch": 1.1279785375206817, "grad_norm": 0.14043523371219635, "learning_rate": 0.002, "loss": 2.3388, "step": 291790 }, { "epoch": 1.128017194724065, "grad_norm": 0.11015049368143082, "learning_rate": 0.002, "loss": 2.3405, "step": 291800 }, { "epoch": 1.1280558519274482, "grad_norm": 0.10724800825119019, "learning_rate": 0.002, "loss": 2.341, "step": 291810 }, { "epoch": 1.1280945091308314, "grad_norm": 0.489633709192276, "learning_rate": 0.002, "loss": 2.3351, "step": 291820 }, { "epoch": 1.1281331663342147, "grad_norm": 0.10834350436925888, "learning_rate": 0.002, "loss": 2.3231, "step": 291830 }, { "epoch": 1.128171823537598, "grad_norm": 0.09968345612287521, "learning_rate": 0.002, "loss": 2.324, "step": 291840 }, { "epoch": 1.1282104807409812, "grad_norm": 0.10822063684463501, "learning_rate": 0.002, "loss": 2.3326, "step": 291850 }, { "epoch": 1.1282491379443647, "grad_norm": 0.10708516091108322, "learning_rate": 0.002, "loss": 2.3288, "step": 291860 }, { "epoch": 1.128287795147748, "grad_norm": 0.09746289253234863, "learning_rate": 0.002, "loss": 2.328, "step": 291870 }, { "epoch": 1.1283264523511312, "grad_norm": 0.10932601988315582, "learning_rate": 0.002, "loss": 2.3167, "step": 291880 }, { "epoch": 1.1283651095545144, "grad_norm": 1.7511025667190552, "learning_rate": 0.002, "loss": 2.3578, "step": 291890 }, { "epoch": 1.1284037667578977, "grad_norm": 0.10689739137887955, "learning_rate": 0.002, "loss": 2.3562, "step": 291900 }, { "epoch": 1.128442423961281, "grad_norm": 0.09519015997648239, "learning_rate": 0.002, "loss": 2.3274, "step": 291910 }, { "epoch": 1.1284810811646642, "grad_norm": 0.10214191675186157, "learning_rate": 0.002, "loss": 2.3378, "step": 291920 }, { "epoch": 1.1285197383680474, "grad_norm": 0.09518104791641235, "learning_rate": 0.002, "loss": 2.308, "step": 291930 }, { "epoch": 1.1285583955714307, "grad_norm": 0.10087645053863525, "learning_rate": 0.002, "loss": 2.3412, "step": 291940 }, { "epoch": 1.1285970527748141, "grad_norm": 0.12696702778339386, "learning_rate": 0.002, "loss": 2.3357, "step": 291950 }, { "epoch": 1.1286357099781974, "grad_norm": 0.09882255643606186, "learning_rate": 0.002, "loss": 2.3399, "step": 291960 }, { "epoch": 1.1286743671815807, "grad_norm": 0.11024424433708191, "learning_rate": 0.002, "loss": 2.3437, "step": 291970 }, { "epoch": 1.128713024384964, "grad_norm": 0.09846694767475128, "learning_rate": 0.002, "loss": 2.3387, "step": 291980 }, { "epoch": 1.1287516815883472, "grad_norm": 0.10065692663192749, "learning_rate": 0.002, "loss": 2.3329, "step": 291990 }, { "epoch": 1.1287903387917304, "grad_norm": 0.09468679875135422, "learning_rate": 0.002, "loss": 2.3258, "step": 292000 }, { "epoch": 1.1288289959951137, "grad_norm": 0.1300666779279709, "learning_rate": 0.002, "loss": 2.33, "step": 292010 }, { "epoch": 1.128867653198497, "grad_norm": 0.10579036921262741, "learning_rate": 0.002, "loss": 2.3372, "step": 292020 }, { "epoch": 1.1289063104018804, "grad_norm": 0.10410472005605698, "learning_rate": 0.002, "loss": 2.3349, "step": 292030 }, { "epoch": 1.1289449676052636, "grad_norm": 0.11222629994153976, "learning_rate": 0.002, "loss": 2.3486, "step": 292040 }, { "epoch": 1.128983624808647, "grad_norm": 0.09962747246026993, "learning_rate": 0.002, "loss": 2.346, "step": 292050 }, { "epoch": 1.1290222820120301, "grad_norm": 0.11796198785305023, "learning_rate": 0.002, "loss": 2.3409, "step": 292060 }, { "epoch": 1.1290609392154134, "grad_norm": 0.10702431201934814, "learning_rate": 0.002, "loss": 2.3407, "step": 292070 }, { "epoch": 1.1290995964187966, "grad_norm": 0.08998136967420578, "learning_rate": 0.002, "loss": 2.3417, "step": 292080 }, { "epoch": 1.12913825362218, "grad_norm": 0.09148484468460083, "learning_rate": 0.002, "loss": 2.3424, "step": 292090 }, { "epoch": 1.1291769108255632, "grad_norm": 0.09372007101774216, "learning_rate": 0.002, "loss": 2.3416, "step": 292100 }, { "epoch": 1.1292155680289464, "grad_norm": 0.11189445853233337, "learning_rate": 0.002, "loss": 2.3438, "step": 292110 }, { "epoch": 1.1292542252323299, "grad_norm": 0.09926964342594147, "learning_rate": 0.002, "loss": 2.3199, "step": 292120 }, { "epoch": 1.1292928824357131, "grad_norm": 0.08940834552049637, "learning_rate": 0.002, "loss": 2.3402, "step": 292130 }, { "epoch": 1.1293315396390964, "grad_norm": 0.12416701018810272, "learning_rate": 0.002, "loss": 2.3444, "step": 292140 }, { "epoch": 1.1293701968424796, "grad_norm": 0.10040237009525299, "learning_rate": 0.002, "loss": 2.3442, "step": 292150 }, { "epoch": 1.1294088540458629, "grad_norm": 0.11909114569425583, "learning_rate": 0.002, "loss": 2.3289, "step": 292160 }, { "epoch": 1.1294475112492461, "grad_norm": 0.30783000588417053, "learning_rate": 0.002, "loss": 2.3242, "step": 292170 }, { "epoch": 1.1294861684526294, "grad_norm": 0.1081785336136818, "learning_rate": 0.002, "loss": 2.3473, "step": 292180 }, { "epoch": 1.1295248256560129, "grad_norm": 0.10198457539081573, "learning_rate": 0.002, "loss": 2.3452, "step": 292190 }, { "epoch": 1.1295634828593961, "grad_norm": 0.09565374255180359, "learning_rate": 0.002, "loss": 2.3272, "step": 292200 }, { "epoch": 1.1296021400627794, "grad_norm": 0.09713221341371536, "learning_rate": 0.002, "loss": 2.336, "step": 292210 }, { "epoch": 1.1296407972661626, "grad_norm": 0.09174959361553192, "learning_rate": 0.002, "loss": 2.3342, "step": 292220 }, { "epoch": 1.1296794544695459, "grad_norm": 0.11424946784973145, "learning_rate": 0.002, "loss": 2.339, "step": 292230 }, { "epoch": 1.1297181116729291, "grad_norm": 0.10965334624052048, "learning_rate": 0.002, "loss": 2.3206, "step": 292240 }, { "epoch": 1.1297567688763124, "grad_norm": 0.10149752348661423, "learning_rate": 0.002, "loss": 2.3265, "step": 292250 }, { "epoch": 1.1297954260796956, "grad_norm": 0.13889697194099426, "learning_rate": 0.002, "loss": 2.3295, "step": 292260 }, { "epoch": 1.1298340832830789, "grad_norm": 0.10565648227930069, "learning_rate": 0.002, "loss": 2.3465, "step": 292270 }, { "epoch": 1.1298727404864621, "grad_norm": 0.10034672915935516, "learning_rate": 0.002, "loss": 2.336, "step": 292280 }, { "epoch": 1.1299113976898456, "grad_norm": 0.10778245329856873, "learning_rate": 0.002, "loss": 2.3372, "step": 292290 }, { "epoch": 1.1299500548932289, "grad_norm": 0.11974408477544785, "learning_rate": 0.002, "loss": 2.3318, "step": 292300 }, { "epoch": 1.1299887120966121, "grad_norm": 0.09993696212768555, "learning_rate": 0.002, "loss": 2.3385, "step": 292310 }, { "epoch": 1.1300273692999954, "grad_norm": 0.09732171893119812, "learning_rate": 0.002, "loss": 2.3364, "step": 292320 }, { "epoch": 1.1300660265033786, "grad_norm": 0.11085110902786255, "learning_rate": 0.002, "loss": 2.3497, "step": 292330 }, { "epoch": 1.1301046837067619, "grad_norm": 0.10859154909849167, "learning_rate": 0.002, "loss": 2.3345, "step": 292340 }, { "epoch": 1.1301433409101451, "grad_norm": 0.08565965294837952, "learning_rate": 0.002, "loss": 2.3438, "step": 292350 }, { "epoch": 1.1301819981135286, "grad_norm": 0.10185325145721436, "learning_rate": 0.002, "loss": 2.3437, "step": 292360 }, { "epoch": 1.1302206553169118, "grad_norm": 0.09620902687311172, "learning_rate": 0.002, "loss": 2.329, "step": 292370 }, { "epoch": 1.130259312520295, "grad_norm": 0.10088915377855301, "learning_rate": 0.002, "loss": 2.3313, "step": 292380 }, { "epoch": 1.1302979697236784, "grad_norm": 0.12231367826461792, "learning_rate": 0.002, "loss": 2.3583, "step": 292390 }, { "epoch": 1.1303366269270616, "grad_norm": 0.11737716943025589, "learning_rate": 0.002, "loss": 2.3293, "step": 292400 }, { "epoch": 1.1303752841304449, "grad_norm": 0.12546178698539734, "learning_rate": 0.002, "loss": 2.3313, "step": 292410 }, { "epoch": 1.130413941333828, "grad_norm": 0.09996787458658218, "learning_rate": 0.002, "loss": 2.347, "step": 292420 }, { "epoch": 1.1304525985372114, "grad_norm": 0.10157563537359238, "learning_rate": 0.002, "loss": 2.3291, "step": 292430 }, { "epoch": 1.1304912557405946, "grad_norm": 0.09826963394880295, "learning_rate": 0.002, "loss": 2.3375, "step": 292440 }, { "epoch": 1.1305299129439779, "grad_norm": 0.10715880990028381, "learning_rate": 0.002, "loss": 2.332, "step": 292450 }, { "epoch": 1.1305685701473613, "grad_norm": 0.1121690645813942, "learning_rate": 0.002, "loss": 2.3281, "step": 292460 }, { "epoch": 1.1306072273507446, "grad_norm": 0.10457079857587814, "learning_rate": 0.002, "loss": 2.3329, "step": 292470 }, { "epoch": 1.1306458845541278, "grad_norm": 0.10679171979427338, "learning_rate": 0.002, "loss": 2.3354, "step": 292480 }, { "epoch": 1.130684541757511, "grad_norm": 0.0944804698228836, "learning_rate": 0.002, "loss": 2.3333, "step": 292490 }, { "epoch": 1.1307231989608943, "grad_norm": 0.0977306142449379, "learning_rate": 0.002, "loss": 2.3262, "step": 292500 }, { "epoch": 1.1307618561642776, "grad_norm": 0.10510288178920746, "learning_rate": 0.002, "loss": 2.3407, "step": 292510 }, { "epoch": 1.1308005133676609, "grad_norm": 0.12686295807361603, "learning_rate": 0.002, "loss": 2.3413, "step": 292520 }, { "epoch": 1.1308391705710443, "grad_norm": 0.11069813370704651, "learning_rate": 0.002, "loss": 2.3356, "step": 292530 }, { "epoch": 1.1308778277744276, "grad_norm": 0.11886830627918243, "learning_rate": 0.002, "loss": 2.3288, "step": 292540 }, { "epoch": 1.1309164849778108, "grad_norm": 0.09395457059144974, "learning_rate": 0.002, "loss": 2.3286, "step": 292550 }, { "epoch": 1.130955142181194, "grad_norm": 0.09358373284339905, "learning_rate": 0.002, "loss": 2.3419, "step": 292560 }, { "epoch": 1.1309937993845773, "grad_norm": 0.10663779079914093, "learning_rate": 0.002, "loss": 2.3346, "step": 292570 }, { "epoch": 1.1310324565879606, "grad_norm": 0.10025444626808167, "learning_rate": 0.002, "loss": 2.3436, "step": 292580 }, { "epoch": 1.1310711137913438, "grad_norm": 0.10433932393789291, "learning_rate": 0.002, "loss": 2.3303, "step": 292590 }, { "epoch": 1.131109770994727, "grad_norm": 0.1081257089972496, "learning_rate": 0.002, "loss": 2.3248, "step": 292600 }, { "epoch": 1.1311484281981103, "grad_norm": 0.09624449908733368, "learning_rate": 0.002, "loss": 2.3403, "step": 292610 }, { "epoch": 1.1311870854014936, "grad_norm": 0.09565295279026031, "learning_rate": 0.002, "loss": 2.3324, "step": 292620 }, { "epoch": 1.131225742604877, "grad_norm": 0.11357071995735168, "learning_rate": 0.002, "loss": 2.3368, "step": 292630 }, { "epoch": 1.1312643998082603, "grad_norm": 0.09906624257564545, "learning_rate": 0.002, "loss": 2.3158, "step": 292640 }, { "epoch": 1.1313030570116436, "grad_norm": 0.11691075563430786, "learning_rate": 0.002, "loss": 2.334, "step": 292650 }, { "epoch": 1.1313417142150268, "grad_norm": 0.10988210141658783, "learning_rate": 0.002, "loss": 2.3151, "step": 292660 }, { "epoch": 1.13138037141841, "grad_norm": 0.08801021426916122, "learning_rate": 0.002, "loss": 2.3472, "step": 292670 }, { "epoch": 1.1314190286217933, "grad_norm": 0.11762183904647827, "learning_rate": 0.002, "loss": 2.3442, "step": 292680 }, { "epoch": 1.1314576858251766, "grad_norm": 0.10153577476739883, "learning_rate": 0.002, "loss": 2.3256, "step": 292690 }, { "epoch": 1.13149634302856, "grad_norm": 0.11427845060825348, "learning_rate": 0.002, "loss": 2.3381, "step": 292700 }, { "epoch": 1.1315350002319433, "grad_norm": 0.12625622749328613, "learning_rate": 0.002, "loss": 2.3263, "step": 292710 }, { "epoch": 1.1315736574353266, "grad_norm": 0.10108830034732819, "learning_rate": 0.002, "loss": 2.3418, "step": 292720 }, { "epoch": 1.1316123146387098, "grad_norm": 0.09629391878843307, "learning_rate": 0.002, "loss": 2.346, "step": 292730 }, { "epoch": 1.131650971842093, "grad_norm": 0.09989041835069656, "learning_rate": 0.002, "loss": 2.348, "step": 292740 }, { "epoch": 1.1316896290454763, "grad_norm": 0.10820818692445755, "learning_rate": 0.002, "loss": 2.3271, "step": 292750 }, { "epoch": 1.1317282862488596, "grad_norm": 0.10253668576478958, "learning_rate": 0.002, "loss": 2.3427, "step": 292760 }, { "epoch": 1.1317669434522428, "grad_norm": 0.09603530168533325, "learning_rate": 0.002, "loss": 2.3278, "step": 292770 }, { "epoch": 1.131805600655626, "grad_norm": 0.1039159968495369, "learning_rate": 0.002, "loss": 2.3361, "step": 292780 }, { "epoch": 1.1318442578590093, "grad_norm": 0.10775238275527954, "learning_rate": 0.002, "loss": 2.331, "step": 292790 }, { "epoch": 1.1318829150623928, "grad_norm": 0.09700698405504227, "learning_rate": 0.002, "loss": 2.3302, "step": 292800 }, { "epoch": 1.131921572265776, "grad_norm": 0.09702009707689285, "learning_rate": 0.002, "loss": 2.3305, "step": 292810 }, { "epoch": 1.1319602294691593, "grad_norm": 0.09258618950843811, "learning_rate": 0.002, "loss": 2.3507, "step": 292820 }, { "epoch": 1.1319988866725426, "grad_norm": 0.1123087927699089, "learning_rate": 0.002, "loss": 2.3379, "step": 292830 }, { "epoch": 1.1320375438759258, "grad_norm": 0.0948568657040596, "learning_rate": 0.002, "loss": 2.3302, "step": 292840 }, { "epoch": 1.132076201079309, "grad_norm": 0.1217011883854866, "learning_rate": 0.002, "loss": 2.3265, "step": 292850 }, { "epoch": 1.1321148582826923, "grad_norm": 0.10090646147727966, "learning_rate": 0.002, "loss": 2.3238, "step": 292860 }, { "epoch": 1.1321535154860758, "grad_norm": 0.10404011607170105, "learning_rate": 0.002, "loss": 2.3368, "step": 292870 }, { "epoch": 1.132192172689459, "grad_norm": 0.1336078643798828, "learning_rate": 0.002, "loss": 2.3447, "step": 292880 }, { "epoch": 1.1322308298928423, "grad_norm": 0.12429642677307129, "learning_rate": 0.002, "loss": 2.3432, "step": 292890 }, { "epoch": 1.1322694870962255, "grad_norm": 0.1276293247938156, "learning_rate": 0.002, "loss": 2.3377, "step": 292900 }, { "epoch": 1.1323081442996088, "grad_norm": 0.09542718529701233, "learning_rate": 0.002, "loss": 2.3418, "step": 292910 }, { "epoch": 1.132346801502992, "grad_norm": 0.1164063885807991, "learning_rate": 0.002, "loss": 2.3379, "step": 292920 }, { "epoch": 1.1323854587063753, "grad_norm": 0.10636220127344131, "learning_rate": 0.002, "loss": 2.3444, "step": 292930 }, { "epoch": 1.1324241159097586, "grad_norm": 0.09025134891271591, "learning_rate": 0.002, "loss": 2.3472, "step": 292940 }, { "epoch": 1.1324627731131418, "grad_norm": 0.11995851248502731, "learning_rate": 0.002, "loss": 2.3441, "step": 292950 }, { "epoch": 1.132501430316525, "grad_norm": 0.1027119979262352, "learning_rate": 0.002, "loss": 2.3378, "step": 292960 }, { "epoch": 1.1325400875199085, "grad_norm": 0.10810244828462601, "learning_rate": 0.002, "loss": 2.3355, "step": 292970 }, { "epoch": 1.1325787447232918, "grad_norm": 0.10233033448457718, "learning_rate": 0.002, "loss": 2.342, "step": 292980 }, { "epoch": 1.132617401926675, "grad_norm": 0.10700412839651108, "learning_rate": 0.002, "loss": 2.3393, "step": 292990 }, { "epoch": 1.1326560591300583, "grad_norm": 0.11041572690010071, "learning_rate": 0.002, "loss": 2.3384, "step": 293000 }, { "epoch": 1.1326947163334415, "grad_norm": 0.10478068143129349, "learning_rate": 0.002, "loss": 2.3273, "step": 293010 }, { "epoch": 1.1327333735368248, "grad_norm": 0.10832428187131882, "learning_rate": 0.002, "loss": 2.3424, "step": 293020 }, { "epoch": 1.132772030740208, "grad_norm": 0.12409119307994843, "learning_rate": 0.002, "loss": 2.3242, "step": 293030 }, { "epoch": 1.1328106879435915, "grad_norm": 0.0951986163854599, "learning_rate": 0.002, "loss": 2.3357, "step": 293040 }, { "epoch": 1.1328493451469748, "grad_norm": 0.09725385904312134, "learning_rate": 0.002, "loss": 2.3262, "step": 293050 }, { "epoch": 1.132888002350358, "grad_norm": 0.12577664852142334, "learning_rate": 0.002, "loss": 2.3378, "step": 293060 }, { "epoch": 1.1329266595537413, "grad_norm": 0.12517467141151428, "learning_rate": 0.002, "loss": 2.3436, "step": 293070 }, { "epoch": 1.1329653167571245, "grad_norm": 0.10554498434066772, "learning_rate": 0.002, "loss": 2.3315, "step": 293080 }, { "epoch": 1.1330039739605078, "grad_norm": 0.09695900976657867, "learning_rate": 0.002, "loss": 2.3427, "step": 293090 }, { "epoch": 1.133042631163891, "grad_norm": 0.1009320542216301, "learning_rate": 0.002, "loss": 2.3262, "step": 293100 }, { "epoch": 1.1330812883672743, "grad_norm": 0.09868354350328445, "learning_rate": 0.002, "loss": 2.3376, "step": 293110 }, { "epoch": 1.1331199455706575, "grad_norm": 0.10926561802625656, "learning_rate": 0.002, "loss": 2.3289, "step": 293120 }, { "epoch": 1.1331586027740408, "grad_norm": 0.11301583796739578, "learning_rate": 0.002, "loss": 2.337, "step": 293130 }, { "epoch": 1.1331972599774243, "grad_norm": 0.08914438635110855, "learning_rate": 0.002, "loss": 2.3415, "step": 293140 }, { "epoch": 1.1332359171808075, "grad_norm": 0.10104355961084366, "learning_rate": 0.002, "loss": 2.344, "step": 293150 }, { "epoch": 1.1332745743841908, "grad_norm": 0.10801784694194794, "learning_rate": 0.002, "loss": 2.3373, "step": 293160 }, { "epoch": 1.133313231587574, "grad_norm": 0.11123783141374588, "learning_rate": 0.002, "loss": 2.3263, "step": 293170 }, { "epoch": 1.1333518887909573, "grad_norm": 0.09961264580488205, "learning_rate": 0.002, "loss": 2.331, "step": 293180 }, { "epoch": 1.1333905459943405, "grad_norm": 0.09766477346420288, "learning_rate": 0.002, "loss": 2.3345, "step": 293190 }, { "epoch": 1.1334292031977238, "grad_norm": 0.11179253458976746, "learning_rate": 0.002, "loss": 2.3396, "step": 293200 }, { "epoch": 1.1334678604011073, "grad_norm": 0.14136448502540588, "learning_rate": 0.002, "loss": 2.3263, "step": 293210 }, { "epoch": 1.1335065176044905, "grad_norm": 0.100206658244133, "learning_rate": 0.002, "loss": 2.3224, "step": 293220 }, { "epoch": 1.1335451748078738, "grad_norm": 0.09034978598356247, "learning_rate": 0.002, "loss": 2.3408, "step": 293230 }, { "epoch": 1.133583832011257, "grad_norm": 0.10155314952135086, "learning_rate": 0.002, "loss": 2.3316, "step": 293240 }, { "epoch": 1.1336224892146403, "grad_norm": 0.09789247810840607, "learning_rate": 0.002, "loss": 2.3479, "step": 293250 }, { "epoch": 1.1336611464180235, "grad_norm": 0.12007251381874084, "learning_rate": 0.002, "loss": 2.3364, "step": 293260 }, { "epoch": 1.1336998036214068, "grad_norm": 0.0898607075214386, "learning_rate": 0.002, "loss": 2.3411, "step": 293270 }, { "epoch": 1.13373846082479, "grad_norm": 0.09242238104343414, "learning_rate": 0.002, "loss": 2.3457, "step": 293280 }, { "epoch": 1.1337771180281733, "grad_norm": 0.13049384951591492, "learning_rate": 0.002, "loss": 2.3502, "step": 293290 }, { "epoch": 1.1338157752315567, "grad_norm": 0.10886628925800323, "learning_rate": 0.002, "loss": 2.3223, "step": 293300 }, { "epoch": 1.13385443243494, "grad_norm": 0.10137798637151718, "learning_rate": 0.002, "loss": 2.3215, "step": 293310 }, { "epoch": 1.1338930896383232, "grad_norm": 0.09787803143262863, "learning_rate": 0.002, "loss": 2.3403, "step": 293320 }, { "epoch": 1.1339317468417065, "grad_norm": 0.11174280196428299, "learning_rate": 0.002, "loss": 2.3311, "step": 293330 }, { "epoch": 1.1339704040450898, "grad_norm": 0.11473164707422256, "learning_rate": 0.002, "loss": 2.3341, "step": 293340 }, { "epoch": 1.134009061248473, "grad_norm": 0.10111039876937866, "learning_rate": 0.002, "loss": 2.3211, "step": 293350 }, { "epoch": 1.1340477184518563, "grad_norm": 0.1075422465801239, "learning_rate": 0.002, "loss": 2.3389, "step": 293360 }, { "epoch": 1.1340863756552395, "grad_norm": 0.11484236270189285, "learning_rate": 0.002, "loss": 2.3285, "step": 293370 }, { "epoch": 1.134125032858623, "grad_norm": 0.09201221913099289, "learning_rate": 0.002, "loss": 2.3257, "step": 293380 }, { "epoch": 1.1341636900620062, "grad_norm": 0.12520328164100647, "learning_rate": 0.002, "loss": 2.3353, "step": 293390 }, { "epoch": 1.1342023472653895, "grad_norm": 0.09377523511648178, "learning_rate": 0.002, "loss": 2.3412, "step": 293400 }, { "epoch": 1.1342410044687727, "grad_norm": 0.11587250977754593, "learning_rate": 0.002, "loss": 2.3158, "step": 293410 }, { "epoch": 1.134279661672156, "grad_norm": 0.10029492527246475, "learning_rate": 0.002, "loss": 2.3471, "step": 293420 }, { "epoch": 1.1343183188755392, "grad_norm": 0.09807023406028748, "learning_rate": 0.002, "loss": 2.3485, "step": 293430 }, { "epoch": 1.1343569760789225, "grad_norm": 0.10845301300287247, "learning_rate": 0.002, "loss": 2.3484, "step": 293440 }, { "epoch": 1.1343956332823057, "grad_norm": 0.11254344135522842, "learning_rate": 0.002, "loss": 2.3434, "step": 293450 }, { "epoch": 1.134434290485689, "grad_norm": 0.09739566594362259, "learning_rate": 0.002, "loss": 2.3384, "step": 293460 }, { "epoch": 1.1344729476890725, "grad_norm": 0.11345074325799942, "learning_rate": 0.002, "loss": 2.3327, "step": 293470 }, { "epoch": 1.1345116048924557, "grad_norm": 0.11020570993423462, "learning_rate": 0.002, "loss": 2.3427, "step": 293480 }, { "epoch": 1.134550262095839, "grad_norm": 0.095384381711483, "learning_rate": 0.002, "loss": 2.3311, "step": 293490 }, { "epoch": 1.1345889192992222, "grad_norm": 0.11963685601949692, "learning_rate": 0.002, "loss": 2.3318, "step": 293500 }, { "epoch": 1.1346275765026055, "grad_norm": 0.11871231347322464, "learning_rate": 0.002, "loss": 2.3294, "step": 293510 }, { "epoch": 1.1346662337059887, "grad_norm": 0.10440956801176071, "learning_rate": 0.002, "loss": 2.3373, "step": 293520 }, { "epoch": 1.134704890909372, "grad_norm": 0.12999387085437775, "learning_rate": 0.002, "loss": 2.3427, "step": 293530 }, { "epoch": 1.1347435481127552, "grad_norm": 0.1252586394548416, "learning_rate": 0.002, "loss": 2.3379, "step": 293540 }, { "epoch": 1.1347822053161387, "grad_norm": 0.09814410656690598, "learning_rate": 0.002, "loss": 2.3536, "step": 293550 }, { "epoch": 1.134820862519522, "grad_norm": 0.10317713022232056, "learning_rate": 0.002, "loss": 2.3212, "step": 293560 }, { "epoch": 1.1348595197229052, "grad_norm": 0.09887121617794037, "learning_rate": 0.002, "loss": 2.3392, "step": 293570 }, { "epoch": 1.1348981769262885, "grad_norm": 0.09725435823202133, "learning_rate": 0.002, "loss": 2.3362, "step": 293580 }, { "epoch": 1.1349368341296717, "grad_norm": 0.11813397705554962, "learning_rate": 0.002, "loss": 2.3411, "step": 293590 }, { "epoch": 1.134975491333055, "grad_norm": 0.10130724310874939, "learning_rate": 0.002, "loss": 2.3437, "step": 293600 }, { "epoch": 1.1350141485364382, "grad_norm": 0.10235709697008133, "learning_rate": 0.002, "loss": 2.3342, "step": 293610 }, { "epoch": 1.1350528057398215, "grad_norm": 0.11193295568227768, "learning_rate": 0.002, "loss": 2.3325, "step": 293620 }, { "epoch": 1.1350914629432047, "grad_norm": 0.11110543459653854, "learning_rate": 0.002, "loss": 2.3505, "step": 293630 }, { "epoch": 1.1351301201465882, "grad_norm": 0.0979127287864685, "learning_rate": 0.002, "loss": 2.3324, "step": 293640 }, { "epoch": 1.1351687773499715, "grad_norm": 0.09800735116004944, "learning_rate": 0.002, "loss": 2.3263, "step": 293650 }, { "epoch": 1.1352074345533547, "grad_norm": 0.13615325093269348, "learning_rate": 0.002, "loss": 2.3373, "step": 293660 }, { "epoch": 1.135246091756738, "grad_norm": 0.09840956330299377, "learning_rate": 0.002, "loss": 2.3373, "step": 293670 }, { "epoch": 1.1352847489601212, "grad_norm": 0.09474692493677139, "learning_rate": 0.002, "loss": 2.3424, "step": 293680 }, { "epoch": 1.1353234061635045, "grad_norm": 0.12110727280378342, "learning_rate": 0.002, "loss": 2.3287, "step": 293690 }, { "epoch": 1.1353620633668877, "grad_norm": 0.10657018423080444, "learning_rate": 0.002, "loss": 2.3368, "step": 293700 }, { "epoch": 1.135400720570271, "grad_norm": 0.10020346194505692, "learning_rate": 0.002, "loss": 2.3357, "step": 293710 }, { "epoch": 1.1354393777736544, "grad_norm": 0.09850382059812546, "learning_rate": 0.002, "loss": 2.3411, "step": 293720 }, { "epoch": 1.1354780349770377, "grad_norm": 0.10717561841011047, "learning_rate": 0.002, "loss": 2.3214, "step": 293730 }, { "epoch": 1.135516692180421, "grad_norm": 0.1006290391087532, "learning_rate": 0.002, "loss": 2.324, "step": 293740 }, { "epoch": 1.1355553493838042, "grad_norm": 0.10350465029478073, "learning_rate": 0.002, "loss": 2.3494, "step": 293750 }, { "epoch": 1.1355940065871875, "grad_norm": 0.1130458265542984, "learning_rate": 0.002, "loss": 2.3366, "step": 293760 }, { "epoch": 1.1356326637905707, "grad_norm": 0.11281120777130127, "learning_rate": 0.002, "loss": 2.3237, "step": 293770 }, { "epoch": 1.135671320993954, "grad_norm": 0.10524564981460571, "learning_rate": 0.002, "loss": 2.3313, "step": 293780 }, { "epoch": 1.1357099781973372, "grad_norm": 0.11162056028842926, "learning_rate": 0.002, "loss": 2.349, "step": 293790 }, { "epoch": 1.1357486354007205, "grad_norm": 0.10000617057085037, "learning_rate": 0.002, "loss": 2.3386, "step": 293800 }, { "epoch": 1.135787292604104, "grad_norm": 0.12025784701108932, "learning_rate": 0.002, "loss": 2.3336, "step": 293810 }, { "epoch": 1.1358259498074872, "grad_norm": 0.09196845442056656, "learning_rate": 0.002, "loss": 2.3351, "step": 293820 }, { "epoch": 1.1358646070108704, "grad_norm": 0.1028822734951973, "learning_rate": 0.002, "loss": 2.3381, "step": 293830 }, { "epoch": 1.1359032642142537, "grad_norm": 0.11393461376428604, "learning_rate": 0.002, "loss": 2.3316, "step": 293840 }, { "epoch": 1.135941921417637, "grad_norm": 0.11156242340803146, "learning_rate": 0.002, "loss": 2.3445, "step": 293850 }, { "epoch": 1.1359805786210202, "grad_norm": 0.127973273396492, "learning_rate": 0.002, "loss": 2.3235, "step": 293860 }, { "epoch": 1.1360192358244035, "grad_norm": 0.11327163130044937, "learning_rate": 0.002, "loss": 2.3322, "step": 293870 }, { "epoch": 1.1360578930277867, "grad_norm": 0.0905095785856247, "learning_rate": 0.002, "loss": 2.3308, "step": 293880 }, { "epoch": 1.1360965502311702, "grad_norm": 0.09544403851032257, "learning_rate": 0.002, "loss": 2.3457, "step": 293890 }, { "epoch": 1.1361352074345534, "grad_norm": 0.11277344822883606, "learning_rate": 0.002, "loss": 2.3421, "step": 293900 }, { "epoch": 1.1361738646379367, "grad_norm": 0.12489354610443115, "learning_rate": 0.002, "loss": 2.3396, "step": 293910 }, { "epoch": 1.13621252184132, "grad_norm": 0.11960267275571823, "learning_rate": 0.002, "loss": 2.3425, "step": 293920 }, { "epoch": 1.1362511790447032, "grad_norm": 0.11858020722866058, "learning_rate": 0.002, "loss": 2.3492, "step": 293930 }, { "epoch": 1.1362898362480864, "grad_norm": 0.10492021590471268, "learning_rate": 0.002, "loss": 2.3273, "step": 293940 }, { "epoch": 1.1363284934514697, "grad_norm": 0.11255805194377899, "learning_rate": 0.002, "loss": 2.334, "step": 293950 }, { "epoch": 1.136367150654853, "grad_norm": 0.09281715750694275, "learning_rate": 0.002, "loss": 2.349, "step": 293960 }, { "epoch": 1.1364058078582362, "grad_norm": 0.11357801407575607, "learning_rate": 0.002, "loss": 2.3213, "step": 293970 }, { "epoch": 1.1364444650616197, "grad_norm": 0.1001327857375145, "learning_rate": 0.002, "loss": 2.3303, "step": 293980 }, { "epoch": 1.136483122265003, "grad_norm": 0.09641039371490479, "learning_rate": 0.002, "loss": 2.334, "step": 293990 }, { "epoch": 1.1365217794683862, "grad_norm": 0.13236850500106812, "learning_rate": 0.002, "loss": 2.3323, "step": 294000 }, { "epoch": 1.1365604366717694, "grad_norm": 0.10867318511009216, "learning_rate": 0.002, "loss": 2.3337, "step": 294010 }, { "epoch": 1.1365990938751527, "grad_norm": 0.10816536098718643, "learning_rate": 0.002, "loss": 2.3436, "step": 294020 }, { "epoch": 1.136637751078536, "grad_norm": 0.11130563914775848, "learning_rate": 0.002, "loss": 2.3239, "step": 294030 }, { "epoch": 1.1366764082819192, "grad_norm": 0.11933492124080658, "learning_rate": 0.002, "loss": 2.3344, "step": 294040 }, { "epoch": 1.1367150654853027, "grad_norm": 0.10061101615428925, "learning_rate": 0.002, "loss": 2.355, "step": 294050 }, { "epoch": 1.136753722688686, "grad_norm": 0.11716160923242569, "learning_rate": 0.002, "loss": 2.3374, "step": 294060 }, { "epoch": 1.1367923798920692, "grad_norm": 0.10237450897693634, "learning_rate": 0.002, "loss": 2.3347, "step": 294070 }, { "epoch": 1.1368310370954524, "grad_norm": 0.10022053867578506, "learning_rate": 0.002, "loss": 2.3383, "step": 294080 }, { "epoch": 1.1368696942988357, "grad_norm": 0.10771946609020233, "learning_rate": 0.002, "loss": 2.3408, "step": 294090 }, { "epoch": 1.136908351502219, "grad_norm": 0.11842872202396393, "learning_rate": 0.002, "loss": 2.3475, "step": 294100 }, { "epoch": 1.1369470087056022, "grad_norm": 0.11161211878061295, "learning_rate": 0.002, "loss": 2.3437, "step": 294110 }, { "epoch": 1.1369856659089854, "grad_norm": 0.09512294828891754, "learning_rate": 0.002, "loss": 2.3285, "step": 294120 }, { "epoch": 1.1370243231123687, "grad_norm": 0.0964144691824913, "learning_rate": 0.002, "loss": 2.3379, "step": 294130 }, { "epoch": 1.137062980315752, "grad_norm": 0.09910161048173904, "learning_rate": 0.002, "loss": 2.3382, "step": 294140 }, { "epoch": 1.1371016375191354, "grad_norm": 0.09445329755544662, "learning_rate": 0.002, "loss": 2.3456, "step": 294150 }, { "epoch": 1.1371402947225187, "grad_norm": 0.11625587195158005, "learning_rate": 0.002, "loss": 2.3424, "step": 294160 }, { "epoch": 1.137178951925902, "grad_norm": 0.1302066296339035, "learning_rate": 0.002, "loss": 2.3287, "step": 294170 }, { "epoch": 1.1372176091292852, "grad_norm": 0.11113950610160828, "learning_rate": 0.002, "loss": 2.3216, "step": 294180 }, { "epoch": 1.1372562663326684, "grad_norm": 0.09936251491308212, "learning_rate": 0.002, "loss": 2.3331, "step": 294190 }, { "epoch": 1.1372949235360517, "grad_norm": 0.09935726970434189, "learning_rate": 0.002, "loss": 2.3294, "step": 294200 }, { "epoch": 1.137333580739435, "grad_norm": 0.10621356219053268, "learning_rate": 0.002, "loss": 2.3372, "step": 294210 }, { "epoch": 1.1373722379428184, "grad_norm": 0.11173443496227264, "learning_rate": 0.002, "loss": 2.3541, "step": 294220 }, { "epoch": 1.1374108951462016, "grad_norm": 0.10126882791519165, "learning_rate": 0.002, "loss": 2.3308, "step": 294230 }, { "epoch": 1.137449552349585, "grad_norm": 0.10050792247056961, "learning_rate": 0.002, "loss": 2.3548, "step": 294240 }, { "epoch": 1.1374882095529681, "grad_norm": 0.10208075493574142, "learning_rate": 0.002, "loss": 2.3523, "step": 294250 }, { "epoch": 1.1375268667563514, "grad_norm": 0.11423249542713165, "learning_rate": 0.002, "loss": 2.3308, "step": 294260 }, { "epoch": 1.1375655239597346, "grad_norm": 0.0945185050368309, "learning_rate": 0.002, "loss": 2.3419, "step": 294270 }, { "epoch": 1.137604181163118, "grad_norm": 0.09072756767272949, "learning_rate": 0.002, "loss": 2.3241, "step": 294280 }, { "epoch": 1.1376428383665012, "grad_norm": 0.10278897732496262, "learning_rate": 0.002, "loss": 2.3507, "step": 294290 }, { "epoch": 1.1376814955698844, "grad_norm": 0.12045719474554062, "learning_rate": 0.002, "loss": 2.3318, "step": 294300 }, { "epoch": 1.1377201527732677, "grad_norm": 0.1090647280216217, "learning_rate": 0.002, "loss": 2.3403, "step": 294310 }, { "epoch": 1.1377588099766511, "grad_norm": 0.0932658389210701, "learning_rate": 0.002, "loss": 2.3366, "step": 294320 }, { "epoch": 1.1377974671800344, "grad_norm": 0.11652059853076935, "learning_rate": 0.002, "loss": 2.3297, "step": 294330 }, { "epoch": 1.1378361243834176, "grad_norm": 0.13362693786621094, "learning_rate": 0.002, "loss": 2.3386, "step": 294340 }, { "epoch": 1.1378747815868009, "grad_norm": 0.09870358556509018, "learning_rate": 0.002, "loss": 2.336, "step": 294350 }, { "epoch": 1.1379134387901841, "grad_norm": 0.14037330448627472, "learning_rate": 0.002, "loss": 2.3285, "step": 294360 }, { "epoch": 1.1379520959935674, "grad_norm": 0.10037653893232346, "learning_rate": 0.002, "loss": 2.3332, "step": 294370 }, { "epoch": 1.1379907531969506, "grad_norm": 0.0925574079155922, "learning_rate": 0.002, "loss": 2.3346, "step": 294380 }, { "epoch": 1.1380294104003341, "grad_norm": 0.09062647819519043, "learning_rate": 0.002, "loss": 2.3348, "step": 294390 }, { "epoch": 1.1380680676037174, "grad_norm": 0.09871121495962143, "learning_rate": 0.002, "loss": 2.3265, "step": 294400 }, { "epoch": 1.1381067248071006, "grad_norm": 0.10257646441459656, "learning_rate": 0.002, "loss": 2.3372, "step": 294410 }, { "epoch": 1.1381453820104839, "grad_norm": 0.12807287275791168, "learning_rate": 0.002, "loss": 2.3461, "step": 294420 }, { "epoch": 1.1381840392138671, "grad_norm": 0.14499905705451965, "learning_rate": 0.002, "loss": 2.3356, "step": 294430 }, { "epoch": 1.1382226964172504, "grad_norm": 0.10224590450525284, "learning_rate": 0.002, "loss": 2.3365, "step": 294440 }, { "epoch": 1.1382613536206336, "grad_norm": 0.08267837762832642, "learning_rate": 0.002, "loss": 2.3431, "step": 294450 }, { "epoch": 1.1383000108240169, "grad_norm": 0.10924314707517624, "learning_rate": 0.002, "loss": 2.3215, "step": 294460 }, { "epoch": 1.1383386680274001, "grad_norm": 0.10160164535045624, "learning_rate": 0.002, "loss": 2.32, "step": 294470 }, { "epoch": 1.1383773252307834, "grad_norm": 0.10632597655057907, "learning_rate": 0.002, "loss": 2.341, "step": 294480 }, { "epoch": 1.1384159824341669, "grad_norm": 0.13016681373119354, "learning_rate": 0.002, "loss": 2.3238, "step": 294490 }, { "epoch": 1.1384546396375501, "grad_norm": 0.1169414222240448, "learning_rate": 0.002, "loss": 2.3444, "step": 294500 }, { "epoch": 1.1384932968409334, "grad_norm": 0.09101717919111252, "learning_rate": 0.002, "loss": 2.3279, "step": 294510 }, { "epoch": 1.1385319540443166, "grad_norm": 0.09878969192504883, "learning_rate": 0.002, "loss": 2.3464, "step": 294520 }, { "epoch": 1.1385706112476999, "grad_norm": 0.09379914402961731, "learning_rate": 0.002, "loss": 2.3415, "step": 294530 }, { "epoch": 1.1386092684510831, "grad_norm": 0.10676123946905136, "learning_rate": 0.002, "loss": 2.345, "step": 294540 }, { "epoch": 1.1386479256544664, "grad_norm": 0.10265262424945831, "learning_rate": 0.002, "loss": 2.3294, "step": 294550 }, { "epoch": 1.1386865828578498, "grad_norm": 0.11115993559360504, "learning_rate": 0.002, "loss": 2.349, "step": 294560 }, { "epoch": 1.138725240061233, "grad_norm": 0.1104055792093277, "learning_rate": 0.002, "loss": 2.34, "step": 294570 }, { "epoch": 1.1387638972646164, "grad_norm": 0.10094203054904938, "learning_rate": 0.002, "loss": 2.3276, "step": 294580 }, { "epoch": 1.1388025544679996, "grad_norm": 0.09028852730989456, "learning_rate": 0.002, "loss": 2.3426, "step": 294590 }, { "epoch": 1.1388412116713829, "grad_norm": 0.09736517071723938, "learning_rate": 0.002, "loss": 2.3415, "step": 294600 }, { "epoch": 1.138879868874766, "grad_norm": 0.12073744833469391, "learning_rate": 0.002, "loss": 2.315, "step": 294610 }, { "epoch": 1.1389185260781494, "grad_norm": 0.10712353140115738, "learning_rate": 0.002, "loss": 2.3188, "step": 294620 }, { "epoch": 1.1389571832815326, "grad_norm": 0.11008912324905396, "learning_rate": 0.002, "loss": 2.3354, "step": 294630 }, { "epoch": 1.1389958404849159, "grad_norm": 0.0936817154288292, "learning_rate": 0.002, "loss": 2.3381, "step": 294640 }, { "epoch": 1.1390344976882991, "grad_norm": 0.11594562977552414, "learning_rate": 0.002, "loss": 2.3312, "step": 294650 }, { "epoch": 1.1390731548916826, "grad_norm": 0.10609795898199081, "learning_rate": 0.002, "loss": 2.3348, "step": 294660 }, { "epoch": 1.1391118120950658, "grad_norm": 0.12127210199832916, "learning_rate": 0.002, "loss": 2.3362, "step": 294670 }, { "epoch": 1.139150469298449, "grad_norm": 0.08909579366445541, "learning_rate": 0.002, "loss": 2.3406, "step": 294680 }, { "epoch": 1.1391891265018323, "grad_norm": 0.12188173830509186, "learning_rate": 0.002, "loss": 2.3388, "step": 294690 }, { "epoch": 1.1392277837052156, "grad_norm": 0.10340917110443115, "learning_rate": 0.002, "loss": 2.3403, "step": 294700 }, { "epoch": 1.1392664409085989, "grad_norm": 0.10992109775543213, "learning_rate": 0.002, "loss": 2.3379, "step": 294710 }, { "epoch": 1.139305098111982, "grad_norm": 0.0943150594830513, "learning_rate": 0.002, "loss": 2.3258, "step": 294720 }, { "epoch": 1.1393437553153656, "grad_norm": 0.10311193764209747, "learning_rate": 0.002, "loss": 2.3301, "step": 294730 }, { "epoch": 1.1393824125187488, "grad_norm": 0.09406863898038864, "learning_rate": 0.002, "loss": 2.3237, "step": 294740 }, { "epoch": 1.139421069722132, "grad_norm": 0.10221472382545471, "learning_rate": 0.002, "loss": 2.3221, "step": 294750 }, { "epoch": 1.1394597269255153, "grad_norm": 0.13521109521389008, "learning_rate": 0.002, "loss": 2.333, "step": 294760 }, { "epoch": 1.1394983841288986, "grad_norm": 0.09720505774021149, "learning_rate": 0.002, "loss": 2.3388, "step": 294770 }, { "epoch": 1.1395370413322818, "grad_norm": 0.1073853075504303, "learning_rate": 0.002, "loss": 2.3386, "step": 294780 }, { "epoch": 1.139575698535665, "grad_norm": 0.11814963817596436, "learning_rate": 0.002, "loss": 2.3366, "step": 294790 }, { "epoch": 1.1396143557390483, "grad_norm": 0.10372687876224518, "learning_rate": 0.002, "loss": 2.3347, "step": 294800 }, { "epoch": 1.1396530129424316, "grad_norm": 0.11656109243631363, "learning_rate": 0.002, "loss": 2.3445, "step": 294810 }, { "epoch": 1.1396916701458149, "grad_norm": 0.08834312111139297, "learning_rate": 0.002, "loss": 2.3349, "step": 294820 }, { "epoch": 1.1397303273491983, "grad_norm": 0.1092064306139946, "learning_rate": 0.002, "loss": 2.3413, "step": 294830 }, { "epoch": 1.1397689845525816, "grad_norm": 0.10597414523363113, "learning_rate": 0.002, "loss": 2.3396, "step": 294840 }, { "epoch": 1.1398076417559648, "grad_norm": 0.09589572250843048, "learning_rate": 0.002, "loss": 2.322, "step": 294850 }, { "epoch": 1.139846298959348, "grad_norm": 0.12765908241271973, "learning_rate": 0.002, "loss": 2.3463, "step": 294860 }, { "epoch": 1.1398849561627313, "grad_norm": 0.11571625620126724, "learning_rate": 0.002, "loss": 2.3315, "step": 294870 }, { "epoch": 1.1399236133661146, "grad_norm": 0.09366437047719955, "learning_rate": 0.002, "loss": 2.3264, "step": 294880 }, { "epoch": 1.1399622705694978, "grad_norm": 0.08736945688724518, "learning_rate": 0.002, "loss": 2.3407, "step": 294890 }, { "epoch": 1.1400009277728813, "grad_norm": 0.0926312580704689, "learning_rate": 0.002, "loss": 2.3341, "step": 294900 }, { "epoch": 1.1400395849762646, "grad_norm": 0.12359138578176498, "learning_rate": 0.002, "loss": 2.3273, "step": 294910 }, { "epoch": 1.1400782421796478, "grad_norm": 0.09568643569946289, "learning_rate": 0.002, "loss": 2.3315, "step": 294920 }, { "epoch": 1.140116899383031, "grad_norm": 0.09694010764360428, "learning_rate": 0.002, "loss": 2.3397, "step": 294930 }, { "epoch": 1.1401555565864143, "grad_norm": 0.10709650069475174, "learning_rate": 0.002, "loss": 2.3334, "step": 294940 }, { "epoch": 1.1401942137897976, "grad_norm": 0.09399951994419098, "learning_rate": 0.002, "loss": 2.338, "step": 294950 }, { "epoch": 1.1402328709931808, "grad_norm": 0.1101020947098732, "learning_rate": 0.002, "loss": 2.3289, "step": 294960 }, { "epoch": 1.140271528196564, "grad_norm": 0.10791051387786865, "learning_rate": 0.002, "loss": 2.3348, "step": 294970 }, { "epoch": 1.1403101853999473, "grad_norm": 0.09474851936101913, "learning_rate": 0.002, "loss": 2.3402, "step": 294980 }, { "epoch": 1.1403488426033306, "grad_norm": 0.1254984587430954, "learning_rate": 0.002, "loss": 2.3388, "step": 294990 }, { "epoch": 1.140387499806714, "grad_norm": 0.11828399449586868, "learning_rate": 0.002, "loss": 2.3409, "step": 295000 }, { "epoch": 1.1404261570100973, "grad_norm": 0.13409629464149475, "learning_rate": 0.002, "loss": 2.323, "step": 295010 }, { "epoch": 1.1404648142134806, "grad_norm": 0.09929110109806061, "learning_rate": 0.002, "loss": 2.334, "step": 295020 }, { "epoch": 1.1405034714168638, "grad_norm": 0.08490047603845596, "learning_rate": 0.002, "loss": 2.323, "step": 295030 }, { "epoch": 1.140542128620247, "grad_norm": 0.1046157255768776, "learning_rate": 0.002, "loss": 2.3365, "step": 295040 }, { "epoch": 1.1405807858236303, "grad_norm": 0.10370510071516037, "learning_rate": 0.002, "loss": 2.3251, "step": 295050 }, { "epoch": 1.1406194430270136, "grad_norm": 0.09449782967567444, "learning_rate": 0.002, "loss": 2.3433, "step": 295060 }, { "epoch": 1.140658100230397, "grad_norm": 0.10334462672472, "learning_rate": 0.002, "loss": 2.3474, "step": 295070 }, { "epoch": 1.1406967574337803, "grad_norm": 0.11590588837862015, "learning_rate": 0.002, "loss": 2.3438, "step": 295080 }, { "epoch": 1.1407354146371635, "grad_norm": 0.10096020996570587, "learning_rate": 0.002, "loss": 2.3513, "step": 295090 }, { "epoch": 1.1407740718405468, "grad_norm": 0.09558035433292389, "learning_rate": 0.002, "loss": 2.3284, "step": 295100 }, { "epoch": 1.14081272904393, "grad_norm": 0.09858120232820511, "learning_rate": 0.002, "loss": 2.3452, "step": 295110 }, { "epoch": 1.1408513862473133, "grad_norm": 0.09994387626647949, "learning_rate": 0.002, "loss": 2.323, "step": 295120 }, { "epoch": 1.1408900434506966, "grad_norm": 0.08849941194057465, "learning_rate": 0.002, "loss": 2.3442, "step": 295130 }, { "epoch": 1.1409287006540798, "grad_norm": 0.123213991522789, "learning_rate": 0.002, "loss": 2.3365, "step": 295140 }, { "epoch": 1.140967357857463, "grad_norm": 0.11973105370998383, "learning_rate": 0.002, "loss": 2.3287, "step": 295150 }, { "epoch": 1.1410060150608465, "grad_norm": 0.10440967977046967, "learning_rate": 0.002, "loss": 2.3357, "step": 295160 }, { "epoch": 1.1410446722642298, "grad_norm": 0.0943630114197731, "learning_rate": 0.002, "loss": 2.3164, "step": 295170 }, { "epoch": 1.141083329467613, "grad_norm": 0.10222385823726654, "learning_rate": 0.002, "loss": 2.3243, "step": 295180 }, { "epoch": 1.1411219866709963, "grad_norm": 0.12588584423065186, "learning_rate": 0.002, "loss": 2.3409, "step": 295190 }, { "epoch": 1.1411606438743795, "grad_norm": 0.0961066260933876, "learning_rate": 0.002, "loss": 2.3299, "step": 295200 }, { "epoch": 1.1411993010777628, "grad_norm": 0.13759472966194153, "learning_rate": 0.002, "loss": 2.3166, "step": 295210 }, { "epoch": 1.141237958281146, "grad_norm": 0.1053820252418518, "learning_rate": 0.002, "loss": 2.3395, "step": 295220 }, { "epoch": 1.1412766154845293, "grad_norm": 0.11537367105484009, "learning_rate": 0.002, "loss": 2.3352, "step": 295230 }, { "epoch": 1.1413152726879128, "grad_norm": 0.10057997703552246, "learning_rate": 0.002, "loss": 2.34, "step": 295240 }, { "epoch": 1.141353929891296, "grad_norm": 0.13626791536808014, "learning_rate": 0.002, "loss": 2.348, "step": 295250 }, { "epoch": 1.1413925870946793, "grad_norm": 0.10184498876333237, "learning_rate": 0.002, "loss": 2.34, "step": 295260 }, { "epoch": 1.1414312442980625, "grad_norm": 0.11100531369447708, "learning_rate": 0.002, "loss": 2.3547, "step": 295270 }, { "epoch": 1.1414699015014458, "grad_norm": 0.09096559882164001, "learning_rate": 0.002, "loss": 2.3522, "step": 295280 }, { "epoch": 1.141508558704829, "grad_norm": 0.09893319010734558, "learning_rate": 0.002, "loss": 2.3361, "step": 295290 }, { "epoch": 1.1415472159082123, "grad_norm": 0.09062279015779495, "learning_rate": 0.002, "loss": 2.3375, "step": 295300 }, { "epoch": 1.1415858731115955, "grad_norm": 0.10352206230163574, "learning_rate": 0.002, "loss": 2.3392, "step": 295310 }, { "epoch": 1.1416245303149788, "grad_norm": 0.7122738361358643, "learning_rate": 0.002, "loss": 2.3403, "step": 295320 }, { "epoch": 1.1416631875183623, "grad_norm": 0.11961215734481812, "learning_rate": 0.002, "loss": 2.3433, "step": 295330 }, { "epoch": 1.1417018447217455, "grad_norm": 0.15325365960597992, "learning_rate": 0.002, "loss": 2.3499, "step": 295340 }, { "epoch": 1.1417405019251288, "grad_norm": 0.10539396852254868, "learning_rate": 0.002, "loss": 2.3473, "step": 295350 }, { "epoch": 1.141779159128512, "grad_norm": 0.09125971049070358, "learning_rate": 0.002, "loss": 2.3275, "step": 295360 }, { "epoch": 1.1418178163318953, "grad_norm": 0.11987537890672684, "learning_rate": 0.002, "loss": 2.3389, "step": 295370 }, { "epoch": 1.1418564735352785, "grad_norm": 0.11500542610883713, "learning_rate": 0.002, "loss": 2.331, "step": 295380 }, { "epoch": 1.1418951307386618, "grad_norm": 0.15385165810585022, "learning_rate": 0.002, "loss": 2.3348, "step": 295390 }, { "epoch": 1.141933787942045, "grad_norm": 0.09489123523235321, "learning_rate": 0.002, "loss": 2.3281, "step": 295400 }, { "epoch": 1.1419724451454285, "grad_norm": 0.10632327198982239, "learning_rate": 0.002, "loss": 2.3418, "step": 295410 }, { "epoch": 1.1420111023488118, "grad_norm": 0.09892427176237106, "learning_rate": 0.002, "loss": 2.3146, "step": 295420 }, { "epoch": 1.142049759552195, "grad_norm": 0.1019333004951477, "learning_rate": 0.002, "loss": 2.3405, "step": 295430 }, { "epoch": 1.1420884167555783, "grad_norm": 0.10518952459096909, "learning_rate": 0.002, "loss": 2.3308, "step": 295440 }, { "epoch": 1.1421270739589615, "grad_norm": 0.09612604230642319, "learning_rate": 0.002, "loss": 2.338, "step": 295450 }, { "epoch": 1.1421657311623448, "grad_norm": 0.11424669623374939, "learning_rate": 0.002, "loss": 2.3406, "step": 295460 }, { "epoch": 1.142204388365728, "grad_norm": 0.10660576075315475, "learning_rate": 0.002, "loss": 2.3321, "step": 295470 }, { "epoch": 1.1422430455691113, "grad_norm": 0.09305384755134583, "learning_rate": 0.002, "loss": 2.3307, "step": 295480 }, { "epoch": 1.1422817027724945, "grad_norm": 0.12523122131824493, "learning_rate": 0.002, "loss": 2.3254, "step": 295490 }, { "epoch": 1.142320359975878, "grad_norm": 0.10106904804706573, "learning_rate": 0.002, "loss": 2.3372, "step": 295500 }, { "epoch": 1.1423590171792612, "grad_norm": 0.10420478880405426, "learning_rate": 0.002, "loss": 2.3561, "step": 295510 }, { "epoch": 1.1423976743826445, "grad_norm": 0.11749018728733063, "learning_rate": 0.002, "loss": 2.3345, "step": 295520 }, { "epoch": 1.1424363315860278, "grad_norm": 0.10532240569591522, "learning_rate": 0.002, "loss": 2.3402, "step": 295530 }, { "epoch": 1.142474988789411, "grad_norm": 0.10158240050077438, "learning_rate": 0.002, "loss": 2.3417, "step": 295540 }, { "epoch": 1.1425136459927943, "grad_norm": 0.10830393433570862, "learning_rate": 0.002, "loss": 2.3281, "step": 295550 }, { "epoch": 1.1425523031961775, "grad_norm": 0.11087879538536072, "learning_rate": 0.002, "loss": 2.3334, "step": 295560 }, { "epoch": 1.1425909603995608, "grad_norm": 0.1073136031627655, "learning_rate": 0.002, "loss": 2.3342, "step": 295570 }, { "epoch": 1.1426296176029442, "grad_norm": 0.09680242836475372, "learning_rate": 0.002, "loss": 2.3204, "step": 295580 }, { "epoch": 1.1426682748063275, "grad_norm": 0.12555821239948273, "learning_rate": 0.002, "loss": 2.3361, "step": 295590 }, { "epoch": 1.1427069320097107, "grad_norm": 0.11523910611867905, "learning_rate": 0.002, "loss": 2.3456, "step": 295600 }, { "epoch": 1.142745589213094, "grad_norm": 0.1234579086303711, "learning_rate": 0.002, "loss": 2.3289, "step": 295610 }, { "epoch": 1.1427842464164772, "grad_norm": 0.08822614699602127, "learning_rate": 0.002, "loss": 2.3377, "step": 295620 }, { "epoch": 1.1428229036198605, "grad_norm": 0.0989428460597992, "learning_rate": 0.002, "loss": 2.3391, "step": 295630 }, { "epoch": 1.1428615608232437, "grad_norm": 0.09494653344154358, "learning_rate": 0.002, "loss": 2.3381, "step": 295640 }, { "epoch": 1.142900218026627, "grad_norm": 0.09739215672016144, "learning_rate": 0.002, "loss": 2.3351, "step": 295650 }, { "epoch": 1.1429388752300103, "grad_norm": 0.1287994533777237, "learning_rate": 0.002, "loss": 2.3371, "step": 295660 }, { "epoch": 1.1429775324333937, "grad_norm": 0.08983709663152695, "learning_rate": 0.002, "loss": 2.3507, "step": 295670 }, { "epoch": 1.143016189636777, "grad_norm": 0.10030065476894379, "learning_rate": 0.002, "loss": 2.3424, "step": 295680 }, { "epoch": 1.1430548468401602, "grad_norm": 0.10107168555259705, "learning_rate": 0.002, "loss": 2.3422, "step": 295690 }, { "epoch": 1.1430935040435435, "grad_norm": 0.10440707206726074, "learning_rate": 0.002, "loss": 2.3446, "step": 295700 }, { "epoch": 1.1431321612469267, "grad_norm": 0.11294665187597275, "learning_rate": 0.002, "loss": 2.3427, "step": 295710 }, { "epoch": 1.14317081845031, "grad_norm": 0.10706926882266998, "learning_rate": 0.002, "loss": 2.3173, "step": 295720 }, { "epoch": 1.1432094756536932, "grad_norm": 0.17112603783607483, "learning_rate": 0.002, "loss": 2.334, "step": 295730 }, { "epoch": 1.1432481328570765, "grad_norm": 0.10437934100627899, "learning_rate": 0.002, "loss": 2.3179, "step": 295740 }, { "epoch": 1.14328679006046, "grad_norm": 0.10511988401412964, "learning_rate": 0.002, "loss": 2.3254, "step": 295750 }, { "epoch": 1.1433254472638432, "grad_norm": 0.10877574980258942, "learning_rate": 0.002, "loss": 2.3492, "step": 295760 }, { "epoch": 1.1433641044672265, "grad_norm": 0.11442890018224716, "learning_rate": 0.002, "loss": 2.3542, "step": 295770 }, { "epoch": 1.1434027616706097, "grad_norm": 0.08420255035161972, "learning_rate": 0.002, "loss": 2.3484, "step": 295780 }, { "epoch": 1.143441418873993, "grad_norm": 0.11311393231153488, "learning_rate": 0.002, "loss": 2.3337, "step": 295790 }, { "epoch": 1.1434800760773762, "grad_norm": 0.09536857157945633, "learning_rate": 0.002, "loss": 2.3423, "step": 295800 }, { "epoch": 1.1435187332807595, "grad_norm": 0.1102205440402031, "learning_rate": 0.002, "loss": 2.3426, "step": 295810 }, { "epoch": 1.1435573904841427, "grad_norm": 0.10846175998449326, "learning_rate": 0.002, "loss": 2.3339, "step": 295820 }, { "epoch": 1.143596047687526, "grad_norm": 0.12481531500816345, "learning_rate": 0.002, "loss": 2.342, "step": 295830 }, { "epoch": 1.1436347048909095, "grad_norm": 0.10244545340538025, "learning_rate": 0.002, "loss": 2.3363, "step": 295840 }, { "epoch": 1.1436733620942927, "grad_norm": 0.09871228784322739, "learning_rate": 0.002, "loss": 2.3365, "step": 295850 }, { "epoch": 1.143712019297676, "grad_norm": 0.0947028174996376, "learning_rate": 0.002, "loss": 2.3341, "step": 295860 }, { "epoch": 1.1437506765010592, "grad_norm": 0.10122165083885193, "learning_rate": 0.002, "loss": 2.3289, "step": 295870 }, { "epoch": 1.1437893337044425, "grad_norm": 0.11432994157075882, "learning_rate": 0.002, "loss": 2.3492, "step": 295880 }, { "epoch": 1.1438279909078257, "grad_norm": 0.10737597197294235, "learning_rate": 0.002, "loss": 2.3526, "step": 295890 }, { "epoch": 1.143866648111209, "grad_norm": 0.18943317234516144, "learning_rate": 0.002, "loss": 2.3324, "step": 295900 }, { "epoch": 1.1439053053145922, "grad_norm": 0.09859231859445572, "learning_rate": 0.002, "loss": 2.3369, "step": 295910 }, { "epoch": 1.1439439625179757, "grad_norm": 0.09982761740684509, "learning_rate": 0.002, "loss": 2.324, "step": 295920 }, { "epoch": 1.143982619721359, "grad_norm": 0.11415493488311768, "learning_rate": 0.002, "loss": 2.3332, "step": 295930 }, { "epoch": 1.1440212769247422, "grad_norm": 0.10270734131336212, "learning_rate": 0.002, "loss": 2.3221, "step": 295940 }, { "epoch": 1.1440599341281255, "grad_norm": 0.10434996336698532, "learning_rate": 0.002, "loss": 2.3374, "step": 295950 }, { "epoch": 1.1440985913315087, "grad_norm": 0.09721355885267258, "learning_rate": 0.002, "loss": 2.3402, "step": 295960 }, { "epoch": 1.144137248534892, "grad_norm": 0.11060193181037903, "learning_rate": 0.002, "loss": 2.3192, "step": 295970 }, { "epoch": 1.1441759057382752, "grad_norm": 0.10203656554222107, "learning_rate": 0.002, "loss": 2.3508, "step": 295980 }, { "epoch": 1.1442145629416585, "grad_norm": 0.10176026076078415, "learning_rate": 0.002, "loss": 2.3359, "step": 295990 }, { "epoch": 1.1442532201450417, "grad_norm": 0.09184641391038895, "learning_rate": 0.002, "loss": 2.336, "step": 296000 }, { "epoch": 1.1442918773484252, "grad_norm": 0.10682286322116852, "learning_rate": 0.002, "loss": 2.3477, "step": 296010 }, { "epoch": 1.1443305345518084, "grad_norm": 0.10524826496839523, "learning_rate": 0.002, "loss": 2.3338, "step": 296020 }, { "epoch": 1.1443691917551917, "grad_norm": 0.08950001001358032, "learning_rate": 0.002, "loss": 2.331, "step": 296030 }, { "epoch": 1.144407848958575, "grad_norm": 0.14437347650527954, "learning_rate": 0.002, "loss": 2.3357, "step": 296040 }, { "epoch": 1.1444465061619582, "grad_norm": 0.09879963099956512, "learning_rate": 0.002, "loss": 2.3449, "step": 296050 }, { "epoch": 1.1444851633653415, "grad_norm": 0.10273423790931702, "learning_rate": 0.002, "loss": 2.34, "step": 296060 }, { "epoch": 1.1445238205687247, "grad_norm": 0.09035829454660416, "learning_rate": 0.002, "loss": 2.3288, "step": 296070 }, { "epoch": 1.1445624777721082, "grad_norm": 0.1273248940706253, "learning_rate": 0.002, "loss": 2.3336, "step": 296080 }, { "epoch": 1.1446011349754914, "grad_norm": 0.10491590201854706, "learning_rate": 0.002, "loss": 2.3348, "step": 296090 }, { "epoch": 1.1446397921788747, "grad_norm": 0.13738693296909332, "learning_rate": 0.002, "loss": 2.3289, "step": 296100 }, { "epoch": 1.144678449382258, "grad_norm": 0.5196298360824585, "learning_rate": 0.002, "loss": 2.3561, "step": 296110 }, { "epoch": 1.1447171065856412, "grad_norm": 0.12203863263130188, "learning_rate": 0.002, "loss": 2.3549, "step": 296120 }, { "epoch": 1.1447557637890244, "grad_norm": 0.10520727187395096, "learning_rate": 0.002, "loss": 2.3415, "step": 296130 }, { "epoch": 1.1447944209924077, "grad_norm": 0.12339538335800171, "learning_rate": 0.002, "loss": 2.3522, "step": 296140 }, { "epoch": 1.144833078195791, "grad_norm": 0.10765001177787781, "learning_rate": 0.002, "loss": 2.3248, "step": 296150 }, { "epoch": 1.1448717353991742, "grad_norm": 0.11031709611415863, "learning_rate": 0.002, "loss": 2.3372, "step": 296160 }, { "epoch": 1.1449103926025574, "grad_norm": 0.09553369134664536, "learning_rate": 0.002, "loss": 2.3323, "step": 296170 }, { "epoch": 1.144949049805941, "grad_norm": 0.11168837547302246, "learning_rate": 0.002, "loss": 2.3575, "step": 296180 }, { "epoch": 1.1449877070093242, "grad_norm": 0.11190593242645264, "learning_rate": 0.002, "loss": 2.3305, "step": 296190 }, { "epoch": 1.1450263642127074, "grad_norm": 0.09106079488992691, "learning_rate": 0.002, "loss": 2.3363, "step": 296200 }, { "epoch": 1.1450650214160907, "grad_norm": 0.10923727601766586, "learning_rate": 0.002, "loss": 2.3209, "step": 296210 }, { "epoch": 1.145103678619474, "grad_norm": 0.10502249002456665, "learning_rate": 0.002, "loss": 2.3321, "step": 296220 }, { "epoch": 1.1451423358228572, "grad_norm": 0.109004445374012, "learning_rate": 0.002, "loss": 2.3418, "step": 296230 }, { "epoch": 1.1451809930262404, "grad_norm": 0.10266054421663284, "learning_rate": 0.002, "loss": 2.3393, "step": 296240 }, { "epoch": 1.145219650229624, "grad_norm": 0.1098453477025032, "learning_rate": 0.002, "loss": 2.3343, "step": 296250 }, { "epoch": 1.1452583074330072, "grad_norm": 0.0950651541352272, "learning_rate": 0.002, "loss": 2.344, "step": 296260 }, { "epoch": 1.1452969646363904, "grad_norm": 0.0950298085808754, "learning_rate": 0.002, "loss": 2.3234, "step": 296270 }, { "epoch": 1.1453356218397737, "grad_norm": 0.109294094145298, "learning_rate": 0.002, "loss": 2.3244, "step": 296280 }, { "epoch": 1.145374279043157, "grad_norm": 0.13532811403274536, "learning_rate": 0.002, "loss": 2.3347, "step": 296290 }, { "epoch": 1.1454129362465402, "grad_norm": 0.10342597961425781, "learning_rate": 0.002, "loss": 2.3438, "step": 296300 }, { "epoch": 1.1454515934499234, "grad_norm": 0.11257849633693695, "learning_rate": 0.002, "loss": 2.3359, "step": 296310 }, { "epoch": 1.1454902506533067, "grad_norm": 0.09743151813745499, "learning_rate": 0.002, "loss": 2.3257, "step": 296320 }, { "epoch": 1.14552890785669, "grad_norm": 0.10734330117702484, "learning_rate": 0.002, "loss": 2.3225, "step": 296330 }, { "epoch": 1.1455675650600732, "grad_norm": 0.10549934953451157, "learning_rate": 0.002, "loss": 2.3276, "step": 296340 }, { "epoch": 1.1456062222634567, "grad_norm": 0.10295829176902771, "learning_rate": 0.002, "loss": 2.3294, "step": 296350 }, { "epoch": 1.14564487946684, "grad_norm": 0.10301025211811066, "learning_rate": 0.002, "loss": 2.3337, "step": 296360 }, { "epoch": 1.1456835366702232, "grad_norm": 0.12947851419448853, "learning_rate": 0.002, "loss": 2.3139, "step": 296370 }, { "epoch": 1.1457221938736064, "grad_norm": 0.10246923565864563, "learning_rate": 0.002, "loss": 2.3345, "step": 296380 }, { "epoch": 1.1457608510769897, "grad_norm": 0.10931044816970825, "learning_rate": 0.002, "loss": 2.3267, "step": 296390 }, { "epoch": 1.145799508280373, "grad_norm": 0.11043696850538254, "learning_rate": 0.002, "loss": 2.3363, "step": 296400 }, { "epoch": 1.1458381654837562, "grad_norm": 0.10995186120271683, "learning_rate": 0.002, "loss": 2.3342, "step": 296410 }, { "epoch": 1.1458768226871396, "grad_norm": 0.11364120244979858, "learning_rate": 0.002, "loss": 2.3251, "step": 296420 }, { "epoch": 1.145915479890523, "grad_norm": 0.1192992627620697, "learning_rate": 0.002, "loss": 2.3331, "step": 296430 }, { "epoch": 1.1459541370939061, "grad_norm": 0.09699320048093796, "learning_rate": 0.002, "loss": 2.3192, "step": 296440 }, { "epoch": 1.1459927942972894, "grad_norm": 0.1066770851612091, "learning_rate": 0.002, "loss": 2.3293, "step": 296450 }, { "epoch": 1.1460314515006726, "grad_norm": 0.09629204869270325, "learning_rate": 0.002, "loss": 2.3441, "step": 296460 }, { "epoch": 1.146070108704056, "grad_norm": 0.10704781115055084, "learning_rate": 0.002, "loss": 2.3425, "step": 296470 }, { "epoch": 1.1461087659074392, "grad_norm": 0.10781607776880264, "learning_rate": 0.002, "loss": 2.33, "step": 296480 }, { "epoch": 1.1461474231108224, "grad_norm": 0.11625618487596512, "learning_rate": 0.002, "loss": 2.3294, "step": 296490 }, { "epoch": 1.1461860803142057, "grad_norm": 0.10001617670059204, "learning_rate": 0.002, "loss": 2.3474, "step": 296500 }, { "epoch": 1.146224737517589, "grad_norm": 0.1142706573009491, "learning_rate": 0.002, "loss": 2.354, "step": 296510 }, { "epoch": 1.1462633947209724, "grad_norm": 0.11631233245134354, "learning_rate": 0.002, "loss": 2.3308, "step": 296520 }, { "epoch": 1.1463020519243556, "grad_norm": 0.0933648869395256, "learning_rate": 0.002, "loss": 2.3308, "step": 296530 }, { "epoch": 1.1463407091277389, "grad_norm": 0.10287400335073471, "learning_rate": 0.002, "loss": 2.3263, "step": 296540 }, { "epoch": 1.1463793663311221, "grad_norm": 0.09764724969863892, "learning_rate": 0.002, "loss": 2.3305, "step": 296550 }, { "epoch": 1.1464180235345054, "grad_norm": 0.0917297750711441, "learning_rate": 0.002, "loss": 2.3427, "step": 296560 }, { "epoch": 1.1464566807378886, "grad_norm": 0.11046093702316284, "learning_rate": 0.002, "loss": 2.3299, "step": 296570 }, { "epoch": 1.146495337941272, "grad_norm": 0.1363193839788437, "learning_rate": 0.002, "loss": 2.3198, "step": 296580 }, { "epoch": 1.1465339951446554, "grad_norm": 0.09778366982936859, "learning_rate": 0.002, "loss": 2.3144, "step": 296590 }, { "epoch": 1.1465726523480386, "grad_norm": 0.11382003128528595, "learning_rate": 0.002, "loss": 2.3355, "step": 296600 }, { "epoch": 1.1466113095514219, "grad_norm": 0.11143088340759277, "learning_rate": 0.002, "loss": 2.3378, "step": 296610 }, { "epoch": 1.1466499667548051, "grad_norm": 0.10715234279632568, "learning_rate": 0.002, "loss": 2.3368, "step": 296620 }, { "epoch": 1.1466886239581884, "grad_norm": 0.19777058064937592, "learning_rate": 0.002, "loss": 2.3494, "step": 296630 }, { "epoch": 1.1467272811615716, "grad_norm": 0.10907673835754395, "learning_rate": 0.002, "loss": 2.3258, "step": 296640 }, { "epoch": 1.1467659383649549, "grad_norm": 0.11077481508255005, "learning_rate": 0.002, "loss": 2.3068, "step": 296650 }, { "epoch": 1.1468045955683381, "grad_norm": 0.09810581058263779, "learning_rate": 0.002, "loss": 2.3512, "step": 296660 }, { "epoch": 1.1468432527717214, "grad_norm": 0.09280452132225037, "learning_rate": 0.002, "loss": 2.3368, "step": 296670 }, { "epoch": 1.1468819099751046, "grad_norm": 0.18051892518997192, "learning_rate": 0.002, "loss": 2.3309, "step": 296680 }, { "epoch": 1.1469205671784881, "grad_norm": 0.10337542742490768, "learning_rate": 0.002, "loss": 2.3206, "step": 296690 }, { "epoch": 1.1469592243818714, "grad_norm": 0.11727637052536011, "learning_rate": 0.002, "loss": 2.3453, "step": 296700 }, { "epoch": 1.1469978815852546, "grad_norm": 0.10399829596281052, "learning_rate": 0.002, "loss": 2.341, "step": 296710 }, { "epoch": 1.1470365387886379, "grad_norm": 0.10587452352046967, "learning_rate": 0.002, "loss": 2.3445, "step": 296720 }, { "epoch": 1.1470751959920211, "grad_norm": 0.10825172066688538, "learning_rate": 0.002, "loss": 2.3386, "step": 296730 }, { "epoch": 1.1471138531954044, "grad_norm": 0.10754440724849701, "learning_rate": 0.002, "loss": 2.3532, "step": 296740 }, { "epoch": 1.1471525103987876, "grad_norm": 0.10634081065654755, "learning_rate": 0.002, "loss": 2.333, "step": 296750 }, { "epoch": 1.147191167602171, "grad_norm": 0.13813266158103943, "learning_rate": 0.002, "loss": 2.3244, "step": 296760 }, { "epoch": 1.1472298248055544, "grad_norm": 0.14105969667434692, "learning_rate": 0.002, "loss": 2.3535, "step": 296770 }, { "epoch": 1.1472684820089376, "grad_norm": 0.1194007471203804, "learning_rate": 0.002, "loss": 2.3269, "step": 296780 }, { "epoch": 1.1473071392123209, "grad_norm": 0.09992986172437668, "learning_rate": 0.002, "loss": 2.3527, "step": 296790 }, { "epoch": 1.1473457964157041, "grad_norm": 0.09494622051715851, "learning_rate": 0.002, "loss": 2.339, "step": 296800 }, { "epoch": 1.1473844536190874, "grad_norm": 0.13352660834789276, "learning_rate": 0.002, "loss": 2.3353, "step": 296810 }, { "epoch": 1.1474231108224706, "grad_norm": 0.10252249985933304, "learning_rate": 0.002, "loss": 2.345, "step": 296820 }, { "epoch": 1.1474617680258539, "grad_norm": 0.08959279209375381, "learning_rate": 0.002, "loss": 2.333, "step": 296830 }, { "epoch": 1.1475004252292371, "grad_norm": 0.1278964728116989, "learning_rate": 0.002, "loss": 2.334, "step": 296840 }, { "epoch": 1.1475390824326204, "grad_norm": 0.11255158483982086, "learning_rate": 0.002, "loss": 2.3521, "step": 296850 }, { "epoch": 1.1475777396360038, "grad_norm": 0.09615004062652588, "learning_rate": 0.002, "loss": 2.334, "step": 296860 }, { "epoch": 1.147616396839387, "grad_norm": 0.12653405964374542, "learning_rate": 0.002, "loss": 2.3276, "step": 296870 }, { "epoch": 1.1476550540427704, "grad_norm": 0.096727654337883, "learning_rate": 0.002, "loss": 2.3394, "step": 296880 }, { "epoch": 1.1476937112461536, "grad_norm": 0.11681623011827469, "learning_rate": 0.002, "loss": 2.3339, "step": 296890 }, { "epoch": 1.1477323684495369, "grad_norm": 0.09364824742078781, "learning_rate": 0.002, "loss": 2.3261, "step": 296900 }, { "epoch": 1.14777102565292, "grad_norm": 0.10341514647006989, "learning_rate": 0.002, "loss": 2.3266, "step": 296910 }, { "epoch": 1.1478096828563034, "grad_norm": 0.09559976309537888, "learning_rate": 0.002, "loss": 2.3386, "step": 296920 }, { "epoch": 1.1478483400596868, "grad_norm": 0.1283910572528839, "learning_rate": 0.002, "loss": 2.3442, "step": 296930 }, { "epoch": 1.14788699726307, "grad_norm": 0.1130613386631012, "learning_rate": 0.002, "loss": 2.3427, "step": 296940 }, { "epoch": 1.1479256544664533, "grad_norm": 0.1011149063706398, "learning_rate": 0.002, "loss": 2.3496, "step": 296950 }, { "epoch": 1.1479643116698366, "grad_norm": 0.14449918270111084, "learning_rate": 0.002, "loss": 2.3423, "step": 296960 }, { "epoch": 1.1480029688732198, "grad_norm": 0.11203070729970932, "learning_rate": 0.002, "loss": 2.3266, "step": 296970 }, { "epoch": 1.148041626076603, "grad_norm": 0.11074665188789368, "learning_rate": 0.002, "loss": 2.3345, "step": 296980 }, { "epoch": 1.1480802832799863, "grad_norm": 0.10194501280784607, "learning_rate": 0.002, "loss": 2.3264, "step": 296990 }, { "epoch": 1.1481189404833696, "grad_norm": 0.10869333148002625, "learning_rate": 0.002, "loss": 2.3182, "step": 297000 }, { "epoch": 1.1481575976867529, "grad_norm": 0.1241835206747055, "learning_rate": 0.002, "loss": 2.3212, "step": 297010 }, { "epoch": 1.148196254890136, "grad_norm": 0.10600640624761581, "learning_rate": 0.002, "loss": 2.3302, "step": 297020 }, { "epoch": 1.1482349120935196, "grad_norm": 0.18213452398777008, "learning_rate": 0.002, "loss": 2.3311, "step": 297030 }, { "epoch": 1.1482735692969028, "grad_norm": 0.11283764988183975, "learning_rate": 0.002, "loss": 2.3521, "step": 297040 }, { "epoch": 1.148312226500286, "grad_norm": 0.10974473506212234, "learning_rate": 0.002, "loss": 2.3205, "step": 297050 }, { "epoch": 1.1483508837036693, "grad_norm": 0.09493612498044968, "learning_rate": 0.002, "loss": 2.3347, "step": 297060 }, { "epoch": 1.1483895409070526, "grad_norm": 0.09714702516794205, "learning_rate": 0.002, "loss": 2.3436, "step": 297070 }, { "epoch": 1.1484281981104358, "grad_norm": 0.1268359273672104, "learning_rate": 0.002, "loss": 2.3368, "step": 297080 }, { "epoch": 1.148466855313819, "grad_norm": 0.10395190864801407, "learning_rate": 0.002, "loss": 2.333, "step": 297090 }, { "epoch": 1.1485055125172026, "grad_norm": 0.13091175258159637, "learning_rate": 0.002, "loss": 2.3317, "step": 297100 }, { "epoch": 1.1485441697205858, "grad_norm": 0.09874138981103897, "learning_rate": 0.002, "loss": 2.3376, "step": 297110 }, { "epoch": 1.148582826923969, "grad_norm": 0.10048501193523407, "learning_rate": 0.002, "loss": 2.3297, "step": 297120 }, { "epoch": 1.1486214841273523, "grad_norm": 0.11106102168560028, "learning_rate": 0.002, "loss": 2.348, "step": 297130 }, { "epoch": 1.1486601413307356, "grad_norm": 0.10966502875089645, "learning_rate": 0.002, "loss": 2.3477, "step": 297140 }, { "epoch": 1.1486987985341188, "grad_norm": 0.09882976114749908, "learning_rate": 0.002, "loss": 2.3248, "step": 297150 }, { "epoch": 1.148737455737502, "grad_norm": 0.1049468070268631, "learning_rate": 0.002, "loss": 2.3186, "step": 297160 }, { "epoch": 1.1487761129408853, "grad_norm": 0.10627221316099167, "learning_rate": 0.002, "loss": 2.327, "step": 297170 }, { "epoch": 1.1488147701442686, "grad_norm": 0.2593117952346802, "learning_rate": 0.002, "loss": 2.3278, "step": 297180 }, { "epoch": 1.148853427347652, "grad_norm": 0.1032712534070015, "learning_rate": 0.002, "loss": 2.3515, "step": 297190 }, { "epoch": 1.1488920845510353, "grad_norm": 0.09483695030212402, "learning_rate": 0.002, "loss": 2.3323, "step": 297200 }, { "epoch": 1.1489307417544186, "grad_norm": 0.10538081079721451, "learning_rate": 0.002, "loss": 2.3432, "step": 297210 }, { "epoch": 1.1489693989578018, "grad_norm": 0.10002459585666656, "learning_rate": 0.002, "loss": 2.3332, "step": 297220 }, { "epoch": 1.149008056161185, "grad_norm": 0.11455190181732178, "learning_rate": 0.002, "loss": 2.3328, "step": 297230 }, { "epoch": 1.1490467133645683, "grad_norm": 0.10687296092510223, "learning_rate": 0.002, "loss": 2.3394, "step": 297240 }, { "epoch": 1.1490853705679516, "grad_norm": 0.1209367960691452, "learning_rate": 0.002, "loss": 2.334, "step": 297250 }, { "epoch": 1.1491240277713348, "grad_norm": 0.11175933480262756, "learning_rate": 0.002, "loss": 2.3358, "step": 297260 }, { "epoch": 1.1491626849747183, "grad_norm": 0.09743501991033554, "learning_rate": 0.002, "loss": 2.34, "step": 297270 }, { "epoch": 1.1492013421781015, "grad_norm": 0.11677234619855881, "learning_rate": 0.002, "loss": 2.3431, "step": 297280 }, { "epoch": 1.1492399993814848, "grad_norm": 0.10145784914493561, "learning_rate": 0.002, "loss": 2.3427, "step": 297290 }, { "epoch": 1.149278656584868, "grad_norm": 0.0993756502866745, "learning_rate": 0.002, "loss": 2.3437, "step": 297300 }, { "epoch": 1.1493173137882513, "grad_norm": 0.11287908256053925, "learning_rate": 0.002, "loss": 2.3272, "step": 297310 }, { "epoch": 1.1493559709916346, "grad_norm": 0.10820694267749786, "learning_rate": 0.002, "loss": 2.3242, "step": 297320 }, { "epoch": 1.1493946281950178, "grad_norm": 0.12097244709730148, "learning_rate": 0.002, "loss": 2.3472, "step": 297330 }, { "epoch": 1.149433285398401, "grad_norm": 0.10064926743507385, "learning_rate": 0.002, "loss": 2.3312, "step": 297340 }, { "epoch": 1.1494719426017843, "grad_norm": 0.10099725425243378, "learning_rate": 0.002, "loss": 2.333, "step": 297350 }, { "epoch": 1.1495105998051678, "grad_norm": 0.11119300872087479, "learning_rate": 0.002, "loss": 2.3349, "step": 297360 }, { "epoch": 1.149549257008551, "grad_norm": 0.10817861557006836, "learning_rate": 0.002, "loss": 2.3278, "step": 297370 }, { "epoch": 1.1495879142119343, "grad_norm": 0.16150224208831787, "learning_rate": 0.002, "loss": 2.3455, "step": 297380 }, { "epoch": 1.1496265714153175, "grad_norm": 0.09874442219734192, "learning_rate": 0.002, "loss": 2.3366, "step": 297390 }, { "epoch": 1.1496652286187008, "grad_norm": 0.1252291351556778, "learning_rate": 0.002, "loss": 2.3298, "step": 297400 }, { "epoch": 1.149703885822084, "grad_norm": 0.10039383918046951, "learning_rate": 0.002, "loss": 2.3361, "step": 297410 }, { "epoch": 1.1497425430254673, "grad_norm": 0.09669938683509827, "learning_rate": 0.002, "loss": 2.3409, "step": 297420 }, { "epoch": 1.1497812002288506, "grad_norm": 0.1003023162484169, "learning_rate": 0.002, "loss": 2.3225, "step": 297430 }, { "epoch": 1.149819857432234, "grad_norm": 0.11819092184305191, "learning_rate": 0.002, "loss": 2.3155, "step": 297440 }, { "epoch": 1.1498585146356173, "grad_norm": 0.11495328694581985, "learning_rate": 0.002, "loss": 2.3325, "step": 297450 }, { "epoch": 1.1498971718390005, "grad_norm": 0.10414993762969971, "learning_rate": 0.002, "loss": 2.3362, "step": 297460 }, { "epoch": 1.1499358290423838, "grad_norm": 0.10090905427932739, "learning_rate": 0.002, "loss": 2.3424, "step": 297470 }, { "epoch": 1.149974486245767, "grad_norm": 0.11985575407743454, "learning_rate": 0.002, "loss": 2.3368, "step": 297480 }, { "epoch": 1.1500131434491503, "grad_norm": 0.09595164656639099, "learning_rate": 0.002, "loss": 2.3333, "step": 297490 }, { "epoch": 1.1500518006525335, "grad_norm": 0.13302060961723328, "learning_rate": 0.002, "loss": 2.3487, "step": 297500 }, { "epoch": 1.1500904578559168, "grad_norm": 0.10358621180057526, "learning_rate": 0.002, "loss": 2.3377, "step": 297510 }, { "epoch": 1.1501291150593, "grad_norm": 0.11713553965091705, "learning_rate": 0.002, "loss": 2.3123, "step": 297520 }, { "epoch": 1.1501677722626835, "grad_norm": 0.09859627485275269, "learning_rate": 0.002, "loss": 2.3403, "step": 297530 }, { "epoch": 1.1502064294660668, "grad_norm": 0.10029926151037216, "learning_rate": 0.002, "loss": 2.3298, "step": 297540 }, { "epoch": 1.15024508666945, "grad_norm": 0.10118751972913742, "learning_rate": 0.002, "loss": 2.3364, "step": 297550 }, { "epoch": 1.1502837438728333, "grad_norm": 0.11143433302640915, "learning_rate": 0.002, "loss": 2.3381, "step": 297560 }, { "epoch": 1.1503224010762165, "grad_norm": 0.08883854746818542, "learning_rate": 0.002, "loss": 2.3368, "step": 297570 }, { "epoch": 1.1503610582795998, "grad_norm": 0.12167437374591827, "learning_rate": 0.002, "loss": 2.3335, "step": 297580 }, { "epoch": 1.150399715482983, "grad_norm": 0.10091345012187958, "learning_rate": 0.002, "loss": 2.3317, "step": 297590 }, { "epoch": 1.1504383726863663, "grad_norm": 0.09581989049911499, "learning_rate": 0.002, "loss": 2.3373, "step": 297600 }, { "epoch": 1.1504770298897498, "grad_norm": 0.10923956334590912, "learning_rate": 0.002, "loss": 2.3419, "step": 297610 }, { "epoch": 1.150515687093133, "grad_norm": 0.11006530374288559, "learning_rate": 0.002, "loss": 2.3207, "step": 297620 }, { "epoch": 1.1505543442965163, "grad_norm": 0.1279909759759903, "learning_rate": 0.002, "loss": 2.3441, "step": 297630 }, { "epoch": 1.1505930014998995, "grad_norm": 0.09534849226474762, "learning_rate": 0.002, "loss": 2.3149, "step": 297640 }, { "epoch": 1.1506316587032828, "grad_norm": 0.10234690457582474, "learning_rate": 0.002, "loss": 2.3304, "step": 297650 }, { "epoch": 1.150670315906666, "grad_norm": 0.10225453972816467, "learning_rate": 0.002, "loss": 2.3265, "step": 297660 }, { "epoch": 1.1507089731100493, "grad_norm": 0.10582304745912552, "learning_rate": 0.002, "loss": 2.3396, "step": 297670 }, { "epoch": 1.1507476303134325, "grad_norm": 0.10547558963298798, "learning_rate": 0.002, "loss": 2.328, "step": 297680 }, { "epoch": 1.1507862875168158, "grad_norm": 0.10145711153745651, "learning_rate": 0.002, "loss": 2.3331, "step": 297690 }, { "epoch": 1.1508249447201992, "grad_norm": 0.1168600469827652, "learning_rate": 0.002, "loss": 2.3322, "step": 297700 }, { "epoch": 1.1508636019235825, "grad_norm": 0.1104208454489708, "learning_rate": 0.002, "loss": 2.3216, "step": 297710 }, { "epoch": 1.1509022591269658, "grad_norm": 0.10342969745397568, "learning_rate": 0.002, "loss": 2.3315, "step": 297720 }, { "epoch": 1.150940916330349, "grad_norm": 0.09394558519124985, "learning_rate": 0.002, "loss": 2.336, "step": 297730 }, { "epoch": 1.1509795735337323, "grad_norm": 0.10640209168195724, "learning_rate": 0.002, "loss": 2.3403, "step": 297740 }, { "epoch": 1.1510182307371155, "grad_norm": 0.15803790092468262, "learning_rate": 0.002, "loss": 2.3413, "step": 297750 }, { "epoch": 1.1510568879404988, "grad_norm": 0.10676860064268112, "learning_rate": 0.002, "loss": 2.3235, "step": 297760 }, { "epoch": 1.151095545143882, "grad_norm": 0.09660085290670395, "learning_rate": 0.002, "loss": 2.3365, "step": 297770 }, { "epoch": 1.1511342023472655, "grad_norm": 0.10708586126565933, "learning_rate": 0.002, "loss": 2.3431, "step": 297780 }, { "epoch": 1.1511728595506487, "grad_norm": 0.10311704128980637, "learning_rate": 0.002, "loss": 2.3314, "step": 297790 }, { "epoch": 1.151211516754032, "grad_norm": 0.11200468242168427, "learning_rate": 0.002, "loss": 2.3193, "step": 297800 }, { "epoch": 1.1512501739574152, "grad_norm": 0.11802764981985092, "learning_rate": 0.002, "loss": 2.3439, "step": 297810 }, { "epoch": 1.1512888311607985, "grad_norm": 0.09990455955266953, "learning_rate": 0.002, "loss": 2.3365, "step": 297820 }, { "epoch": 1.1513274883641818, "grad_norm": 0.13548719882965088, "learning_rate": 0.002, "loss": 2.3347, "step": 297830 }, { "epoch": 1.151366145567565, "grad_norm": 0.13101348280906677, "learning_rate": 0.002, "loss": 2.3485, "step": 297840 }, { "epoch": 1.1514048027709483, "grad_norm": 0.11312423646450043, "learning_rate": 0.002, "loss": 2.3348, "step": 297850 }, { "epoch": 1.1514434599743315, "grad_norm": 0.09926366806030273, "learning_rate": 0.002, "loss": 2.3333, "step": 297860 }, { "epoch": 1.151482117177715, "grad_norm": 0.09945975244045258, "learning_rate": 0.002, "loss": 2.345, "step": 297870 }, { "epoch": 1.1515207743810982, "grad_norm": 0.10929170995950699, "learning_rate": 0.002, "loss": 2.3451, "step": 297880 }, { "epoch": 1.1515594315844815, "grad_norm": 0.12738560140132904, "learning_rate": 0.002, "loss": 2.3297, "step": 297890 }, { "epoch": 1.1515980887878647, "grad_norm": 0.10594571381807327, "learning_rate": 0.002, "loss": 2.3261, "step": 297900 }, { "epoch": 1.151636745991248, "grad_norm": 0.0898451879620552, "learning_rate": 0.002, "loss": 2.3271, "step": 297910 }, { "epoch": 1.1516754031946312, "grad_norm": 0.10429723560810089, "learning_rate": 0.002, "loss": 2.3455, "step": 297920 }, { "epoch": 1.1517140603980145, "grad_norm": 0.0913781002163887, "learning_rate": 0.002, "loss": 2.3294, "step": 297930 }, { "epoch": 1.151752717601398, "grad_norm": 0.10696902871131897, "learning_rate": 0.002, "loss": 2.3456, "step": 297940 }, { "epoch": 1.1517913748047812, "grad_norm": 0.1067197248339653, "learning_rate": 0.002, "loss": 2.3446, "step": 297950 }, { "epoch": 1.1518300320081645, "grad_norm": 0.11086300760507584, "learning_rate": 0.002, "loss": 2.334, "step": 297960 }, { "epoch": 1.1518686892115477, "grad_norm": 0.10444667935371399, "learning_rate": 0.002, "loss": 2.3342, "step": 297970 }, { "epoch": 1.151907346414931, "grad_norm": 0.12773331999778748, "learning_rate": 0.002, "loss": 2.3491, "step": 297980 }, { "epoch": 1.1519460036183142, "grad_norm": 0.11314024776220322, "learning_rate": 0.002, "loss": 2.3293, "step": 297990 }, { "epoch": 1.1519846608216975, "grad_norm": 0.09332095086574554, "learning_rate": 0.002, "loss": 2.3322, "step": 298000 }, { "epoch": 1.1520233180250807, "grad_norm": 0.12006017565727234, "learning_rate": 0.002, "loss": 2.3352, "step": 298010 }, { "epoch": 1.152061975228464, "grad_norm": 0.09459801763296127, "learning_rate": 0.002, "loss": 2.314, "step": 298020 }, { "epoch": 1.1521006324318472, "grad_norm": 0.1063527911901474, "learning_rate": 0.002, "loss": 2.34, "step": 298030 }, { "epoch": 1.1521392896352307, "grad_norm": 0.11665216088294983, "learning_rate": 0.002, "loss": 2.3456, "step": 298040 }, { "epoch": 1.152177946838614, "grad_norm": 0.11165454238653183, "learning_rate": 0.002, "loss": 2.3488, "step": 298050 }, { "epoch": 1.1522166040419972, "grad_norm": 0.11676241457462311, "learning_rate": 0.002, "loss": 2.3423, "step": 298060 }, { "epoch": 1.1522552612453805, "grad_norm": 0.1029791459441185, "learning_rate": 0.002, "loss": 2.3464, "step": 298070 }, { "epoch": 1.1522939184487637, "grad_norm": 0.10949081182479858, "learning_rate": 0.002, "loss": 2.3212, "step": 298080 }, { "epoch": 1.152332575652147, "grad_norm": 0.12753087282180786, "learning_rate": 0.002, "loss": 2.3298, "step": 298090 }, { "epoch": 1.1523712328555302, "grad_norm": 0.11897064745426178, "learning_rate": 0.002, "loss": 2.3483, "step": 298100 }, { "epoch": 1.1524098900589137, "grad_norm": 0.10330282896757126, "learning_rate": 0.002, "loss": 2.3359, "step": 298110 }, { "epoch": 1.152448547262297, "grad_norm": 0.12037758529186249, "learning_rate": 0.002, "loss": 2.3205, "step": 298120 }, { "epoch": 1.1524872044656802, "grad_norm": 0.1015135869383812, "learning_rate": 0.002, "loss": 2.3444, "step": 298130 }, { "epoch": 1.1525258616690635, "grad_norm": 0.11100692301988602, "learning_rate": 0.002, "loss": 2.3492, "step": 298140 }, { "epoch": 1.1525645188724467, "grad_norm": 0.09652989357709885, "learning_rate": 0.002, "loss": 2.3286, "step": 298150 }, { "epoch": 1.15260317607583, "grad_norm": 0.11029095202684402, "learning_rate": 0.002, "loss": 2.3476, "step": 298160 }, { "epoch": 1.1526418332792132, "grad_norm": 0.09196136891841888, "learning_rate": 0.002, "loss": 2.3334, "step": 298170 }, { "epoch": 1.1526804904825965, "grad_norm": 0.1197347342967987, "learning_rate": 0.002, "loss": 2.3366, "step": 298180 }, { "epoch": 1.1527191476859797, "grad_norm": 0.11640334874391556, "learning_rate": 0.002, "loss": 2.3202, "step": 298190 }, { "epoch": 1.152757804889363, "grad_norm": 0.10275018960237503, "learning_rate": 0.002, "loss": 2.3316, "step": 298200 }, { "epoch": 1.1527964620927464, "grad_norm": 0.10140158981084824, "learning_rate": 0.002, "loss": 2.331, "step": 298210 }, { "epoch": 1.1528351192961297, "grad_norm": 0.11424034833908081, "learning_rate": 0.002, "loss": 2.329, "step": 298220 }, { "epoch": 1.152873776499513, "grad_norm": 0.11030445247888565, "learning_rate": 0.002, "loss": 2.3237, "step": 298230 }, { "epoch": 1.1529124337028962, "grad_norm": 0.10368213057518005, "learning_rate": 0.002, "loss": 2.3317, "step": 298240 }, { "epoch": 1.1529510909062795, "grad_norm": 0.10507994890213013, "learning_rate": 0.002, "loss": 2.3278, "step": 298250 }, { "epoch": 1.1529897481096627, "grad_norm": 0.11051535606384277, "learning_rate": 0.002, "loss": 2.3367, "step": 298260 }, { "epoch": 1.153028405313046, "grad_norm": 0.11708662658929825, "learning_rate": 0.002, "loss": 2.3365, "step": 298270 }, { "epoch": 1.1530670625164294, "grad_norm": 0.10672136396169662, "learning_rate": 0.002, "loss": 2.3339, "step": 298280 }, { "epoch": 1.1531057197198127, "grad_norm": 0.10426277667284012, "learning_rate": 0.002, "loss": 2.336, "step": 298290 }, { "epoch": 1.153144376923196, "grad_norm": 0.11060561239719391, "learning_rate": 0.002, "loss": 2.3383, "step": 298300 }, { "epoch": 1.1531830341265792, "grad_norm": 0.10655226558446884, "learning_rate": 0.002, "loss": 2.3378, "step": 298310 }, { "epoch": 1.1532216913299624, "grad_norm": 0.0946177989244461, "learning_rate": 0.002, "loss": 2.3235, "step": 298320 }, { "epoch": 1.1532603485333457, "grad_norm": 0.0947253406047821, "learning_rate": 0.002, "loss": 2.3286, "step": 298330 }, { "epoch": 1.153299005736729, "grad_norm": 0.09470956772565842, "learning_rate": 0.002, "loss": 2.3391, "step": 298340 }, { "epoch": 1.1533376629401122, "grad_norm": 0.09391769021749496, "learning_rate": 0.002, "loss": 2.3421, "step": 298350 }, { "epoch": 1.1533763201434954, "grad_norm": 0.10826486349105835, "learning_rate": 0.002, "loss": 2.3251, "step": 298360 }, { "epoch": 1.1534149773468787, "grad_norm": 0.102037712931633, "learning_rate": 0.002, "loss": 2.3293, "step": 298370 }, { "epoch": 1.1534536345502622, "grad_norm": 0.12063083797693253, "learning_rate": 0.002, "loss": 2.3377, "step": 298380 }, { "epoch": 1.1534922917536454, "grad_norm": 0.09736413508653641, "learning_rate": 0.002, "loss": 2.3583, "step": 298390 }, { "epoch": 1.1535309489570287, "grad_norm": 0.12331432849168777, "learning_rate": 0.002, "loss": 2.3391, "step": 298400 }, { "epoch": 1.153569606160412, "grad_norm": 0.0907859355211258, "learning_rate": 0.002, "loss": 2.346, "step": 298410 }, { "epoch": 1.1536082633637952, "grad_norm": 0.09962484240531921, "learning_rate": 0.002, "loss": 2.338, "step": 298420 }, { "epoch": 1.1536469205671784, "grad_norm": 0.10940424352884293, "learning_rate": 0.002, "loss": 2.3376, "step": 298430 }, { "epoch": 1.1536855777705617, "grad_norm": 0.10202945023775101, "learning_rate": 0.002, "loss": 2.3441, "step": 298440 }, { "epoch": 1.1537242349739452, "grad_norm": 0.10457737743854523, "learning_rate": 0.002, "loss": 2.3107, "step": 298450 }, { "epoch": 1.1537628921773284, "grad_norm": 0.1488359272480011, "learning_rate": 0.002, "loss": 2.3486, "step": 298460 }, { "epoch": 1.1538015493807117, "grad_norm": 0.08914506435394287, "learning_rate": 0.002, "loss": 2.3288, "step": 298470 }, { "epoch": 1.153840206584095, "grad_norm": 0.10192002356052399, "learning_rate": 0.002, "loss": 2.3266, "step": 298480 }, { "epoch": 1.1538788637874782, "grad_norm": 0.10051634907722473, "learning_rate": 0.002, "loss": 2.3457, "step": 298490 }, { "epoch": 1.1539175209908614, "grad_norm": 0.10865295678377151, "learning_rate": 0.002, "loss": 2.3279, "step": 298500 }, { "epoch": 1.1539561781942447, "grad_norm": 0.11688423156738281, "learning_rate": 0.002, "loss": 2.3448, "step": 298510 }, { "epoch": 1.153994835397628, "grad_norm": 0.09238366782665253, "learning_rate": 0.002, "loss": 2.3325, "step": 298520 }, { "epoch": 1.1540334926010112, "grad_norm": 0.12976820766925812, "learning_rate": 0.002, "loss": 2.318, "step": 298530 }, { "epoch": 1.1540721498043944, "grad_norm": 0.09833407402038574, "learning_rate": 0.002, "loss": 2.3459, "step": 298540 }, { "epoch": 1.154110807007778, "grad_norm": 0.1135307028889656, "learning_rate": 0.002, "loss": 2.3259, "step": 298550 }, { "epoch": 1.1541494642111612, "grad_norm": 0.10334569215774536, "learning_rate": 0.002, "loss": 2.3343, "step": 298560 }, { "epoch": 1.1541881214145444, "grad_norm": 0.10028822720050812, "learning_rate": 0.002, "loss": 2.3506, "step": 298570 }, { "epoch": 1.1542267786179277, "grad_norm": 0.12211079150438309, "learning_rate": 0.002, "loss": 2.3374, "step": 298580 }, { "epoch": 1.154265435821311, "grad_norm": 0.10825755447149277, "learning_rate": 0.002, "loss": 2.3417, "step": 298590 }, { "epoch": 1.1543040930246942, "grad_norm": 0.10587592422962189, "learning_rate": 0.002, "loss": 2.3428, "step": 298600 }, { "epoch": 1.1543427502280774, "grad_norm": 0.11863046139478683, "learning_rate": 0.002, "loss": 2.3391, "step": 298610 }, { "epoch": 1.154381407431461, "grad_norm": 0.10384626686573029, "learning_rate": 0.002, "loss": 2.3353, "step": 298620 }, { "epoch": 1.1544200646348441, "grad_norm": 0.10779169946908951, "learning_rate": 0.002, "loss": 2.345, "step": 298630 }, { "epoch": 1.1544587218382274, "grad_norm": 0.10602803528308868, "learning_rate": 0.002, "loss": 2.3304, "step": 298640 }, { "epoch": 1.1544973790416106, "grad_norm": 0.10449524223804474, "learning_rate": 0.002, "loss": 2.3393, "step": 298650 }, { "epoch": 1.154536036244994, "grad_norm": 0.0889180600643158, "learning_rate": 0.002, "loss": 2.3426, "step": 298660 }, { "epoch": 1.1545746934483772, "grad_norm": 0.10732623934745789, "learning_rate": 0.002, "loss": 2.343, "step": 298670 }, { "epoch": 1.1546133506517604, "grad_norm": 0.09704501181840897, "learning_rate": 0.002, "loss": 2.3336, "step": 298680 }, { "epoch": 1.1546520078551437, "grad_norm": 0.09987418353557587, "learning_rate": 0.002, "loss": 2.3283, "step": 298690 }, { "epoch": 1.154690665058527, "grad_norm": 0.12397871911525726, "learning_rate": 0.002, "loss": 2.3299, "step": 298700 }, { "epoch": 1.1547293222619102, "grad_norm": 0.10326369851827621, "learning_rate": 0.002, "loss": 2.3408, "step": 298710 }, { "epoch": 1.1547679794652936, "grad_norm": 0.10426446050405502, "learning_rate": 0.002, "loss": 2.3432, "step": 298720 }, { "epoch": 1.154806636668677, "grad_norm": 0.11490487307310104, "learning_rate": 0.002, "loss": 2.3402, "step": 298730 }, { "epoch": 1.1548452938720601, "grad_norm": 0.11625088006258011, "learning_rate": 0.002, "loss": 2.3315, "step": 298740 }, { "epoch": 1.1548839510754434, "grad_norm": 0.0878482460975647, "learning_rate": 0.002, "loss": 2.337, "step": 298750 }, { "epoch": 1.1549226082788266, "grad_norm": 0.1331155151128769, "learning_rate": 0.002, "loss": 2.3235, "step": 298760 }, { "epoch": 1.15496126548221, "grad_norm": 0.09513157606124878, "learning_rate": 0.002, "loss": 2.3238, "step": 298770 }, { "epoch": 1.1549999226855932, "grad_norm": 0.136835515499115, "learning_rate": 0.002, "loss": 2.3359, "step": 298780 }, { "epoch": 1.1550385798889766, "grad_norm": 0.09034319967031479, "learning_rate": 0.002, "loss": 2.3321, "step": 298790 }, { "epoch": 1.1550772370923599, "grad_norm": 0.10461827367544174, "learning_rate": 0.002, "loss": 2.3392, "step": 298800 }, { "epoch": 1.1551158942957431, "grad_norm": 0.10814294219017029, "learning_rate": 0.002, "loss": 2.3395, "step": 298810 }, { "epoch": 1.1551545514991264, "grad_norm": 0.10786023736000061, "learning_rate": 0.002, "loss": 2.3311, "step": 298820 }, { "epoch": 1.1551932087025096, "grad_norm": 0.1013743132352829, "learning_rate": 0.002, "loss": 2.3268, "step": 298830 }, { "epoch": 1.1552318659058929, "grad_norm": 0.11625484377145767, "learning_rate": 0.002, "loss": 2.3357, "step": 298840 }, { "epoch": 1.1552705231092761, "grad_norm": 0.10375026613473892, "learning_rate": 0.002, "loss": 2.3164, "step": 298850 }, { "epoch": 1.1553091803126594, "grad_norm": 0.10422974824905396, "learning_rate": 0.002, "loss": 2.3364, "step": 298860 }, { "epoch": 1.1553478375160426, "grad_norm": 0.11368890106678009, "learning_rate": 0.002, "loss": 2.3408, "step": 298870 }, { "epoch": 1.155386494719426, "grad_norm": 0.1095462292432785, "learning_rate": 0.002, "loss": 2.3311, "step": 298880 }, { "epoch": 1.1554251519228094, "grad_norm": 0.11351530253887177, "learning_rate": 0.002, "loss": 2.3379, "step": 298890 }, { "epoch": 1.1554638091261926, "grad_norm": 0.11732436716556549, "learning_rate": 0.002, "loss": 2.3525, "step": 298900 }, { "epoch": 1.1555024663295759, "grad_norm": 0.115448959171772, "learning_rate": 0.002, "loss": 2.3421, "step": 298910 }, { "epoch": 1.1555411235329591, "grad_norm": 0.1171172708272934, "learning_rate": 0.002, "loss": 2.3382, "step": 298920 }, { "epoch": 1.1555797807363424, "grad_norm": 0.15639221668243408, "learning_rate": 0.002, "loss": 2.3304, "step": 298930 }, { "epoch": 1.1556184379397256, "grad_norm": 0.10376555472612381, "learning_rate": 0.002, "loss": 2.3299, "step": 298940 }, { "epoch": 1.1556570951431089, "grad_norm": 0.10352832078933716, "learning_rate": 0.002, "loss": 2.3364, "step": 298950 }, { "epoch": 1.1556957523464924, "grad_norm": 0.10156875848770142, "learning_rate": 0.002, "loss": 2.3443, "step": 298960 }, { "epoch": 1.1557344095498756, "grad_norm": 0.08658002316951752, "learning_rate": 0.002, "loss": 2.3401, "step": 298970 }, { "epoch": 1.1557730667532589, "grad_norm": 0.1257506012916565, "learning_rate": 0.002, "loss": 2.3347, "step": 298980 }, { "epoch": 1.1558117239566421, "grad_norm": 0.11586952954530716, "learning_rate": 0.002, "loss": 2.3325, "step": 298990 }, { "epoch": 1.1558503811600254, "grad_norm": 0.10772280395030975, "learning_rate": 0.002, "loss": 2.3489, "step": 299000 }, { "epoch": 1.1558890383634086, "grad_norm": 0.10585318505764008, "learning_rate": 0.002, "loss": 2.3322, "step": 299010 }, { "epoch": 1.1559276955667919, "grad_norm": 0.0990624874830246, "learning_rate": 0.002, "loss": 2.3264, "step": 299020 }, { "epoch": 1.1559663527701751, "grad_norm": 0.1242714673280716, "learning_rate": 0.002, "loss": 2.3347, "step": 299030 }, { "epoch": 1.1560050099735584, "grad_norm": 0.12910033762454987, "learning_rate": 0.002, "loss": 2.3336, "step": 299040 }, { "epoch": 1.1560436671769418, "grad_norm": 0.09899185597896576, "learning_rate": 0.002, "loss": 2.3415, "step": 299050 }, { "epoch": 1.156082324380325, "grad_norm": 0.11427902430295944, "learning_rate": 0.002, "loss": 2.3334, "step": 299060 }, { "epoch": 1.1561209815837084, "grad_norm": 0.27933862805366516, "learning_rate": 0.002, "loss": 2.3376, "step": 299070 }, { "epoch": 1.1561596387870916, "grad_norm": 0.12222559005022049, "learning_rate": 0.002, "loss": 2.3356, "step": 299080 }, { "epoch": 1.1561982959904749, "grad_norm": 0.09958814084529877, "learning_rate": 0.002, "loss": 2.3487, "step": 299090 }, { "epoch": 1.156236953193858, "grad_norm": 0.10184434056282043, "learning_rate": 0.002, "loss": 2.3375, "step": 299100 }, { "epoch": 1.1562756103972414, "grad_norm": 0.10524575412273407, "learning_rate": 0.002, "loss": 2.3322, "step": 299110 }, { "epoch": 1.1563142676006246, "grad_norm": 0.10867523401975632, "learning_rate": 0.002, "loss": 2.327, "step": 299120 }, { "epoch": 1.156352924804008, "grad_norm": 0.11048688739538193, "learning_rate": 0.002, "loss": 2.3255, "step": 299130 }, { "epoch": 1.1563915820073913, "grad_norm": 0.09732969850301743, "learning_rate": 0.002, "loss": 2.3358, "step": 299140 }, { "epoch": 1.1564302392107746, "grad_norm": 0.0917268842458725, "learning_rate": 0.002, "loss": 2.3298, "step": 299150 }, { "epoch": 1.1564688964141578, "grad_norm": 0.10696238279342651, "learning_rate": 0.002, "loss": 2.3448, "step": 299160 }, { "epoch": 1.156507553617541, "grad_norm": 0.10692931711673737, "learning_rate": 0.002, "loss": 2.33, "step": 299170 }, { "epoch": 1.1565462108209243, "grad_norm": 0.10884692519903183, "learning_rate": 0.002, "loss": 2.3413, "step": 299180 }, { "epoch": 1.1565848680243076, "grad_norm": 0.11779770255088806, "learning_rate": 0.002, "loss": 2.3425, "step": 299190 }, { "epoch": 1.1566235252276909, "grad_norm": 0.09254894405603409, "learning_rate": 0.002, "loss": 2.3427, "step": 299200 }, { "epoch": 1.156662182431074, "grad_norm": 0.10647129267454147, "learning_rate": 0.002, "loss": 2.3345, "step": 299210 }, { "epoch": 1.1567008396344576, "grad_norm": 0.11991509050130844, "learning_rate": 0.002, "loss": 2.3201, "step": 299220 }, { "epoch": 1.1567394968378408, "grad_norm": 0.11554388701915741, "learning_rate": 0.002, "loss": 2.3302, "step": 299230 }, { "epoch": 1.156778154041224, "grad_norm": 0.10474992543458939, "learning_rate": 0.002, "loss": 2.3397, "step": 299240 }, { "epoch": 1.1568168112446073, "grad_norm": 0.11388864368200302, "learning_rate": 0.002, "loss": 2.3149, "step": 299250 }, { "epoch": 1.1568554684479906, "grad_norm": 0.10299711674451828, "learning_rate": 0.002, "loss": 2.3372, "step": 299260 }, { "epoch": 1.1568941256513738, "grad_norm": 0.14622846245765686, "learning_rate": 0.002, "loss": 2.3488, "step": 299270 }, { "epoch": 1.156932782854757, "grad_norm": 0.09922489523887634, "learning_rate": 0.002, "loss": 2.3417, "step": 299280 }, { "epoch": 1.1569714400581403, "grad_norm": 0.11386466026306152, "learning_rate": 0.002, "loss": 2.3409, "step": 299290 }, { "epoch": 1.1570100972615238, "grad_norm": 0.10140512138605118, "learning_rate": 0.002, "loss": 2.3393, "step": 299300 }, { "epoch": 1.157048754464907, "grad_norm": 0.10422612726688385, "learning_rate": 0.002, "loss": 2.33, "step": 299310 }, { "epoch": 1.1570874116682903, "grad_norm": 0.10468500852584839, "learning_rate": 0.002, "loss": 2.332, "step": 299320 }, { "epoch": 1.1571260688716736, "grad_norm": 0.10063187032938004, "learning_rate": 0.002, "loss": 2.3325, "step": 299330 }, { "epoch": 1.1571647260750568, "grad_norm": 0.09477391839027405, "learning_rate": 0.002, "loss": 2.3277, "step": 299340 }, { "epoch": 1.15720338327844, "grad_norm": 0.09196572005748749, "learning_rate": 0.002, "loss": 2.3322, "step": 299350 }, { "epoch": 1.1572420404818233, "grad_norm": 0.10172398388385773, "learning_rate": 0.002, "loss": 2.3501, "step": 299360 }, { "epoch": 1.1572806976852066, "grad_norm": 0.1202421635389328, "learning_rate": 0.002, "loss": 2.3181, "step": 299370 }, { "epoch": 1.1573193548885898, "grad_norm": 0.10497753322124481, "learning_rate": 0.002, "loss": 2.3383, "step": 299380 }, { "epoch": 1.1573580120919733, "grad_norm": 0.12195313721895218, "learning_rate": 0.002, "loss": 2.3232, "step": 299390 }, { "epoch": 1.1573966692953566, "grad_norm": 0.09654326736927032, "learning_rate": 0.002, "loss": 2.3483, "step": 299400 }, { "epoch": 1.1574353264987398, "grad_norm": 0.09133104979991913, "learning_rate": 0.002, "loss": 2.3234, "step": 299410 }, { "epoch": 1.157473983702123, "grad_norm": 0.11477682739496231, "learning_rate": 0.002, "loss": 2.3486, "step": 299420 }, { "epoch": 1.1575126409055063, "grad_norm": 0.10247191786766052, "learning_rate": 0.002, "loss": 2.3336, "step": 299430 }, { "epoch": 1.1575512981088896, "grad_norm": 0.10641482472419739, "learning_rate": 0.002, "loss": 2.333, "step": 299440 }, { "epoch": 1.1575899553122728, "grad_norm": 0.2622292637825012, "learning_rate": 0.002, "loss": 2.3442, "step": 299450 }, { "epoch": 1.157628612515656, "grad_norm": 0.7004684209823608, "learning_rate": 0.002, "loss": 2.3476, "step": 299460 }, { "epoch": 1.1576672697190395, "grad_norm": 0.11828330159187317, "learning_rate": 0.002, "loss": 2.3329, "step": 299470 }, { "epoch": 1.1577059269224228, "grad_norm": 0.09065783023834229, "learning_rate": 0.002, "loss": 2.33, "step": 299480 }, { "epoch": 1.157744584125806, "grad_norm": 0.10334282368421555, "learning_rate": 0.002, "loss": 2.3139, "step": 299490 }, { "epoch": 1.1577832413291893, "grad_norm": 0.11078079044818878, "learning_rate": 0.002, "loss": 2.3276, "step": 299500 }, { "epoch": 1.1578218985325726, "grad_norm": 0.10222682356834412, "learning_rate": 0.002, "loss": 2.3377, "step": 299510 }, { "epoch": 1.1578605557359558, "grad_norm": 0.09242222458124161, "learning_rate": 0.002, "loss": 2.3472, "step": 299520 }, { "epoch": 1.157899212939339, "grad_norm": 0.13665029406547546, "learning_rate": 0.002, "loss": 2.3293, "step": 299530 }, { "epoch": 1.1579378701427223, "grad_norm": 0.11644630134105682, "learning_rate": 0.002, "loss": 2.3345, "step": 299540 }, { "epoch": 1.1579765273461056, "grad_norm": 0.10311318188905716, "learning_rate": 0.002, "loss": 2.3359, "step": 299550 }, { "epoch": 1.158015184549489, "grad_norm": 0.0962674617767334, "learning_rate": 0.002, "loss": 2.3232, "step": 299560 }, { "epoch": 1.1580538417528723, "grad_norm": 0.10548513382673264, "learning_rate": 0.002, "loss": 2.3414, "step": 299570 }, { "epoch": 1.1580924989562555, "grad_norm": 0.10236866027116776, "learning_rate": 0.002, "loss": 2.3341, "step": 299580 }, { "epoch": 1.1581311561596388, "grad_norm": 0.09971366077661514, "learning_rate": 0.002, "loss": 2.3425, "step": 299590 }, { "epoch": 1.158169813363022, "grad_norm": 0.10626211017370224, "learning_rate": 0.002, "loss": 2.335, "step": 299600 }, { "epoch": 1.1582084705664053, "grad_norm": 0.11893949657678604, "learning_rate": 0.002, "loss": 2.332, "step": 299610 }, { "epoch": 1.1582471277697886, "grad_norm": 0.10046777874231339, "learning_rate": 0.002, "loss": 2.3515, "step": 299620 }, { "epoch": 1.1582857849731718, "grad_norm": 0.09620541334152222, "learning_rate": 0.002, "loss": 2.3465, "step": 299630 }, { "epoch": 1.1583244421765553, "grad_norm": 0.13971824944019318, "learning_rate": 0.002, "loss": 2.3463, "step": 299640 }, { "epoch": 1.1583630993799385, "grad_norm": 0.10550142079591751, "learning_rate": 0.002, "loss": 2.3267, "step": 299650 }, { "epoch": 1.1584017565833218, "grad_norm": 0.11489161103963852, "learning_rate": 0.002, "loss": 2.3343, "step": 299660 }, { "epoch": 1.158440413786705, "grad_norm": 0.09856657683849335, "learning_rate": 0.002, "loss": 2.333, "step": 299670 }, { "epoch": 1.1584790709900883, "grad_norm": 0.10443252325057983, "learning_rate": 0.002, "loss": 2.3627, "step": 299680 }, { "epoch": 1.1585177281934715, "grad_norm": 0.09363368898630142, "learning_rate": 0.002, "loss": 2.3492, "step": 299690 }, { "epoch": 1.1585563853968548, "grad_norm": 0.13245029747486115, "learning_rate": 0.002, "loss": 2.3368, "step": 299700 }, { "epoch": 1.158595042600238, "grad_norm": 0.12561266124248505, "learning_rate": 0.002, "loss": 2.3479, "step": 299710 }, { "epoch": 1.1586336998036213, "grad_norm": 0.0979650467634201, "learning_rate": 0.002, "loss": 2.3308, "step": 299720 }, { "epoch": 1.1586723570070048, "grad_norm": 0.10856011509895325, "learning_rate": 0.002, "loss": 2.3491, "step": 299730 }, { "epoch": 1.158711014210388, "grad_norm": 0.1100655347108841, "learning_rate": 0.002, "loss": 2.3425, "step": 299740 }, { "epoch": 1.1587496714137713, "grad_norm": 0.09626168012619019, "learning_rate": 0.002, "loss": 2.3344, "step": 299750 }, { "epoch": 1.1587883286171545, "grad_norm": 0.1044035255908966, "learning_rate": 0.002, "loss": 2.3299, "step": 299760 }, { "epoch": 1.1588269858205378, "grad_norm": 0.1007981076836586, "learning_rate": 0.002, "loss": 2.3426, "step": 299770 }, { "epoch": 1.158865643023921, "grad_norm": 0.10776758193969727, "learning_rate": 0.002, "loss": 2.3231, "step": 299780 }, { "epoch": 1.1589043002273043, "grad_norm": 0.11865066736936569, "learning_rate": 0.002, "loss": 2.3404, "step": 299790 }, { "epoch": 1.1589429574306878, "grad_norm": 0.09247344732284546, "learning_rate": 0.002, "loss": 2.3348, "step": 299800 }, { "epoch": 1.158981614634071, "grad_norm": 0.09969916939735413, "learning_rate": 0.002, "loss": 2.3312, "step": 299810 }, { "epoch": 1.1590202718374543, "grad_norm": 0.11441514641046524, "learning_rate": 0.002, "loss": 2.33, "step": 299820 }, { "epoch": 1.1590589290408375, "grad_norm": 0.12042112648487091, "learning_rate": 0.002, "loss": 2.35, "step": 299830 }, { "epoch": 1.1590975862442208, "grad_norm": 0.13869859278202057, "learning_rate": 0.002, "loss": 2.3365, "step": 299840 }, { "epoch": 1.159136243447604, "grad_norm": 0.09707216918468475, "learning_rate": 0.002, "loss": 2.3369, "step": 299850 }, { "epoch": 1.1591749006509873, "grad_norm": 0.11898298561573029, "learning_rate": 0.002, "loss": 2.3396, "step": 299860 }, { "epoch": 1.1592135578543705, "grad_norm": 0.09777060896158218, "learning_rate": 0.002, "loss": 2.3451, "step": 299870 }, { "epoch": 1.1592522150577538, "grad_norm": 0.10587567090988159, "learning_rate": 0.002, "loss": 2.3498, "step": 299880 }, { "epoch": 1.159290872261137, "grad_norm": 0.11703342199325562, "learning_rate": 0.002, "loss": 2.3206, "step": 299890 }, { "epoch": 1.1593295294645205, "grad_norm": 0.106162890791893, "learning_rate": 0.002, "loss": 2.3319, "step": 299900 }, { "epoch": 1.1593681866679038, "grad_norm": 0.11302954703569412, "learning_rate": 0.002, "loss": 2.3321, "step": 299910 }, { "epoch": 1.159406843871287, "grad_norm": 0.12867295742034912, "learning_rate": 0.002, "loss": 2.3239, "step": 299920 }, { "epoch": 1.1594455010746703, "grad_norm": 0.09764662384986877, "learning_rate": 0.002, "loss": 2.339, "step": 299930 }, { "epoch": 1.1594841582780535, "grad_norm": 0.09465903043746948, "learning_rate": 0.002, "loss": 2.3496, "step": 299940 }, { "epoch": 1.1595228154814368, "grad_norm": 0.10889971256256104, "learning_rate": 0.002, "loss": 2.3304, "step": 299950 }, { "epoch": 1.15956147268482, "grad_norm": 0.09882204234600067, "learning_rate": 0.002, "loss": 2.3398, "step": 299960 }, { "epoch": 1.1596001298882035, "grad_norm": 0.10860224068164825, "learning_rate": 0.002, "loss": 2.3342, "step": 299970 }, { "epoch": 1.1596387870915867, "grad_norm": 0.11030543595552444, "learning_rate": 0.002, "loss": 2.3388, "step": 299980 }, { "epoch": 1.15967744429497, "grad_norm": 0.1054888442158699, "learning_rate": 0.002, "loss": 2.336, "step": 299990 }, { "epoch": 1.1597161014983532, "grad_norm": 0.10351574420928955, "learning_rate": 0.002, "loss": 2.3593, "step": 300000 }, { "epoch": 1.1597547587017365, "grad_norm": 0.11095496267080307, "learning_rate": 0.002, "loss": 2.3296, "step": 300010 }, { "epoch": 1.1597934159051198, "grad_norm": 0.09909340739250183, "learning_rate": 0.002, "loss": 2.346, "step": 300020 }, { "epoch": 1.159832073108503, "grad_norm": 0.12616288661956787, "learning_rate": 0.002, "loss": 2.3325, "step": 300030 }, { "epoch": 1.1598707303118863, "grad_norm": 0.09794657677412033, "learning_rate": 0.002, "loss": 2.3368, "step": 300040 }, { "epoch": 1.1599093875152695, "grad_norm": 0.10776015371084213, "learning_rate": 0.002, "loss": 2.3423, "step": 300050 }, { "epoch": 1.1599480447186528, "grad_norm": 0.10282982885837555, "learning_rate": 0.002, "loss": 2.3382, "step": 300060 }, { "epoch": 1.1599867019220362, "grad_norm": 0.121549591422081, "learning_rate": 0.002, "loss": 2.3215, "step": 300070 }, { "epoch": 1.1600253591254195, "grad_norm": 0.1109287366271019, "learning_rate": 0.002, "loss": 2.3212, "step": 300080 }, { "epoch": 1.1600640163288027, "grad_norm": 0.08838099241256714, "learning_rate": 0.002, "loss": 2.3502, "step": 300090 }, { "epoch": 1.160102673532186, "grad_norm": 0.12162969261407852, "learning_rate": 0.002, "loss": 2.3358, "step": 300100 }, { "epoch": 1.1601413307355692, "grad_norm": 0.098938949406147, "learning_rate": 0.002, "loss": 2.3268, "step": 300110 }, { "epoch": 1.1601799879389525, "grad_norm": 0.09758912026882172, "learning_rate": 0.002, "loss": 2.342, "step": 300120 }, { "epoch": 1.1602186451423357, "grad_norm": 0.10965708643198013, "learning_rate": 0.002, "loss": 2.3337, "step": 300130 }, { "epoch": 1.1602573023457192, "grad_norm": 0.08661700785160065, "learning_rate": 0.002, "loss": 2.3285, "step": 300140 }, { "epoch": 1.1602959595491025, "grad_norm": 0.09226927161216736, "learning_rate": 0.002, "loss": 2.3275, "step": 300150 }, { "epoch": 1.1603346167524857, "grad_norm": 0.09487424045801163, "learning_rate": 0.002, "loss": 2.33, "step": 300160 }, { "epoch": 1.160373273955869, "grad_norm": 0.12119285762310028, "learning_rate": 0.002, "loss": 2.3413, "step": 300170 }, { "epoch": 1.1604119311592522, "grad_norm": 0.11557340621948242, "learning_rate": 0.002, "loss": 2.3348, "step": 300180 }, { "epoch": 1.1604505883626355, "grad_norm": 0.11036872118711472, "learning_rate": 0.002, "loss": 2.3257, "step": 300190 }, { "epoch": 1.1604892455660187, "grad_norm": 0.10435856133699417, "learning_rate": 0.002, "loss": 2.3267, "step": 300200 }, { "epoch": 1.160527902769402, "grad_norm": 0.10671596229076385, "learning_rate": 0.002, "loss": 2.3545, "step": 300210 }, { "epoch": 1.1605665599727852, "grad_norm": 0.14279340207576752, "learning_rate": 0.002, "loss": 2.34, "step": 300220 }, { "epoch": 1.1606052171761685, "grad_norm": 0.10160546004772186, "learning_rate": 0.002, "loss": 2.3227, "step": 300230 }, { "epoch": 1.160643874379552, "grad_norm": 0.10072829574346542, "learning_rate": 0.002, "loss": 2.3456, "step": 300240 }, { "epoch": 1.1606825315829352, "grad_norm": 0.10195444524288177, "learning_rate": 0.002, "loss": 2.3407, "step": 300250 }, { "epoch": 1.1607211887863185, "grad_norm": 0.12637607753276825, "learning_rate": 0.002, "loss": 2.3292, "step": 300260 }, { "epoch": 1.1607598459897017, "grad_norm": 0.10484391450881958, "learning_rate": 0.002, "loss": 2.3296, "step": 300270 }, { "epoch": 1.160798503193085, "grad_norm": 0.18085014820098877, "learning_rate": 0.002, "loss": 2.3246, "step": 300280 }, { "epoch": 1.1608371603964682, "grad_norm": 0.10291527211666107, "learning_rate": 0.002, "loss": 2.3268, "step": 300290 }, { "epoch": 1.1608758175998515, "grad_norm": 0.09656839817762375, "learning_rate": 0.002, "loss": 2.3407, "step": 300300 }, { "epoch": 1.160914474803235, "grad_norm": 0.08661667257547379, "learning_rate": 0.002, "loss": 2.3411, "step": 300310 }, { "epoch": 1.1609531320066182, "grad_norm": 0.11928417533636093, "learning_rate": 0.002, "loss": 2.3435, "step": 300320 }, { "epoch": 1.1609917892100015, "grad_norm": 0.09696167707443237, "learning_rate": 0.002, "loss": 2.3461, "step": 300330 }, { "epoch": 1.1610304464133847, "grad_norm": 0.11764390766620636, "learning_rate": 0.002, "loss": 2.3379, "step": 300340 }, { "epoch": 1.161069103616768, "grad_norm": 0.11150548607110977, "learning_rate": 0.002, "loss": 2.3346, "step": 300350 }, { "epoch": 1.1611077608201512, "grad_norm": 0.12289080023765564, "learning_rate": 0.002, "loss": 2.3257, "step": 300360 }, { "epoch": 1.1611464180235345, "grad_norm": 0.1032310277223587, "learning_rate": 0.002, "loss": 2.3394, "step": 300370 }, { "epoch": 1.1611850752269177, "grad_norm": 0.13653236627578735, "learning_rate": 0.002, "loss": 2.3384, "step": 300380 }, { "epoch": 1.161223732430301, "grad_norm": 0.10622701793909073, "learning_rate": 0.002, "loss": 2.349, "step": 300390 }, { "epoch": 1.1612623896336842, "grad_norm": 0.09135434776544571, "learning_rate": 0.002, "loss": 2.3407, "step": 300400 }, { "epoch": 1.1613010468370677, "grad_norm": 0.1155262440443039, "learning_rate": 0.002, "loss": 2.3455, "step": 300410 }, { "epoch": 1.161339704040451, "grad_norm": 0.09453856945037842, "learning_rate": 0.002, "loss": 2.3259, "step": 300420 }, { "epoch": 1.1613783612438342, "grad_norm": 0.08795636147260666, "learning_rate": 0.002, "loss": 2.3337, "step": 300430 }, { "epoch": 1.1614170184472175, "grad_norm": 0.14006325602531433, "learning_rate": 0.002, "loss": 2.335, "step": 300440 }, { "epoch": 1.1614556756506007, "grad_norm": 0.09395135939121246, "learning_rate": 0.002, "loss": 2.3306, "step": 300450 }, { "epoch": 1.161494332853984, "grad_norm": 0.1190577894449234, "learning_rate": 0.002, "loss": 2.3296, "step": 300460 }, { "epoch": 1.1615329900573672, "grad_norm": 0.10218417644500732, "learning_rate": 0.002, "loss": 2.3368, "step": 300470 }, { "epoch": 1.1615716472607507, "grad_norm": 0.12376347929239273, "learning_rate": 0.002, "loss": 2.3339, "step": 300480 }, { "epoch": 1.161610304464134, "grad_norm": 0.10159707069396973, "learning_rate": 0.002, "loss": 2.334, "step": 300490 }, { "epoch": 1.1616489616675172, "grad_norm": 0.09371952712535858, "learning_rate": 0.002, "loss": 2.3244, "step": 300500 }, { "epoch": 1.1616876188709004, "grad_norm": 0.11684858053922653, "learning_rate": 0.002, "loss": 2.3486, "step": 300510 }, { "epoch": 1.1617262760742837, "grad_norm": 0.0982179343700409, "learning_rate": 0.002, "loss": 2.3216, "step": 300520 }, { "epoch": 1.161764933277667, "grad_norm": 0.0905216634273529, "learning_rate": 0.002, "loss": 2.3531, "step": 300530 }, { "epoch": 1.1618035904810502, "grad_norm": 0.12256705015897751, "learning_rate": 0.002, "loss": 2.3314, "step": 300540 }, { "epoch": 1.1618422476844334, "grad_norm": 0.10113391280174255, "learning_rate": 0.002, "loss": 2.3356, "step": 300550 }, { "epoch": 1.1618809048878167, "grad_norm": 0.09468615800142288, "learning_rate": 0.002, "loss": 2.3377, "step": 300560 }, { "epoch": 1.1619195620912, "grad_norm": 0.09727149456739426, "learning_rate": 0.002, "loss": 2.3413, "step": 300570 }, { "epoch": 1.1619582192945834, "grad_norm": 0.09404943883419037, "learning_rate": 0.002, "loss": 2.3337, "step": 300580 }, { "epoch": 1.1619968764979667, "grad_norm": 0.1004868894815445, "learning_rate": 0.002, "loss": 2.3199, "step": 300590 }, { "epoch": 1.16203553370135, "grad_norm": 0.12360697239637375, "learning_rate": 0.002, "loss": 2.3186, "step": 300600 }, { "epoch": 1.1620741909047332, "grad_norm": 0.13018663227558136, "learning_rate": 0.002, "loss": 2.3421, "step": 300610 }, { "epoch": 1.1621128481081164, "grad_norm": 0.09137805551290512, "learning_rate": 0.002, "loss": 2.3303, "step": 300620 }, { "epoch": 1.1621515053114997, "grad_norm": 0.10897412896156311, "learning_rate": 0.002, "loss": 2.3422, "step": 300630 }, { "epoch": 1.162190162514883, "grad_norm": 0.09854017198085785, "learning_rate": 0.002, "loss": 2.3287, "step": 300640 }, { "epoch": 1.1622288197182664, "grad_norm": 0.10869092494249344, "learning_rate": 0.002, "loss": 2.3289, "step": 300650 }, { "epoch": 1.1622674769216497, "grad_norm": 0.1176285594701767, "learning_rate": 0.002, "loss": 2.3336, "step": 300660 }, { "epoch": 1.162306134125033, "grad_norm": 0.13885082304477692, "learning_rate": 0.002, "loss": 2.3471, "step": 300670 }, { "epoch": 1.1623447913284162, "grad_norm": 0.10269021987915039, "learning_rate": 0.002, "loss": 2.3408, "step": 300680 }, { "epoch": 1.1623834485317994, "grad_norm": 0.11646575480699539, "learning_rate": 0.002, "loss": 2.3291, "step": 300690 }, { "epoch": 1.1624221057351827, "grad_norm": 0.11593282222747803, "learning_rate": 0.002, "loss": 2.354, "step": 300700 }, { "epoch": 1.162460762938566, "grad_norm": 0.10426647216081619, "learning_rate": 0.002, "loss": 2.3271, "step": 300710 }, { "epoch": 1.1624994201419492, "grad_norm": 0.09929317235946655, "learning_rate": 0.002, "loss": 2.3346, "step": 300720 }, { "epoch": 1.1625380773453324, "grad_norm": 0.09835363924503326, "learning_rate": 0.002, "loss": 2.3292, "step": 300730 }, { "epoch": 1.1625767345487157, "grad_norm": 0.10210787504911423, "learning_rate": 0.002, "loss": 2.3415, "step": 300740 }, { "epoch": 1.1626153917520992, "grad_norm": 0.121326744556427, "learning_rate": 0.002, "loss": 2.3366, "step": 300750 }, { "epoch": 1.1626540489554824, "grad_norm": 0.11139842122793198, "learning_rate": 0.002, "loss": 2.336, "step": 300760 }, { "epoch": 1.1626927061588657, "grad_norm": 0.11265687644481659, "learning_rate": 0.002, "loss": 2.3394, "step": 300770 }, { "epoch": 1.162731363362249, "grad_norm": 0.10704000294208527, "learning_rate": 0.002, "loss": 2.3224, "step": 300780 }, { "epoch": 1.1627700205656322, "grad_norm": 0.12270093709230423, "learning_rate": 0.002, "loss": 2.3352, "step": 300790 }, { "epoch": 1.1628086777690154, "grad_norm": 0.10371486097574234, "learning_rate": 0.002, "loss": 2.3379, "step": 300800 }, { "epoch": 1.1628473349723987, "grad_norm": 0.10047317296266556, "learning_rate": 0.002, "loss": 2.3364, "step": 300810 }, { "epoch": 1.1628859921757821, "grad_norm": 0.10786613076925278, "learning_rate": 0.002, "loss": 2.3416, "step": 300820 }, { "epoch": 1.1629246493791654, "grad_norm": 0.1134653314948082, "learning_rate": 0.002, "loss": 2.3374, "step": 300830 }, { "epoch": 1.1629633065825487, "grad_norm": 0.16030678153038025, "learning_rate": 0.002, "loss": 2.3331, "step": 300840 }, { "epoch": 1.163001963785932, "grad_norm": 0.12873561680316925, "learning_rate": 0.002, "loss": 2.3447, "step": 300850 }, { "epoch": 1.1630406209893152, "grad_norm": 0.09747027605772018, "learning_rate": 0.002, "loss": 2.3239, "step": 300860 }, { "epoch": 1.1630792781926984, "grad_norm": 0.1002790704369545, "learning_rate": 0.002, "loss": 2.3235, "step": 300870 }, { "epoch": 1.1631179353960817, "grad_norm": 0.1007324680685997, "learning_rate": 0.002, "loss": 2.3394, "step": 300880 }, { "epoch": 1.163156592599465, "grad_norm": 0.09922447055578232, "learning_rate": 0.002, "loss": 2.3237, "step": 300890 }, { "epoch": 1.1631952498028482, "grad_norm": 0.09478519856929779, "learning_rate": 0.002, "loss": 2.3421, "step": 300900 }, { "epoch": 1.1632339070062316, "grad_norm": 0.12346886843442917, "learning_rate": 0.002, "loss": 2.3306, "step": 300910 }, { "epoch": 1.163272564209615, "grad_norm": 0.1043015792965889, "learning_rate": 0.002, "loss": 2.3575, "step": 300920 }, { "epoch": 1.1633112214129981, "grad_norm": 0.09666992723941803, "learning_rate": 0.002, "loss": 2.3358, "step": 300930 }, { "epoch": 1.1633498786163814, "grad_norm": 0.0899510309100151, "learning_rate": 0.002, "loss": 2.3327, "step": 300940 }, { "epoch": 1.1633885358197646, "grad_norm": 0.12516461312770844, "learning_rate": 0.002, "loss": 2.3231, "step": 300950 }, { "epoch": 1.163427193023148, "grad_norm": 0.10826769471168518, "learning_rate": 0.002, "loss": 2.3463, "step": 300960 }, { "epoch": 1.1634658502265312, "grad_norm": 0.11351073533296585, "learning_rate": 0.002, "loss": 2.3352, "step": 300970 }, { "epoch": 1.1635045074299144, "grad_norm": 0.1089121550321579, "learning_rate": 0.002, "loss": 2.3366, "step": 300980 }, { "epoch": 1.1635431646332979, "grad_norm": 0.10068459808826447, "learning_rate": 0.002, "loss": 2.3396, "step": 300990 }, { "epoch": 1.1635818218366811, "grad_norm": 0.1085510179400444, "learning_rate": 0.002, "loss": 2.3367, "step": 301000 }, { "epoch": 1.1636204790400644, "grad_norm": 0.09401760250329971, "learning_rate": 0.002, "loss": 2.3459, "step": 301010 }, { "epoch": 1.1636591362434476, "grad_norm": 0.1090821698307991, "learning_rate": 0.002, "loss": 2.3267, "step": 301020 }, { "epoch": 1.1636977934468309, "grad_norm": 0.10806293040513992, "learning_rate": 0.002, "loss": 2.3474, "step": 301030 }, { "epoch": 1.1637364506502141, "grad_norm": 0.13532325625419617, "learning_rate": 0.002, "loss": 2.3309, "step": 301040 }, { "epoch": 1.1637751078535974, "grad_norm": 0.09232982993125916, "learning_rate": 0.002, "loss": 2.3371, "step": 301050 }, { "epoch": 1.1638137650569806, "grad_norm": 0.0929589793086052, "learning_rate": 0.002, "loss": 2.3421, "step": 301060 }, { "epoch": 1.163852422260364, "grad_norm": 0.11074736714363098, "learning_rate": 0.002, "loss": 2.3245, "step": 301070 }, { "epoch": 1.1638910794637474, "grad_norm": 0.11797834932804108, "learning_rate": 0.002, "loss": 2.32, "step": 301080 }, { "epoch": 1.1639297366671306, "grad_norm": 0.0900818407535553, "learning_rate": 0.002, "loss": 2.3262, "step": 301090 }, { "epoch": 1.1639683938705139, "grad_norm": 0.11992789804935455, "learning_rate": 0.002, "loss": 2.3365, "step": 301100 }, { "epoch": 1.1640070510738971, "grad_norm": 0.1221378892660141, "learning_rate": 0.002, "loss": 2.3411, "step": 301110 }, { "epoch": 1.1640457082772804, "grad_norm": 0.09956325590610504, "learning_rate": 0.002, "loss": 2.3416, "step": 301120 }, { "epoch": 1.1640843654806636, "grad_norm": 0.10660485178232193, "learning_rate": 0.002, "loss": 2.3432, "step": 301130 }, { "epoch": 1.1641230226840469, "grad_norm": 0.09304320812225342, "learning_rate": 0.002, "loss": 2.3277, "step": 301140 }, { "epoch": 1.1641616798874301, "grad_norm": 0.10756511986255646, "learning_rate": 0.002, "loss": 2.3295, "step": 301150 }, { "epoch": 1.1642003370908136, "grad_norm": 0.10929480940103531, "learning_rate": 0.002, "loss": 2.3262, "step": 301160 }, { "epoch": 1.1642389942941969, "grad_norm": 0.09262862801551819, "learning_rate": 0.002, "loss": 2.3366, "step": 301170 }, { "epoch": 1.1642776514975801, "grad_norm": 0.12419795989990234, "learning_rate": 0.002, "loss": 2.3443, "step": 301180 }, { "epoch": 1.1643163087009634, "grad_norm": 0.11665025353431702, "learning_rate": 0.002, "loss": 2.3404, "step": 301190 }, { "epoch": 1.1643549659043466, "grad_norm": 0.10153454542160034, "learning_rate": 0.002, "loss": 2.3292, "step": 301200 }, { "epoch": 1.1643936231077299, "grad_norm": 0.10074479132890701, "learning_rate": 0.002, "loss": 2.3434, "step": 301210 }, { "epoch": 1.1644322803111131, "grad_norm": 0.10060901939868927, "learning_rate": 0.002, "loss": 2.3402, "step": 301220 }, { "epoch": 1.1644709375144964, "grad_norm": 0.10998406261205673, "learning_rate": 0.002, "loss": 2.3266, "step": 301230 }, { "epoch": 1.1645095947178796, "grad_norm": 0.09824924916028976, "learning_rate": 0.002, "loss": 2.3375, "step": 301240 }, { "epoch": 1.164548251921263, "grad_norm": 0.09302695840597153, "learning_rate": 0.002, "loss": 2.3577, "step": 301250 }, { "epoch": 1.1645869091246464, "grad_norm": 0.1218177080154419, "learning_rate": 0.002, "loss": 2.335, "step": 301260 }, { "epoch": 1.1646255663280296, "grad_norm": 0.10029064863920212, "learning_rate": 0.002, "loss": 2.3395, "step": 301270 }, { "epoch": 1.1646642235314129, "grad_norm": 0.17595061659812927, "learning_rate": 0.002, "loss": 2.3363, "step": 301280 }, { "epoch": 1.164702880734796, "grad_norm": 0.0965089499950409, "learning_rate": 0.002, "loss": 2.3482, "step": 301290 }, { "epoch": 1.1647415379381794, "grad_norm": 0.1053343340754509, "learning_rate": 0.002, "loss": 2.3231, "step": 301300 }, { "epoch": 1.1647801951415626, "grad_norm": 0.13139332830905914, "learning_rate": 0.002, "loss": 2.3402, "step": 301310 }, { "epoch": 1.1648188523449459, "grad_norm": 0.10064920783042908, "learning_rate": 0.002, "loss": 2.3286, "step": 301320 }, { "epoch": 1.1648575095483293, "grad_norm": 0.10264275968074799, "learning_rate": 0.002, "loss": 2.3255, "step": 301330 }, { "epoch": 1.1648961667517126, "grad_norm": 0.1274469792842865, "learning_rate": 0.002, "loss": 2.348, "step": 301340 }, { "epoch": 1.1649348239550958, "grad_norm": 0.09462037682533264, "learning_rate": 0.002, "loss": 2.3348, "step": 301350 }, { "epoch": 1.164973481158479, "grad_norm": 0.11224725842475891, "learning_rate": 0.002, "loss": 2.3557, "step": 301360 }, { "epoch": 1.1650121383618623, "grad_norm": 0.12100022286176682, "learning_rate": 0.002, "loss": 2.3409, "step": 301370 }, { "epoch": 1.1650507955652456, "grad_norm": 0.08697990328073502, "learning_rate": 0.002, "loss": 2.3385, "step": 301380 }, { "epoch": 1.1650894527686289, "grad_norm": 0.1286000907421112, "learning_rate": 0.002, "loss": 2.331, "step": 301390 }, { "epoch": 1.165128109972012, "grad_norm": 0.10235783457756042, "learning_rate": 0.002, "loss": 2.3309, "step": 301400 }, { "epoch": 1.1651667671753954, "grad_norm": 0.10327660292387009, "learning_rate": 0.002, "loss": 2.328, "step": 301410 }, { "epoch": 1.1652054243787788, "grad_norm": 0.09840571880340576, "learning_rate": 0.002, "loss": 2.3282, "step": 301420 }, { "epoch": 1.165244081582162, "grad_norm": 0.10375815629959106, "learning_rate": 0.002, "loss": 2.3429, "step": 301430 }, { "epoch": 1.1652827387855453, "grad_norm": 0.11783238500356674, "learning_rate": 0.002, "loss": 2.3483, "step": 301440 }, { "epoch": 1.1653213959889286, "grad_norm": 0.10080355405807495, "learning_rate": 0.002, "loss": 2.3442, "step": 301450 }, { "epoch": 1.1653600531923118, "grad_norm": 0.11816217750310898, "learning_rate": 0.002, "loss": 2.3378, "step": 301460 }, { "epoch": 1.165398710395695, "grad_norm": 0.10482607036828995, "learning_rate": 0.002, "loss": 2.3127, "step": 301470 }, { "epoch": 1.1654373675990783, "grad_norm": 0.11168037354946136, "learning_rate": 0.002, "loss": 2.3412, "step": 301480 }, { "epoch": 1.1654760248024616, "grad_norm": 0.09168479591608047, "learning_rate": 0.002, "loss": 2.3304, "step": 301490 }, { "epoch": 1.165514682005845, "grad_norm": 0.10162169486284256, "learning_rate": 0.002, "loss": 2.3483, "step": 301500 }, { "epoch": 1.1655533392092283, "grad_norm": 0.12645724415779114, "learning_rate": 0.002, "loss": 2.3503, "step": 301510 }, { "epoch": 1.1655919964126116, "grad_norm": 0.1105879694223404, "learning_rate": 0.002, "loss": 2.3381, "step": 301520 }, { "epoch": 1.1656306536159948, "grad_norm": 0.10556762665510178, "learning_rate": 0.002, "loss": 2.3322, "step": 301530 }, { "epoch": 1.165669310819378, "grad_norm": 0.1095210388302803, "learning_rate": 0.002, "loss": 2.343, "step": 301540 }, { "epoch": 1.1657079680227613, "grad_norm": 0.11712250858545303, "learning_rate": 0.002, "loss": 2.3295, "step": 301550 }, { "epoch": 1.1657466252261446, "grad_norm": 0.11337719112634659, "learning_rate": 0.002, "loss": 2.3283, "step": 301560 }, { "epoch": 1.1657852824295278, "grad_norm": 0.1021040752530098, "learning_rate": 0.002, "loss": 2.342, "step": 301570 }, { "epoch": 1.165823939632911, "grad_norm": 0.09224560111761093, "learning_rate": 0.002, "loss": 2.326, "step": 301580 }, { "epoch": 1.1658625968362946, "grad_norm": 0.1109401062130928, "learning_rate": 0.002, "loss": 2.3461, "step": 301590 }, { "epoch": 1.1659012540396778, "grad_norm": 0.08976784348487854, "learning_rate": 0.002, "loss": 2.3389, "step": 301600 }, { "epoch": 1.165939911243061, "grad_norm": 0.10225795954465866, "learning_rate": 0.002, "loss": 2.3351, "step": 301610 }, { "epoch": 1.1659785684464443, "grad_norm": 0.12347820401191711, "learning_rate": 0.002, "loss": 2.3321, "step": 301620 }, { "epoch": 1.1660172256498276, "grad_norm": 0.17781856656074524, "learning_rate": 0.002, "loss": 2.3307, "step": 301630 }, { "epoch": 1.1660558828532108, "grad_norm": 0.10366151481866837, "learning_rate": 0.002, "loss": 2.3415, "step": 301640 }, { "epoch": 1.166094540056594, "grad_norm": 0.10425670444965363, "learning_rate": 0.002, "loss": 2.3373, "step": 301650 }, { "epoch": 1.1661331972599773, "grad_norm": 0.10662340372800827, "learning_rate": 0.002, "loss": 2.3412, "step": 301660 }, { "epoch": 1.1661718544633608, "grad_norm": 0.10434211045503616, "learning_rate": 0.002, "loss": 2.3608, "step": 301670 }, { "epoch": 1.166210511666744, "grad_norm": 0.09581685811281204, "learning_rate": 0.002, "loss": 2.3314, "step": 301680 }, { "epoch": 1.1662491688701273, "grad_norm": 0.0873362123966217, "learning_rate": 0.002, "loss": 2.3539, "step": 301690 }, { "epoch": 1.1662878260735106, "grad_norm": 0.1252460479736328, "learning_rate": 0.002, "loss": 2.3449, "step": 301700 }, { "epoch": 1.1663264832768938, "grad_norm": 0.119962178170681, "learning_rate": 0.002, "loss": 2.3223, "step": 301710 }, { "epoch": 1.166365140480277, "grad_norm": 0.09406163543462753, "learning_rate": 0.002, "loss": 2.3328, "step": 301720 }, { "epoch": 1.1664037976836603, "grad_norm": 0.10572811961174011, "learning_rate": 0.002, "loss": 2.33, "step": 301730 }, { "epoch": 1.1664424548870436, "grad_norm": 0.10888563841581345, "learning_rate": 0.002, "loss": 2.3285, "step": 301740 }, { "epoch": 1.1664811120904268, "grad_norm": 0.0975589007139206, "learning_rate": 0.002, "loss": 2.3368, "step": 301750 }, { "epoch": 1.1665197692938103, "grad_norm": 0.11016840487718582, "learning_rate": 0.002, "loss": 2.3351, "step": 301760 }, { "epoch": 1.1665584264971935, "grad_norm": 0.12181409448385239, "learning_rate": 0.002, "loss": 2.3461, "step": 301770 }, { "epoch": 1.1665970837005768, "grad_norm": 0.10931305587291718, "learning_rate": 0.002, "loss": 2.3251, "step": 301780 }, { "epoch": 1.16663574090396, "grad_norm": 0.11234242469072342, "learning_rate": 0.002, "loss": 2.3492, "step": 301790 }, { "epoch": 1.1666743981073433, "grad_norm": 0.09115167707204819, "learning_rate": 0.002, "loss": 2.3285, "step": 301800 }, { "epoch": 1.1667130553107266, "grad_norm": 0.15512128174304962, "learning_rate": 0.002, "loss": 2.3382, "step": 301810 }, { "epoch": 1.1667517125141098, "grad_norm": 0.11527229100465775, "learning_rate": 0.002, "loss": 2.3193, "step": 301820 }, { "epoch": 1.1667903697174933, "grad_norm": 0.10287356376647949, "learning_rate": 0.002, "loss": 2.3502, "step": 301830 }, { "epoch": 1.1668290269208765, "grad_norm": 0.09924790263175964, "learning_rate": 0.002, "loss": 2.3363, "step": 301840 }, { "epoch": 1.1668676841242598, "grad_norm": 0.11103297024965286, "learning_rate": 0.002, "loss": 2.3426, "step": 301850 }, { "epoch": 1.166906341327643, "grad_norm": 0.10338755697011948, "learning_rate": 0.002, "loss": 2.3401, "step": 301860 }, { "epoch": 1.1669449985310263, "grad_norm": 0.11973705142736435, "learning_rate": 0.002, "loss": 2.3246, "step": 301870 }, { "epoch": 1.1669836557344095, "grad_norm": 0.09499425441026688, "learning_rate": 0.002, "loss": 2.3378, "step": 301880 }, { "epoch": 1.1670223129377928, "grad_norm": 0.09334953129291534, "learning_rate": 0.002, "loss": 2.3464, "step": 301890 }, { "epoch": 1.167060970141176, "grad_norm": 0.12852221727371216, "learning_rate": 0.002, "loss": 2.3473, "step": 301900 }, { "epoch": 1.1670996273445593, "grad_norm": 0.12621502578258514, "learning_rate": 0.002, "loss": 2.3315, "step": 301910 }, { "epoch": 1.1671382845479426, "grad_norm": 0.09067445993423462, "learning_rate": 0.002, "loss": 2.3315, "step": 301920 }, { "epoch": 1.167176941751326, "grad_norm": 0.09515558183193207, "learning_rate": 0.002, "loss": 2.3556, "step": 301930 }, { "epoch": 1.1672155989547093, "grad_norm": 0.10094108432531357, "learning_rate": 0.002, "loss": 2.348, "step": 301940 }, { "epoch": 1.1672542561580925, "grad_norm": 0.10715378075838089, "learning_rate": 0.002, "loss": 2.3278, "step": 301950 }, { "epoch": 1.1672929133614758, "grad_norm": 0.11361619085073471, "learning_rate": 0.002, "loss": 2.3472, "step": 301960 }, { "epoch": 1.167331570564859, "grad_norm": 0.13124187290668488, "learning_rate": 0.002, "loss": 2.3448, "step": 301970 }, { "epoch": 1.1673702277682423, "grad_norm": 0.11959458887577057, "learning_rate": 0.002, "loss": 2.3262, "step": 301980 }, { "epoch": 1.1674088849716255, "grad_norm": 0.09688874334096909, "learning_rate": 0.002, "loss": 2.3355, "step": 301990 }, { "epoch": 1.167447542175009, "grad_norm": 0.10897421091794968, "learning_rate": 0.002, "loss": 2.3314, "step": 302000 }, { "epoch": 1.1674861993783923, "grad_norm": 0.11167865991592407, "learning_rate": 0.002, "loss": 2.337, "step": 302010 }, { "epoch": 1.1675248565817755, "grad_norm": 0.0992354303598404, "learning_rate": 0.002, "loss": 2.3364, "step": 302020 }, { "epoch": 1.1675635137851588, "grad_norm": 0.10574154555797577, "learning_rate": 0.002, "loss": 2.3258, "step": 302030 }, { "epoch": 1.167602170988542, "grad_norm": 0.097483791410923, "learning_rate": 0.002, "loss": 2.3318, "step": 302040 }, { "epoch": 1.1676408281919253, "grad_norm": 0.10599792748689651, "learning_rate": 0.002, "loss": 2.3419, "step": 302050 }, { "epoch": 1.1676794853953085, "grad_norm": 0.11261613667011261, "learning_rate": 0.002, "loss": 2.3263, "step": 302060 }, { "epoch": 1.1677181425986918, "grad_norm": 0.12978191673755646, "learning_rate": 0.002, "loss": 2.3519, "step": 302070 }, { "epoch": 1.167756799802075, "grad_norm": 0.10361035168170929, "learning_rate": 0.002, "loss": 2.3168, "step": 302080 }, { "epoch": 1.1677954570054583, "grad_norm": 0.1083601862192154, "learning_rate": 0.002, "loss": 2.3244, "step": 302090 }, { "epoch": 1.1678341142088418, "grad_norm": 0.10080970823764801, "learning_rate": 0.002, "loss": 2.3518, "step": 302100 }, { "epoch": 1.167872771412225, "grad_norm": 0.09778130054473877, "learning_rate": 0.002, "loss": 2.3297, "step": 302110 }, { "epoch": 1.1679114286156083, "grad_norm": 0.11359573900699615, "learning_rate": 0.002, "loss": 2.3425, "step": 302120 }, { "epoch": 1.1679500858189915, "grad_norm": 0.09689043462276459, "learning_rate": 0.002, "loss": 2.3473, "step": 302130 }, { "epoch": 1.1679887430223748, "grad_norm": 0.09919068217277527, "learning_rate": 0.002, "loss": 2.3452, "step": 302140 }, { "epoch": 1.168027400225758, "grad_norm": 0.09681098163127899, "learning_rate": 0.002, "loss": 2.319, "step": 302150 }, { "epoch": 1.1680660574291413, "grad_norm": 0.09828266501426697, "learning_rate": 0.002, "loss": 2.3213, "step": 302160 }, { "epoch": 1.1681047146325247, "grad_norm": 0.12220165133476257, "learning_rate": 0.002, "loss": 2.3457, "step": 302170 }, { "epoch": 1.168143371835908, "grad_norm": 0.09539945423603058, "learning_rate": 0.002, "loss": 2.3323, "step": 302180 }, { "epoch": 1.1681820290392912, "grad_norm": 0.09313678741455078, "learning_rate": 0.002, "loss": 2.3201, "step": 302190 }, { "epoch": 1.1682206862426745, "grad_norm": 0.11383495479822159, "learning_rate": 0.002, "loss": 2.342, "step": 302200 }, { "epoch": 1.1682593434460578, "grad_norm": 0.10649847239255905, "learning_rate": 0.002, "loss": 2.3269, "step": 302210 }, { "epoch": 1.168298000649441, "grad_norm": 0.12820562720298767, "learning_rate": 0.002, "loss": 2.3367, "step": 302220 }, { "epoch": 1.1683366578528243, "grad_norm": 0.11359477043151855, "learning_rate": 0.002, "loss": 2.3308, "step": 302230 }, { "epoch": 1.1683753150562075, "grad_norm": 0.10154304653406143, "learning_rate": 0.002, "loss": 2.3418, "step": 302240 }, { "epoch": 1.1684139722595908, "grad_norm": 0.10479036718606949, "learning_rate": 0.002, "loss": 2.3321, "step": 302250 }, { "epoch": 1.168452629462974, "grad_norm": 0.15693366527557373, "learning_rate": 0.002, "loss": 2.3284, "step": 302260 }, { "epoch": 1.1684912866663575, "grad_norm": 0.09982585906982422, "learning_rate": 0.002, "loss": 2.3231, "step": 302270 }, { "epoch": 1.1685299438697407, "grad_norm": 0.10710987448692322, "learning_rate": 0.002, "loss": 2.3402, "step": 302280 }, { "epoch": 1.168568601073124, "grad_norm": 0.10061867535114288, "learning_rate": 0.002, "loss": 2.3345, "step": 302290 }, { "epoch": 1.1686072582765072, "grad_norm": 0.1065753772854805, "learning_rate": 0.002, "loss": 2.3392, "step": 302300 }, { "epoch": 1.1686459154798905, "grad_norm": 0.10224214941263199, "learning_rate": 0.002, "loss": 2.3302, "step": 302310 }, { "epoch": 1.1686845726832737, "grad_norm": 0.09375665336847305, "learning_rate": 0.002, "loss": 2.3438, "step": 302320 }, { "epoch": 1.168723229886657, "grad_norm": 0.08769812434911728, "learning_rate": 0.002, "loss": 2.3477, "step": 302330 }, { "epoch": 1.1687618870900405, "grad_norm": 0.10518988221883774, "learning_rate": 0.002, "loss": 2.3359, "step": 302340 }, { "epoch": 1.1688005442934237, "grad_norm": 0.137264683842659, "learning_rate": 0.002, "loss": 2.3495, "step": 302350 }, { "epoch": 1.168839201496807, "grad_norm": 0.12920604646205902, "learning_rate": 0.002, "loss": 2.3415, "step": 302360 }, { "epoch": 1.1688778587001902, "grad_norm": 0.1093267872929573, "learning_rate": 0.002, "loss": 2.354, "step": 302370 }, { "epoch": 1.1689165159035735, "grad_norm": 0.09726943075656891, "learning_rate": 0.002, "loss": 2.33, "step": 302380 }, { "epoch": 1.1689551731069567, "grad_norm": 0.11821261048316956, "learning_rate": 0.002, "loss": 2.3451, "step": 302390 }, { "epoch": 1.16899383031034, "grad_norm": 0.09697729349136353, "learning_rate": 0.002, "loss": 2.328, "step": 302400 }, { "epoch": 1.1690324875137232, "grad_norm": 0.12189007550477982, "learning_rate": 0.002, "loss": 2.3476, "step": 302410 }, { "epoch": 1.1690711447171065, "grad_norm": 0.11940648406744003, "learning_rate": 0.002, "loss": 2.3364, "step": 302420 }, { "epoch": 1.1691098019204897, "grad_norm": 0.1193215548992157, "learning_rate": 0.002, "loss": 2.3292, "step": 302430 }, { "epoch": 1.1691484591238732, "grad_norm": 0.10657306015491486, "learning_rate": 0.002, "loss": 2.3334, "step": 302440 }, { "epoch": 1.1691871163272565, "grad_norm": 0.16606692969799042, "learning_rate": 0.002, "loss": 2.3253, "step": 302450 }, { "epoch": 1.1692257735306397, "grad_norm": 0.12425743043422699, "learning_rate": 0.002, "loss": 2.3453, "step": 302460 }, { "epoch": 1.169264430734023, "grad_norm": 0.09916871786117554, "learning_rate": 0.002, "loss": 2.3255, "step": 302470 }, { "epoch": 1.1693030879374062, "grad_norm": 0.1128845140337944, "learning_rate": 0.002, "loss": 2.3418, "step": 302480 }, { "epoch": 1.1693417451407895, "grad_norm": 0.10971315205097198, "learning_rate": 0.002, "loss": 2.3314, "step": 302490 }, { "epoch": 1.1693804023441727, "grad_norm": 0.10512728989124298, "learning_rate": 0.002, "loss": 2.3241, "step": 302500 }, { "epoch": 1.1694190595475562, "grad_norm": 0.1052752435207367, "learning_rate": 0.002, "loss": 2.3353, "step": 302510 }, { "epoch": 1.1694577167509395, "grad_norm": 0.11025025695562363, "learning_rate": 0.002, "loss": 2.3443, "step": 302520 }, { "epoch": 1.1694963739543227, "grad_norm": 0.09883037954568863, "learning_rate": 0.002, "loss": 2.3415, "step": 302530 }, { "epoch": 1.169535031157706, "grad_norm": 0.12725435197353363, "learning_rate": 0.002, "loss": 2.3474, "step": 302540 }, { "epoch": 1.1695736883610892, "grad_norm": 0.19296719133853912, "learning_rate": 0.002, "loss": 2.337, "step": 302550 }, { "epoch": 1.1696123455644725, "grad_norm": 0.10766754299402237, "learning_rate": 0.002, "loss": 2.3463, "step": 302560 }, { "epoch": 1.1696510027678557, "grad_norm": 0.09638424962759018, "learning_rate": 0.002, "loss": 2.3421, "step": 302570 }, { "epoch": 1.169689659971239, "grad_norm": 0.1059780865907669, "learning_rate": 0.002, "loss": 2.3454, "step": 302580 }, { "epoch": 1.1697283171746222, "grad_norm": 0.11075137555599213, "learning_rate": 0.002, "loss": 2.3324, "step": 302590 }, { "epoch": 1.1697669743780055, "grad_norm": 0.12818937003612518, "learning_rate": 0.002, "loss": 2.3358, "step": 302600 }, { "epoch": 1.169805631581389, "grad_norm": 0.11721966415643692, "learning_rate": 0.002, "loss": 2.3275, "step": 302610 }, { "epoch": 1.1698442887847722, "grad_norm": 0.08859899640083313, "learning_rate": 0.002, "loss": 2.3352, "step": 302620 }, { "epoch": 1.1698829459881555, "grad_norm": 0.1014174371957779, "learning_rate": 0.002, "loss": 2.3271, "step": 302630 }, { "epoch": 1.1699216031915387, "grad_norm": 0.11284614354372025, "learning_rate": 0.002, "loss": 2.3284, "step": 302640 }, { "epoch": 1.169960260394922, "grad_norm": 0.10882255434989929, "learning_rate": 0.002, "loss": 2.325, "step": 302650 }, { "epoch": 1.1699989175983052, "grad_norm": 0.08628625422716141, "learning_rate": 0.002, "loss": 2.3437, "step": 302660 }, { "epoch": 1.1700375748016885, "grad_norm": 0.12380296736955643, "learning_rate": 0.002, "loss": 2.3463, "step": 302670 }, { "epoch": 1.170076232005072, "grad_norm": 0.10757222771644592, "learning_rate": 0.002, "loss": 2.3203, "step": 302680 }, { "epoch": 1.1701148892084552, "grad_norm": 0.10542470961809158, "learning_rate": 0.002, "loss": 2.3489, "step": 302690 }, { "epoch": 1.1701535464118384, "grad_norm": 0.10293202847242355, "learning_rate": 0.002, "loss": 2.3334, "step": 302700 }, { "epoch": 1.1701922036152217, "grad_norm": 0.14387407898902893, "learning_rate": 0.002, "loss": 2.3325, "step": 302710 }, { "epoch": 1.170230860818605, "grad_norm": 0.09444009512662888, "learning_rate": 0.002, "loss": 2.354, "step": 302720 }, { "epoch": 1.1702695180219882, "grad_norm": 0.09907840192317963, "learning_rate": 0.002, "loss": 2.3333, "step": 302730 }, { "epoch": 1.1703081752253714, "grad_norm": 0.09588519483804703, "learning_rate": 0.002, "loss": 2.324, "step": 302740 }, { "epoch": 1.1703468324287547, "grad_norm": 0.10444150120019913, "learning_rate": 0.002, "loss": 2.3403, "step": 302750 }, { "epoch": 1.170385489632138, "grad_norm": 0.13282561302185059, "learning_rate": 0.002, "loss": 2.3388, "step": 302760 }, { "epoch": 1.1704241468355212, "grad_norm": 0.1018977016210556, "learning_rate": 0.002, "loss": 2.33, "step": 302770 }, { "epoch": 1.1704628040389047, "grad_norm": 0.09958646446466446, "learning_rate": 0.002, "loss": 2.3425, "step": 302780 }, { "epoch": 1.170501461242288, "grad_norm": 0.10159620642662048, "learning_rate": 0.002, "loss": 2.3266, "step": 302790 }, { "epoch": 1.1705401184456712, "grad_norm": 0.10525692254304886, "learning_rate": 0.002, "loss": 2.3245, "step": 302800 }, { "epoch": 1.1705787756490544, "grad_norm": 0.12873084843158722, "learning_rate": 0.002, "loss": 2.3465, "step": 302810 }, { "epoch": 1.1706174328524377, "grad_norm": 0.09802334755659103, "learning_rate": 0.002, "loss": 2.3308, "step": 302820 }, { "epoch": 1.170656090055821, "grad_norm": 0.10764884948730469, "learning_rate": 0.002, "loss": 2.341, "step": 302830 }, { "epoch": 1.1706947472592042, "grad_norm": 0.1302516609430313, "learning_rate": 0.002, "loss": 2.3397, "step": 302840 }, { "epoch": 1.1707334044625877, "grad_norm": 0.09868675470352173, "learning_rate": 0.002, "loss": 2.3357, "step": 302850 }, { "epoch": 1.170772061665971, "grad_norm": 0.09090667217969894, "learning_rate": 0.002, "loss": 2.3233, "step": 302860 }, { "epoch": 1.1708107188693542, "grad_norm": 0.09880606085062027, "learning_rate": 0.002, "loss": 2.333, "step": 302870 }, { "epoch": 1.1708493760727374, "grad_norm": 0.09945500642061234, "learning_rate": 0.002, "loss": 2.3347, "step": 302880 }, { "epoch": 1.1708880332761207, "grad_norm": 0.09477779269218445, "learning_rate": 0.002, "loss": 2.337, "step": 302890 }, { "epoch": 1.170926690479504, "grad_norm": 0.12763994932174683, "learning_rate": 0.002, "loss": 2.3386, "step": 302900 }, { "epoch": 1.1709653476828872, "grad_norm": 0.11147284507751465, "learning_rate": 0.002, "loss": 2.3346, "step": 302910 }, { "epoch": 1.1710040048862704, "grad_norm": 0.11019851267337799, "learning_rate": 0.002, "loss": 2.3529, "step": 302920 }, { "epoch": 1.1710426620896537, "grad_norm": 0.09784045815467834, "learning_rate": 0.002, "loss": 2.3313, "step": 302930 }, { "epoch": 1.1710813192930372, "grad_norm": 0.10193490236997604, "learning_rate": 0.002, "loss": 2.3335, "step": 302940 }, { "epoch": 1.1711199764964204, "grad_norm": 0.1023782268166542, "learning_rate": 0.002, "loss": 2.3354, "step": 302950 }, { "epoch": 1.1711586336998037, "grad_norm": 0.1048358753323555, "learning_rate": 0.002, "loss": 2.3224, "step": 302960 }, { "epoch": 1.171197290903187, "grad_norm": 0.10859642177820206, "learning_rate": 0.002, "loss": 2.3301, "step": 302970 }, { "epoch": 1.1712359481065702, "grad_norm": 0.10615593940019608, "learning_rate": 0.002, "loss": 2.3387, "step": 302980 }, { "epoch": 1.1712746053099534, "grad_norm": 0.10078933089971542, "learning_rate": 0.002, "loss": 2.348, "step": 302990 }, { "epoch": 1.1713132625133367, "grad_norm": 0.09982361644506454, "learning_rate": 0.002, "loss": 2.3202, "step": 303000 }, { "epoch": 1.17135191971672, "grad_norm": 0.09770236164331436, "learning_rate": 0.002, "loss": 2.3286, "step": 303010 }, { "epoch": 1.1713905769201034, "grad_norm": 0.11166229099035263, "learning_rate": 0.002, "loss": 2.3246, "step": 303020 }, { "epoch": 1.1714292341234867, "grad_norm": 0.09842663258314133, "learning_rate": 0.002, "loss": 2.3343, "step": 303030 }, { "epoch": 1.17146789132687, "grad_norm": 0.09624983370304108, "learning_rate": 0.002, "loss": 2.3438, "step": 303040 }, { "epoch": 1.1715065485302532, "grad_norm": 0.11118954420089722, "learning_rate": 0.002, "loss": 2.3385, "step": 303050 }, { "epoch": 1.1715452057336364, "grad_norm": 0.10093878209590912, "learning_rate": 0.002, "loss": 2.3181, "step": 303060 }, { "epoch": 1.1715838629370197, "grad_norm": 0.1114550307393074, "learning_rate": 0.002, "loss": 2.3263, "step": 303070 }, { "epoch": 1.171622520140403, "grad_norm": 0.10811921954154968, "learning_rate": 0.002, "loss": 2.3463, "step": 303080 }, { "epoch": 1.1716611773437862, "grad_norm": 0.0978502631187439, "learning_rate": 0.002, "loss": 2.3227, "step": 303090 }, { "epoch": 1.1716998345471694, "grad_norm": 0.1217813491821289, "learning_rate": 0.002, "loss": 2.3327, "step": 303100 }, { "epoch": 1.171738491750553, "grad_norm": 0.10993924736976624, "learning_rate": 0.002, "loss": 2.3333, "step": 303110 }, { "epoch": 1.1717771489539361, "grad_norm": 0.0941442996263504, "learning_rate": 0.002, "loss": 2.342, "step": 303120 }, { "epoch": 1.1718158061573194, "grad_norm": 0.09519714117050171, "learning_rate": 0.002, "loss": 2.3218, "step": 303130 }, { "epoch": 1.1718544633607026, "grad_norm": 0.09512632340192795, "learning_rate": 0.002, "loss": 2.329, "step": 303140 }, { "epoch": 1.171893120564086, "grad_norm": 0.08752937614917755, "learning_rate": 0.002, "loss": 2.3298, "step": 303150 }, { "epoch": 1.1719317777674692, "grad_norm": 0.1074802577495575, "learning_rate": 0.002, "loss": 2.3372, "step": 303160 }, { "epoch": 1.1719704349708524, "grad_norm": 0.09255402535200119, "learning_rate": 0.002, "loss": 2.3387, "step": 303170 }, { "epoch": 1.1720090921742357, "grad_norm": 0.09914116561412811, "learning_rate": 0.002, "loss": 2.3403, "step": 303180 }, { "epoch": 1.1720477493776191, "grad_norm": 0.0997026115655899, "learning_rate": 0.002, "loss": 2.3391, "step": 303190 }, { "epoch": 1.1720864065810024, "grad_norm": 0.12617866694927216, "learning_rate": 0.002, "loss": 2.3473, "step": 303200 }, { "epoch": 1.1721250637843856, "grad_norm": 0.11828405410051346, "learning_rate": 0.002, "loss": 2.337, "step": 303210 }, { "epoch": 1.1721637209877689, "grad_norm": 0.1121009960770607, "learning_rate": 0.002, "loss": 2.3257, "step": 303220 }, { "epoch": 1.1722023781911521, "grad_norm": 0.09488152712583542, "learning_rate": 0.002, "loss": 2.3257, "step": 303230 }, { "epoch": 1.1722410353945354, "grad_norm": 0.13893936574459076, "learning_rate": 0.002, "loss": 2.3171, "step": 303240 }, { "epoch": 1.1722796925979186, "grad_norm": 0.09628939628601074, "learning_rate": 0.002, "loss": 2.3486, "step": 303250 }, { "epoch": 1.172318349801302, "grad_norm": 0.09990405291318893, "learning_rate": 0.002, "loss": 2.3454, "step": 303260 }, { "epoch": 1.1723570070046851, "grad_norm": 0.10545917600393295, "learning_rate": 0.002, "loss": 2.352, "step": 303270 }, { "epoch": 1.1723956642080686, "grad_norm": 0.09982466697692871, "learning_rate": 0.002, "loss": 2.3446, "step": 303280 }, { "epoch": 1.1724343214114519, "grad_norm": 0.09827154129743576, "learning_rate": 0.002, "loss": 2.3358, "step": 303290 }, { "epoch": 1.1724729786148351, "grad_norm": 0.09286846220493317, "learning_rate": 0.002, "loss": 2.3238, "step": 303300 }, { "epoch": 1.1725116358182184, "grad_norm": 0.10099194198846817, "learning_rate": 0.002, "loss": 2.3238, "step": 303310 }, { "epoch": 1.1725502930216016, "grad_norm": 0.11508749425411224, "learning_rate": 0.002, "loss": 2.3287, "step": 303320 }, { "epoch": 1.1725889502249849, "grad_norm": 0.10250307619571686, "learning_rate": 0.002, "loss": 2.3513, "step": 303330 }, { "epoch": 1.1726276074283681, "grad_norm": 0.10944922268390656, "learning_rate": 0.002, "loss": 2.3443, "step": 303340 }, { "epoch": 1.1726662646317514, "grad_norm": 0.11692958325147629, "learning_rate": 0.002, "loss": 2.3223, "step": 303350 }, { "epoch": 1.1727049218351349, "grad_norm": 0.10119730234146118, "learning_rate": 0.002, "loss": 2.3348, "step": 303360 }, { "epoch": 1.1727435790385181, "grad_norm": 0.12241552770137787, "learning_rate": 0.002, "loss": 2.3274, "step": 303370 }, { "epoch": 1.1727822362419014, "grad_norm": 0.0938178300857544, "learning_rate": 0.002, "loss": 2.3336, "step": 303380 }, { "epoch": 1.1728208934452846, "grad_norm": 0.11530961096286774, "learning_rate": 0.002, "loss": 2.3445, "step": 303390 }, { "epoch": 1.1728595506486679, "grad_norm": 0.09542708843946457, "learning_rate": 0.002, "loss": 2.347, "step": 303400 }, { "epoch": 1.1728982078520511, "grad_norm": 0.10534845292568207, "learning_rate": 0.002, "loss": 2.3352, "step": 303410 }, { "epoch": 1.1729368650554344, "grad_norm": 0.10652362555265427, "learning_rate": 0.002, "loss": 2.3234, "step": 303420 }, { "epoch": 1.1729755222588176, "grad_norm": 0.09927625954151154, "learning_rate": 0.002, "loss": 2.3528, "step": 303430 }, { "epoch": 1.1730141794622009, "grad_norm": 0.12564003467559814, "learning_rate": 0.002, "loss": 2.3423, "step": 303440 }, { "epoch": 1.1730528366655844, "grad_norm": 0.15445850789546967, "learning_rate": 0.002, "loss": 2.329, "step": 303450 }, { "epoch": 1.1730914938689676, "grad_norm": 0.11700819432735443, "learning_rate": 0.002, "loss": 2.324, "step": 303460 }, { "epoch": 1.1731301510723509, "grad_norm": 0.11349461227655411, "learning_rate": 0.002, "loss": 2.3391, "step": 303470 }, { "epoch": 1.173168808275734, "grad_norm": 0.10206008702516556, "learning_rate": 0.002, "loss": 2.3346, "step": 303480 }, { "epoch": 1.1732074654791174, "grad_norm": 0.10352712124586105, "learning_rate": 0.002, "loss": 2.3301, "step": 303490 }, { "epoch": 1.1732461226825006, "grad_norm": 0.09197427332401276, "learning_rate": 0.002, "loss": 2.3313, "step": 303500 }, { "epoch": 1.1732847798858839, "grad_norm": 0.11469713598489761, "learning_rate": 0.002, "loss": 2.3331, "step": 303510 }, { "epoch": 1.1733234370892671, "grad_norm": 0.10551794618368149, "learning_rate": 0.002, "loss": 2.3374, "step": 303520 }, { "epoch": 1.1733620942926506, "grad_norm": 0.10328248888254166, "learning_rate": 0.002, "loss": 2.3368, "step": 303530 }, { "epoch": 1.1734007514960338, "grad_norm": 0.11079932749271393, "learning_rate": 0.002, "loss": 2.3175, "step": 303540 }, { "epoch": 1.173439408699417, "grad_norm": 0.10771428048610687, "learning_rate": 0.002, "loss": 2.347, "step": 303550 }, { "epoch": 1.1734780659028003, "grad_norm": 0.10114746540784836, "learning_rate": 0.002, "loss": 2.3496, "step": 303560 }, { "epoch": 1.1735167231061836, "grad_norm": 0.10057065635919571, "learning_rate": 0.002, "loss": 2.3369, "step": 303570 }, { "epoch": 1.1735553803095669, "grad_norm": 0.10123337060213089, "learning_rate": 0.002, "loss": 2.3382, "step": 303580 }, { "epoch": 1.17359403751295, "grad_norm": 0.10665623843669891, "learning_rate": 0.002, "loss": 2.3335, "step": 303590 }, { "epoch": 1.1736326947163334, "grad_norm": 0.094930000603199, "learning_rate": 0.002, "loss": 2.3317, "step": 303600 }, { "epoch": 1.1736713519197166, "grad_norm": 0.12668092548847198, "learning_rate": 0.002, "loss": 2.3268, "step": 303610 }, { "epoch": 1.1737100091231, "grad_norm": 0.09292471408843994, "learning_rate": 0.002, "loss": 2.3477, "step": 303620 }, { "epoch": 1.1737486663264833, "grad_norm": 0.10419165343046188, "learning_rate": 0.002, "loss": 2.3459, "step": 303630 }, { "epoch": 1.1737873235298666, "grad_norm": 0.12584923207759857, "learning_rate": 0.002, "loss": 2.3348, "step": 303640 }, { "epoch": 1.1738259807332498, "grad_norm": 0.10469865798950195, "learning_rate": 0.002, "loss": 2.3368, "step": 303650 }, { "epoch": 1.173864637936633, "grad_norm": 0.10127655416727066, "learning_rate": 0.002, "loss": 2.3375, "step": 303660 }, { "epoch": 1.1739032951400163, "grad_norm": 0.10466544330120087, "learning_rate": 0.002, "loss": 2.3423, "step": 303670 }, { "epoch": 1.1739419523433996, "grad_norm": 0.11453257501125336, "learning_rate": 0.002, "loss": 2.3248, "step": 303680 }, { "epoch": 1.173980609546783, "grad_norm": 0.10881021618843079, "learning_rate": 0.002, "loss": 2.3355, "step": 303690 }, { "epoch": 1.1740192667501663, "grad_norm": 0.10869313031435013, "learning_rate": 0.002, "loss": 2.3372, "step": 303700 }, { "epoch": 1.1740579239535496, "grad_norm": 0.10462198406457901, "learning_rate": 0.002, "loss": 2.3346, "step": 303710 }, { "epoch": 1.1740965811569328, "grad_norm": 0.10045907646417618, "learning_rate": 0.002, "loss": 2.326, "step": 303720 }, { "epoch": 1.174135238360316, "grad_norm": 0.09981822222471237, "learning_rate": 0.002, "loss": 2.3378, "step": 303730 }, { "epoch": 1.1741738955636993, "grad_norm": 0.11632543802261353, "learning_rate": 0.002, "loss": 2.341, "step": 303740 }, { "epoch": 1.1742125527670826, "grad_norm": 0.13264437019824982, "learning_rate": 0.002, "loss": 2.3496, "step": 303750 }, { "epoch": 1.1742512099704658, "grad_norm": 0.1124536469578743, "learning_rate": 0.002, "loss": 2.3291, "step": 303760 }, { "epoch": 1.174289867173849, "grad_norm": 0.10196714848279953, "learning_rate": 0.002, "loss": 2.3352, "step": 303770 }, { "epoch": 1.1743285243772323, "grad_norm": 0.1239824965596199, "learning_rate": 0.002, "loss": 2.3337, "step": 303780 }, { "epoch": 1.1743671815806158, "grad_norm": 0.10366879403591156, "learning_rate": 0.002, "loss": 2.3526, "step": 303790 }, { "epoch": 1.174405838783999, "grad_norm": 0.09123245626688004, "learning_rate": 0.002, "loss": 2.3408, "step": 303800 }, { "epoch": 1.1744444959873823, "grad_norm": 0.10823673009872437, "learning_rate": 0.002, "loss": 2.3228, "step": 303810 }, { "epoch": 1.1744831531907656, "grad_norm": 0.107994444668293, "learning_rate": 0.002, "loss": 2.3538, "step": 303820 }, { "epoch": 1.1745218103941488, "grad_norm": 0.10749204456806183, "learning_rate": 0.002, "loss": 2.3335, "step": 303830 }, { "epoch": 1.174560467597532, "grad_norm": 0.10459636151790619, "learning_rate": 0.002, "loss": 2.3347, "step": 303840 }, { "epoch": 1.1745991248009153, "grad_norm": 0.10358193516731262, "learning_rate": 0.002, "loss": 2.3485, "step": 303850 }, { "epoch": 1.1746377820042988, "grad_norm": 0.09980751574039459, "learning_rate": 0.002, "loss": 2.3279, "step": 303860 }, { "epoch": 1.174676439207682, "grad_norm": 0.11811071634292603, "learning_rate": 0.002, "loss": 2.3381, "step": 303870 }, { "epoch": 1.1747150964110653, "grad_norm": 0.10448575764894485, "learning_rate": 0.002, "loss": 2.3629, "step": 303880 }, { "epoch": 1.1747537536144486, "grad_norm": 0.092103011906147, "learning_rate": 0.002, "loss": 2.3325, "step": 303890 }, { "epoch": 1.1747924108178318, "grad_norm": 0.11232535541057587, "learning_rate": 0.002, "loss": 2.3176, "step": 303900 }, { "epoch": 1.174831068021215, "grad_norm": 0.10447873175144196, "learning_rate": 0.002, "loss": 2.3422, "step": 303910 }, { "epoch": 1.1748697252245983, "grad_norm": 0.11118560284376144, "learning_rate": 0.002, "loss": 2.3338, "step": 303920 }, { "epoch": 1.1749083824279816, "grad_norm": 0.08857395499944687, "learning_rate": 0.002, "loss": 2.3402, "step": 303930 }, { "epoch": 1.1749470396313648, "grad_norm": 0.13168229162693024, "learning_rate": 0.002, "loss": 2.3429, "step": 303940 }, { "epoch": 1.174985696834748, "grad_norm": 0.11878544837236404, "learning_rate": 0.002, "loss": 2.3272, "step": 303950 }, { "epoch": 1.1750243540381315, "grad_norm": 0.10220491141080856, "learning_rate": 0.002, "loss": 2.32, "step": 303960 }, { "epoch": 1.1750630112415148, "grad_norm": 0.11073249578475952, "learning_rate": 0.002, "loss": 2.3244, "step": 303970 }, { "epoch": 1.175101668444898, "grad_norm": 0.089708112180233, "learning_rate": 0.002, "loss": 2.3443, "step": 303980 }, { "epoch": 1.1751403256482813, "grad_norm": 0.12098492681980133, "learning_rate": 0.002, "loss": 2.334, "step": 303990 }, { "epoch": 1.1751789828516646, "grad_norm": 0.11484367400407791, "learning_rate": 0.002, "loss": 2.3204, "step": 304000 }, { "epoch": 1.1752176400550478, "grad_norm": 0.09806149452924728, "learning_rate": 0.002, "loss": 2.3363, "step": 304010 }, { "epoch": 1.175256297258431, "grad_norm": 0.1285572201013565, "learning_rate": 0.002, "loss": 2.3265, "step": 304020 }, { "epoch": 1.1752949544618145, "grad_norm": 0.1325317770242691, "learning_rate": 0.002, "loss": 2.3383, "step": 304030 }, { "epoch": 1.1753336116651978, "grad_norm": 0.11979369074106216, "learning_rate": 0.002, "loss": 2.344, "step": 304040 }, { "epoch": 1.175372268868581, "grad_norm": 0.11940714716911316, "learning_rate": 0.002, "loss": 2.3335, "step": 304050 }, { "epoch": 1.1754109260719643, "grad_norm": 0.09162340313196182, "learning_rate": 0.002, "loss": 2.3422, "step": 304060 }, { "epoch": 1.1754495832753475, "grad_norm": 0.1013336181640625, "learning_rate": 0.002, "loss": 2.3256, "step": 304070 }, { "epoch": 1.1754882404787308, "grad_norm": 0.11899504065513611, "learning_rate": 0.002, "loss": 2.3432, "step": 304080 }, { "epoch": 1.175526897682114, "grad_norm": 0.10176719725131989, "learning_rate": 0.002, "loss": 2.3504, "step": 304090 }, { "epoch": 1.1755655548854973, "grad_norm": 0.11070800572633743, "learning_rate": 0.002, "loss": 2.3232, "step": 304100 }, { "epoch": 1.1756042120888806, "grad_norm": 0.10064385086297989, "learning_rate": 0.002, "loss": 2.3344, "step": 304110 }, { "epoch": 1.1756428692922638, "grad_norm": 0.10409154742956161, "learning_rate": 0.002, "loss": 2.323, "step": 304120 }, { "epoch": 1.1756815264956473, "grad_norm": 0.09488542377948761, "learning_rate": 0.002, "loss": 2.3175, "step": 304130 }, { "epoch": 1.1757201836990305, "grad_norm": 0.1136431023478508, "learning_rate": 0.002, "loss": 2.346, "step": 304140 }, { "epoch": 1.1757588409024138, "grad_norm": 0.09265240281820297, "learning_rate": 0.002, "loss": 2.3247, "step": 304150 }, { "epoch": 1.175797498105797, "grad_norm": 0.09490280598402023, "learning_rate": 0.002, "loss": 2.3359, "step": 304160 }, { "epoch": 1.1758361553091803, "grad_norm": 0.1054140031337738, "learning_rate": 0.002, "loss": 2.332, "step": 304170 }, { "epoch": 1.1758748125125635, "grad_norm": 0.10391158610582352, "learning_rate": 0.002, "loss": 2.3483, "step": 304180 }, { "epoch": 1.1759134697159468, "grad_norm": 0.09150063991546631, "learning_rate": 0.002, "loss": 2.3186, "step": 304190 }, { "epoch": 1.1759521269193303, "grad_norm": 0.11067679524421692, "learning_rate": 0.002, "loss": 2.3363, "step": 304200 }, { "epoch": 1.1759907841227135, "grad_norm": 0.10968092083930969, "learning_rate": 0.002, "loss": 2.3305, "step": 304210 }, { "epoch": 1.1760294413260968, "grad_norm": 0.10392303019762039, "learning_rate": 0.002, "loss": 2.3357, "step": 304220 }, { "epoch": 1.17606809852948, "grad_norm": 0.11467640101909637, "learning_rate": 0.002, "loss": 2.3424, "step": 304230 }, { "epoch": 1.1761067557328633, "grad_norm": 0.10126051306724548, "learning_rate": 0.002, "loss": 2.3494, "step": 304240 }, { "epoch": 1.1761454129362465, "grad_norm": 0.10900580137968063, "learning_rate": 0.002, "loss": 2.3463, "step": 304250 }, { "epoch": 1.1761840701396298, "grad_norm": 0.09736665338277817, "learning_rate": 0.002, "loss": 2.3424, "step": 304260 }, { "epoch": 1.176222727343013, "grad_norm": 0.10468364506959915, "learning_rate": 0.002, "loss": 2.3324, "step": 304270 }, { "epoch": 1.1762613845463963, "grad_norm": 0.11530701816082001, "learning_rate": 0.002, "loss": 2.3397, "step": 304280 }, { "epoch": 1.1763000417497795, "grad_norm": 0.13199259340763092, "learning_rate": 0.002, "loss": 2.3321, "step": 304290 }, { "epoch": 1.176338698953163, "grad_norm": 0.10489901900291443, "learning_rate": 0.002, "loss": 2.3445, "step": 304300 }, { "epoch": 1.1763773561565463, "grad_norm": 0.09021489322185516, "learning_rate": 0.002, "loss": 2.3467, "step": 304310 }, { "epoch": 1.1764160133599295, "grad_norm": 0.11300509423017502, "learning_rate": 0.002, "loss": 2.3454, "step": 304320 }, { "epoch": 1.1764546705633128, "grad_norm": 0.1115007996559143, "learning_rate": 0.002, "loss": 2.3551, "step": 304330 }, { "epoch": 1.176493327766696, "grad_norm": 0.10999033600091934, "learning_rate": 0.002, "loss": 2.3257, "step": 304340 }, { "epoch": 1.1765319849700793, "grad_norm": 0.09730103611946106, "learning_rate": 0.002, "loss": 2.3339, "step": 304350 }, { "epoch": 1.1765706421734625, "grad_norm": 0.0897442027926445, "learning_rate": 0.002, "loss": 2.3338, "step": 304360 }, { "epoch": 1.176609299376846, "grad_norm": 0.10270145535469055, "learning_rate": 0.002, "loss": 2.3195, "step": 304370 }, { "epoch": 1.1766479565802292, "grad_norm": 0.11997896432876587, "learning_rate": 0.002, "loss": 2.3325, "step": 304380 }, { "epoch": 1.1766866137836125, "grad_norm": 0.11275748163461685, "learning_rate": 0.002, "loss": 2.3456, "step": 304390 }, { "epoch": 1.1767252709869958, "grad_norm": 0.09913094341754913, "learning_rate": 0.002, "loss": 2.3271, "step": 304400 }, { "epoch": 1.176763928190379, "grad_norm": 0.13795678317546844, "learning_rate": 0.002, "loss": 2.3431, "step": 304410 }, { "epoch": 1.1768025853937623, "grad_norm": 0.1019025593996048, "learning_rate": 0.002, "loss": 2.3372, "step": 304420 }, { "epoch": 1.1768412425971455, "grad_norm": 0.12087249010801315, "learning_rate": 0.002, "loss": 2.3329, "step": 304430 }, { "epoch": 1.1768798998005288, "grad_norm": 0.10497815907001495, "learning_rate": 0.002, "loss": 2.3369, "step": 304440 }, { "epoch": 1.176918557003912, "grad_norm": 0.14686134457588196, "learning_rate": 0.002, "loss": 2.3536, "step": 304450 }, { "epoch": 1.1769572142072953, "grad_norm": 0.1088930293917656, "learning_rate": 0.002, "loss": 2.3408, "step": 304460 }, { "epoch": 1.1769958714106787, "grad_norm": 0.11077285557985306, "learning_rate": 0.002, "loss": 2.3389, "step": 304470 }, { "epoch": 1.177034528614062, "grad_norm": 0.10116249322891235, "learning_rate": 0.002, "loss": 2.3459, "step": 304480 }, { "epoch": 1.1770731858174452, "grad_norm": 0.09450482577085495, "learning_rate": 0.002, "loss": 2.3193, "step": 304490 }, { "epoch": 1.1771118430208285, "grad_norm": 0.11417514830827713, "learning_rate": 0.002, "loss": 2.3271, "step": 304500 }, { "epoch": 1.1771505002242117, "grad_norm": 0.10279672592878342, "learning_rate": 0.002, "loss": 2.3265, "step": 304510 }, { "epoch": 1.177189157427595, "grad_norm": 0.10613597929477692, "learning_rate": 0.002, "loss": 2.335, "step": 304520 }, { "epoch": 1.1772278146309783, "grad_norm": 0.11793537437915802, "learning_rate": 0.002, "loss": 2.3172, "step": 304530 }, { "epoch": 1.1772664718343617, "grad_norm": 0.10077495127916336, "learning_rate": 0.002, "loss": 2.3354, "step": 304540 }, { "epoch": 1.177305129037745, "grad_norm": 0.10949613153934479, "learning_rate": 0.002, "loss": 2.3334, "step": 304550 }, { "epoch": 1.1773437862411282, "grad_norm": 0.11719959229230881, "learning_rate": 0.002, "loss": 2.3306, "step": 304560 }, { "epoch": 1.1773824434445115, "grad_norm": 0.1240655705332756, "learning_rate": 0.002, "loss": 2.3431, "step": 304570 }, { "epoch": 1.1774211006478947, "grad_norm": 0.0896514281630516, "learning_rate": 0.002, "loss": 2.3315, "step": 304580 }, { "epoch": 1.177459757851278, "grad_norm": 0.11183775216341019, "learning_rate": 0.002, "loss": 2.3487, "step": 304590 }, { "epoch": 1.1774984150546612, "grad_norm": 0.12366892397403717, "learning_rate": 0.002, "loss": 2.3497, "step": 304600 }, { "epoch": 1.1775370722580445, "grad_norm": 0.11976148188114166, "learning_rate": 0.002, "loss": 2.3378, "step": 304610 }, { "epoch": 1.1775757294614277, "grad_norm": 0.10143893957138062, "learning_rate": 0.002, "loss": 2.3276, "step": 304620 }, { "epoch": 1.177614386664811, "grad_norm": 0.09807173907756805, "learning_rate": 0.002, "loss": 2.3397, "step": 304630 }, { "epoch": 1.1776530438681945, "grad_norm": 0.08996410667896271, "learning_rate": 0.002, "loss": 2.3192, "step": 304640 }, { "epoch": 1.1776917010715777, "grad_norm": 0.15061452984809875, "learning_rate": 0.002, "loss": 2.3336, "step": 304650 }, { "epoch": 1.177730358274961, "grad_norm": 0.10375141352415085, "learning_rate": 0.002, "loss": 2.3582, "step": 304660 }, { "epoch": 1.1777690154783442, "grad_norm": 0.0976865217089653, "learning_rate": 0.002, "loss": 2.3424, "step": 304670 }, { "epoch": 1.1778076726817275, "grad_norm": 0.10766734182834625, "learning_rate": 0.002, "loss": 2.3422, "step": 304680 }, { "epoch": 1.1778463298851107, "grad_norm": 0.10150058567523956, "learning_rate": 0.002, "loss": 2.3258, "step": 304690 }, { "epoch": 1.177884987088494, "grad_norm": 0.11639443784952164, "learning_rate": 0.002, "loss": 2.3257, "step": 304700 }, { "epoch": 1.1779236442918775, "grad_norm": 0.10969920456409454, "learning_rate": 0.002, "loss": 2.3392, "step": 304710 }, { "epoch": 1.1779623014952607, "grad_norm": 0.11082258075475693, "learning_rate": 0.002, "loss": 2.3316, "step": 304720 }, { "epoch": 1.178000958698644, "grad_norm": 0.10220003128051758, "learning_rate": 0.002, "loss": 2.3294, "step": 304730 }, { "epoch": 1.1780396159020272, "grad_norm": 0.1293516606092453, "learning_rate": 0.002, "loss": 2.3379, "step": 304740 }, { "epoch": 1.1780782731054105, "grad_norm": 0.10310104489326477, "learning_rate": 0.002, "loss": 2.3266, "step": 304750 }, { "epoch": 1.1781169303087937, "grad_norm": 0.10579738020896912, "learning_rate": 0.002, "loss": 2.3372, "step": 304760 }, { "epoch": 1.178155587512177, "grad_norm": 0.09480555355548859, "learning_rate": 0.002, "loss": 2.3454, "step": 304770 }, { "epoch": 1.1781942447155602, "grad_norm": 0.1701122671365738, "learning_rate": 0.002, "loss": 2.3324, "step": 304780 }, { "epoch": 1.1782329019189435, "grad_norm": 0.12626154720783234, "learning_rate": 0.002, "loss": 2.3372, "step": 304790 }, { "epoch": 1.178271559122327, "grad_norm": 0.10779381543397903, "learning_rate": 0.002, "loss": 2.3437, "step": 304800 }, { "epoch": 1.1783102163257102, "grad_norm": 0.10039182752370834, "learning_rate": 0.002, "loss": 2.3285, "step": 304810 }, { "epoch": 1.1783488735290935, "grad_norm": 0.09189040213823318, "learning_rate": 0.002, "loss": 2.3393, "step": 304820 }, { "epoch": 1.1783875307324767, "grad_norm": 0.1019379049539566, "learning_rate": 0.002, "loss": 2.3478, "step": 304830 }, { "epoch": 1.17842618793586, "grad_norm": 0.14611029624938965, "learning_rate": 0.002, "loss": 2.3338, "step": 304840 }, { "epoch": 1.1784648451392432, "grad_norm": 0.10264472663402557, "learning_rate": 0.002, "loss": 2.3229, "step": 304850 }, { "epoch": 1.1785035023426265, "grad_norm": 0.10579685121774673, "learning_rate": 0.002, "loss": 2.3319, "step": 304860 }, { "epoch": 1.1785421595460097, "grad_norm": 0.09584686905145645, "learning_rate": 0.002, "loss": 2.3216, "step": 304870 }, { "epoch": 1.1785808167493932, "grad_norm": 0.13405711948871613, "learning_rate": 0.002, "loss": 2.329, "step": 304880 }, { "epoch": 1.1786194739527764, "grad_norm": 0.12086565047502518, "learning_rate": 0.002, "loss": 2.3349, "step": 304890 }, { "epoch": 1.1786581311561597, "grad_norm": 0.10096962749958038, "learning_rate": 0.002, "loss": 2.3334, "step": 304900 }, { "epoch": 1.178696788359543, "grad_norm": 0.10008525103330612, "learning_rate": 0.002, "loss": 2.3495, "step": 304910 }, { "epoch": 1.1787354455629262, "grad_norm": 0.10858751833438873, "learning_rate": 0.002, "loss": 2.3287, "step": 304920 }, { "epoch": 1.1787741027663095, "grad_norm": 0.10242888331413269, "learning_rate": 0.002, "loss": 2.3197, "step": 304930 }, { "epoch": 1.1788127599696927, "grad_norm": 0.1024758443236351, "learning_rate": 0.002, "loss": 2.3365, "step": 304940 }, { "epoch": 1.178851417173076, "grad_norm": 0.10521946847438812, "learning_rate": 0.002, "loss": 2.3383, "step": 304950 }, { "epoch": 1.1788900743764592, "grad_norm": 0.0923774316906929, "learning_rate": 0.002, "loss": 2.3154, "step": 304960 }, { "epoch": 1.1789287315798427, "grad_norm": 0.10218238085508347, "learning_rate": 0.002, "loss": 2.3319, "step": 304970 }, { "epoch": 1.178967388783226, "grad_norm": 0.10977078229188919, "learning_rate": 0.002, "loss": 2.3485, "step": 304980 }, { "epoch": 1.1790060459866092, "grad_norm": 0.09633048623800278, "learning_rate": 0.002, "loss": 2.3467, "step": 304990 }, { "epoch": 1.1790447031899924, "grad_norm": 0.10134069621562958, "learning_rate": 0.002, "loss": 2.3319, "step": 305000 }, { "epoch": 1.1790833603933757, "grad_norm": 0.10490524023771286, "learning_rate": 0.002, "loss": 2.3303, "step": 305010 }, { "epoch": 1.179122017596759, "grad_norm": 0.1142604649066925, "learning_rate": 0.002, "loss": 2.332, "step": 305020 }, { "epoch": 1.1791606748001422, "grad_norm": 0.09350310266017914, "learning_rate": 0.002, "loss": 2.3365, "step": 305030 }, { "epoch": 1.1791993320035254, "grad_norm": 0.1101241186261177, "learning_rate": 0.002, "loss": 2.3314, "step": 305040 }, { "epoch": 1.179237989206909, "grad_norm": 0.11008507758378983, "learning_rate": 0.002, "loss": 2.3286, "step": 305050 }, { "epoch": 1.1792766464102922, "grad_norm": 0.10210888832807541, "learning_rate": 0.002, "loss": 2.3306, "step": 305060 }, { "epoch": 1.1793153036136754, "grad_norm": 0.12260551750659943, "learning_rate": 0.002, "loss": 2.3498, "step": 305070 }, { "epoch": 1.1793539608170587, "grad_norm": 0.09672050178050995, "learning_rate": 0.002, "loss": 2.3211, "step": 305080 }, { "epoch": 1.179392618020442, "grad_norm": 0.11338058114051819, "learning_rate": 0.002, "loss": 2.3314, "step": 305090 }, { "epoch": 1.1794312752238252, "grad_norm": 0.12634330987930298, "learning_rate": 0.002, "loss": 2.3519, "step": 305100 }, { "epoch": 1.1794699324272084, "grad_norm": 0.10544847697019577, "learning_rate": 0.002, "loss": 2.3222, "step": 305110 }, { "epoch": 1.1795085896305917, "grad_norm": 0.1192038282752037, "learning_rate": 0.002, "loss": 2.3404, "step": 305120 }, { "epoch": 1.179547246833975, "grad_norm": 0.08795661479234695, "learning_rate": 0.002, "loss": 2.3234, "step": 305130 }, { "epoch": 1.1795859040373584, "grad_norm": 0.10282469540834427, "learning_rate": 0.002, "loss": 2.3359, "step": 305140 }, { "epoch": 1.1796245612407417, "grad_norm": 0.1073727011680603, "learning_rate": 0.002, "loss": 2.3362, "step": 305150 }, { "epoch": 1.179663218444125, "grad_norm": 0.09340602159500122, "learning_rate": 0.002, "loss": 2.3291, "step": 305160 }, { "epoch": 1.1797018756475082, "grad_norm": 0.09694849699735641, "learning_rate": 0.002, "loss": 2.3305, "step": 305170 }, { "epoch": 1.1797405328508914, "grad_norm": 0.1085776686668396, "learning_rate": 0.002, "loss": 2.3257, "step": 305180 }, { "epoch": 1.1797791900542747, "grad_norm": 0.10637427866458893, "learning_rate": 0.002, "loss": 2.331, "step": 305190 }, { "epoch": 1.179817847257658, "grad_norm": 0.13425855338573456, "learning_rate": 0.002, "loss": 2.3381, "step": 305200 }, { "epoch": 1.1798565044610412, "grad_norm": 0.11008214950561523, "learning_rate": 0.002, "loss": 2.3372, "step": 305210 }, { "epoch": 1.1798951616644247, "grad_norm": 0.10857033729553223, "learning_rate": 0.002, "loss": 2.3208, "step": 305220 }, { "epoch": 1.179933818867808, "grad_norm": 0.10989338159561157, "learning_rate": 0.002, "loss": 2.3403, "step": 305230 }, { "epoch": 1.1799724760711912, "grad_norm": 0.10804920643568039, "learning_rate": 0.002, "loss": 2.3264, "step": 305240 }, { "epoch": 1.1800111332745744, "grad_norm": 0.09076432883739471, "learning_rate": 0.002, "loss": 2.3259, "step": 305250 }, { "epoch": 1.1800497904779577, "grad_norm": 0.10450518876314163, "learning_rate": 0.002, "loss": 2.3396, "step": 305260 }, { "epoch": 1.180088447681341, "grad_norm": 0.11721612513065338, "learning_rate": 0.002, "loss": 2.3404, "step": 305270 }, { "epoch": 1.1801271048847242, "grad_norm": 0.11377738416194916, "learning_rate": 0.002, "loss": 2.3546, "step": 305280 }, { "epoch": 1.1801657620881074, "grad_norm": 0.10185536742210388, "learning_rate": 0.002, "loss": 2.3327, "step": 305290 }, { "epoch": 1.1802044192914907, "grad_norm": 0.1519765406847, "learning_rate": 0.002, "loss": 2.3373, "step": 305300 }, { "epoch": 1.1802430764948741, "grad_norm": 0.0957445278763771, "learning_rate": 0.002, "loss": 2.327, "step": 305310 }, { "epoch": 1.1802817336982574, "grad_norm": 0.1012631356716156, "learning_rate": 0.002, "loss": 2.3331, "step": 305320 }, { "epoch": 1.1803203909016406, "grad_norm": 0.10935018211603165, "learning_rate": 0.002, "loss": 2.3461, "step": 305330 }, { "epoch": 1.180359048105024, "grad_norm": 0.09782283008098602, "learning_rate": 0.002, "loss": 2.3321, "step": 305340 }, { "epoch": 1.1803977053084072, "grad_norm": 0.11596140265464783, "learning_rate": 0.002, "loss": 2.3188, "step": 305350 }, { "epoch": 1.1804363625117904, "grad_norm": 0.1043427437543869, "learning_rate": 0.002, "loss": 2.3397, "step": 305360 }, { "epoch": 1.1804750197151737, "grad_norm": 0.09407017379999161, "learning_rate": 0.002, "loss": 2.3305, "step": 305370 }, { "epoch": 1.180513676918557, "grad_norm": 0.08882657438516617, "learning_rate": 0.002, "loss": 2.3382, "step": 305380 }, { "epoch": 1.1805523341219404, "grad_norm": 0.07942847907543182, "learning_rate": 0.002, "loss": 2.3354, "step": 305390 }, { "epoch": 1.1805909913253236, "grad_norm": 0.12886931002140045, "learning_rate": 0.002, "loss": 2.3235, "step": 305400 }, { "epoch": 1.1806296485287069, "grad_norm": 0.10612437129020691, "learning_rate": 0.002, "loss": 2.3566, "step": 305410 }, { "epoch": 1.1806683057320901, "grad_norm": 0.11033554375171661, "learning_rate": 0.002, "loss": 2.3246, "step": 305420 }, { "epoch": 1.1807069629354734, "grad_norm": 0.11206621676683426, "learning_rate": 0.002, "loss": 2.3328, "step": 305430 }, { "epoch": 1.1807456201388566, "grad_norm": 0.10127930343151093, "learning_rate": 0.002, "loss": 2.3387, "step": 305440 }, { "epoch": 1.18078427734224, "grad_norm": 0.10065993666648865, "learning_rate": 0.002, "loss": 2.3314, "step": 305450 }, { "epoch": 1.1808229345456231, "grad_norm": 0.11005466431379318, "learning_rate": 0.002, "loss": 2.3102, "step": 305460 }, { "epoch": 1.1808615917490064, "grad_norm": 0.11241529881954193, "learning_rate": 0.002, "loss": 2.3262, "step": 305470 }, { "epoch": 1.1809002489523899, "grad_norm": 0.09118768572807312, "learning_rate": 0.002, "loss": 2.3396, "step": 305480 }, { "epoch": 1.1809389061557731, "grad_norm": 0.1035250723361969, "learning_rate": 0.002, "loss": 2.3392, "step": 305490 }, { "epoch": 1.1809775633591564, "grad_norm": 0.10607394576072693, "learning_rate": 0.002, "loss": 2.3401, "step": 305500 }, { "epoch": 1.1810162205625396, "grad_norm": 0.10815197974443436, "learning_rate": 0.002, "loss": 2.3532, "step": 305510 }, { "epoch": 1.1810548777659229, "grad_norm": 0.10144860297441483, "learning_rate": 0.002, "loss": 2.3252, "step": 305520 }, { "epoch": 1.1810935349693061, "grad_norm": 0.10189273208379745, "learning_rate": 0.002, "loss": 2.3426, "step": 305530 }, { "epoch": 1.1811321921726894, "grad_norm": 0.09582088142633438, "learning_rate": 0.002, "loss": 2.3205, "step": 305540 }, { "epoch": 1.1811708493760729, "grad_norm": 0.10352307558059692, "learning_rate": 0.002, "loss": 2.3276, "step": 305550 }, { "epoch": 1.1812095065794561, "grad_norm": 0.13020190596580505, "learning_rate": 0.002, "loss": 2.3399, "step": 305560 }, { "epoch": 1.1812481637828394, "grad_norm": 0.09354320168495178, "learning_rate": 0.002, "loss": 2.3387, "step": 305570 }, { "epoch": 1.1812868209862226, "grad_norm": 0.09511909633874893, "learning_rate": 0.002, "loss": 2.3281, "step": 305580 }, { "epoch": 1.1813254781896059, "grad_norm": 0.1106218695640564, "learning_rate": 0.002, "loss": 2.3328, "step": 305590 }, { "epoch": 1.1813641353929891, "grad_norm": 0.11186060309410095, "learning_rate": 0.002, "loss": 2.3344, "step": 305600 }, { "epoch": 1.1814027925963724, "grad_norm": 0.0967664122581482, "learning_rate": 0.002, "loss": 2.3221, "step": 305610 }, { "epoch": 1.1814414497997556, "grad_norm": 0.09282700717449188, "learning_rate": 0.002, "loss": 2.3268, "step": 305620 }, { "epoch": 1.1814801070031389, "grad_norm": 0.11425047367811203, "learning_rate": 0.002, "loss": 2.332, "step": 305630 }, { "epoch": 1.1815187642065221, "grad_norm": 0.11561277508735657, "learning_rate": 0.002, "loss": 2.319, "step": 305640 }, { "epoch": 1.1815574214099056, "grad_norm": 0.11100748181343079, "learning_rate": 0.002, "loss": 2.3376, "step": 305650 }, { "epoch": 1.1815960786132889, "grad_norm": 0.10870927572250366, "learning_rate": 0.002, "loss": 2.3393, "step": 305660 }, { "epoch": 1.181634735816672, "grad_norm": 0.10777224600315094, "learning_rate": 0.002, "loss": 2.3345, "step": 305670 }, { "epoch": 1.1816733930200554, "grad_norm": 0.09503760188817978, "learning_rate": 0.002, "loss": 2.3407, "step": 305680 }, { "epoch": 1.1817120502234386, "grad_norm": 0.12765321135520935, "learning_rate": 0.002, "loss": 2.3418, "step": 305690 }, { "epoch": 1.1817507074268219, "grad_norm": 0.10698419064283371, "learning_rate": 0.002, "loss": 2.3281, "step": 305700 }, { "epoch": 1.1817893646302051, "grad_norm": 0.09345374256372452, "learning_rate": 0.002, "loss": 2.3366, "step": 305710 }, { "epoch": 1.1818280218335886, "grad_norm": 0.10894269496202469, "learning_rate": 0.002, "loss": 2.3313, "step": 305720 }, { "epoch": 1.1818666790369718, "grad_norm": 0.10499607026576996, "learning_rate": 0.002, "loss": 2.3502, "step": 305730 }, { "epoch": 1.181905336240355, "grad_norm": 0.10457663983106613, "learning_rate": 0.002, "loss": 2.3398, "step": 305740 }, { "epoch": 1.1819439934437383, "grad_norm": 0.11890369653701782, "learning_rate": 0.002, "loss": 2.3396, "step": 305750 }, { "epoch": 1.1819826506471216, "grad_norm": 0.11439453065395355, "learning_rate": 0.002, "loss": 2.3369, "step": 305760 }, { "epoch": 1.1820213078505049, "grad_norm": 0.11376997083425522, "learning_rate": 0.002, "loss": 2.3349, "step": 305770 }, { "epoch": 1.182059965053888, "grad_norm": 0.09751281887292862, "learning_rate": 0.002, "loss": 2.332, "step": 305780 }, { "epoch": 1.1820986222572714, "grad_norm": 0.10192884504795074, "learning_rate": 0.002, "loss": 2.3282, "step": 305790 }, { "epoch": 1.1821372794606546, "grad_norm": 0.12316975742578506, "learning_rate": 0.002, "loss": 2.3416, "step": 305800 }, { "epoch": 1.1821759366640379, "grad_norm": 0.09169697761535645, "learning_rate": 0.002, "loss": 2.333, "step": 305810 }, { "epoch": 1.1822145938674213, "grad_norm": 0.09697745740413666, "learning_rate": 0.002, "loss": 2.3353, "step": 305820 }, { "epoch": 1.1822532510708046, "grad_norm": 0.10638635605573654, "learning_rate": 0.002, "loss": 2.335, "step": 305830 }, { "epoch": 1.1822919082741878, "grad_norm": 0.1196802407503128, "learning_rate": 0.002, "loss": 2.3123, "step": 305840 }, { "epoch": 1.182330565477571, "grad_norm": 0.09272005409002304, "learning_rate": 0.002, "loss": 2.3241, "step": 305850 }, { "epoch": 1.1823692226809543, "grad_norm": 0.10029997676610947, "learning_rate": 0.002, "loss": 2.3366, "step": 305860 }, { "epoch": 1.1824078798843376, "grad_norm": 0.09517457336187363, "learning_rate": 0.002, "loss": 2.3319, "step": 305870 }, { "epoch": 1.1824465370877209, "grad_norm": 0.11812178045511246, "learning_rate": 0.002, "loss": 2.3254, "step": 305880 }, { "epoch": 1.1824851942911043, "grad_norm": 0.11352573335170746, "learning_rate": 0.002, "loss": 2.3271, "step": 305890 }, { "epoch": 1.1825238514944876, "grad_norm": 0.11691530048847198, "learning_rate": 0.002, "loss": 2.3292, "step": 305900 }, { "epoch": 1.1825625086978708, "grad_norm": 0.102568119764328, "learning_rate": 0.002, "loss": 2.3278, "step": 305910 }, { "epoch": 1.182601165901254, "grad_norm": 0.09333674609661102, "learning_rate": 0.002, "loss": 2.3414, "step": 305920 }, { "epoch": 1.1826398231046373, "grad_norm": 0.1106361448764801, "learning_rate": 0.002, "loss": 2.3402, "step": 305930 }, { "epoch": 1.1826784803080206, "grad_norm": 0.10933764278888702, "learning_rate": 0.002, "loss": 2.3347, "step": 305940 }, { "epoch": 1.1827171375114038, "grad_norm": 0.11312533169984818, "learning_rate": 0.002, "loss": 2.3347, "step": 305950 }, { "epoch": 1.182755794714787, "grad_norm": 0.10010302811861038, "learning_rate": 0.002, "loss": 2.3293, "step": 305960 }, { "epoch": 1.1827944519181703, "grad_norm": 0.09435338526964188, "learning_rate": 0.002, "loss": 2.3325, "step": 305970 }, { "epoch": 1.1828331091215536, "grad_norm": 0.1275791972875595, "learning_rate": 0.002, "loss": 2.3444, "step": 305980 }, { "epoch": 1.182871766324937, "grad_norm": 0.11196912080049515, "learning_rate": 0.002, "loss": 2.3266, "step": 305990 }, { "epoch": 1.1829104235283203, "grad_norm": 0.09606979042291641, "learning_rate": 0.002, "loss": 2.3456, "step": 306000 }, { "epoch": 1.1829490807317036, "grad_norm": 0.12343128025531769, "learning_rate": 0.002, "loss": 2.3342, "step": 306010 }, { "epoch": 1.1829877379350868, "grad_norm": 0.13962876796722412, "learning_rate": 0.002, "loss": 2.3356, "step": 306020 }, { "epoch": 1.18302639513847, "grad_norm": 0.10752856731414795, "learning_rate": 0.002, "loss": 2.3409, "step": 306030 }, { "epoch": 1.1830650523418533, "grad_norm": 0.10863500088453293, "learning_rate": 0.002, "loss": 2.3279, "step": 306040 }, { "epoch": 1.1831037095452366, "grad_norm": 0.11633353680372238, "learning_rate": 0.002, "loss": 2.3348, "step": 306050 }, { "epoch": 1.18314236674862, "grad_norm": 0.09035677462816238, "learning_rate": 0.002, "loss": 2.3427, "step": 306060 }, { "epoch": 1.1831810239520033, "grad_norm": 0.12252160161733627, "learning_rate": 0.002, "loss": 2.3235, "step": 306070 }, { "epoch": 1.1832196811553866, "grad_norm": 0.1086413711309433, "learning_rate": 0.002, "loss": 2.3372, "step": 306080 }, { "epoch": 1.1832583383587698, "grad_norm": 0.09670509397983551, "learning_rate": 0.002, "loss": 2.3202, "step": 306090 }, { "epoch": 1.183296995562153, "grad_norm": 0.12193197757005692, "learning_rate": 0.002, "loss": 2.3291, "step": 306100 }, { "epoch": 1.1833356527655363, "grad_norm": 0.11630985885858536, "learning_rate": 0.002, "loss": 2.3193, "step": 306110 }, { "epoch": 1.1833743099689196, "grad_norm": 0.10431554168462753, "learning_rate": 0.002, "loss": 2.3324, "step": 306120 }, { "epoch": 1.1834129671723028, "grad_norm": 0.11279870569705963, "learning_rate": 0.002, "loss": 2.3193, "step": 306130 }, { "epoch": 1.183451624375686, "grad_norm": 0.10690067708492279, "learning_rate": 0.002, "loss": 2.331, "step": 306140 }, { "epoch": 1.1834902815790693, "grad_norm": 0.10031410306692123, "learning_rate": 0.002, "loss": 2.3522, "step": 306150 }, { "epoch": 1.1835289387824528, "grad_norm": 0.10549402236938477, "learning_rate": 0.002, "loss": 2.3263, "step": 306160 }, { "epoch": 1.183567595985836, "grad_norm": 0.09717525541782379, "learning_rate": 0.002, "loss": 2.3419, "step": 306170 }, { "epoch": 1.1836062531892193, "grad_norm": 0.10498525202274323, "learning_rate": 0.002, "loss": 2.3212, "step": 306180 }, { "epoch": 1.1836449103926026, "grad_norm": 0.10035532712936401, "learning_rate": 0.002, "loss": 2.3133, "step": 306190 }, { "epoch": 1.1836835675959858, "grad_norm": 0.1097157821059227, "learning_rate": 0.002, "loss": 2.3423, "step": 306200 }, { "epoch": 1.183722224799369, "grad_norm": 0.10666971653699875, "learning_rate": 0.002, "loss": 2.325, "step": 306210 }, { "epoch": 1.1837608820027523, "grad_norm": 0.10622602701187134, "learning_rate": 0.002, "loss": 2.326, "step": 306220 }, { "epoch": 1.1837995392061358, "grad_norm": 0.11170188337564468, "learning_rate": 0.002, "loss": 2.3242, "step": 306230 }, { "epoch": 1.183838196409519, "grad_norm": 0.09753941744565964, "learning_rate": 0.002, "loss": 2.3264, "step": 306240 }, { "epoch": 1.1838768536129023, "grad_norm": 0.12126488238573074, "learning_rate": 0.002, "loss": 2.3344, "step": 306250 }, { "epoch": 1.1839155108162855, "grad_norm": 0.10929860919713974, "learning_rate": 0.002, "loss": 2.3513, "step": 306260 }, { "epoch": 1.1839541680196688, "grad_norm": 0.09766397625207901, "learning_rate": 0.002, "loss": 2.346, "step": 306270 }, { "epoch": 1.183992825223052, "grad_norm": 0.11956153064966202, "learning_rate": 0.002, "loss": 2.3467, "step": 306280 }, { "epoch": 1.1840314824264353, "grad_norm": 0.13645967841148376, "learning_rate": 0.002, "loss": 2.3264, "step": 306290 }, { "epoch": 1.1840701396298186, "grad_norm": 0.132292702794075, "learning_rate": 0.002, "loss": 2.3377, "step": 306300 }, { "epoch": 1.1841087968332018, "grad_norm": 0.09791436791419983, "learning_rate": 0.002, "loss": 2.3164, "step": 306310 }, { "epoch": 1.184147454036585, "grad_norm": 0.10473255813121796, "learning_rate": 0.002, "loss": 2.3547, "step": 306320 }, { "epoch": 1.1841861112399685, "grad_norm": 0.11555669456720352, "learning_rate": 0.002, "loss": 2.3362, "step": 306330 }, { "epoch": 1.1842247684433518, "grad_norm": 0.11690516024827957, "learning_rate": 0.002, "loss": 2.3343, "step": 306340 }, { "epoch": 1.184263425646735, "grad_norm": 0.095167376101017, "learning_rate": 0.002, "loss": 2.3384, "step": 306350 }, { "epoch": 1.1843020828501183, "grad_norm": 0.10989649593830109, "learning_rate": 0.002, "loss": 2.3297, "step": 306360 }, { "epoch": 1.1843407400535015, "grad_norm": 0.13234752416610718, "learning_rate": 0.002, "loss": 2.3319, "step": 306370 }, { "epoch": 1.1843793972568848, "grad_norm": 0.11061105132102966, "learning_rate": 0.002, "loss": 2.3651, "step": 306380 }, { "epoch": 1.184418054460268, "grad_norm": 0.10562548041343689, "learning_rate": 0.002, "loss": 2.3388, "step": 306390 }, { "epoch": 1.1844567116636515, "grad_norm": 0.09931013733148575, "learning_rate": 0.002, "loss": 2.3456, "step": 306400 }, { "epoch": 1.1844953688670348, "grad_norm": 0.1872059553861618, "learning_rate": 0.002, "loss": 2.3333, "step": 306410 }, { "epoch": 1.184534026070418, "grad_norm": 0.09803324192762375, "learning_rate": 0.002, "loss": 2.3364, "step": 306420 }, { "epoch": 1.1845726832738013, "grad_norm": 0.10257833451032639, "learning_rate": 0.002, "loss": 2.3352, "step": 306430 }, { "epoch": 1.1846113404771845, "grad_norm": 0.11944346129894257, "learning_rate": 0.002, "loss": 2.321, "step": 306440 }, { "epoch": 1.1846499976805678, "grad_norm": 0.11018768697977066, "learning_rate": 0.002, "loss": 2.3381, "step": 306450 }, { "epoch": 1.184688654883951, "grad_norm": 0.09937886148691177, "learning_rate": 0.002, "loss": 2.3397, "step": 306460 }, { "epoch": 1.1847273120873343, "grad_norm": 0.11394789069890976, "learning_rate": 0.002, "loss": 2.3339, "step": 306470 }, { "epoch": 1.1847659692907175, "grad_norm": 0.10206689685583115, "learning_rate": 0.002, "loss": 2.3323, "step": 306480 }, { "epoch": 1.1848046264941008, "grad_norm": 0.09132876992225647, "learning_rate": 0.002, "loss": 2.3426, "step": 306490 }, { "epoch": 1.1848432836974843, "grad_norm": 0.10233450680971146, "learning_rate": 0.002, "loss": 2.3473, "step": 306500 }, { "epoch": 1.1848819409008675, "grad_norm": 0.11387529969215393, "learning_rate": 0.002, "loss": 2.3197, "step": 306510 }, { "epoch": 1.1849205981042508, "grad_norm": 0.09129679203033447, "learning_rate": 0.002, "loss": 2.3208, "step": 306520 }, { "epoch": 1.184959255307634, "grad_norm": 0.12205521762371063, "learning_rate": 0.002, "loss": 2.3247, "step": 306530 }, { "epoch": 1.1849979125110173, "grad_norm": 0.11332284659147263, "learning_rate": 0.002, "loss": 2.3438, "step": 306540 }, { "epoch": 1.1850365697144005, "grad_norm": 0.10139425843954086, "learning_rate": 0.002, "loss": 2.3404, "step": 306550 }, { "epoch": 1.1850752269177838, "grad_norm": 0.10110003501176834, "learning_rate": 0.002, "loss": 2.3339, "step": 306560 }, { "epoch": 1.1851138841211672, "grad_norm": 0.23061221837997437, "learning_rate": 0.002, "loss": 2.3468, "step": 306570 }, { "epoch": 1.1851525413245505, "grad_norm": 0.11319497227668762, "learning_rate": 0.002, "loss": 2.3315, "step": 306580 }, { "epoch": 1.1851911985279338, "grad_norm": 0.09993323683738708, "learning_rate": 0.002, "loss": 2.3261, "step": 306590 }, { "epoch": 1.185229855731317, "grad_norm": 0.11044862866401672, "learning_rate": 0.002, "loss": 2.3375, "step": 306600 }, { "epoch": 1.1852685129347003, "grad_norm": 0.09655289351940155, "learning_rate": 0.002, "loss": 2.3244, "step": 306610 }, { "epoch": 1.1853071701380835, "grad_norm": 0.13763998448848724, "learning_rate": 0.002, "loss": 2.3269, "step": 306620 }, { "epoch": 1.1853458273414668, "grad_norm": 0.10221410542726517, "learning_rate": 0.002, "loss": 2.3343, "step": 306630 }, { "epoch": 1.18538448454485, "grad_norm": 0.09295745193958282, "learning_rate": 0.002, "loss": 2.338, "step": 306640 }, { "epoch": 1.1854231417482333, "grad_norm": 0.0991741344332695, "learning_rate": 0.002, "loss": 2.325, "step": 306650 }, { "epoch": 1.1854617989516167, "grad_norm": 0.12523503601551056, "learning_rate": 0.002, "loss": 2.3453, "step": 306660 }, { "epoch": 1.185500456155, "grad_norm": 0.09871948510408401, "learning_rate": 0.002, "loss": 2.3273, "step": 306670 }, { "epoch": 1.1855391133583832, "grad_norm": 0.11182510107755661, "learning_rate": 0.002, "loss": 2.3332, "step": 306680 }, { "epoch": 1.1855777705617665, "grad_norm": 0.09244689345359802, "learning_rate": 0.002, "loss": 2.3349, "step": 306690 }, { "epoch": 1.1856164277651497, "grad_norm": 0.09097203612327576, "learning_rate": 0.002, "loss": 2.3298, "step": 306700 }, { "epoch": 1.185655084968533, "grad_norm": 0.10416053980588913, "learning_rate": 0.002, "loss": 2.3241, "step": 306710 }, { "epoch": 1.1856937421719163, "grad_norm": 0.09652557224035263, "learning_rate": 0.002, "loss": 2.3247, "step": 306720 }, { "epoch": 1.1857323993752995, "grad_norm": 0.10522696375846863, "learning_rate": 0.002, "loss": 2.3278, "step": 306730 }, { "epoch": 1.185771056578683, "grad_norm": 0.12889423966407776, "learning_rate": 0.002, "loss": 2.3299, "step": 306740 }, { "epoch": 1.1858097137820662, "grad_norm": 0.11473888903856277, "learning_rate": 0.002, "loss": 2.3191, "step": 306750 }, { "epoch": 1.1858483709854495, "grad_norm": 0.1227368712425232, "learning_rate": 0.002, "loss": 2.3261, "step": 306760 }, { "epoch": 1.1858870281888327, "grad_norm": 0.12399069219827652, "learning_rate": 0.002, "loss": 2.3423, "step": 306770 }, { "epoch": 1.185925685392216, "grad_norm": 0.09926015883684158, "learning_rate": 0.002, "loss": 2.336, "step": 306780 }, { "epoch": 1.1859643425955992, "grad_norm": 0.09733019024133682, "learning_rate": 0.002, "loss": 2.3423, "step": 306790 }, { "epoch": 1.1860029997989825, "grad_norm": 0.09353894740343094, "learning_rate": 0.002, "loss": 2.3376, "step": 306800 }, { "epoch": 1.1860416570023657, "grad_norm": 0.11982983350753784, "learning_rate": 0.002, "loss": 2.3463, "step": 306810 }, { "epoch": 1.186080314205749, "grad_norm": 0.10334306210279465, "learning_rate": 0.002, "loss": 2.3404, "step": 306820 }, { "epoch": 1.1861189714091325, "grad_norm": 0.09221477806568146, "learning_rate": 0.002, "loss": 2.3275, "step": 306830 }, { "epoch": 1.1861576286125157, "grad_norm": 0.10017237812280655, "learning_rate": 0.002, "loss": 2.3322, "step": 306840 }, { "epoch": 1.186196285815899, "grad_norm": 0.10200434923171997, "learning_rate": 0.002, "loss": 2.3394, "step": 306850 }, { "epoch": 1.1862349430192822, "grad_norm": 0.12413059175014496, "learning_rate": 0.002, "loss": 2.3411, "step": 306860 }, { "epoch": 1.1862736002226655, "grad_norm": 0.10810477286577225, "learning_rate": 0.002, "loss": 2.3173, "step": 306870 }, { "epoch": 1.1863122574260487, "grad_norm": 0.1036938801407814, "learning_rate": 0.002, "loss": 2.3245, "step": 306880 }, { "epoch": 1.186350914629432, "grad_norm": 0.0994827151298523, "learning_rate": 0.002, "loss": 2.3392, "step": 306890 }, { "epoch": 1.1863895718328152, "grad_norm": 0.10518717765808105, "learning_rate": 0.002, "loss": 2.3299, "step": 306900 }, { "epoch": 1.1864282290361987, "grad_norm": 0.09604518115520477, "learning_rate": 0.002, "loss": 2.3397, "step": 306910 }, { "epoch": 1.186466886239582, "grad_norm": 0.10765660554170609, "learning_rate": 0.002, "loss": 2.3286, "step": 306920 }, { "epoch": 1.1865055434429652, "grad_norm": 0.1137009784579277, "learning_rate": 0.002, "loss": 2.3387, "step": 306930 }, { "epoch": 1.1865442006463485, "grad_norm": 0.10713685303926468, "learning_rate": 0.002, "loss": 2.334, "step": 306940 }, { "epoch": 1.1865828578497317, "grad_norm": 0.09157906472682953, "learning_rate": 0.002, "loss": 2.3378, "step": 306950 }, { "epoch": 1.186621515053115, "grad_norm": 0.11402291059494019, "learning_rate": 0.002, "loss": 2.3161, "step": 306960 }, { "epoch": 1.1866601722564982, "grad_norm": 0.09192168712615967, "learning_rate": 0.002, "loss": 2.3532, "step": 306970 }, { "epoch": 1.1866988294598815, "grad_norm": 0.10053259879350662, "learning_rate": 0.002, "loss": 2.3388, "step": 306980 }, { "epoch": 1.1867374866632647, "grad_norm": 0.11963258683681488, "learning_rate": 0.002, "loss": 2.3269, "step": 306990 }, { "epoch": 1.1867761438666482, "grad_norm": 0.11115763336420059, "learning_rate": 0.002, "loss": 2.321, "step": 307000 }, { "epoch": 1.1868148010700315, "grad_norm": 0.12055559456348419, "learning_rate": 0.002, "loss": 2.3298, "step": 307010 }, { "epoch": 1.1868534582734147, "grad_norm": 0.11069901287555695, "learning_rate": 0.002, "loss": 2.3352, "step": 307020 }, { "epoch": 1.186892115476798, "grad_norm": 0.10177513211965561, "learning_rate": 0.002, "loss": 2.3426, "step": 307030 }, { "epoch": 1.1869307726801812, "grad_norm": 0.10583703964948654, "learning_rate": 0.002, "loss": 2.3286, "step": 307040 }, { "epoch": 1.1869694298835645, "grad_norm": 0.10791445523500443, "learning_rate": 0.002, "loss": 2.3412, "step": 307050 }, { "epoch": 1.1870080870869477, "grad_norm": 0.10395360738039017, "learning_rate": 0.002, "loss": 2.3593, "step": 307060 }, { "epoch": 1.187046744290331, "grad_norm": 0.09354843199253082, "learning_rate": 0.002, "loss": 2.3288, "step": 307070 }, { "epoch": 1.1870854014937144, "grad_norm": 0.11032462120056152, "learning_rate": 0.002, "loss": 2.3395, "step": 307080 }, { "epoch": 1.1871240586970977, "grad_norm": 0.12376872450113297, "learning_rate": 0.002, "loss": 2.3506, "step": 307090 }, { "epoch": 1.187162715900481, "grad_norm": 0.1092623770236969, "learning_rate": 0.002, "loss": 2.3262, "step": 307100 }, { "epoch": 1.1872013731038642, "grad_norm": 0.11651972681283951, "learning_rate": 0.002, "loss": 2.3538, "step": 307110 }, { "epoch": 1.1872400303072475, "grad_norm": 0.10235489159822464, "learning_rate": 0.002, "loss": 2.3316, "step": 307120 }, { "epoch": 1.1872786875106307, "grad_norm": 0.10163053870201111, "learning_rate": 0.002, "loss": 2.3318, "step": 307130 }, { "epoch": 1.187317344714014, "grad_norm": 0.10021232813596725, "learning_rate": 0.002, "loss": 2.3254, "step": 307140 }, { "epoch": 1.1873560019173972, "grad_norm": 0.10669775307178497, "learning_rate": 0.002, "loss": 2.3342, "step": 307150 }, { "epoch": 1.1873946591207805, "grad_norm": 0.08023542910814285, "learning_rate": 0.002, "loss": 2.3254, "step": 307160 }, { "epoch": 1.187433316324164, "grad_norm": 0.10091197490692139, "learning_rate": 0.002, "loss": 2.3357, "step": 307170 }, { "epoch": 1.1874719735275472, "grad_norm": 0.11032316088676453, "learning_rate": 0.002, "loss": 2.3265, "step": 307180 }, { "epoch": 1.1875106307309304, "grad_norm": 0.10547507554292679, "learning_rate": 0.002, "loss": 2.3299, "step": 307190 }, { "epoch": 1.1875492879343137, "grad_norm": 0.11119785159826279, "learning_rate": 0.002, "loss": 2.3422, "step": 307200 }, { "epoch": 1.187587945137697, "grad_norm": 0.10898708552122116, "learning_rate": 0.002, "loss": 2.3413, "step": 307210 }, { "epoch": 1.1876266023410802, "grad_norm": 0.11538541316986084, "learning_rate": 0.002, "loss": 2.3331, "step": 307220 }, { "epoch": 1.1876652595444634, "grad_norm": 0.10693303495645523, "learning_rate": 0.002, "loss": 2.3413, "step": 307230 }, { "epoch": 1.1877039167478467, "grad_norm": 0.09446164220571518, "learning_rate": 0.002, "loss": 2.3381, "step": 307240 }, { "epoch": 1.1877425739512302, "grad_norm": 0.11978152394294739, "learning_rate": 0.002, "loss": 2.337, "step": 307250 }, { "epoch": 1.1877812311546134, "grad_norm": 0.10594190657138824, "learning_rate": 0.002, "loss": 2.3256, "step": 307260 }, { "epoch": 1.1878198883579967, "grad_norm": 0.11744443327188492, "learning_rate": 0.002, "loss": 2.3353, "step": 307270 }, { "epoch": 1.18785854556138, "grad_norm": 0.10290765762329102, "learning_rate": 0.002, "loss": 2.3353, "step": 307280 }, { "epoch": 1.1878972027647632, "grad_norm": 0.10236617177724838, "learning_rate": 0.002, "loss": 2.3371, "step": 307290 }, { "epoch": 1.1879358599681464, "grad_norm": 0.10091518610715866, "learning_rate": 0.002, "loss": 2.33, "step": 307300 }, { "epoch": 1.1879745171715297, "grad_norm": 0.09385214000940323, "learning_rate": 0.002, "loss": 2.3234, "step": 307310 }, { "epoch": 1.188013174374913, "grad_norm": 0.11631542444229126, "learning_rate": 0.002, "loss": 2.3356, "step": 307320 }, { "epoch": 1.1880518315782962, "grad_norm": 0.10773419588804245, "learning_rate": 0.002, "loss": 2.306, "step": 307330 }, { "epoch": 1.1880904887816797, "grad_norm": 0.11131122708320618, "learning_rate": 0.002, "loss": 2.3496, "step": 307340 }, { "epoch": 1.188129145985063, "grad_norm": 0.1060451790690422, "learning_rate": 0.002, "loss": 2.336, "step": 307350 }, { "epoch": 1.1881678031884462, "grad_norm": 0.10052044689655304, "learning_rate": 0.002, "loss": 2.3489, "step": 307360 }, { "epoch": 1.1882064603918294, "grad_norm": 0.10508698970079422, "learning_rate": 0.002, "loss": 2.3308, "step": 307370 }, { "epoch": 1.1882451175952127, "grad_norm": 0.18196623027324677, "learning_rate": 0.002, "loss": 2.3279, "step": 307380 }, { "epoch": 1.188283774798596, "grad_norm": 0.10565753281116486, "learning_rate": 0.002, "loss": 2.3296, "step": 307390 }, { "epoch": 1.1883224320019792, "grad_norm": 0.09839676320552826, "learning_rate": 0.002, "loss": 2.3456, "step": 307400 }, { "epoch": 1.1883610892053624, "grad_norm": 0.09454700350761414, "learning_rate": 0.002, "loss": 2.3325, "step": 307410 }, { "epoch": 1.188399746408746, "grad_norm": 0.11287378519773483, "learning_rate": 0.002, "loss": 2.3235, "step": 307420 }, { "epoch": 1.1884384036121292, "grad_norm": 0.10098222643136978, "learning_rate": 0.002, "loss": 2.333, "step": 307430 }, { "epoch": 1.1884770608155124, "grad_norm": 0.10282760858535767, "learning_rate": 0.002, "loss": 2.3323, "step": 307440 }, { "epoch": 1.1885157180188957, "grad_norm": 0.09915308654308319, "learning_rate": 0.002, "loss": 2.3258, "step": 307450 }, { "epoch": 1.188554375222279, "grad_norm": 0.10907262563705444, "learning_rate": 0.002, "loss": 2.3408, "step": 307460 }, { "epoch": 1.1885930324256622, "grad_norm": 0.1083667203783989, "learning_rate": 0.002, "loss": 2.3209, "step": 307470 }, { "epoch": 1.1886316896290454, "grad_norm": 0.11217281967401505, "learning_rate": 0.002, "loss": 2.3375, "step": 307480 }, { "epoch": 1.1886703468324287, "grad_norm": 0.09714347869157791, "learning_rate": 0.002, "loss": 2.3439, "step": 307490 }, { "epoch": 1.188709004035812, "grad_norm": 0.0932031124830246, "learning_rate": 0.002, "loss": 2.3379, "step": 307500 }, { "epoch": 1.1887476612391954, "grad_norm": 0.09969399124383926, "learning_rate": 0.002, "loss": 2.3439, "step": 307510 }, { "epoch": 1.1887863184425786, "grad_norm": 0.10963352024555206, "learning_rate": 0.002, "loss": 2.3449, "step": 307520 }, { "epoch": 1.188824975645962, "grad_norm": 0.10182205587625504, "learning_rate": 0.002, "loss": 2.341, "step": 307530 }, { "epoch": 1.1888636328493452, "grad_norm": 0.13690140843391418, "learning_rate": 0.002, "loss": 2.3411, "step": 307540 }, { "epoch": 1.1889022900527284, "grad_norm": 0.11846937984228134, "learning_rate": 0.002, "loss": 2.3508, "step": 307550 }, { "epoch": 1.1889409472561117, "grad_norm": 0.1026109978556633, "learning_rate": 0.002, "loss": 2.3193, "step": 307560 }, { "epoch": 1.188979604459495, "grad_norm": 0.1018030047416687, "learning_rate": 0.002, "loss": 2.33, "step": 307570 }, { "epoch": 1.1890182616628784, "grad_norm": 0.09329404681921005, "learning_rate": 0.002, "loss": 2.3361, "step": 307580 }, { "epoch": 1.1890569188662616, "grad_norm": 0.09284912049770355, "learning_rate": 0.002, "loss": 2.3274, "step": 307590 }, { "epoch": 1.1890955760696449, "grad_norm": 0.10771986097097397, "learning_rate": 0.002, "loss": 2.332, "step": 307600 }, { "epoch": 1.1891342332730281, "grad_norm": 0.10230138152837753, "learning_rate": 0.002, "loss": 2.3326, "step": 307610 }, { "epoch": 1.1891728904764114, "grad_norm": 0.09888580441474915, "learning_rate": 0.002, "loss": 2.3549, "step": 307620 }, { "epoch": 1.1892115476797946, "grad_norm": 0.10737486183643341, "learning_rate": 0.002, "loss": 2.3256, "step": 307630 }, { "epoch": 1.189250204883178, "grad_norm": 0.12650904059410095, "learning_rate": 0.002, "loss": 2.3252, "step": 307640 }, { "epoch": 1.1892888620865611, "grad_norm": 0.08823800086975098, "learning_rate": 0.002, "loss": 2.3346, "step": 307650 }, { "epoch": 1.1893275192899444, "grad_norm": 0.12838537991046906, "learning_rate": 0.002, "loss": 2.3474, "step": 307660 }, { "epoch": 1.1893661764933277, "grad_norm": 0.12015782296657562, "learning_rate": 0.002, "loss": 2.3388, "step": 307670 }, { "epoch": 1.1894048336967111, "grad_norm": 0.09876890480518341, "learning_rate": 0.002, "loss": 2.3363, "step": 307680 }, { "epoch": 1.1894434909000944, "grad_norm": 0.11343344300985336, "learning_rate": 0.002, "loss": 2.3215, "step": 307690 }, { "epoch": 1.1894821481034776, "grad_norm": 0.10124673694372177, "learning_rate": 0.002, "loss": 2.3457, "step": 307700 }, { "epoch": 1.1895208053068609, "grad_norm": 0.11742060631513596, "learning_rate": 0.002, "loss": 2.3545, "step": 307710 }, { "epoch": 1.1895594625102441, "grad_norm": 0.10189960896968842, "learning_rate": 0.002, "loss": 2.3294, "step": 307720 }, { "epoch": 1.1895981197136274, "grad_norm": 0.12576916813850403, "learning_rate": 0.002, "loss": 2.3193, "step": 307730 }, { "epoch": 1.1896367769170106, "grad_norm": 0.1032869890332222, "learning_rate": 0.002, "loss": 2.3277, "step": 307740 }, { "epoch": 1.1896754341203941, "grad_norm": 0.09936468303203583, "learning_rate": 0.002, "loss": 2.3358, "step": 307750 }, { "epoch": 1.1897140913237774, "grad_norm": 0.1241552010178566, "learning_rate": 0.002, "loss": 2.3354, "step": 307760 }, { "epoch": 1.1897527485271606, "grad_norm": 0.0964624434709549, "learning_rate": 0.002, "loss": 2.3457, "step": 307770 }, { "epoch": 1.1897914057305439, "grad_norm": 0.12272000312805176, "learning_rate": 0.002, "loss": 2.333, "step": 307780 }, { "epoch": 1.1898300629339271, "grad_norm": 0.08760685473680496, "learning_rate": 0.002, "loss": 2.3384, "step": 307790 }, { "epoch": 1.1898687201373104, "grad_norm": 0.0961800292134285, "learning_rate": 0.002, "loss": 2.316, "step": 307800 }, { "epoch": 1.1899073773406936, "grad_norm": 0.1039305329322815, "learning_rate": 0.002, "loss": 2.3136, "step": 307810 }, { "epoch": 1.1899460345440769, "grad_norm": 0.09458538889884949, "learning_rate": 0.002, "loss": 2.3337, "step": 307820 }, { "epoch": 1.1899846917474601, "grad_norm": 0.12459837645292282, "learning_rate": 0.002, "loss": 2.3336, "step": 307830 }, { "epoch": 1.1900233489508434, "grad_norm": 0.10995468497276306, "learning_rate": 0.002, "loss": 2.3388, "step": 307840 }, { "epoch": 1.1900620061542269, "grad_norm": 0.1075374186038971, "learning_rate": 0.002, "loss": 2.3316, "step": 307850 }, { "epoch": 1.1901006633576101, "grad_norm": 0.130019411444664, "learning_rate": 0.002, "loss": 2.3308, "step": 307860 }, { "epoch": 1.1901393205609934, "grad_norm": 0.10518362373113632, "learning_rate": 0.002, "loss": 2.3474, "step": 307870 }, { "epoch": 1.1901779777643766, "grad_norm": 0.10554949939250946, "learning_rate": 0.002, "loss": 2.3356, "step": 307880 }, { "epoch": 1.1902166349677599, "grad_norm": 0.10739036649465561, "learning_rate": 0.002, "loss": 2.3404, "step": 307890 }, { "epoch": 1.1902552921711431, "grad_norm": 0.11622469127178192, "learning_rate": 0.002, "loss": 2.3363, "step": 307900 }, { "epoch": 1.1902939493745264, "grad_norm": 0.1241396814584732, "learning_rate": 0.002, "loss": 2.3385, "step": 307910 }, { "epoch": 1.1903326065779098, "grad_norm": 0.1170787587761879, "learning_rate": 0.002, "loss": 2.3469, "step": 307920 }, { "epoch": 1.190371263781293, "grad_norm": 0.10361120849847794, "learning_rate": 0.002, "loss": 2.3381, "step": 307930 }, { "epoch": 1.1904099209846764, "grad_norm": 0.11528493463993073, "learning_rate": 0.002, "loss": 2.3292, "step": 307940 }, { "epoch": 1.1904485781880596, "grad_norm": 0.10670798271894455, "learning_rate": 0.002, "loss": 2.3412, "step": 307950 }, { "epoch": 1.1904872353914429, "grad_norm": 0.1060328334569931, "learning_rate": 0.002, "loss": 2.3365, "step": 307960 }, { "epoch": 1.190525892594826, "grad_norm": 0.096269890666008, "learning_rate": 0.002, "loss": 2.3304, "step": 307970 }, { "epoch": 1.1905645497982094, "grad_norm": 0.10902507603168488, "learning_rate": 0.002, "loss": 2.3376, "step": 307980 }, { "epoch": 1.1906032070015926, "grad_norm": 0.11386062949895859, "learning_rate": 0.002, "loss": 2.3374, "step": 307990 }, { "epoch": 1.1906418642049759, "grad_norm": 0.11864448338747025, "learning_rate": 0.002, "loss": 2.3325, "step": 308000 }, { "epoch": 1.1906805214083591, "grad_norm": 0.10171358287334442, "learning_rate": 0.002, "loss": 2.3366, "step": 308010 }, { "epoch": 1.1907191786117426, "grad_norm": 0.09052585065364838, "learning_rate": 0.002, "loss": 2.3545, "step": 308020 }, { "epoch": 1.1907578358151258, "grad_norm": 0.09586137533187866, "learning_rate": 0.002, "loss": 2.3236, "step": 308030 }, { "epoch": 1.190796493018509, "grad_norm": 0.10091507434844971, "learning_rate": 0.002, "loss": 2.3347, "step": 308040 }, { "epoch": 1.1908351502218923, "grad_norm": 0.10137958079576492, "learning_rate": 0.002, "loss": 2.3278, "step": 308050 }, { "epoch": 1.1908738074252756, "grad_norm": 0.10727639496326447, "learning_rate": 0.002, "loss": 2.3415, "step": 308060 }, { "epoch": 1.1909124646286589, "grad_norm": 0.23738090693950653, "learning_rate": 0.002, "loss": 2.3303, "step": 308070 }, { "epoch": 1.190951121832042, "grad_norm": 0.20171679556369781, "learning_rate": 0.002, "loss": 2.3359, "step": 308080 }, { "epoch": 1.1909897790354256, "grad_norm": 0.10972524434328079, "learning_rate": 0.002, "loss": 2.3252, "step": 308090 }, { "epoch": 1.1910284362388088, "grad_norm": 0.09481258690357208, "learning_rate": 0.002, "loss": 2.3347, "step": 308100 }, { "epoch": 1.191067093442192, "grad_norm": 0.09582295268774033, "learning_rate": 0.002, "loss": 2.3253, "step": 308110 }, { "epoch": 1.1911057506455753, "grad_norm": 0.1289292275905609, "learning_rate": 0.002, "loss": 2.3345, "step": 308120 }, { "epoch": 1.1911444078489586, "grad_norm": 0.10114498436450958, "learning_rate": 0.002, "loss": 2.3358, "step": 308130 }, { "epoch": 1.1911830650523418, "grad_norm": 0.09929239004850388, "learning_rate": 0.002, "loss": 2.3358, "step": 308140 }, { "epoch": 1.191221722255725, "grad_norm": 0.10156530886888504, "learning_rate": 0.002, "loss": 2.3281, "step": 308150 }, { "epoch": 1.1912603794591083, "grad_norm": 0.10426311939954758, "learning_rate": 0.002, "loss": 2.3438, "step": 308160 }, { "epoch": 1.1912990366624916, "grad_norm": 0.11024799942970276, "learning_rate": 0.002, "loss": 2.3264, "step": 308170 }, { "epoch": 1.1913376938658748, "grad_norm": 0.10496527701616287, "learning_rate": 0.002, "loss": 2.343, "step": 308180 }, { "epoch": 1.1913763510692583, "grad_norm": 0.09811306744813919, "learning_rate": 0.002, "loss": 2.333, "step": 308190 }, { "epoch": 1.1914150082726416, "grad_norm": 0.1302216500043869, "learning_rate": 0.002, "loss": 2.3325, "step": 308200 }, { "epoch": 1.1914536654760248, "grad_norm": 0.11187312752008438, "learning_rate": 0.002, "loss": 2.3258, "step": 308210 }, { "epoch": 1.191492322679408, "grad_norm": 0.10793672502040863, "learning_rate": 0.002, "loss": 2.3171, "step": 308220 }, { "epoch": 1.1915309798827913, "grad_norm": 0.08793167024850845, "learning_rate": 0.002, "loss": 2.3394, "step": 308230 }, { "epoch": 1.1915696370861746, "grad_norm": 0.10832060128450394, "learning_rate": 0.002, "loss": 2.3377, "step": 308240 }, { "epoch": 1.1916082942895578, "grad_norm": 0.10909979790449142, "learning_rate": 0.002, "loss": 2.3387, "step": 308250 }, { "epoch": 1.1916469514929413, "grad_norm": 0.11267384886741638, "learning_rate": 0.002, "loss": 2.3264, "step": 308260 }, { "epoch": 1.1916856086963246, "grad_norm": 0.09032999724149704, "learning_rate": 0.002, "loss": 2.3475, "step": 308270 }, { "epoch": 1.1917242658997078, "grad_norm": 0.1101817861199379, "learning_rate": 0.002, "loss": 2.3231, "step": 308280 }, { "epoch": 1.191762923103091, "grad_norm": 0.11183900386095047, "learning_rate": 0.002, "loss": 2.346, "step": 308290 }, { "epoch": 1.1918015803064743, "grad_norm": 0.09554877132177353, "learning_rate": 0.002, "loss": 2.3264, "step": 308300 }, { "epoch": 1.1918402375098576, "grad_norm": 0.10025066882371902, "learning_rate": 0.002, "loss": 2.3277, "step": 308310 }, { "epoch": 1.1918788947132408, "grad_norm": 0.11711962521076202, "learning_rate": 0.002, "loss": 2.3201, "step": 308320 }, { "epoch": 1.191917551916624, "grad_norm": 0.10263378918170929, "learning_rate": 0.002, "loss": 2.3307, "step": 308330 }, { "epoch": 1.1919562091200073, "grad_norm": 0.09514163434505463, "learning_rate": 0.002, "loss": 2.3434, "step": 308340 }, { "epoch": 1.1919948663233906, "grad_norm": 0.13325776159763336, "learning_rate": 0.002, "loss": 2.3415, "step": 308350 }, { "epoch": 1.192033523526774, "grad_norm": 0.09737804532051086, "learning_rate": 0.002, "loss": 2.3346, "step": 308360 }, { "epoch": 1.1920721807301573, "grad_norm": 0.09637849032878876, "learning_rate": 0.002, "loss": 2.3182, "step": 308370 }, { "epoch": 1.1921108379335406, "grad_norm": 0.10954175144433975, "learning_rate": 0.002, "loss": 2.3275, "step": 308380 }, { "epoch": 1.1921494951369238, "grad_norm": 0.09258211404085159, "learning_rate": 0.002, "loss": 2.3542, "step": 308390 }, { "epoch": 1.192188152340307, "grad_norm": 0.1098412349820137, "learning_rate": 0.002, "loss": 2.3266, "step": 308400 }, { "epoch": 1.1922268095436903, "grad_norm": 0.14261560142040253, "learning_rate": 0.002, "loss": 2.3279, "step": 308410 }, { "epoch": 1.1922654667470736, "grad_norm": 0.09297990798950195, "learning_rate": 0.002, "loss": 2.3489, "step": 308420 }, { "epoch": 1.192304123950457, "grad_norm": 0.09771433472633362, "learning_rate": 0.002, "loss": 2.3317, "step": 308430 }, { "epoch": 1.1923427811538403, "grad_norm": 0.12001529335975647, "learning_rate": 0.002, "loss": 2.3376, "step": 308440 }, { "epoch": 1.1923814383572235, "grad_norm": 0.1097932979464531, "learning_rate": 0.002, "loss": 2.3437, "step": 308450 }, { "epoch": 1.1924200955606068, "grad_norm": 0.09968653321266174, "learning_rate": 0.002, "loss": 2.3359, "step": 308460 }, { "epoch": 1.19245875276399, "grad_norm": 0.11144307255744934, "learning_rate": 0.002, "loss": 2.333, "step": 308470 }, { "epoch": 1.1924974099673733, "grad_norm": 0.17210029065608978, "learning_rate": 0.002, "loss": 2.3325, "step": 308480 }, { "epoch": 1.1925360671707566, "grad_norm": 0.11730711162090302, "learning_rate": 0.002, "loss": 2.3434, "step": 308490 }, { "epoch": 1.1925747243741398, "grad_norm": 0.12075983732938766, "learning_rate": 0.002, "loss": 2.3394, "step": 308500 }, { "epoch": 1.192613381577523, "grad_norm": 0.10987797379493713, "learning_rate": 0.002, "loss": 2.3277, "step": 308510 }, { "epoch": 1.1926520387809063, "grad_norm": 0.10271605104207993, "learning_rate": 0.002, "loss": 2.3311, "step": 308520 }, { "epoch": 1.1926906959842898, "grad_norm": 0.10429355502128601, "learning_rate": 0.002, "loss": 2.3423, "step": 308530 }, { "epoch": 1.192729353187673, "grad_norm": 0.10565821826457977, "learning_rate": 0.002, "loss": 2.3446, "step": 308540 }, { "epoch": 1.1927680103910563, "grad_norm": 0.09842915832996368, "learning_rate": 0.002, "loss": 2.3359, "step": 308550 }, { "epoch": 1.1928066675944395, "grad_norm": 0.10916607081890106, "learning_rate": 0.002, "loss": 2.3324, "step": 308560 }, { "epoch": 1.1928453247978228, "grad_norm": 0.10457335412502289, "learning_rate": 0.002, "loss": 2.3347, "step": 308570 }, { "epoch": 1.192883982001206, "grad_norm": 0.13191327452659607, "learning_rate": 0.002, "loss": 2.3303, "step": 308580 }, { "epoch": 1.1929226392045893, "grad_norm": 0.0972403958439827, "learning_rate": 0.002, "loss": 2.3416, "step": 308590 }, { "epoch": 1.1929612964079728, "grad_norm": 0.11111482977867126, "learning_rate": 0.002, "loss": 2.335, "step": 308600 }, { "epoch": 1.192999953611356, "grad_norm": 0.08959123492240906, "learning_rate": 0.002, "loss": 2.3079, "step": 308610 }, { "epoch": 1.1930386108147393, "grad_norm": 0.09942329674959183, "learning_rate": 0.002, "loss": 2.3343, "step": 308620 }, { "epoch": 1.1930772680181225, "grad_norm": 0.10511796176433563, "learning_rate": 0.002, "loss": 2.3048, "step": 308630 }, { "epoch": 1.1931159252215058, "grad_norm": 0.11657918989658356, "learning_rate": 0.002, "loss": 2.3227, "step": 308640 }, { "epoch": 1.193154582424889, "grad_norm": 0.10845893621444702, "learning_rate": 0.002, "loss": 2.3412, "step": 308650 }, { "epoch": 1.1931932396282723, "grad_norm": 0.09751000255346298, "learning_rate": 0.002, "loss": 2.324, "step": 308660 }, { "epoch": 1.1932318968316555, "grad_norm": 0.11037997156381607, "learning_rate": 0.002, "loss": 2.3275, "step": 308670 }, { "epoch": 1.1932705540350388, "grad_norm": 0.10616657137870789, "learning_rate": 0.002, "loss": 2.3232, "step": 308680 }, { "epoch": 1.1933092112384223, "grad_norm": 0.11541463434696198, "learning_rate": 0.002, "loss": 2.3522, "step": 308690 }, { "epoch": 1.1933478684418055, "grad_norm": 0.10183534026145935, "learning_rate": 0.002, "loss": 2.3284, "step": 308700 }, { "epoch": 1.1933865256451888, "grad_norm": 0.11032002419233322, "learning_rate": 0.002, "loss": 2.3262, "step": 308710 }, { "epoch": 1.193425182848572, "grad_norm": 0.11707691103219986, "learning_rate": 0.002, "loss": 2.3363, "step": 308720 }, { "epoch": 1.1934638400519553, "grad_norm": 0.1008838638663292, "learning_rate": 0.002, "loss": 2.3265, "step": 308730 }, { "epoch": 1.1935024972553385, "grad_norm": 0.09674558788537979, "learning_rate": 0.002, "loss": 2.3441, "step": 308740 }, { "epoch": 1.1935411544587218, "grad_norm": 0.10396383702754974, "learning_rate": 0.002, "loss": 2.3197, "step": 308750 }, { "epoch": 1.193579811662105, "grad_norm": 0.10542046278715134, "learning_rate": 0.002, "loss": 2.3248, "step": 308760 }, { "epoch": 1.1936184688654885, "grad_norm": 0.12094923853874207, "learning_rate": 0.002, "loss": 2.3202, "step": 308770 }, { "epoch": 1.1936571260688718, "grad_norm": 0.10676782578229904, "learning_rate": 0.002, "loss": 2.3279, "step": 308780 }, { "epoch": 1.193695783272255, "grad_norm": 0.10295173525810242, "learning_rate": 0.002, "loss": 2.3397, "step": 308790 }, { "epoch": 1.1937344404756383, "grad_norm": 0.10381203889846802, "learning_rate": 0.002, "loss": 2.3325, "step": 308800 }, { "epoch": 1.1937730976790215, "grad_norm": 0.11356569081544876, "learning_rate": 0.002, "loss": 2.334, "step": 308810 }, { "epoch": 1.1938117548824048, "grad_norm": 0.10669662803411484, "learning_rate": 0.002, "loss": 2.3393, "step": 308820 }, { "epoch": 1.193850412085788, "grad_norm": 0.10124512761831284, "learning_rate": 0.002, "loss": 2.3419, "step": 308830 }, { "epoch": 1.1938890692891713, "grad_norm": 0.1617117077112198, "learning_rate": 0.002, "loss": 2.3356, "step": 308840 }, { "epoch": 1.1939277264925545, "grad_norm": 0.10875289887189865, "learning_rate": 0.002, "loss": 2.3183, "step": 308850 }, { "epoch": 1.193966383695938, "grad_norm": 0.10443301498889923, "learning_rate": 0.002, "loss": 2.3449, "step": 308860 }, { "epoch": 1.1940050408993212, "grad_norm": 0.1163385659456253, "learning_rate": 0.002, "loss": 2.3197, "step": 308870 }, { "epoch": 1.1940436981027045, "grad_norm": 0.09189847856760025, "learning_rate": 0.002, "loss": 2.3323, "step": 308880 }, { "epoch": 1.1940823553060878, "grad_norm": 0.1170836016535759, "learning_rate": 0.002, "loss": 2.3363, "step": 308890 }, { "epoch": 1.194121012509471, "grad_norm": 0.1277143359184265, "learning_rate": 0.002, "loss": 2.3151, "step": 308900 }, { "epoch": 1.1941596697128543, "grad_norm": 0.09729064255952835, "learning_rate": 0.002, "loss": 2.3409, "step": 308910 }, { "epoch": 1.1941983269162375, "grad_norm": 0.10895591974258423, "learning_rate": 0.002, "loss": 2.338, "step": 308920 }, { "epoch": 1.1942369841196208, "grad_norm": 0.10426922887563705, "learning_rate": 0.002, "loss": 2.3236, "step": 308930 }, { "epoch": 1.1942756413230042, "grad_norm": 0.09164383262395859, "learning_rate": 0.002, "loss": 2.3286, "step": 308940 }, { "epoch": 1.1943142985263875, "grad_norm": 0.08422087132930756, "learning_rate": 0.002, "loss": 2.3318, "step": 308950 }, { "epoch": 1.1943529557297707, "grad_norm": 0.09703510999679565, "learning_rate": 0.002, "loss": 2.3313, "step": 308960 }, { "epoch": 1.194391612933154, "grad_norm": 0.09442820399999619, "learning_rate": 0.002, "loss": 2.3206, "step": 308970 }, { "epoch": 1.1944302701365372, "grad_norm": 0.19480974972248077, "learning_rate": 0.002, "loss": 2.3291, "step": 308980 }, { "epoch": 1.1944689273399205, "grad_norm": 0.10483382642269135, "learning_rate": 0.002, "loss": 2.3264, "step": 308990 }, { "epoch": 1.1945075845433037, "grad_norm": 0.08840036392211914, "learning_rate": 0.002, "loss": 2.3276, "step": 309000 }, { "epoch": 1.194546241746687, "grad_norm": 0.09493028372526169, "learning_rate": 0.002, "loss": 2.3259, "step": 309010 }, { "epoch": 1.1945848989500703, "grad_norm": 0.10036586970090866, "learning_rate": 0.002, "loss": 2.3456, "step": 309020 }, { "epoch": 1.1946235561534537, "grad_norm": 0.11401264369487762, "learning_rate": 0.002, "loss": 2.3343, "step": 309030 }, { "epoch": 1.194662213356837, "grad_norm": 0.09835246205329895, "learning_rate": 0.002, "loss": 2.3482, "step": 309040 }, { "epoch": 1.1947008705602202, "grad_norm": 0.09693726152181625, "learning_rate": 0.002, "loss": 2.3304, "step": 309050 }, { "epoch": 1.1947395277636035, "grad_norm": 0.10586172342300415, "learning_rate": 0.002, "loss": 2.3246, "step": 309060 }, { "epoch": 1.1947781849669867, "grad_norm": 0.10866979509592056, "learning_rate": 0.002, "loss": 2.3269, "step": 309070 }, { "epoch": 1.19481684217037, "grad_norm": 0.15142571926116943, "learning_rate": 0.002, "loss": 2.343, "step": 309080 }, { "epoch": 1.1948554993737532, "grad_norm": 0.10809877514839172, "learning_rate": 0.002, "loss": 2.3201, "step": 309090 }, { "epoch": 1.1948941565771365, "grad_norm": 0.12076469510793686, "learning_rate": 0.002, "loss": 2.3345, "step": 309100 }, { "epoch": 1.19493281378052, "grad_norm": 0.09712011367082596, "learning_rate": 0.002, "loss": 2.3467, "step": 309110 }, { "epoch": 1.1949714709839032, "grad_norm": 0.11490646004676819, "learning_rate": 0.002, "loss": 2.332, "step": 309120 }, { "epoch": 1.1950101281872865, "grad_norm": 0.12239948660135269, "learning_rate": 0.002, "loss": 2.3418, "step": 309130 }, { "epoch": 1.1950487853906697, "grad_norm": 0.099088653922081, "learning_rate": 0.002, "loss": 2.329, "step": 309140 }, { "epoch": 1.195087442594053, "grad_norm": 0.10289102792739868, "learning_rate": 0.002, "loss": 2.3435, "step": 309150 }, { "epoch": 1.1951260997974362, "grad_norm": 0.1293850541114807, "learning_rate": 0.002, "loss": 2.3405, "step": 309160 }, { "epoch": 1.1951647570008195, "grad_norm": 0.09299208968877792, "learning_rate": 0.002, "loss": 2.3232, "step": 309170 }, { "epoch": 1.1952034142042027, "grad_norm": 0.10709170997142792, "learning_rate": 0.002, "loss": 2.3269, "step": 309180 }, { "epoch": 1.195242071407586, "grad_norm": 0.09683641791343689, "learning_rate": 0.002, "loss": 2.3457, "step": 309190 }, { "epoch": 1.1952807286109695, "grad_norm": 0.0931752473115921, "learning_rate": 0.002, "loss": 2.33, "step": 309200 }, { "epoch": 1.1953193858143527, "grad_norm": 0.11616582423448563, "learning_rate": 0.002, "loss": 2.338, "step": 309210 }, { "epoch": 1.195358043017736, "grad_norm": 0.10611452162265778, "learning_rate": 0.002, "loss": 2.3446, "step": 309220 }, { "epoch": 1.1953967002211192, "grad_norm": 0.11353754252195358, "learning_rate": 0.002, "loss": 2.3162, "step": 309230 }, { "epoch": 1.1954353574245025, "grad_norm": 0.10875255614519119, "learning_rate": 0.002, "loss": 2.3313, "step": 309240 }, { "epoch": 1.1954740146278857, "grad_norm": 0.11347277462482452, "learning_rate": 0.002, "loss": 2.3438, "step": 309250 }, { "epoch": 1.195512671831269, "grad_norm": 0.09928679466247559, "learning_rate": 0.002, "loss": 2.3221, "step": 309260 }, { "epoch": 1.1955513290346522, "grad_norm": 0.09654610604047775, "learning_rate": 0.002, "loss": 2.3207, "step": 309270 }, { "epoch": 1.1955899862380357, "grad_norm": 0.10184766352176666, "learning_rate": 0.002, "loss": 2.3358, "step": 309280 }, { "epoch": 1.195628643441419, "grad_norm": 0.11834858357906342, "learning_rate": 0.002, "loss": 2.3334, "step": 309290 }, { "epoch": 1.1956673006448022, "grad_norm": 0.1013941541314125, "learning_rate": 0.002, "loss": 2.3256, "step": 309300 }, { "epoch": 1.1957059578481855, "grad_norm": 0.12373871356248856, "learning_rate": 0.002, "loss": 2.3386, "step": 309310 }, { "epoch": 1.1957446150515687, "grad_norm": 0.11233308166265488, "learning_rate": 0.002, "loss": 2.3324, "step": 309320 }, { "epoch": 1.195783272254952, "grad_norm": 0.12040448933839798, "learning_rate": 0.002, "loss": 2.34, "step": 309330 }, { "epoch": 1.1958219294583352, "grad_norm": 0.1115075871348381, "learning_rate": 0.002, "loss": 2.3402, "step": 309340 }, { "epoch": 1.1958605866617185, "grad_norm": 0.09439297765493393, "learning_rate": 0.002, "loss": 2.3366, "step": 309350 }, { "epoch": 1.1958992438651017, "grad_norm": 0.11103499680757523, "learning_rate": 0.002, "loss": 2.3184, "step": 309360 }, { "epoch": 1.1959379010684852, "grad_norm": 0.09440824389457703, "learning_rate": 0.002, "loss": 2.3453, "step": 309370 }, { "epoch": 1.1959765582718684, "grad_norm": 0.10800778120756149, "learning_rate": 0.002, "loss": 2.322, "step": 309380 }, { "epoch": 1.1960152154752517, "grad_norm": 0.11891094595193863, "learning_rate": 0.002, "loss": 2.3339, "step": 309390 }, { "epoch": 1.196053872678635, "grad_norm": 0.09633566439151764, "learning_rate": 0.002, "loss": 2.3324, "step": 309400 }, { "epoch": 1.1960925298820182, "grad_norm": 0.1220220997929573, "learning_rate": 0.002, "loss": 2.3261, "step": 309410 }, { "epoch": 1.1961311870854014, "grad_norm": 0.09684797376394272, "learning_rate": 0.002, "loss": 2.3199, "step": 309420 }, { "epoch": 1.1961698442887847, "grad_norm": 0.12197108566761017, "learning_rate": 0.002, "loss": 2.3325, "step": 309430 }, { "epoch": 1.1962085014921682, "grad_norm": 0.0932028740644455, "learning_rate": 0.002, "loss": 2.3407, "step": 309440 }, { "epoch": 1.1962471586955514, "grad_norm": 0.09771830588579178, "learning_rate": 0.002, "loss": 2.3182, "step": 309450 }, { "epoch": 1.1962858158989347, "grad_norm": 0.10045291483402252, "learning_rate": 0.002, "loss": 2.3274, "step": 309460 }, { "epoch": 1.196324473102318, "grad_norm": 0.16125184297561646, "learning_rate": 0.002, "loss": 2.3574, "step": 309470 }, { "epoch": 1.1963631303057012, "grad_norm": 0.11066312342882156, "learning_rate": 0.002, "loss": 2.339, "step": 309480 }, { "epoch": 1.1964017875090844, "grad_norm": 0.11836784332990646, "learning_rate": 0.002, "loss": 2.3267, "step": 309490 }, { "epoch": 1.1964404447124677, "grad_norm": 0.11377326399087906, "learning_rate": 0.002, "loss": 2.3371, "step": 309500 }, { "epoch": 1.196479101915851, "grad_norm": 0.11057248711585999, "learning_rate": 0.002, "loss": 2.3414, "step": 309510 }, { "epoch": 1.1965177591192342, "grad_norm": 0.09580224752426147, "learning_rate": 0.002, "loss": 2.3249, "step": 309520 }, { "epoch": 1.1965564163226174, "grad_norm": 0.09781431406736374, "learning_rate": 0.002, "loss": 2.3297, "step": 309530 }, { "epoch": 1.196595073526001, "grad_norm": 0.09017840027809143, "learning_rate": 0.002, "loss": 2.3259, "step": 309540 }, { "epoch": 1.1966337307293842, "grad_norm": 0.10557594895362854, "learning_rate": 0.002, "loss": 2.3334, "step": 309550 }, { "epoch": 1.1966723879327674, "grad_norm": 0.1149761825799942, "learning_rate": 0.002, "loss": 2.3425, "step": 309560 }, { "epoch": 1.1967110451361507, "grad_norm": 0.11471065133810043, "learning_rate": 0.002, "loss": 2.3388, "step": 309570 }, { "epoch": 1.196749702339534, "grad_norm": 0.10117927193641663, "learning_rate": 0.002, "loss": 2.3338, "step": 309580 }, { "epoch": 1.1967883595429172, "grad_norm": 0.11146955192089081, "learning_rate": 0.002, "loss": 2.3397, "step": 309590 }, { "epoch": 1.1968270167463004, "grad_norm": 0.1146436259150505, "learning_rate": 0.002, "loss": 2.3442, "step": 309600 }, { "epoch": 1.196865673949684, "grad_norm": 0.09842398762702942, "learning_rate": 0.002, "loss": 2.3258, "step": 309610 }, { "epoch": 1.1969043311530672, "grad_norm": 0.1109817773103714, "learning_rate": 0.002, "loss": 2.3319, "step": 309620 }, { "epoch": 1.1969429883564504, "grad_norm": 0.09087683260440826, "learning_rate": 0.002, "loss": 2.3256, "step": 309630 }, { "epoch": 1.1969816455598337, "grad_norm": 0.11437252908945084, "learning_rate": 0.002, "loss": 2.3333, "step": 309640 }, { "epoch": 1.197020302763217, "grad_norm": 0.10380634665489197, "learning_rate": 0.002, "loss": 2.3324, "step": 309650 }, { "epoch": 1.1970589599666002, "grad_norm": 0.10666163265705109, "learning_rate": 0.002, "loss": 2.329, "step": 309660 }, { "epoch": 1.1970976171699834, "grad_norm": 0.09751375764608383, "learning_rate": 0.002, "loss": 2.3167, "step": 309670 }, { "epoch": 1.1971362743733667, "grad_norm": 0.10477735102176666, "learning_rate": 0.002, "loss": 2.3228, "step": 309680 }, { "epoch": 1.19717493157675, "grad_norm": 0.09767846763134003, "learning_rate": 0.002, "loss": 2.3273, "step": 309690 }, { "epoch": 1.1972135887801332, "grad_norm": 0.109504334628582, "learning_rate": 0.002, "loss": 2.3347, "step": 309700 }, { "epoch": 1.1972522459835166, "grad_norm": 0.10675853490829468, "learning_rate": 0.002, "loss": 2.3389, "step": 309710 }, { "epoch": 1.1972909031869, "grad_norm": 0.09702813625335693, "learning_rate": 0.002, "loss": 2.3149, "step": 309720 }, { "epoch": 1.1973295603902832, "grad_norm": 0.09669061750173569, "learning_rate": 0.002, "loss": 2.3448, "step": 309730 }, { "epoch": 1.1973682175936664, "grad_norm": 0.11900770664215088, "learning_rate": 0.002, "loss": 2.3355, "step": 309740 }, { "epoch": 1.1974068747970497, "grad_norm": 0.09379424154758453, "learning_rate": 0.002, "loss": 2.335, "step": 309750 }, { "epoch": 1.197445532000433, "grad_norm": 0.11037370562553406, "learning_rate": 0.002, "loss": 2.342, "step": 309760 }, { "epoch": 1.1974841892038162, "grad_norm": 0.10939163714647293, "learning_rate": 0.002, "loss": 2.3412, "step": 309770 }, { "epoch": 1.1975228464071996, "grad_norm": 0.1152690127491951, "learning_rate": 0.002, "loss": 2.3299, "step": 309780 }, { "epoch": 1.197561503610583, "grad_norm": 0.10573794692754745, "learning_rate": 0.002, "loss": 2.3306, "step": 309790 }, { "epoch": 1.1976001608139661, "grad_norm": 0.11093052476644516, "learning_rate": 0.002, "loss": 2.3352, "step": 309800 }, { "epoch": 1.1976388180173494, "grad_norm": 0.11789587885141373, "learning_rate": 0.002, "loss": 2.3278, "step": 309810 }, { "epoch": 1.1976774752207326, "grad_norm": 0.11578092724084854, "learning_rate": 0.002, "loss": 2.3471, "step": 309820 }, { "epoch": 1.197716132424116, "grad_norm": 0.10635251551866531, "learning_rate": 0.002, "loss": 2.3466, "step": 309830 }, { "epoch": 1.1977547896274992, "grad_norm": 0.0906798392534256, "learning_rate": 0.002, "loss": 2.3179, "step": 309840 }, { "epoch": 1.1977934468308824, "grad_norm": 0.10510555654764175, "learning_rate": 0.002, "loss": 2.3224, "step": 309850 }, { "epoch": 1.1978321040342657, "grad_norm": 0.10239271819591522, "learning_rate": 0.002, "loss": 2.3412, "step": 309860 }, { "epoch": 1.197870761237649, "grad_norm": 0.11979003995656967, "learning_rate": 0.002, "loss": 2.3343, "step": 309870 }, { "epoch": 1.1979094184410324, "grad_norm": 0.10474664717912674, "learning_rate": 0.002, "loss": 2.354, "step": 309880 }, { "epoch": 1.1979480756444156, "grad_norm": 0.09589861333370209, "learning_rate": 0.002, "loss": 2.351, "step": 309890 }, { "epoch": 1.1979867328477989, "grad_norm": 0.10829038172960281, "learning_rate": 0.002, "loss": 2.3346, "step": 309900 }, { "epoch": 1.1980253900511821, "grad_norm": 0.11464477330446243, "learning_rate": 0.002, "loss": 2.342, "step": 309910 }, { "epoch": 1.1980640472545654, "grad_norm": 0.10862333327531815, "learning_rate": 0.002, "loss": 2.349, "step": 309920 }, { "epoch": 1.1981027044579486, "grad_norm": 0.10340090841054916, "learning_rate": 0.002, "loss": 2.3441, "step": 309930 }, { "epoch": 1.198141361661332, "grad_norm": 0.10448790341615677, "learning_rate": 0.002, "loss": 2.3388, "step": 309940 }, { "epoch": 1.1981800188647154, "grad_norm": 0.10916253179311752, "learning_rate": 0.002, "loss": 2.3436, "step": 309950 }, { "epoch": 1.1982186760680986, "grad_norm": 0.105451300740242, "learning_rate": 0.002, "loss": 2.3411, "step": 309960 }, { "epoch": 1.1982573332714819, "grad_norm": 0.10273198038339615, "learning_rate": 0.002, "loss": 2.3282, "step": 309970 }, { "epoch": 1.1982959904748651, "grad_norm": 0.09579921513795853, "learning_rate": 0.002, "loss": 2.3313, "step": 309980 }, { "epoch": 1.1983346476782484, "grad_norm": 0.10059081017971039, "learning_rate": 0.002, "loss": 2.3223, "step": 309990 }, { "epoch": 1.1983733048816316, "grad_norm": 0.09338857233524323, "learning_rate": 0.002, "loss": 2.3223, "step": 310000 }, { "epoch": 1.1984119620850149, "grad_norm": 0.10279089212417603, "learning_rate": 0.002, "loss": 2.3279, "step": 310010 }, { "epoch": 1.1984506192883981, "grad_norm": 0.08862245827913284, "learning_rate": 0.002, "loss": 2.3399, "step": 310020 }, { "epoch": 1.1984892764917814, "grad_norm": 0.12492749840021133, "learning_rate": 0.002, "loss": 2.3244, "step": 310030 }, { "epoch": 1.1985279336951646, "grad_norm": 0.11897465586662292, "learning_rate": 0.002, "loss": 2.3375, "step": 310040 }, { "epoch": 1.1985665908985481, "grad_norm": 0.10090432316064835, "learning_rate": 0.002, "loss": 2.336, "step": 310050 }, { "epoch": 1.1986052481019314, "grad_norm": 0.09931127727031708, "learning_rate": 0.002, "loss": 2.3367, "step": 310060 }, { "epoch": 1.1986439053053146, "grad_norm": 0.10696078836917877, "learning_rate": 0.002, "loss": 2.3326, "step": 310070 }, { "epoch": 1.1986825625086979, "grad_norm": 0.11368277668952942, "learning_rate": 0.002, "loss": 2.3357, "step": 310080 }, { "epoch": 1.1987212197120811, "grad_norm": 0.10651197284460068, "learning_rate": 0.002, "loss": 2.3323, "step": 310090 }, { "epoch": 1.1987598769154644, "grad_norm": 0.12357331067323685, "learning_rate": 0.002, "loss": 2.3327, "step": 310100 }, { "epoch": 1.1987985341188476, "grad_norm": 0.1091422438621521, "learning_rate": 0.002, "loss": 2.333, "step": 310110 }, { "epoch": 1.198837191322231, "grad_norm": 0.10718175768852234, "learning_rate": 0.002, "loss": 2.3307, "step": 310120 }, { "epoch": 1.1988758485256144, "grad_norm": 0.10889726132154465, "learning_rate": 0.002, "loss": 2.3388, "step": 310130 }, { "epoch": 1.1989145057289976, "grad_norm": 0.10348767787218094, "learning_rate": 0.002, "loss": 2.3289, "step": 310140 }, { "epoch": 1.1989531629323809, "grad_norm": 0.11551138013601303, "learning_rate": 0.002, "loss": 2.3358, "step": 310150 }, { "epoch": 1.198991820135764, "grad_norm": 0.103383868932724, "learning_rate": 0.002, "loss": 2.3341, "step": 310160 }, { "epoch": 1.1990304773391474, "grad_norm": 0.10193579643964767, "learning_rate": 0.002, "loss": 2.3254, "step": 310170 }, { "epoch": 1.1990691345425306, "grad_norm": 0.10685243457555771, "learning_rate": 0.002, "loss": 2.3354, "step": 310180 }, { "epoch": 1.1991077917459139, "grad_norm": 0.159051775932312, "learning_rate": 0.002, "loss": 2.3525, "step": 310190 }, { "epoch": 1.1991464489492971, "grad_norm": 0.10236582159996033, "learning_rate": 0.002, "loss": 2.3507, "step": 310200 }, { "epoch": 1.1991851061526804, "grad_norm": 0.1219271719455719, "learning_rate": 0.002, "loss": 2.3153, "step": 310210 }, { "epoch": 1.1992237633560638, "grad_norm": 0.11734987050294876, "learning_rate": 0.002, "loss": 2.3377, "step": 310220 }, { "epoch": 1.199262420559447, "grad_norm": 0.09976553916931152, "learning_rate": 0.002, "loss": 2.3423, "step": 310230 }, { "epoch": 1.1993010777628303, "grad_norm": 0.15345707535743713, "learning_rate": 0.002, "loss": 2.34, "step": 310240 }, { "epoch": 1.1993397349662136, "grad_norm": 0.10631512105464935, "learning_rate": 0.002, "loss": 2.3314, "step": 310250 }, { "epoch": 1.1993783921695969, "grad_norm": 0.09638812392950058, "learning_rate": 0.002, "loss": 2.3341, "step": 310260 }, { "epoch": 1.19941704937298, "grad_norm": 0.10293331742286682, "learning_rate": 0.002, "loss": 2.3568, "step": 310270 }, { "epoch": 1.1994557065763634, "grad_norm": 0.10437485575675964, "learning_rate": 0.002, "loss": 2.3442, "step": 310280 }, { "epoch": 1.1994943637797468, "grad_norm": 0.12006824463605881, "learning_rate": 0.002, "loss": 2.3516, "step": 310290 }, { "epoch": 1.19953302098313, "grad_norm": 0.09263867884874344, "learning_rate": 0.002, "loss": 2.3413, "step": 310300 }, { "epoch": 1.1995716781865133, "grad_norm": 0.1929297298192978, "learning_rate": 0.002, "loss": 2.3326, "step": 310310 }, { "epoch": 1.1996103353898966, "grad_norm": 0.12172416597604752, "learning_rate": 0.002, "loss": 2.3303, "step": 310320 }, { "epoch": 1.1996489925932798, "grad_norm": 0.1284799724817276, "learning_rate": 0.002, "loss": 2.3289, "step": 310330 }, { "epoch": 1.199687649796663, "grad_norm": 0.09342958778142929, "learning_rate": 0.002, "loss": 2.3337, "step": 310340 }, { "epoch": 1.1997263070000463, "grad_norm": 0.10736458003520966, "learning_rate": 0.002, "loss": 2.3356, "step": 310350 }, { "epoch": 1.1997649642034296, "grad_norm": 0.11657021939754486, "learning_rate": 0.002, "loss": 2.3318, "step": 310360 }, { "epoch": 1.1998036214068128, "grad_norm": 0.1261918991804123, "learning_rate": 0.002, "loss": 2.3435, "step": 310370 }, { "epoch": 1.199842278610196, "grad_norm": 0.10105116665363312, "learning_rate": 0.002, "loss": 2.3283, "step": 310380 }, { "epoch": 1.1998809358135796, "grad_norm": 0.11543400585651398, "learning_rate": 0.002, "loss": 2.3321, "step": 310390 }, { "epoch": 1.1999195930169628, "grad_norm": 0.09686581790447235, "learning_rate": 0.002, "loss": 2.3396, "step": 310400 }, { "epoch": 1.199958250220346, "grad_norm": 0.0990409329533577, "learning_rate": 0.002, "loss": 2.3537, "step": 310410 }, { "epoch": 1.1999969074237293, "grad_norm": 0.12822741270065308, "learning_rate": 0.002, "loss": 2.3364, "step": 310420 }, { "epoch": 1.2000355646271126, "grad_norm": 0.108786940574646, "learning_rate": 0.002, "loss": 2.3452, "step": 310430 }, { "epoch": 1.2000742218304958, "grad_norm": 0.11611306667327881, "learning_rate": 0.002, "loss": 2.3286, "step": 310440 }, { "epoch": 1.200112879033879, "grad_norm": 0.12393523007631302, "learning_rate": 0.002, "loss": 2.3452, "step": 310450 }, { "epoch": 1.2001515362372626, "grad_norm": 0.10479535162448883, "learning_rate": 0.002, "loss": 2.321, "step": 310460 }, { "epoch": 1.2001901934406458, "grad_norm": 0.20911908149719238, "learning_rate": 0.002, "loss": 2.3385, "step": 310470 }, { "epoch": 1.200228850644029, "grad_norm": 0.11443410813808441, "learning_rate": 0.002, "loss": 2.3325, "step": 310480 }, { "epoch": 1.2002675078474123, "grad_norm": 0.10956830531358719, "learning_rate": 0.002, "loss": 2.3441, "step": 310490 }, { "epoch": 1.2003061650507956, "grad_norm": 0.135431170463562, "learning_rate": 0.002, "loss": 2.3377, "step": 310500 }, { "epoch": 1.2003448222541788, "grad_norm": 0.10912695527076721, "learning_rate": 0.002, "loss": 2.3342, "step": 310510 }, { "epoch": 1.200383479457562, "grad_norm": 0.09585479646921158, "learning_rate": 0.002, "loss": 2.3308, "step": 310520 }, { "epoch": 1.2004221366609453, "grad_norm": 0.12916892766952515, "learning_rate": 0.002, "loss": 2.3486, "step": 310530 }, { "epoch": 1.2004607938643286, "grad_norm": 0.10099588334560394, "learning_rate": 0.002, "loss": 2.3418, "step": 310540 }, { "epoch": 1.200499451067712, "grad_norm": 0.08918637037277222, "learning_rate": 0.002, "loss": 2.3362, "step": 310550 }, { "epoch": 1.2005381082710953, "grad_norm": 0.12469889968633652, "learning_rate": 0.002, "loss": 2.3339, "step": 310560 }, { "epoch": 1.2005767654744786, "grad_norm": 0.10082226246595383, "learning_rate": 0.002, "loss": 2.3342, "step": 310570 }, { "epoch": 1.2006154226778618, "grad_norm": 0.10894294828176498, "learning_rate": 0.002, "loss": 2.3244, "step": 310580 }, { "epoch": 1.200654079881245, "grad_norm": 0.1181500107049942, "learning_rate": 0.002, "loss": 2.3506, "step": 310590 }, { "epoch": 1.2006927370846283, "grad_norm": 0.09749269485473633, "learning_rate": 0.002, "loss": 2.3303, "step": 310600 }, { "epoch": 1.2007313942880116, "grad_norm": 0.12218060344457626, "learning_rate": 0.002, "loss": 2.3241, "step": 310610 }, { "epoch": 1.2007700514913948, "grad_norm": 0.11155841499567032, "learning_rate": 0.002, "loss": 2.3399, "step": 310620 }, { "epoch": 1.2008087086947783, "grad_norm": 0.09525422006845474, "learning_rate": 0.002, "loss": 2.338, "step": 310630 }, { "epoch": 1.2008473658981615, "grad_norm": 0.12496226280927658, "learning_rate": 0.002, "loss": 2.3282, "step": 310640 }, { "epoch": 1.2008860231015448, "grad_norm": 0.11647341400384903, "learning_rate": 0.002, "loss": 2.3353, "step": 310650 }, { "epoch": 1.200924680304928, "grad_norm": 0.10525427013635635, "learning_rate": 0.002, "loss": 2.3358, "step": 310660 }, { "epoch": 1.2009633375083113, "grad_norm": 0.10254848748445511, "learning_rate": 0.002, "loss": 2.3337, "step": 310670 }, { "epoch": 1.2010019947116946, "grad_norm": 0.11311331391334534, "learning_rate": 0.002, "loss": 2.3254, "step": 310680 }, { "epoch": 1.2010406519150778, "grad_norm": 0.12352067232131958, "learning_rate": 0.002, "loss": 2.3337, "step": 310690 }, { "epoch": 1.201079309118461, "grad_norm": 0.30723175406455994, "learning_rate": 0.002, "loss": 2.344, "step": 310700 }, { "epoch": 1.2011179663218443, "grad_norm": 0.11771678924560547, "learning_rate": 0.002, "loss": 2.347, "step": 310710 }, { "epoch": 1.2011566235252278, "grad_norm": 0.1137363612651825, "learning_rate": 0.002, "loss": 2.3253, "step": 310720 }, { "epoch": 1.201195280728611, "grad_norm": 0.11905137449502945, "learning_rate": 0.002, "loss": 2.3376, "step": 310730 }, { "epoch": 1.2012339379319943, "grad_norm": 0.12443353980779648, "learning_rate": 0.002, "loss": 2.3172, "step": 310740 }, { "epoch": 1.2012725951353775, "grad_norm": 0.11437974125146866, "learning_rate": 0.002, "loss": 2.33, "step": 310750 }, { "epoch": 1.2013112523387608, "grad_norm": 0.1042497307062149, "learning_rate": 0.002, "loss": 2.3402, "step": 310760 }, { "epoch": 1.201349909542144, "grad_norm": 0.10717729479074478, "learning_rate": 0.002, "loss": 2.3437, "step": 310770 }, { "epoch": 1.2013885667455273, "grad_norm": 0.09702154248952866, "learning_rate": 0.002, "loss": 2.3303, "step": 310780 }, { "epoch": 1.2014272239489105, "grad_norm": 0.15113039314746857, "learning_rate": 0.002, "loss": 2.328, "step": 310790 }, { "epoch": 1.201465881152294, "grad_norm": 0.09931427240371704, "learning_rate": 0.002, "loss": 2.3268, "step": 310800 }, { "epoch": 1.2015045383556773, "grad_norm": 0.10436324030160904, "learning_rate": 0.002, "loss": 2.328, "step": 310810 }, { "epoch": 1.2015431955590605, "grad_norm": 0.14245779812335968, "learning_rate": 0.002, "loss": 2.3351, "step": 310820 }, { "epoch": 1.2015818527624438, "grad_norm": 0.11675668507814407, "learning_rate": 0.002, "loss": 2.3374, "step": 310830 }, { "epoch": 1.201620509965827, "grad_norm": 0.09828370809555054, "learning_rate": 0.002, "loss": 2.3458, "step": 310840 }, { "epoch": 1.2016591671692103, "grad_norm": 0.09691742807626724, "learning_rate": 0.002, "loss": 2.3344, "step": 310850 }, { "epoch": 1.2016978243725935, "grad_norm": 0.09506635367870331, "learning_rate": 0.002, "loss": 2.3406, "step": 310860 }, { "epoch": 1.2017364815759768, "grad_norm": 0.10716848075389862, "learning_rate": 0.002, "loss": 2.3297, "step": 310870 }, { "epoch": 1.20177513877936, "grad_norm": 0.11083471029996872, "learning_rate": 0.002, "loss": 2.3364, "step": 310880 }, { "epoch": 1.2018137959827435, "grad_norm": 0.10451477020978928, "learning_rate": 0.002, "loss": 2.3109, "step": 310890 }, { "epoch": 1.2018524531861268, "grad_norm": 0.12889741361141205, "learning_rate": 0.002, "loss": 2.3356, "step": 310900 }, { "epoch": 1.20189111038951, "grad_norm": 0.11663840711116791, "learning_rate": 0.002, "loss": 2.335, "step": 310910 }, { "epoch": 1.2019297675928933, "grad_norm": 0.09428218007087708, "learning_rate": 0.002, "loss": 2.3392, "step": 310920 }, { "epoch": 1.2019684247962765, "grad_norm": 0.10899657756090164, "learning_rate": 0.002, "loss": 2.3256, "step": 310930 }, { "epoch": 1.2020070819996598, "grad_norm": 0.0901101604104042, "learning_rate": 0.002, "loss": 2.3402, "step": 310940 }, { "epoch": 1.202045739203043, "grad_norm": 0.1286320686340332, "learning_rate": 0.002, "loss": 2.3361, "step": 310950 }, { "epoch": 1.2020843964064263, "grad_norm": 0.09208814799785614, "learning_rate": 0.002, "loss": 2.3269, "step": 310960 }, { "epoch": 1.2021230536098098, "grad_norm": 0.09907399863004684, "learning_rate": 0.002, "loss": 2.3228, "step": 310970 }, { "epoch": 1.202161710813193, "grad_norm": 0.10375700891017914, "learning_rate": 0.002, "loss": 2.3367, "step": 310980 }, { "epoch": 1.2022003680165763, "grad_norm": 0.1176227405667305, "learning_rate": 0.002, "loss": 2.3319, "step": 310990 }, { "epoch": 1.2022390252199595, "grad_norm": 0.10297471284866333, "learning_rate": 0.002, "loss": 2.3366, "step": 311000 }, { "epoch": 1.2022776824233428, "grad_norm": 0.10666001588106155, "learning_rate": 0.002, "loss": 2.3439, "step": 311010 }, { "epoch": 1.202316339626726, "grad_norm": 0.10830654203891754, "learning_rate": 0.002, "loss": 2.3438, "step": 311020 }, { "epoch": 1.2023549968301093, "grad_norm": 0.13971230387687683, "learning_rate": 0.002, "loss": 2.3417, "step": 311030 }, { "epoch": 1.2023936540334925, "grad_norm": 0.1161842793226242, "learning_rate": 0.002, "loss": 2.3338, "step": 311040 }, { "epoch": 1.2024323112368758, "grad_norm": 0.11453959345817566, "learning_rate": 0.002, "loss": 2.3458, "step": 311050 }, { "epoch": 1.2024709684402592, "grad_norm": 0.10962758213281631, "learning_rate": 0.002, "loss": 2.3453, "step": 311060 }, { "epoch": 1.2025096256436425, "grad_norm": 0.10430661588907242, "learning_rate": 0.002, "loss": 2.3332, "step": 311070 }, { "epoch": 1.2025482828470258, "grad_norm": 0.10192476212978363, "learning_rate": 0.002, "loss": 2.3401, "step": 311080 }, { "epoch": 1.202586940050409, "grad_norm": 0.0879945158958435, "learning_rate": 0.002, "loss": 2.3345, "step": 311090 }, { "epoch": 1.2026255972537923, "grad_norm": 0.10970672965049744, "learning_rate": 0.002, "loss": 2.3446, "step": 311100 }, { "epoch": 1.2026642544571755, "grad_norm": 0.10071579366922379, "learning_rate": 0.002, "loss": 2.3393, "step": 311110 }, { "epoch": 1.2027029116605588, "grad_norm": 0.11206358671188354, "learning_rate": 0.002, "loss": 2.351, "step": 311120 }, { "epoch": 1.202741568863942, "grad_norm": 0.10092099756002426, "learning_rate": 0.002, "loss": 2.3327, "step": 311130 }, { "epoch": 1.2027802260673255, "grad_norm": 0.09487482905387878, "learning_rate": 0.002, "loss": 2.3363, "step": 311140 }, { "epoch": 1.2028188832707087, "grad_norm": 0.10168376564979553, "learning_rate": 0.002, "loss": 2.3392, "step": 311150 }, { "epoch": 1.202857540474092, "grad_norm": 0.11102360486984253, "learning_rate": 0.002, "loss": 2.332, "step": 311160 }, { "epoch": 1.2028961976774752, "grad_norm": 0.10950905084609985, "learning_rate": 0.002, "loss": 2.3314, "step": 311170 }, { "epoch": 1.2029348548808585, "grad_norm": 0.09757718443870544, "learning_rate": 0.002, "loss": 2.3259, "step": 311180 }, { "epoch": 1.2029735120842417, "grad_norm": 0.10875348001718521, "learning_rate": 0.002, "loss": 2.347, "step": 311190 }, { "epoch": 1.203012169287625, "grad_norm": 0.10386452078819275, "learning_rate": 0.002, "loss": 2.3363, "step": 311200 }, { "epoch": 1.2030508264910083, "grad_norm": 0.12517563998699188, "learning_rate": 0.002, "loss": 2.3524, "step": 311210 }, { "epoch": 1.2030894836943915, "grad_norm": 0.11543699353933334, "learning_rate": 0.002, "loss": 2.3456, "step": 311220 }, { "epoch": 1.203128140897775, "grad_norm": 0.11032703518867493, "learning_rate": 0.002, "loss": 2.3357, "step": 311230 }, { "epoch": 1.2031667981011582, "grad_norm": 0.10646285861730576, "learning_rate": 0.002, "loss": 2.3318, "step": 311240 }, { "epoch": 1.2032054553045415, "grad_norm": 0.12593898177146912, "learning_rate": 0.002, "loss": 2.3358, "step": 311250 }, { "epoch": 1.2032441125079247, "grad_norm": 0.21965470910072327, "learning_rate": 0.002, "loss": 2.3346, "step": 311260 }, { "epoch": 1.203282769711308, "grad_norm": 0.12008555978536606, "learning_rate": 0.002, "loss": 2.324, "step": 311270 }, { "epoch": 1.2033214269146912, "grad_norm": 0.09635363519191742, "learning_rate": 0.002, "loss": 2.3326, "step": 311280 }, { "epoch": 1.2033600841180745, "grad_norm": 0.13143694400787354, "learning_rate": 0.002, "loss": 2.3373, "step": 311290 }, { "epoch": 1.2033987413214577, "grad_norm": 0.09808219969272614, "learning_rate": 0.002, "loss": 2.3284, "step": 311300 }, { "epoch": 1.2034373985248412, "grad_norm": 0.12062323093414307, "learning_rate": 0.002, "loss": 2.3306, "step": 311310 }, { "epoch": 1.2034760557282245, "grad_norm": 0.1174851804971695, "learning_rate": 0.002, "loss": 2.3381, "step": 311320 }, { "epoch": 1.2035147129316077, "grad_norm": 0.10547292977571487, "learning_rate": 0.002, "loss": 2.3297, "step": 311330 }, { "epoch": 1.203553370134991, "grad_norm": 0.11646241694688797, "learning_rate": 0.002, "loss": 2.3339, "step": 311340 }, { "epoch": 1.2035920273383742, "grad_norm": 0.12754155695438385, "learning_rate": 0.002, "loss": 2.3452, "step": 311350 }, { "epoch": 1.2036306845417575, "grad_norm": 0.10357648134231567, "learning_rate": 0.002, "loss": 2.3129, "step": 311360 }, { "epoch": 1.2036693417451407, "grad_norm": 0.13172759115695953, "learning_rate": 0.002, "loss": 2.335, "step": 311370 }, { "epoch": 1.203707998948524, "grad_norm": 0.10856924951076508, "learning_rate": 0.002, "loss": 2.3344, "step": 311380 }, { "epoch": 1.2037466561519072, "grad_norm": 0.09741537272930145, "learning_rate": 0.002, "loss": 2.3279, "step": 311390 }, { "epoch": 1.2037853133552907, "grad_norm": 0.09914067387580872, "learning_rate": 0.002, "loss": 2.3203, "step": 311400 }, { "epoch": 1.203823970558674, "grad_norm": 0.09517299383878708, "learning_rate": 0.002, "loss": 2.3235, "step": 311410 }, { "epoch": 1.2038626277620572, "grad_norm": 0.1110980212688446, "learning_rate": 0.002, "loss": 2.3314, "step": 311420 }, { "epoch": 1.2039012849654405, "grad_norm": 0.092925526201725, "learning_rate": 0.002, "loss": 2.3299, "step": 311430 }, { "epoch": 1.2039399421688237, "grad_norm": 0.09799396991729736, "learning_rate": 0.002, "loss": 2.3307, "step": 311440 }, { "epoch": 1.203978599372207, "grad_norm": 0.09903433918952942, "learning_rate": 0.002, "loss": 2.3273, "step": 311450 }, { "epoch": 1.2040172565755902, "grad_norm": 0.11398271471261978, "learning_rate": 0.002, "loss": 2.3375, "step": 311460 }, { "epoch": 1.2040559137789737, "grad_norm": 0.16214625537395477, "learning_rate": 0.002, "loss": 2.3507, "step": 311470 }, { "epoch": 1.204094570982357, "grad_norm": 0.11520147323608398, "learning_rate": 0.002, "loss": 2.3475, "step": 311480 }, { "epoch": 1.2041332281857402, "grad_norm": 0.10259690880775452, "learning_rate": 0.002, "loss": 2.3373, "step": 311490 }, { "epoch": 1.2041718853891235, "grad_norm": 0.09407929331064224, "learning_rate": 0.002, "loss": 2.3295, "step": 311500 }, { "epoch": 1.2042105425925067, "grad_norm": 0.10382479429244995, "learning_rate": 0.002, "loss": 2.3437, "step": 311510 }, { "epoch": 1.20424919979589, "grad_norm": 0.12786495685577393, "learning_rate": 0.002, "loss": 2.3417, "step": 311520 }, { "epoch": 1.2042878569992732, "grad_norm": 0.10865487158298492, "learning_rate": 0.002, "loss": 2.3417, "step": 311530 }, { "epoch": 1.2043265142026565, "grad_norm": 0.09080743789672852, "learning_rate": 0.002, "loss": 2.3462, "step": 311540 }, { "epoch": 1.2043651714060397, "grad_norm": 0.11545614153146744, "learning_rate": 0.002, "loss": 2.3507, "step": 311550 }, { "epoch": 1.204403828609423, "grad_norm": 0.11504527926445007, "learning_rate": 0.002, "loss": 2.3402, "step": 311560 }, { "epoch": 1.2044424858128064, "grad_norm": 0.11840058863162994, "learning_rate": 0.002, "loss": 2.3426, "step": 311570 }, { "epoch": 1.2044811430161897, "grad_norm": 0.10996630787849426, "learning_rate": 0.002, "loss": 2.335, "step": 311580 }, { "epoch": 1.204519800219573, "grad_norm": 0.08883655816316605, "learning_rate": 0.002, "loss": 2.3397, "step": 311590 }, { "epoch": 1.2045584574229562, "grad_norm": 0.10874045640230179, "learning_rate": 0.002, "loss": 2.3427, "step": 311600 }, { "epoch": 1.2045971146263394, "grad_norm": 0.11322815716266632, "learning_rate": 0.002, "loss": 2.3402, "step": 311610 }, { "epoch": 1.2046357718297227, "grad_norm": 0.11345550417900085, "learning_rate": 0.002, "loss": 2.3248, "step": 311620 }, { "epoch": 1.204674429033106, "grad_norm": 0.10841508954763412, "learning_rate": 0.002, "loss": 2.3284, "step": 311630 }, { "epoch": 1.2047130862364894, "grad_norm": 0.10970387607812881, "learning_rate": 0.002, "loss": 2.3275, "step": 311640 }, { "epoch": 1.2047517434398727, "grad_norm": 0.11749184131622314, "learning_rate": 0.002, "loss": 2.3361, "step": 311650 }, { "epoch": 1.204790400643256, "grad_norm": 0.0987529382109642, "learning_rate": 0.002, "loss": 2.3362, "step": 311660 }, { "epoch": 1.2048290578466392, "grad_norm": 0.09749867022037506, "learning_rate": 0.002, "loss": 2.3303, "step": 311670 }, { "epoch": 1.2048677150500224, "grad_norm": 0.09900732338428497, "learning_rate": 0.002, "loss": 2.3276, "step": 311680 }, { "epoch": 1.2049063722534057, "grad_norm": 0.09547862410545349, "learning_rate": 0.002, "loss": 2.3483, "step": 311690 }, { "epoch": 1.204945029456789, "grad_norm": 0.10451550036668777, "learning_rate": 0.002, "loss": 2.3491, "step": 311700 }, { "epoch": 1.2049836866601722, "grad_norm": 0.10269980877637863, "learning_rate": 0.002, "loss": 2.3265, "step": 311710 }, { "epoch": 1.2050223438635554, "grad_norm": 0.13897179067134857, "learning_rate": 0.002, "loss": 2.3403, "step": 311720 }, { "epoch": 1.2050610010669387, "grad_norm": 0.09629146009683609, "learning_rate": 0.002, "loss": 2.3434, "step": 311730 }, { "epoch": 1.2050996582703222, "grad_norm": 0.1335947960615158, "learning_rate": 0.002, "loss": 2.3287, "step": 311740 }, { "epoch": 1.2051383154737054, "grad_norm": 0.10571476817131042, "learning_rate": 0.002, "loss": 2.3287, "step": 311750 }, { "epoch": 1.2051769726770887, "grad_norm": 0.10034831613302231, "learning_rate": 0.002, "loss": 2.334, "step": 311760 }, { "epoch": 1.205215629880472, "grad_norm": 0.11033013463020325, "learning_rate": 0.002, "loss": 2.3316, "step": 311770 }, { "epoch": 1.2052542870838552, "grad_norm": 0.09822043031454086, "learning_rate": 0.002, "loss": 2.3292, "step": 311780 }, { "epoch": 1.2052929442872384, "grad_norm": 0.11147646605968475, "learning_rate": 0.002, "loss": 2.3208, "step": 311790 }, { "epoch": 1.2053316014906217, "grad_norm": 0.10494410246610641, "learning_rate": 0.002, "loss": 2.3397, "step": 311800 }, { "epoch": 1.2053702586940052, "grad_norm": 0.09362241625785828, "learning_rate": 0.002, "loss": 2.3248, "step": 311810 }, { "epoch": 1.2054089158973884, "grad_norm": 0.09579791128635406, "learning_rate": 0.002, "loss": 2.3301, "step": 311820 }, { "epoch": 1.2054475731007717, "grad_norm": 0.1143781915307045, "learning_rate": 0.002, "loss": 2.3316, "step": 311830 }, { "epoch": 1.205486230304155, "grad_norm": 0.10467755794525146, "learning_rate": 0.002, "loss": 2.327, "step": 311840 }, { "epoch": 1.2055248875075382, "grad_norm": 0.10779611766338348, "learning_rate": 0.002, "loss": 2.3353, "step": 311850 }, { "epoch": 1.2055635447109214, "grad_norm": 0.0962359607219696, "learning_rate": 0.002, "loss": 2.3291, "step": 311860 }, { "epoch": 1.2056022019143047, "grad_norm": 0.09667246788740158, "learning_rate": 0.002, "loss": 2.3223, "step": 311870 }, { "epoch": 1.205640859117688, "grad_norm": 0.09373782575130463, "learning_rate": 0.002, "loss": 2.3351, "step": 311880 }, { "epoch": 1.2056795163210712, "grad_norm": 0.1168796718120575, "learning_rate": 0.002, "loss": 2.3378, "step": 311890 }, { "epoch": 1.2057181735244544, "grad_norm": 0.08741265535354614, "learning_rate": 0.002, "loss": 2.3245, "step": 311900 }, { "epoch": 1.205756830727838, "grad_norm": 0.10326257348060608, "learning_rate": 0.002, "loss": 2.3359, "step": 311910 }, { "epoch": 1.2057954879312212, "grad_norm": 0.10092765092849731, "learning_rate": 0.002, "loss": 2.3454, "step": 311920 }, { "epoch": 1.2058341451346044, "grad_norm": 0.10536117851734161, "learning_rate": 0.002, "loss": 2.3428, "step": 311930 }, { "epoch": 1.2058728023379877, "grad_norm": 0.09137813001871109, "learning_rate": 0.002, "loss": 2.3234, "step": 311940 }, { "epoch": 1.205911459541371, "grad_norm": 0.10496657341718674, "learning_rate": 0.002, "loss": 2.3499, "step": 311950 }, { "epoch": 1.2059501167447542, "grad_norm": 0.10132060945034027, "learning_rate": 0.002, "loss": 2.3317, "step": 311960 }, { "epoch": 1.2059887739481374, "grad_norm": 0.11203447729349136, "learning_rate": 0.002, "loss": 2.3579, "step": 311970 }, { "epoch": 1.206027431151521, "grad_norm": 0.10228228569030762, "learning_rate": 0.002, "loss": 2.3318, "step": 311980 }, { "epoch": 1.2060660883549041, "grad_norm": 0.12769025564193726, "learning_rate": 0.002, "loss": 2.3393, "step": 311990 }, { "epoch": 1.2061047455582874, "grad_norm": 0.10657043755054474, "learning_rate": 0.002, "loss": 2.3383, "step": 312000 }, { "epoch": 1.2061434027616706, "grad_norm": 0.10488114506006241, "learning_rate": 0.002, "loss": 2.3287, "step": 312010 }, { "epoch": 1.206182059965054, "grad_norm": 0.1053939238190651, "learning_rate": 0.002, "loss": 2.3372, "step": 312020 }, { "epoch": 1.2062207171684372, "grad_norm": 0.10338485985994339, "learning_rate": 0.002, "loss": 2.3382, "step": 312030 }, { "epoch": 1.2062593743718204, "grad_norm": 0.09866391867399216, "learning_rate": 0.002, "loss": 2.3359, "step": 312040 }, { "epoch": 1.2062980315752037, "grad_norm": 0.09621734172105789, "learning_rate": 0.002, "loss": 2.3262, "step": 312050 }, { "epoch": 1.206336688778587, "grad_norm": 0.10968897491693497, "learning_rate": 0.002, "loss": 2.3384, "step": 312060 }, { "epoch": 1.2063753459819702, "grad_norm": 0.12046651542186737, "learning_rate": 0.002, "loss": 2.3394, "step": 312070 }, { "epoch": 1.2064140031853536, "grad_norm": 0.11235171556472778, "learning_rate": 0.002, "loss": 2.3364, "step": 312080 }, { "epoch": 1.2064526603887369, "grad_norm": 0.09258708357810974, "learning_rate": 0.002, "loss": 2.3263, "step": 312090 }, { "epoch": 1.2064913175921201, "grad_norm": 0.11983036249876022, "learning_rate": 0.002, "loss": 2.3371, "step": 312100 }, { "epoch": 1.2065299747955034, "grad_norm": 0.0992218405008316, "learning_rate": 0.002, "loss": 2.3427, "step": 312110 }, { "epoch": 1.2065686319988866, "grad_norm": 0.1157960370182991, "learning_rate": 0.002, "loss": 2.3276, "step": 312120 }, { "epoch": 1.20660728920227, "grad_norm": 0.11695519089698792, "learning_rate": 0.002, "loss": 2.3301, "step": 312130 }, { "epoch": 1.2066459464056531, "grad_norm": 0.14703066647052765, "learning_rate": 0.002, "loss": 2.3309, "step": 312140 }, { "epoch": 1.2066846036090366, "grad_norm": 0.10208508372306824, "learning_rate": 0.002, "loss": 2.3392, "step": 312150 }, { "epoch": 1.2067232608124199, "grad_norm": 0.10670216381549835, "learning_rate": 0.002, "loss": 2.3447, "step": 312160 }, { "epoch": 1.2067619180158031, "grad_norm": 0.1255640834569931, "learning_rate": 0.002, "loss": 2.3461, "step": 312170 }, { "epoch": 1.2068005752191864, "grad_norm": 0.09390535205602646, "learning_rate": 0.002, "loss": 2.3108, "step": 312180 }, { "epoch": 1.2068392324225696, "grad_norm": 0.10780937224626541, "learning_rate": 0.002, "loss": 2.3419, "step": 312190 }, { "epoch": 1.2068778896259529, "grad_norm": 0.10417443513870239, "learning_rate": 0.002, "loss": 2.3338, "step": 312200 }, { "epoch": 1.2069165468293361, "grad_norm": 0.1079430803656578, "learning_rate": 0.002, "loss": 2.3334, "step": 312210 }, { "epoch": 1.2069552040327194, "grad_norm": 0.10621048510074615, "learning_rate": 0.002, "loss": 2.3278, "step": 312220 }, { "epoch": 1.2069938612361026, "grad_norm": 0.10394273698329926, "learning_rate": 0.002, "loss": 2.3414, "step": 312230 }, { "epoch": 1.207032518439486, "grad_norm": 0.1757679581642151, "learning_rate": 0.002, "loss": 2.3368, "step": 312240 }, { "epoch": 1.2070711756428694, "grad_norm": 0.09549301862716675, "learning_rate": 0.002, "loss": 2.3343, "step": 312250 }, { "epoch": 1.2071098328462526, "grad_norm": 0.08968798816204071, "learning_rate": 0.002, "loss": 2.3473, "step": 312260 }, { "epoch": 1.2071484900496359, "grad_norm": 0.1374940127134323, "learning_rate": 0.002, "loss": 2.3291, "step": 312270 }, { "epoch": 1.2071871472530191, "grad_norm": 0.11503007262945175, "learning_rate": 0.002, "loss": 2.3369, "step": 312280 }, { "epoch": 1.2072258044564024, "grad_norm": 0.09821897000074387, "learning_rate": 0.002, "loss": 2.3415, "step": 312290 }, { "epoch": 1.2072644616597856, "grad_norm": 0.16372337937355042, "learning_rate": 0.002, "loss": 2.3362, "step": 312300 }, { "epoch": 1.2073031188631689, "grad_norm": 0.0956815704703331, "learning_rate": 0.002, "loss": 2.3491, "step": 312310 }, { "epoch": 1.2073417760665524, "grad_norm": 0.09556156396865845, "learning_rate": 0.002, "loss": 2.3347, "step": 312320 }, { "epoch": 1.2073804332699356, "grad_norm": 0.09671517461538315, "learning_rate": 0.002, "loss": 2.3216, "step": 312330 }, { "epoch": 1.2074190904733189, "grad_norm": 0.09471859037876129, "learning_rate": 0.002, "loss": 2.3265, "step": 312340 }, { "epoch": 1.207457747676702, "grad_norm": 0.09411439299583435, "learning_rate": 0.002, "loss": 2.3206, "step": 312350 }, { "epoch": 1.2074964048800854, "grad_norm": 0.09138353914022446, "learning_rate": 0.002, "loss": 2.3364, "step": 312360 }, { "epoch": 1.2075350620834686, "grad_norm": 0.1586178094148636, "learning_rate": 0.002, "loss": 2.3212, "step": 312370 }, { "epoch": 1.2075737192868519, "grad_norm": 0.10865724086761475, "learning_rate": 0.002, "loss": 2.3561, "step": 312380 }, { "epoch": 1.2076123764902351, "grad_norm": 0.14529366791248322, "learning_rate": 0.002, "loss": 2.3365, "step": 312390 }, { "epoch": 1.2076510336936184, "grad_norm": 0.10525742173194885, "learning_rate": 0.002, "loss": 2.3332, "step": 312400 }, { "epoch": 1.2076896908970018, "grad_norm": 0.10483916103839874, "learning_rate": 0.002, "loss": 2.3567, "step": 312410 }, { "epoch": 1.207728348100385, "grad_norm": 0.097395159304142, "learning_rate": 0.002, "loss": 2.3389, "step": 312420 }, { "epoch": 1.2077670053037683, "grad_norm": 0.09661631286144257, "learning_rate": 0.002, "loss": 2.3347, "step": 312430 }, { "epoch": 1.2078056625071516, "grad_norm": 0.10141381621360779, "learning_rate": 0.002, "loss": 2.3347, "step": 312440 }, { "epoch": 1.2078443197105349, "grad_norm": 0.10463304072618484, "learning_rate": 0.002, "loss": 2.3355, "step": 312450 }, { "epoch": 1.207882976913918, "grad_norm": 0.09635960310697556, "learning_rate": 0.002, "loss": 2.3478, "step": 312460 }, { "epoch": 1.2079216341173014, "grad_norm": 0.09967007488012314, "learning_rate": 0.002, "loss": 2.3326, "step": 312470 }, { "epoch": 1.2079602913206846, "grad_norm": 0.10142817348241806, "learning_rate": 0.002, "loss": 2.3409, "step": 312480 }, { "epoch": 1.207998948524068, "grad_norm": 0.09476986527442932, "learning_rate": 0.002, "loss": 2.3262, "step": 312490 }, { "epoch": 1.2080376057274513, "grad_norm": 0.09432511031627655, "learning_rate": 0.002, "loss": 2.3432, "step": 312500 }, { "epoch": 1.2080762629308346, "grad_norm": 0.10480641573667526, "learning_rate": 0.002, "loss": 2.3382, "step": 312510 }, { "epoch": 1.2081149201342178, "grad_norm": 0.09587416052818298, "learning_rate": 0.002, "loss": 2.3461, "step": 312520 }, { "epoch": 1.208153577337601, "grad_norm": 0.1027066707611084, "learning_rate": 0.002, "loss": 2.3276, "step": 312530 }, { "epoch": 1.2081922345409843, "grad_norm": 0.1043514683842659, "learning_rate": 0.002, "loss": 2.3194, "step": 312540 }, { "epoch": 1.2082308917443676, "grad_norm": 0.10770545899868011, "learning_rate": 0.002, "loss": 2.3223, "step": 312550 }, { "epoch": 1.2082695489477508, "grad_norm": 0.09790293127298355, "learning_rate": 0.002, "loss": 2.3432, "step": 312560 }, { "epoch": 1.208308206151134, "grad_norm": 0.09138157218694687, "learning_rate": 0.002, "loss": 2.3219, "step": 312570 }, { "epoch": 1.2083468633545176, "grad_norm": 0.10595359653234482, "learning_rate": 0.002, "loss": 2.3258, "step": 312580 }, { "epoch": 1.2083855205579008, "grad_norm": 0.10801562666893005, "learning_rate": 0.002, "loss": 2.3283, "step": 312590 }, { "epoch": 1.208424177761284, "grad_norm": 0.09553363919258118, "learning_rate": 0.002, "loss": 2.3289, "step": 312600 }, { "epoch": 1.2084628349646673, "grad_norm": 0.10602040588855743, "learning_rate": 0.002, "loss": 2.3284, "step": 312610 }, { "epoch": 1.2085014921680506, "grad_norm": 0.11849401891231537, "learning_rate": 0.002, "loss": 2.3245, "step": 312620 }, { "epoch": 1.2085401493714338, "grad_norm": 0.1252974569797516, "learning_rate": 0.002, "loss": 2.3366, "step": 312630 }, { "epoch": 1.208578806574817, "grad_norm": 0.09098678082227707, "learning_rate": 0.002, "loss": 2.3372, "step": 312640 }, { "epoch": 1.2086174637782003, "grad_norm": 0.10646401345729828, "learning_rate": 0.002, "loss": 2.3434, "step": 312650 }, { "epoch": 1.2086561209815838, "grad_norm": 0.09371813386678696, "learning_rate": 0.002, "loss": 2.3351, "step": 312660 }, { "epoch": 1.208694778184967, "grad_norm": 0.11778826266527176, "learning_rate": 0.002, "loss": 2.3421, "step": 312670 }, { "epoch": 1.2087334353883503, "grad_norm": 0.11409354209899902, "learning_rate": 0.002, "loss": 2.3535, "step": 312680 }, { "epoch": 1.2087720925917336, "grad_norm": 0.1014944463968277, "learning_rate": 0.002, "loss": 2.332, "step": 312690 }, { "epoch": 1.2088107497951168, "grad_norm": 0.09537586569786072, "learning_rate": 0.002, "loss": 2.3386, "step": 312700 }, { "epoch": 1.2088494069985, "grad_norm": 0.1035507544875145, "learning_rate": 0.002, "loss": 2.3278, "step": 312710 }, { "epoch": 1.2088880642018833, "grad_norm": 0.10714036971330643, "learning_rate": 0.002, "loss": 2.339, "step": 312720 }, { "epoch": 1.2089267214052666, "grad_norm": 0.11011461913585663, "learning_rate": 0.002, "loss": 2.3339, "step": 312730 }, { "epoch": 1.2089653786086498, "grad_norm": 0.09917209297418594, "learning_rate": 0.002, "loss": 2.3319, "step": 312740 }, { "epoch": 1.2090040358120333, "grad_norm": 0.1113947182893753, "learning_rate": 0.002, "loss": 2.3321, "step": 312750 }, { "epoch": 1.2090426930154166, "grad_norm": 0.10072827339172363, "learning_rate": 0.002, "loss": 2.3421, "step": 312760 }, { "epoch": 1.2090813502187998, "grad_norm": 0.09403202682733536, "learning_rate": 0.002, "loss": 2.3457, "step": 312770 }, { "epoch": 1.209120007422183, "grad_norm": 0.13177041709423065, "learning_rate": 0.002, "loss": 2.3259, "step": 312780 }, { "epoch": 1.2091586646255663, "grad_norm": 0.12640121579170227, "learning_rate": 0.002, "loss": 2.3347, "step": 312790 }, { "epoch": 1.2091973218289496, "grad_norm": 0.09275078028440475, "learning_rate": 0.002, "loss": 2.3304, "step": 312800 }, { "epoch": 1.2092359790323328, "grad_norm": 0.11911021918058395, "learning_rate": 0.002, "loss": 2.3368, "step": 312810 }, { "epoch": 1.209274636235716, "grad_norm": 0.09386845678091049, "learning_rate": 0.002, "loss": 2.3403, "step": 312820 }, { "epoch": 1.2093132934390995, "grad_norm": 0.0880388468503952, "learning_rate": 0.002, "loss": 2.3378, "step": 312830 }, { "epoch": 1.2093519506424828, "grad_norm": 0.09610741585493088, "learning_rate": 0.002, "loss": 2.337, "step": 312840 }, { "epoch": 1.209390607845866, "grad_norm": 0.10509993880987167, "learning_rate": 0.002, "loss": 2.3229, "step": 312850 }, { "epoch": 1.2094292650492493, "grad_norm": 0.11510360240936279, "learning_rate": 0.002, "loss": 2.3509, "step": 312860 }, { "epoch": 1.2094679222526326, "grad_norm": 0.10064709186553955, "learning_rate": 0.002, "loss": 2.3499, "step": 312870 }, { "epoch": 1.2095065794560158, "grad_norm": 0.10817084461450577, "learning_rate": 0.002, "loss": 2.3274, "step": 312880 }, { "epoch": 1.209545236659399, "grad_norm": 0.10244237631559372, "learning_rate": 0.002, "loss": 2.3353, "step": 312890 }, { "epoch": 1.2095838938627823, "grad_norm": 0.10062064230442047, "learning_rate": 0.002, "loss": 2.3336, "step": 312900 }, { "epoch": 1.2096225510661656, "grad_norm": 0.10862799733877182, "learning_rate": 0.002, "loss": 2.3419, "step": 312910 }, { "epoch": 1.209661208269549, "grad_norm": 0.11865110695362091, "learning_rate": 0.002, "loss": 2.3441, "step": 312920 }, { "epoch": 1.2096998654729323, "grad_norm": 0.1131138727068901, "learning_rate": 0.002, "loss": 2.3314, "step": 312930 }, { "epoch": 1.2097385226763155, "grad_norm": 0.08708862215280533, "learning_rate": 0.002, "loss": 2.3297, "step": 312940 }, { "epoch": 1.2097771798796988, "grad_norm": 0.1317860186100006, "learning_rate": 0.002, "loss": 2.3288, "step": 312950 }, { "epoch": 1.209815837083082, "grad_norm": 0.09572873264551163, "learning_rate": 0.002, "loss": 2.3256, "step": 312960 }, { "epoch": 1.2098544942864653, "grad_norm": 0.09569898992776871, "learning_rate": 0.002, "loss": 2.3274, "step": 312970 }, { "epoch": 1.2098931514898486, "grad_norm": 0.11135315150022507, "learning_rate": 0.002, "loss": 2.3298, "step": 312980 }, { "epoch": 1.2099318086932318, "grad_norm": 0.12566828727722168, "learning_rate": 0.002, "loss": 2.3471, "step": 312990 }, { "epoch": 1.2099704658966153, "grad_norm": 0.09661416709423065, "learning_rate": 0.002, "loss": 2.3369, "step": 313000 }, { "epoch": 1.2100091230999985, "grad_norm": 0.10898587852716446, "learning_rate": 0.002, "loss": 2.3195, "step": 313010 }, { "epoch": 1.2100477803033818, "grad_norm": 0.10114371031522751, "learning_rate": 0.002, "loss": 2.3447, "step": 313020 }, { "epoch": 1.210086437506765, "grad_norm": 0.10844885557889938, "learning_rate": 0.002, "loss": 2.3389, "step": 313030 }, { "epoch": 1.2101250947101483, "grad_norm": 0.10523076355457306, "learning_rate": 0.002, "loss": 2.3247, "step": 313040 }, { "epoch": 1.2101637519135315, "grad_norm": 0.10654118657112122, "learning_rate": 0.002, "loss": 2.3317, "step": 313050 }, { "epoch": 1.2102024091169148, "grad_norm": 0.10612224787473679, "learning_rate": 0.002, "loss": 2.3232, "step": 313060 }, { "epoch": 1.210241066320298, "grad_norm": 0.11240912228822708, "learning_rate": 0.002, "loss": 2.3308, "step": 313070 }, { "epoch": 1.2102797235236813, "grad_norm": 0.1061621680855751, "learning_rate": 0.002, "loss": 2.3309, "step": 313080 }, { "epoch": 1.2103183807270648, "grad_norm": 0.11006850004196167, "learning_rate": 0.002, "loss": 2.325, "step": 313090 }, { "epoch": 1.210357037930448, "grad_norm": 0.09734800457954407, "learning_rate": 0.002, "loss": 2.3359, "step": 313100 }, { "epoch": 1.2103956951338313, "grad_norm": 0.10108668357133865, "learning_rate": 0.002, "loss": 2.3306, "step": 313110 }, { "epoch": 1.2104343523372145, "grad_norm": 0.12004440277814865, "learning_rate": 0.002, "loss": 2.3206, "step": 313120 }, { "epoch": 1.2104730095405978, "grad_norm": 0.11126869916915894, "learning_rate": 0.002, "loss": 2.3261, "step": 313130 }, { "epoch": 1.210511666743981, "grad_norm": 0.11197934299707413, "learning_rate": 0.002, "loss": 2.3316, "step": 313140 }, { "epoch": 1.2105503239473643, "grad_norm": 0.0979895368218422, "learning_rate": 0.002, "loss": 2.3294, "step": 313150 }, { "epoch": 1.2105889811507475, "grad_norm": 0.10922550410032272, "learning_rate": 0.002, "loss": 2.3343, "step": 313160 }, { "epoch": 1.210627638354131, "grad_norm": 0.12186593562364578, "learning_rate": 0.002, "loss": 2.3296, "step": 313170 }, { "epoch": 1.2106662955575143, "grad_norm": 0.10034412890672684, "learning_rate": 0.002, "loss": 2.3181, "step": 313180 }, { "epoch": 1.2107049527608975, "grad_norm": 0.47450196743011475, "learning_rate": 0.002, "loss": 2.3325, "step": 313190 }, { "epoch": 1.2107436099642808, "grad_norm": 0.1090787947177887, "learning_rate": 0.002, "loss": 2.3277, "step": 313200 }, { "epoch": 1.210782267167664, "grad_norm": 0.08918242901563644, "learning_rate": 0.002, "loss": 2.3421, "step": 313210 }, { "epoch": 1.2108209243710473, "grad_norm": 0.13283796608448029, "learning_rate": 0.002, "loss": 2.3247, "step": 313220 }, { "epoch": 1.2108595815744305, "grad_norm": 0.11585294455289841, "learning_rate": 0.002, "loss": 2.3304, "step": 313230 }, { "epoch": 1.2108982387778138, "grad_norm": 0.10596839338541031, "learning_rate": 0.002, "loss": 2.3438, "step": 313240 }, { "epoch": 1.210936895981197, "grad_norm": 0.09580260515213013, "learning_rate": 0.002, "loss": 2.3255, "step": 313250 }, { "epoch": 1.2109755531845805, "grad_norm": 0.1149885281920433, "learning_rate": 0.002, "loss": 2.3463, "step": 313260 }, { "epoch": 1.2110142103879638, "grad_norm": 0.10573212802410126, "learning_rate": 0.002, "loss": 2.3298, "step": 313270 }, { "epoch": 1.211052867591347, "grad_norm": 0.09755532443523407, "learning_rate": 0.002, "loss": 2.3269, "step": 313280 }, { "epoch": 1.2110915247947303, "grad_norm": 0.09388445317745209, "learning_rate": 0.002, "loss": 2.3204, "step": 313290 }, { "epoch": 1.2111301819981135, "grad_norm": 0.10133211314678192, "learning_rate": 0.002, "loss": 2.3238, "step": 313300 }, { "epoch": 1.2111688392014968, "grad_norm": 0.11387095600366592, "learning_rate": 0.002, "loss": 2.3308, "step": 313310 }, { "epoch": 1.21120749640488, "grad_norm": 0.10835465788841248, "learning_rate": 0.002, "loss": 2.3349, "step": 313320 }, { "epoch": 1.2112461536082635, "grad_norm": 0.10440170764923096, "learning_rate": 0.002, "loss": 2.3255, "step": 313330 }, { "epoch": 1.2112848108116467, "grad_norm": 0.09477156400680542, "learning_rate": 0.002, "loss": 2.3476, "step": 313340 }, { "epoch": 1.21132346801503, "grad_norm": 0.11110051721334457, "learning_rate": 0.002, "loss": 2.3405, "step": 313350 }, { "epoch": 1.2113621252184132, "grad_norm": 0.10621228069067001, "learning_rate": 0.002, "loss": 2.3309, "step": 313360 }, { "epoch": 1.2114007824217965, "grad_norm": 0.10771118849515915, "learning_rate": 0.002, "loss": 2.35, "step": 313370 }, { "epoch": 1.2114394396251797, "grad_norm": 0.11031128466129303, "learning_rate": 0.002, "loss": 2.3318, "step": 313380 }, { "epoch": 1.211478096828563, "grad_norm": 0.10690969973802567, "learning_rate": 0.002, "loss": 2.3242, "step": 313390 }, { "epoch": 1.2115167540319463, "grad_norm": 0.12000640481710434, "learning_rate": 0.002, "loss": 2.3153, "step": 313400 }, { "epoch": 1.2115554112353295, "grad_norm": 0.11352237313985825, "learning_rate": 0.002, "loss": 2.3294, "step": 313410 }, { "epoch": 1.2115940684387128, "grad_norm": 0.11398783326148987, "learning_rate": 0.002, "loss": 2.3471, "step": 313420 }, { "epoch": 1.2116327256420962, "grad_norm": 0.1145615205168724, "learning_rate": 0.002, "loss": 2.33, "step": 313430 }, { "epoch": 1.2116713828454795, "grad_norm": 0.11705408245325089, "learning_rate": 0.002, "loss": 2.3392, "step": 313440 }, { "epoch": 1.2117100400488627, "grad_norm": 0.09482872486114502, "learning_rate": 0.002, "loss": 2.3299, "step": 313450 }, { "epoch": 1.211748697252246, "grad_norm": 0.10971511900424957, "learning_rate": 0.002, "loss": 2.3332, "step": 313460 }, { "epoch": 1.2117873544556292, "grad_norm": 0.11067706346511841, "learning_rate": 0.002, "loss": 2.3234, "step": 313470 }, { "epoch": 1.2118260116590125, "grad_norm": 0.09540124237537384, "learning_rate": 0.002, "loss": 2.3386, "step": 313480 }, { "epoch": 1.2118646688623957, "grad_norm": 0.0967855378985405, "learning_rate": 0.002, "loss": 2.345, "step": 313490 }, { "epoch": 1.2119033260657792, "grad_norm": 0.1019299179315567, "learning_rate": 0.002, "loss": 2.3438, "step": 313500 }, { "epoch": 1.2119419832691625, "grad_norm": 0.1015499085187912, "learning_rate": 0.002, "loss": 2.3202, "step": 313510 }, { "epoch": 1.2119806404725457, "grad_norm": 0.08799052983522415, "learning_rate": 0.002, "loss": 2.3199, "step": 313520 }, { "epoch": 1.212019297675929, "grad_norm": 0.09830170124769211, "learning_rate": 0.002, "loss": 2.3413, "step": 313530 }, { "epoch": 1.2120579548793122, "grad_norm": 0.14667995274066925, "learning_rate": 0.002, "loss": 2.3449, "step": 313540 }, { "epoch": 1.2120966120826955, "grad_norm": 0.0980951189994812, "learning_rate": 0.002, "loss": 2.3374, "step": 313550 }, { "epoch": 1.2121352692860787, "grad_norm": 0.10406613349914551, "learning_rate": 0.002, "loss": 2.3441, "step": 313560 }, { "epoch": 1.212173926489462, "grad_norm": 0.09259974956512451, "learning_rate": 0.002, "loss": 2.3304, "step": 313570 }, { "epoch": 1.2122125836928452, "grad_norm": 0.10583232343196869, "learning_rate": 0.002, "loss": 2.3522, "step": 313580 }, { "epoch": 1.2122512408962285, "grad_norm": 0.11357071995735168, "learning_rate": 0.002, "loss": 2.3463, "step": 313590 }, { "epoch": 1.212289898099612, "grad_norm": 0.1437079757452011, "learning_rate": 0.002, "loss": 2.3491, "step": 313600 }, { "epoch": 1.2123285553029952, "grad_norm": 0.1140052005648613, "learning_rate": 0.002, "loss": 2.338, "step": 313610 }, { "epoch": 1.2123672125063785, "grad_norm": 0.10280095785856247, "learning_rate": 0.002, "loss": 2.3323, "step": 313620 }, { "epoch": 1.2124058697097617, "grad_norm": 0.09381737560033798, "learning_rate": 0.002, "loss": 2.3228, "step": 313630 }, { "epoch": 1.212444526913145, "grad_norm": 0.12156960368156433, "learning_rate": 0.002, "loss": 2.3253, "step": 313640 }, { "epoch": 1.2124831841165282, "grad_norm": 0.10479841381311417, "learning_rate": 0.002, "loss": 2.3441, "step": 313650 }, { "epoch": 1.2125218413199115, "grad_norm": 0.12612693011760712, "learning_rate": 0.002, "loss": 2.3414, "step": 313660 }, { "epoch": 1.212560498523295, "grad_norm": 0.10016128420829773, "learning_rate": 0.002, "loss": 2.33, "step": 313670 }, { "epoch": 1.2125991557266782, "grad_norm": 0.11436531692743301, "learning_rate": 0.002, "loss": 2.3406, "step": 313680 }, { "epoch": 1.2126378129300615, "grad_norm": 0.10612929612398148, "learning_rate": 0.002, "loss": 2.3398, "step": 313690 }, { "epoch": 1.2126764701334447, "grad_norm": 0.13559940457344055, "learning_rate": 0.002, "loss": 2.3299, "step": 313700 }, { "epoch": 1.212715127336828, "grad_norm": 0.12024310231208801, "learning_rate": 0.002, "loss": 2.3255, "step": 313710 }, { "epoch": 1.2127537845402112, "grad_norm": 0.12355874478816986, "learning_rate": 0.002, "loss": 2.3325, "step": 313720 }, { "epoch": 1.2127924417435945, "grad_norm": 0.10474978387355804, "learning_rate": 0.002, "loss": 2.3341, "step": 313730 }, { "epoch": 1.2128310989469777, "grad_norm": 0.10101180523633957, "learning_rate": 0.002, "loss": 2.3316, "step": 313740 }, { "epoch": 1.212869756150361, "grad_norm": 0.09087458997964859, "learning_rate": 0.002, "loss": 2.3373, "step": 313750 }, { "epoch": 1.2129084133537442, "grad_norm": 0.0855170488357544, "learning_rate": 0.002, "loss": 2.3341, "step": 313760 }, { "epoch": 1.2129470705571277, "grad_norm": 0.11923859268426895, "learning_rate": 0.002, "loss": 2.3277, "step": 313770 }, { "epoch": 1.212985727760511, "grad_norm": 0.12264309078454971, "learning_rate": 0.002, "loss": 2.3414, "step": 313780 }, { "epoch": 1.2130243849638942, "grad_norm": 0.10807929933071136, "learning_rate": 0.002, "loss": 2.3399, "step": 313790 }, { "epoch": 1.2130630421672774, "grad_norm": 0.09908405691385269, "learning_rate": 0.002, "loss": 2.3392, "step": 313800 }, { "epoch": 1.2131016993706607, "grad_norm": 0.10864149034023285, "learning_rate": 0.002, "loss": 2.3524, "step": 313810 }, { "epoch": 1.213140356574044, "grad_norm": 0.09791579842567444, "learning_rate": 0.002, "loss": 2.3319, "step": 313820 }, { "epoch": 1.2131790137774272, "grad_norm": 0.09416855126619339, "learning_rate": 0.002, "loss": 2.3398, "step": 313830 }, { "epoch": 1.2132176709808107, "grad_norm": 0.11778811365365982, "learning_rate": 0.002, "loss": 2.3448, "step": 313840 }, { "epoch": 1.213256328184194, "grad_norm": 0.10956388711929321, "learning_rate": 0.002, "loss": 2.3154, "step": 313850 }, { "epoch": 1.2132949853875772, "grad_norm": 0.10013221949338913, "learning_rate": 0.002, "loss": 2.3299, "step": 313860 }, { "epoch": 1.2133336425909604, "grad_norm": 0.10736773908138275, "learning_rate": 0.002, "loss": 2.3291, "step": 313870 }, { "epoch": 1.2133722997943437, "grad_norm": 0.09436599910259247, "learning_rate": 0.002, "loss": 2.3506, "step": 313880 }, { "epoch": 1.213410956997727, "grad_norm": 0.11187426745891571, "learning_rate": 0.002, "loss": 2.3304, "step": 313890 }, { "epoch": 1.2134496142011102, "grad_norm": 0.10453151166439056, "learning_rate": 0.002, "loss": 2.3293, "step": 313900 }, { "epoch": 1.2134882714044934, "grad_norm": 0.11502104997634888, "learning_rate": 0.002, "loss": 2.3391, "step": 313910 }, { "epoch": 1.2135269286078767, "grad_norm": 0.10102856159210205, "learning_rate": 0.002, "loss": 2.3435, "step": 313920 }, { "epoch": 1.21356558581126, "grad_norm": 0.09505726397037506, "learning_rate": 0.002, "loss": 2.3151, "step": 313930 }, { "epoch": 1.2136042430146434, "grad_norm": 0.09413469582796097, "learning_rate": 0.002, "loss": 2.3427, "step": 313940 }, { "epoch": 1.2136429002180267, "grad_norm": 0.11024481058120728, "learning_rate": 0.002, "loss": 2.3322, "step": 313950 }, { "epoch": 1.21368155742141, "grad_norm": 0.0951482281088829, "learning_rate": 0.002, "loss": 2.3405, "step": 313960 }, { "epoch": 1.2137202146247932, "grad_norm": 0.1029760017991066, "learning_rate": 0.002, "loss": 2.3375, "step": 313970 }, { "epoch": 1.2137588718281764, "grad_norm": 0.10141143202781677, "learning_rate": 0.002, "loss": 2.3345, "step": 313980 }, { "epoch": 1.2137975290315597, "grad_norm": 0.09400579333305359, "learning_rate": 0.002, "loss": 2.3214, "step": 313990 }, { "epoch": 1.213836186234943, "grad_norm": 0.5630881786346436, "learning_rate": 0.002, "loss": 2.3486, "step": 314000 }, { "epoch": 1.2138748434383264, "grad_norm": 0.1237252727150917, "learning_rate": 0.002, "loss": 2.3399, "step": 314010 }, { "epoch": 1.2139135006417097, "grad_norm": 0.09510214626789093, "learning_rate": 0.002, "loss": 2.3366, "step": 314020 }, { "epoch": 1.213952157845093, "grad_norm": 0.09647620469331741, "learning_rate": 0.002, "loss": 2.3355, "step": 314030 }, { "epoch": 1.2139908150484762, "grad_norm": 0.10102799534797668, "learning_rate": 0.002, "loss": 2.3424, "step": 314040 }, { "epoch": 1.2140294722518594, "grad_norm": 0.0946962758898735, "learning_rate": 0.002, "loss": 2.33, "step": 314050 }, { "epoch": 1.2140681294552427, "grad_norm": 0.10435698181390762, "learning_rate": 0.002, "loss": 2.3378, "step": 314060 }, { "epoch": 1.214106786658626, "grad_norm": 0.10194170475006104, "learning_rate": 0.002, "loss": 2.3374, "step": 314070 }, { "epoch": 1.2141454438620092, "grad_norm": 0.12994642555713654, "learning_rate": 0.002, "loss": 2.3396, "step": 314080 }, { "epoch": 1.2141841010653924, "grad_norm": 0.09357239305973053, "learning_rate": 0.002, "loss": 2.3394, "step": 314090 }, { "epoch": 1.2142227582687757, "grad_norm": 0.09413903206586838, "learning_rate": 0.002, "loss": 2.3277, "step": 314100 }, { "epoch": 1.2142614154721592, "grad_norm": 0.13679394125938416, "learning_rate": 0.002, "loss": 2.3259, "step": 314110 }, { "epoch": 1.2143000726755424, "grad_norm": 0.09909292310476303, "learning_rate": 0.002, "loss": 2.3218, "step": 314120 }, { "epoch": 1.2143387298789257, "grad_norm": 0.09950712323188782, "learning_rate": 0.002, "loss": 2.3332, "step": 314130 }, { "epoch": 1.214377387082309, "grad_norm": 0.09253106266260147, "learning_rate": 0.002, "loss": 2.3258, "step": 314140 }, { "epoch": 1.2144160442856922, "grad_norm": 0.10468227416276932, "learning_rate": 0.002, "loss": 2.3478, "step": 314150 }, { "epoch": 1.2144547014890754, "grad_norm": 0.09493131935596466, "learning_rate": 0.002, "loss": 2.3235, "step": 314160 }, { "epoch": 1.2144933586924587, "grad_norm": 0.1063208281993866, "learning_rate": 0.002, "loss": 2.3433, "step": 314170 }, { "epoch": 1.2145320158958421, "grad_norm": 0.09948117285966873, "learning_rate": 0.002, "loss": 2.3434, "step": 314180 }, { "epoch": 1.2145706730992254, "grad_norm": 0.10003940761089325, "learning_rate": 0.002, "loss": 2.3424, "step": 314190 }, { "epoch": 1.2146093303026086, "grad_norm": 0.10735779255628586, "learning_rate": 0.002, "loss": 2.3317, "step": 314200 }, { "epoch": 1.214647987505992, "grad_norm": 0.10176675766706467, "learning_rate": 0.002, "loss": 2.3384, "step": 314210 }, { "epoch": 1.2146866447093752, "grad_norm": 0.10090947151184082, "learning_rate": 0.002, "loss": 2.3351, "step": 314220 }, { "epoch": 1.2147253019127584, "grad_norm": 0.0865064188838005, "learning_rate": 0.002, "loss": 2.3194, "step": 314230 }, { "epoch": 1.2147639591161417, "grad_norm": 0.10404794663190842, "learning_rate": 0.002, "loss": 2.3281, "step": 314240 }, { "epoch": 1.214802616319525, "grad_norm": 0.10815301537513733, "learning_rate": 0.002, "loss": 2.3412, "step": 314250 }, { "epoch": 1.2148412735229082, "grad_norm": 0.10101471096277237, "learning_rate": 0.002, "loss": 2.3369, "step": 314260 }, { "epoch": 1.2148799307262914, "grad_norm": 0.11894841492176056, "learning_rate": 0.002, "loss": 2.3441, "step": 314270 }, { "epoch": 1.2149185879296749, "grad_norm": 0.11323252320289612, "learning_rate": 0.002, "loss": 2.3469, "step": 314280 }, { "epoch": 1.2149572451330581, "grad_norm": 0.1007615402340889, "learning_rate": 0.002, "loss": 2.3323, "step": 314290 }, { "epoch": 1.2149959023364414, "grad_norm": 0.0997728779911995, "learning_rate": 0.002, "loss": 2.3354, "step": 314300 }, { "epoch": 1.2150345595398246, "grad_norm": 0.11074774712324142, "learning_rate": 0.002, "loss": 2.3331, "step": 314310 }, { "epoch": 1.215073216743208, "grad_norm": 0.11069449782371521, "learning_rate": 0.002, "loss": 2.3249, "step": 314320 }, { "epoch": 1.2151118739465911, "grad_norm": 0.12711121141910553, "learning_rate": 0.002, "loss": 2.3321, "step": 314330 }, { "epoch": 1.2151505311499744, "grad_norm": 0.10363688319921494, "learning_rate": 0.002, "loss": 2.3241, "step": 314340 }, { "epoch": 1.2151891883533579, "grad_norm": 0.11091338843107224, "learning_rate": 0.002, "loss": 2.332, "step": 314350 }, { "epoch": 1.2152278455567411, "grad_norm": 0.10448306798934937, "learning_rate": 0.002, "loss": 2.3168, "step": 314360 }, { "epoch": 1.2152665027601244, "grad_norm": 0.10605467110872269, "learning_rate": 0.002, "loss": 2.3321, "step": 314370 }, { "epoch": 1.2153051599635076, "grad_norm": 0.09692072123289108, "learning_rate": 0.002, "loss": 2.3326, "step": 314380 }, { "epoch": 1.2153438171668909, "grad_norm": 0.09874030947685242, "learning_rate": 0.002, "loss": 2.3263, "step": 314390 }, { "epoch": 1.2153824743702741, "grad_norm": 0.09986934810876846, "learning_rate": 0.002, "loss": 2.3275, "step": 314400 }, { "epoch": 1.2154211315736574, "grad_norm": 0.10098947584629059, "learning_rate": 0.002, "loss": 2.3257, "step": 314410 }, { "epoch": 1.2154597887770406, "grad_norm": 0.10305691510438919, "learning_rate": 0.002, "loss": 2.3389, "step": 314420 }, { "epoch": 1.215498445980424, "grad_norm": 0.10658477991819382, "learning_rate": 0.002, "loss": 2.3333, "step": 314430 }, { "epoch": 1.2155371031838074, "grad_norm": 0.10453560948371887, "learning_rate": 0.002, "loss": 2.3218, "step": 314440 }, { "epoch": 1.2155757603871906, "grad_norm": 0.12518541514873505, "learning_rate": 0.002, "loss": 2.3325, "step": 314450 }, { "epoch": 1.2156144175905739, "grad_norm": 0.10759428143501282, "learning_rate": 0.002, "loss": 2.3208, "step": 314460 }, { "epoch": 1.2156530747939571, "grad_norm": 0.139102503657341, "learning_rate": 0.002, "loss": 2.3343, "step": 314470 }, { "epoch": 1.2156917319973404, "grad_norm": 0.12430110573768616, "learning_rate": 0.002, "loss": 2.3357, "step": 314480 }, { "epoch": 1.2157303892007236, "grad_norm": 0.10704773664474487, "learning_rate": 0.002, "loss": 2.3354, "step": 314490 }, { "epoch": 1.2157690464041069, "grad_norm": 0.09441366046667099, "learning_rate": 0.002, "loss": 2.3337, "step": 314500 }, { "epoch": 1.2158077036074901, "grad_norm": 0.1318897306919098, "learning_rate": 0.002, "loss": 2.341, "step": 314510 }, { "epoch": 1.2158463608108736, "grad_norm": 0.09802470356225967, "learning_rate": 0.002, "loss": 2.3239, "step": 314520 }, { "epoch": 1.2158850180142569, "grad_norm": 0.09931337088346481, "learning_rate": 0.002, "loss": 2.3341, "step": 314530 }, { "epoch": 1.21592367521764, "grad_norm": 0.5061547756195068, "learning_rate": 0.002, "loss": 2.3385, "step": 314540 }, { "epoch": 1.2159623324210234, "grad_norm": 0.10328753292560577, "learning_rate": 0.002, "loss": 2.3216, "step": 314550 }, { "epoch": 1.2160009896244066, "grad_norm": 0.09852741658687592, "learning_rate": 0.002, "loss": 2.3575, "step": 314560 }, { "epoch": 1.2160396468277899, "grad_norm": 0.0911833718419075, "learning_rate": 0.002, "loss": 2.3319, "step": 314570 }, { "epoch": 1.2160783040311731, "grad_norm": 0.10800252854824066, "learning_rate": 0.002, "loss": 2.3453, "step": 314580 }, { "epoch": 1.2161169612345564, "grad_norm": 0.10898005962371826, "learning_rate": 0.002, "loss": 2.3394, "step": 314590 }, { "epoch": 1.2161556184379396, "grad_norm": 0.12181519716978073, "learning_rate": 0.002, "loss": 2.3247, "step": 314600 }, { "epoch": 1.216194275641323, "grad_norm": 0.10061764717102051, "learning_rate": 0.002, "loss": 2.3413, "step": 314610 }, { "epoch": 1.2162329328447063, "grad_norm": 0.09571439027786255, "learning_rate": 0.002, "loss": 2.3363, "step": 314620 }, { "epoch": 1.2162715900480896, "grad_norm": 0.17038051784038544, "learning_rate": 0.002, "loss": 2.3303, "step": 314630 }, { "epoch": 1.2163102472514729, "grad_norm": 0.1250414401292801, "learning_rate": 0.002, "loss": 2.344, "step": 314640 }, { "epoch": 1.216348904454856, "grad_norm": 0.09042432904243469, "learning_rate": 0.002, "loss": 2.3289, "step": 314650 }, { "epoch": 1.2163875616582394, "grad_norm": 0.09905265271663666, "learning_rate": 0.002, "loss": 2.3418, "step": 314660 }, { "epoch": 1.2164262188616226, "grad_norm": 0.11659180372953415, "learning_rate": 0.002, "loss": 2.3333, "step": 314670 }, { "epoch": 1.2164648760650059, "grad_norm": 0.10536820441484451, "learning_rate": 0.002, "loss": 2.3278, "step": 314680 }, { "epoch": 1.2165035332683893, "grad_norm": 0.10633594542741776, "learning_rate": 0.002, "loss": 2.3493, "step": 314690 }, { "epoch": 1.2165421904717726, "grad_norm": 0.10117705166339874, "learning_rate": 0.002, "loss": 2.3329, "step": 314700 }, { "epoch": 1.2165808476751558, "grad_norm": 0.11793801188468933, "learning_rate": 0.002, "loss": 2.3279, "step": 314710 }, { "epoch": 1.216619504878539, "grad_norm": 0.1170022115111351, "learning_rate": 0.002, "loss": 2.3386, "step": 314720 }, { "epoch": 1.2166581620819223, "grad_norm": 0.108283132314682, "learning_rate": 0.002, "loss": 2.3356, "step": 314730 }, { "epoch": 1.2166968192853056, "grad_norm": 0.2359154373407364, "learning_rate": 0.002, "loss": 2.344, "step": 314740 }, { "epoch": 1.2167354764886888, "grad_norm": 0.10665125399827957, "learning_rate": 0.002, "loss": 2.3231, "step": 314750 }, { "epoch": 1.216774133692072, "grad_norm": 0.11395130306482315, "learning_rate": 0.002, "loss": 2.334, "step": 314760 }, { "epoch": 1.2168127908954554, "grad_norm": 0.0980997234582901, "learning_rate": 0.002, "loss": 2.3369, "step": 314770 }, { "epoch": 1.2168514480988388, "grad_norm": 0.10164439678192139, "learning_rate": 0.002, "loss": 2.3438, "step": 314780 }, { "epoch": 1.216890105302222, "grad_norm": 0.2382308840751648, "learning_rate": 0.002, "loss": 2.3347, "step": 314790 }, { "epoch": 1.2169287625056053, "grad_norm": 0.10206914693117142, "learning_rate": 0.002, "loss": 2.3385, "step": 314800 }, { "epoch": 1.2169674197089886, "grad_norm": 0.1029033362865448, "learning_rate": 0.002, "loss": 2.3206, "step": 314810 }, { "epoch": 1.2170060769123718, "grad_norm": 0.11026882380247116, "learning_rate": 0.002, "loss": 2.3355, "step": 314820 }, { "epoch": 1.217044734115755, "grad_norm": 0.09904853254556656, "learning_rate": 0.002, "loss": 2.3363, "step": 314830 }, { "epoch": 1.2170833913191383, "grad_norm": 0.10073044896125793, "learning_rate": 0.002, "loss": 2.3392, "step": 314840 }, { "epoch": 1.2171220485225216, "grad_norm": 0.10072771459817886, "learning_rate": 0.002, "loss": 2.3273, "step": 314850 }, { "epoch": 1.217160705725905, "grad_norm": 0.11892835050821304, "learning_rate": 0.002, "loss": 2.3246, "step": 314860 }, { "epoch": 1.2171993629292883, "grad_norm": 0.10275215655565262, "learning_rate": 0.002, "loss": 2.3404, "step": 314870 }, { "epoch": 1.2172380201326716, "grad_norm": 0.13225539028644562, "learning_rate": 0.002, "loss": 2.3377, "step": 314880 }, { "epoch": 1.2172766773360548, "grad_norm": 0.10573133081197739, "learning_rate": 0.002, "loss": 2.3326, "step": 314890 }, { "epoch": 1.217315334539438, "grad_norm": 0.1118333712220192, "learning_rate": 0.002, "loss": 2.3397, "step": 314900 }, { "epoch": 1.2173539917428213, "grad_norm": 0.1679127812385559, "learning_rate": 0.002, "loss": 2.3468, "step": 314910 }, { "epoch": 1.2173926489462046, "grad_norm": 0.10684831440448761, "learning_rate": 0.002, "loss": 2.3215, "step": 314920 }, { "epoch": 1.2174313061495878, "grad_norm": 0.10050488263368607, "learning_rate": 0.002, "loss": 2.3336, "step": 314930 }, { "epoch": 1.217469963352971, "grad_norm": 0.11194442957639694, "learning_rate": 0.002, "loss": 2.3409, "step": 314940 }, { "epoch": 1.2175086205563546, "grad_norm": 0.10014735907316208, "learning_rate": 0.002, "loss": 2.3208, "step": 314950 }, { "epoch": 1.2175472777597378, "grad_norm": 0.1252419799566269, "learning_rate": 0.002, "loss": 2.338, "step": 314960 }, { "epoch": 1.217585934963121, "grad_norm": 0.11564713716506958, "learning_rate": 0.002, "loss": 2.3382, "step": 314970 }, { "epoch": 1.2176245921665043, "grad_norm": 0.11097046732902527, "learning_rate": 0.002, "loss": 2.3265, "step": 314980 }, { "epoch": 1.2176632493698876, "grad_norm": 0.09906239062547684, "learning_rate": 0.002, "loss": 2.3375, "step": 314990 }, { "epoch": 1.2177019065732708, "grad_norm": 0.11770200729370117, "learning_rate": 0.002, "loss": 2.3367, "step": 315000 }, { "epoch": 1.217740563776654, "grad_norm": 0.10562057793140411, "learning_rate": 0.002, "loss": 2.3294, "step": 315010 }, { "epoch": 1.2177792209800373, "grad_norm": 0.16002397239208221, "learning_rate": 0.002, "loss": 2.3439, "step": 315020 }, { "epoch": 1.2178178781834208, "grad_norm": 0.14758117496967316, "learning_rate": 0.002, "loss": 2.3201, "step": 315030 }, { "epoch": 1.217856535386804, "grad_norm": 0.11798250675201416, "learning_rate": 0.002, "loss": 2.3316, "step": 315040 }, { "epoch": 1.2178951925901873, "grad_norm": 0.1063949316740036, "learning_rate": 0.002, "loss": 2.3432, "step": 315050 }, { "epoch": 1.2179338497935706, "grad_norm": 0.10853171348571777, "learning_rate": 0.002, "loss": 2.3305, "step": 315060 }, { "epoch": 1.2179725069969538, "grad_norm": 0.10362334549427032, "learning_rate": 0.002, "loss": 2.3338, "step": 315070 }, { "epoch": 1.218011164200337, "grad_norm": 0.10557835549116135, "learning_rate": 0.002, "loss": 2.3352, "step": 315080 }, { "epoch": 1.2180498214037203, "grad_norm": 0.13382239639759064, "learning_rate": 0.002, "loss": 2.3498, "step": 315090 }, { "epoch": 1.2180884786071036, "grad_norm": 0.10299064218997955, "learning_rate": 0.002, "loss": 2.3377, "step": 315100 }, { "epoch": 1.2181271358104868, "grad_norm": 0.11686524003744125, "learning_rate": 0.002, "loss": 2.3433, "step": 315110 }, { "epoch": 1.2181657930138703, "grad_norm": 0.09765559434890747, "learning_rate": 0.002, "loss": 2.3311, "step": 315120 }, { "epoch": 1.2182044502172535, "grad_norm": 0.12375412881374359, "learning_rate": 0.002, "loss": 2.3436, "step": 315130 }, { "epoch": 1.2182431074206368, "grad_norm": 0.10474059730768204, "learning_rate": 0.002, "loss": 2.3403, "step": 315140 }, { "epoch": 1.21828176462402, "grad_norm": 0.11837010085582733, "learning_rate": 0.002, "loss": 2.3306, "step": 315150 }, { "epoch": 1.2183204218274033, "grad_norm": 0.11289907246828079, "learning_rate": 0.002, "loss": 2.3467, "step": 315160 }, { "epoch": 1.2183590790307866, "grad_norm": 0.11022341251373291, "learning_rate": 0.002, "loss": 2.34, "step": 315170 }, { "epoch": 1.2183977362341698, "grad_norm": 0.11386298388242722, "learning_rate": 0.002, "loss": 2.3272, "step": 315180 }, { "epoch": 1.2184363934375533, "grad_norm": 0.12257837504148483, "learning_rate": 0.002, "loss": 2.327, "step": 315190 }, { "epoch": 1.2184750506409365, "grad_norm": 0.11156097054481506, "learning_rate": 0.002, "loss": 2.3459, "step": 315200 }, { "epoch": 1.2185137078443198, "grad_norm": 0.10310366004705429, "learning_rate": 0.002, "loss": 2.3527, "step": 315210 }, { "epoch": 1.218552365047703, "grad_norm": 0.10431301593780518, "learning_rate": 0.002, "loss": 2.3303, "step": 315220 }, { "epoch": 1.2185910222510863, "grad_norm": 0.09562677890062332, "learning_rate": 0.002, "loss": 2.332, "step": 315230 }, { "epoch": 1.2186296794544695, "grad_norm": 0.09988576918840408, "learning_rate": 0.002, "loss": 2.3442, "step": 315240 }, { "epoch": 1.2186683366578528, "grad_norm": 0.11268305033445358, "learning_rate": 0.002, "loss": 2.3509, "step": 315250 }, { "epoch": 1.218706993861236, "grad_norm": 0.1090041846036911, "learning_rate": 0.002, "loss": 2.3339, "step": 315260 }, { "epoch": 1.2187456510646193, "grad_norm": 0.09609409421682358, "learning_rate": 0.002, "loss": 2.3333, "step": 315270 }, { "epoch": 1.2187843082680025, "grad_norm": 0.10149850696325302, "learning_rate": 0.002, "loss": 2.3438, "step": 315280 }, { "epoch": 1.218822965471386, "grad_norm": 0.1283339262008667, "learning_rate": 0.002, "loss": 2.3283, "step": 315290 }, { "epoch": 1.2188616226747693, "grad_norm": 0.11716891080141068, "learning_rate": 0.002, "loss": 2.3287, "step": 315300 }, { "epoch": 1.2189002798781525, "grad_norm": 0.10556206852197647, "learning_rate": 0.002, "loss": 2.3422, "step": 315310 }, { "epoch": 1.2189389370815358, "grad_norm": 0.1255723237991333, "learning_rate": 0.002, "loss": 2.3307, "step": 315320 }, { "epoch": 1.218977594284919, "grad_norm": 0.09731069207191467, "learning_rate": 0.002, "loss": 2.3404, "step": 315330 }, { "epoch": 1.2190162514883023, "grad_norm": 0.11111821234226227, "learning_rate": 0.002, "loss": 2.3395, "step": 315340 }, { "epoch": 1.2190549086916855, "grad_norm": 0.10132262110710144, "learning_rate": 0.002, "loss": 2.3259, "step": 315350 }, { "epoch": 1.219093565895069, "grad_norm": 0.11700930446386337, "learning_rate": 0.002, "loss": 2.3193, "step": 315360 }, { "epoch": 1.2191322230984523, "grad_norm": 0.10876394808292389, "learning_rate": 0.002, "loss": 2.3286, "step": 315370 }, { "epoch": 1.2191708803018355, "grad_norm": 0.1077607125043869, "learning_rate": 0.002, "loss": 2.3348, "step": 315380 }, { "epoch": 1.2192095375052188, "grad_norm": 0.10873237252235413, "learning_rate": 0.002, "loss": 2.3401, "step": 315390 }, { "epoch": 1.219248194708602, "grad_norm": 0.10627356171607971, "learning_rate": 0.002, "loss": 2.3434, "step": 315400 }, { "epoch": 1.2192868519119853, "grad_norm": 0.1141803041100502, "learning_rate": 0.002, "loss": 2.3363, "step": 315410 }, { "epoch": 1.2193255091153685, "grad_norm": 0.1050865575671196, "learning_rate": 0.002, "loss": 2.3558, "step": 315420 }, { "epoch": 1.2193641663187518, "grad_norm": 0.09554886817932129, "learning_rate": 0.002, "loss": 2.3366, "step": 315430 }, { "epoch": 1.219402823522135, "grad_norm": 0.10202132165431976, "learning_rate": 0.002, "loss": 2.3428, "step": 315440 }, { "epoch": 1.2194414807255183, "grad_norm": 0.11309561878442764, "learning_rate": 0.002, "loss": 2.3375, "step": 315450 }, { "epoch": 1.2194801379289018, "grad_norm": 0.14103220403194427, "learning_rate": 0.002, "loss": 2.3397, "step": 315460 }, { "epoch": 1.219518795132285, "grad_norm": 0.10153191536664963, "learning_rate": 0.002, "loss": 2.3321, "step": 315470 }, { "epoch": 1.2195574523356683, "grad_norm": 0.09797697514295578, "learning_rate": 0.002, "loss": 2.336, "step": 315480 }, { "epoch": 1.2195961095390515, "grad_norm": 0.11822252720594406, "learning_rate": 0.002, "loss": 2.3245, "step": 315490 }, { "epoch": 1.2196347667424348, "grad_norm": 0.09600470960140228, "learning_rate": 0.002, "loss": 2.3397, "step": 315500 }, { "epoch": 1.219673423945818, "grad_norm": 0.09823154658079147, "learning_rate": 0.002, "loss": 2.3488, "step": 315510 }, { "epoch": 1.2197120811492013, "grad_norm": 0.1113450825214386, "learning_rate": 0.002, "loss": 2.3436, "step": 315520 }, { "epoch": 1.2197507383525847, "grad_norm": 0.11101141571998596, "learning_rate": 0.002, "loss": 2.3424, "step": 315530 }, { "epoch": 1.219789395555968, "grad_norm": 0.1019652783870697, "learning_rate": 0.002, "loss": 2.3412, "step": 315540 }, { "epoch": 1.2198280527593512, "grad_norm": 0.13949422538280487, "learning_rate": 0.002, "loss": 2.3361, "step": 315550 }, { "epoch": 1.2198667099627345, "grad_norm": 0.10365574806928635, "learning_rate": 0.002, "loss": 2.3262, "step": 315560 }, { "epoch": 1.2199053671661177, "grad_norm": 0.10307446122169495, "learning_rate": 0.002, "loss": 2.3276, "step": 315570 }, { "epoch": 1.219944024369501, "grad_norm": 0.09809573739767075, "learning_rate": 0.002, "loss": 2.3428, "step": 315580 }, { "epoch": 1.2199826815728843, "grad_norm": 0.11042384803295135, "learning_rate": 0.002, "loss": 2.3366, "step": 315590 }, { "epoch": 1.2200213387762675, "grad_norm": 0.10844165086746216, "learning_rate": 0.002, "loss": 2.3371, "step": 315600 }, { "epoch": 1.2200599959796508, "grad_norm": 0.10989883542060852, "learning_rate": 0.002, "loss": 2.3399, "step": 315610 }, { "epoch": 1.220098653183034, "grad_norm": 0.10099762678146362, "learning_rate": 0.002, "loss": 2.3358, "step": 315620 }, { "epoch": 1.2201373103864175, "grad_norm": 0.14972710609436035, "learning_rate": 0.002, "loss": 2.3278, "step": 315630 }, { "epoch": 1.2201759675898007, "grad_norm": 0.103081613779068, "learning_rate": 0.002, "loss": 2.3528, "step": 315640 }, { "epoch": 1.220214624793184, "grad_norm": 0.10600491613149643, "learning_rate": 0.002, "loss": 2.3459, "step": 315650 }, { "epoch": 1.2202532819965672, "grad_norm": 0.11377498507499695, "learning_rate": 0.002, "loss": 2.3536, "step": 315660 }, { "epoch": 1.2202919391999505, "grad_norm": 0.1005830317735672, "learning_rate": 0.002, "loss": 2.3121, "step": 315670 }, { "epoch": 1.2203305964033337, "grad_norm": 0.10253360122442245, "learning_rate": 0.002, "loss": 2.3302, "step": 315680 }, { "epoch": 1.220369253606717, "grad_norm": 0.11248026043176651, "learning_rate": 0.002, "loss": 2.34, "step": 315690 }, { "epoch": 1.2204079108101005, "grad_norm": 0.11137398332357407, "learning_rate": 0.002, "loss": 2.3377, "step": 315700 }, { "epoch": 1.2204465680134837, "grad_norm": 0.10283508896827698, "learning_rate": 0.002, "loss": 2.3437, "step": 315710 }, { "epoch": 1.220485225216867, "grad_norm": 0.1013549268245697, "learning_rate": 0.002, "loss": 2.3451, "step": 315720 }, { "epoch": 1.2205238824202502, "grad_norm": 0.10436509549617767, "learning_rate": 0.002, "loss": 2.3283, "step": 315730 }, { "epoch": 1.2205625396236335, "grad_norm": 0.11275258660316467, "learning_rate": 0.002, "loss": 2.3472, "step": 315740 }, { "epoch": 1.2206011968270167, "grad_norm": 0.1164979338645935, "learning_rate": 0.002, "loss": 2.3404, "step": 315750 }, { "epoch": 1.2206398540304, "grad_norm": 0.11312644928693771, "learning_rate": 0.002, "loss": 2.3317, "step": 315760 }, { "epoch": 1.2206785112337832, "grad_norm": 0.09488767385482788, "learning_rate": 0.002, "loss": 2.3411, "step": 315770 }, { "epoch": 1.2207171684371665, "grad_norm": 0.11264296621084213, "learning_rate": 0.002, "loss": 2.3338, "step": 315780 }, { "epoch": 1.2207558256405497, "grad_norm": 0.10742346942424774, "learning_rate": 0.002, "loss": 2.3366, "step": 315790 }, { "epoch": 1.2207944828439332, "grad_norm": 0.10454041510820389, "learning_rate": 0.002, "loss": 2.3312, "step": 315800 }, { "epoch": 1.2208331400473165, "grad_norm": 0.11628180742263794, "learning_rate": 0.002, "loss": 2.3432, "step": 315810 }, { "epoch": 1.2208717972506997, "grad_norm": 0.10877648741006851, "learning_rate": 0.002, "loss": 2.3441, "step": 315820 }, { "epoch": 1.220910454454083, "grad_norm": 0.09616900235414505, "learning_rate": 0.002, "loss": 2.3418, "step": 315830 }, { "epoch": 1.2209491116574662, "grad_norm": 0.10478873550891876, "learning_rate": 0.002, "loss": 2.3352, "step": 315840 }, { "epoch": 1.2209877688608495, "grad_norm": 0.09449452906847, "learning_rate": 0.002, "loss": 2.3267, "step": 315850 }, { "epoch": 1.2210264260642327, "grad_norm": 0.09216703474521637, "learning_rate": 0.002, "loss": 2.3324, "step": 315860 }, { "epoch": 1.2210650832676162, "grad_norm": 0.09965404868125916, "learning_rate": 0.002, "loss": 2.3389, "step": 315870 }, { "epoch": 1.2211037404709995, "grad_norm": 0.119733527302742, "learning_rate": 0.002, "loss": 2.3478, "step": 315880 }, { "epoch": 1.2211423976743827, "grad_norm": 0.09414691478013992, "learning_rate": 0.002, "loss": 2.3179, "step": 315890 }, { "epoch": 1.221181054877766, "grad_norm": 0.10925483703613281, "learning_rate": 0.002, "loss": 2.3393, "step": 315900 }, { "epoch": 1.2212197120811492, "grad_norm": 0.10677146911621094, "learning_rate": 0.002, "loss": 2.3309, "step": 315910 }, { "epoch": 1.2212583692845325, "grad_norm": 0.09887148439884186, "learning_rate": 0.002, "loss": 2.3358, "step": 315920 }, { "epoch": 1.2212970264879157, "grad_norm": 0.09858877211809158, "learning_rate": 0.002, "loss": 2.328, "step": 315930 }, { "epoch": 1.221335683691299, "grad_norm": 0.16472014784812927, "learning_rate": 0.002, "loss": 2.3364, "step": 315940 }, { "epoch": 1.2213743408946822, "grad_norm": 0.11468665301799774, "learning_rate": 0.002, "loss": 2.3331, "step": 315950 }, { "epoch": 1.2214129980980655, "grad_norm": 0.12694212794303894, "learning_rate": 0.002, "loss": 2.3269, "step": 315960 }, { "epoch": 1.221451655301449, "grad_norm": 0.12337895482778549, "learning_rate": 0.002, "loss": 2.3366, "step": 315970 }, { "epoch": 1.2214903125048322, "grad_norm": 0.10966597497463226, "learning_rate": 0.002, "loss": 2.3328, "step": 315980 }, { "epoch": 1.2215289697082155, "grad_norm": 0.17105089128017426, "learning_rate": 0.002, "loss": 2.3462, "step": 315990 }, { "epoch": 1.2215676269115987, "grad_norm": 0.13237425684928894, "learning_rate": 0.002, "loss": 2.342, "step": 316000 }, { "epoch": 1.221606284114982, "grad_norm": 0.08818645030260086, "learning_rate": 0.002, "loss": 2.3223, "step": 316010 }, { "epoch": 1.2216449413183652, "grad_norm": 0.09917477518320084, "learning_rate": 0.002, "loss": 2.338, "step": 316020 }, { "epoch": 1.2216835985217485, "grad_norm": 0.10293343663215637, "learning_rate": 0.002, "loss": 2.3283, "step": 316030 }, { "epoch": 1.221722255725132, "grad_norm": 0.11665060371160507, "learning_rate": 0.002, "loss": 2.3434, "step": 316040 }, { "epoch": 1.2217609129285152, "grad_norm": 0.09830315411090851, "learning_rate": 0.002, "loss": 2.3501, "step": 316050 }, { "epoch": 1.2217995701318984, "grad_norm": 0.13115578889846802, "learning_rate": 0.002, "loss": 2.3339, "step": 316060 }, { "epoch": 1.2218382273352817, "grad_norm": 0.10233128070831299, "learning_rate": 0.002, "loss": 2.3243, "step": 316070 }, { "epoch": 1.221876884538665, "grad_norm": 0.09380917251110077, "learning_rate": 0.002, "loss": 2.3287, "step": 316080 }, { "epoch": 1.2219155417420482, "grad_norm": 0.10349245369434357, "learning_rate": 0.002, "loss": 2.3316, "step": 316090 }, { "epoch": 1.2219541989454314, "grad_norm": 0.10417374223470688, "learning_rate": 0.002, "loss": 2.3359, "step": 316100 }, { "epoch": 1.2219928561488147, "grad_norm": 0.10841956734657288, "learning_rate": 0.002, "loss": 2.3222, "step": 316110 }, { "epoch": 1.222031513352198, "grad_norm": 0.12777014076709747, "learning_rate": 0.002, "loss": 2.3523, "step": 316120 }, { "epoch": 1.2220701705555812, "grad_norm": 0.12356674671173096, "learning_rate": 0.002, "loss": 2.3515, "step": 316130 }, { "epoch": 1.2221088277589647, "grad_norm": 0.11700879037380219, "learning_rate": 0.002, "loss": 2.3519, "step": 316140 }, { "epoch": 1.222147484962348, "grad_norm": 0.10147671401500702, "learning_rate": 0.002, "loss": 2.3543, "step": 316150 }, { "epoch": 1.2221861421657312, "grad_norm": 0.12761598825454712, "learning_rate": 0.002, "loss": 2.339, "step": 316160 }, { "epoch": 1.2222247993691144, "grad_norm": 0.10373452305793762, "learning_rate": 0.002, "loss": 2.3401, "step": 316170 }, { "epoch": 1.2222634565724977, "grad_norm": 0.10208388417959213, "learning_rate": 0.002, "loss": 2.3291, "step": 316180 }, { "epoch": 1.222302113775881, "grad_norm": 0.0917031541466713, "learning_rate": 0.002, "loss": 2.3334, "step": 316190 }, { "epoch": 1.2223407709792642, "grad_norm": 0.10595124959945679, "learning_rate": 0.002, "loss": 2.3323, "step": 316200 }, { "epoch": 1.2223794281826477, "grad_norm": 0.1208588257431984, "learning_rate": 0.002, "loss": 2.3159, "step": 316210 }, { "epoch": 1.222418085386031, "grad_norm": 0.1297757923603058, "learning_rate": 0.002, "loss": 2.3222, "step": 316220 }, { "epoch": 1.2224567425894142, "grad_norm": 0.1111166700720787, "learning_rate": 0.002, "loss": 2.3309, "step": 316230 }, { "epoch": 1.2224953997927974, "grad_norm": 0.12035632878541946, "learning_rate": 0.002, "loss": 2.3482, "step": 316240 }, { "epoch": 1.2225340569961807, "grad_norm": 0.0926397442817688, "learning_rate": 0.002, "loss": 2.3345, "step": 316250 }, { "epoch": 1.222572714199564, "grad_norm": 0.109648197889328, "learning_rate": 0.002, "loss": 2.334, "step": 316260 }, { "epoch": 1.2226113714029472, "grad_norm": 0.097147636115551, "learning_rate": 0.002, "loss": 2.3392, "step": 316270 }, { "epoch": 1.2226500286063304, "grad_norm": 0.10870775580406189, "learning_rate": 0.002, "loss": 2.337, "step": 316280 }, { "epoch": 1.2226886858097137, "grad_norm": 0.09800266474485397, "learning_rate": 0.002, "loss": 2.345, "step": 316290 }, { "epoch": 1.2227273430130972, "grad_norm": 0.11024896800518036, "learning_rate": 0.002, "loss": 2.336, "step": 316300 }, { "epoch": 1.2227660002164804, "grad_norm": 0.10475356131792068, "learning_rate": 0.002, "loss": 2.3304, "step": 316310 }, { "epoch": 1.2228046574198637, "grad_norm": 0.11313185840845108, "learning_rate": 0.002, "loss": 2.3404, "step": 316320 }, { "epoch": 1.222843314623247, "grad_norm": 0.10262496024370193, "learning_rate": 0.002, "loss": 2.33, "step": 316330 }, { "epoch": 1.2228819718266302, "grad_norm": 0.09623805433511734, "learning_rate": 0.002, "loss": 2.3352, "step": 316340 }, { "epoch": 1.2229206290300134, "grad_norm": 0.10645546764135361, "learning_rate": 0.002, "loss": 2.3477, "step": 316350 }, { "epoch": 1.2229592862333967, "grad_norm": 0.1368713229894638, "learning_rate": 0.002, "loss": 2.3352, "step": 316360 }, { "epoch": 1.22299794343678, "grad_norm": 0.2733914256095886, "learning_rate": 0.002, "loss": 2.3345, "step": 316370 }, { "epoch": 1.2230366006401634, "grad_norm": 0.111940898001194, "learning_rate": 0.002, "loss": 2.3376, "step": 316380 }, { "epoch": 1.2230752578435466, "grad_norm": 0.0978144183754921, "learning_rate": 0.002, "loss": 2.3351, "step": 316390 }, { "epoch": 1.22311391504693, "grad_norm": 0.11087758839130402, "learning_rate": 0.002, "loss": 2.3153, "step": 316400 }, { "epoch": 1.2231525722503132, "grad_norm": 0.10408814996480942, "learning_rate": 0.002, "loss": 2.3315, "step": 316410 }, { "epoch": 1.2231912294536964, "grad_norm": 0.09981653094291687, "learning_rate": 0.002, "loss": 2.34, "step": 316420 }, { "epoch": 1.2232298866570797, "grad_norm": 0.10293741524219513, "learning_rate": 0.002, "loss": 2.3345, "step": 316430 }, { "epoch": 1.223268543860463, "grad_norm": 0.10573162138462067, "learning_rate": 0.002, "loss": 2.3175, "step": 316440 }, { "epoch": 1.2233072010638462, "grad_norm": 0.09074226766824722, "learning_rate": 0.002, "loss": 2.3382, "step": 316450 }, { "epoch": 1.2233458582672294, "grad_norm": 0.103923000395298, "learning_rate": 0.002, "loss": 2.3405, "step": 316460 }, { "epoch": 1.2233845154706129, "grad_norm": 0.10840972512960434, "learning_rate": 0.002, "loss": 2.3298, "step": 316470 }, { "epoch": 1.2234231726739961, "grad_norm": 0.10516613721847534, "learning_rate": 0.002, "loss": 2.3409, "step": 316480 }, { "epoch": 1.2234618298773794, "grad_norm": 0.10513375699520111, "learning_rate": 0.002, "loss": 2.3397, "step": 316490 }, { "epoch": 1.2235004870807626, "grad_norm": 0.0963342934846878, "learning_rate": 0.002, "loss": 2.3338, "step": 316500 }, { "epoch": 1.223539144284146, "grad_norm": 0.114549919962883, "learning_rate": 0.002, "loss": 2.3398, "step": 316510 }, { "epoch": 1.2235778014875291, "grad_norm": 0.11324504762887955, "learning_rate": 0.002, "loss": 2.3531, "step": 316520 }, { "epoch": 1.2236164586909124, "grad_norm": 0.11725080758333206, "learning_rate": 0.002, "loss": 2.3244, "step": 316530 }, { "epoch": 1.2236551158942957, "grad_norm": 0.10726740211248398, "learning_rate": 0.002, "loss": 2.3273, "step": 316540 }, { "epoch": 1.2236937730976791, "grad_norm": 0.10920976847410202, "learning_rate": 0.002, "loss": 2.34, "step": 316550 }, { "epoch": 1.2237324303010624, "grad_norm": 0.11233483254909515, "learning_rate": 0.002, "loss": 2.3372, "step": 316560 }, { "epoch": 1.2237710875044456, "grad_norm": 0.10754575580358505, "learning_rate": 0.002, "loss": 2.3293, "step": 316570 }, { "epoch": 1.2238097447078289, "grad_norm": 0.26863402128219604, "learning_rate": 0.002, "loss": 2.3307, "step": 316580 }, { "epoch": 1.2238484019112121, "grad_norm": 1.1302027702331543, "learning_rate": 0.002, "loss": 2.3396, "step": 316590 }, { "epoch": 1.2238870591145954, "grad_norm": 1.478796124458313, "learning_rate": 0.002, "loss": 2.3738, "step": 316600 }, { "epoch": 1.2239257163179786, "grad_norm": 0.12988591194152832, "learning_rate": 0.002, "loss": 2.3564, "step": 316610 }, { "epoch": 1.223964373521362, "grad_norm": 0.09178368747234344, "learning_rate": 0.002, "loss": 2.3405, "step": 316620 }, { "epoch": 1.2240030307247451, "grad_norm": 0.10592716187238693, "learning_rate": 0.002, "loss": 2.3437, "step": 316630 }, { "epoch": 1.2240416879281286, "grad_norm": 0.09742153435945511, "learning_rate": 0.002, "loss": 2.3293, "step": 316640 }, { "epoch": 1.2240803451315119, "grad_norm": 0.11273608356714249, "learning_rate": 0.002, "loss": 2.3269, "step": 316650 }, { "epoch": 1.2241190023348951, "grad_norm": 0.09497487545013428, "learning_rate": 0.002, "loss": 2.3378, "step": 316660 }, { "epoch": 1.2241576595382784, "grad_norm": 0.0983516052365303, "learning_rate": 0.002, "loss": 2.3326, "step": 316670 }, { "epoch": 1.2241963167416616, "grad_norm": 0.11890359967947006, "learning_rate": 0.002, "loss": 2.3408, "step": 316680 }, { "epoch": 1.2242349739450449, "grad_norm": 0.09866367280483246, "learning_rate": 0.002, "loss": 2.3366, "step": 316690 }, { "epoch": 1.2242736311484281, "grad_norm": 0.1096860021352768, "learning_rate": 0.002, "loss": 2.3255, "step": 316700 }, { "epoch": 1.2243122883518114, "grad_norm": 0.13388590514659882, "learning_rate": 0.002, "loss": 2.3396, "step": 316710 }, { "epoch": 1.2243509455551949, "grad_norm": 0.1254730373620987, "learning_rate": 0.002, "loss": 2.3321, "step": 316720 }, { "epoch": 1.224389602758578, "grad_norm": 0.09265025705099106, "learning_rate": 0.002, "loss": 2.3433, "step": 316730 }, { "epoch": 1.2244282599619614, "grad_norm": 0.1134009137749672, "learning_rate": 0.002, "loss": 2.3303, "step": 316740 }, { "epoch": 1.2244669171653446, "grad_norm": 0.10628406703472137, "learning_rate": 0.002, "loss": 2.3357, "step": 316750 }, { "epoch": 1.2245055743687279, "grad_norm": 0.11307065188884735, "learning_rate": 0.002, "loss": 2.3339, "step": 316760 }, { "epoch": 1.2245442315721111, "grad_norm": 0.10712608695030212, "learning_rate": 0.002, "loss": 2.3224, "step": 316770 }, { "epoch": 1.2245828887754944, "grad_norm": 0.10583803802728653, "learning_rate": 0.002, "loss": 2.3417, "step": 316780 }, { "epoch": 1.2246215459788776, "grad_norm": 0.1089818999171257, "learning_rate": 0.002, "loss": 2.3335, "step": 316790 }, { "epoch": 1.2246602031822609, "grad_norm": 0.10267559438943863, "learning_rate": 0.002, "loss": 2.3282, "step": 316800 }, { "epoch": 1.2246988603856444, "grad_norm": 0.11662919074296951, "learning_rate": 0.002, "loss": 2.3327, "step": 316810 }, { "epoch": 1.2247375175890276, "grad_norm": 0.10340824723243713, "learning_rate": 0.002, "loss": 2.3455, "step": 316820 }, { "epoch": 1.2247761747924109, "grad_norm": 0.10434640944004059, "learning_rate": 0.002, "loss": 2.3378, "step": 316830 }, { "epoch": 1.224814831995794, "grad_norm": 0.10398906469345093, "learning_rate": 0.002, "loss": 2.3331, "step": 316840 }, { "epoch": 1.2248534891991774, "grad_norm": 0.10468418896198273, "learning_rate": 0.002, "loss": 2.3411, "step": 316850 }, { "epoch": 1.2248921464025606, "grad_norm": 0.11407443881034851, "learning_rate": 0.002, "loss": 2.3447, "step": 316860 }, { "epoch": 1.2249308036059439, "grad_norm": 0.10805194824934006, "learning_rate": 0.002, "loss": 2.3158, "step": 316870 }, { "epoch": 1.2249694608093271, "grad_norm": 0.11241121590137482, "learning_rate": 0.002, "loss": 2.3221, "step": 316880 }, { "epoch": 1.2250081180127106, "grad_norm": 0.13383576273918152, "learning_rate": 0.002, "loss": 2.345, "step": 316890 }, { "epoch": 1.2250467752160938, "grad_norm": 0.1047653928399086, "learning_rate": 0.002, "loss": 2.3199, "step": 316900 }, { "epoch": 1.225085432419477, "grad_norm": 0.10415913909673691, "learning_rate": 0.002, "loss": 2.3249, "step": 316910 }, { "epoch": 1.2251240896228603, "grad_norm": 0.09722809493541718, "learning_rate": 0.002, "loss": 2.352, "step": 316920 }, { "epoch": 1.2251627468262436, "grad_norm": 0.11248863488435745, "learning_rate": 0.002, "loss": 2.3391, "step": 316930 }, { "epoch": 1.2252014040296269, "grad_norm": 0.10968249291181564, "learning_rate": 0.002, "loss": 2.3396, "step": 316940 }, { "epoch": 1.22524006123301, "grad_norm": 0.1054498702287674, "learning_rate": 0.002, "loss": 2.3351, "step": 316950 }, { "epoch": 1.2252787184363934, "grad_norm": 0.1123088076710701, "learning_rate": 0.002, "loss": 2.3287, "step": 316960 }, { "epoch": 1.2253173756397766, "grad_norm": 0.09681358933448792, "learning_rate": 0.002, "loss": 2.3434, "step": 316970 }, { "epoch": 1.22535603284316, "grad_norm": 0.10066763311624527, "learning_rate": 0.002, "loss": 2.3174, "step": 316980 }, { "epoch": 1.2253946900465433, "grad_norm": 0.11117463558912277, "learning_rate": 0.002, "loss": 2.3261, "step": 316990 }, { "epoch": 1.2254333472499266, "grad_norm": 0.10142451524734497, "learning_rate": 0.002, "loss": 2.3405, "step": 317000 }, { "epoch": 1.2254720044533098, "grad_norm": 0.11557786911725998, "learning_rate": 0.002, "loss": 2.3368, "step": 317010 }, { "epoch": 1.225510661656693, "grad_norm": 0.10839302092790604, "learning_rate": 0.002, "loss": 2.3411, "step": 317020 }, { "epoch": 1.2255493188600763, "grad_norm": 0.10059081763029099, "learning_rate": 0.002, "loss": 2.3403, "step": 317030 }, { "epoch": 1.2255879760634596, "grad_norm": 0.0907895416021347, "learning_rate": 0.002, "loss": 2.3543, "step": 317040 }, { "epoch": 1.2256266332668428, "grad_norm": 0.11913354694843292, "learning_rate": 0.002, "loss": 2.3215, "step": 317050 }, { "epoch": 1.2256652904702263, "grad_norm": 0.10284785181283951, "learning_rate": 0.002, "loss": 2.3349, "step": 317060 }, { "epoch": 1.2257039476736096, "grad_norm": 0.10316737741231918, "learning_rate": 0.002, "loss": 2.3384, "step": 317070 }, { "epoch": 1.2257426048769928, "grad_norm": 0.1105693131685257, "learning_rate": 0.002, "loss": 2.3249, "step": 317080 }, { "epoch": 1.225781262080376, "grad_norm": 0.10793313384056091, "learning_rate": 0.002, "loss": 2.3205, "step": 317090 }, { "epoch": 1.2258199192837593, "grad_norm": 0.09296276420354843, "learning_rate": 0.002, "loss": 2.3509, "step": 317100 }, { "epoch": 1.2258585764871426, "grad_norm": 0.11745022982358932, "learning_rate": 0.002, "loss": 2.347, "step": 317110 }, { "epoch": 1.2258972336905258, "grad_norm": 0.1112813726067543, "learning_rate": 0.002, "loss": 2.327, "step": 317120 }, { "epoch": 1.225935890893909, "grad_norm": 0.1136942058801651, "learning_rate": 0.002, "loss": 2.3244, "step": 317130 }, { "epoch": 1.2259745480972923, "grad_norm": 0.10509467124938965, "learning_rate": 0.002, "loss": 2.3395, "step": 317140 }, { "epoch": 1.2260132053006758, "grad_norm": 0.09688248485326767, "learning_rate": 0.002, "loss": 2.3441, "step": 317150 }, { "epoch": 1.226051862504059, "grad_norm": 0.10055442899465561, "learning_rate": 0.002, "loss": 2.3304, "step": 317160 }, { "epoch": 1.2260905197074423, "grad_norm": 0.09972388297319412, "learning_rate": 0.002, "loss": 2.3497, "step": 317170 }, { "epoch": 1.2261291769108256, "grad_norm": 0.103391632437706, "learning_rate": 0.002, "loss": 2.3393, "step": 317180 }, { "epoch": 1.2261678341142088, "grad_norm": 0.11840787529945374, "learning_rate": 0.002, "loss": 2.3253, "step": 317190 }, { "epoch": 1.226206491317592, "grad_norm": 0.10957502573728561, "learning_rate": 0.002, "loss": 2.3358, "step": 317200 }, { "epoch": 1.2262451485209753, "grad_norm": 0.09851084649562836, "learning_rate": 0.002, "loss": 2.3326, "step": 317210 }, { "epoch": 1.2262838057243588, "grad_norm": 0.1071065366268158, "learning_rate": 0.002, "loss": 2.342, "step": 317220 }, { "epoch": 1.226322462927742, "grad_norm": 0.10723765939474106, "learning_rate": 0.002, "loss": 2.3389, "step": 317230 }, { "epoch": 1.2263611201311253, "grad_norm": 0.10659579187631607, "learning_rate": 0.002, "loss": 2.3384, "step": 317240 }, { "epoch": 1.2263997773345086, "grad_norm": 0.11792304366827011, "learning_rate": 0.002, "loss": 2.3339, "step": 317250 }, { "epoch": 1.2264384345378918, "grad_norm": 0.10275556892156601, "learning_rate": 0.002, "loss": 2.3412, "step": 317260 }, { "epoch": 1.226477091741275, "grad_norm": 0.10414616018533707, "learning_rate": 0.002, "loss": 2.3375, "step": 317270 }, { "epoch": 1.2265157489446583, "grad_norm": 0.10112849622964859, "learning_rate": 0.002, "loss": 2.3227, "step": 317280 }, { "epoch": 1.2265544061480416, "grad_norm": 0.11342323571443558, "learning_rate": 0.002, "loss": 2.3217, "step": 317290 }, { "epoch": 1.2265930633514248, "grad_norm": 0.3798435926437378, "learning_rate": 0.002, "loss": 2.3468, "step": 317300 }, { "epoch": 1.226631720554808, "grad_norm": 0.11032916605472565, "learning_rate": 0.002, "loss": 2.3402, "step": 317310 }, { "epoch": 1.2266703777581915, "grad_norm": 0.15820318460464478, "learning_rate": 0.002, "loss": 2.3443, "step": 317320 }, { "epoch": 1.2267090349615748, "grad_norm": 0.14765627682209015, "learning_rate": 0.002, "loss": 2.3378, "step": 317330 }, { "epoch": 1.226747692164958, "grad_norm": 0.12677901983261108, "learning_rate": 0.002, "loss": 2.3611, "step": 317340 }, { "epoch": 1.2267863493683413, "grad_norm": 0.1373942494392395, "learning_rate": 0.002, "loss": 2.3747, "step": 317350 }, { "epoch": 1.2268250065717246, "grad_norm": 0.1087678000330925, "learning_rate": 0.002, "loss": 2.3413, "step": 317360 }, { "epoch": 1.2268636637751078, "grad_norm": 0.09919967502355576, "learning_rate": 0.002, "loss": 2.3202, "step": 317370 }, { "epoch": 1.226902320978491, "grad_norm": 0.10102380067110062, "learning_rate": 0.002, "loss": 2.3314, "step": 317380 }, { "epoch": 1.2269409781818745, "grad_norm": 0.10804580897092819, "learning_rate": 0.002, "loss": 2.3427, "step": 317390 }, { "epoch": 1.2269796353852578, "grad_norm": 0.10194896906614304, "learning_rate": 0.002, "loss": 2.3268, "step": 317400 }, { "epoch": 1.227018292588641, "grad_norm": 0.10182174295186996, "learning_rate": 0.002, "loss": 2.3564, "step": 317410 }, { "epoch": 1.2270569497920243, "grad_norm": 0.09750444442033768, "learning_rate": 0.002, "loss": 2.324, "step": 317420 }, { "epoch": 1.2270956069954075, "grad_norm": 0.10217190533876419, "learning_rate": 0.002, "loss": 2.3383, "step": 317430 }, { "epoch": 1.2271342641987908, "grad_norm": 0.11747973412275314, "learning_rate": 0.002, "loss": 2.3457, "step": 317440 }, { "epoch": 1.227172921402174, "grad_norm": 0.10178996622562408, "learning_rate": 0.002, "loss": 2.3378, "step": 317450 }, { "epoch": 1.2272115786055573, "grad_norm": 0.2366355061531067, "learning_rate": 0.002, "loss": 2.3321, "step": 317460 }, { "epoch": 1.2272502358089405, "grad_norm": 0.09020444750785828, "learning_rate": 0.002, "loss": 2.3262, "step": 317470 }, { "epoch": 1.2272888930123238, "grad_norm": 0.11036652326583862, "learning_rate": 0.002, "loss": 2.3367, "step": 317480 }, { "epoch": 1.2273275502157073, "grad_norm": 0.10769999772310257, "learning_rate": 0.002, "loss": 2.3389, "step": 317490 }, { "epoch": 1.2273662074190905, "grad_norm": 0.1324654221534729, "learning_rate": 0.002, "loss": 2.3252, "step": 317500 }, { "epoch": 1.2274048646224738, "grad_norm": 0.09234415739774704, "learning_rate": 0.002, "loss": 2.3436, "step": 317510 }, { "epoch": 1.227443521825857, "grad_norm": 0.08859698474407196, "learning_rate": 0.002, "loss": 2.3506, "step": 317520 }, { "epoch": 1.2274821790292403, "grad_norm": 0.08461683243513107, "learning_rate": 0.002, "loss": 2.3218, "step": 317530 }, { "epoch": 1.2275208362326235, "grad_norm": 0.10731338709592819, "learning_rate": 0.002, "loss": 2.3351, "step": 317540 }, { "epoch": 1.2275594934360068, "grad_norm": 0.12990544736385345, "learning_rate": 0.002, "loss": 2.3307, "step": 317550 }, { "epoch": 1.2275981506393903, "grad_norm": 0.1029435470700264, "learning_rate": 0.002, "loss": 2.3388, "step": 317560 }, { "epoch": 1.2276368078427735, "grad_norm": 0.10510992258787155, "learning_rate": 0.002, "loss": 2.3457, "step": 317570 }, { "epoch": 1.2276754650461568, "grad_norm": 0.13130173087120056, "learning_rate": 0.002, "loss": 2.3365, "step": 317580 }, { "epoch": 1.22771412224954, "grad_norm": 0.1059836894273758, "learning_rate": 0.002, "loss": 2.3408, "step": 317590 }, { "epoch": 1.2277527794529233, "grad_norm": 0.11718516796827316, "learning_rate": 0.002, "loss": 2.3455, "step": 317600 }, { "epoch": 1.2277914366563065, "grad_norm": 0.09132679551839828, "learning_rate": 0.002, "loss": 2.3404, "step": 317610 }, { "epoch": 1.2278300938596898, "grad_norm": 0.10622987151145935, "learning_rate": 0.002, "loss": 2.3326, "step": 317620 }, { "epoch": 1.227868751063073, "grad_norm": 0.09866461902856827, "learning_rate": 0.002, "loss": 2.341, "step": 317630 }, { "epoch": 1.2279074082664563, "grad_norm": 0.12461042404174805, "learning_rate": 0.002, "loss": 2.3411, "step": 317640 }, { "epoch": 1.2279460654698395, "grad_norm": 0.10212338715791702, "learning_rate": 0.002, "loss": 2.3375, "step": 317650 }, { "epoch": 1.227984722673223, "grad_norm": 0.1242091953754425, "learning_rate": 0.002, "loss": 2.3391, "step": 317660 }, { "epoch": 1.2280233798766063, "grad_norm": 0.12155944108963013, "learning_rate": 0.002, "loss": 2.3376, "step": 317670 }, { "epoch": 1.2280620370799895, "grad_norm": 0.11292026191949844, "learning_rate": 0.002, "loss": 2.3362, "step": 317680 }, { "epoch": 1.2281006942833728, "grad_norm": 0.10036251693964005, "learning_rate": 0.002, "loss": 2.3388, "step": 317690 }, { "epoch": 1.228139351486756, "grad_norm": 0.10223782062530518, "learning_rate": 0.002, "loss": 2.3383, "step": 317700 }, { "epoch": 1.2281780086901393, "grad_norm": 0.1443469524383545, "learning_rate": 0.002, "loss": 2.3473, "step": 317710 }, { "epoch": 1.2282166658935225, "grad_norm": 0.13119737803936005, "learning_rate": 0.002, "loss": 2.3334, "step": 317720 }, { "epoch": 1.228255323096906, "grad_norm": 0.10402494668960571, "learning_rate": 0.002, "loss": 2.3484, "step": 317730 }, { "epoch": 1.2282939803002892, "grad_norm": 0.1034676656126976, "learning_rate": 0.002, "loss": 2.3278, "step": 317740 }, { "epoch": 1.2283326375036725, "grad_norm": 0.12188990414142609, "learning_rate": 0.002, "loss": 2.3406, "step": 317750 }, { "epoch": 1.2283712947070557, "grad_norm": 0.09709770977497101, "learning_rate": 0.002, "loss": 2.351, "step": 317760 }, { "epoch": 1.228409951910439, "grad_norm": 0.1213182806968689, "learning_rate": 0.002, "loss": 2.3475, "step": 317770 }, { "epoch": 1.2284486091138223, "grad_norm": 0.1171458512544632, "learning_rate": 0.002, "loss": 2.3521, "step": 317780 }, { "epoch": 1.2284872663172055, "grad_norm": 0.09407136589288712, "learning_rate": 0.002, "loss": 2.3224, "step": 317790 }, { "epoch": 1.2285259235205888, "grad_norm": 0.09715291857719421, "learning_rate": 0.002, "loss": 2.3538, "step": 317800 }, { "epoch": 1.228564580723972, "grad_norm": 0.12914592027664185, "learning_rate": 0.002, "loss": 2.3377, "step": 317810 }, { "epoch": 1.2286032379273553, "grad_norm": 0.09152337163686752, "learning_rate": 0.002, "loss": 2.3402, "step": 317820 }, { "epoch": 1.2286418951307387, "grad_norm": 0.10001769661903381, "learning_rate": 0.002, "loss": 2.3485, "step": 317830 }, { "epoch": 1.228680552334122, "grad_norm": 0.10625078529119492, "learning_rate": 0.002, "loss": 2.3226, "step": 317840 }, { "epoch": 1.2287192095375052, "grad_norm": 0.09744622558355331, "learning_rate": 0.002, "loss": 2.3292, "step": 317850 }, { "epoch": 1.2287578667408885, "grad_norm": 0.10693547129631042, "learning_rate": 0.002, "loss": 2.3547, "step": 317860 }, { "epoch": 1.2287965239442717, "grad_norm": 0.11540830135345459, "learning_rate": 0.002, "loss": 2.3353, "step": 317870 }, { "epoch": 1.228835181147655, "grad_norm": 0.11118239164352417, "learning_rate": 0.002, "loss": 2.3195, "step": 317880 }, { "epoch": 1.2288738383510383, "grad_norm": 0.09778404235839844, "learning_rate": 0.002, "loss": 2.3383, "step": 317890 }, { "epoch": 1.2289124955544217, "grad_norm": 0.09472370147705078, "learning_rate": 0.002, "loss": 2.3373, "step": 317900 }, { "epoch": 1.228951152757805, "grad_norm": 0.09750627726316452, "learning_rate": 0.002, "loss": 2.3188, "step": 317910 }, { "epoch": 1.2289898099611882, "grad_norm": 0.11595270037651062, "learning_rate": 0.002, "loss": 2.3316, "step": 317920 }, { "epoch": 1.2290284671645715, "grad_norm": 0.12342119961977005, "learning_rate": 0.002, "loss": 2.3404, "step": 317930 }, { "epoch": 1.2290671243679547, "grad_norm": 0.09711139649152756, "learning_rate": 0.002, "loss": 2.3459, "step": 317940 }, { "epoch": 1.229105781571338, "grad_norm": 0.09201836585998535, "learning_rate": 0.002, "loss": 2.3556, "step": 317950 }, { "epoch": 1.2291444387747212, "grad_norm": 0.11474625766277313, "learning_rate": 0.002, "loss": 2.3349, "step": 317960 }, { "epoch": 1.2291830959781045, "grad_norm": 0.10597923398017883, "learning_rate": 0.002, "loss": 2.3378, "step": 317970 }, { "epoch": 1.2292217531814877, "grad_norm": 0.10793079435825348, "learning_rate": 0.002, "loss": 2.3295, "step": 317980 }, { "epoch": 1.229260410384871, "grad_norm": 0.10415330529212952, "learning_rate": 0.002, "loss": 2.3427, "step": 317990 }, { "epoch": 1.2292990675882545, "grad_norm": 0.213987335562706, "learning_rate": 0.002, "loss": 2.3403, "step": 318000 }, { "epoch": 1.2293377247916377, "grad_norm": 0.09513404965400696, "learning_rate": 0.002, "loss": 2.3486, "step": 318010 }, { "epoch": 1.229376381995021, "grad_norm": 0.10263754427433014, "learning_rate": 0.002, "loss": 2.3253, "step": 318020 }, { "epoch": 1.2294150391984042, "grad_norm": 0.10201618075370789, "learning_rate": 0.002, "loss": 2.344, "step": 318030 }, { "epoch": 1.2294536964017875, "grad_norm": 0.10374777764081955, "learning_rate": 0.002, "loss": 2.3477, "step": 318040 }, { "epoch": 1.2294923536051707, "grad_norm": 0.11623865365982056, "learning_rate": 0.002, "loss": 2.3396, "step": 318050 }, { "epoch": 1.229531010808554, "grad_norm": 0.09610338509082794, "learning_rate": 0.002, "loss": 2.3304, "step": 318060 }, { "epoch": 1.2295696680119375, "grad_norm": 0.10656408220529556, "learning_rate": 0.002, "loss": 2.3387, "step": 318070 }, { "epoch": 1.2296083252153207, "grad_norm": 0.08608821034431458, "learning_rate": 0.002, "loss": 2.3562, "step": 318080 }, { "epoch": 1.229646982418704, "grad_norm": 0.11922752857208252, "learning_rate": 0.002, "loss": 2.3278, "step": 318090 }, { "epoch": 1.2296856396220872, "grad_norm": 0.1097605973482132, "learning_rate": 0.002, "loss": 2.3376, "step": 318100 }, { "epoch": 1.2297242968254705, "grad_norm": 0.10531909018754959, "learning_rate": 0.002, "loss": 2.3229, "step": 318110 }, { "epoch": 1.2297629540288537, "grad_norm": 0.11432632803916931, "learning_rate": 0.002, "loss": 2.3426, "step": 318120 }, { "epoch": 1.229801611232237, "grad_norm": 0.09751730412244797, "learning_rate": 0.002, "loss": 2.3428, "step": 318130 }, { "epoch": 1.2298402684356202, "grad_norm": 0.11484427750110626, "learning_rate": 0.002, "loss": 2.3468, "step": 318140 }, { "epoch": 1.2298789256390035, "grad_norm": 0.1034855917096138, "learning_rate": 0.002, "loss": 2.3391, "step": 318150 }, { "epoch": 1.229917582842387, "grad_norm": 0.09148947149515152, "learning_rate": 0.002, "loss": 2.3418, "step": 318160 }, { "epoch": 1.2299562400457702, "grad_norm": 0.10319654643535614, "learning_rate": 0.002, "loss": 2.3331, "step": 318170 }, { "epoch": 1.2299948972491535, "grad_norm": 0.10570099204778671, "learning_rate": 0.002, "loss": 2.3348, "step": 318180 }, { "epoch": 1.2300335544525367, "grad_norm": 0.08839817345142365, "learning_rate": 0.002, "loss": 2.3322, "step": 318190 }, { "epoch": 1.23007221165592, "grad_norm": 0.09677153825759888, "learning_rate": 0.002, "loss": 2.3287, "step": 318200 }, { "epoch": 1.2301108688593032, "grad_norm": 0.11502538621425629, "learning_rate": 0.002, "loss": 2.3182, "step": 318210 }, { "epoch": 1.2301495260626865, "grad_norm": 0.11625374108552933, "learning_rate": 0.002, "loss": 2.359, "step": 318220 }, { "epoch": 1.2301881832660697, "grad_norm": 0.10126259177923203, "learning_rate": 0.002, "loss": 2.3341, "step": 318230 }, { "epoch": 1.2302268404694532, "grad_norm": 0.100708968937397, "learning_rate": 0.002, "loss": 2.3323, "step": 318240 }, { "epoch": 1.2302654976728364, "grad_norm": 0.10420756787061691, "learning_rate": 0.002, "loss": 2.3435, "step": 318250 }, { "epoch": 1.2303041548762197, "grad_norm": 0.11100134998559952, "learning_rate": 0.002, "loss": 2.3358, "step": 318260 }, { "epoch": 1.230342812079603, "grad_norm": 0.12913987040519714, "learning_rate": 0.002, "loss": 2.3376, "step": 318270 }, { "epoch": 1.2303814692829862, "grad_norm": 0.09815038740634918, "learning_rate": 0.002, "loss": 2.3365, "step": 318280 }, { "epoch": 1.2304201264863694, "grad_norm": 0.09525801986455917, "learning_rate": 0.002, "loss": 2.3295, "step": 318290 }, { "epoch": 1.2304587836897527, "grad_norm": 0.0997467041015625, "learning_rate": 0.002, "loss": 2.3395, "step": 318300 }, { "epoch": 1.230497440893136, "grad_norm": 0.10505304485559464, "learning_rate": 0.002, "loss": 2.3469, "step": 318310 }, { "epoch": 1.2305360980965192, "grad_norm": 0.11818020790815353, "learning_rate": 0.002, "loss": 2.3297, "step": 318320 }, { "epoch": 1.2305747552999027, "grad_norm": 0.12198269367218018, "learning_rate": 0.002, "loss": 2.3355, "step": 318330 }, { "epoch": 1.230613412503286, "grad_norm": 0.10771975666284561, "learning_rate": 0.002, "loss": 2.3446, "step": 318340 }, { "epoch": 1.2306520697066692, "grad_norm": 0.10391145944595337, "learning_rate": 0.002, "loss": 2.3344, "step": 318350 }, { "epoch": 1.2306907269100524, "grad_norm": 0.10317855328321457, "learning_rate": 0.002, "loss": 2.3278, "step": 318360 }, { "epoch": 1.2307293841134357, "grad_norm": 0.11238475143909454, "learning_rate": 0.002, "loss": 2.3328, "step": 318370 }, { "epoch": 1.230768041316819, "grad_norm": 0.10218264162540436, "learning_rate": 0.002, "loss": 2.3277, "step": 318380 }, { "epoch": 1.2308066985202022, "grad_norm": 0.10200051963329315, "learning_rate": 0.002, "loss": 2.33, "step": 318390 }, { "epoch": 1.2308453557235854, "grad_norm": 0.10236311703920364, "learning_rate": 0.002, "loss": 2.3185, "step": 318400 }, { "epoch": 1.230884012926969, "grad_norm": 0.10305205732584, "learning_rate": 0.002, "loss": 2.3396, "step": 318410 }, { "epoch": 1.2309226701303522, "grad_norm": 0.08705495297908783, "learning_rate": 0.002, "loss": 2.3478, "step": 318420 }, { "epoch": 1.2309613273337354, "grad_norm": 0.14205306768417358, "learning_rate": 0.002, "loss": 2.3296, "step": 318430 }, { "epoch": 1.2309999845371187, "grad_norm": 0.1121765598654747, "learning_rate": 0.002, "loss": 2.3456, "step": 318440 }, { "epoch": 1.231038641740502, "grad_norm": 0.09126516431570053, "learning_rate": 0.002, "loss": 2.3412, "step": 318450 }, { "epoch": 1.2310772989438852, "grad_norm": 0.13235266506671906, "learning_rate": 0.002, "loss": 2.3353, "step": 318460 }, { "epoch": 1.2311159561472684, "grad_norm": 0.10639721155166626, "learning_rate": 0.002, "loss": 2.3374, "step": 318470 }, { "epoch": 1.2311546133506517, "grad_norm": 0.15126991271972656, "learning_rate": 0.002, "loss": 2.3368, "step": 318480 }, { "epoch": 1.231193270554035, "grad_norm": 0.116932213306427, "learning_rate": 0.002, "loss": 2.3269, "step": 318490 }, { "epoch": 1.2312319277574184, "grad_norm": 0.10263022780418396, "learning_rate": 0.002, "loss": 2.3321, "step": 318500 }, { "epoch": 1.2312705849608017, "grad_norm": 0.09604064375162125, "learning_rate": 0.002, "loss": 2.3355, "step": 318510 }, { "epoch": 1.231309242164185, "grad_norm": 0.1132984459400177, "learning_rate": 0.002, "loss": 2.3465, "step": 318520 }, { "epoch": 1.2313478993675682, "grad_norm": 0.10443414002656937, "learning_rate": 0.002, "loss": 2.3191, "step": 318530 }, { "epoch": 1.2313865565709514, "grad_norm": 0.12972725927829742, "learning_rate": 0.002, "loss": 2.3376, "step": 318540 }, { "epoch": 1.2314252137743347, "grad_norm": 0.10199279338121414, "learning_rate": 0.002, "loss": 2.3296, "step": 318550 }, { "epoch": 1.231463870977718, "grad_norm": 0.10174122452735901, "learning_rate": 0.002, "loss": 2.3438, "step": 318560 }, { "epoch": 1.2315025281811012, "grad_norm": 0.10849396884441376, "learning_rate": 0.002, "loss": 2.3339, "step": 318570 }, { "epoch": 1.2315411853844846, "grad_norm": 0.09650886058807373, "learning_rate": 0.002, "loss": 2.3331, "step": 318580 }, { "epoch": 1.231579842587868, "grad_norm": 0.09846727550029755, "learning_rate": 0.002, "loss": 2.3392, "step": 318590 }, { "epoch": 1.2316184997912512, "grad_norm": 0.10801167041063309, "learning_rate": 0.002, "loss": 2.3291, "step": 318600 }, { "epoch": 1.2316571569946344, "grad_norm": 0.1104314848780632, "learning_rate": 0.002, "loss": 2.3378, "step": 318610 }, { "epoch": 1.2316958141980177, "grad_norm": 0.11098235100507736, "learning_rate": 0.002, "loss": 2.336, "step": 318620 }, { "epoch": 1.231734471401401, "grad_norm": 0.09050797671079636, "learning_rate": 0.002, "loss": 2.333, "step": 318630 }, { "epoch": 1.2317731286047842, "grad_norm": 0.12480476498603821, "learning_rate": 0.002, "loss": 2.34, "step": 318640 }, { "epoch": 1.2318117858081674, "grad_norm": 0.09416493028402328, "learning_rate": 0.002, "loss": 2.3268, "step": 318650 }, { "epoch": 1.2318504430115507, "grad_norm": 0.10407298058271408, "learning_rate": 0.002, "loss": 2.3525, "step": 318660 }, { "epoch": 1.2318891002149341, "grad_norm": 0.10351002961397171, "learning_rate": 0.002, "loss": 2.3285, "step": 318670 }, { "epoch": 1.2319277574183174, "grad_norm": 0.12359218299388885, "learning_rate": 0.002, "loss": 2.3285, "step": 318680 }, { "epoch": 1.2319664146217006, "grad_norm": 0.10347049683332443, "learning_rate": 0.002, "loss": 2.3494, "step": 318690 }, { "epoch": 1.232005071825084, "grad_norm": 0.11429308354854584, "learning_rate": 0.002, "loss": 2.3391, "step": 318700 }, { "epoch": 1.2320437290284671, "grad_norm": 0.0998351201415062, "learning_rate": 0.002, "loss": 2.3432, "step": 318710 }, { "epoch": 1.2320823862318504, "grad_norm": 0.09996669739484787, "learning_rate": 0.002, "loss": 2.3356, "step": 318720 }, { "epoch": 1.2321210434352337, "grad_norm": 0.11034220457077026, "learning_rate": 0.002, "loss": 2.3351, "step": 318730 }, { "epoch": 1.232159700638617, "grad_norm": 0.10390716046094894, "learning_rate": 0.002, "loss": 2.3361, "step": 318740 }, { "epoch": 1.2321983578420004, "grad_norm": 0.11131299287080765, "learning_rate": 0.002, "loss": 2.342, "step": 318750 }, { "epoch": 1.2322370150453836, "grad_norm": 0.10226169228553772, "learning_rate": 0.002, "loss": 2.3332, "step": 318760 }, { "epoch": 1.2322756722487669, "grad_norm": 0.11132095754146576, "learning_rate": 0.002, "loss": 2.3302, "step": 318770 }, { "epoch": 1.2323143294521501, "grad_norm": 0.10304592549800873, "learning_rate": 0.002, "loss": 2.3331, "step": 318780 }, { "epoch": 1.2323529866555334, "grad_norm": 0.10334457457065582, "learning_rate": 0.002, "loss": 2.3292, "step": 318790 }, { "epoch": 1.2323916438589166, "grad_norm": 0.09750176966190338, "learning_rate": 0.002, "loss": 2.3468, "step": 318800 }, { "epoch": 1.2324303010623, "grad_norm": 0.1015402153134346, "learning_rate": 0.002, "loss": 2.3314, "step": 318810 }, { "epoch": 1.2324689582656831, "grad_norm": 0.09915605187416077, "learning_rate": 0.002, "loss": 2.3266, "step": 318820 }, { "epoch": 1.2325076154690664, "grad_norm": 0.14046163856983185, "learning_rate": 0.002, "loss": 2.3233, "step": 318830 }, { "epoch": 1.2325462726724499, "grad_norm": 0.09245661646127701, "learning_rate": 0.002, "loss": 2.3474, "step": 318840 }, { "epoch": 1.2325849298758331, "grad_norm": 0.1208549439907074, "learning_rate": 0.002, "loss": 2.3427, "step": 318850 }, { "epoch": 1.2326235870792164, "grad_norm": 0.11933174729347229, "learning_rate": 0.002, "loss": 2.3407, "step": 318860 }, { "epoch": 1.2326622442825996, "grad_norm": 0.10953730344772339, "learning_rate": 0.002, "loss": 2.3258, "step": 318870 }, { "epoch": 1.2327009014859829, "grad_norm": 0.09889823198318481, "learning_rate": 0.002, "loss": 2.3384, "step": 318880 }, { "epoch": 1.2327395586893661, "grad_norm": 0.11832044273614883, "learning_rate": 0.002, "loss": 2.3405, "step": 318890 }, { "epoch": 1.2327782158927494, "grad_norm": 0.09988594800233841, "learning_rate": 0.002, "loss": 2.3369, "step": 318900 }, { "epoch": 1.2328168730961326, "grad_norm": 0.11520710587501526, "learning_rate": 0.002, "loss": 2.3353, "step": 318910 }, { "epoch": 1.2328555302995161, "grad_norm": 0.11049897968769073, "learning_rate": 0.002, "loss": 2.3311, "step": 318920 }, { "epoch": 1.2328941875028994, "grad_norm": 0.09919171780347824, "learning_rate": 0.002, "loss": 2.3402, "step": 318930 }, { "epoch": 1.2329328447062826, "grad_norm": 0.11593607813119888, "learning_rate": 0.002, "loss": 2.3328, "step": 318940 }, { "epoch": 1.2329715019096659, "grad_norm": 0.10163404792547226, "learning_rate": 0.002, "loss": 2.347, "step": 318950 }, { "epoch": 1.2330101591130491, "grad_norm": 0.12897267937660217, "learning_rate": 0.002, "loss": 2.3268, "step": 318960 }, { "epoch": 1.2330488163164324, "grad_norm": 0.09961461275815964, "learning_rate": 0.002, "loss": 2.3387, "step": 318970 }, { "epoch": 1.2330874735198156, "grad_norm": 0.11196241527795792, "learning_rate": 0.002, "loss": 2.335, "step": 318980 }, { "epoch": 1.2331261307231989, "grad_norm": 0.10454481095075607, "learning_rate": 0.002, "loss": 2.3414, "step": 318990 }, { "epoch": 1.2331647879265821, "grad_norm": 0.10805007815361023, "learning_rate": 0.002, "loss": 2.3468, "step": 319000 }, { "epoch": 1.2332034451299656, "grad_norm": 0.14677643775939941, "learning_rate": 0.002, "loss": 2.3448, "step": 319010 }, { "epoch": 1.2332421023333489, "grad_norm": 0.09992588311433792, "learning_rate": 0.002, "loss": 2.3295, "step": 319020 }, { "epoch": 1.233280759536732, "grad_norm": 0.09757273644208908, "learning_rate": 0.002, "loss": 2.3359, "step": 319030 }, { "epoch": 1.2333194167401154, "grad_norm": 0.10164768248796463, "learning_rate": 0.002, "loss": 2.3307, "step": 319040 }, { "epoch": 1.2333580739434986, "grad_norm": 0.10568263381719589, "learning_rate": 0.002, "loss": 2.3202, "step": 319050 }, { "epoch": 1.2333967311468819, "grad_norm": 0.1015145406126976, "learning_rate": 0.002, "loss": 2.3377, "step": 319060 }, { "epoch": 1.2334353883502651, "grad_norm": 0.11714985966682434, "learning_rate": 0.002, "loss": 2.3295, "step": 319070 }, { "epoch": 1.2334740455536486, "grad_norm": 0.10222362726926804, "learning_rate": 0.002, "loss": 2.3286, "step": 319080 }, { "epoch": 1.2335127027570318, "grad_norm": 0.1122443675994873, "learning_rate": 0.002, "loss": 2.3438, "step": 319090 }, { "epoch": 1.233551359960415, "grad_norm": 0.09449625760316849, "learning_rate": 0.002, "loss": 2.3244, "step": 319100 }, { "epoch": 1.2335900171637983, "grad_norm": 0.11275065690279007, "learning_rate": 0.002, "loss": 2.325, "step": 319110 }, { "epoch": 1.2336286743671816, "grad_norm": 0.09365367144346237, "learning_rate": 0.002, "loss": 2.3186, "step": 319120 }, { "epoch": 1.2336673315705649, "grad_norm": 0.09719277918338776, "learning_rate": 0.002, "loss": 2.3303, "step": 319130 }, { "epoch": 1.233705988773948, "grad_norm": 0.11273623257875443, "learning_rate": 0.002, "loss": 2.3392, "step": 319140 }, { "epoch": 1.2337446459773314, "grad_norm": 0.11690273135900497, "learning_rate": 0.002, "loss": 2.3191, "step": 319150 }, { "epoch": 1.2337833031807146, "grad_norm": 0.10188272595405579, "learning_rate": 0.002, "loss": 2.3278, "step": 319160 }, { "epoch": 1.2338219603840979, "grad_norm": 0.09970256686210632, "learning_rate": 0.002, "loss": 2.335, "step": 319170 }, { "epoch": 1.2338606175874813, "grad_norm": 0.112132728099823, "learning_rate": 0.002, "loss": 2.3215, "step": 319180 }, { "epoch": 1.2338992747908646, "grad_norm": 0.10451371222734451, "learning_rate": 0.002, "loss": 2.3388, "step": 319190 }, { "epoch": 1.2339379319942478, "grad_norm": 0.11047357320785522, "learning_rate": 0.002, "loss": 2.3449, "step": 319200 }, { "epoch": 1.233976589197631, "grad_norm": 0.10152693837881088, "learning_rate": 0.002, "loss": 2.3468, "step": 319210 }, { "epoch": 1.2340152464010143, "grad_norm": 0.10769753903150558, "learning_rate": 0.002, "loss": 2.3349, "step": 319220 }, { "epoch": 1.2340539036043976, "grad_norm": 0.10410183668136597, "learning_rate": 0.002, "loss": 2.3462, "step": 319230 }, { "epoch": 1.2340925608077808, "grad_norm": 0.09276141226291656, "learning_rate": 0.002, "loss": 2.3496, "step": 319240 }, { "epoch": 1.2341312180111643, "grad_norm": 0.09850878268480301, "learning_rate": 0.002, "loss": 2.3304, "step": 319250 }, { "epoch": 1.2341698752145476, "grad_norm": 0.10244489461183548, "learning_rate": 0.002, "loss": 2.3422, "step": 319260 }, { "epoch": 1.2342085324179308, "grad_norm": 0.11640128493309021, "learning_rate": 0.002, "loss": 2.3303, "step": 319270 }, { "epoch": 1.234247189621314, "grad_norm": 0.1207994893193245, "learning_rate": 0.002, "loss": 2.332, "step": 319280 }, { "epoch": 1.2342858468246973, "grad_norm": 0.10476819425821304, "learning_rate": 0.002, "loss": 2.3323, "step": 319290 }, { "epoch": 1.2343245040280806, "grad_norm": 0.10477297753095627, "learning_rate": 0.002, "loss": 2.3292, "step": 319300 }, { "epoch": 1.2343631612314638, "grad_norm": 0.10364378988742828, "learning_rate": 0.002, "loss": 2.3273, "step": 319310 }, { "epoch": 1.234401818434847, "grad_norm": 0.10070034861564636, "learning_rate": 0.002, "loss": 2.3301, "step": 319320 }, { "epoch": 1.2344404756382303, "grad_norm": 0.1035122498869896, "learning_rate": 0.002, "loss": 2.3373, "step": 319330 }, { "epoch": 1.2344791328416136, "grad_norm": 0.11822505295276642, "learning_rate": 0.002, "loss": 2.3341, "step": 319340 }, { "epoch": 1.234517790044997, "grad_norm": 0.10413868725299835, "learning_rate": 0.002, "loss": 2.3432, "step": 319350 }, { "epoch": 1.2345564472483803, "grad_norm": 0.11653405427932739, "learning_rate": 0.002, "loss": 2.3399, "step": 319360 }, { "epoch": 1.2345951044517636, "grad_norm": 0.09522883594036102, "learning_rate": 0.002, "loss": 2.3356, "step": 319370 }, { "epoch": 1.2346337616551468, "grad_norm": 0.13596872985363007, "learning_rate": 0.002, "loss": 2.3325, "step": 319380 }, { "epoch": 1.23467241885853, "grad_norm": 0.09495305269956589, "learning_rate": 0.002, "loss": 2.3445, "step": 319390 }, { "epoch": 1.2347110760619133, "grad_norm": 0.09859729558229446, "learning_rate": 0.002, "loss": 2.3335, "step": 319400 }, { "epoch": 1.2347497332652966, "grad_norm": 0.09662479907274246, "learning_rate": 0.002, "loss": 2.3275, "step": 319410 }, { "epoch": 1.23478839046868, "grad_norm": 0.11425012350082397, "learning_rate": 0.002, "loss": 2.3411, "step": 319420 }, { "epoch": 1.2348270476720633, "grad_norm": 0.12249550223350525, "learning_rate": 0.002, "loss": 2.3467, "step": 319430 }, { "epoch": 1.2348657048754466, "grad_norm": 0.09997022151947021, "learning_rate": 0.002, "loss": 2.3316, "step": 319440 }, { "epoch": 1.2349043620788298, "grad_norm": 0.11657397449016571, "learning_rate": 0.002, "loss": 2.3381, "step": 319450 }, { "epoch": 1.234943019282213, "grad_norm": 0.08549603074789047, "learning_rate": 0.002, "loss": 2.3437, "step": 319460 }, { "epoch": 1.2349816764855963, "grad_norm": 0.10350823402404785, "learning_rate": 0.002, "loss": 2.3406, "step": 319470 }, { "epoch": 1.2350203336889796, "grad_norm": 0.10171400755643845, "learning_rate": 0.002, "loss": 2.3364, "step": 319480 }, { "epoch": 1.2350589908923628, "grad_norm": 0.1119752749800682, "learning_rate": 0.002, "loss": 2.3447, "step": 319490 }, { "epoch": 1.235097648095746, "grad_norm": 0.11628837138414383, "learning_rate": 0.002, "loss": 2.3283, "step": 319500 }, { "epoch": 1.2351363052991293, "grad_norm": 0.10625061392784119, "learning_rate": 0.002, "loss": 2.3328, "step": 319510 }, { "epoch": 1.2351749625025128, "grad_norm": 0.09767068922519684, "learning_rate": 0.002, "loss": 2.3076, "step": 319520 }, { "epoch": 1.235213619705896, "grad_norm": 0.10150033980607986, "learning_rate": 0.002, "loss": 2.3368, "step": 319530 }, { "epoch": 1.2352522769092793, "grad_norm": 0.10505492240190506, "learning_rate": 0.002, "loss": 2.3301, "step": 319540 }, { "epoch": 1.2352909341126626, "grad_norm": 0.09431470930576324, "learning_rate": 0.002, "loss": 2.3425, "step": 319550 }, { "epoch": 1.2353295913160458, "grad_norm": 0.0902949795126915, "learning_rate": 0.002, "loss": 2.3179, "step": 319560 }, { "epoch": 1.235368248519429, "grad_norm": 0.09864936023950577, "learning_rate": 0.002, "loss": 2.3406, "step": 319570 }, { "epoch": 1.2354069057228123, "grad_norm": 0.12213067710399628, "learning_rate": 0.002, "loss": 2.3341, "step": 319580 }, { "epoch": 1.2354455629261958, "grad_norm": 0.0943695455789566, "learning_rate": 0.002, "loss": 2.3327, "step": 319590 }, { "epoch": 1.235484220129579, "grad_norm": 0.12795186042785645, "learning_rate": 0.002, "loss": 2.3299, "step": 319600 }, { "epoch": 1.2355228773329623, "grad_norm": 0.1095963716506958, "learning_rate": 0.002, "loss": 2.3337, "step": 319610 }, { "epoch": 1.2355615345363455, "grad_norm": 0.10382067412137985, "learning_rate": 0.002, "loss": 2.3371, "step": 319620 }, { "epoch": 1.2356001917397288, "grad_norm": 0.11466068774461746, "learning_rate": 0.002, "loss": 2.3281, "step": 319630 }, { "epoch": 1.235638848943112, "grad_norm": 0.1038190945982933, "learning_rate": 0.002, "loss": 2.3248, "step": 319640 }, { "epoch": 1.2356775061464953, "grad_norm": 0.13916151225566864, "learning_rate": 0.002, "loss": 2.3447, "step": 319650 }, { "epoch": 1.2357161633498785, "grad_norm": 0.14593204855918884, "learning_rate": 0.002, "loss": 2.3328, "step": 319660 }, { "epoch": 1.2357548205532618, "grad_norm": 0.22053200006484985, "learning_rate": 0.002, "loss": 2.3471, "step": 319670 }, { "epoch": 1.235793477756645, "grad_norm": 0.10943808406591415, "learning_rate": 0.002, "loss": 2.3565, "step": 319680 }, { "epoch": 1.2358321349600285, "grad_norm": 0.09973354637622833, "learning_rate": 0.002, "loss": 2.3356, "step": 319690 }, { "epoch": 1.2358707921634118, "grad_norm": 0.09205275028944016, "learning_rate": 0.002, "loss": 2.3269, "step": 319700 }, { "epoch": 1.235909449366795, "grad_norm": 0.1062823161482811, "learning_rate": 0.002, "loss": 2.3337, "step": 319710 }, { "epoch": 1.2359481065701783, "grad_norm": 0.10272687673568726, "learning_rate": 0.002, "loss": 2.3337, "step": 319720 }, { "epoch": 1.2359867637735615, "grad_norm": 0.1765047013759613, "learning_rate": 0.002, "loss": 2.3236, "step": 319730 }, { "epoch": 1.2360254209769448, "grad_norm": 0.10312442481517792, "learning_rate": 0.002, "loss": 2.3311, "step": 319740 }, { "epoch": 1.236064078180328, "grad_norm": 0.10451663285493851, "learning_rate": 0.002, "loss": 2.3282, "step": 319750 }, { "epoch": 1.2361027353837115, "grad_norm": 0.11653146147727966, "learning_rate": 0.002, "loss": 2.3286, "step": 319760 }, { "epoch": 1.2361413925870948, "grad_norm": 0.11353792995214462, "learning_rate": 0.002, "loss": 2.3421, "step": 319770 }, { "epoch": 1.236180049790478, "grad_norm": 0.10471892356872559, "learning_rate": 0.002, "loss": 2.3546, "step": 319780 }, { "epoch": 1.2362187069938613, "grad_norm": 0.11989996582269669, "learning_rate": 0.002, "loss": 2.3404, "step": 319790 }, { "epoch": 1.2362573641972445, "grad_norm": 0.09007346630096436, "learning_rate": 0.002, "loss": 2.3307, "step": 319800 }, { "epoch": 1.2362960214006278, "grad_norm": 0.10456572473049164, "learning_rate": 0.002, "loss": 2.3281, "step": 319810 }, { "epoch": 1.236334678604011, "grad_norm": 0.11958345770835876, "learning_rate": 0.002, "loss": 2.326, "step": 319820 }, { "epoch": 1.2363733358073943, "grad_norm": 0.10415913909673691, "learning_rate": 0.002, "loss": 2.3318, "step": 319830 }, { "epoch": 1.2364119930107775, "grad_norm": 0.10972173511981964, "learning_rate": 0.002, "loss": 2.3277, "step": 319840 }, { "epoch": 1.2364506502141608, "grad_norm": 0.12542030215263367, "learning_rate": 0.002, "loss": 2.3265, "step": 319850 }, { "epoch": 1.2364893074175443, "grad_norm": 0.10410010814666748, "learning_rate": 0.002, "loss": 2.3363, "step": 319860 }, { "epoch": 1.2365279646209275, "grad_norm": 0.10815926641225815, "learning_rate": 0.002, "loss": 2.3342, "step": 319870 }, { "epoch": 1.2365666218243108, "grad_norm": 0.11116991937160492, "learning_rate": 0.002, "loss": 2.3387, "step": 319880 }, { "epoch": 1.236605279027694, "grad_norm": 0.11590959131717682, "learning_rate": 0.002, "loss": 2.3197, "step": 319890 }, { "epoch": 1.2366439362310773, "grad_norm": 0.22155319154262543, "learning_rate": 0.002, "loss": 2.3377, "step": 319900 }, { "epoch": 1.2366825934344605, "grad_norm": 0.11598339676856995, "learning_rate": 0.002, "loss": 2.3304, "step": 319910 }, { "epoch": 1.2367212506378438, "grad_norm": 0.11123668402433395, "learning_rate": 0.002, "loss": 2.3292, "step": 319920 }, { "epoch": 1.2367599078412272, "grad_norm": 0.1021030843257904, "learning_rate": 0.002, "loss": 2.3421, "step": 319930 }, { "epoch": 1.2367985650446105, "grad_norm": 0.12723006308078766, "learning_rate": 0.002, "loss": 2.3516, "step": 319940 }, { "epoch": 1.2368372222479938, "grad_norm": 0.10411926358938217, "learning_rate": 0.002, "loss": 2.3286, "step": 319950 }, { "epoch": 1.236875879451377, "grad_norm": 0.11432521045207977, "learning_rate": 0.002, "loss": 2.3416, "step": 319960 }, { "epoch": 1.2369145366547603, "grad_norm": 0.10154687613248825, "learning_rate": 0.002, "loss": 2.3262, "step": 319970 }, { "epoch": 1.2369531938581435, "grad_norm": 0.10039026290178299, "learning_rate": 0.002, "loss": 2.3422, "step": 319980 }, { "epoch": 1.2369918510615268, "grad_norm": 0.10765459388494492, "learning_rate": 0.002, "loss": 2.3357, "step": 319990 }, { "epoch": 1.23703050826491, "grad_norm": 0.113979272544384, "learning_rate": 0.002, "loss": 2.3362, "step": 320000 }, { "epoch": 1.2370691654682933, "grad_norm": 0.096729576587677, "learning_rate": 0.002, "loss": 2.3432, "step": 320010 }, { "epoch": 1.2371078226716765, "grad_norm": 0.10932206362485886, "learning_rate": 0.002, "loss": 2.3317, "step": 320020 }, { "epoch": 1.23714647987506, "grad_norm": 0.10438414663076401, "learning_rate": 0.002, "loss": 2.3304, "step": 320030 }, { "epoch": 1.2371851370784432, "grad_norm": 0.09965009987354279, "learning_rate": 0.002, "loss": 2.3267, "step": 320040 }, { "epoch": 1.2372237942818265, "grad_norm": 0.10202410072088242, "learning_rate": 0.002, "loss": 2.3433, "step": 320050 }, { "epoch": 1.2372624514852097, "grad_norm": 0.10439611971378326, "learning_rate": 0.002, "loss": 2.3366, "step": 320060 }, { "epoch": 1.237301108688593, "grad_norm": 0.10762651264667511, "learning_rate": 0.002, "loss": 2.3298, "step": 320070 }, { "epoch": 1.2373397658919763, "grad_norm": 0.10113856941461563, "learning_rate": 0.002, "loss": 2.3427, "step": 320080 }, { "epoch": 1.2373784230953595, "grad_norm": 0.09861285239458084, "learning_rate": 0.002, "loss": 2.3335, "step": 320090 }, { "epoch": 1.237417080298743, "grad_norm": 0.12643854320049286, "learning_rate": 0.002, "loss": 2.3427, "step": 320100 }, { "epoch": 1.2374557375021262, "grad_norm": 0.10227780789136887, "learning_rate": 0.002, "loss": 2.3293, "step": 320110 }, { "epoch": 1.2374943947055095, "grad_norm": 0.10449239611625671, "learning_rate": 0.002, "loss": 2.3292, "step": 320120 }, { "epoch": 1.2375330519088927, "grad_norm": 0.10152996331453323, "learning_rate": 0.002, "loss": 2.3285, "step": 320130 }, { "epoch": 1.237571709112276, "grad_norm": 0.11013568937778473, "learning_rate": 0.002, "loss": 2.3435, "step": 320140 }, { "epoch": 1.2376103663156592, "grad_norm": 0.12788760662078857, "learning_rate": 0.002, "loss": 2.3399, "step": 320150 }, { "epoch": 1.2376490235190425, "grad_norm": 0.27631428837776184, "learning_rate": 0.002, "loss": 2.3367, "step": 320160 }, { "epoch": 1.2376876807224257, "grad_norm": 0.1050380989909172, "learning_rate": 0.002, "loss": 2.3522, "step": 320170 }, { "epoch": 1.237726337925809, "grad_norm": 0.19374831020832062, "learning_rate": 0.002, "loss": 2.3431, "step": 320180 }, { "epoch": 1.2377649951291925, "grad_norm": 0.1197795569896698, "learning_rate": 0.002, "loss": 2.3316, "step": 320190 }, { "epoch": 1.2378036523325757, "grad_norm": 0.10829479247331619, "learning_rate": 0.002, "loss": 2.3521, "step": 320200 }, { "epoch": 1.237842309535959, "grad_norm": 0.11106017231941223, "learning_rate": 0.002, "loss": 2.3323, "step": 320210 }, { "epoch": 1.2378809667393422, "grad_norm": 0.10681232810020447, "learning_rate": 0.002, "loss": 2.3408, "step": 320220 }, { "epoch": 1.2379196239427255, "grad_norm": 0.09508251398801804, "learning_rate": 0.002, "loss": 2.3253, "step": 320230 }, { "epoch": 1.2379582811461087, "grad_norm": 0.10988068580627441, "learning_rate": 0.002, "loss": 2.3323, "step": 320240 }, { "epoch": 1.237996938349492, "grad_norm": 0.09734196215867996, "learning_rate": 0.002, "loss": 2.3327, "step": 320250 }, { "epoch": 1.2380355955528752, "grad_norm": 0.10664012283086777, "learning_rate": 0.002, "loss": 2.3426, "step": 320260 }, { "epoch": 1.2380742527562587, "grad_norm": 0.10162442177534103, "learning_rate": 0.002, "loss": 2.3304, "step": 320270 }, { "epoch": 1.238112909959642, "grad_norm": 0.11559383571147919, "learning_rate": 0.002, "loss": 2.3338, "step": 320280 }, { "epoch": 1.2381515671630252, "grad_norm": 0.10670052468776703, "learning_rate": 0.002, "loss": 2.331, "step": 320290 }, { "epoch": 1.2381902243664085, "grad_norm": 0.11281143128871918, "learning_rate": 0.002, "loss": 2.3308, "step": 320300 }, { "epoch": 1.2382288815697917, "grad_norm": 0.10445000231266022, "learning_rate": 0.002, "loss": 2.3237, "step": 320310 }, { "epoch": 1.238267538773175, "grad_norm": 0.1028844341635704, "learning_rate": 0.002, "loss": 2.3459, "step": 320320 }, { "epoch": 1.2383061959765582, "grad_norm": 0.10735354572534561, "learning_rate": 0.002, "loss": 2.3264, "step": 320330 }, { "epoch": 1.2383448531799415, "grad_norm": 0.10678750276565552, "learning_rate": 0.002, "loss": 2.3607, "step": 320340 }, { "epoch": 1.2383835103833247, "grad_norm": 0.09609895199537277, "learning_rate": 0.002, "loss": 2.3365, "step": 320350 }, { "epoch": 1.2384221675867082, "grad_norm": 0.10678284615278244, "learning_rate": 0.002, "loss": 2.3303, "step": 320360 }, { "epoch": 1.2384608247900915, "grad_norm": 0.10990513861179352, "learning_rate": 0.002, "loss": 2.3492, "step": 320370 }, { "epoch": 1.2384994819934747, "grad_norm": 0.12047272175550461, "learning_rate": 0.002, "loss": 2.3327, "step": 320380 }, { "epoch": 1.238538139196858, "grad_norm": 0.09940064698457718, "learning_rate": 0.002, "loss": 2.3386, "step": 320390 }, { "epoch": 1.2385767964002412, "grad_norm": 0.09595578163862228, "learning_rate": 0.002, "loss": 2.3385, "step": 320400 }, { "epoch": 1.2386154536036245, "grad_norm": 0.11717253923416138, "learning_rate": 0.002, "loss": 2.3443, "step": 320410 }, { "epoch": 1.2386541108070077, "grad_norm": 0.1013934463262558, "learning_rate": 0.002, "loss": 2.3299, "step": 320420 }, { "epoch": 1.238692768010391, "grad_norm": 0.1056397333741188, "learning_rate": 0.002, "loss": 2.3122, "step": 320430 }, { "epoch": 1.2387314252137744, "grad_norm": 0.11160103231668472, "learning_rate": 0.002, "loss": 2.332, "step": 320440 }, { "epoch": 1.2387700824171577, "grad_norm": 0.09870457649230957, "learning_rate": 0.002, "loss": 2.3423, "step": 320450 }, { "epoch": 1.238808739620541, "grad_norm": 0.10088534653186798, "learning_rate": 0.002, "loss": 2.3368, "step": 320460 }, { "epoch": 1.2388473968239242, "grad_norm": 0.09906332194805145, "learning_rate": 0.002, "loss": 2.3334, "step": 320470 }, { "epoch": 1.2388860540273074, "grad_norm": 0.10245811194181442, "learning_rate": 0.002, "loss": 2.3329, "step": 320480 }, { "epoch": 1.2389247112306907, "grad_norm": 0.11680164188146591, "learning_rate": 0.002, "loss": 2.3485, "step": 320490 }, { "epoch": 1.238963368434074, "grad_norm": 0.09580478072166443, "learning_rate": 0.002, "loss": 2.3374, "step": 320500 }, { "epoch": 1.2390020256374572, "grad_norm": 0.10380738228559494, "learning_rate": 0.002, "loss": 2.3443, "step": 320510 }, { "epoch": 1.2390406828408405, "grad_norm": 0.10249635577201843, "learning_rate": 0.002, "loss": 2.3492, "step": 320520 }, { "epoch": 1.239079340044224, "grad_norm": 0.11178512126207352, "learning_rate": 0.002, "loss": 2.3453, "step": 320530 }, { "epoch": 1.2391179972476072, "grad_norm": 0.10207261145114899, "learning_rate": 0.002, "loss": 2.327, "step": 320540 }, { "epoch": 1.2391566544509904, "grad_norm": 0.11807084828615189, "learning_rate": 0.002, "loss": 2.3353, "step": 320550 }, { "epoch": 1.2391953116543737, "grad_norm": 0.10369338095188141, "learning_rate": 0.002, "loss": 2.3453, "step": 320560 }, { "epoch": 1.239233968857757, "grad_norm": 0.09732040017843246, "learning_rate": 0.002, "loss": 2.3391, "step": 320570 }, { "epoch": 1.2392726260611402, "grad_norm": 0.10082051903009415, "learning_rate": 0.002, "loss": 2.3236, "step": 320580 }, { "epoch": 1.2393112832645234, "grad_norm": 0.10379715263843536, "learning_rate": 0.002, "loss": 2.3444, "step": 320590 }, { "epoch": 1.2393499404679067, "grad_norm": 0.10741373896598816, "learning_rate": 0.002, "loss": 2.3345, "step": 320600 }, { "epoch": 1.2393885976712902, "grad_norm": 0.0996238961815834, "learning_rate": 0.002, "loss": 2.3201, "step": 320610 }, { "epoch": 1.2394272548746734, "grad_norm": 0.10056623816490173, "learning_rate": 0.002, "loss": 2.3443, "step": 320620 }, { "epoch": 1.2394659120780567, "grad_norm": 0.11594576388597488, "learning_rate": 0.002, "loss": 2.3315, "step": 320630 }, { "epoch": 1.23950456928144, "grad_norm": 0.11052383482456207, "learning_rate": 0.002, "loss": 2.3318, "step": 320640 }, { "epoch": 1.2395432264848232, "grad_norm": 0.10528027266263962, "learning_rate": 0.002, "loss": 2.3335, "step": 320650 }, { "epoch": 1.2395818836882064, "grad_norm": 0.08858749270439148, "learning_rate": 0.002, "loss": 2.3319, "step": 320660 }, { "epoch": 1.2396205408915897, "grad_norm": 0.1268991082906723, "learning_rate": 0.002, "loss": 2.3428, "step": 320670 }, { "epoch": 1.239659198094973, "grad_norm": 0.10651437938213348, "learning_rate": 0.002, "loss": 2.3393, "step": 320680 }, { "epoch": 1.2396978552983562, "grad_norm": 0.10093619674444199, "learning_rate": 0.002, "loss": 2.3473, "step": 320690 }, { "epoch": 1.2397365125017397, "grad_norm": 0.10272887349128723, "learning_rate": 0.002, "loss": 2.3247, "step": 320700 }, { "epoch": 1.239775169705123, "grad_norm": 0.10681917518377304, "learning_rate": 0.002, "loss": 2.3322, "step": 320710 }, { "epoch": 1.2398138269085062, "grad_norm": 0.10045037418603897, "learning_rate": 0.002, "loss": 2.3346, "step": 320720 }, { "epoch": 1.2398524841118894, "grad_norm": 0.10670724511146545, "learning_rate": 0.002, "loss": 2.3371, "step": 320730 }, { "epoch": 1.2398911413152727, "grad_norm": 0.11286060512065887, "learning_rate": 0.002, "loss": 2.3352, "step": 320740 }, { "epoch": 1.239929798518656, "grad_norm": 0.09124636650085449, "learning_rate": 0.002, "loss": 2.323, "step": 320750 }, { "epoch": 1.2399684557220392, "grad_norm": 0.11720071732997894, "learning_rate": 0.002, "loss": 2.3324, "step": 320760 }, { "epoch": 1.2400071129254224, "grad_norm": 0.11184117943048477, "learning_rate": 0.002, "loss": 2.3372, "step": 320770 }, { "epoch": 1.240045770128806, "grad_norm": 0.09715303778648376, "learning_rate": 0.002, "loss": 2.3363, "step": 320780 }, { "epoch": 1.2400844273321892, "grad_norm": 0.10788849741220474, "learning_rate": 0.002, "loss": 2.3138, "step": 320790 }, { "epoch": 1.2401230845355724, "grad_norm": 0.10525120049715042, "learning_rate": 0.002, "loss": 2.3406, "step": 320800 }, { "epoch": 1.2401617417389557, "grad_norm": 0.10268529504537582, "learning_rate": 0.002, "loss": 2.3327, "step": 320810 }, { "epoch": 1.240200398942339, "grad_norm": 0.11665871739387512, "learning_rate": 0.002, "loss": 2.3401, "step": 320820 }, { "epoch": 1.2402390561457222, "grad_norm": 0.09270110726356506, "learning_rate": 0.002, "loss": 2.3351, "step": 320830 }, { "epoch": 1.2402777133491054, "grad_norm": 0.12644942104816437, "learning_rate": 0.002, "loss": 2.338, "step": 320840 }, { "epoch": 1.2403163705524887, "grad_norm": 0.1157117709517479, "learning_rate": 0.002, "loss": 2.3261, "step": 320850 }, { "epoch": 1.240355027755872, "grad_norm": 0.11809808015823364, "learning_rate": 0.002, "loss": 2.351, "step": 320860 }, { "epoch": 1.2403936849592554, "grad_norm": 0.114087775349617, "learning_rate": 0.002, "loss": 2.3286, "step": 320870 }, { "epoch": 1.2404323421626386, "grad_norm": 0.10386577993631363, "learning_rate": 0.002, "loss": 2.3294, "step": 320880 }, { "epoch": 1.240470999366022, "grad_norm": 0.11299306899309158, "learning_rate": 0.002, "loss": 2.3307, "step": 320890 }, { "epoch": 1.2405096565694052, "grad_norm": 0.09175659716129303, "learning_rate": 0.002, "loss": 2.3417, "step": 320900 }, { "epoch": 1.2405483137727884, "grad_norm": 0.1031114012002945, "learning_rate": 0.002, "loss": 2.3389, "step": 320910 }, { "epoch": 1.2405869709761717, "grad_norm": 0.13444626331329346, "learning_rate": 0.002, "loss": 2.3226, "step": 320920 }, { "epoch": 1.240625628179555, "grad_norm": 0.10885864496231079, "learning_rate": 0.002, "loss": 2.3372, "step": 320930 }, { "epoch": 1.2406642853829384, "grad_norm": 0.09645969420671463, "learning_rate": 0.002, "loss": 2.3418, "step": 320940 }, { "epoch": 1.2407029425863216, "grad_norm": 0.10317646712064743, "learning_rate": 0.002, "loss": 2.3357, "step": 320950 }, { "epoch": 1.2407415997897049, "grad_norm": 0.10633083432912827, "learning_rate": 0.002, "loss": 2.3428, "step": 320960 }, { "epoch": 1.2407802569930881, "grad_norm": 0.09729152172803879, "learning_rate": 0.002, "loss": 2.3348, "step": 320970 }, { "epoch": 1.2408189141964714, "grad_norm": 0.1272486001253128, "learning_rate": 0.002, "loss": 2.3348, "step": 320980 }, { "epoch": 1.2408575713998546, "grad_norm": 0.1055624857544899, "learning_rate": 0.002, "loss": 2.3517, "step": 320990 }, { "epoch": 1.240896228603238, "grad_norm": 0.09390482306480408, "learning_rate": 0.002, "loss": 2.341, "step": 321000 }, { "epoch": 1.2409348858066211, "grad_norm": 0.09995166212320328, "learning_rate": 0.002, "loss": 2.3278, "step": 321010 }, { "epoch": 1.2409735430100044, "grad_norm": 0.10061180591583252, "learning_rate": 0.002, "loss": 2.3174, "step": 321020 }, { "epoch": 1.2410122002133877, "grad_norm": 0.10821964591741562, "learning_rate": 0.002, "loss": 2.3321, "step": 321030 }, { "epoch": 1.2410508574167711, "grad_norm": 0.10576090961694717, "learning_rate": 0.002, "loss": 2.3371, "step": 321040 }, { "epoch": 1.2410895146201544, "grad_norm": 0.10806534439325333, "learning_rate": 0.002, "loss": 2.3365, "step": 321050 }, { "epoch": 1.2411281718235376, "grad_norm": 0.11444949358701706, "learning_rate": 0.002, "loss": 2.3399, "step": 321060 }, { "epoch": 1.2411668290269209, "grad_norm": 0.10749319940805435, "learning_rate": 0.002, "loss": 2.3478, "step": 321070 }, { "epoch": 1.2412054862303041, "grad_norm": 0.12176235020160675, "learning_rate": 0.002, "loss": 2.3437, "step": 321080 }, { "epoch": 1.2412441434336874, "grad_norm": 0.10878723859786987, "learning_rate": 0.002, "loss": 2.341, "step": 321090 }, { "epoch": 1.2412828006370706, "grad_norm": 0.11336250603199005, "learning_rate": 0.002, "loss": 2.3348, "step": 321100 }, { "epoch": 1.2413214578404541, "grad_norm": 0.0931602269411087, "learning_rate": 0.002, "loss": 2.3373, "step": 321110 }, { "epoch": 1.2413601150438374, "grad_norm": 0.10910608619451523, "learning_rate": 0.002, "loss": 2.3254, "step": 321120 }, { "epoch": 1.2413987722472206, "grad_norm": 0.1164921373128891, "learning_rate": 0.002, "loss": 2.3395, "step": 321130 }, { "epoch": 1.2414374294506039, "grad_norm": 0.09855716675519943, "learning_rate": 0.002, "loss": 2.3408, "step": 321140 }, { "epoch": 1.2414760866539871, "grad_norm": 0.10300564020872116, "learning_rate": 0.002, "loss": 2.336, "step": 321150 }, { "epoch": 1.2415147438573704, "grad_norm": 0.11619854718446732, "learning_rate": 0.002, "loss": 2.3213, "step": 321160 }, { "epoch": 1.2415534010607536, "grad_norm": 0.11751411855220795, "learning_rate": 0.002, "loss": 2.3416, "step": 321170 }, { "epoch": 1.2415920582641369, "grad_norm": 0.10656020045280457, "learning_rate": 0.002, "loss": 2.3438, "step": 321180 }, { "epoch": 1.2416307154675201, "grad_norm": 0.09795525670051575, "learning_rate": 0.002, "loss": 2.3347, "step": 321190 }, { "epoch": 1.2416693726709034, "grad_norm": 0.12078218907117844, "learning_rate": 0.002, "loss": 2.3486, "step": 321200 }, { "epoch": 1.2417080298742869, "grad_norm": 0.1273619532585144, "learning_rate": 0.002, "loss": 2.3317, "step": 321210 }, { "epoch": 1.24174668707767, "grad_norm": 0.0964427962899208, "learning_rate": 0.002, "loss": 2.3425, "step": 321220 }, { "epoch": 1.2417853442810534, "grad_norm": 0.11902978271245956, "learning_rate": 0.002, "loss": 2.3441, "step": 321230 }, { "epoch": 1.2418240014844366, "grad_norm": 0.1078677698969841, "learning_rate": 0.002, "loss": 2.3282, "step": 321240 }, { "epoch": 1.2418626586878199, "grad_norm": 0.08856847137212753, "learning_rate": 0.002, "loss": 2.3379, "step": 321250 }, { "epoch": 1.2419013158912031, "grad_norm": 0.09772702306509018, "learning_rate": 0.002, "loss": 2.3466, "step": 321260 }, { "epoch": 1.2419399730945864, "grad_norm": 0.11561083793640137, "learning_rate": 0.002, "loss": 2.3231, "step": 321270 }, { "epoch": 1.2419786302979698, "grad_norm": 0.10868635773658752, "learning_rate": 0.002, "loss": 2.3385, "step": 321280 }, { "epoch": 1.242017287501353, "grad_norm": 0.09984630346298218, "learning_rate": 0.002, "loss": 2.3494, "step": 321290 }, { "epoch": 1.2420559447047363, "grad_norm": 0.10971865803003311, "learning_rate": 0.002, "loss": 2.3496, "step": 321300 }, { "epoch": 1.2420946019081196, "grad_norm": 0.10339515656232834, "learning_rate": 0.002, "loss": 2.3295, "step": 321310 }, { "epoch": 1.2421332591115029, "grad_norm": 0.1033627837896347, "learning_rate": 0.002, "loss": 2.3243, "step": 321320 }, { "epoch": 1.242171916314886, "grad_norm": 0.11540020257234573, "learning_rate": 0.002, "loss": 2.3482, "step": 321330 }, { "epoch": 1.2422105735182694, "grad_norm": 0.11354392021894455, "learning_rate": 0.002, "loss": 2.3436, "step": 321340 }, { "epoch": 1.2422492307216526, "grad_norm": 0.11125141382217407, "learning_rate": 0.002, "loss": 2.3337, "step": 321350 }, { "epoch": 1.2422878879250359, "grad_norm": 0.09528633952140808, "learning_rate": 0.002, "loss": 2.3329, "step": 321360 }, { "epoch": 1.2423265451284191, "grad_norm": 0.10134680569171906, "learning_rate": 0.002, "loss": 2.3298, "step": 321370 }, { "epoch": 1.2423652023318026, "grad_norm": 0.10753732174634933, "learning_rate": 0.002, "loss": 2.335, "step": 321380 }, { "epoch": 1.2424038595351858, "grad_norm": 0.10672889649868011, "learning_rate": 0.002, "loss": 2.3466, "step": 321390 }, { "epoch": 1.242442516738569, "grad_norm": 0.11310621351003647, "learning_rate": 0.002, "loss": 2.3424, "step": 321400 }, { "epoch": 1.2424811739419523, "grad_norm": 0.10450848191976547, "learning_rate": 0.002, "loss": 2.3459, "step": 321410 }, { "epoch": 1.2425198311453356, "grad_norm": 0.11112019419670105, "learning_rate": 0.002, "loss": 2.3265, "step": 321420 }, { "epoch": 1.2425584883487188, "grad_norm": 0.10467560589313507, "learning_rate": 0.002, "loss": 2.3389, "step": 321430 }, { "epoch": 1.242597145552102, "grad_norm": 0.11490056663751602, "learning_rate": 0.002, "loss": 2.3158, "step": 321440 }, { "epoch": 1.2426358027554856, "grad_norm": 0.11514199525117874, "learning_rate": 0.002, "loss": 2.3234, "step": 321450 }, { "epoch": 1.2426744599588688, "grad_norm": 0.09664133936166763, "learning_rate": 0.002, "loss": 2.3296, "step": 321460 }, { "epoch": 1.242713117162252, "grad_norm": 0.09833734482526779, "learning_rate": 0.002, "loss": 2.3305, "step": 321470 }, { "epoch": 1.2427517743656353, "grad_norm": 0.0944104790687561, "learning_rate": 0.002, "loss": 2.3332, "step": 321480 }, { "epoch": 1.2427904315690186, "grad_norm": 0.113669253885746, "learning_rate": 0.002, "loss": 2.3428, "step": 321490 }, { "epoch": 1.2428290887724018, "grad_norm": 0.11991959810256958, "learning_rate": 0.002, "loss": 2.3299, "step": 321500 }, { "epoch": 1.242867745975785, "grad_norm": 0.11033076792955399, "learning_rate": 0.002, "loss": 2.3484, "step": 321510 }, { "epoch": 1.2429064031791683, "grad_norm": 0.10738854855298996, "learning_rate": 0.002, "loss": 2.3334, "step": 321520 }, { "epoch": 1.2429450603825516, "grad_norm": 0.09296815097332001, "learning_rate": 0.002, "loss": 2.3152, "step": 321530 }, { "epoch": 1.2429837175859348, "grad_norm": 0.09881693869829178, "learning_rate": 0.002, "loss": 2.3185, "step": 321540 }, { "epoch": 1.2430223747893183, "grad_norm": 0.10417408496141434, "learning_rate": 0.002, "loss": 2.3396, "step": 321550 }, { "epoch": 1.2430610319927016, "grad_norm": 0.11233276128768921, "learning_rate": 0.002, "loss": 2.3362, "step": 321560 }, { "epoch": 1.2430996891960848, "grad_norm": 0.12582659721374512, "learning_rate": 0.002, "loss": 2.3444, "step": 321570 }, { "epoch": 1.243138346399468, "grad_norm": 0.10311932116746902, "learning_rate": 0.002, "loss": 2.3349, "step": 321580 }, { "epoch": 1.2431770036028513, "grad_norm": 0.11493781954050064, "learning_rate": 0.002, "loss": 2.3453, "step": 321590 }, { "epoch": 1.2432156608062346, "grad_norm": 0.1054019182920456, "learning_rate": 0.002, "loss": 2.3239, "step": 321600 }, { "epoch": 1.2432543180096178, "grad_norm": 0.09729453176259995, "learning_rate": 0.002, "loss": 2.351, "step": 321610 }, { "epoch": 1.2432929752130013, "grad_norm": 0.11637569218873978, "learning_rate": 0.002, "loss": 2.3377, "step": 321620 }, { "epoch": 1.2433316324163846, "grad_norm": 0.1038183718919754, "learning_rate": 0.002, "loss": 2.3225, "step": 321630 }, { "epoch": 1.2433702896197678, "grad_norm": 0.13066309690475464, "learning_rate": 0.002, "loss": 2.3341, "step": 321640 }, { "epoch": 1.243408946823151, "grad_norm": 0.0983934998512268, "learning_rate": 0.002, "loss": 2.3422, "step": 321650 }, { "epoch": 1.2434476040265343, "grad_norm": 0.08770925551652908, "learning_rate": 0.002, "loss": 2.334, "step": 321660 }, { "epoch": 1.2434862612299176, "grad_norm": 0.11882904917001724, "learning_rate": 0.002, "loss": 2.321, "step": 321670 }, { "epoch": 1.2435249184333008, "grad_norm": 0.0967506393790245, "learning_rate": 0.002, "loss": 2.335, "step": 321680 }, { "epoch": 1.243563575636684, "grad_norm": 0.10030996799468994, "learning_rate": 0.002, "loss": 2.3351, "step": 321690 }, { "epoch": 1.2436022328400673, "grad_norm": 0.11539009213447571, "learning_rate": 0.002, "loss": 2.3244, "step": 321700 }, { "epoch": 1.2436408900434506, "grad_norm": 0.10054624825716019, "learning_rate": 0.002, "loss": 2.3354, "step": 321710 }, { "epoch": 1.243679547246834, "grad_norm": 0.11041809618473053, "learning_rate": 0.002, "loss": 2.3321, "step": 321720 }, { "epoch": 1.2437182044502173, "grad_norm": 0.10580580681562424, "learning_rate": 0.002, "loss": 2.3316, "step": 321730 }, { "epoch": 1.2437568616536006, "grad_norm": 0.10055314004421234, "learning_rate": 0.002, "loss": 2.326, "step": 321740 }, { "epoch": 1.2437955188569838, "grad_norm": 0.1091417744755745, "learning_rate": 0.002, "loss": 2.3279, "step": 321750 }, { "epoch": 1.243834176060367, "grad_norm": 0.10327596962451935, "learning_rate": 0.002, "loss": 2.3443, "step": 321760 }, { "epoch": 1.2438728332637503, "grad_norm": 0.09359539300203323, "learning_rate": 0.002, "loss": 2.3462, "step": 321770 }, { "epoch": 1.2439114904671336, "grad_norm": 0.10791551321744919, "learning_rate": 0.002, "loss": 2.3315, "step": 321780 }, { "epoch": 1.243950147670517, "grad_norm": 0.11361528187990189, "learning_rate": 0.002, "loss": 2.3195, "step": 321790 }, { "epoch": 1.2439888048739003, "grad_norm": 0.1098126620054245, "learning_rate": 0.002, "loss": 2.3352, "step": 321800 }, { "epoch": 1.2440274620772835, "grad_norm": 0.1013980358839035, "learning_rate": 0.002, "loss": 2.3526, "step": 321810 }, { "epoch": 1.2440661192806668, "grad_norm": 0.1106821596622467, "learning_rate": 0.002, "loss": 2.3429, "step": 321820 }, { "epoch": 1.24410477648405, "grad_norm": 0.10736387223005295, "learning_rate": 0.002, "loss": 2.3371, "step": 321830 }, { "epoch": 1.2441434336874333, "grad_norm": 0.09489861130714417, "learning_rate": 0.002, "loss": 2.3388, "step": 321840 }, { "epoch": 1.2441820908908165, "grad_norm": 0.11261710524559021, "learning_rate": 0.002, "loss": 2.3437, "step": 321850 }, { "epoch": 1.2442207480941998, "grad_norm": 0.09783985465765, "learning_rate": 0.002, "loss": 2.3348, "step": 321860 }, { "epoch": 1.244259405297583, "grad_norm": 0.11677198112010956, "learning_rate": 0.002, "loss": 2.3229, "step": 321870 }, { "epoch": 1.2442980625009663, "grad_norm": 0.11479967087507248, "learning_rate": 0.002, "loss": 2.3391, "step": 321880 }, { "epoch": 1.2443367197043498, "grad_norm": 0.10969112068414688, "learning_rate": 0.002, "loss": 2.3334, "step": 321890 }, { "epoch": 1.244375376907733, "grad_norm": 0.11333106458187103, "learning_rate": 0.002, "loss": 2.3263, "step": 321900 }, { "epoch": 1.2444140341111163, "grad_norm": 0.11218047887086868, "learning_rate": 0.002, "loss": 2.3348, "step": 321910 }, { "epoch": 1.2444526913144995, "grad_norm": 0.11694422364234924, "learning_rate": 0.002, "loss": 2.3258, "step": 321920 }, { "epoch": 1.2444913485178828, "grad_norm": 0.10061642527580261, "learning_rate": 0.002, "loss": 2.3426, "step": 321930 }, { "epoch": 1.244530005721266, "grad_norm": 0.11137649416923523, "learning_rate": 0.002, "loss": 2.3362, "step": 321940 }, { "epoch": 1.2445686629246493, "grad_norm": 0.10614387691020966, "learning_rate": 0.002, "loss": 2.3268, "step": 321950 }, { "epoch": 1.2446073201280328, "grad_norm": 0.11743594706058502, "learning_rate": 0.002, "loss": 2.3231, "step": 321960 }, { "epoch": 1.244645977331416, "grad_norm": 0.11147256195545197, "learning_rate": 0.002, "loss": 2.3371, "step": 321970 }, { "epoch": 1.2446846345347993, "grad_norm": 0.0961056798696518, "learning_rate": 0.002, "loss": 2.344, "step": 321980 }, { "epoch": 1.2447232917381825, "grad_norm": 0.11435211449861526, "learning_rate": 0.002, "loss": 2.3326, "step": 321990 }, { "epoch": 1.2447619489415658, "grad_norm": 0.10239315778017044, "learning_rate": 0.002, "loss": 2.3492, "step": 322000 }, { "epoch": 1.244800606144949, "grad_norm": 0.10872689634561539, "learning_rate": 0.002, "loss": 2.3461, "step": 322010 }, { "epoch": 1.2448392633483323, "grad_norm": 0.10963756591081619, "learning_rate": 0.002, "loss": 2.3413, "step": 322020 }, { "epoch": 1.2448779205517155, "grad_norm": 0.11242996156215668, "learning_rate": 0.002, "loss": 2.3235, "step": 322030 }, { "epoch": 1.2449165777550988, "grad_norm": 0.1051398292183876, "learning_rate": 0.002, "loss": 2.3211, "step": 322040 }, { "epoch": 1.2449552349584823, "grad_norm": 0.11519026011228561, "learning_rate": 0.002, "loss": 2.3374, "step": 322050 }, { "epoch": 1.2449938921618655, "grad_norm": 0.09578767418861389, "learning_rate": 0.002, "loss": 2.3236, "step": 322060 }, { "epoch": 1.2450325493652488, "grad_norm": 0.10644643753767014, "learning_rate": 0.002, "loss": 2.3408, "step": 322070 }, { "epoch": 1.245071206568632, "grad_norm": 0.11046939343214035, "learning_rate": 0.002, "loss": 2.3487, "step": 322080 }, { "epoch": 1.2451098637720153, "grad_norm": 0.15813681483268738, "learning_rate": 0.002, "loss": 2.3324, "step": 322090 }, { "epoch": 1.2451485209753985, "grad_norm": 0.09647150337696075, "learning_rate": 0.002, "loss": 2.3271, "step": 322100 }, { "epoch": 1.2451871781787818, "grad_norm": 0.1031809076666832, "learning_rate": 0.002, "loss": 2.3238, "step": 322110 }, { "epoch": 1.245225835382165, "grad_norm": 0.09915446490049362, "learning_rate": 0.002, "loss": 2.3261, "step": 322120 }, { "epoch": 1.2452644925855485, "grad_norm": 0.0994412750005722, "learning_rate": 0.002, "loss": 2.3264, "step": 322130 }, { "epoch": 1.2453031497889318, "grad_norm": 0.0929538905620575, "learning_rate": 0.002, "loss": 2.3466, "step": 322140 }, { "epoch": 1.245341806992315, "grad_norm": 0.10131704062223434, "learning_rate": 0.002, "loss": 2.3477, "step": 322150 }, { "epoch": 1.2453804641956983, "grad_norm": 0.10987386107444763, "learning_rate": 0.002, "loss": 2.3309, "step": 322160 }, { "epoch": 1.2454191213990815, "grad_norm": 0.13560396432876587, "learning_rate": 0.002, "loss": 2.346, "step": 322170 }, { "epoch": 1.2454577786024648, "grad_norm": 0.10473495721817017, "learning_rate": 0.002, "loss": 2.3422, "step": 322180 }, { "epoch": 1.245496435805848, "grad_norm": 0.09378011524677277, "learning_rate": 0.002, "loss": 2.3271, "step": 322190 }, { "epoch": 1.2455350930092313, "grad_norm": 0.10519225895404816, "learning_rate": 0.002, "loss": 2.3246, "step": 322200 }, { "epoch": 1.2455737502126145, "grad_norm": 0.11469787359237671, "learning_rate": 0.002, "loss": 2.3296, "step": 322210 }, { "epoch": 1.245612407415998, "grad_norm": 0.10396713018417358, "learning_rate": 0.002, "loss": 2.3467, "step": 322220 }, { "epoch": 1.2456510646193812, "grad_norm": 0.09106788784265518, "learning_rate": 0.002, "loss": 2.3304, "step": 322230 }, { "epoch": 1.2456897218227645, "grad_norm": 0.10046976059675217, "learning_rate": 0.002, "loss": 2.3341, "step": 322240 }, { "epoch": 1.2457283790261477, "grad_norm": 0.10367235541343689, "learning_rate": 0.002, "loss": 2.334, "step": 322250 }, { "epoch": 1.245767036229531, "grad_norm": 0.11166314780712128, "learning_rate": 0.002, "loss": 2.3414, "step": 322260 }, { "epoch": 1.2458056934329143, "grad_norm": 0.09072278439998627, "learning_rate": 0.002, "loss": 2.3467, "step": 322270 }, { "epoch": 1.2458443506362975, "grad_norm": 0.10355474799871445, "learning_rate": 0.002, "loss": 2.3339, "step": 322280 }, { "epoch": 1.2458830078396808, "grad_norm": 0.10259489715099335, "learning_rate": 0.002, "loss": 2.3307, "step": 322290 }, { "epoch": 1.2459216650430642, "grad_norm": 0.12334828078746796, "learning_rate": 0.002, "loss": 2.3306, "step": 322300 }, { "epoch": 1.2459603222464475, "grad_norm": 0.12639813125133514, "learning_rate": 0.002, "loss": 2.3534, "step": 322310 }, { "epoch": 1.2459989794498307, "grad_norm": 0.10717806220054626, "learning_rate": 0.002, "loss": 2.34, "step": 322320 }, { "epoch": 1.246037636653214, "grad_norm": 0.10780540853738785, "learning_rate": 0.002, "loss": 2.3387, "step": 322330 }, { "epoch": 1.2460762938565972, "grad_norm": 0.12163043767213821, "learning_rate": 0.002, "loss": 2.3284, "step": 322340 }, { "epoch": 1.2461149510599805, "grad_norm": 0.10512256622314453, "learning_rate": 0.002, "loss": 2.3285, "step": 322350 }, { "epoch": 1.2461536082633637, "grad_norm": 0.11891256272792816, "learning_rate": 0.002, "loss": 2.3363, "step": 322360 }, { "epoch": 1.246192265466747, "grad_norm": 0.08721699565649033, "learning_rate": 0.002, "loss": 2.3321, "step": 322370 }, { "epoch": 1.2462309226701302, "grad_norm": 0.10301852226257324, "learning_rate": 0.002, "loss": 2.316, "step": 322380 }, { "epoch": 1.2462695798735137, "grad_norm": 0.10433807969093323, "learning_rate": 0.002, "loss": 2.3417, "step": 322390 }, { "epoch": 1.246308237076897, "grad_norm": 0.11882524937391281, "learning_rate": 0.002, "loss": 2.3443, "step": 322400 }, { "epoch": 1.2463468942802802, "grad_norm": 0.09801863133907318, "learning_rate": 0.002, "loss": 2.3486, "step": 322410 }, { "epoch": 1.2463855514836635, "grad_norm": 0.10055270045995712, "learning_rate": 0.002, "loss": 2.3354, "step": 322420 }, { "epoch": 1.2464242086870467, "grad_norm": 0.11083211749792099, "learning_rate": 0.002, "loss": 2.3395, "step": 322430 }, { "epoch": 1.24646286589043, "grad_norm": 0.11715829372406006, "learning_rate": 0.002, "loss": 2.3355, "step": 322440 }, { "epoch": 1.2465015230938132, "grad_norm": 0.10977619141340256, "learning_rate": 0.002, "loss": 2.3336, "step": 322450 }, { "epoch": 1.2465401802971965, "grad_norm": 0.0934174656867981, "learning_rate": 0.002, "loss": 2.3404, "step": 322460 }, { "epoch": 1.24657883750058, "grad_norm": 0.10772105306386948, "learning_rate": 0.002, "loss": 2.3542, "step": 322470 }, { "epoch": 1.2466174947039632, "grad_norm": 0.10033559799194336, "learning_rate": 0.002, "loss": 2.3469, "step": 322480 }, { "epoch": 1.2466561519073465, "grad_norm": 0.1018151119351387, "learning_rate": 0.002, "loss": 2.3358, "step": 322490 }, { "epoch": 1.2466948091107297, "grad_norm": 0.11213316768407822, "learning_rate": 0.002, "loss": 2.3389, "step": 322500 }, { "epoch": 1.246733466314113, "grad_norm": 0.10616469383239746, "learning_rate": 0.002, "loss": 2.3232, "step": 322510 }, { "epoch": 1.2467721235174962, "grad_norm": 0.09601625800132751, "learning_rate": 0.002, "loss": 2.3286, "step": 322520 }, { "epoch": 1.2468107807208795, "grad_norm": 0.09934542328119278, "learning_rate": 0.002, "loss": 2.3366, "step": 322530 }, { "epoch": 1.2468494379242627, "grad_norm": 0.09939441829919815, "learning_rate": 0.002, "loss": 2.3312, "step": 322540 }, { "epoch": 1.246888095127646, "grad_norm": 0.11408967524766922, "learning_rate": 0.002, "loss": 2.3437, "step": 322550 }, { "epoch": 1.2469267523310295, "grad_norm": 0.10870948433876038, "learning_rate": 0.002, "loss": 2.3339, "step": 322560 }, { "epoch": 1.2469654095344127, "grad_norm": 0.10187172889709473, "learning_rate": 0.002, "loss": 2.3403, "step": 322570 }, { "epoch": 1.247004066737796, "grad_norm": 0.09450990706682205, "learning_rate": 0.002, "loss": 2.3345, "step": 322580 }, { "epoch": 1.2470427239411792, "grad_norm": 0.09321904927492142, "learning_rate": 0.002, "loss": 2.324, "step": 322590 }, { "epoch": 1.2470813811445625, "grad_norm": 0.09764519333839417, "learning_rate": 0.002, "loss": 2.3395, "step": 322600 }, { "epoch": 1.2471200383479457, "grad_norm": 0.10473806411027908, "learning_rate": 0.002, "loss": 2.3241, "step": 322610 }, { "epoch": 1.247158695551329, "grad_norm": 0.107349693775177, "learning_rate": 0.002, "loss": 2.3379, "step": 322620 }, { "epoch": 1.2471973527547122, "grad_norm": 0.12023581564426422, "learning_rate": 0.002, "loss": 2.3347, "step": 322630 }, { "epoch": 1.2472360099580957, "grad_norm": 0.09608040750026703, "learning_rate": 0.002, "loss": 2.3169, "step": 322640 }, { "epoch": 1.247274667161479, "grad_norm": 0.10328279435634613, "learning_rate": 0.002, "loss": 2.3396, "step": 322650 }, { "epoch": 1.2473133243648622, "grad_norm": 0.09133925288915634, "learning_rate": 0.002, "loss": 2.3475, "step": 322660 }, { "epoch": 1.2473519815682454, "grad_norm": 0.10648240149021149, "learning_rate": 0.002, "loss": 2.342, "step": 322670 }, { "epoch": 1.2473906387716287, "grad_norm": 0.10589372366666794, "learning_rate": 0.002, "loss": 2.3479, "step": 322680 }, { "epoch": 1.247429295975012, "grad_norm": 0.10525405406951904, "learning_rate": 0.002, "loss": 2.3386, "step": 322690 }, { "epoch": 1.2474679531783952, "grad_norm": 0.10498455166816711, "learning_rate": 0.002, "loss": 2.3423, "step": 322700 }, { "epoch": 1.2475066103817785, "grad_norm": 0.10058266669511795, "learning_rate": 0.002, "loss": 2.3291, "step": 322710 }, { "epoch": 1.2475452675851617, "grad_norm": 0.11225242912769318, "learning_rate": 0.002, "loss": 2.3375, "step": 322720 }, { "epoch": 1.2475839247885452, "grad_norm": 0.122991181910038, "learning_rate": 0.002, "loss": 2.334, "step": 322730 }, { "epoch": 1.2476225819919284, "grad_norm": 0.09694638103246689, "learning_rate": 0.002, "loss": 2.3215, "step": 322740 }, { "epoch": 1.2476612391953117, "grad_norm": 0.09865941852331161, "learning_rate": 0.002, "loss": 2.3336, "step": 322750 }, { "epoch": 1.247699896398695, "grad_norm": 0.10031592845916748, "learning_rate": 0.002, "loss": 2.327, "step": 322760 }, { "epoch": 1.2477385536020782, "grad_norm": 0.11467334628105164, "learning_rate": 0.002, "loss": 2.3328, "step": 322770 }, { "epoch": 1.2477772108054614, "grad_norm": 0.10169904679059982, "learning_rate": 0.002, "loss": 2.3471, "step": 322780 }, { "epoch": 1.2478158680088447, "grad_norm": 0.09758496284484863, "learning_rate": 0.002, "loss": 2.3403, "step": 322790 }, { "epoch": 1.247854525212228, "grad_norm": 0.12482014298439026, "learning_rate": 0.002, "loss": 2.3356, "step": 322800 }, { "epoch": 1.2478931824156114, "grad_norm": 0.11974287778139114, "learning_rate": 0.002, "loss": 2.3381, "step": 322810 }, { "epoch": 1.2479318396189947, "grad_norm": 0.1108810156583786, "learning_rate": 0.002, "loss": 2.3249, "step": 322820 }, { "epoch": 1.247970496822378, "grad_norm": 0.09258130937814713, "learning_rate": 0.002, "loss": 2.3295, "step": 322830 }, { "epoch": 1.2480091540257612, "grad_norm": 0.09061979502439499, "learning_rate": 0.002, "loss": 2.3372, "step": 322840 }, { "epoch": 1.2480478112291444, "grad_norm": 0.10905580222606659, "learning_rate": 0.002, "loss": 2.3196, "step": 322850 }, { "epoch": 1.2480864684325277, "grad_norm": 0.11101836711168289, "learning_rate": 0.002, "loss": 2.3432, "step": 322860 }, { "epoch": 1.248125125635911, "grad_norm": 0.11768799275159836, "learning_rate": 0.002, "loss": 2.3236, "step": 322870 }, { "epoch": 1.2481637828392942, "grad_norm": 0.11336436867713928, "learning_rate": 0.002, "loss": 2.3373, "step": 322880 }, { "epoch": 1.2482024400426774, "grad_norm": 0.11874739080667496, "learning_rate": 0.002, "loss": 2.3194, "step": 322890 }, { "epoch": 1.248241097246061, "grad_norm": 0.10988987237215042, "learning_rate": 0.002, "loss": 2.3384, "step": 322900 }, { "epoch": 1.2482797544494442, "grad_norm": 0.09862332046031952, "learning_rate": 0.002, "loss": 2.3354, "step": 322910 }, { "epoch": 1.2483184116528274, "grad_norm": 0.10089791566133499, "learning_rate": 0.002, "loss": 2.3246, "step": 322920 }, { "epoch": 1.2483570688562107, "grad_norm": 0.10909537971019745, "learning_rate": 0.002, "loss": 2.322, "step": 322930 }, { "epoch": 1.248395726059594, "grad_norm": 0.13516369462013245, "learning_rate": 0.002, "loss": 2.3429, "step": 322940 }, { "epoch": 1.2484343832629772, "grad_norm": 0.1360653042793274, "learning_rate": 0.002, "loss": 2.3419, "step": 322950 }, { "epoch": 1.2484730404663604, "grad_norm": 0.10364606231451035, "learning_rate": 0.002, "loss": 2.3261, "step": 322960 }, { "epoch": 1.248511697669744, "grad_norm": 0.09633604437112808, "learning_rate": 0.002, "loss": 2.3234, "step": 322970 }, { "epoch": 1.2485503548731272, "grad_norm": 0.37557491660118103, "learning_rate": 0.002, "loss": 2.3774, "step": 322980 }, { "epoch": 1.2485890120765104, "grad_norm": 0.11646705120801926, "learning_rate": 0.002, "loss": 2.3514, "step": 322990 }, { "epoch": 1.2486276692798937, "grad_norm": 0.13214127719402313, "learning_rate": 0.002, "loss": 2.3469, "step": 323000 }, { "epoch": 1.248666326483277, "grad_norm": 0.08824113756418228, "learning_rate": 0.002, "loss": 2.3505, "step": 323010 }, { "epoch": 1.2487049836866602, "grad_norm": 0.10804717242717743, "learning_rate": 0.002, "loss": 2.3446, "step": 323020 }, { "epoch": 1.2487436408900434, "grad_norm": 0.10116700828075409, "learning_rate": 0.002, "loss": 2.3411, "step": 323030 }, { "epoch": 1.2487822980934267, "grad_norm": 0.121637724339962, "learning_rate": 0.002, "loss": 2.3349, "step": 323040 }, { "epoch": 1.24882095529681, "grad_norm": 0.12432374805212021, "learning_rate": 0.002, "loss": 2.3338, "step": 323050 }, { "epoch": 1.2488596125001932, "grad_norm": 0.09569886326789856, "learning_rate": 0.002, "loss": 2.3312, "step": 323060 }, { "epoch": 1.2488982697035766, "grad_norm": 0.08894059807062149, "learning_rate": 0.002, "loss": 2.3352, "step": 323070 }, { "epoch": 1.24893692690696, "grad_norm": 0.10931940376758575, "learning_rate": 0.002, "loss": 2.3178, "step": 323080 }, { "epoch": 1.2489755841103432, "grad_norm": 0.11267866939306259, "learning_rate": 0.002, "loss": 2.3363, "step": 323090 }, { "epoch": 1.2490142413137264, "grad_norm": 0.09634686261415482, "learning_rate": 0.002, "loss": 2.3391, "step": 323100 }, { "epoch": 1.2490528985171097, "grad_norm": 0.10163258016109467, "learning_rate": 0.002, "loss": 2.3301, "step": 323110 }, { "epoch": 1.249091555720493, "grad_norm": 0.09575599431991577, "learning_rate": 0.002, "loss": 2.3394, "step": 323120 }, { "epoch": 1.2491302129238762, "grad_norm": 0.10602719336748123, "learning_rate": 0.002, "loss": 2.3453, "step": 323130 }, { "epoch": 1.2491688701272596, "grad_norm": 0.10932130366563797, "learning_rate": 0.002, "loss": 2.3386, "step": 323140 }, { "epoch": 1.2492075273306429, "grad_norm": 0.10526705533266068, "learning_rate": 0.002, "loss": 2.3383, "step": 323150 }, { "epoch": 1.2492461845340261, "grad_norm": 0.0988556370139122, "learning_rate": 0.002, "loss": 2.3411, "step": 323160 }, { "epoch": 1.2492848417374094, "grad_norm": 0.10179826617240906, "learning_rate": 0.002, "loss": 2.3269, "step": 323170 }, { "epoch": 1.2493234989407926, "grad_norm": 0.0974382758140564, "learning_rate": 0.002, "loss": 2.3366, "step": 323180 }, { "epoch": 1.249362156144176, "grad_norm": 0.10001240670681, "learning_rate": 0.002, "loss": 2.328, "step": 323190 }, { "epoch": 1.2494008133475591, "grad_norm": 0.10301165282726288, "learning_rate": 0.002, "loss": 2.3435, "step": 323200 }, { "epoch": 1.2494394705509424, "grad_norm": 0.1391913890838623, "learning_rate": 0.002, "loss": 2.3357, "step": 323210 }, { "epoch": 1.2494781277543257, "grad_norm": 0.10194188356399536, "learning_rate": 0.002, "loss": 2.3361, "step": 323220 }, { "epoch": 1.249516784957709, "grad_norm": 0.10584280639886856, "learning_rate": 0.002, "loss": 2.3472, "step": 323230 }, { "epoch": 1.2495554421610924, "grad_norm": 0.10867606103420258, "learning_rate": 0.002, "loss": 2.3154, "step": 323240 }, { "epoch": 1.2495940993644756, "grad_norm": 0.0992872565984726, "learning_rate": 0.002, "loss": 2.3495, "step": 323250 }, { "epoch": 1.2496327565678589, "grad_norm": 0.12357765436172485, "learning_rate": 0.002, "loss": 2.3315, "step": 323260 }, { "epoch": 1.2496714137712421, "grad_norm": 0.08753013610839844, "learning_rate": 0.002, "loss": 2.3437, "step": 323270 }, { "epoch": 1.2497100709746254, "grad_norm": 0.09192200005054474, "learning_rate": 0.002, "loss": 2.3192, "step": 323280 }, { "epoch": 1.2497487281780086, "grad_norm": 0.1059727743268013, "learning_rate": 0.002, "loss": 2.3408, "step": 323290 }, { "epoch": 1.249787385381392, "grad_norm": 0.10856989771127701, "learning_rate": 0.002, "loss": 2.3313, "step": 323300 }, { "epoch": 1.2498260425847754, "grad_norm": 0.10081683099269867, "learning_rate": 0.002, "loss": 2.3422, "step": 323310 }, { "epoch": 1.2498646997881586, "grad_norm": 0.10372840613126755, "learning_rate": 0.002, "loss": 2.3295, "step": 323320 }, { "epoch": 1.2499033569915419, "grad_norm": 0.09857896715402603, "learning_rate": 0.002, "loss": 2.3458, "step": 323330 }, { "epoch": 1.2499420141949251, "grad_norm": 0.10680362582206726, "learning_rate": 0.002, "loss": 2.3267, "step": 323340 }, { "epoch": 1.2499806713983084, "grad_norm": 0.1055355817079544, "learning_rate": 0.002, "loss": 2.3147, "step": 323350 }, { "epoch": 1.2500193286016916, "grad_norm": 0.12696675956249237, "learning_rate": 0.002, "loss": 2.3385, "step": 323360 }, { "epoch": 1.2500579858050749, "grad_norm": 0.09672246128320694, "learning_rate": 0.002, "loss": 2.3446, "step": 323370 }, { "epoch": 1.2500966430084581, "grad_norm": 0.0960371345281601, "learning_rate": 0.002, "loss": 2.3328, "step": 323380 }, { "epoch": 1.2501353002118414, "grad_norm": 0.09530273079872131, "learning_rate": 0.002, "loss": 2.3392, "step": 323390 }, { "epoch": 1.2501739574152246, "grad_norm": 0.17731760442256927, "learning_rate": 0.002, "loss": 2.3246, "step": 323400 }, { "epoch": 1.250212614618608, "grad_norm": 0.11541884392499924, "learning_rate": 0.002, "loss": 2.3418, "step": 323410 }, { "epoch": 1.2502512718219914, "grad_norm": 0.09873352199792862, "learning_rate": 0.002, "loss": 2.3242, "step": 323420 }, { "epoch": 1.2502899290253746, "grad_norm": 0.13132119178771973, "learning_rate": 0.002, "loss": 2.3137, "step": 323430 }, { "epoch": 1.2503285862287579, "grad_norm": 0.11925432085990906, "learning_rate": 0.002, "loss": 2.3278, "step": 323440 }, { "epoch": 1.2503672434321411, "grad_norm": 0.10793092101812363, "learning_rate": 0.002, "loss": 2.3319, "step": 323450 }, { "epoch": 1.2504059006355244, "grad_norm": 0.10153786092996597, "learning_rate": 0.002, "loss": 2.336, "step": 323460 }, { "epoch": 1.2504445578389076, "grad_norm": 0.10431569069623947, "learning_rate": 0.002, "loss": 2.3315, "step": 323470 }, { "epoch": 1.250483215042291, "grad_norm": 0.10726450383663177, "learning_rate": 0.002, "loss": 2.3463, "step": 323480 }, { "epoch": 1.2505218722456743, "grad_norm": 0.10803461819887161, "learning_rate": 0.002, "loss": 2.3334, "step": 323490 }, { "epoch": 1.2505605294490576, "grad_norm": 0.10541044175624847, "learning_rate": 0.002, "loss": 2.3313, "step": 323500 }, { "epoch": 1.2505991866524409, "grad_norm": 0.10016429424285889, "learning_rate": 0.002, "loss": 2.3189, "step": 323510 }, { "epoch": 1.250637843855824, "grad_norm": 0.09697940200567245, "learning_rate": 0.002, "loss": 2.3292, "step": 323520 }, { "epoch": 1.2506765010592074, "grad_norm": 0.09459806233644485, "learning_rate": 0.002, "loss": 2.3364, "step": 323530 }, { "epoch": 1.2507151582625906, "grad_norm": 0.10965185612440109, "learning_rate": 0.002, "loss": 2.3343, "step": 323540 }, { "epoch": 1.2507538154659739, "grad_norm": 0.1285904347896576, "learning_rate": 0.002, "loss": 2.3318, "step": 323550 }, { "epoch": 1.2507924726693571, "grad_norm": 0.11946802586317062, "learning_rate": 0.002, "loss": 2.3257, "step": 323560 }, { "epoch": 1.2508311298727404, "grad_norm": 0.11244595050811768, "learning_rate": 0.002, "loss": 2.35, "step": 323570 }, { "epoch": 1.2508697870761238, "grad_norm": 0.10817580670118332, "learning_rate": 0.002, "loss": 2.3378, "step": 323580 }, { "epoch": 1.250908444279507, "grad_norm": 0.12300717085599899, "learning_rate": 0.002, "loss": 2.3369, "step": 323590 }, { "epoch": 1.2509471014828903, "grad_norm": 0.10982823371887207, "learning_rate": 0.002, "loss": 2.3371, "step": 323600 }, { "epoch": 1.2509857586862736, "grad_norm": 0.10455646365880966, "learning_rate": 0.002, "loss": 2.3378, "step": 323610 }, { "epoch": 1.2510244158896568, "grad_norm": 0.09407669305801392, "learning_rate": 0.002, "loss": 2.3381, "step": 323620 }, { "epoch": 1.25106307309304, "grad_norm": 0.14270919561386108, "learning_rate": 0.002, "loss": 2.3376, "step": 323630 }, { "epoch": 1.2511017302964234, "grad_norm": 0.09745250642299652, "learning_rate": 0.002, "loss": 2.3366, "step": 323640 }, { "epoch": 1.2511403874998068, "grad_norm": 0.10952335596084595, "learning_rate": 0.002, "loss": 2.3343, "step": 323650 }, { "epoch": 1.25117904470319, "grad_norm": 0.10126937925815582, "learning_rate": 0.002, "loss": 2.3359, "step": 323660 }, { "epoch": 1.2512177019065733, "grad_norm": 0.09247476607561111, "learning_rate": 0.002, "loss": 2.343, "step": 323670 }, { "epoch": 1.2512563591099566, "grad_norm": 0.1153465211391449, "learning_rate": 0.002, "loss": 2.3193, "step": 323680 }, { "epoch": 1.2512950163133398, "grad_norm": 0.1340150535106659, "learning_rate": 0.002, "loss": 2.353, "step": 323690 }, { "epoch": 1.251333673516723, "grad_norm": 0.11080127209424973, "learning_rate": 0.002, "loss": 2.3401, "step": 323700 }, { "epoch": 1.2513723307201063, "grad_norm": 0.13601601123809814, "learning_rate": 0.002, "loss": 2.3387, "step": 323710 }, { "epoch": 1.2514109879234896, "grad_norm": 0.10433941334486008, "learning_rate": 0.002, "loss": 2.3311, "step": 323720 }, { "epoch": 1.2514496451268728, "grad_norm": 0.10418447852134705, "learning_rate": 0.002, "loss": 2.3343, "step": 323730 }, { "epoch": 1.251488302330256, "grad_norm": 0.10729973763227463, "learning_rate": 0.002, "loss": 2.3439, "step": 323740 }, { "epoch": 1.2515269595336396, "grad_norm": 0.09255962818861008, "learning_rate": 0.002, "loss": 2.3319, "step": 323750 }, { "epoch": 1.2515656167370228, "grad_norm": 0.12399463355541229, "learning_rate": 0.002, "loss": 2.3367, "step": 323760 }, { "epoch": 1.251604273940406, "grad_norm": 0.1212589368224144, "learning_rate": 0.002, "loss": 2.3432, "step": 323770 }, { "epoch": 1.2516429311437893, "grad_norm": 0.10145343095064163, "learning_rate": 0.002, "loss": 2.3361, "step": 323780 }, { "epoch": 1.2516815883471726, "grad_norm": 0.11932121962308884, "learning_rate": 0.002, "loss": 2.3282, "step": 323790 }, { "epoch": 1.2517202455505558, "grad_norm": 0.08953940868377686, "learning_rate": 0.002, "loss": 2.3442, "step": 323800 }, { "epoch": 1.2517589027539393, "grad_norm": 0.09370152652263641, "learning_rate": 0.002, "loss": 2.3262, "step": 323810 }, { "epoch": 1.2517975599573226, "grad_norm": 0.10805421322584152, "learning_rate": 0.002, "loss": 2.3377, "step": 323820 }, { "epoch": 1.2518362171607058, "grad_norm": 0.09570808708667755, "learning_rate": 0.002, "loss": 2.3286, "step": 323830 }, { "epoch": 1.251874874364089, "grad_norm": 0.13799162209033966, "learning_rate": 0.002, "loss": 2.3622, "step": 323840 }, { "epoch": 1.2519135315674723, "grad_norm": 0.11238276213407516, "learning_rate": 0.002, "loss": 2.3387, "step": 323850 }, { "epoch": 1.2519521887708556, "grad_norm": 0.12940475344657898, "learning_rate": 0.002, "loss": 2.3541, "step": 323860 }, { "epoch": 1.2519908459742388, "grad_norm": 0.1093161404132843, "learning_rate": 0.002, "loss": 2.3433, "step": 323870 }, { "epoch": 1.252029503177622, "grad_norm": 0.1395666003227234, "learning_rate": 0.002, "loss": 2.3182, "step": 323880 }, { "epoch": 1.2520681603810053, "grad_norm": 0.1023389920592308, "learning_rate": 0.002, "loss": 2.3231, "step": 323890 }, { "epoch": 1.2521068175843886, "grad_norm": 0.1550685316324234, "learning_rate": 0.002, "loss": 2.3499, "step": 323900 }, { "epoch": 1.2521454747877718, "grad_norm": 0.11096793413162231, "learning_rate": 0.002, "loss": 2.3192, "step": 323910 }, { "epoch": 1.2521841319911553, "grad_norm": 0.0967497006058693, "learning_rate": 0.002, "loss": 2.3328, "step": 323920 }, { "epoch": 1.2522227891945386, "grad_norm": 0.11543576419353485, "learning_rate": 0.002, "loss": 2.3415, "step": 323930 }, { "epoch": 1.2522614463979218, "grad_norm": 0.11807817220687866, "learning_rate": 0.002, "loss": 2.3186, "step": 323940 }, { "epoch": 1.252300103601305, "grad_norm": 0.10806535929441452, "learning_rate": 0.002, "loss": 2.3347, "step": 323950 }, { "epoch": 1.2523387608046883, "grad_norm": 0.12325377762317657, "learning_rate": 0.002, "loss": 2.3431, "step": 323960 }, { "epoch": 1.2523774180080716, "grad_norm": 0.10087354481220245, "learning_rate": 0.002, "loss": 2.3485, "step": 323970 }, { "epoch": 1.252416075211455, "grad_norm": 0.12561774253845215, "learning_rate": 0.002, "loss": 2.3388, "step": 323980 }, { "epoch": 1.2524547324148383, "grad_norm": 0.3844805657863617, "learning_rate": 0.002, "loss": 2.3368, "step": 323990 }, { "epoch": 1.2524933896182215, "grad_norm": 0.0988330990076065, "learning_rate": 0.002, "loss": 2.3192, "step": 324000 }, { "epoch": 1.2525320468216048, "grad_norm": 0.11373604089021683, "learning_rate": 0.002, "loss": 2.3492, "step": 324010 }, { "epoch": 1.252570704024988, "grad_norm": 0.10148126631975174, "learning_rate": 0.002, "loss": 2.3478, "step": 324020 }, { "epoch": 1.2526093612283713, "grad_norm": 0.09937749803066254, "learning_rate": 0.002, "loss": 2.3421, "step": 324030 }, { "epoch": 1.2526480184317546, "grad_norm": 0.11062469333410263, "learning_rate": 0.002, "loss": 2.3495, "step": 324040 }, { "epoch": 1.2526866756351378, "grad_norm": 0.09238646179437637, "learning_rate": 0.002, "loss": 2.3277, "step": 324050 }, { "epoch": 1.252725332838521, "grad_norm": 0.11890768259763718, "learning_rate": 0.002, "loss": 2.3414, "step": 324060 }, { "epoch": 1.2527639900419043, "grad_norm": 0.1008533462882042, "learning_rate": 0.002, "loss": 2.3289, "step": 324070 }, { "epoch": 1.2528026472452876, "grad_norm": 0.11144950240850449, "learning_rate": 0.002, "loss": 2.3587, "step": 324080 }, { "epoch": 1.252841304448671, "grad_norm": 0.09708874672651291, "learning_rate": 0.002, "loss": 2.3435, "step": 324090 }, { "epoch": 1.2528799616520543, "grad_norm": 0.10695436596870422, "learning_rate": 0.002, "loss": 2.3412, "step": 324100 }, { "epoch": 1.2529186188554375, "grad_norm": 0.09778022766113281, "learning_rate": 0.002, "loss": 2.3393, "step": 324110 }, { "epoch": 1.2529572760588208, "grad_norm": 0.09368152171373367, "learning_rate": 0.002, "loss": 2.3386, "step": 324120 }, { "epoch": 1.252995933262204, "grad_norm": 0.09820955246686935, "learning_rate": 0.002, "loss": 2.3468, "step": 324130 }, { "epoch": 1.2530345904655873, "grad_norm": 0.09251420944929123, "learning_rate": 0.002, "loss": 2.3428, "step": 324140 }, { "epoch": 1.2530732476689708, "grad_norm": 0.09938377887010574, "learning_rate": 0.002, "loss": 2.3212, "step": 324150 }, { "epoch": 1.253111904872354, "grad_norm": 0.11175873130559921, "learning_rate": 0.002, "loss": 2.3303, "step": 324160 }, { "epoch": 1.2531505620757373, "grad_norm": 0.1269582211971283, "learning_rate": 0.002, "loss": 2.3252, "step": 324170 }, { "epoch": 1.2531892192791205, "grad_norm": 0.1151537373661995, "learning_rate": 0.002, "loss": 2.3264, "step": 324180 }, { "epoch": 1.2532278764825038, "grad_norm": 0.10594908148050308, "learning_rate": 0.002, "loss": 2.3486, "step": 324190 }, { "epoch": 1.253266533685887, "grad_norm": 0.11179118603467941, "learning_rate": 0.002, "loss": 2.3521, "step": 324200 }, { "epoch": 1.2533051908892703, "grad_norm": 0.10564833134412766, "learning_rate": 0.002, "loss": 2.3271, "step": 324210 }, { "epoch": 1.2533438480926535, "grad_norm": 0.12424666434526443, "learning_rate": 0.002, "loss": 2.3352, "step": 324220 }, { "epoch": 1.2533825052960368, "grad_norm": 0.10340416431427002, "learning_rate": 0.002, "loss": 2.3292, "step": 324230 }, { "epoch": 1.25342116249942, "grad_norm": 0.10386926680803299, "learning_rate": 0.002, "loss": 2.3369, "step": 324240 }, { "epoch": 1.2534598197028033, "grad_norm": 0.3834894001483917, "learning_rate": 0.002, "loss": 2.3363, "step": 324250 }, { "epoch": 1.2534984769061868, "grad_norm": 0.11524228006601334, "learning_rate": 0.002, "loss": 2.34, "step": 324260 }, { "epoch": 1.25353713410957, "grad_norm": 0.12251781672239304, "learning_rate": 0.002, "loss": 2.3426, "step": 324270 }, { "epoch": 1.2535757913129533, "grad_norm": 0.10764903575181961, "learning_rate": 0.002, "loss": 2.3412, "step": 324280 }, { "epoch": 1.2536144485163365, "grad_norm": 0.09343133121728897, "learning_rate": 0.002, "loss": 2.3504, "step": 324290 }, { "epoch": 1.2536531057197198, "grad_norm": 0.11491622775793076, "learning_rate": 0.002, "loss": 2.3292, "step": 324300 }, { "epoch": 1.253691762923103, "grad_norm": 0.112935371696949, "learning_rate": 0.002, "loss": 2.349, "step": 324310 }, { "epoch": 1.2537304201264865, "grad_norm": 0.09517694264650345, "learning_rate": 0.002, "loss": 2.3216, "step": 324320 }, { "epoch": 1.2537690773298698, "grad_norm": 0.10657618939876556, "learning_rate": 0.002, "loss": 2.3424, "step": 324330 }, { "epoch": 1.253807734533253, "grad_norm": 0.10911275446414948, "learning_rate": 0.002, "loss": 2.3261, "step": 324340 }, { "epoch": 1.2538463917366363, "grad_norm": 0.10177423059940338, "learning_rate": 0.002, "loss": 2.3433, "step": 324350 }, { "epoch": 1.2538850489400195, "grad_norm": 0.09765344858169556, "learning_rate": 0.002, "loss": 2.3351, "step": 324360 }, { "epoch": 1.2539237061434028, "grad_norm": 0.10047116875648499, "learning_rate": 0.002, "loss": 2.3417, "step": 324370 }, { "epoch": 1.253962363346786, "grad_norm": 0.10966706275939941, "learning_rate": 0.002, "loss": 2.3328, "step": 324380 }, { "epoch": 1.2540010205501693, "grad_norm": 0.11583580821752548, "learning_rate": 0.002, "loss": 2.3323, "step": 324390 }, { "epoch": 1.2540396777535525, "grad_norm": 0.09870652109384537, "learning_rate": 0.002, "loss": 2.3253, "step": 324400 }, { "epoch": 1.2540783349569358, "grad_norm": 0.08850183337926865, "learning_rate": 0.002, "loss": 2.3279, "step": 324410 }, { "epoch": 1.254116992160319, "grad_norm": 0.09612442553043365, "learning_rate": 0.002, "loss": 2.3392, "step": 324420 }, { "epoch": 1.2541556493637025, "grad_norm": 0.12101586163043976, "learning_rate": 0.002, "loss": 2.3296, "step": 324430 }, { "epoch": 1.2541943065670857, "grad_norm": 0.10274821519851685, "learning_rate": 0.002, "loss": 2.3227, "step": 324440 }, { "epoch": 1.254232963770469, "grad_norm": 0.09802849590778351, "learning_rate": 0.002, "loss": 2.3401, "step": 324450 }, { "epoch": 1.2542716209738523, "grad_norm": 0.09413763135671616, "learning_rate": 0.002, "loss": 2.3365, "step": 324460 }, { "epoch": 1.2543102781772355, "grad_norm": 0.0963965356349945, "learning_rate": 0.002, "loss": 2.33, "step": 324470 }, { "epoch": 1.2543489353806188, "grad_norm": 0.14099185168743134, "learning_rate": 0.002, "loss": 2.3349, "step": 324480 }, { "epoch": 1.2543875925840022, "grad_norm": 0.09496639668941498, "learning_rate": 0.002, "loss": 2.332, "step": 324490 }, { "epoch": 1.2544262497873855, "grad_norm": 0.09903492778539658, "learning_rate": 0.002, "loss": 2.3291, "step": 324500 }, { "epoch": 1.2544649069907687, "grad_norm": 0.11639129370450974, "learning_rate": 0.002, "loss": 2.3391, "step": 324510 }, { "epoch": 1.254503564194152, "grad_norm": 0.1040710061788559, "learning_rate": 0.002, "loss": 2.3408, "step": 324520 }, { "epoch": 1.2545422213975352, "grad_norm": 0.09413284063339233, "learning_rate": 0.002, "loss": 2.3433, "step": 324530 }, { "epoch": 1.2545808786009185, "grad_norm": 0.09563596546649933, "learning_rate": 0.002, "loss": 2.3364, "step": 324540 }, { "epoch": 1.2546195358043017, "grad_norm": 0.10063523054122925, "learning_rate": 0.002, "loss": 2.3354, "step": 324550 }, { "epoch": 1.254658193007685, "grad_norm": 0.1292758584022522, "learning_rate": 0.002, "loss": 2.329, "step": 324560 }, { "epoch": 1.2546968502110682, "grad_norm": 0.12233975529670715, "learning_rate": 0.002, "loss": 2.3541, "step": 324570 }, { "epoch": 1.2547355074144515, "grad_norm": 0.10087815672159195, "learning_rate": 0.002, "loss": 2.3486, "step": 324580 }, { "epoch": 1.2547741646178348, "grad_norm": 0.09283623099327087, "learning_rate": 0.002, "loss": 2.3445, "step": 324590 }, { "epoch": 1.2548128218212182, "grad_norm": 0.11724922806024551, "learning_rate": 0.002, "loss": 2.3365, "step": 324600 }, { "epoch": 1.2548514790246015, "grad_norm": 0.0939897671341896, "learning_rate": 0.002, "loss": 2.3358, "step": 324610 }, { "epoch": 1.2548901362279847, "grad_norm": 0.09834831953048706, "learning_rate": 0.002, "loss": 2.3414, "step": 324620 }, { "epoch": 1.254928793431368, "grad_norm": 0.10058873146772385, "learning_rate": 0.002, "loss": 2.3382, "step": 324630 }, { "epoch": 1.2549674506347512, "grad_norm": 0.11193490773439407, "learning_rate": 0.002, "loss": 2.3201, "step": 324640 }, { "epoch": 1.2550061078381345, "grad_norm": 0.1047125980257988, "learning_rate": 0.002, "loss": 2.3414, "step": 324650 }, { "epoch": 1.255044765041518, "grad_norm": 0.12173046916723251, "learning_rate": 0.002, "loss": 2.3196, "step": 324660 }, { "epoch": 1.2550834222449012, "grad_norm": 0.09190022200345993, "learning_rate": 0.002, "loss": 2.3439, "step": 324670 }, { "epoch": 1.2551220794482845, "grad_norm": 0.09540484100580215, "learning_rate": 0.002, "loss": 2.3307, "step": 324680 }, { "epoch": 1.2551607366516677, "grad_norm": 0.10608922690153122, "learning_rate": 0.002, "loss": 2.3322, "step": 324690 }, { "epoch": 1.255199393855051, "grad_norm": 0.10679183155298233, "learning_rate": 0.002, "loss": 2.3318, "step": 324700 }, { "epoch": 1.2552380510584342, "grad_norm": 0.10198316723108292, "learning_rate": 0.002, "loss": 2.3308, "step": 324710 }, { "epoch": 1.2552767082618175, "grad_norm": 0.09413036704063416, "learning_rate": 0.002, "loss": 2.3199, "step": 324720 }, { "epoch": 1.2553153654652007, "grad_norm": 0.09942302852869034, "learning_rate": 0.002, "loss": 2.3366, "step": 324730 }, { "epoch": 1.255354022668584, "grad_norm": 0.10988292098045349, "learning_rate": 0.002, "loss": 2.3295, "step": 324740 }, { "epoch": 1.2553926798719672, "grad_norm": 0.10978883504867554, "learning_rate": 0.002, "loss": 2.3343, "step": 324750 }, { "epoch": 1.2554313370753505, "grad_norm": 0.09921663254499435, "learning_rate": 0.002, "loss": 2.3302, "step": 324760 }, { "epoch": 1.255469994278734, "grad_norm": 0.10996361821889877, "learning_rate": 0.002, "loss": 2.3259, "step": 324770 }, { "epoch": 1.2555086514821172, "grad_norm": 0.10230149328708649, "learning_rate": 0.002, "loss": 2.3208, "step": 324780 }, { "epoch": 1.2555473086855005, "grad_norm": 0.11551018804311752, "learning_rate": 0.002, "loss": 2.3332, "step": 324790 }, { "epoch": 1.2555859658888837, "grad_norm": 0.10201035439968109, "learning_rate": 0.002, "loss": 2.3208, "step": 324800 }, { "epoch": 1.255624623092267, "grad_norm": 0.09801744669675827, "learning_rate": 0.002, "loss": 2.3211, "step": 324810 }, { "epoch": 1.2556632802956502, "grad_norm": 0.0981050655245781, "learning_rate": 0.002, "loss": 2.3212, "step": 324820 }, { "epoch": 1.2557019374990337, "grad_norm": 0.11296948045492172, "learning_rate": 0.002, "loss": 2.332, "step": 324830 }, { "epoch": 1.255740594702417, "grad_norm": 0.11516579985618591, "learning_rate": 0.002, "loss": 2.3217, "step": 324840 }, { "epoch": 1.2557792519058002, "grad_norm": 0.10614938288927078, "learning_rate": 0.002, "loss": 2.3296, "step": 324850 }, { "epoch": 1.2558179091091835, "grad_norm": 0.09583014249801636, "learning_rate": 0.002, "loss": 2.3336, "step": 324860 }, { "epoch": 1.2558565663125667, "grad_norm": 0.10878030955791473, "learning_rate": 0.002, "loss": 2.3331, "step": 324870 }, { "epoch": 1.25589522351595, "grad_norm": 0.09600366652011871, "learning_rate": 0.002, "loss": 2.3266, "step": 324880 }, { "epoch": 1.2559338807193332, "grad_norm": 0.10757233202457428, "learning_rate": 0.002, "loss": 2.3335, "step": 324890 }, { "epoch": 1.2559725379227165, "grad_norm": 0.11732026934623718, "learning_rate": 0.002, "loss": 2.3424, "step": 324900 }, { "epoch": 1.2560111951260997, "grad_norm": 0.09119915962219238, "learning_rate": 0.002, "loss": 2.3236, "step": 324910 }, { "epoch": 1.256049852329483, "grad_norm": 0.10209295153617859, "learning_rate": 0.002, "loss": 2.3207, "step": 324920 }, { "epoch": 1.2560885095328664, "grad_norm": 0.1162097305059433, "learning_rate": 0.002, "loss": 2.3387, "step": 324930 }, { "epoch": 1.2561271667362497, "grad_norm": 0.1079174280166626, "learning_rate": 0.002, "loss": 2.3507, "step": 324940 }, { "epoch": 1.256165823939633, "grad_norm": 0.12759262323379517, "learning_rate": 0.002, "loss": 2.3173, "step": 324950 }, { "epoch": 1.2562044811430162, "grad_norm": 0.10023845732212067, "learning_rate": 0.002, "loss": 2.3345, "step": 324960 }, { "epoch": 1.2562431383463994, "grad_norm": 0.10516351461410522, "learning_rate": 0.002, "loss": 2.3306, "step": 324970 }, { "epoch": 1.2562817955497827, "grad_norm": 0.11115523427724838, "learning_rate": 0.002, "loss": 2.3434, "step": 324980 }, { "epoch": 1.256320452753166, "grad_norm": 0.11688680946826935, "learning_rate": 0.002, "loss": 2.3419, "step": 324990 }, { "epoch": 1.2563591099565494, "grad_norm": 0.09157036989927292, "learning_rate": 0.002, "loss": 2.3495, "step": 325000 }, { "epoch": 1.2563977671599327, "grad_norm": 0.09125621616840363, "learning_rate": 0.002, "loss": 2.3307, "step": 325010 }, { "epoch": 1.256436424363316, "grad_norm": 0.1386650800704956, "learning_rate": 0.002, "loss": 2.3484, "step": 325020 }, { "epoch": 1.2564750815666992, "grad_norm": 0.10481204837560654, "learning_rate": 0.002, "loss": 2.3345, "step": 325030 }, { "epoch": 1.2565137387700824, "grad_norm": 0.11478083580732346, "learning_rate": 0.002, "loss": 2.3292, "step": 325040 }, { "epoch": 1.2565523959734657, "grad_norm": 0.09643019735813141, "learning_rate": 0.002, "loss": 2.3215, "step": 325050 }, { "epoch": 1.256591053176849, "grad_norm": 0.11193980276584625, "learning_rate": 0.002, "loss": 2.3254, "step": 325060 }, { "epoch": 1.2566297103802322, "grad_norm": 0.10178869217634201, "learning_rate": 0.002, "loss": 2.3346, "step": 325070 }, { "epoch": 1.2566683675836154, "grad_norm": 0.13125890493392944, "learning_rate": 0.002, "loss": 2.336, "step": 325080 }, { "epoch": 1.2567070247869987, "grad_norm": 0.09475627541542053, "learning_rate": 0.002, "loss": 2.337, "step": 325090 }, { "epoch": 1.2567456819903822, "grad_norm": 0.0955866202712059, "learning_rate": 0.002, "loss": 2.3402, "step": 325100 }, { "epoch": 1.2567843391937654, "grad_norm": 0.10707160085439682, "learning_rate": 0.002, "loss": 2.331, "step": 325110 }, { "epoch": 1.2568229963971487, "grad_norm": 0.09212376177310944, "learning_rate": 0.002, "loss": 2.3488, "step": 325120 }, { "epoch": 1.256861653600532, "grad_norm": 0.12452114373445511, "learning_rate": 0.002, "loss": 2.3365, "step": 325130 }, { "epoch": 1.2569003108039152, "grad_norm": 0.12266793102025986, "learning_rate": 0.002, "loss": 2.349, "step": 325140 }, { "epoch": 1.2569389680072984, "grad_norm": 0.1223602294921875, "learning_rate": 0.002, "loss": 2.3381, "step": 325150 }, { "epoch": 1.2569776252106817, "grad_norm": 0.1065744161605835, "learning_rate": 0.002, "loss": 2.3145, "step": 325160 }, { "epoch": 1.2570162824140652, "grad_norm": 0.09754288196563721, "learning_rate": 0.002, "loss": 2.3206, "step": 325170 }, { "epoch": 1.2570549396174484, "grad_norm": 0.1234099268913269, "learning_rate": 0.002, "loss": 2.3288, "step": 325180 }, { "epoch": 1.2570935968208317, "grad_norm": 0.1373051553964615, "learning_rate": 0.002, "loss": 2.3424, "step": 325190 }, { "epoch": 1.257132254024215, "grad_norm": 0.09725300222635269, "learning_rate": 0.002, "loss": 2.3424, "step": 325200 }, { "epoch": 1.2571709112275982, "grad_norm": 0.10427434742450714, "learning_rate": 0.002, "loss": 2.3407, "step": 325210 }, { "epoch": 1.2572095684309814, "grad_norm": 0.1223810687661171, "learning_rate": 0.002, "loss": 2.3328, "step": 325220 }, { "epoch": 1.2572482256343647, "grad_norm": 0.09645023941993713, "learning_rate": 0.002, "loss": 2.3338, "step": 325230 }, { "epoch": 1.257286882837748, "grad_norm": 0.08541225641965866, "learning_rate": 0.002, "loss": 2.3462, "step": 325240 }, { "epoch": 1.2573255400411312, "grad_norm": 0.09978261590003967, "learning_rate": 0.002, "loss": 2.3331, "step": 325250 }, { "epoch": 1.2573641972445144, "grad_norm": 0.11347880959510803, "learning_rate": 0.002, "loss": 2.3515, "step": 325260 }, { "epoch": 1.257402854447898, "grad_norm": 0.11747587472200394, "learning_rate": 0.002, "loss": 2.3318, "step": 325270 }, { "epoch": 1.2574415116512812, "grad_norm": 0.1173802837729454, "learning_rate": 0.002, "loss": 2.3354, "step": 325280 }, { "epoch": 1.2574801688546644, "grad_norm": 0.10309915244579315, "learning_rate": 0.002, "loss": 2.3306, "step": 325290 }, { "epoch": 1.2575188260580477, "grad_norm": 0.0944056510925293, "learning_rate": 0.002, "loss": 2.3289, "step": 325300 }, { "epoch": 1.257557483261431, "grad_norm": 0.1091337502002716, "learning_rate": 0.002, "loss": 2.3315, "step": 325310 }, { "epoch": 1.2575961404648142, "grad_norm": 0.11433182656764984, "learning_rate": 0.002, "loss": 2.3431, "step": 325320 }, { "epoch": 1.2576347976681974, "grad_norm": 0.13041917979717255, "learning_rate": 0.002, "loss": 2.3342, "step": 325330 }, { "epoch": 1.2576734548715809, "grad_norm": 0.1184714138507843, "learning_rate": 0.002, "loss": 2.3365, "step": 325340 }, { "epoch": 1.2577121120749641, "grad_norm": 0.0960577055811882, "learning_rate": 0.002, "loss": 2.3324, "step": 325350 }, { "epoch": 1.2577507692783474, "grad_norm": 0.09737391024827957, "learning_rate": 0.002, "loss": 2.322, "step": 325360 }, { "epoch": 1.2577894264817306, "grad_norm": 0.11727464944124222, "learning_rate": 0.002, "loss": 2.3344, "step": 325370 }, { "epoch": 1.257828083685114, "grad_norm": 0.1169314980506897, "learning_rate": 0.002, "loss": 2.3309, "step": 325380 }, { "epoch": 1.2578667408884971, "grad_norm": 0.09046033769845963, "learning_rate": 0.002, "loss": 2.3312, "step": 325390 }, { "epoch": 1.2579053980918804, "grad_norm": 0.09294285625219345, "learning_rate": 0.002, "loss": 2.3245, "step": 325400 }, { "epoch": 1.2579440552952637, "grad_norm": 0.1019824743270874, "learning_rate": 0.002, "loss": 2.3294, "step": 325410 }, { "epoch": 1.257982712498647, "grad_norm": 0.11393209546804428, "learning_rate": 0.002, "loss": 2.3441, "step": 325420 }, { "epoch": 1.2580213697020302, "grad_norm": 0.10201390832662582, "learning_rate": 0.002, "loss": 2.3194, "step": 325430 }, { "epoch": 1.2580600269054136, "grad_norm": 0.09985291212797165, "learning_rate": 0.002, "loss": 2.3173, "step": 325440 }, { "epoch": 1.2580986841087969, "grad_norm": 0.10562863200902939, "learning_rate": 0.002, "loss": 2.3168, "step": 325450 }, { "epoch": 1.2581373413121801, "grad_norm": 0.09784512966871262, "learning_rate": 0.002, "loss": 2.3448, "step": 325460 }, { "epoch": 1.2581759985155634, "grad_norm": 0.10671749711036682, "learning_rate": 0.002, "loss": 2.3393, "step": 325470 }, { "epoch": 1.2582146557189466, "grad_norm": 0.12761123478412628, "learning_rate": 0.002, "loss": 2.3296, "step": 325480 }, { "epoch": 1.25825331292233, "grad_norm": 0.09099403768777847, "learning_rate": 0.002, "loss": 2.3164, "step": 325490 }, { "epoch": 1.2582919701257131, "grad_norm": 0.0952981561422348, "learning_rate": 0.002, "loss": 2.3284, "step": 325500 }, { "epoch": 1.2583306273290966, "grad_norm": 0.12638869881629944, "learning_rate": 0.002, "loss": 2.3417, "step": 325510 }, { "epoch": 1.2583692845324799, "grad_norm": 0.09895528107881546, "learning_rate": 0.002, "loss": 2.3365, "step": 325520 }, { "epoch": 1.2584079417358631, "grad_norm": 0.10875288397073746, "learning_rate": 0.002, "loss": 2.3365, "step": 325530 }, { "epoch": 1.2584465989392464, "grad_norm": 0.10601435601711273, "learning_rate": 0.002, "loss": 2.3195, "step": 325540 }, { "epoch": 1.2584852561426296, "grad_norm": 0.09812024980783463, "learning_rate": 0.002, "loss": 2.3279, "step": 325550 }, { "epoch": 1.2585239133460129, "grad_norm": 0.10472586005926132, "learning_rate": 0.002, "loss": 2.3194, "step": 325560 }, { "epoch": 1.2585625705493961, "grad_norm": 0.09921694546937943, "learning_rate": 0.002, "loss": 2.3445, "step": 325570 }, { "epoch": 1.2586012277527794, "grad_norm": 0.10477118194103241, "learning_rate": 0.002, "loss": 2.3238, "step": 325580 }, { "epoch": 1.2586398849561626, "grad_norm": 0.09114798158407211, "learning_rate": 0.002, "loss": 2.3455, "step": 325590 }, { "epoch": 1.2586785421595459, "grad_norm": 0.0994727835059166, "learning_rate": 0.002, "loss": 2.3254, "step": 325600 }, { "epoch": 1.2587171993629294, "grad_norm": 0.09729986637830734, "learning_rate": 0.002, "loss": 2.3178, "step": 325610 }, { "epoch": 1.2587558565663126, "grad_norm": 0.1109003946185112, "learning_rate": 0.002, "loss": 2.3484, "step": 325620 }, { "epoch": 1.2587945137696959, "grad_norm": 0.09484990686178207, "learning_rate": 0.002, "loss": 2.349, "step": 325630 }, { "epoch": 1.2588331709730791, "grad_norm": 0.11547428369522095, "learning_rate": 0.002, "loss": 2.3236, "step": 325640 }, { "epoch": 1.2588718281764624, "grad_norm": 0.11740986257791519, "learning_rate": 0.002, "loss": 2.3432, "step": 325650 }, { "epoch": 1.2589104853798456, "grad_norm": 0.10123540461063385, "learning_rate": 0.002, "loss": 2.3243, "step": 325660 }, { "epoch": 1.258949142583229, "grad_norm": 0.1007891520857811, "learning_rate": 0.002, "loss": 2.3374, "step": 325670 }, { "epoch": 1.2589877997866123, "grad_norm": 0.1192469522356987, "learning_rate": 0.002, "loss": 2.3205, "step": 325680 }, { "epoch": 1.2590264569899956, "grad_norm": 0.10603582859039307, "learning_rate": 0.002, "loss": 2.3323, "step": 325690 }, { "epoch": 1.2590651141933789, "grad_norm": 0.10165327042341232, "learning_rate": 0.002, "loss": 2.3174, "step": 325700 }, { "epoch": 1.259103771396762, "grad_norm": 0.11069201678037643, "learning_rate": 0.002, "loss": 2.3397, "step": 325710 }, { "epoch": 1.2591424286001454, "grad_norm": 0.10442770272493362, "learning_rate": 0.002, "loss": 2.3443, "step": 325720 }, { "epoch": 1.2591810858035286, "grad_norm": 0.10615067183971405, "learning_rate": 0.002, "loss": 2.3255, "step": 325730 }, { "epoch": 1.2592197430069119, "grad_norm": 0.09877041727304459, "learning_rate": 0.002, "loss": 2.3155, "step": 325740 }, { "epoch": 1.2592584002102951, "grad_norm": 0.1348433494567871, "learning_rate": 0.002, "loss": 2.3338, "step": 325750 }, { "epoch": 1.2592970574136784, "grad_norm": 0.12218829244375229, "learning_rate": 0.002, "loss": 2.3267, "step": 325760 }, { "epoch": 1.2593357146170616, "grad_norm": 0.10086431354284286, "learning_rate": 0.002, "loss": 2.3406, "step": 325770 }, { "epoch": 1.259374371820445, "grad_norm": 0.11661353707313538, "learning_rate": 0.002, "loss": 2.3208, "step": 325780 }, { "epoch": 1.2594130290238283, "grad_norm": 0.08707497268915176, "learning_rate": 0.002, "loss": 2.3397, "step": 325790 }, { "epoch": 1.2594516862272116, "grad_norm": 0.1499214470386505, "learning_rate": 0.002, "loss": 2.3372, "step": 325800 }, { "epoch": 1.2594903434305948, "grad_norm": 0.10032685846090317, "learning_rate": 0.002, "loss": 2.3234, "step": 325810 }, { "epoch": 1.259529000633978, "grad_norm": 0.10852710157632828, "learning_rate": 0.002, "loss": 2.3307, "step": 325820 }, { "epoch": 1.2595676578373614, "grad_norm": 0.11643079668283463, "learning_rate": 0.002, "loss": 2.3227, "step": 325830 }, { "epoch": 1.2596063150407448, "grad_norm": 0.11143416911363602, "learning_rate": 0.002, "loss": 2.324, "step": 325840 }, { "epoch": 1.259644972244128, "grad_norm": 0.10897476971149445, "learning_rate": 0.002, "loss": 2.3391, "step": 325850 }, { "epoch": 1.2596836294475113, "grad_norm": 0.09578459709882736, "learning_rate": 0.002, "loss": 2.3336, "step": 325860 }, { "epoch": 1.2597222866508946, "grad_norm": 0.10422952473163605, "learning_rate": 0.002, "loss": 2.3379, "step": 325870 }, { "epoch": 1.2597609438542778, "grad_norm": 0.10057570785284042, "learning_rate": 0.002, "loss": 2.3365, "step": 325880 }, { "epoch": 1.259799601057661, "grad_norm": 0.10279736667871475, "learning_rate": 0.002, "loss": 2.3296, "step": 325890 }, { "epoch": 1.2598382582610443, "grad_norm": 0.10968570411205292, "learning_rate": 0.002, "loss": 2.3373, "step": 325900 }, { "epoch": 1.2598769154644276, "grad_norm": 0.09762471914291382, "learning_rate": 0.002, "loss": 2.3134, "step": 325910 }, { "epoch": 1.2599155726678108, "grad_norm": 0.1026439517736435, "learning_rate": 0.002, "loss": 2.3268, "step": 325920 }, { "epoch": 1.259954229871194, "grad_norm": 0.10050418972969055, "learning_rate": 0.002, "loss": 2.3247, "step": 325930 }, { "epoch": 1.2599928870745774, "grad_norm": 0.10055512189865112, "learning_rate": 0.002, "loss": 2.3482, "step": 325940 }, { "epoch": 1.2600315442779608, "grad_norm": 0.11480934917926788, "learning_rate": 0.002, "loss": 2.3283, "step": 325950 }, { "epoch": 1.260070201481344, "grad_norm": 0.12158878892660141, "learning_rate": 0.002, "loss": 2.334, "step": 325960 }, { "epoch": 1.2601088586847273, "grad_norm": 0.11016429215669632, "learning_rate": 0.002, "loss": 2.3341, "step": 325970 }, { "epoch": 1.2601475158881106, "grad_norm": 0.1013168916106224, "learning_rate": 0.002, "loss": 2.3297, "step": 325980 }, { "epoch": 1.2601861730914938, "grad_norm": 0.10989508777856827, "learning_rate": 0.002, "loss": 2.3289, "step": 325990 }, { "epoch": 1.260224830294877, "grad_norm": 0.10646646469831467, "learning_rate": 0.002, "loss": 2.3263, "step": 326000 }, { "epoch": 1.2602634874982606, "grad_norm": 0.09758585691452026, "learning_rate": 0.002, "loss": 2.3382, "step": 326010 }, { "epoch": 1.2603021447016438, "grad_norm": 0.11484365910291672, "learning_rate": 0.002, "loss": 2.3423, "step": 326020 }, { "epoch": 1.260340801905027, "grad_norm": 0.10497698932886124, "learning_rate": 0.002, "loss": 2.3477, "step": 326030 }, { "epoch": 1.2603794591084103, "grad_norm": 0.11317858844995499, "learning_rate": 0.002, "loss": 2.3357, "step": 326040 }, { "epoch": 1.2604181163117936, "grad_norm": 0.08910156786441803, "learning_rate": 0.002, "loss": 2.3359, "step": 326050 }, { "epoch": 1.2604567735151768, "grad_norm": 0.10349009931087494, "learning_rate": 0.002, "loss": 2.3368, "step": 326060 }, { "epoch": 1.26049543071856, "grad_norm": 0.1077611893415451, "learning_rate": 0.002, "loss": 2.3178, "step": 326070 }, { "epoch": 1.2605340879219433, "grad_norm": 0.10131070762872696, "learning_rate": 0.002, "loss": 2.3367, "step": 326080 }, { "epoch": 1.2605727451253266, "grad_norm": 0.1114729791879654, "learning_rate": 0.002, "loss": 2.3264, "step": 326090 }, { "epoch": 1.2606114023287098, "grad_norm": 0.10723952203989029, "learning_rate": 0.002, "loss": 2.3386, "step": 326100 }, { "epoch": 1.260650059532093, "grad_norm": 0.10471288859844208, "learning_rate": 0.002, "loss": 2.3322, "step": 326110 }, { "epoch": 1.2606887167354766, "grad_norm": 0.1356627196073532, "learning_rate": 0.002, "loss": 2.3279, "step": 326120 }, { "epoch": 1.2607273739388598, "grad_norm": 0.10309799760580063, "learning_rate": 0.002, "loss": 2.3252, "step": 326130 }, { "epoch": 1.260766031142243, "grad_norm": 0.10922736674547195, "learning_rate": 0.002, "loss": 2.3338, "step": 326140 }, { "epoch": 1.2608046883456263, "grad_norm": 0.09780388325452805, "learning_rate": 0.002, "loss": 2.3348, "step": 326150 }, { "epoch": 1.2608433455490096, "grad_norm": 0.10111238062381744, "learning_rate": 0.002, "loss": 2.3498, "step": 326160 }, { "epoch": 1.2608820027523928, "grad_norm": 0.11071167141199112, "learning_rate": 0.002, "loss": 2.3283, "step": 326170 }, { "epoch": 1.2609206599557763, "grad_norm": 0.10419418662786484, "learning_rate": 0.002, "loss": 2.3665, "step": 326180 }, { "epoch": 1.2609593171591595, "grad_norm": 0.13271836936473846, "learning_rate": 0.002, "loss": 2.3339, "step": 326190 }, { "epoch": 1.2609979743625428, "grad_norm": 0.10130574554204941, "learning_rate": 0.002, "loss": 2.3417, "step": 326200 }, { "epoch": 1.261036631565926, "grad_norm": 0.11065841466188431, "learning_rate": 0.002, "loss": 2.34, "step": 326210 }, { "epoch": 1.2610752887693093, "grad_norm": 0.10128871351480484, "learning_rate": 0.002, "loss": 2.3284, "step": 326220 }, { "epoch": 1.2611139459726926, "grad_norm": 0.11795809119939804, "learning_rate": 0.002, "loss": 2.3488, "step": 326230 }, { "epoch": 1.2611526031760758, "grad_norm": 0.10937850177288055, "learning_rate": 0.002, "loss": 2.3403, "step": 326240 }, { "epoch": 1.261191260379459, "grad_norm": 0.10301138460636139, "learning_rate": 0.002, "loss": 2.3252, "step": 326250 }, { "epoch": 1.2612299175828423, "grad_norm": 0.1003199890255928, "learning_rate": 0.002, "loss": 2.3211, "step": 326260 }, { "epoch": 1.2612685747862256, "grad_norm": 0.09486802667379379, "learning_rate": 0.002, "loss": 2.3182, "step": 326270 }, { "epoch": 1.2613072319896088, "grad_norm": 0.12239663302898407, "learning_rate": 0.002, "loss": 2.335, "step": 326280 }, { "epoch": 1.2613458891929923, "grad_norm": 0.10332158952951431, "learning_rate": 0.002, "loss": 2.3368, "step": 326290 }, { "epoch": 1.2613845463963755, "grad_norm": 0.10419623553752899, "learning_rate": 0.002, "loss": 2.3232, "step": 326300 }, { "epoch": 1.2614232035997588, "grad_norm": 0.08871430903673172, "learning_rate": 0.002, "loss": 2.345, "step": 326310 }, { "epoch": 1.261461860803142, "grad_norm": 0.13712400197982788, "learning_rate": 0.002, "loss": 2.3249, "step": 326320 }, { "epoch": 1.2615005180065253, "grad_norm": 0.09635666012763977, "learning_rate": 0.002, "loss": 2.3317, "step": 326330 }, { "epoch": 1.2615391752099085, "grad_norm": 0.0998094230890274, "learning_rate": 0.002, "loss": 2.3479, "step": 326340 }, { "epoch": 1.261577832413292, "grad_norm": 0.21075473725795746, "learning_rate": 0.002, "loss": 2.3344, "step": 326350 }, { "epoch": 1.2616164896166753, "grad_norm": 0.1039869412779808, "learning_rate": 0.002, "loss": 2.3382, "step": 326360 }, { "epoch": 1.2616551468200585, "grad_norm": 0.0978020578622818, "learning_rate": 0.002, "loss": 2.3292, "step": 326370 }, { "epoch": 1.2616938040234418, "grad_norm": 0.10820144414901733, "learning_rate": 0.002, "loss": 2.338, "step": 326380 }, { "epoch": 1.261732461226825, "grad_norm": 0.11902282387018204, "learning_rate": 0.002, "loss": 2.3437, "step": 326390 }, { "epoch": 1.2617711184302083, "grad_norm": 0.09906861186027527, "learning_rate": 0.002, "loss": 2.3402, "step": 326400 }, { "epoch": 1.2618097756335915, "grad_norm": 0.09596288204193115, "learning_rate": 0.002, "loss": 2.3312, "step": 326410 }, { "epoch": 1.2618484328369748, "grad_norm": 0.12837019562721252, "learning_rate": 0.002, "loss": 2.339, "step": 326420 }, { "epoch": 1.261887090040358, "grad_norm": 0.10290003567934036, "learning_rate": 0.002, "loss": 2.3343, "step": 326430 }, { "epoch": 1.2619257472437413, "grad_norm": 0.11014819890260696, "learning_rate": 0.002, "loss": 2.3285, "step": 326440 }, { "epoch": 1.2619644044471245, "grad_norm": 0.11158356070518494, "learning_rate": 0.002, "loss": 2.332, "step": 326450 }, { "epoch": 1.262003061650508, "grad_norm": 0.09634177386760712, "learning_rate": 0.002, "loss": 2.3379, "step": 326460 }, { "epoch": 1.2620417188538913, "grad_norm": 0.10639488697052002, "learning_rate": 0.002, "loss": 2.3246, "step": 326470 }, { "epoch": 1.2620803760572745, "grad_norm": 0.11883672326803207, "learning_rate": 0.002, "loss": 2.3302, "step": 326480 }, { "epoch": 1.2621190332606578, "grad_norm": 0.13678741455078125, "learning_rate": 0.002, "loss": 2.3371, "step": 326490 }, { "epoch": 1.262157690464041, "grad_norm": 0.10750120133161545, "learning_rate": 0.002, "loss": 2.3292, "step": 326500 }, { "epoch": 1.2621963476674243, "grad_norm": 0.16615253686904907, "learning_rate": 0.002, "loss": 2.3447, "step": 326510 }, { "epoch": 1.2622350048708078, "grad_norm": 0.11281480640172958, "learning_rate": 0.002, "loss": 2.3259, "step": 326520 }, { "epoch": 1.262273662074191, "grad_norm": 0.1095518171787262, "learning_rate": 0.002, "loss": 2.3422, "step": 326530 }, { "epoch": 1.2623123192775743, "grad_norm": 0.10316134244203568, "learning_rate": 0.002, "loss": 2.3212, "step": 326540 }, { "epoch": 1.2623509764809575, "grad_norm": 0.09717752784490585, "learning_rate": 0.002, "loss": 2.3333, "step": 326550 }, { "epoch": 1.2623896336843408, "grad_norm": 0.11651014536619186, "learning_rate": 0.002, "loss": 2.345, "step": 326560 }, { "epoch": 1.262428290887724, "grad_norm": 0.11065381020307541, "learning_rate": 0.002, "loss": 2.3357, "step": 326570 }, { "epoch": 1.2624669480911073, "grad_norm": 0.1010802835226059, "learning_rate": 0.002, "loss": 2.3418, "step": 326580 }, { "epoch": 1.2625056052944905, "grad_norm": 0.09426004439592361, "learning_rate": 0.002, "loss": 2.3414, "step": 326590 }, { "epoch": 1.2625442624978738, "grad_norm": 0.16192200779914856, "learning_rate": 0.002, "loss": 2.3411, "step": 326600 }, { "epoch": 1.262582919701257, "grad_norm": 0.09397521615028381, "learning_rate": 0.002, "loss": 2.3429, "step": 326610 }, { "epoch": 1.2626215769046403, "grad_norm": 0.13081824779510498, "learning_rate": 0.002, "loss": 2.342, "step": 326620 }, { "epoch": 1.2626602341080237, "grad_norm": 0.09981647878885269, "learning_rate": 0.002, "loss": 2.3335, "step": 326630 }, { "epoch": 1.262698891311407, "grad_norm": 0.12131404131650925, "learning_rate": 0.002, "loss": 2.3399, "step": 326640 }, { "epoch": 1.2627375485147903, "grad_norm": 0.11333523690700531, "learning_rate": 0.002, "loss": 2.326, "step": 326650 }, { "epoch": 1.2627762057181735, "grad_norm": 0.10390151292085648, "learning_rate": 0.002, "loss": 2.3363, "step": 326660 }, { "epoch": 1.2628148629215568, "grad_norm": 0.10886949300765991, "learning_rate": 0.002, "loss": 2.329, "step": 326670 }, { "epoch": 1.26285352012494, "grad_norm": 0.10427232086658478, "learning_rate": 0.002, "loss": 2.3501, "step": 326680 }, { "epoch": 1.2628921773283235, "grad_norm": 0.10599779337644577, "learning_rate": 0.002, "loss": 2.3293, "step": 326690 }, { "epoch": 1.2629308345317067, "grad_norm": 0.10859518498182297, "learning_rate": 0.002, "loss": 2.3345, "step": 326700 }, { "epoch": 1.26296949173509, "grad_norm": 0.09477894753217697, "learning_rate": 0.002, "loss": 2.3495, "step": 326710 }, { "epoch": 1.2630081489384732, "grad_norm": 0.11893752217292786, "learning_rate": 0.002, "loss": 2.3346, "step": 326720 }, { "epoch": 1.2630468061418565, "grad_norm": 0.10002786666154861, "learning_rate": 0.002, "loss": 2.3356, "step": 326730 }, { "epoch": 1.2630854633452397, "grad_norm": 0.110787034034729, "learning_rate": 0.002, "loss": 2.3244, "step": 326740 }, { "epoch": 1.263124120548623, "grad_norm": 0.10369330644607544, "learning_rate": 0.002, "loss": 2.3325, "step": 326750 }, { "epoch": 1.2631627777520062, "grad_norm": 0.09941741079092026, "learning_rate": 0.002, "loss": 2.3263, "step": 326760 }, { "epoch": 1.2632014349553895, "grad_norm": 0.11788026988506317, "learning_rate": 0.002, "loss": 2.3348, "step": 326770 }, { "epoch": 1.2632400921587728, "grad_norm": 0.10224135965108871, "learning_rate": 0.002, "loss": 2.3288, "step": 326780 }, { "epoch": 1.263278749362156, "grad_norm": 0.10844755917787552, "learning_rate": 0.002, "loss": 2.3355, "step": 326790 }, { "epoch": 1.2633174065655395, "grad_norm": 0.12195967137813568, "learning_rate": 0.002, "loss": 2.346, "step": 326800 }, { "epoch": 1.2633560637689227, "grad_norm": 0.10885465145111084, "learning_rate": 0.002, "loss": 2.3254, "step": 326810 }, { "epoch": 1.263394720972306, "grad_norm": 0.09080061316490173, "learning_rate": 0.002, "loss": 2.3336, "step": 326820 }, { "epoch": 1.2634333781756892, "grad_norm": 0.1021483764052391, "learning_rate": 0.002, "loss": 2.3353, "step": 326830 }, { "epoch": 1.2634720353790725, "grad_norm": 0.12462744116783142, "learning_rate": 0.002, "loss": 2.3286, "step": 326840 }, { "epoch": 1.2635106925824557, "grad_norm": 0.11733567714691162, "learning_rate": 0.002, "loss": 2.3251, "step": 326850 }, { "epoch": 1.2635493497858392, "grad_norm": 0.10920538753271103, "learning_rate": 0.002, "loss": 2.3284, "step": 326860 }, { "epoch": 1.2635880069892225, "grad_norm": 0.10772059857845306, "learning_rate": 0.002, "loss": 2.3429, "step": 326870 }, { "epoch": 1.2636266641926057, "grad_norm": 0.09897726029157639, "learning_rate": 0.002, "loss": 2.3448, "step": 326880 }, { "epoch": 1.263665321395989, "grad_norm": 0.09287161380052567, "learning_rate": 0.002, "loss": 2.3253, "step": 326890 }, { "epoch": 1.2637039785993722, "grad_norm": 0.10096503049135208, "learning_rate": 0.002, "loss": 2.3214, "step": 326900 }, { "epoch": 1.2637426358027555, "grad_norm": 0.10740195959806442, "learning_rate": 0.002, "loss": 2.3326, "step": 326910 }, { "epoch": 1.2637812930061387, "grad_norm": 0.12665532529354095, "learning_rate": 0.002, "loss": 2.3344, "step": 326920 }, { "epoch": 1.263819950209522, "grad_norm": 0.09911788254976273, "learning_rate": 0.002, "loss": 2.3299, "step": 326930 }, { "epoch": 1.2638586074129052, "grad_norm": 0.11081048846244812, "learning_rate": 0.002, "loss": 2.3277, "step": 326940 }, { "epoch": 1.2638972646162885, "grad_norm": 0.10057511925697327, "learning_rate": 0.002, "loss": 2.3308, "step": 326950 }, { "epoch": 1.263935921819672, "grad_norm": 0.09975273907184601, "learning_rate": 0.002, "loss": 2.3399, "step": 326960 }, { "epoch": 1.2639745790230552, "grad_norm": 0.09870759397745132, "learning_rate": 0.002, "loss": 2.33, "step": 326970 }, { "epoch": 1.2640132362264385, "grad_norm": 0.1000353991985321, "learning_rate": 0.002, "loss": 2.3307, "step": 326980 }, { "epoch": 1.2640518934298217, "grad_norm": 0.08643193542957306, "learning_rate": 0.002, "loss": 2.3341, "step": 326990 }, { "epoch": 1.264090550633205, "grad_norm": 0.1016695648431778, "learning_rate": 0.002, "loss": 2.3288, "step": 327000 }, { "epoch": 1.2641292078365882, "grad_norm": 0.11448567360639572, "learning_rate": 0.002, "loss": 2.3509, "step": 327010 }, { "epoch": 1.2641678650399715, "grad_norm": 0.1103028953075409, "learning_rate": 0.002, "loss": 2.3386, "step": 327020 }, { "epoch": 1.264206522243355, "grad_norm": 0.10044880211353302, "learning_rate": 0.002, "loss": 2.3361, "step": 327030 }, { "epoch": 1.2642451794467382, "grad_norm": 0.10634750872850418, "learning_rate": 0.002, "loss": 2.3405, "step": 327040 }, { "epoch": 1.2642838366501215, "grad_norm": 0.10897121578454971, "learning_rate": 0.002, "loss": 2.3345, "step": 327050 }, { "epoch": 1.2643224938535047, "grad_norm": 0.10211598128080368, "learning_rate": 0.002, "loss": 2.3263, "step": 327060 }, { "epoch": 1.264361151056888, "grad_norm": 0.13065731525421143, "learning_rate": 0.002, "loss": 2.3403, "step": 327070 }, { "epoch": 1.2643998082602712, "grad_norm": 0.08493944257497787, "learning_rate": 0.002, "loss": 2.3219, "step": 327080 }, { "epoch": 1.2644384654636545, "grad_norm": 0.09587639570236206, "learning_rate": 0.002, "loss": 2.3449, "step": 327090 }, { "epoch": 1.2644771226670377, "grad_norm": 0.10556354373693466, "learning_rate": 0.002, "loss": 2.3279, "step": 327100 }, { "epoch": 1.264515779870421, "grad_norm": 0.10257713496685028, "learning_rate": 0.002, "loss": 2.3316, "step": 327110 }, { "epoch": 1.2645544370738042, "grad_norm": 0.09609122574329376, "learning_rate": 0.002, "loss": 2.3268, "step": 327120 }, { "epoch": 1.2645930942771877, "grad_norm": 0.12300848960876465, "learning_rate": 0.002, "loss": 2.33, "step": 327130 }, { "epoch": 1.264631751480571, "grad_norm": 0.0994371771812439, "learning_rate": 0.002, "loss": 2.3377, "step": 327140 }, { "epoch": 1.2646704086839542, "grad_norm": 0.08351749926805496, "learning_rate": 0.002, "loss": 2.3337, "step": 327150 }, { "epoch": 1.2647090658873374, "grad_norm": 0.11216479539871216, "learning_rate": 0.002, "loss": 2.3377, "step": 327160 }, { "epoch": 1.2647477230907207, "grad_norm": 0.10732587426900864, "learning_rate": 0.002, "loss": 2.3393, "step": 327170 }, { "epoch": 1.264786380294104, "grad_norm": 0.12903602421283722, "learning_rate": 0.002, "loss": 2.3406, "step": 327180 }, { "epoch": 1.2648250374974872, "grad_norm": 0.10329445451498032, "learning_rate": 0.002, "loss": 2.3204, "step": 327190 }, { "epoch": 1.2648636947008707, "grad_norm": 0.10292506217956543, "learning_rate": 0.002, "loss": 2.3335, "step": 327200 }, { "epoch": 1.264902351904254, "grad_norm": 0.25355979800224304, "learning_rate": 0.002, "loss": 2.3328, "step": 327210 }, { "epoch": 1.2649410091076372, "grad_norm": 0.10248008370399475, "learning_rate": 0.002, "loss": 2.3264, "step": 327220 }, { "epoch": 1.2649796663110204, "grad_norm": 0.10794755816459656, "learning_rate": 0.002, "loss": 2.3351, "step": 327230 }, { "epoch": 1.2650183235144037, "grad_norm": 0.2157856971025467, "learning_rate": 0.002, "loss": 2.3396, "step": 327240 }, { "epoch": 1.265056980717787, "grad_norm": 0.12279105186462402, "learning_rate": 0.002, "loss": 2.3467, "step": 327250 }, { "epoch": 1.2650956379211702, "grad_norm": 0.254301518201828, "learning_rate": 0.002, "loss": 2.3302, "step": 327260 }, { "epoch": 1.2651342951245534, "grad_norm": 0.12824511528015137, "learning_rate": 0.002, "loss": 2.3561, "step": 327270 }, { "epoch": 1.2651729523279367, "grad_norm": 0.09473264962434769, "learning_rate": 0.002, "loss": 2.334, "step": 327280 }, { "epoch": 1.26521160953132, "grad_norm": 0.09445297718048096, "learning_rate": 0.002, "loss": 2.3331, "step": 327290 }, { "epoch": 1.2652502667347034, "grad_norm": 0.12316767871379852, "learning_rate": 0.002, "loss": 2.3279, "step": 327300 }, { "epoch": 1.2652889239380867, "grad_norm": 0.09455137699842453, "learning_rate": 0.002, "loss": 2.3221, "step": 327310 }, { "epoch": 1.26532758114147, "grad_norm": 0.10438383370637894, "learning_rate": 0.002, "loss": 2.3337, "step": 327320 }, { "epoch": 1.2653662383448532, "grad_norm": 0.10898282378911972, "learning_rate": 0.002, "loss": 2.3162, "step": 327330 }, { "epoch": 1.2654048955482364, "grad_norm": 0.11414649337530136, "learning_rate": 0.002, "loss": 2.3338, "step": 327340 }, { "epoch": 1.2654435527516197, "grad_norm": 0.1032983660697937, "learning_rate": 0.002, "loss": 2.3271, "step": 327350 }, { "epoch": 1.265482209955003, "grad_norm": 0.12052953243255615, "learning_rate": 0.002, "loss": 2.3263, "step": 327360 }, { "epoch": 1.2655208671583864, "grad_norm": 0.12255626171827316, "learning_rate": 0.002, "loss": 2.3441, "step": 327370 }, { "epoch": 1.2655595243617697, "grad_norm": 0.09772976487874985, "learning_rate": 0.002, "loss": 2.3403, "step": 327380 }, { "epoch": 1.265598181565153, "grad_norm": 0.09752926230430603, "learning_rate": 0.002, "loss": 2.35, "step": 327390 }, { "epoch": 1.2656368387685362, "grad_norm": 0.11025545001029968, "learning_rate": 0.002, "loss": 2.3318, "step": 327400 }, { "epoch": 1.2656754959719194, "grad_norm": 0.11592836678028107, "learning_rate": 0.002, "loss": 2.3125, "step": 327410 }, { "epoch": 1.2657141531753027, "grad_norm": 0.09760356694459915, "learning_rate": 0.002, "loss": 2.3353, "step": 327420 }, { "epoch": 1.265752810378686, "grad_norm": 0.17295199632644653, "learning_rate": 0.002, "loss": 2.3418, "step": 327430 }, { "epoch": 1.2657914675820692, "grad_norm": 0.11333099007606506, "learning_rate": 0.002, "loss": 2.3415, "step": 327440 }, { "epoch": 1.2658301247854524, "grad_norm": 0.10388067364692688, "learning_rate": 0.002, "loss": 2.3288, "step": 327450 }, { "epoch": 1.2658687819888357, "grad_norm": 0.1109703928232193, "learning_rate": 0.002, "loss": 2.317, "step": 327460 }, { "epoch": 1.2659074391922192, "grad_norm": 0.11445900797843933, "learning_rate": 0.002, "loss": 2.3235, "step": 327470 }, { "epoch": 1.2659460963956024, "grad_norm": 0.11122239381074905, "learning_rate": 0.002, "loss": 2.349, "step": 327480 }, { "epoch": 1.2659847535989857, "grad_norm": 0.10093493014574051, "learning_rate": 0.002, "loss": 2.3403, "step": 327490 }, { "epoch": 1.266023410802369, "grad_norm": 0.09518415480852127, "learning_rate": 0.002, "loss": 2.3324, "step": 327500 }, { "epoch": 1.2660620680057522, "grad_norm": 0.12398479133844376, "learning_rate": 0.002, "loss": 2.3253, "step": 327510 }, { "epoch": 1.2661007252091354, "grad_norm": 0.10292278230190277, "learning_rate": 0.002, "loss": 2.3466, "step": 327520 }, { "epoch": 1.2661393824125187, "grad_norm": 0.11218395084142685, "learning_rate": 0.002, "loss": 2.3337, "step": 327530 }, { "epoch": 1.2661780396159021, "grad_norm": 0.11104859411716461, "learning_rate": 0.002, "loss": 2.329, "step": 327540 }, { "epoch": 1.2662166968192854, "grad_norm": 0.12359805405139923, "learning_rate": 0.002, "loss": 2.3352, "step": 327550 }, { "epoch": 1.2662553540226686, "grad_norm": 0.09395068138837814, "learning_rate": 0.002, "loss": 2.3369, "step": 327560 }, { "epoch": 1.266294011226052, "grad_norm": 0.10843432694673538, "learning_rate": 0.002, "loss": 2.3418, "step": 327570 }, { "epoch": 1.2663326684294351, "grad_norm": 0.10082303732633591, "learning_rate": 0.002, "loss": 2.344, "step": 327580 }, { "epoch": 1.2663713256328184, "grad_norm": 0.09647303074598312, "learning_rate": 0.002, "loss": 2.3177, "step": 327590 }, { "epoch": 1.2664099828362017, "grad_norm": 0.11412365734577179, "learning_rate": 0.002, "loss": 2.3337, "step": 327600 }, { "epoch": 1.266448640039585, "grad_norm": 0.11117341369390488, "learning_rate": 0.002, "loss": 2.3319, "step": 327610 }, { "epoch": 1.2664872972429682, "grad_norm": 0.0973285362124443, "learning_rate": 0.002, "loss": 2.344, "step": 327620 }, { "epoch": 1.2665259544463514, "grad_norm": 0.1106308177113533, "learning_rate": 0.002, "loss": 2.3272, "step": 327630 }, { "epoch": 1.2665646116497349, "grad_norm": 0.11163157969713211, "learning_rate": 0.002, "loss": 2.342, "step": 327640 }, { "epoch": 1.2666032688531181, "grad_norm": 0.09332996606826782, "learning_rate": 0.002, "loss": 2.3395, "step": 327650 }, { "epoch": 1.2666419260565014, "grad_norm": 0.10091850161552429, "learning_rate": 0.002, "loss": 2.3267, "step": 327660 }, { "epoch": 1.2666805832598846, "grad_norm": 0.11225081235170364, "learning_rate": 0.002, "loss": 2.3315, "step": 327670 }, { "epoch": 1.266719240463268, "grad_norm": 0.10611289739608765, "learning_rate": 0.002, "loss": 2.3439, "step": 327680 }, { "epoch": 1.2667578976666511, "grad_norm": 0.13344308733940125, "learning_rate": 0.002, "loss": 2.3333, "step": 327690 }, { "epoch": 1.2667965548700346, "grad_norm": 0.11110425740480423, "learning_rate": 0.002, "loss": 2.3399, "step": 327700 }, { "epoch": 1.2668352120734179, "grad_norm": 0.11922081559896469, "learning_rate": 0.002, "loss": 2.3383, "step": 327710 }, { "epoch": 1.2668738692768011, "grad_norm": 0.0929272323846817, "learning_rate": 0.002, "loss": 2.3125, "step": 327720 }, { "epoch": 1.2669125264801844, "grad_norm": 0.11714441329240799, "learning_rate": 0.002, "loss": 2.3434, "step": 327730 }, { "epoch": 1.2669511836835676, "grad_norm": 0.12794846296310425, "learning_rate": 0.002, "loss": 2.3315, "step": 327740 }, { "epoch": 1.2669898408869509, "grad_norm": 0.10424962639808655, "learning_rate": 0.002, "loss": 2.3392, "step": 327750 }, { "epoch": 1.2670284980903341, "grad_norm": 0.09436719119548798, "learning_rate": 0.002, "loss": 2.338, "step": 327760 }, { "epoch": 1.2670671552937174, "grad_norm": 0.10538437962532043, "learning_rate": 0.002, "loss": 2.3421, "step": 327770 }, { "epoch": 1.2671058124971006, "grad_norm": 0.10082082450389862, "learning_rate": 0.002, "loss": 2.3356, "step": 327780 }, { "epoch": 1.2671444697004839, "grad_norm": 0.09229423850774765, "learning_rate": 0.002, "loss": 2.3419, "step": 327790 }, { "epoch": 1.2671831269038671, "grad_norm": 0.10747473686933517, "learning_rate": 0.002, "loss": 2.3395, "step": 327800 }, { "epoch": 1.2672217841072506, "grad_norm": 0.10845305770635605, "learning_rate": 0.002, "loss": 2.3243, "step": 327810 }, { "epoch": 1.2672604413106339, "grad_norm": 0.12466636300086975, "learning_rate": 0.002, "loss": 2.3497, "step": 327820 }, { "epoch": 1.2672990985140171, "grad_norm": 0.10115847736597061, "learning_rate": 0.002, "loss": 2.3461, "step": 327830 }, { "epoch": 1.2673377557174004, "grad_norm": 0.10708760470151901, "learning_rate": 0.002, "loss": 2.3483, "step": 327840 }, { "epoch": 1.2673764129207836, "grad_norm": 0.1159091666340828, "learning_rate": 0.002, "loss": 2.341, "step": 327850 }, { "epoch": 1.2674150701241669, "grad_norm": 0.10875213146209717, "learning_rate": 0.002, "loss": 2.3249, "step": 327860 }, { "epoch": 1.2674537273275504, "grad_norm": 0.11530718207359314, "learning_rate": 0.002, "loss": 2.3224, "step": 327870 }, { "epoch": 1.2674923845309336, "grad_norm": 0.09820769727230072, "learning_rate": 0.002, "loss": 2.3239, "step": 327880 }, { "epoch": 1.2675310417343169, "grad_norm": 0.10630863159894943, "learning_rate": 0.002, "loss": 2.3344, "step": 327890 }, { "epoch": 1.2675696989377, "grad_norm": 0.11648089438676834, "learning_rate": 0.002, "loss": 2.3317, "step": 327900 }, { "epoch": 1.2676083561410834, "grad_norm": 0.10292232036590576, "learning_rate": 0.002, "loss": 2.318, "step": 327910 }, { "epoch": 1.2676470133444666, "grad_norm": 0.11076568812131882, "learning_rate": 0.002, "loss": 2.3332, "step": 327920 }, { "epoch": 1.2676856705478499, "grad_norm": 0.13279521465301514, "learning_rate": 0.002, "loss": 2.3371, "step": 327930 }, { "epoch": 1.2677243277512331, "grad_norm": 0.10814601182937622, "learning_rate": 0.002, "loss": 2.3371, "step": 327940 }, { "epoch": 1.2677629849546164, "grad_norm": 0.09568917751312256, "learning_rate": 0.002, "loss": 2.3534, "step": 327950 }, { "epoch": 1.2678016421579996, "grad_norm": 0.10426489263772964, "learning_rate": 0.002, "loss": 2.3377, "step": 327960 }, { "epoch": 1.2678402993613829, "grad_norm": 0.1028430238366127, "learning_rate": 0.002, "loss": 2.3513, "step": 327970 }, { "epoch": 1.2678789565647663, "grad_norm": 0.11005204170942307, "learning_rate": 0.002, "loss": 2.3456, "step": 327980 }, { "epoch": 1.2679176137681496, "grad_norm": 0.10916189104318619, "learning_rate": 0.002, "loss": 2.3364, "step": 327990 }, { "epoch": 1.2679562709715329, "grad_norm": 0.09372225403785706, "learning_rate": 0.002, "loss": 2.3372, "step": 328000 }, { "epoch": 1.267994928174916, "grad_norm": 0.11431435495615005, "learning_rate": 0.002, "loss": 2.3327, "step": 328010 }, { "epoch": 1.2680335853782994, "grad_norm": 0.13004766404628754, "learning_rate": 0.002, "loss": 2.3363, "step": 328020 }, { "epoch": 1.2680722425816826, "grad_norm": 0.26628178358078003, "learning_rate": 0.002, "loss": 2.3423, "step": 328030 }, { "epoch": 1.268110899785066, "grad_norm": 0.12861287593841553, "learning_rate": 0.002, "loss": 2.3206, "step": 328040 }, { "epoch": 1.2681495569884493, "grad_norm": 0.11879384517669678, "learning_rate": 0.002, "loss": 2.3281, "step": 328050 }, { "epoch": 1.2681882141918326, "grad_norm": 0.3298933506011963, "learning_rate": 0.002, "loss": 2.3245, "step": 328060 }, { "epoch": 1.2682268713952158, "grad_norm": 0.10708534717559814, "learning_rate": 0.002, "loss": 2.3517, "step": 328070 }, { "epoch": 1.268265528598599, "grad_norm": 0.10920543223619461, "learning_rate": 0.002, "loss": 2.3408, "step": 328080 }, { "epoch": 1.2683041858019823, "grad_norm": 0.0907408595085144, "learning_rate": 0.002, "loss": 2.3227, "step": 328090 }, { "epoch": 1.2683428430053656, "grad_norm": 0.1118137463927269, "learning_rate": 0.002, "loss": 2.331, "step": 328100 }, { "epoch": 1.2683815002087488, "grad_norm": 0.10773264616727829, "learning_rate": 0.002, "loss": 2.3372, "step": 328110 }, { "epoch": 1.268420157412132, "grad_norm": 0.10068121552467346, "learning_rate": 0.002, "loss": 2.3322, "step": 328120 }, { "epoch": 1.2684588146155154, "grad_norm": 0.10338988155126572, "learning_rate": 0.002, "loss": 2.3521, "step": 328130 }, { "epoch": 1.2684974718188986, "grad_norm": 0.09399203211069107, "learning_rate": 0.002, "loss": 2.3489, "step": 328140 }, { "epoch": 1.268536129022282, "grad_norm": 0.12288428843021393, "learning_rate": 0.002, "loss": 2.3306, "step": 328150 }, { "epoch": 1.2685747862256653, "grad_norm": 0.09742148965597153, "learning_rate": 0.002, "loss": 2.3323, "step": 328160 }, { "epoch": 1.2686134434290486, "grad_norm": 0.11509417742490768, "learning_rate": 0.002, "loss": 2.3313, "step": 328170 }, { "epoch": 1.2686521006324318, "grad_norm": 0.1266513615846634, "learning_rate": 0.002, "loss": 2.3393, "step": 328180 }, { "epoch": 1.268690757835815, "grad_norm": 0.0991605892777443, "learning_rate": 0.002, "loss": 2.3401, "step": 328190 }, { "epoch": 1.2687294150391983, "grad_norm": 0.1043912023305893, "learning_rate": 0.002, "loss": 2.3479, "step": 328200 }, { "epoch": 1.2687680722425818, "grad_norm": 0.10311584174633026, "learning_rate": 0.002, "loss": 2.3268, "step": 328210 }, { "epoch": 1.268806729445965, "grad_norm": 0.09867702424526215, "learning_rate": 0.002, "loss": 2.309, "step": 328220 }, { "epoch": 1.2688453866493483, "grad_norm": 0.10184060037136078, "learning_rate": 0.002, "loss": 2.3269, "step": 328230 }, { "epoch": 1.2688840438527316, "grad_norm": 0.10069439560174942, "learning_rate": 0.002, "loss": 2.3323, "step": 328240 }, { "epoch": 1.2689227010561148, "grad_norm": 0.10947359353303909, "learning_rate": 0.002, "loss": 2.3283, "step": 328250 }, { "epoch": 1.268961358259498, "grad_norm": 0.1088777631521225, "learning_rate": 0.002, "loss": 2.3381, "step": 328260 }, { "epoch": 1.2690000154628813, "grad_norm": 0.13187672197818756, "learning_rate": 0.002, "loss": 2.3229, "step": 328270 }, { "epoch": 1.2690386726662646, "grad_norm": 0.1275816261768341, "learning_rate": 0.002, "loss": 2.3421, "step": 328280 }, { "epoch": 1.2690773298696478, "grad_norm": 0.1199779212474823, "learning_rate": 0.002, "loss": 2.3193, "step": 328290 }, { "epoch": 1.269115987073031, "grad_norm": 0.09605924040079117, "learning_rate": 0.002, "loss": 2.3413, "step": 328300 }, { "epoch": 1.2691546442764143, "grad_norm": 0.10590273141860962, "learning_rate": 0.002, "loss": 2.3484, "step": 328310 }, { "epoch": 1.2691933014797978, "grad_norm": 0.10233684629201889, "learning_rate": 0.002, "loss": 2.3439, "step": 328320 }, { "epoch": 1.269231958683181, "grad_norm": 0.1044899970293045, "learning_rate": 0.002, "loss": 2.3262, "step": 328330 }, { "epoch": 1.2692706158865643, "grad_norm": 0.09585341811180115, "learning_rate": 0.002, "loss": 2.3338, "step": 328340 }, { "epoch": 1.2693092730899476, "grad_norm": 0.11074862629175186, "learning_rate": 0.002, "loss": 2.3322, "step": 328350 }, { "epoch": 1.2693479302933308, "grad_norm": 0.11396773159503937, "learning_rate": 0.002, "loss": 2.3328, "step": 328360 }, { "epoch": 1.269386587496714, "grad_norm": 0.09954419732093811, "learning_rate": 0.002, "loss": 2.3108, "step": 328370 }, { "epoch": 1.2694252447000975, "grad_norm": 0.11653383076190948, "learning_rate": 0.002, "loss": 2.3228, "step": 328380 }, { "epoch": 1.2694639019034808, "grad_norm": 0.11051646620035172, "learning_rate": 0.002, "loss": 2.322, "step": 328390 }, { "epoch": 1.269502559106864, "grad_norm": 0.10264965891838074, "learning_rate": 0.002, "loss": 2.339, "step": 328400 }, { "epoch": 1.2695412163102473, "grad_norm": 0.09628605097532272, "learning_rate": 0.002, "loss": 2.3362, "step": 328410 }, { "epoch": 1.2695798735136306, "grad_norm": 0.10045577585697174, "learning_rate": 0.002, "loss": 2.34, "step": 328420 }, { "epoch": 1.2696185307170138, "grad_norm": 0.10564279556274414, "learning_rate": 0.002, "loss": 2.3224, "step": 328430 }, { "epoch": 1.269657187920397, "grad_norm": 0.10866489261388779, "learning_rate": 0.002, "loss": 2.3145, "step": 328440 }, { "epoch": 1.2696958451237803, "grad_norm": 0.11215133965015411, "learning_rate": 0.002, "loss": 2.3195, "step": 328450 }, { "epoch": 1.2697345023271636, "grad_norm": 0.09572611004114151, "learning_rate": 0.002, "loss": 2.3267, "step": 328460 }, { "epoch": 1.2697731595305468, "grad_norm": 0.10626961290836334, "learning_rate": 0.002, "loss": 2.3357, "step": 328470 }, { "epoch": 1.26981181673393, "grad_norm": 0.10905894637107849, "learning_rate": 0.002, "loss": 2.3246, "step": 328480 }, { "epoch": 1.2698504739373135, "grad_norm": 0.10089924186468124, "learning_rate": 0.002, "loss": 2.3284, "step": 328490 }, { "epoch": 1.2698891311406968, "grad_norm": 0.10201458632946014, "learning_rate": 0.002, "loss": 2.3281, "step": 328500 }, { "epoch": 1.26992778834408, "grad_norm": 0.10394861549139023, "learning_rate": 0.002, "loss": 2.3314, "step": 328510 }, { "epoch": 1.2699664455474633, "grad_norm": 0.12203310430049896, "learning_rate": 0.002, "loss": 2.3308, "step": 328520 }, { "epoch": 1.2700051027508465, "grad_norm": 0.11315803974866867, "learning_rate": 0.002, "loss": 2.3346, "step": 328530 }, { "epoch": 1.2700437599542298, "grad_norm": 0.09893783181905746, "learning_rate": 0.002, "loss": 2.3324, "step": 328540 }, { "epoch": 1.2700824171576133, "grad_norm": 0.10396460443735123, "learning_rate": 0.002, "loss": 2.3246, "step": 328550 }, { "epoch": 1.2701210743609965, "grad_norm": 0.10547640174627304, "learning_rate": 0.002, "loss": 2.327, "step": 328560 }, { "epoch": 1.2701597315643798, "grad_norm": 0.0919446349143982, "learning_rate": 0.002, "loss": 2.3292, "step": 328570 }, { "epoch": 1.270198388767763, "grad_norm": 0.10054364055395126, "learning_rate": 0.002, "loss": 2.3359, "step": 328580 }, { "epoch": 1.2702370459711463, "grad_norm": 0.10095813870429993, "learning_rate": 0.002, "loss": 2.3445, "step": 328590 }, { "epoch": 1.2702757031745295, "grad_norm": 0.1102762222290039, "learning_rate": 0.002, "loss": 2.3287, "step": 328600 }, { "epoch": 1.2703143603779128, "grad_norm": 0.10052599012851715, "learning_rate": 0.002, "loss": 2.3333, "step": 328610 }, { "epoch": 1.270353017581296, "grad_norm": 0.1568072885274887, "learning_rate": 0.002, "loss": 2.3418, "step": 328620 }, { "epoch": 1.2703916747846793, "grad_norm": 0.10998766869306564, "learning_rate": 0.002, "loss": 2.3275, "step": 328630 }, { "epoch": 1.2704303319880625, "grad_norm": 0.10029449313879013, "learning_rate": 0.002, "loss": 2.3531, "step": 328640 }, { "epoch": 1.2704689891914458, "grad_norm": 0.10915552824735641, "learning_rate": 0.002, "loss": 2.3257, "step": 328650 }, { "epoch": 1.2705076463948293, "grad_norm": 0.13900934159755707, "learning_rate": 0.002, "loss": 2.3394, "step": 328660 }, { "epoch": 1.2705463035982125, "grad_norm": 0.09437382221221924, "learning_rate": 0.002, "loss": 2.3216, "step": 328670 }, { "epoch": 1.2705849608015958, "grad_norm": 0.0986660048365593, "learning_rate": 0.002, "loss": 2.3195, "step": 328680 }, { "epoch": 1.270623618004979, "grad_norm": 0.09324406832456589, "learning_rate": 0.002, "loss": 2.3341, "step": 328690 }, { "epoch": 1.2706622752083623, "grad_norm": 0.11582262068986893, "learning_rate": 0.002, "loss": 2.3082, "step": 328700 }, { "epoch": 1.2707009324117455, "grad_norm": 0.11707904189825058, "learning_rate": 0.002, "loss": 2.3329, "step": 328710 }, { "epoch": 1.270739589615129, "grad_norm": 0.10172367841005325, "learning_rate": 0.002, "loss": 2.3323, "step": 328720 }, { "epoch": 1.2707782468185123, "grad_norm": 0.10533636063337326, "learning_rate": 0.002, "loss": 2.3366, "step": 328730 }, { "epoch": 1.2708169040218955, "grad_norm": 0.11258100718259811, "learning_rate": 0.002, "loss": 2.3407, "step": 328740 }, { "epoch": 1.2708555612252788, "grad_norm": 0.11175481230020523, "learning_rate": 0.002, "loss": 2.3302, "step": 328750 }, { "epoch": 1.270894218428662, "grad_norm": 0.1085057482123375, "learning_rate": 0.002, "loss": 2.3182, "step": 328760 }, { "epoch": 1.2709328756320453, "grad_norm": 0.10206273198127747, "learning_rate": 0.002, "loss": 2.3134, "step": 328770 }, { "epoch": 1.2709715328354285, "grad_norm": 0.104701928794384, "learning_rate": 0.002, "loss": 2.3261, "step": 328780 }, { "epoch": 1.2710101900388118, "grad_norm": 0.10033353418111801, "learning_rate": 0.002, "loss": 2.3564, "step": 328790 }, { "epoch": 1.271048847242195, "grad_norm": 0.0895090103149414, "learning_rate": 0.002, "loss": 2.3215, "step": 328800 }, { "epoch": 1.2710875044455783, "grad_norm": 0.10525655001401901, "learning_rate": 0.002, "loss": 2.3313, "step": 328810 }, { "epoch": 1.2711261616489617, "grad_norm": 0.10305428504943848, "learning_rate": 0.002, "loss": 2.3247, "step": 328820 }, { "epoch": 1.271164818852345, "grad_norm": 0.10475514084100723, "learning_rate": 0.002, "loss": 2.3293, "step": 328830 }, { "epoch": 1.2712034760557283, "grad_norm": 0.10202089697122574, "learning_rate": 0.002, "loss": 2.344, "step": 328840 }, { "epoch": 1.2712421332591115, "grad_norm": 0.12603822350502014, "learning_rate": 0.002, "loss": 2.3412, "step": 328850 }, { "epoch": 1.2712807904624948, "grad_norm": 0.10456045717000961, "learning_rate": 0.002, "loss": 2.3325, "step": 328860 }, { "epoch": 1.271319447665878, "grad_norm": 0.10017739981412888, "learning_rate": 0.002, "loss": 2.3227, "step": 328870 }, { "epoch": 1.2713581048692613, "grad_norm": 0.0923190712928772, "learning_rate": 0.002, "loss": 2.3394, "step": 328880 }, { "epoch": 1.2713967620726447, "grad_norm": 0.10937119275331497, "learning_rate": 0.002, "loss": 2.3215, "step": 328890 }, { "epoch": 1.271435419276028, "grad_norm": 0.10462003201246262, "learning_rate": 0.002, "loss": 2.3383, "step": 328900 }, { "epoch": 1.2714740764794112, "grad_norm": 0.10549817979335785, "learning_rate": 0.002, "loss": 2.3108, "step": 328910 }, { "epoch": 1.2715127336827945, "grad_norm": 0.11557579040527344, "learning_rate": 0.002, "loss": 2.3388, "step": 328920 }, { "epoch": 1.2715513908861777, "grad_norm": 0.11169763654470444, "learning_rate": 0.002, "loss": 2.3361, "step": 328930 }, { "epoch": 1.271590048089561, "grad_norm": 0.09808123856782913, "learning_rate": 0.002, "loss": 2.3323, "step": 328940 }, { "epoch": 1.2716287052929443, "grad_norm": 0.09811786562204361, "learning_rate": 0.002, "loss": 2.3319, "step": 328950 }, { "epoch": 1.2716673624963275, "grad_norm": 0.11601688712835312, "learning_rate": 0.002, "loss": 2.3227, "step": 328960 }, { "epoch": 1.2717060196997108, "grad_norm": 0.11422158032655716, "learning_rate": 0.002, "loss": 2.3377, "step": 328970 }, { "epoch": 1.271744676903094, "grad_norm": 0.11772765219211578, "learning_rate": 0.002, "loss": 2.3384, "step": 328980 }, { "epoch": 1.2717833341064775, "grad_norm": 0.09244425594806671, "learning_rate": 0.002, "loss": 2.3308, "step": 328990 }, { "epoch": 1.2718219913098607, "grad_norm": 0.10463545471429825, "learning_rate": 0.002, "loss": 2.3365, "step": 329000 }, { "epoch": 1.271860648513244, "grad_norm": 0.1032916009426117, "learning_rate": 0.002, "loss": 2.3375, "step": 329010 }, { "epoch": 1.2718993057166272, "grad_norm": 0.12296883016824722, "learning_rate": 0.002, "loss": 2.3503, "step": 329020 }, { "epoch": 1.2719379629200105, "grad_norm": 0.09704263508319855, "learning_rate": 0.002, "loss": 2.3302, "step": 329030 }, { "epoch": 1.2719766201233937, "grad_norm": 0.11321119964122772, "learning_rate": 0.002, "loss": 2.3467, "step": 329040 }, { "epoch": 1.272015277326777, "grad_norm": 0.11545021831989288, "learning_rate": 0.002, "loss": 2.3277, "step": 329050 }, { "epoch": 1.2720539345301605, "grad_norm": 0.10558608174324036, "learning_rate": 0.002, "loss": 2.3443, "step": 329060 }, { "epoch": 1.2720925917335437, "grad_norm": 0.0944591835141182, "learning_rate": 0.002, "loss": 2.3231, "step": 329070 }, { "epoch": 1.272131248936927, "grad_norm": 0.09486759454011917, "learning_rate": 0.002, "loss": 2.3313, "step": 329080 }, { "epoch": 1.2721699061403102, "grad_norm": 0.10092812031507492, "learning_rate": 0.002, "loss": 2.3221, "step": 329090 }, { "epoch": 1.2722085633436935, "grad_norm": 0.11117656528949738, "learning_rate": 0.002, "loss": 2.3468, "step": 329100 }, { "epoch": 1.2722472205470767, "grad_norm": 0.10919707268476486, "learning_rate": 0.002, "loss": 2.3341, "step": 329110 }, { "epoch": 1.27228587775046, "grad_norm": 0.13514567911624908, "learning_rate": 0.002, "loss": 2.3407, "step": 329120 }, { "epoch": 1.2723245349538432, "grad_norm": 0.3119554817676544, "learning_rate": 0.002, "loss": 2.3365, "step": 329130 }, { "epoch": 1.2723631921572265, "grad_norm": 0.11358354985713959, "learning_rate": 0.002, "loss": 2.3424, "step": 329140 }, { "epoch": 1.2724018493606097, "grad_norm": 0.09593138098716736, "learning_rate": 0.002, "loss": 2.342, "step": 329150 }, { "epoch": 1.2724405065639932, "grad_norm": 0.13451911509037018, "learning_rate": 0.002, "loss": 2.3395, "step": 329160 }, { "epoch": 1.2724791637673765, "grad_norm": 0.12064133584499359, "learning_rate": 0.002, "loss": 2.3446, "step": 329170 }, { "epoch": 1.2725178209707597, "grad_norm": 0.09411446750164032, "learning_rate": 0.002, "loss": 2.3424, "step": 329180 }, { "epoch": 1.272556478174143, "grad_norm": 0.10068316012620926, "learning_rate": 0.002, "loss": 2.3372, "step": 329190 }, { "epoch": 1.2725951353775262, "grad_norm": 0.10359273850917816, "learning_rate": 0.002, "loss": 2.3376, "step": 329200 }, { "epoch": 1.2726337925809095, "grad_norm": 0.09584148228168488, "learning_rate": 0.002, "loss": 2.3365, "step": 329210 }, { "epoch": 1.2726724497842927, "grad_norm": 0.18259383738040924, "learning_rate": 0.002, "loss": 2.3326, "step": 329220 }, { "epoch": 1.2727111069876762, "grad_norm": 0.11002141237258911, "learning_rate": 0.002, "loss": 2.3469, "step": 329230 }, { "epoch": 1.2727497641910595, "grad_norm": 0.09783057123422623, "learning_rate": 0.002, "loss": 2.3238, "step": 329240 }, { "epoch": 1.2727884213944427, "grad_norm": 0.09692111611366272, "learning_rate": 0.002, "loss": 2.3288, "step": 329250 }, { "epoch": 1.272827078597826, "grad_norm": 0.09134890139102936, "learning_rate": 0.002, "loss": 2.3337, "step": 329260 }, { "epoch": 1.2728657358012092, "grad_norm": 0.12369803339242935, "learning_rate": 0.002, "loss": 2.3274, "step": 329270 }, { "epoch": 1.2729043930045925, "grad_norm": 0.10898613184690475, "learning_rate": 0.002, "loss": 2.3265, "step": 329280 }, { "epoch": 1.2729430502079757, "grad_norm": 0.09025852382183075, "learning_rate": 0.002, "loss": 2.3415, "step": 329290 }, { "epoch": 1.272981707411359, "grad_norm": 0.09845314174890518, "learning_rate": 0.002, "loss": 2.3266, "step": 329300 }, { "epoch": 1.2730203646147422, "grad_norm": 0.09647542238235474, "learning_rate": 0.002, "loss": 2.3408, "step": 329310 }, { "epoch": 1.2730590218181255, "grad_norm": 0.09182790666818619, "learning_rate": 0.002, "loss": 2.3463, "step": 329320 }, { "epoch": 1.273097679021509, "grad_norm": 0.11160074174404144, "learning_rate": 0.002, "loss": 2.3458, "step": 329330 }, { "epoch": 1.2731363362248922, "grad_norm": 0.11283206939697266, "learning_rate": 0.002, "loss": 2.3257, "step": 329340 }, { "epoch": 1.2731749934282754, "grad_norm": 0.1080353707075119, "learning_rate": 0.002, "loss": 2.3196, "step": 329350 }, { "epoch": 1.2732136506316587, "grad_norm": 0.10062264651060104, "learning_rate": 0.002, "loss": 2.3291, "step": 329360 }, { "epoch": 1.273252307835042, "grad_norm": 0.09652829170227051, "learning_rate": 0.002, "loss": 2.3256, "step": 329370 }, { "epoch": 1.2732909650384252, "grad_norm": 0.1126071885228157, "learning_rate": 0.002, "loss": 2.3327, "step": 329380 }, { "epoch": 1.2733296222418085, "grad_norm": 0.08950099349021912, "learning_rate": 0.002, "loss": 2.336, "step": 329390 }, { "epoch": 1.273368279445192, "grad_norm": 0.09760060161352158, "learning_rate": 0.002, "loss": 2.33, "step": 329400 }, { "epoch": 1.2734069366485752, "grad_norm": 0.10084472596645355, "learning_rate": 0.002, "loss": 2.3363, "step": 329410 }, { "epoch": 1.2734455938519584, "grad_norm": 0.10872059315443039, "learning_rate": 0.002, "loss": 2.3366, "step": 329420 }, { "epoch": 1.2734842510553417, "grad_norm": 0.09868069738149643, "learning_rate": 0.002, "loss": 2.3184, "step": 329430 }, { "epoch": 1.273522908258725, "grad_norm": 0.11543266475200653, "learning_rate": 0.002, "loss": 2.3405, "step": 329440 }, { "epoch": 1.2735615654621082, "grad_norm": 0.22089999914169312, "learning_rate": 0.002, "loss": 2.3478, "step": 329450 }, { "epoch": 1.2736002226654914, "grad_norm": 0.0972055122256279, "learning_rate": 0.002, "loss": 2.3511, "step": 329460 }, { "epoch": 1.2736388798688747, "grad_norm": 0.0951479971408844, "learning_rate": 0.002, "loss": 2.3439, "step": 329470 }, { "epoch": 1.273677537072258, "grad_norm": 0.113665871322155, "learning_rate": 0.002, "loss": 2.3478, "step": 329480 }, { "epoch": 1.2737161942756412, "grad_norm": 0.10153911262750626, "learning_rate": 0.002, "loss": 2.3456, "step": 329490 }, { "epoch": 1.2737548514790247, "grad_norm": 0.10728565603494644, "learning_rate": 0.002, "loss": 2.3393, "step": 329500 }, { "epoch": 1.273793508682408, "grad_norm": 0.11218227446079254, "learning_rate": 0.002, "loss": 2.3339, "step": 329510 }, { "epoch": 1.2738321658857912, "grad_norm": 0.09646441042423248, "learning_rate": 0.002, "loss": 2.3206, "step": 329520 }, { "epoch": 1.2738708230891744, "grad_norm": 0.08836732059717178, "learning_rate": 0.002, "loss": 2.3323, "step": 329530 }, { "epoch": 1.2739094802925577, "grad_norm": 0.09999502450227737, "learning_rate": 0.002, "loss": 2.335, "step": 329540 }, { "epoch": 1.273948137495941, "grad_norm": 0.11865074932575226, "learning_rate": 0.002, "loss": 2.3373, "step": 329550 }, { "epoch": 1.2739867946993244, "grad_norm": 0.10607121139764786, "learning_rate": 0.002, "loss": 2.3231, "step": 329560 }, { "epoch": 1.2740254519027077, "grad_norm": 0.09681159257888794, "learning_rate": 0.002, "loss": 2.3391, "step": 329570 }, { "epoch": 1.274064109106091, "grad_norm": 0.10897815227508545, "learning_rate": 0.002, "loss": 2.3372, "step": 329580 }, { "epoch": 1.2741027663094742, "grad_norm": 0.11636954545974731, "learning_rate": 0.002, "loss": 2.3241, "step": 329590 }, { "epoch": 1.2741414235128574, "grad_norm": 0.08978872001171112, "learning_rate": 0.002, "loss": 2.3325, "step": 329600 }, { "epoch": 1.2741800807162407, "grad_norm": 0.1251995861530304, "learning_rate": 0.002, "loss": 2.353, "step": 329610 }, { "epoch": 1.274218737919624, "grad_norm": 0.10945021361112595, "learning_rate": 0.002, "loss": 2.3338, "step": 329620 }, { "epoch": 1.2742573951230072, "grad_norm": 0.09309005737304688, "learning_rate": 0.002, "loss": 2.3385, "step": 329630 }, { "epoch": 1.2742960523263904, "grad_norm": 0.10380406677722931, "learning_rate": 0.002, "loss": 2.3491, "step": 329640 }, { "epoch": 1.2743347095297737, "grad_norm": 0.09443452209234238, "learning_rate": 0.002, "loss": 2.3265, "step": 329650 }, { "epoch": 1.274373366733157, "grad_norm": 0.10749629884958267, "learning_rate": 0.002, "loss": 2.3205, "step": 329660 }, { "epoch": 1.2744120239365404, "grad_norm": 0.11914601922035217, "learning_rate": 0.002, "loss": 2.3453, "step": 329670 }, { "epoch": 1.2744506811399237, "grad_norm": 0.10565859824419022, "learning_rate": 0.002, "loss": 2.3319, "step": 329680 }, { "epoch": 1.274489338343307, "grad_norm": 0.10837848484516144, "learning_rate": 0.002, "loss": 2.3408, "step": 329690 }, { "epoch": 1.2745279955466902, "grad_norm": 0.11632353067398071, "learning_rate": 0.002, "loss": 2.3315, "step": 329700 }, { "epoch": 1.2745666527500734, "grad_norm": 0.08982210606336594, "learning_rate": 0.002, "loss": 2.3437, "step": 329710 }, { "epoch": 1.2746053099534567, "grad_norm": 0.11114289611577988, "learning_rate": 0.002, "loss": 2.3203, "step": 329720 }, { "epoch": 1.2746439671568401, "grad_norm": 0.09798840433359146, "learning_rate": 0.002, "loss": 2.3399, "step": 329730 }, { "epoch": 1.2746826243602234, "grad_norm": 0.1059572622179985, "learning_rate": 0.002, "loss": 2.3296, "step": 329740 }, { "epoch": 1.2747212815636066, "grad_norm": 0.09854008257389069, "learning_rate": 0.002, "loss": 2.3364, "step": 329750 }, { "epoch": 1.27475993876699, "grad_norm": 0.106520876288414, "learning_rate": 0.002, "loss": 2.3352, "step": 329760 }, { "epoch": 1.2747985959703731, "grad_norm": 0.11019045114517212, "learning_rate": 0.002, "loss": 2.331, "step": 329770 }, { "epoch": 1.2748372531737564, "grad_norm": 0.11295673251152039, "learning_rate": 0.002, "loss": 2.3447, "step": 329780 }, { "epoch": 1.2748759103771397, "grad_norm": 0.10148819535970688, "learning_rate": 0.002, "loss": 2.3255, "step": 329790 }, { "epoch": 1.274914567580523, "grad_norm": 0.10899393260478973, "learning_rate": 0.002, "loss": 2.3354, "step": 329800 }, { "epoch": 1.2749532247839062, "grad_norm": 0.09473425149917603, "learning_rate": 0.002, "loss": 2.323, "step": 329810 }, { "epoch": 1.2749918819872894, "grad_norm": 0.09841945767402649, "learning_rate": 0.002, "loss": 2.3307, "step": 329820 }, { "epoch": 1.2750305391906727, "grad_norm": 0.10254204273223877, "learning_rate": 0.002, "loss": 2.319, "step": 329830 }, { "epoch": 1.2750691963940561, "grad_norm": 0.12272008508443832, "learning_rate": 0.002, "loss": 2.3485, "step": 329840 }, { "epoch": 1.2751078535974394, "grad_norm": 0.09451805055141449, "learning_rate": 0.002, "loss": 2.3444, "step": 329850 }, { "epoch": 1.2751465108008226, "grad_norm": 0.10965383797883987, "learning_rate": 0.002, "loss": 2.3449, "step": 329860 }, { "epoch": 1.275185168004206, "grad_norm": 0.09316661953926086, "learning_rate": 0.002, "loss": 2.334, "step": 329870 }, { "epoch": 1.2752238252075891, "grad_norm": 0.10202120244503021, "learning_rate": 0.002, "loss": 2.3385, "step": 329880 }, { "epoch": 1.2752624824109724, "grad_norm": 0.11021611839532852, "learning_rate": 0.002, "loss": 2.3135, "step": 329890 }, { "epoch": 1.2753011396143559, "grad_norm": 0.13178619742393494, "learning_rate": 0.002, "loss": 2.3238, "step": 329900 }, { "epoch": 1.2753397968177391, "grad_norm": 0.10314160585403442, "learning_rate": 0.002, "loss": 2.3213, "step": 329910 }, { "epoch": 1.2753784540211224, "grad_norm": 0.09276103973388672, "learning_rate": 0.002, "loss": 2.3269, "step": 329920 }, { "epoch": 1.2754171112245056, "grad_norm": 0.10184242576360703, "learning_rate": 0.002, "loss": 2.3049, "step": 329930 }, { "epoch": 1.2754557684278889, "grad_norm": 0.13053973019123077, "learning_rate": 0.002, "loss": 2.337, "step": 329940 }, { "epoch": 1.2754944256312721, "grad_norm": 0.09545928239822388, "learning_rate": 0.002, "loss": 2.3365, "step": 329950 }, { "epoch": 1.2755330828346554, "grad_norm": 0.14243371784687042, "learning_rate": 0.002, "loss": 2.3196, "step": 329960 }, { "epoch": 1.2755717400380386, "grad_norm": 0.09996338933706284, "learning_rate": 0.002, "loss": 2.3303, "step": 329970 }, { "epoch": 1.275610397241422, "grad_norm": 0.11278600245714188, "learning_rate": 0.002, "loss": 2.3304, "step": 329980 }, { "epoch": 1.2756490544448051, "grad_norm": 0.1257156878709793, "learning_rate": 0.002, "loss": 2.3404, "step": 329990 }, { "epoch": 1.2756877116481884, "grad_norm": 0.09659942984580994, "learning_rate": 0.002, "loss": 2.339, "step": 330000 }, { "epoch": 1.2757263688515719, "grad_norm": 0.10965616255998611, "learning_rate": 0.002, "loss": 2.3353, "step": 330010 }, { "epoch": 1.2757650260549551, "grad_norm": 0.1346312016248703, "learning_rate": 0.002, "loss": 2.3516, "step": 330020 }, { "epoch": 1.2758036832583384, "grad_norm": 0.08886497467756271, "learning_rate": 0.002, "loss": 2.323, "step": 330030 }, { "epoch": 1.2758423404617216, "grad_norm": 0.09994087368249893, "learning_rate": 0.002, "loss": 2.338, "step": 330040 }, { "epoch": 1.2758809976651049, "grad_norm": 0.10083264112472534, "learning_rate": 0.002, "loss": 2.3358, "step": 330050 }, { "epoch": 1.2759196548684881, "grad_norm": 0.5164046883583069, "learning_rate": 0.002, "loss": 2.3381, "step": 330060 }, { "epoch": 1.2759583120718716, "grad_norm": 0.10039766877889633, "learning_rate": 0.002, "loss": 2.3377, "step": 330070 }, { "epoch": 1.2759969692752549, "grad_norm": 0.10687883198261261, "learning_rate": 0.002, "loss": 2.3466, "step": 330080 }, { "epoch": 1.276035626478638, "grad_norm": 0.09684999287128448, "learning_rate": 0.002, "loss": 2.3283, "step": 330090 }, { "epoch": 1.2760742836820214, "grad_norm": 0.09277097135782242, "learning_rate": 0.002, "loss": 2.3406, "step": 330100 }, { "epoch": 1.2761129408854046, "grad_norm": 0.10449618101119995, "learning_rate": 0.002, "loss": 2.3498, "step": 330110 }, { "epoch": 1.2761515980887879, "grad_norm": 0.10649379342794418, "learning_rate": 0.002, "loss": 2.3412, "step": 330120 }, { "epoch": 1.2761902552921711, "grad_norm": 0.10755191743373871, "learning_rate": 0.002, "loss": 2.3518, "step": 330130 }, { "epoch": 1.2762289124955544, "grad_norm": 0.10106264799833298, "learning_rate": 0.002, "loss": 2.3332, "step": 330140 }, { "epoch": 1.2762675696989376, "grad_norm": 0.10244058817625046, "learning_rate": 0.002, "loss": 2.3476, "step": 330150 }, { "epoch": 1.2763062269023209, "grad_norm": 0.10412459075450897, "learning_rate": 0.002, "loss": 2.333, "step": 330160 }, { "epoch": 1.2763448841057041, "grad_norm": 0.10179536044597626, "learning_rate": 0.002, "loss": 2.3461, "step": 330170 }, { "epoch": 1.2763835413090876, "grad_norm": 0.09596909582614899, "learning_rate": 0.002, "loss": 2.33, "step": 330180 }, { "epoch": 1.2764221985124709, "grad_norm": 0.11567061394453049, "learning_rate": 0.002, "loss": 2.327, "step": 330190 }, { "epoch": 1.276460855715854, "grad_norm": 0.11815230548381805, "learning_rate": 0.002, "loss": 2.3211, "step": 330200 }, { "epoch": 1.2764995129192374, "grad_norm": 0.11768794804811478, "learning_rate": 0.002, "loss": 2.3392, "step": 330210 }, { "epoch": 1.2765381701226206, "grad_norm": 0.1113450825214386, "learning_rate": 0.002, "loss": 2.3277, "step": 330220 }, { "epoch": 1.2765768273260039, "grad_norm": 0.12138304114341736, "learning_rate": 0.002, "loss": 2.329, "step": 330230 }, { "epoch": 1.2766154845293873, "grad_norm": 0.11253490298986435, "learning_rate": 0.002, "loss": 2.3413, "step": 330240 }, { "epoch": 1.2766541417327706, "grad_norm": 0.0988660380244255, "learning_rate": 0.002, "loss": 2.3331, "step": 330250 }, { "epoch": 1.2766927989361538, "grad_norm": 0.12407879531383514, "learning_rate": 0.002, "loss": 2.3417, "step": 330260 }, { "epoch": 1.276731456139537, "grad_norm": 0.10875300318002701, "learning_rate": 0.002, "loss": 2.3368, "step": 330270 }, { "epoch": 1.2767701133429203, "grad_norm": 0.15900826454162598, "learning_rate": 0.002, "loss": 2.3391, "step": 330280 }, { "epoch": 1.2768087705463036, "grad_norm": 0.12358444929122925, "learning_rate": 0.002, "loss": 2.3402, "step": 330290 }, { "epoch": 1.2768474277496868, "grad_norm": 0.10288117080926895, "learning_rate": 0.002, "loss": 2.3142, "step": 330300 }, { "epoch": 1.27688608495307, "grad_norm": 0.12121498584747314, "learning_rate": 0.002, "loss": 2.3328, "step": 330310 }, { "epoch": 1.2769247421564534, "grad_norm": 0.10315801203250885, "learning_rate": 0.002, "loss": 2.3422, "step": 330320 }, { "epoch": 1.2769633993598366, "grad_norm": 0.10812592506408691, "learning_rate": 0.002, "loss": 2.3225, "step": 330330 }, { "epoch": 1.2770020565632199, "grad_norm": 0.09591901302337646, "learning_rate": 0.002, "loss": 2.3322, "step": 330340 }, { "epoch": 1.2770407137666033, "grad_norm": 0.11056618392467499, "learning_rate": 0.002, "loss": 2.3474, "step": 330350 }, { "epoch": 1.2770793709699866, "grad_norm": 0.10496780276298523, "learning_rate": 0.002, "loss": 2.3332, "step": 330360 }, { "epoch": 1.2771180281733698, "grad_norm": 0.10728298127651215, "learning_rate": 0.002, "loss": 2.3359, "step": 330370 }, { "epoch": 1.277156685376753, "grad_norm": 0.12340054661035538, "learning_rate": 0.002, "loss": 2.3311, "step": 330380 }, { "epoch": 1.2771953425801363, "grad_norm": 0.12097841501235962, "learning_rate": 0.002, "loss": 2.3286, "step": 330390 }, { "epoch": 1.2772339997835196, "grad_norm": 0.10657333582639694, "learning_rate": 0.002, "loss": 2.3192, "step": 330400 }, { "epoch": 1.277272656986903, "grad_norm": 0.10780542343854904, "learning_rate": 0.002, "loss": 2.3415, "step": 330410 }, { "epoch": 1.2773113141902863, "grad_norm": 0.09199956059455872, "learning_rate": 0.002, "loss": 2.3412, "step": 330420 }, { "epoch": 1.2773499713936696, "grad_norm": 0.08981581777334213, "learning_rate": 0.002, "loss": 2.34, "step": 330430 }, { "epoch": 1.2773886285970528, "grad_norm": 0.11883309483528137, "learning_rate": 0.002, "loss": 2.3357, "step": 330440 }, { "epoch": 1.277427285800436, "grad_norm": 0.09634125977754593, "learning_rate": 0.002, "loss": 2.3351, "step": 330450 }, { "epoch": 1.2774659430038193, "grad_norm": 0.10400119423866272, "learning_rate": 0.002, "loss": 2.3347, "step": 330460 }, { "epoch": 1.2775046002072026, "grad_norm": 0.11077505350112915, "learning_rate": 0.002, "loss": 2.3138, "step": 330470 }, { "epoch": 1.2775432574105858, "grad_norm": 0.09788201004266739, "learning_rate": 0.002, "loss": 2.3476, "step": 330480 }, { "epoch": 1.277581914613969, "grad_norm": 0.10747364163398743, "learning_rate": 0.002, "loss": 2.3325, "step": 330490 }, { "epoch": 1.2776205718173523, "grad_norm": 0.11120908707380295, "learning_rate": 0.002, "loss": 2.3313, "step": 330500 }, { "epoch": 1.2776592290207356, "grad_norm": 0.10732141137123108, "learning_rate": 0.002, "loss": 2.3301, "step": 330510 }, { "epoch": 1.277697886224119, "grad_norm": 0.09890435636043549, "learning_rate": 0.002, "loss": 2.3291, "step": 330520 }, { "epoch": 1.2777365434275023, "grad_norm": 0.09526444971561432, "learning_rate": 0.002, "loss": 2.3425, "step": 330530 }, { "epoch": 1.2777752006308856, "grad_norm": 0.13470156490802765, "learning_rate": 0.002, "loss": 2.3284, "step": 330540 }, { "epoch": 1.2778138578342688, "grad_norm": 0.10842323303222656, "learning_rate": 0.002, "loss": 2.3338, "step": 330550 }, { "epoch": 1.277852515037652, "grad_norm": 0.09326523542404175, "learning_rate": 0.002, "loss": 2.3476, "step": 330560 }, { "epoch": 1.2778911722410353, "grad_norm": 0.10812563449144363, "learning_rate": 0.002, "loss": 2.3224, "step": 330570 }, { "epoch": 1.2779298294444188, "grad_norm": 0.09856005012989044, "learning_rate": 0.002, "loss": 2.3371, "step": 330580 }, { "epoch": 1.277968486647802, "grad_norm": 0.11291167885065079, "learning_rate": 0.002, "loss": 2.3534, "step": 330590 }, { "epoch": 1.2780071438511853, "grad_norm": 0.10198131203651428, "learning_rate": 0.002, "loss": 2.3357, "step": 330600 }, { "epoch": 1.2780458010545686, "grad_norm": 0.10310892760753632, "learning_rate": 0.002, "loss": 2.3186, "step": 330610 }, { "epoch": 1.2780844582579518, "grad_norm": 0.10659323632717133, "learning_rate": 0.002, "loss": 2.3262, "step": 330620 }, { "epoch": 1.278123115461335, "grad_norm": 0.09991616010665894, "learning_rate": 0.002, "loss": 2.3348, "step": 330630 }, { "epoch": 1.2781617726647183, "grad_norm": 0.10998846590518951, "learning_rate": 0.002, "loss": 2.3478, "step": 330640 }, { "epoch": 1.2782004298681016, "grad_norm": 0.1059923768043518, "learning_rate": 0.002, "loss": 2.3308, "step": 330650 }, { "epoch": 1.2782390870714848, "grad_norm": 0.08670962601900101, "learning_rate": 0.002, "loss": 2.333, "step": 330660 }, { "epoch": 1.278277744274868, "grad_norm": 0.10149190574884415, "learning_rate": 0.002, "loss": 2.332, "step": 330670 }, { "epoch": 1.2783164014782515, "grad_norm": 0.10550031065940857, "learning_rate": 0.002, "loss": 2.3292, "step": 330680 }, { "epoch": 1.2783550586816348, "grad_norm": 0.100679412484169, "learning_rate": 0.002, "loss": 2.3342, "step": 330690 }, { "epoch": 1.278393715885018, "grad_norm": 0.11549176275730133, "learning_rate": 0.002, "loss": 2.3414, "step": 330700 }, { "epoch": 1.2784323730884013, "grad_norm": 0.09727973490953445, "learning_rate": 0.002, "loss": 2.3262, "step": 330710 }, { "epoch": 1.2784710302917845, "grad_norm": 0.10293383151292801, "learning_rate": 0.002, "loss": 2.3216, "step": 330720 }, { "epoch": 1.2785096874951678, "grad_norm": 0.1048823818564415, "learning_rate": 0.002, "loss": 2.3338, "step": 330730 }, { "epoch": 1.278548344698551, "grad_norm": 0.10713917762041092, "learning_rate": 0.002, "loss": 2.329, "step": 330740 }, { "epoch": 1.2785870019019345, "grad_norm": 0.11875288933515549, "learning_rate": 0.002, "loss": 2.3376, "step": 330750 }, { "epoch": 1.2786256591053178, "grad_norm": 0.09954854100942612, "learning_rate": 0.002, "loss": 2.316, "step": 330760 }, { "epoch": 1.278664316308701, "grad_norm": 0.09504789113998413, "learning_rate": 0.002, "loss": 2.3323, "step": 330770 }, { "epoch": 1.2787029735120843, "grad_norm": 0.1305740773677826, "learning_rate": 0.002, "loss": 2.3329, "step": 330780 }, { "epoch": 1.2787416307154675, "grad_norm": 0.10554517805576324, "learning_rate": 0.002, "loss": 2.3316, "step": 330790 }, { "epoch": 1.2787802879188508, "grad_norm": 0.1055464893579483, "learning_rate": 0.002, "loss": 2.34, "step": 330800 }, { "epoch": 1.278818945122234, "grad_norm": 0.09993565827608109, "learning_rate": 0.002, "loss": 2.3318, "step": 330810 }, { "epoch": 1.2788576023256173, "grad_norm": 0.10794758796691895, "learning_rate": 0.002, "loss": 2.3389, "step": 330820 }, { "epoch": 1.2788962595290005, "grad_norm": 0.09847798198461533, "learning_rate": 0.002, "loss": 2.3403, "step": 330830 }, { "epoch": 1.2789349167323838, "grad_norm": 0.14246560633182526, "learning_rate": 0.002, "loss": 2.3474, "step": 330840 }, { "epoch": 1.2789735739357673, "grad_norm": 0.10085310786962509, "learning_rate": 0.002, "loss": 2.3277, "step": 330850 }, { "epoch": 1.2790122311391505, "grad_norm": 0.09441061317920685, "learning_rate": 0.002, "loss": 2.3158, "step": 330860 }, { "epoch": 1.2790508883425338, "grad_norm": 0.09778943657875061, "learning_rate": 0.002, "loss": 2.3148, "step": 330870 }, { "epoch": 1.279089545545917, "grad_norm": 0.09769915044307709, "learning_rate": 0.002, "loss": 2.3348, "step": 330880 }, { "epoch": 1.2791282027493003, "grad_norm": 0.10353109985589981, "learning_rate": 0.002, "loss": 2.3353, "step": 330890 }, { "epoch": 1.2791668599526835, "grad_norm": 0.11549326032400131, "learning_rate": 0.002, "loss": 2.3314, "step": 330900 }, { "epoch": 1.2792055171560668, "grad_norm": 0.09193005412817001, "learning_rate": 0.002, "loss": 2.3465, "step": 330910 }, { "epoch": 1.2792441743594503, "grad_norm": 0.09352631121873856, "learning_rate": 0.002, "loss": 2.3353, "step": 330920 }, { "epoch": 1.2792828315628335, "grad_norm": 0.10780029743909836, "learning_rate": 0.002, "loss": 2.3391, "step": 330930 }, { "epoch": 1.2793214887662168, "grad_norm": 0.09702304750680923, "learning_rate": 0.002, "loss": 2.3299, "step": 330940 }, { "epoch": 1.2793601459696, "grad_norm": 0.10021896660327911, "learning_rate": 0.002, "loss": 2.3198, "step": 330950 }, { "epoch": 1.2793988031729833, "grad_norm": 0.11453984677791595, "learning_rate": 0.002, "loss": 2.3407, "step": 330960 }, { "epoch": 1.2794374603763665, "grad_norm": 0.13235102593898773, "learning_rate": 0.002, "loss": 2.344, "step": 330970 }, { "epoch": 1.2794761175797498, "grad_norm": 0.0944540798664093, "learning_rate": 0.002, "loss": 2.3308, "step": 330980 }, { "epoch": 1.279514774783133, "grad_norm": 0.12713098526000977, "learning_rate": 0.002, "loss": 2.3406, "step": 330990 }, { "epoch": 1.2795534319865163, "grad_norm": 0.09712237119674683, "learning_rate": 0.002, "loss": 2.3302, "step": 331000 }, { "epoch": 1.2795920891898995, "grad_norm": 0.11072835326194763, "learning_rate": 0.002, "loss": 2.3337, "step": 331010 }, { "epoch": 1.279630746393283, "grad_norm": 0.11048628389835358, "learning_rate": 0.002, "loss": 2.3426, "step": 331020 }, { "epoch": 1.2796694035966663, "grad_norm": 0.10861332714557648, "learning_rate": 0.002, "loss": 2.3283, "step": 331030 }, { "epoch": 1.2797080608000495, "grad_norm": 0.11873438954353333, "learning_rate": 0.002, "loss": 2.3284, "step": 331040 }, { "epoch": 1.2797467180034328, "grad_norm": 0.10168084502220154, "learning_rate": 0.002, "loss": 2.3316, "step": 331050 }, { "epoch": 1.279785375206816, "grad_norm": 0.10715948790311813, "learning_rate": 0.002, "loss": 2.3254, "step": 331060 }, { "epoch": 1.2798240324101993, "grad_norm": 0.1116722822189331, "learning_rate": 0.002, "loss": 2.3379, "step": 331070 }, { "epoch": 1.2798626896135825, "grad_norm": 0.0973399206995964, "learning_rate": 0.002, "loss": 2.3434, "step": 331080 }, { "epoch": 1.279901346816966, "grad_norm": 0.09459315240383148, "learning_rate": 0.002, "loss": 2.3316, "step": 331090 }, { "epoch": 1.2799400040203492, "grad_norm": 0.10086680203676224, "learning_rate": 0.002, "loss": 2.3493, "step": 331100 }, { "epoch": 1.2799786612237325, "grad_norm": 0.10074183344841003, "learning_rate": 0.002, "loss": 2.326, "step": 331110 }, { "epoch": 1.2800173184271157, "grad_norm": 0.1126500815153122, "learning_rate": 0.002, "loss": 2.3466, "step": 331120 }, { "epoch": 1.280055975630499, "grad_norm": 0.11003128439188004, "learning_rate": 0.002, "loss": 2.3321, "step": 331130 }, { "epoch": 1.2800946328338823, "grad_norm": 0.11613652855157852, "learning_rate": 0.002, "loss": 2.3379, "step": 331140 }, { "epoch": 1.2801332900372655, "grad_norm": 0.13420990109443665, "learning_rate": 0.002, "loss": 2.3295, "step": 331150 }, { "epoch": 1.2801719472406488, "grad_norm": 0.10044151544570923, "learning_rate": 0.002, "loss": 2.3341, "step": 331160 }, { "epoch": 1.280210604444032, "grad_norm": 0.10778767615556717, "learning_rate": 0.002, "loss": 2.3284, "step": 331170 }, { "epoch": 1.2802492616474153, "grad_norm": 0.10573440790176392, "learning_rate": 0.002, "loss": 2.3454, "step": 331180 }, { "epoch": 1.2802879188507987, "grad_norm": 0.10240902006626129, "learning_rate": 0.002, "loss": 2.3361, "step": 331190 }, { "epoch": 1.280326576054182, "grad_norm": 0.11871691793203354, "learning_rate": 0.002, "loss": 2.3463, "step": 331200 }, { "epoch": 1.2803652332575652, "grad_norm": 0.14145809412002563, "learning_rate": 0.002, "loss": 2.3188, "step": 331210 }, { "epoch": 1.2804038904609485, "grad_norm": 0.10078298300504684, "learning_rate": 0.002, "loss": 2.3297, "step": 331220 }, { "epoch": 1.2804425476643317, "grad_norm": 0.11439738422632217, "learning_rate": 0.002, "loss": 2.3286, "step": 331230 }, { "epoch": 1.280481204867715, "grad_norm": 0.1064569428563118, "learning_rate": 0.002, "loss": 2.337, "step": 331240 }, { "epoch": 1.2805198620710982, "grad_norm": 0.10036151111125946, "learning_rate": 0.002, "loss": 2.3375, "step": 331250 }, { "epoch": 1.2805585192744817, "grad_norm": 0.11667811125516891, "learning_rate": 0.002, "loss": 2.3476, "step": 331260 }, { "epoch": 1.280597176477865, "grad_norm": 0.1017821803689003, "learning_rate": 0.002, "loss": 2.3383, "step": 331270 }, { "epoch": 1.2806358336812482, "grad_norm": 0.1017158031463623, "learning_rate": 0.002, "loss": 2.3313, "step": 331280 }, { "epoch": 1.2806744908846315, "grad_norm": 0.09027392417192459, "learning_rate": 0.002, "loss": 2.3284, "step": 331290 }, { "epoch": 1.2807131480880147, "grad_norm": 0.1140141710639, "learning_rate": 0.002, "loss": 2.3278, "step": 331300 }, { "epoch": 1.280751805291398, "grad_norm": 0.13576845824718475, "learning_rate": 0.002, "loss": 2.3227, "step": 331310 }, { "epoch": 1.2807904624947812, "grad_norm": 0.10066722333431244, "learning_rate": 0.002, "loss": 2.3315, "step": 331320 }, { "epoch": 1.2808291196981645, "grad_norm": 0.09830858558416367, "learning_rate": 0.002, "loss": 2.341, "step": 331330 }, { "epoch": 1.2808677769015477, "grad_norm": 0.09742210805416107, "learning_rate": 0.002, "loss": 2.3314, "step": 331340 }, { "epoch": 1.280906434104931, "grad_norm": 0.10384872555732727, "learning_rate": 0.002, "loss": 2.3368, "step": 331350 }, { "epoch": 1.2809450913083145, "grad_norm": 0.10708346217870712, "learning_rate": 0.002, "loss": 2.3234, "step": 331360 }, { "epoch": 1.2809837485116977, "grad_norm": 0.10111190378665924, "learning_rate": 0.002, "loss": 2.3361, "step": 331370 }, { "epoch": 1.281022405715081, "grad_norm": 0.09716185927391052, "learning_rate": 0.002, "loss": 2.3338, "step": 331380 }, { "epoch": 1.2810610629184642, "grad_norm": 0.09608956426382065, "learning_rate": 0.002, "loss": 2.3342, "step": 331390 }, { "epoch": 1.2810997201218475, "grad_norm": 0.1042938381433487, "learning_rate": 0.002, "loss": 2.334, "step": 331400 }, { "epoch": 1.2811383773252307, "grad_norm": 0.09524276852607727, "learning_rate": 0.002, "loss": 2.338, "step": 331410 }, { "epoch": 1.2811770345286142, "grad_norm": 0.10780641436576843, "learning_rate": 0.002, "loss": 2.3214, "step": 331420 }, { "epoch": 1.2812156917319975, "grad_norm": 0.10179249942302704, "learning_rate": 0.002, "loss": 2.3433, "step": 331430 }, { "epoch": 1.2812543489353807, "grad_norm": 0.11295023560523987, "learning_rate": 0.002, "loss": 2.316, "step": 331440 }, { "epoch": 1.281293006138764, "grad_norm": 0.11664408445358276, "learning_rate": 0.002, "loss": 2.3364, "step": 331450 }, { "epoch": 1.2813316633421472, "grad_norm": 0.11424199491739273, "learning_rate": 0.002, "loss": 2.3412, "step": 331460 }, { "epoch": 1.2813703205455305, "grad_norm": 0.8250250816345215, "learning_rate": 0.002, "loss": 2.3273, "step": 331470 }, { "epoch": 1.2814089777489137, "grad_norm": 0.6171488165855408, "learning_rate": 0.002, "loss": 2.3443, "step": 331480 }, { "epoch": 1.281447634952297, "grad_norm": 0.15012137591838837, "learning_rate": 0.002, "loss": 2.3453, "step": 331490 }, { "epoch": 1.2814862921556802, "grad_norm": 0.10564377903938293, "learning_rate": 0.002, "loss": 2.3333, "step": 331500 }, { "epoch": 1.2815249493590635, "grad_norm": 0.11790092289447784, "learning_rate": 0.002, "loss": 2.3439, "step": 331510 }, { "epoch": 1.2815636065624467, "grad_norm": 0.10824461281299591, "learning_rate": 0.002, "loss": 2.3359, "step": 331520 }, { "epoch": 1.2816022637658302, "grad_norm": 0.10344535857439041, "learning_rate": 0.002, "loss": 2.3373, "step": 331530 }, { "epoch": 1.2816409209692134, "grad_norm": 0.10616878420114517, "learning_rate": 0.002, "loss": 2.3298, "step": 331540 }, { "epoch": 1.2816795781725967, "grad_norm": 0.11207808554172516, "learning_rate": 0.002, "loss": 2.327, "step": 331550 }, { "epoch": 1.28171823537598, "grad_norm": 0.09932498633861542, "learning_rate": 0.002, "loss": 2.3412, "step": 331560 }, { "epoch": 1.2817568925793632, "grad_norm": 0.10903242230415344, "learning_rate": 0.002, "loss": 2.3255, "step": 331570 }, { "epoch": 1.2817955497827465, "grad_norm": 0.12127885967493057, "learning_rate": 0.002, "loss": 2.3366, "step": 331580 }, { "epoch": 1.28183420698613, "grad_norm": 0.10876268148422241, "learning_rate": 0.002, "loss": 2.3365, "step": 331590 }, { "epoch": 1.2818728641895132, "grad_norm": 0.10647932440042496, "learning_rate": 0.002, "loss": 2.3325, "step": 331600 }, { "epoch": 1.2819115213928964, "grad_norm": 0.09786685556173325, "learning_rate": 0.002, "loss": 2.3491, "step": 331610 }, { "epoch": 1.2819501785962797, "grad_norm": 0.12839925289154053, "learning_rate": 0.002, "loss": 2.3385, "step": 331620 }, { "epoch": 1.281988835799663, "grad_norm": 0.11178138852119446, "learning_rate": 0.002, "loss": 2.3524, "step": 331630 }, { "epoch": 1.2820274930030462, "grad_norm": 0.09654244035482407, "learning_rate": 0.002, "loss": 2.3478, "step": 331640 }, { "epoch": 1.2820661502064294, "grad_norm": 0.11449973285198212, "learning_rate": 0.002, "loss": 2.3457, "step": 331650 }, { "epoch": 1.2821048074098127, "grad_norm": 0.10151177644729614, "learning_rate": 0.002, "loss": 2.3311, "step": 331660 }, { "epoch": 1.282143464613196, "grad_norm": 0.09369701892137527, "learning_rate": 0.002, "loss": 2.3396, "step": 331670 }, { "epoch": 1.2821821218165792, "grad_norm": 0.09277847409248352, "learning_rate": 0.002, "loss": 2.3369, "step": 331680 }, { "epoch": 1.2822207790199625, "grad_norm": 0.11237433552742004, "learning_rate": 0.002, "loss": 2.3299, "step": 331690 }, { "epoch": 1.282259436223346, "grad_norm": 0.2749531865119934, "learning_rate": 0.002, "loss": 2.3344, "step": 331700 }, { "epoch": 1.2822980934267292, "grad_norm": 0.12759670615196228, "learning_rate": 0.002, "loss": 2.337, "step": 331710 }, { "epoch": 1.2823367506301124, "grad_norm": 0.1205337718129158, "learning_rate": 0.002, "loss": 2.3213, "step": 331720 }, { "epoch": 1.2823754078334957, "grad_norm": 0.10772675275802612, "learning_rate": 0.002, "loss": 2.3305, "step": 331730 }, { "epoch": 1.282414065036879, "grad_norm": 0.1037701889872551, "learning_rate": 0.002, "loss": 2.3441, "step": 331740 }, { "epoch": 1.2824527222402622, "grad_norm": 0.10590890049934387, "learning_rate": 0.002, "loss": 2.3199, "step": 331750 }, { "epoch": 1.2824913794436457, "grad_norm": 0.09448172152042389, "learning_rate": 0.002, "loss": 2.3467, "step": 331760 }, { "epoch": 1.282530036647029, "grad_norm": 0.11490815132856369, "learning_rate": 0.002, "loss": 2.3405, "step": 331770 }, { "epoch": 1.2825686938504122, "grad_norm": 0.21631719172000885, "learning_rate": 0.002, "loss": 2.3371, "step": 331780 }, { "epoch": 1.2826073510537954, "grad_norm": 0.09326942265033722, "learning_rate": 0.002, "loss": 2.3299, "step": 331790 }, { "epoch": 1.2826460082571787, "grad_norm": 0.09836390614509583, "learning_rate": 0.002, "loss": 2.3295, "step": 331800 }, { "epoch": 1.282684665460562, "grad_norm": 0.09765587002038956, "learning_rate": 0.002, "loss": 2.3347, "step": 331810 }, { "epoch": 1.2827233226639452, "grad_norm": 0.09442969411611557, "learning_rate": 0.002, "loss": 2.325, "step": 331820 }, { "epoch": 1.2827619798673284, "grad_norm": 0.12060291320085526, "learning_rate": 0.002, "loss": 2.3299, "step": 331830 }, { "epoch": 1.2828006370707117, "grad_norm": 0.10620687156915665, "learning_rate": 0.002, "loss": 2.3361, "step": 331840 }, { "epoch": 1.282839294274095, "grad_norm": 0.12601856887340546, "learning_rate": 0.002, "loss": 2.3452, "step": 331850 }, { "epoch": 1.2828779514774782, "grad_norm": 0.10232758522033691, "learning_rate": 0.002, "loss": 2.3548, "step": 331860 }, { "epoch": 1.2829166086808617, "grad_norm": 0.11093420535326004, "learning_rate": 0.002, "loss": 2.3316, "step": 331870 }, { "epoch": 1.282955265884245, "grad_norm": 0.12195949256420135, "learning_rate": 0.002, "loss": 2.326, "step": 331880 }, { "epoch": 1.2829939230876282, "grad_norm": 0.1167902797460556, "learning_rate": 0.002, "loss": 2.3173, "step": 331890 }, { "epoch": 1.2830325802910114, "grad_norm": 0.09185588359832764, "learning_rate": 0.002, "loss": 2.343, "step": 331900 }, { "epoch": 1.2830712374943947, "grad_norm": 0.10136355459690094, "learning_rate": 0.002, "loss": 2.3331, "step": 331910 }, { "epoch": 1.283109894697778, "grad_norm": 0.09425336867570877, "learning_rate": 0.002, "loss": 2.3491, "step": 331920 }, { "epoch": 1.2831485519011614, "grad_norm": 0.12710914015769958, "learning_rate": 0.002, "loss": 2.3445, "step": 331930 }, { "epoch": 1.2831872091045446, "grad_norm": 0.10715360939502716, "learning_rate": 0.002, "loss": 2.3317, "step": 331940 }, { "epoch": 1.283225866307928, "grad_norm": 0.1006692498922348, "learning_rate": 0.002, "loss": 2.331, "step": 331950 }, { "epoch": 1.2832645235113112, "grad_norm": 0.11472304165363312, "learning_rate": 0.002, "loss": 2.3259, "step": 331960 }, { "epoch": 1.2833031807146944, "grad_norm": 0.09872464090585709, "learning_rate": 0.002, "loss": 2.343, "step": 331970 }, { "epoch": 1.2833418379180777, "grad_norm": 0.11383762210607529, "learning_rate": 0.002, "loss": 2.3301, "step": 331980 }, { "epoch": 1.283380495121461, "grad_norm": 0.11117720603942871, "learning_rate": 0.002, "loss": 2.3311, "step": 331990 }, { "epoch": 1.2834191523248442, "grad_norm": 0.10939493030309677, "learning_rate": 0.002, "loss": 2.327, "step": 332000 }, { "epoch": 1.2834578095282274, "grad_norm": 0.11570484936237335, "learning_rate": 0.002, "loss": 2.3247, "step": 332010 }, { "epoch": 1.2834964667316107, "grad_norm": 0.10188289731740952, "learning_rate": 0.002, "loss": 2.3388, "step": 332020 }, { "epoch": 1.283535123934994, "grad_norm": 0.10991719365119934, "learning_rate": 0.002, "loss": 2.3296, "step": 332030 }, { "epoch": 1.2835737811383774, "grad_norm": 0.10216062515974045, "learning_rate": 0.002, "loss": 2.3474, "step": 332040 }, { "epoch": 1.2836124383417606, "grad_norm": 0.10282238572835922, "learning_rate": 0.002, "loss": 2.3222, "step": 332050 }, { "epoch": 1.283651095545144, "grad_norm": 0.09967167675495148, "learning_rate": 0.002, "loss": 2.3402, "step": 332060 }, { "epoch": 1.2836897527485271, "grad_norm": 0.09352394938468933, "learning_rate": 0.002, "loss": 2.336, "step": 332070 }, { "epoch": 1.2837284099519104, "grad_norm": 0.11892616003751755, "learning_rate": 0.002, "loss": 2.329, "step": 332080 }, { "epoch": 1.2837670671552937, "grad_norm": 0.10310930758714676, "learning_rate": 0.002, "loss": 2.3408, "step": 332090 }, { "epoch": 1.2838057243586771, "grad_norm": 0.1289806365966797, "learning_rate": 0.002, "loss": 2.3463, "step": 332100 }, { "epoch": 1.2838443815620604, "grad_norm": 0.10785408318042755, "learning_rate": 0.002, "loss": 2.3447, "step": 332110 }, { "epoch": 1.2838830387654436, "grad_norm": 0.1182045266032219, "learning_rate": 0.002, "loss": 2.347, "step": 332120 }, { "epoch": 1.2839216959688269, "grad_norm": 0.12128960341215134, "learning_rate": 0.002, "loss": 2.3483, "step": 332130 }, { "epoch": 1.2839603531722101, "grad_norm": 0.09235437959432602, "learning_rate": 0.002, "loss": 2.3403, "step": 332140 }, { "epoch": 1.2839990103755934, "grad_norm": 0.10368960350751877, "learning_rate": 0.002, "loss": 2.3519, "step": 332150 }, { "epoch": 1.2840376675789766, "grad_norm": 0.1179451122879982, "learning_rate": 0.002, "loss": 2.3339, "step": 332160 }, { "epoch": 1.28407632478236, "grad_norm": 0.11054787784814835, "learning_rate": 0.002, "loss": 2.3289, "step": 332170 }, { "epoch": 1.2841149819857431, "grad_norm": 0.09622831642627716, "learning_rate": 0.002, "loss": 2.3287, "step": 332180 }, { "epoch": 1.2841536391891264, "grad_norm": 0.1420263797044754, "learning_rate": 0.002, "loss": 2.3434, "step": 332190 }, { "epoch": 1.2841922963925096, "grad_norm": 0.10438098013401031, "learning_rate": 0.002, "loss": 2.3385, "step": 332200 }, { "epoch": 1.2842309535958931, "grad_norm": 0.11499829590320587, "learning_rate": 0.002, "loss": 2.3361, "step": 332210 }, { "epoch": 1.2842696107992764, "grad_norm": 0.11683389544487, "learning_rate": 0.002, "loss": 2.3304, "step": 332220 }, { "epoch": 1.2843082680026596, "grad_norm": 0.10560715198516846, "learning_rate": 0.002, "loss": 2.3253, "step": 332230 }, { "epoch": 1.2843469252060429, "grad_norm": 0.12091319262981415, "learning_rate": 0.002, "loss": 2.3176, "step": 332240 }, { "epoch": 1.2843855824094261, "grad_norm": 0.0979316458106041, "learning_rate": 0.002, "loss": 2.3309, "step": 332250 }, { "epoch": 1.2844242396128094, "grad_norm": 0.10224587470293045, "learning_rate": 0.002, "loss": 2.3297, "step": 332260 }, { "epoch": 1.2844628968161929, "grad_norm": 0.10661505162715912, "learning_rate": 0.002, "loss": 2.3424, "step": 332270 }, { "epoch": 1.284501554019576, "grad_norm": 0.09975156933069229, "learning_rate": 0.002, "loss": 2.3373, "step": 332280 }, { "epoch": 1.2845402112229594, "grad_norm": 0.1133033037185669, "learning_rate": 0.002, "loss": 2.3291, "step": 332290 }, { "epoch": 1.2845788684263426, "grad_norm": 0.14396722614765167, "learning_rate": 0.002, "loss": 2.32, "step": 332300 }, { "epoch": 1.2846175256297259, "grad_norm": 0.10433393716812134, "learning_rate": 0.002, "loss": 2.3529, "step": 332310 }, { "epoch": 1.2846561828331091, "grad_norm": 0.09392431378364563, "learning_rate": 0.002, "loss": 2.3352, "step": 332320 }, { "epoch": 1.2846948400364924, "grad_norm": 0.09219883382320404, "learning_rate": 0.002, "loss": 2.3385, "step": 332330 }, { "epoch": 1.2847334972398756, "grad_norm": 0.09603586047887802, "learning_rate": 0.002, "loss": 2.323, "step": 332340 }, { "epoch": 1.2847721544432589, "grad_norm": 0.10061074048280716, "learning_rate": 0.002, "loss": 2.3417, "step": 332350 }, { "epoch": 1.2848108116466421, "grad_norm": 0.11493312567472458, "learning_rate": 0.002, "loss": 2.3373, "step": 332360 }, { "epoch": 1.2848494688500254, "grad_norm": 0.11308170855045319, "learning_rate": 0.002, "loss": 2.3419, "step": 332370 }, { "epoch": 1.2848881260534089, "grad_norm": 0.09977343678474426, "learning_rate": 0.002, "loss": 2.3285, "step": 332380 }, { "epoch": 1.284926783256792, "grad_norm": 0.10442902892827988, "learning_rate": 0.002, "loss": 2.3295, "step": 332390 }, { "epoch": 1.2849654404601754, "grad_norm": 0.11375498026609421, "learning_rate": 0.002, "loss": 2.3419, "step": 332400 }, { "epoch": 1.2850040976635586, "grad_norm": 0.09284843504428864, "learning_rate": 0.002, "loss": 2.3303, "step": 332410 }, { "epoch": 1.2850427548669419, "grad_norm": 0.09526360780000687, "learning_rate": 0.002, "loss": 2.339, "step": 332420 }, { "epoch": 1.2850814120703251, "grad_norm": 0.1074945479631424, "learning_rate": 0.002, "loss": 2.3426, "step": 332430 }, { "epoch": 1.2851200692737086, "grad_norm": 0.10130199044942856, "learning_rate": 0.002, "loss": 2.3447, "step": 332440 }, { "epoch": 1.2851587264770918, "grad_norm": 0.11674440652132034, "learning_rate": 0.002, "loss": 2.3389, "step": 332450 }, { "epoch": 1.285197383680475, "grad_norm": 0.12361260503530502, "learning_rate": 0.002, "loss": 2.3235, "step": 332460 }, { "epoch": 1.2852360408838583, "grad_norm": 0.10297805070877075, "learning_rate": 0.002, "loss": 2.3477, "step": 332470 }, { "epoch": 1.2852746980872416, "grad_norm": 0.10889725387096405, "learning_rate": 0.002, "loss": 2.3275, "step": 332480 }, { "epoch": 1.2853133552906248, "grad_norm": 0.12578794360160828, "learning_rate": 0.002, "loss": 2.3405, "step": 332490 }, { "epoch": 1.285352012494008, "grad_norm": 0.11805281788110733, "learning_rate": 0.002, "loss": 2.3351, "step": 332500 }, { "epoch": 1.2853906696973914, "grad_norm": 0.120563805103302, "learning_rate": 0.002, "loss": 2.3374, "step": 332510 }, { "epoch": 1.2854293269007746, "grad_norm": 0.11009590327739716, "learning_rate": 0.002, "loss": 2.3368, "step": 332520 }, { "epoch": 1.2854679841041579, "grad_norm": 0.10590917617082596, "learning_rate": 0.002, "loss": 2.3423, "step": 332530 }, { "epoch": 1.285506641307541, "grad_norm": 0.0980500653386116, "learning_rate": 0.002, "loss": 2.341, "step": 332540 }, { "epoch": 1.2855452985109246, "grad_norm": 0.1209760531783104, "learning_rate": 0.002, "loss": 2.3333, "step": 332550 }, { "epoch": 1.2855839557143078, "grad_norm": 0.10607732832431793, "learning_rate": 0.002, "loss": 2.3566, "step": 332560 }, { "epoch": 1.285622612917691, "grad_norm": 0.11301247775554657, "learning_rate": 0.002, "loss": 2.3261, "step": 332570 }, { "epoch": 1.2856612701210743, "grad_norm": 0.12619836628437042, "learning_rate": 0.002, "loss": 2.3411, "step": 332580 }, { "epoch": 1.2856999273244576, "grad_norm": 0.09536372870206833, "learning_rate": 0.002, "loss": 2.3237, "step": 332590 }, { "epoch": 1.2857385845278408, "grad_norm": 0.12440627813339233, "learning_rate": 0.002, "loss": 2.3469, "step": 332600 }, { "epoch": 1.2857772417312243, "grad_norm": 0.12063415348529816, "learning_rate": 0.002, "loss": 2.3371, "step": 332610 }, { "epoch": 1.2858158989346076, "grad_norm": 0.10072565078735352, "learning_rate": 0.002, "loss": 2.335, "step": 332620 }, { "epoch": 1.2858545561379908, "grad_norm": 0.10750167071819305, "learning_rate": 0.002, "loss": 2.3253, "step": 332630 }, { "epoch": 1.285893213341374, "grad_norm": 0.10173853486776352, "learning_rate": 0.002, "loss": 2.3309, "step": 332640 }, { "epoch": 1.2859318705447573, "grad_norm": 0.09797729551792145, "learning_rate": 0.002, "loss": 2.33, "step": 332650 }, { "epoch": 1.2859705277481406, "grad_norm": 0.09454578906297684, "learning_rate": 0.002, "loss": 2.3384, "step": 332660 }, { "epoch": 1.2860091849515238, "grad_norm": 0.09492919594049454, "learning_rate": 0.002, "loss": 2.3399, "step": 332670 }, { "epoch": 1.286047842154907, "grad_norm": 0.12662118673324585, "learning_rate": 0.002, "loss": 2.3408, "step": 332680 }, { "epoch": 1.2860864993582903, "grad_norm": 0.11416533589363098, "learning_rate": 0.002, "loss": 2.3385, "step": 332690 }, { "epoch": 1.2861251565616736, "grad_norm": 0.09003262966871262, "learning_rate": 0.002, "loss": 2.3579, "step": 332700 }, { "epoch": 1.286163813765057, "grad_norm": 0.10572832822799683, "learning_rate": 0.002, "loss": 2.3381, "step": 332710 }, { "epoch": 1.2862024709684403, "grad_norm": 0.1264459639787674, "learning_rate": 0.002, "loss": 2.3362, "step": 332720 }, { "epoch": 1.2862411281718236, "grad_norm": 0.1304396539926529, "learning_rate": 0.002, "loss": 2.3471, "step": 332730 }, { "epoch": 1.2862797853752068, "grad_norm": 0.11325572431087494, "learning_rate": 0.002, "loss": 2.3104, "step": 332740 }, { "epoch": 1.28631844257859, "grad_norm": 0.09685946255922318, "learning_rate": 0.002, "loss": 2.3249, "step": 332750 }, { "epoch": 1.2863570997819733, "grad_norm": 0.11727797240018845, "learning_rate": 0.002, "loss": 2.336, "step": 332760 }, { "epoch": 1.2863957569853566, "grad_norm": 0.10779083520174026, "learning_rate": 0.002, "loss": 2.3292, "step": 332770 }, { "epoch": 1.28643441418874, "grad_norm": 0.1004313975572586, "learning_rate": 0.002, "loss": 2.3272, "step": 332780 }, { "epoch": 1.2864730713921233, "grad_norm": 0.1066732108592987, "learning_rate": 0.002, "loss": 2.3255, "step": 332790 }, { "epoch": 1.2865117285955066, "grad_norm": 0.09625577926635742, "learning_rate": 0.002, "loss": 2.3313, "step": 332800 }, { "epoch": 1.2865503857988898, "grad_norm": 0.09934387356042862, "learning_rate": 0.002, "loss": 2.3319, "step": 332810 }, { "epoch": 1.286589043002273, "grad_norm": 0.11568392068147659, "learning_rate": 0.002, "loss": 2.3333, "step": 332820 }, { "epoch": 1.2866277002056563, "grad_norm": 0.10015887767076492, "learning_rate": 0.002, "loss": 2.3366, "step": 332830 }, { "epoch": 1.2866663574090396, "grad_norm": 0.10513238608837128, "learning_rate": 0.002, "loss": 2.3345, "step": 332840 }, { "epoch": 1.2867050146124228, "grad_norm": 0.11645331233739853, "learning_rate": 0.002, "loss": 2.3323, "step": 332850 }, { "epoch": 1.286743671815806, "grad_norm": 0.15896683931350708, "learning_rate": 0.002, "loss": 2.3389, "step": 332860 }, { "epoch": 1.2867823290191893, "grad_norm": 0.11531778424978256, "learning_rate": 0.002, "loss": 2.3417, "step": 332870 }, { "epoch": 1.2868209862225728, "grad_norm": 0.10579390078783035, "learning_rate": 0.002, "loss": 2.3442, "step": 332880 }, { "epoch": 1.286859643425956, "grad_norm": 0.0989760085940361, "learning_rate": 0.002, "loss": 2.3334, "step": 332890 }, { "epoch": 1.2868983006293393, "grad_norm": 0.1162877306342125, "learning_rate": 0.002, "loss": 2.3339, "step": 332900 }, { "epoch": 1.2869369578327226, "grad_norm": 0.11907833814620972, "learning_rate": 0.002, "loss": 2.3304, "step": 332910 }, { "epoch": 1.2869756150361058, "grad_norm": 0.11692595481872559, "learning_rate": 0.002, "loss": 2.3425, "step": 332920 }, { "epoch": 1.287014272239489, "grad_norm": 0.0976448655128479, "learning_rate": 0.002, "loss": 2.3402, "step": 332930 }, { "epoch": 1.2870529294428723, "grad_norm": 0.10867959260940552, "learning_rate": 0.002, "loss": 2.3304, "step": 332940 }, { "epoch": 1.2870915866462558, "grad_norm": 0.10054941475391388, "learning_rate": 0.002, "loss": 2.3331, "step": 332950 }, { "epoch": 1.287130243849639, "grad_norm": 0.11905647069215775, "learning_rate": 0.002, "loss": 2.3284, "step": 332960 }, { "epoch": 1.2871689010530223, "grad_norm": 0.12290788441896439, "learning_rate": 0.002, "loss": 2.3334, "step": 332970 }, { "epoch": 1.2872075582564055, "grad_norm": 0.14153672754764557, "learning_rate": 0.002, "loss": 2.3292, "step": 332980 }, { "epoch": 1.2872462154597888, "grad_norm": 0.10869767516851425, "learning_rate": 0.002, "loss": 2.35, "step": 332990 }, { "epoch": 1.287284872663172, "grad_norm": 0.1402626931667328, "learning_rate": 0.002, "loss": 2.3318, "step": 333000 }, { "epoch": 1.2873235298665553, "grad_norm": 0.11460593342781067, "learning_rate": 0.002, "loss": 2.3408, "step": 333010 }, { "epoch": 1.2873621870699385, "grad_norm": 0.11055731773376465, "learning_rate": 0.002, "loss": 2.334, "step": 333020 }, { "epoch": 1.2874008442733218, "grad_norm": 0.11018706113100052, "learning_rate": 0.002, "loss": 2.3341, "step": 333030 }, { "epoch": 1.287439501476705, "grad_norm": 0.12170359492301941, "learning_rate": 0.002, "loss": 2.3347, "step": 333040 }, { "epoch": 1.2874781586800885, "grad_norm": 0.10103699564933777, "learning_rate": 0.002, "loss": 2.3292, "step": 333050 }, { "epoch": 1.2875168158834718, "grad_norm": 0.08956848829984665, "learning_rate": 0.002, "loss": 2.3561, "step": 333060 }, { "epoch": 1.287555473086855, "grad_norm": 0.10570359230041504, "learning_rate": 0.002, "loss": 2.3368, "step": 333070 }, { "epoch": 1.2875941302902383, "grad_norm": 0.11920324712991714, "learning_rate": 0.002, "loss": 2.322, "step": 333080 }, { "epoch": 1.2876327874936215, "grad_norm": 0.09573183208703995, "learning_rate": 0.002, "loss": 2.3426, "step": 333090 }, { "epoch": 1.2876714446970048, "grad_norm": 0.11040244251489639, "learning_rate": 0.002, "loss": 2.3293, "step": 333100 }, { "epoch": 1.287710101900388, "grad_norm": 0.09444387257099152, "learning_rate": 0.002, "loss": 2.3393, "step": 333110 }, { "epoch": 1.2877487591037715, "grad_norm": 0.1170918419957161, "learning_rate": 0.002, "loss": 2.3297, "step": 333120 }, { "epoch": 1.2877874163071548, "grad_norm": 0.10428602993488312, "learning_rate": 0.002, "loss": 2.3434, "step": 333130 }, { "epoch": 1.287826073510538, "grad_norm": 0.1396740972995758, "learning_rate": 0.002, "loss": 2.3366, "step": 333140 }, { "epoch": 1.2878647307139213, "grad_norm": 0.10158234089612961, "learning_rate": 0.002, "loss": 2.3391, "step": 333150 }, { "epoch": 1.2879033879173045, "grad_norm": 0.10186280310153961, "learning_rate": 0.002, "loss": 2.3356, "step": 333160 }, { "epoch": 1.2879420451206878, "grad_norm": 0.11232930421829224, "learning_rate": 0.002, "loss": 2.3327, "step": 333170 }, { "epoch": 1.287980702324071, "grad_norm": 0.09306978434324265, "learning_rate": 0.002, "loss": 2.3461, "step": 333180 }, { "epoch": 1.2880193595274543, "grad_norm": 0.10687271505594254, "learning_rate": 0.002, "loss": 2.3461, "step": 333190 }, { "epoch": 1.2880580167308375, "grad_norm": 0.09269829094409943, "learning_rate": 0.002, "loss": 2.3377, "step": 333200 }, { "epoch": 1.2880966739342208, "grad_norm": 0.1074671596288681, "learning_rate": 0.002, "loss": 2.3299, "step": 333210 }, { "epoch": 1.2881353311376043, "grad_norm": 0.10309260338544846, "learning_rate": 0.002, "loss": 2.3399, "step": 333220 }, { "epoch": 1.2881739883409875, "grad_norm": 0.11010892689228058, "learning_rate": 0.002, "loss": 2.3382, "step": 333230 }, { "epoch": 1.2882126455443708, "grad_norm": 0.10869075357913971, "learning_rate": 0.002, "loss": 2.3382, "step": 333240 }, { "epoch": 1.288251302747754, "grad_norm": 0.10025380551815033, "learning_rate": 0.002, "loss": 2.3246, "step": 333250 }, { "epoch": 1.2882899599511373, "grad_norm": 0.0960700735449791, "learning_rate": 0.002, "loss": 2.344, "step": 333260 }, { "epoch": 1.2883286171545205, "grad_norm": 0.13028484582901, "learning_rate": 0.002, "loss": 2.3283, "step": 333270 }, { "epoch": 1.2883672743579038, "grad_norm": 0.08875899016857147, "learning_rate": 0.002, "loss": 2.3236, "step": 333280 }, { "epoch": 1.2884059315612872, "grad_norm": 0.10433296114206314, "learning_rate": 0.002, "loss": 2.34, "step": 333290 }, { "epoch": 1.2884445887646705, "grad_norm": 0.09889642894268036, "learning_rate": 0.002, "loss": 2.319, "step": 333300 }, { "epoch": 1.2884832459680537, "grad_norm": 0.11376189440488815, "learning_rate": 0.002, "loss": 2.3401, "step": 333310 }, { "epoch": 1.288521903171437, "grad_norm": 0.10081131011247635, "learning_rate": 0.002, "loss": 2.3171, "step": 333320 }, { "epoch": 1.2885605603748203, "grad_norm": 0.11411105841398239, "learning_rate": 0.002, "loss": 2.3254, "step": 333330 }, { "epoch": 1.2885992175782035, "grad_norm": 0.11444903165102005, "learning_rate": 0.002, "loss": 2.3255, "step": 333340 }, { "epoch": 1.2886378747815868, "grad_norm": 0.10614798218011856, "learning_rate": 0.002, "loss": 2.332, "step": 333350 }, { "epoch": 1.28867653198497, "grad_norm": 0.1183219850063324, "learning_rate": 0.002, "loss": 2.3442, "step": 333360 }, { "epoch": 1.2887151891883533, "grad_norm": 0.09680161625146866, "learning_rate": 0.002, "loss": 2.3144, "step": 333370 }, { "epoch": 1.2887538463917365, "grad_norm": 0.10841412842273712, "learning_rate": 0.002, "loss": 2.3224, "step": 333380 }, { "epoch": 1.28879250359512, "grad_norm": 0.08972472697496414, "learning_rate": 0.002, "loss": 2.3278, "step": 333390 }, { "epoch": 1.2888311607985032, "grad_norm": 0.12399033457040787, "learning_rate": 0.002, "loss": 2.3374, "step": 333400 }, { "epoch": 1.2888698180018865, "grad_norm": 0.12081394344568253, "learning_rate": 0.002, "loss": 2.3358, "step": 333410 }, { "epoch": 1.2889084752052697, "grad_norm": 0.09331200271844864, "learning_rate": 0.002, "loss": 2.3522, "step": 333420 }, { "epoch": 1.288947132408653, "grad_norm": 0.09348198771476746, "learning_rate": 0.002, "loss": 2.3433, "step": 333430 }, { "epoch": 1.2889857896120362, "grad_norm": 0.1317223161458969, "learning_rate": 0.002, "loss": 2.3212, "step": 333440 }, { "epoch": 1.2890244468154197, "grad_norm": 0.10621041804552078, "learning_rate": 0.002, "loss": 2.3516, "step": 333450 }, { "epoch": 1.289063104018803, "grad_norm": 0.10697954893112183, "learning_rate": 0.002, "loss": 2.3374, "step": 333460 }, { "epoch": 1.2891017612221862, "grad_norm": 0.21501240134239197, "learning_rate": 0.002, "loss": 2.3347, "step": 333470 }, { "epoch": 1.2891404184255695, "grad_norm": 0.10425000637769699, "learning_rate": 0.002, "loss": 2.3311, "step": 333480 }, { "epoch": 1.2891790756289527, "grad_norm": 0.11156164109706879, "learning_rate": 0.002, "loss": 2.3487, "step": 333490 }, { "epoch": 1.289217732832336, "grad_norm": 0.11151176691055298, "learning_rate": 0.002, "loss": 2.3379, "step": 333500 }, { "epoch": 1.2892563900357192, "grad_norm": 0.1183115541934967, "learning_rate": 0.002, "loss": 2.3409, "step": 333510 }, { "epoch": 1.2892950472391025, "grad_norm": 0.0971737876534462, "learning_rate": 0.002, "loss": 2.3448, "step": 333520 }, { "epoch": 1.2893337044424857, "grad_norm": 0.09613676369190216, "learning_rate": 0.002, "loss": 2.3441, "step": 333530 }, { "epoch": 1.289372361645869, "grad_norm": 0.10197040438652039, "learning_rate": 0.002, "loss": 2.3312, "step": 333540 }, { "epoch": 1.2894110188492522, "grad_norm": 0.10441060364246368, "learning_rate": 0.002, "loss": 2.3321, "step": 333550 }, { "epoch": 1.2894496760526357, "grad_norm": 0.10522343963384628, "learning_rate": 0.002, "loss": 2.3446, "step": 333560 }, { "epoch": 1.289488333256019, "grad_norm": 0.10223446786403656, "learning_rate": 0.002, "loss": 2.3362, "step": 333570 }, { "epoch": 1.2895269904594022, "grad_norm": 0.12261584401130676, "learning_rate": 0.002, "loss": 2.3228, "step": 333580 }, { "epoch": 1.2895656476627855, "grad_norm": 0.11205879598855972, "learning_rate": 0.002, "loss": 2.3373, "step": 333590 }, { "epoch": 1.2896043048661687, "grad_norm": 0.09977878630161285, "learning_rate": 0.002, "loss": 2.3425, "step": 333600 }, { "epoch": 1.289642962069552, "grad_norm": 0.09606704860925674, "learning_rate": 0.002, "loss": 2.3412, "step": 333610 }, { "epoch": 1.2896816192729355, "grad_norm": 0.13726183772087097, "learning_rate": 0.002, "loss": 2.3385, "step": 333620 }, { "epoch": 1.2897202764763187, "grad_norm": 0.10022982954978943, "learning_rate": 0.002, "loss": 2.3359, "step": 333630 }, { "epoch": 1.289758933679702, "grad_norm": 0.11005251854658127, "learning_rate": 0.002, "loss": 2.3217, "step": 333640 }, { "epoch": 1.2897975908830852, "grad_norm": 0.10867244750261307, "learning_rate": 0.002, "loss": 2.3284, "step": 333650 }, { "epoch": 1.2898362480864685, "grad_norm": 0.10423463582992554, "learning_rate": 0.002, "loss": 2.3442, "step": 333660 }, { "epoch": 1.2898749052898517, "grad_norm": 0.09577327966690063, "learning_rate": 0.002, "loss": 2.3391, "step": 333670 }, { "epoch": 1.289913562493235, "grad_norm": 0.12469309568405151, "learning_rate": 0.002, "loss": 2.3169, "step": 333680 }, { "epoch": 1.2899522196966182, "grad_norm": 0.11108225584030151, "learning_rate": 0.002, "loss": 2.3295, "step": 333690 }, { "epoch": 1.2899908769000015, "grad_norm": 0.09957767277956009, "learning_rate": 0.002, "loss": 2.3195, "step": 333700 }, { "epoch": 1.2900295341033847, "grad_norm": 0.1158376932144165, "learning_rate": 0.002, "loss": 2.3408, "step": 333710 }, { "epoch": 1.290068191306768, "grad_norm": 0.10757265239953995, "learning_rate": 0.002, "loss": 2.3373, "step": 333720 }, { "epoch": 1.2901068485101514, "grad_norm": 0.09862326085567474, "learning_rate": 0.002, "loss": 2.3401, "step": 333730 }, { "epoch": 1.2901455057135347, "grad_norm": 0.09710995852947235, "learning_rate": 0.002, "loss": 2.3147, "step": 333740 }, { "epoch": 1.290184162916918, "grad_norm": 0.10201451927423477, "learning_rate": 0.002, "loss": 2.3115, "step": 333750 }, { "epoch": 1.2902228201203012, "grad_norm": 0.11438058316707611, "learning_rate": 0.002, "loss": 2.3523, "step": 333760 }, { "epoch": 1.2902614773236845, "grad_norm": 0.11025866121053696, "learning_rate": 0.002, "loss": 2.3363, "step": 333770 }, { "epoch": 1.2903001345270677, "grad_norm": 0.09014347195625305, "learning_rate": 0.002, "loss": 2.3395, "step": 333780 }, { "epoch": 1.2903387917304512, "grad_norm": 0.11819101125001907, "learning_rate": 0.002, "loss": 2.3332, "step": 333790 }, { "epoch": 1.2903774489338344, "grad_norm": 0.1083206906914711, "learning_rate": 0.002, "loss": 2.3373, "step": 333800 }, { "epoch": 1.2904161061372177, "grad_norm": 0.1187998354434967, "learning_rate": 0.002, "loss": 2.3564, "step": 333810 }, { "epoch": 1.290454763340601, "grad_norm": 0.1018686294555664, "learning_rate": 0.002, "loss": 2.3317, "step": 333820 }, { "epoch": 1.2904934205439842, "grad_norm": 0.09539389610290527, "learning_rate": 0.002, "loss": 2.332, "step": 333830 }, { "epoch": 1.2905320777473674, "grad_norm": 0.10429718345403671, "learning_rate": 0.002, "loss": 2.3218, "step": 333840 }, { "epoch": 1.2905707349507507, "grad_norm": 0.11827939003705978, "learning_rate": 0.002, "loss": 2.3357, "step": 333850 }, { "epoch": 1.290609392154134, "grad_norm": 0.10822010040283203, "learning_rate": 0.002, "loss": 2.3278, "step": 333860 }, { "epoch": 1.2906480493575172, "grad_norm": 0.09067022800445557, "learning_rate": 0.002, "loss": 2.343, "step": 333870 }, { "epoch": 1.2906867065609005, "grad_norm": 0.11707578599452972, "learning_rate": 0.002, "loss": 2.3192, "step": 333880 }, { "epoch": 1.2907253637642837, "grad_norm": 0.10048212110996246, "learning_rate": 0.002, "loss": 2.319, "step": 333890 }, { "epoch": 1.2907640209676672, "grad_norm": 0.10516627132892609, "learning_rate": 0.002, "loss": 2.3203, "step": 333900 }, { "epoch": 1.2908026781710504, "grad_norm": 0.13592956960201263, "learning_rate": 0.002, "loss": 2.3225, "step": 333910 }, { "epoch": 1.2908413353744337, "grad_norm": 0.10363247990608215, "learning_rate": 0.002, "loss": 2.3317, "step": 333920 }, { "epoch": 1.290879992577817, "grad_norm": 0.10619153082370758, "learning_rate": 0.002, "loss": 2.3479, "step": 333930 }, { "epoch": 1.2909186497812002, "grad_norm": 0.10397988557815552, "learning_rate": 0.002, "loss": 2.3279, "step": 333940 }, { "epoch": 1.2909573069845834, "grad_norm": 0.1019267588853836, "learning_rate": 0.002, "loss": 2.3268, "step": 333950 }, { "epoch": 1.290995964187967, "grad_norm": 0.10218657553195953, "learning_rate": 0.002, "loss": 2.341, "step": 333960 }, { "epoch": 1.2910346213913502, "grad_norm": 0.117024265229702, "learning_rate": 0.002, "loss": 2.351, "step": 333970 }, { "epoch": 1.2910732785947334, "grad_norm": 0.10420398414134979, "learning_rate": 0.002, "loss": 2.3429, "step": 333980 }, { "epoch": 1.2911119357981167, "grad_norm": 0.13143710792064667, "learning_rate": 0.002, "loss": 2.3216, "step": 333990 }, { "epoch": 1.2911505930015, "grad_norm": 0.10686857998371124, "learning_rate": 0.002, "loss": 2.3336, "step": 334000 }, { "epoch": 1.2911892502048832, "grad_norm": 0.0898427739739418, "learning_rate": 0.002, "loss": 2.3494, "step": 334010 }, { "epoch": 1.2912279074082664, "grad_norm": 0.268848717212677, "learning_rate": 0.002, "loss": 2.3338, "step": 334020 }, { "epoch": 1.2912665646116497, "grad_norm": 0.11120209097862244, "learning_rate": 0.002, "loss": 2.3528, "step": 334030 }, { "epoch": 1.291305221815033, "grad_norm": 0.09789606928825378, "learning_rate": 0.002, "loss": 2.3367, "step": 334040 }, { "epoch": 1.2913438790184162, "grad_norm": 0.1309780478477478, "learning_rate": 0.002, "loss": 2.3314, "step": 334050 }, { "epoch": 1.2913825362217994, "grad_norm": 0.1025337427854538, "learning_rate": 0.002, "loss": 2.3368, "step": 334060 }, { "epoch": 1.291421193425183, "grad_norm": 0.09491181373596191, "learning_rate": 0.002, "loss": 2.3297, "step": 334070 }, { "epoch": 1.2914598506285662, "grad_norm": 0.09663745015859604, "learning_rate": 0.002, "loss": 2.3344, "step": 334080 }, { "epoch": 1.2914985078319494, "grad_norm": 0.11719124019145966, "learning_rate": 0.002, "loss": 2.3319, "step": 334090 }, { "epoch": 1.2915371650353327, "grad_norm": 0.09677215665578842, "learning_rate": 0.002, "loss": 2.3311, "step": 334100 }, { "epoch": 1.291575822238716, "grad_norm": 0.11550380289554596, "learning_rate": 0.002, "loss": 2.3116, "step": 334110 }, { "epoch": 1.2916144794420992, "grad_norm": 0.1174958124756813, "learning_rate": 0.002, "loss": 2.3147, "step": 334120 }, { "epoch": 1.2916531366454826, "grad_norm": 0.1034787967801094, "learning_rate": 0.002, "loss": 2.335, "step": 334130 }, { "epoch": 1.291691793848866, "grad_norm": 0.09159667044878006, "learning_rate": 0.002, "loss": 2.3292, "step": 334140 }, { "epoch": 1.2917304510522492, "grad_norm": 0.11333204805850983, "learning_rate": 0.002, "loss": 2.3346, "step": 334150 }, { "epoch": 1.2917691082556324, "grad_norm": 0.1204734668135643, "learning_rate": 0.002, "loss": 2.3285, "step": 334160 }, { "epoch": 1.2918077654590157, "grad_norm": 0.10005148500204086, "learning_rate": 0.002, "loss": 2.3161, "step": 334170 }, { "epoch": 1.291846422662399, "grad_norm": 0.10399097204208374, "learning_rate": 0.002, "loss": 2.3351, "step": 334180 }, { "epoch": 1.2918850798657822, "grad_norm": 0.10964294523000717, "learning_rate": 0.002, "loss": 2.3372, "step": 334190 }, { "epoch": 1.2919237370691654, "grad_norm": 0.11057308316230774, "learning_rate": 0.002, "loss": 2.329, "step": 334200 }, { "epoch": 1.2919623942725487, "grad_norm": 0.09860649704933167, "learning_rate": 0.002, "loss": 2.3349, "step": 334210 }, { "epoch": 1.292001051475932, "grad_norm": 0.11533652245998383, "learning_rate": 0.002, "loss": 2.339, "step": 334220 }, { "epoch": 1.2920397086793152, "grad_norm": 0.12342556565999985, "learning_rate": 0.002, "loss": 2.3219, "step": 334230 }, { "epoch": 1.2920783658826986, "grad_norm": 0.10310442000627518, "learning_rate": 0.002, "loss": 2.3291, "step": 334240 }, { "epoch": 1.292117023086082, "grad_norm": 0.09682980179786682, "learning_rate": 0.002, "loss": 2.3347, "step": 334250 }, { "epoch": 1.2921556802894651, "grad_norm": 0.10156537592411041, "learning_rate": 0.002, "loss": 2.3415, "step": 334260 }, { "epoch": 1.2921943374928484, "grad_norm": 0.1010320708155632, "learning_rate": 0.002, "loss": 2.3406, "step": 334270 }, { "epoch": 1.2922329946962317, "grad_norm": 0.13401281833648682, "learning_rate": 0.002, "loss": 2.3523, "step": 334280 }, { "epoch": 1.292271651899615, "grad_norm": 0.0929509848356247, "learning_rate": 0.002, "loss": 2.3329, "step": 334290 }, { "epoch": 1.2923103091029984, "grad_norm": 0.10416734218597412, "learning_rate": 0.002, "loss": 2.3341, "step": 334300 }, { "epoch": 1.2923489663063816, "grad_norm": 0.10413626581430435, "learning_rate": 0.002, "loss": 2.3308, "step": 334310 }, { "epoch": 1.2923876235097649, "grad_norm": 0.09059230238199234, "learning_rate": 0.002, "loss": 2.3174, "step": 334320 }, { "epoch": 1.2924262807131481, "grad_norm": 0.11529724299907684, "learning_rate": 0.002, "loss": 2.3257, "step": 334330 }, { "epoch": 1.2924649379165314, "grad_norm": 0.10367235541343689, "learning_rate": 0.002, "loss": 2.3186, "step": 334340 }, { "epoch": 1.2925035951199146, "grad_norm": 0.10158482193946838, "learning_rate": 0.002, "loss": 2.3348, "step": 334350 }, { "epoch": 1.292542252323298, "grad_norm": 0.10162920504808426, "learning_rate": 0.002, "loss": 2.324, "step": 334360 }, { "epoch": 1.2925809095266811, "grad_norm": 0.12735851109027863, "learning_rate": 0.002, "loss": 2.3336, "step": 334370 }, { "epoch": 1.2926195667300644, "grad_norm": 0.0941648855805397, "learning_rate": 0.002, "loss": 2.345, "step": 334380 }, { "epoch": 1.2926582239334476, "grad_norm": 0.10265415161848068, "learning_rate": 0.002, "loss": 2.3402, "step": 334390 }, { "epoch": 1.292696881136831, "grad_norm": 0.09607397764921188, "learning_rate": 0.002, "loss": 2.3235, "step": 334400 }, { "epoch": 1.2927355383402144, "grad_norm": 0.12596885859966278, "learning_rate": 0.002, "loss": 2.3256, "step": 334410 }, { "epoch": 1.2927741955435976, "grad_norm": 0.09151950478553772, "learning_rate": 0.002, "loss": 2.3412, "step": 334420 }, { "epoch": 1.2928128527469809, "grad_norm": 0.12431547790765762, "learning_rate": 0.002, "loss": 2.3236, "step": 334430 }, { "epoch": 1.2928515099503641, "grad_norm": 0.10649395734071732, "learning_rate": 0.002, "loss": 2.335, "step": 334440 }, { "epoch": 1.2928901671537474, "grad_norm": 0.11754316091537476, "learning_rate": 0.002, "loss": 2.3397, "step": 334450 }, { "epoch": 1.2929288243571306, "grad_norm": 0.10570388287305832, "learning_rate": 0.002, "loss": 2.3343, "step": 334460 }, { "epoch": 1.292967481560514, "grad_norm": 0.09635928273200989, "learning_rate": 0.002, "loss": 2.3285, "step": 334470 }, { "epoch": 1.2930061387638974, "grad_norm": 0.11909741163253784, "learning_rate": 0.002, "loss": 2.33, "step": 334480 }, { "epoch": 1.2930447959672806, "grad_norm": 0.10475486516952515, "learning_rate": 0.002, "loss": 2.3414, "step": 334490 }, { "epoch": 1.2930834531706639, "grad_norm": 0.1174573004245758, "learning_rate": 0.002, "loss": 2.3366, "step": 334500 }, { "epoch": 1.2931221103740471, "grad_norm": 0.0980861634016037, "learning_rate": 0.002, "loss": 2.3422, "step": 334510 }, { "epoch": 1.2931607675774304, "grad_norm": 0.10527966916561127, "learning_rate": 0.002, "loss": 2.3262, "step": 334520 }, { "epoch": 1.2931994247808136, "grad_norm": 0.15596070885658264, "learning_rate": 0.002, "loss": 2.3462, "step": 334530 }, { "epoch": 1.2932380819841969, "grad_norm": 0.09148496389389038, "learning_rate": 0.002, "loss": 2.3425, "step": 334540 }, { "epoch": 1.2932767391875801, "grad_norm": 0.10149691253900528, "learning_rate": 0.002, "loss": 2.3241, "step": 334550 }, { "epoch": 1.2933153963909634, "grad_norm": 0.1106388196349144, "learning_rate": 0.002, "loss": 2.332, "step": 334560 }, { "epoch": 1.2933540535943469, "grad_norm": 0.10097920894622803, "learning_rate": 0.002, "loss": 2.3511, "step": 334570 }, { "epoch": 1.29339271079773, "grad_norm": 0.10403983294963837, "learning_rate": 0.002, "loss": 2.3377, "step": 334580 }, { "epoch": 1.2934313680011134, "grad_norm": 0.11371996253728867, "learning_rate": 0.002, "loss": 2.3247, "step": 334590 }, { "epoch": 1.2934700252044966, "grad_norm": 0.11415140330791473, "learning_rate": 0.002, "loss": 2.3374, "step": 334600 }, { "epoch": 1.2935086824078799, "grad_norm": 0.10624424368143082, "learning_rate": 0.002, "loss": 2.33, "step": 334610 }, { "epoch": 1.2935473396112631, "grad_norm": 0.1049097403883934, "learning_rate": 0.002, "loss": 2.3337, "step": 334620 }, { "epoch": 1.2935859968146464, "grad_norm": 0.09997744113206863, "learning_rate": 0.002, "loss": 2.3294, "step": 334630 }, { "epoch": 1.2936246540180298, "grad_norm": 0.11178059130907059, "learning_rate": 0.002, "loss": 2.3445, "step": 334640 }, { "epoch": 1.293663311221413, "grad_norm": 0.09644506126642227, "learning_rate": 0.002, "loss": 2.3246, "step": 334650 }, { "epoch": 1.2937019684247963, "grad_norm": 0.133858323097229, "learning_rate": 0.002, "loss": 2.3207, "step": 334660 }, { "epoch": 1.2937406256281796, "grad_norm": 0.1203598603606224, "learning_rate": 0.002, "loss": 2.3315, "step": 334670 }, { "epoch": 1.2937792828315628, "grad_norm": 0.15581074357032776, "learning_rate": 0.002, "loss": 2.3399, "step": 334680 }, { "epoch": 1.293817940034946, "grad_norm": 0.09663668274879456, "learning_rate": 0.002, "loss": 2.3307, "step": 334690 }, { "epoch": 1.2938565972383294, "grad_norm": 0.0946556106209755, "learning_rate": 0.002, "loss": 2.3324, "step": 334700 }, { "epoch": 1.2938952544417126, "grad_norm": 0.10505830496549606, "learning_rate": 0.002, "loss": 2.3337, "step": 334710 }, { "epoch": 1.2939339116450959, "grad_norm": 0.1246907189488411, "learning_rate": 0.002, "loss": 2.3184, "step": 334720 }, { "epoch": 1.293972568848479, "grad_norm": 0.10076715052127838, "learning_rate": 0.002, "loss": 2.3415, "step": 334730 }, { "epoch": 1.2940112260518626, "grad_norm": 0.09258640557527542, "learning_rate": 0.002, "loss": 2.3269, "step": 334740 }, { "epoch": 1.2940498832552458, "grad_norm": 0.11278831958770752, "learning_rate": 0.002, "loss": 2.3255, "step": 334750 }, { "epoch": 1.294088540458629, "grad_norm": 0.12063068896532059, "learning_rate": 0.002, "loss": 2.3284, "step": 334760 }, { "epoch": 1.2941271976620123, "grad_norm": 0.110850989818573, "learning_rate": 0.002, "loss": 2.3409, "step": 334770 }, { "epoch": 1.2941658548653956, "grad_norm": 0.12250538170337677, "learning_rate": 0.002, "loss": 2.3418, "step": 334780 }, { "epoch": 1.2942045120687788, "grad_norm": 0.0948261097073555, "learning_rate": 0.002, "loss": 2.326, "step": 334790 }, { "epoch": 1.294243169272162, "grad_norm": 0.09137909859418869, "learning_rate": 0.002, "loss": 2.3229, "step": 334800 }, { "epoch": 1.2942818264755456, "grad_norm": 0.10054846107959747, "learning_rate": 0.002, "loss": 2.3315, "step": 334810 }, { "epoch": 1.2943204836789288, "grad_norm": 0.09631969779729843, "learning_rate": 0.002, "loss": 2.3563, "step": 334820 }, { "epoch": 1.294359140882312, "grad_norm": 0.09292064607143402, "learning_rate": 0.002, "loss": 2.3483, "step": 334830 }, { "epoch": 1.2943977980856953, "grad_norm": 0.10343699157238007, "learning_rate": 0.002, "loss": 2.3374, "step": 334840 }, { "epoch": 1.2944364552890786, "grad_norm": 0.11731761693954468, "learning_rate": 0.002, "loss": 2.3339, "step": 334850 }, { "epoch": 1.2944751124924618, "grad_norm": 0.11252184212207794, "learning_rate": 0.002, "loss": 2.3365, "step": 334860 }, { "epoch": 1.294513769695845, "grad_norm": 0.11709108203649521, "learning_rate": 0.002, "loss": 2.3146, "step": 334870 }, { "epoch": 1.2945524268992283, "grad_norm": 0.09856344014406204, "learning_rate": 0.002, "loss": 2.3168, "step": 334880 }, { "epoch": 1.2945910841026116, "grad_norm": 0.10993824899196625, "learning_rate": 0.002, "loss": 2.3267, "step": 334890 }, { "epoch": 1.2946297413059948, "grad_norm": 0.11090542376041412, "learning_rate": 0.002, "loss": 2.3167, "step": 334900 }, { "epoch": 1.2946683985093783, "grad_norm": 0.11012540757656097, "learning_rate": 0.002, "loss": 2.3245, "step": 334910 }, { "epoch": 1.2947070557127616, "grad_norm": 0.09483652561903, "learning_rate": 0.002, "loss": 2.3211, "step": 334920 }, { "epoch": 1.2947457129161448, "grad_norm": 0.09918684512376785, "learning_rate": 0.002, "loss": 2.338, "step": 334930 }, { "epoch": 1.294784370119528, "grad_norm": 0.09008168429136276, "learning_rate": 0.002, "loss": 2.3422, "step": 334940 }, { "epoch": 1.2948230273229113, "grad_norm": 0.10129818320274353, "learning_rate": 0.002, "loss": 2.3345, "step": 334950 }, { "epoch": 1.2948616845262946, "grad_norm": 0.10634876042604446, "learning_rate": 0.002, "loss": 2.3323, "step": 334960 }, { "epoch": 1.2949003417296778, "grad_norm": 0.12891563773155212, "learning_rate": 0.002, "loss": 2.3156, "step": 334970 }, { "epoch": 1.2949389989330613, "grad_norm": 0.10008393973112106, "learning_rate": 0.002, "loss": 2.3381, "step": 334980 }, { "epoch": 1.2949776561364446, "grad_norm": 0.1001753956079483, "learning_rate": 0.002, "loss": 2.3279, "step": 334990 }, { "epoch": 1.2950163133398278, "grad_norm": 0.1086198091506958, "learning_rate": 0.002, "loss": 2.3509, "step": 335000 }, { "epoch": 1.295054970543211, "grad_norm": 0.10994220525026321, "learning_rate": 0.002, "loss": 2.3199, "step": 335010 }, { "epoch": 1.2950936277465943, "grad_norm": 0.11280565708875656, "learning_rate": 0.002, "loss": 2.3485, "step": 335020 }, { "epoch": 1.2951322849499776, "grad_norm": 0.12463513016700745, "learning_rate": 0.002, "loss": 2.3324, "step": 335030 }, { "epoch": 1.2951709421533608, "grad_norm": 0.10185111314058304, "learning_rate": 0.002, "loss": 2.3288, "step": 335040 }, { "epoch": 1.295209599356744, "grad_norm": 0.10697565227746964, "learning_rate": 0.002, "loss": 2.3475, "step": 335050 }, { "epoch": 1.2952482565601273, "grad_norm": 0.1059790849685669, "learning_rate": 0.002, "loss": 2.3302, "step": 335060 }, { "epoch": 1.2952869137635106, "grad_norm": 0.10459061712026596, "learning_rate": 0.002, "loss": 2.3251, "step": 335070 }, { "epoch": 1.295325570966894, "grad_norm": 0.11010526120662689, "learning_rate": 0.002, "loss": 2.3501, "step": 335080 }, { "epoch": 1.2953642281702773, "grad_norm": 0.11023080348968506, "learning_rate": 0.002, "loss": 2.3308, "step": 335090 }, { "epoch": 1.2954028853736606, "grad_norm": 0.09726311266422272, "learning_rate": 0.002, "loss": 2.3338, "step": 335100 }, { "epoch": 1.2954415425770438, "grad_norm": 0.10792980343103409, "learning_rate": 0.002, "loss": 2.3201, "step": 335110 }, { "epoch": 1.295480199780427, "grad_norm": 0.09982597827911377, "learning_rate": 0.002, "loss": 2.3459, "step": 335120 }, { "epoch": 1.2955188569838103, "grad_norm": 0.0997501090168953, "learning_rate": 0.002, "loss": 2.3369, "step": 335130 }, { "epoch": 1.2955575141871936, "grad_norm": 0.12466259300708771, "learning_rate": 0.002, "loss": 2.3366, "step": 335140 }, { "epoch": 1.295596171390577, "grad_norm": 0.12011416256427765, "learning_rate": 0.002, "loss": 2.3342, "step": 335150 }, { "epoch": 1.2956348285939603, "grad_norm": 0.11512520164251328, "learning_rate": 0.002, "loss": 2.3373, "step": 335160 }, { "epoch": 1.2956734857973435, "grad_norm": 0.10447486490011215, "learning_rate": 0.002, "loss": 2.3258, "step": 335170 }, { "epoch": 1.2957121430007268, "grad_norm": 0.10611852258443832, "learning_rate": 0.002, "loss": 2.3441, "step": 335180 }, { "epoch": 1.29575080020411, "grad_norm": 0.10485371947288513, "learning_rate": 0.002, "loss": 2.3289, "step": 335190 }, { "epoch": 1.2957894574074933, "grad_norm": 0.09470439702272415, "learning_rate": 0.002, "loss": 2.3423, "step": 335200 }, { "epoch": 1.2958281146108765, "grad_norm": 0.11864740401506424, "learning_rate": 0.002, "loss": 2.3427, "step": 335210 }, { "epoch": 1.2958667718142598, "grad_norm": 0.10729487240314484, "learning_rate": 0.002, "loss": 2.345, "step": 335220 }, { "epoch": 1.295905429017643, "grad_norm": 0.09711641073226929, "learning_rate": 0.002, "loss": 2.3315, "step": 335230 }, { "epoch": 1.2959440862210263, "grad_norm": 0.09695418924093246, "learning_rate": 0.002, "loss": 2.3424, "step": 335240 }, { "epoch": 1.2959827434244098, "grad_norm": 0.10560396313667297, "learning_rate": 0.002, "loss": 2.3382, "step": 335250 }, { "epoch": 1.296021400627793, "grad_norm": 0.10399089008569717, "learning_rate": 0.002, "loss": 2.3308, "step": 335260 }, { "epoch": 1.2960600578311763, "grad_norm": 0.09577060490846634, "learning_rate": 0.002, "loss": 2.3432, "step": 335270 }, { "epoch": 1.2960987150345595, "grad_norm": 0.10907510668039322, "learning_rate": 0.002, "loss": 2.3373, "step": 335280 }, { "epoch": 1.2961373722379428, "grad_norm": 0.09519508481025696, "learning_rate": 0.002, "loss": 2.3363, "step": 335290 }, { "epoch": 1.296176029441326, "grad_norm": 0.11467217653989792, "learning_rate": 0.002, "loss": 2.3267, "step": 335300 }, { "epoch": 1.2962146866447095, "grad_norm": 0.10151347517967224, "learning_rate": 0.002, "loss": 2.3376, "step": 335310 }, { "epoch": 1.2962533438480928, "grad_norm": 0.10027828067541122, "learning_rate": 0.002, "loss": 2.3418, "step": 335320 }, { "epoch": 1.296292001051476, "grad_norm": 0.11993741989135742, "learning_rate": 0.002, "loss": 2.3332, "step": 335330 }, { "epoch": 1.2963306582548593, "grad_norm": 0.1123969629406929, "learning_rate": 0.002, "loss": 2.3329, "step": 335340 }, { "epoch": 1.2963693154582425, "grad_norm": 0.1024906262755394, "learning_rate": 0.002, "loss": 2.3353, "step": 335350 }, { "epoch": 1.2964079726616258, "grad_norm": 0.10444162040948868, "learning_rate": 0.002, "loss": 2.3199, "step": 335360 }, { "epoch": 1.296446629865009, "grad_norm": 0.09288977086544037, "learning_rate": 0.002, "loss": 2.3238, "step": 335370 }, { "epoch": 1.2964852870683923, "grad_norm": 0.10308873653411865, "learning_rate": 0.002, "loss": 2.357, "step": 335380 }, { "epoch": 1.2965239442717755, "grad_norm": 0.0930061861872673, "learning_rate": 0.002, "loss": 2.3301, "step": 335390 }, { "epoch": 1.2965626014751588, "grad_norm": 0.12539707124233246, "learning_rate": 0.002, "loss": 2.3328, "step": 335400 }, { "epoch": 1.296601258678542, "grad_norm": 0.10915507376194, "learning_rate": 0.002, "loss": 2.3378, "step": 335410 }, { "epoch": 1.2966399158819255, "grad_norm": 0.10099329799413681, "learning_rate": 0.002, "loss": 2.3182, "step": 335420 }, { "epoch": 1.2966785730853088, "grad_norm": 0.09893771260976791, "learning_rate": 0.002, "loss": 2.3446, "step": 335430 }, { "epoch": 1.296717230288692, "grad_norm": 0.13321642577648163, "learning_rate": 0.002, "loss": 2.328, "step": 335440 }, { "epoch": 1.2967558874920753, "grad_norm": 0.10005443543195724, "learning_rate": 0.002, "loss": 2.3161, "step": 335450 }, { "epoch": 1.2967945446954585, "grad_norm": 0.11731036752462387, "learning_rate": 0.002, "loss": 2.3484, "step": 335460 }, { "epoch": 1.2968332018988418, "grad_norm": 0.11234107613563538, "learning_rate": 0.002, "loss": 2.3313, "step": 335470 }, { "epoch": 1.2968718591022252, "grad_norm": 0.12516427040100098, "learning_rate": 0.002, "loss": 2.326, "step": 335480 }, { "epoch": 1.2969105163056085, "grad_norm": 0.08740191906690598, "learning_rate": 0.002, "loss": 2.3315, "step": 335490 }, { "epoch": 1.2969491735089917, "grad_norm": 0.11662085354328156, "learning_rate": 0.002, "loss": 2.3281, "step": 335500 }, { "epoch": 1.296987830712375, "grad_norm": 0.09221193194389343, "learning_rate": 0.002, "loss": 2.3429, "step": 335510 }, { "epoch": 1.2970264879157583, "grad_norm": 0.1095178946852684, "learning_rate": 0.002, "loss": 2.3338, "step": 335520 }, { "epoch": 1.2970651451191415, "grad_norm": 0.09435485303401947, "learning_rate": 0.002, "loss": 2.3322, "step": 335530 }, { "epoch": 1.2971038023225248, "grad_norm": 0.1369830220937729, "learning_rate": 0.002, "loss": 2.3379, "step": 335540 }, { "epoch": 1.297142459525908, "grad_norm": 0.12459218502044678, "learning_rate": 0.002, "loss": 2.3484, "step": 335550 }, { "epoch": 1.2971811167292913, "grad_norm": 0.11784704774618149, "learning_rate": 0.002, "loss": 2.3297, "step": 335560 }, { "epoch": 1.2972197739326745, "grad_norm": 0.09934574365615845, "learning_rate": 0.002, "loss": 2.3337, "step": 335570 }, { "epoch": 1.2972584311360578, "grad_norm": 0.09636490792036057, "learning_rate": 0.002, "loss": 2.3383, "step": 335580 }, { "epoch": 1.2972970883394412, "grad_norm": 0.10465794056653976, "learning_rate": 0.002, "loss": 2.3262, "step": 335590 }, { "epoch": 1.2973357455428245, "grad_norm": 0.11000781506299973, "learning_rate": 0.002, "loss": 2.321, "step": 335600 }, { "epoch": 1.2973744027462077, "grad_norm": 0.13012675940990448, "learning_rate": 0.002, "loss": 2.3469, "step": 335610 }, { "epoch": 1.297413059949591, "grad_norm": 0.09287827461957932, "learning_rate": 0.002, "loss": 2.34, "step": 335620 }, { "epoch": 1.2974517171529742, "grad_norm": 0.09919273108243942, "learning_rate": 0.002, "loss": 2.3572, "step": 335630 }, { "epoch": 1.2974903743563575, "grad_norm": 0.09842280298471451, "learning_rate": 0.002, "loss": 2.3348, "step": 335640 }, { "epoch": 1.297529031559741, "grad_norm": 0.11691803485155106, "learning_rate": 0.002, "loss": 2.3457, "step": 335650 }, { "epoch": 1.2975676887631242, "grad_norm": 0.10015939176082611, "learning_rate": 0.002, "loss": 2.3261, "step": 335660 }, { "epoch": 1.2976063459665075, "grad_norm": 0.09664006531238556, "learning_rate": 0.002, "loss": 2.3308, "step": 335670 }, { "epoch": 1.2976450031698907, "grad_norm": 0.12423496693372726, "learning_rate": 0.002, "loss": 2.3373, "step": 335680 }, { "epoch": 1.297683660373274, "grad_norm": 0.10078054666519165, "learning_rate": 0.002, "loss": 2.3245, "step": 335690 }, { "epoch": 1.2977223175766572, "grad_norm": 0.11702215671539307, "learning_rate": 0.002, "loss": 2.3333, "step": 335700 }, { "epoch": 1.2977609747800405, "grad_norm": 0.09916981309652328, "learning_rate": 0.002, "loss": 2.3221, "step": 335710 }, { "epoch": 1.2977996319834237, "grad_norm": 0.11054660379886627, "learning_rate": 0.002, "loss": 2.3271, "step": 335720 }, { "epoch": 1.297838289186807, "grad_norm": 0.10521753132343292, "learning_rate": 0.002, "loss": 2.3377, "step": 335730 }, { "epoch": 1.2978769463901902, "grad_norm": 0.14257629215717316, "learning_rate": 0.002, "loss": 2.3323, "step": 335740 }, { "epoch": 1.2979156035935735, "grad_norm": 0.0905749499797821, "learning_rate": 0.002, "loss": 2.3327, "step": 335750 }, { "epoch": 1.297954260796957, "grad_norm": 0.11051960289478302, "learning_rate": 0.002, "loss": 2.3194, "step": 335760 }, { "epoch": 1.2979929180003402, "grad_norm": 0.10828401148319244, "learning_rate": 0.002, "loss": 2.3443, "step": 335770 }, { "epoch": 1.2980315752037235, "grad_norm": 0.11706371605396271, "learning_rate": 0.002, "loss": 2.3381, "step": 335780 }, { "epoch": 1.2980702324071067, "grad_norm": 0.11631658673286438, "learning_rate": 0.002, "loss": 2.3283, "step": 335790 }, { "epoch": 1.29810888961049, "grad_norm": 0.0892113521695137, "learning_rate": 0.002, "loss": 2.3357, "step": 335800 }, { "epoch": 1.2981475468138732, "grad_norm": 0.12711207568645477, "learning_rate": 0.002, "loss": 2.3201, "step": 335810 }, { "epoch": 1.2981862040172567, "grad_norm": 0.11014045774936676, "learning_rate": 0.002, "loss": 2.3332, "step": 335820 }, { "epoch": 1.29822486122064, "grad_norm": 0.09357475489377975, "learning_rate": 0.002, "loss": 2.3383, "step": 335830 }, { "epoch": 1.2982635184240232, "grad_norm": 0.09620398283004761, "learning_rate": 0.002, "loss": 2.3267, "step": 335840 }, { "epoch": 1.2983021756274065, "grad_norm": 0.10793457180261612, "learning_rate": 0.002, "loss": 2.3179, "step": 335850 }, { "epoch": 1.2983408328307897, "grad_norm": 0.13548347353935242, "learning_rate": 0.002, "loss": 2.3271, "step": 335860 }, { "epoch": 1.298379490034173, "grad_norm": 0.10772477835416794, "learning_rate": 0.002, "loss": 2.3338, "step": 335870 }, { "epoch": 1.2984181472375562, "grad_norm": 0.09605101495981216, "learning_rate": 0.002, "loss": 2.3362, "step": 335880 }, { "epoch": 1.2984568044409395, "grad_norm": 0.11341050267219543, "learning_rate": 0.002, "loss": 2.3485, "step": 335890 }, { "epoch": 1.2984954616443227, "grad_norm": 0.11545135825872421, "learning_rate": 0.002, "loss": 2.327, "step": 335900 }, { "epoch": 1.298534118847706, "grad_norm": 0.12482807785272598, "learning_rate": 0.002, "loss": 2.3411, "step": 335910 }, { "epoch": 1.2985727760510892, "grad_norm": 0.08460353314876556, "learning_rate": 0.002, "loss": 2.3288, "step": 335920 }, { "epoch": 1.2986114332544727, "grad_norm": 0.28585994243621826, "learning_rate": 0.002, "loss": 2.3277, "step": 335930 }, { "epoch": 1.298650090457856, "grad_norm": 0.10742087662220001, "learning_rate": 0.002, "loss": 2.3316, "step": 335940 }, { "epoch": 1.2986887476612392, "grad_norm": 0.12978002429008484, "learning_rate": 0.002, "loss": 2.3469, "step": 335950 }, { "epoch": 1.2987274048646225, "grad_norm": 0.09802679717540741, "learning_rate": 0.002, "loss": 2.3365, "step": 335960 }, { "epoch": 1.2987660620680057, "grad_norm": 0.10356856882572174, "learning_rate": 0.002, "loss": 2.3159, "step": 335970 }, { "epoch": 1.298804719271389, "grad_norm": 0.12377564609050751, "learning_rate": 0.002, "loss": 2.3473, "step": 335980 }, { "epoch": 1.2988433764747724, "grad_norm": 0.10669989138841629, "learning_rate": 0.002, "loss": 2.3315, "step": 335990 }, { "epoch": 1.2988820336781557, "grad_norm": 0.09646441042423248, "learning_rate": 0.002, "loss": 2.3242, "step": 336000 }, { "epoch": 1.298920690881539, "grad_norm": 0.13213708996772766, "learning_rate": 0.002, "loss": 2.3465, "step": 336010 }, { "epoch": 1.2989593480849222, "grad_norm": 0.0944688692688942, "learning_rate": 0.002, "loss": 2.3251, "step": 336020 }, { "epoch": 1.2989980052883054, "grad_norm": 0.10846567898988724, "learning_rate": 0.002, "loss": 2.3468, "step": 336030 }, { "epoch": 1.2990366624916887, "grad_norm": 0.09957831352949142, "learning_rate": 0.002, "loss": 2.3293, "step": 336040 }, { "epoch": 1.299075319695072, "grad_norm": 0.09659119695425034, "learning_rate": 0.002, "loss": 2.3342, "step": 336050 }, { "epoch": 1.2991139768984552, "grad_norm": 0.10422774404287338, "learning_rate": 0.002, "loss": 2.3277, "step": 336060 }, { "epoch": 1.2991526341018385, "grad_norm": 0.10652784258127213, "learning_rate": 0.002, "loss": 2.3156, "step": 336070 }, { "epoch": 1.2991912913052217, "grad_norm": 0.11235243827104568, "learning_rate": 0.002, "loss": 2.3286, "step": 336080 }, { "epoch": 1.299229948508605, "grad_norm": 0.1195795089006424, "learning_rate": 0.002, "loss": 2.3281, "step": 336090 }, { "epoch": 1.2992686057119884, "grad_norm": 0.11767005175352097, "learning_rate": 0.002, "loss": 2.3438, "step": 336100 }, { "epoch": 1.2993072629153717, "grad_norm": 0.09454245865345001, "learning_rate": 0.002, "loss": 2.3335, "step": 336110 }, { "epoch": 1.299345920118755, "grad_norm": 0.12325325608253479, "learning_rate": 0.002, "loss": 2.3364, "step": 336120 }, { "epoch": 1.2993845773221382, "grad_norm": 0.10042198747396469, "learning_rate": 0.002, "loss": 2.3505, "step": 336130 }, { "epoch": 1.2994232345255214, "grad_norm": 0.10879756510257721, "learning_rate": 0.002, "loss": 2.3292, "step": 336140 }, { "epoch": 1.2994618917289047, "grad_norm": 0.09640879184007645, "learning_rate": 0.002, "loss": 2.3399, "step": 336150 }, { "epoch": 1.2995005489322882, "grad_norm": 0.1137596070766449, "learning_rate": 0.002, "loss": 2.3194, "step": 336160 }, { "epoch": 1.2995392061356714, "grad_norm": 0.10465048998594284, "learning_rate": 0.002, "loss": 2.3435, "step": 336170 }, { "epoch": 1.2995778633390547, "grad_norm": 0.1117529347538948, "learning_rate": 0.002, "loss": 2.32, "step": 336180 }, { "epoch": 1.299616520542438, "grad_norm": 0.1018039882183075, "learning_rate": 0.002, "loss": 2.343, "step": 336190 }, { "epoch": 1.2996551777458212, "grad_norm": 0.0903591737151146, "learning_rate": 0.002, "loss": 2.3264, "step": 336200 }, { "epoch": 1.2996938349492044, "grad_norm": 0.09142091870307922, "learning_rate": 0.002, "loss": 2.3385, "step": 336210 }, { "epoch": 1.2997324921525877, "grad_norm": 0.11007067561149597, "learning_rate": 0.002, "loss": 2.3276, "step": 336220 }, { "epoch": 1.299771149355971, "grad_norm": 0.09496650099754333, "learning_rate": 0.002, "loss": 2.3442, "step": 336230 }, { "epoch": 1.2998098065593542, "grad_norm": 0.11753443628549576, "learning_rate": 0.002, "loss": 2.3349, "step": 336240 }, { "epoch": 1.2998484637627374, "grad_norm": 0.10725142061710358, "learning_rate": 0.002, "loss": 2.3459, "step": 336250 }, { "epoch": 1.2998871209661207, "grad_norm": 0.11182443797588348, "learning_rate": 0.002, "loss": 2.3202, "step": 336260 }, { "epoch": 1.2999257781695042, "grad_norm": 0.09985292702913284, "learning_rate": 0.002, "loss": 2.3361, "step": 336270 }, { "epoch": 1.2999644353728874, "grad_norm": 0.14232194423675537, "learning_rate": 0.002, "loss": 2.3355, "step": 336280 }, { "epoch": 1.3000030925762707, "grad_norm": 0.09903427958488464, "learning_rate": 0.002, "loss": 2.3353, "step": 336290 }, { "epoch": 1.300041749779654, "grad_norm": 0.10525563359260559, "learning_rate": 0.002, "loss": 2.3483, "step": 336300 }, { "epoch": 1.3000804069830372, "grad_norm": 0.10530021786689758, "learning_rate": 0.002, "loss": 2.3244, "step": 336310 }, { "epoch": 1.3001190641864204, "grad_norm": 0.10072167217731476, "learning_rate": 0.002, "loss": 2.3299, "step": 336320 }, { "epoch": 1.300157721389804, "grad_norm": 0.11304374784231186, "learning_rate": 0.002, "loss": 2.3354, "step": 336330 }, { "epoch": 1.3001963785931872, "grad_norm": 0.11669431626796722, "learning_rate": 0.002, "loss": 2.3478, "step": 336340 }, { "epoch": 1.3002350357965704, "grad_norm": 0.11866362392902374, "learning_rate": 0.002, "loss": 2.3356, "step": 336350 }, { "epoch": 1.3002736929999537, "grad_norm": 0.09389518201351166, "learning_rate": 0.002, "loss": 2.3356, "step": 336360 }, { "epoch": 1.300312350203337, "grad_norm": 0.12181688845157623, "learning_rate": 0.002, "loss": 2.3348, "step": 336370 }, { "epoch": 1.3003510074067202, "grad_norm": 0.12622328102588654, "learning_rate": 0.002, "loss": 2.343, "step": 336380 }, { "epoch": 1.3003896646101034, "grad_norm": 0.12845391035079956, "learning_rate": 0.002, "loss": 2.3403, "step": 336390 }, { "epoch": 1.3004283218134867, "grad_norm": 0.11289853602647781, "learning_rate": 0.002, "loss": 2.3244, "step": 336400 }, { "epoch": 1.30046697901687, "grad_norm": 0.12921935319900513, "learning_rate": 0.002, "loss": 2.317, "step": 336410 }, { "epoch": 1.3005056362202532, "grad_norm": 0.10790617763996124, "learning_rate": 0.002, "loss": 2.3506, "step": 336420 }, { "epoch": 1.3005442934236366, "grad_norm": 0.10064290463924408, "learning_rate": 0.002, "loss": 2.3401, "step": 336430 }, { "epoch": 1.30058295062702, "grad_norm": 0.10124460607767105, "learning_rate": 0.002, "loss": 2.3386, "step": 336440 }, { "epoch": 1.3006216078304031, "grad_norm": 0.09299731999635696, "learning_rate": 0.002, "loss": 2.3339, "step": 336450 }, { "epoch": 1.3006602650337864, "grad_norm": 0.10883777588605881, "learning_rate": 0.002, "loss": 2.3347, "step": 336460 }, { "epoch": 1.3006989222371697, "grad_norm": 0.10185468941926956, "learning_rate": 0.002, "loss": 2.3409, "step": 336470 }, { "epoch": 1.300737579440553, "grad_norm": 0.1190355122089386, "learning_rate": 0.002, "loss": 2.3436, "step": 336480 }, { "epoch": 1.3007762366439362, "grad_norm": 0.1011800616979599, "learning_rate": 0.002, "loss": 2.3263, "step": 336490 }, { "epoch": 1.3008148938473196, "grad_norm": 0.08444646000862122, "learning_rate": 0.002, "loss": 2.3302, "step": 336500 }, { "epoch": 1.3008535510507029, "grad_norm": 0.10320805013179779, "learning_rate": 0.002, "loss": 2.3347, "step": 336510 }, { "epoch": 1.3008922082540861, "grad_norm": 0.11415956169366837, "learning_rate": 0.002, "loss": 2.3356, "step": 336520 }, { "epoch": 1.3009308654574694, "grad_norm": 0.1253964602947235, "learning_rate": 0.002, "loss": 2.3202, "step": 336530 }, { "epoch": 1.3009695226608526, "grad_norm": 0.11867789179086685, "learning_rate": 0.002, "loss": 2.3322, "step": 336540 }, { "epoch": 1.301008179864236, "grad_norm": 0.13367541134357452, "learning_rate": 0.002, "loss": 2.3303, "step": 336550 }, { "epoch": 1.3010468370676191, "grad_norm": 0.1060272827744484, "learning_rate": 0.002, "loss": 2.3512, "step": 336560 }, { "epoch": 1.3010854942710024, "grad_norm": 0.1167825311422348, "learning_rate": 0.002, "loss": 2.3294, "step": 336570 }, { "epoch": 1.3011241514743856, "grad_norm": 0.0996595248579979, "learning_rate": 0.002, "loss": 2.3327, "step": 336580 }, { "epoch": 1.301162808677769, "grad_norm": 0.10070031136274338, "learning_rate": 0.002, "loss": 2.3293, "step": 336590 }, { "epoch": 1.3012014658811524, "grad_norm": 0.14351944625377655, "learning_rate": 0.002, "loss": 2.3362, "step": 336600 }, { "epoch": 1.3012401230845356, "grad_norm": 0.09690338373184204, "learning_rate": 0.002, "loss": 2.3245, "step": 336610 }, { "epoch": 1.3012787802879189, "grad_norm": 0.1026904284954071, "learning_rate": 0.002, "loss": 2.3369, "step": 336620 }, { "epoch": 1.3013174374913021, "grad_norm": 0.12201692909002304, "learning_rate": 0.002, "loss": 2.3348, "step": 336630 }, { "epoch": 1.3013560946946854, "grad_norm": 0.15047767758369446, "learning_rate": 0.002, "loss": 2.3376, "step": 336640 }, { "epoch": 1.3013947518980686, "grad_norm": 0.11998075991868973, "learning_rate": 0.002, "loss": 2.3439, "step": 336650 }, { "epoch": 1.3014334091014519, "grad_norm": 0.09685704112052917, "learning_rate": 0.002, "loss": 2.34, "step": 336660 }, { "epoch": 1.3014720663048354, "grad_norm": 0.08562976866960526, "learning_rate": 0.002, "loss": 2.3374, "step": 336670 }, { "epoch": 1.3015107235082186, "grad_norm": 0.10097695887088776, "learning_rate": 0.002, "loss": 2.3383, "step": 336680 }, { "epoch": 1.3015493807116019, "grad_norm": 0.1395394206047058, "learning_rate": 0.002, "loss": 2.3231, "step": 336690 }, { "epoch": 1.3015880379149851, "grad_norm": 0.097958505153656, "learning_rate": 0.002, "loss": 2.3373, "step": 336700 }, { "epoch": 1.3016266951183684, "grad_norm": 0.1106363832950592, "learning_rate": 0.002, "loss": 2.3292, "step": 336710 }, { "epoch": 1.3016653523217516, "grad_norm": 0.10885391384363174, "learning_rate": 0.002, "loss": 2.321, "step": 336720 }, { "epoch": 1.3017040095251349, "grad_norm": 0.10044512897729874, "learning_rate": 0.002, "loss": 2.3257, "step": 336730 }, { "epoch": 1.3017426667285181, "grad_norm": 0.1009591743350029, "learning_rate": 0.002, "loss": 2.3441, "step": 336740 }, { "epoch": 1.3017813239319014, "grad_norm": 0.09731756895780563, "learning_rate": 0.002, "loss": 2.3507, "step": 336750 }, { "epoch": 1.3018199811352846, "grad_norm": 0.18127033114433289, "learning_rate": 0.002, "loss": 2.3451, "step": 336760 }, { "epoch": 1.301858638338668, "grad_norm": 0.1387605369091034, "learning_rate": 0.002, "loss": 2.3335, "step": 336770 }, { "epoch": 1.3018972955420514, "grad_norm": 0.13605371117591858, "learning_rate": 0.002, "loss": 2.3376, "step": 336780 }, { "epoch": 1.3019359527454346, "grad_norm": 0.09742670506238937, "learning_rate": 0.002, "loss": 2.3387, "step": 336790 }, { "epoch": 1.3019746099488179, "grad_norm": 0.11238933354616165, "learning_rate": 0.002, "loss": 2.3424, "step": 336800 }, { "epoch": 1.3020132671522011, "grad_norm": 0.1343478411436081, "learning_rate": 0.002, "loss": 2.3201, "step": 336810 }, { "epoch": 1.3020519243555844, "grad_norm": 0.11248623579740524, "learning_rate": 0.002, "loss": 2.3298, "step": 336820 }, { "epoch": 1.3020905815589676, "grad_norm": 0.10541951656341553, "learning_rate": 0.002, "loss": 2.3423, "step": 336830 }, { "epoch": 1.302129238762351, "grad_norm": 0.08968300372362137, "learning_rate": 0.002, "loss": 2.33, "step": 336840 }, { "epoch": 1.3021678959657343, "grad_norm": 0.1093692034482956, "learning_rate": 0.002, "loss": 2.335, "step": 336850 }, { "epoch": 1.3022065531691176, "grad_norm": 0.10313425213098526, "learning_rate": 0.002, "loss": 2.3256, "step": 336860 }, { "epoch": 1.3022452103725008, "grad_norm": 0.11104105412960052, "learning_rate": 0.002, "loss": 2.3318, "step": 336870 }, { "epoch": 1.302283867575884, "grad_norm": 0.10031123459339142, "learning_rate": 0.002, "loss": 2.3389, "step": 336880 }, { "epoch": 1.3023225247792674, "grad_norm": 0.1145625114440918, "learning_rate": 0.002, "loss": 2.3289, "step": 336890 }, { "epoch": 1.3023611819826506, "grad_norm": 0.11190888285636902, "learning_rate": 0.002, "loss": 2.3233, "step": 336900 }, { "epoch": 1.3023998391860339, "grad_norm": 0.10132209956645966, "learning_rate": 0.002, "loss": 2.3247, "step": 336910 }, { "epoch": 1.302438496389417, "grad_norm": 0.1086704432964325, "learning_rate": 0.002, "loss": 2.3322, "step": 336920 }, { "epoch": 1.3024771535928004, "grad_norm": 0.10256091505289078, "learning_rate": 0.002, "loss": 2.3387, "step": 336930 }, { "epoch": 1.3025158107961838, "grad_norm": 0.10453871637582779, "learning_rate": 0.002, "loss": 2.3386, "step": 336940 }, { "epoch": 1.302554467999567, "grad_norm": 0.11024262756109238, "learning_rate": 0.002, "loss": 2.3247, "step": 336950 }, { "epoch": 1.3025931252029503, "grad_norm": 0.09911150485277176, "learning_rate": 0.002, "loss": 2.3383, "step": 336960 }, { "epoch": 1.3026317824063336, "grad_norm": 0.09939631074666977, "learning_rate": 0.002, "loss": 2.3354, "step": 336970 }, { "epoch": 1.3026704396097168, "grad_norm": 0.10066703706979752, "learning_rate": 0.002, "loss": 2.3213, "step": 336980 }, { "epoch": 1.3027090968131, "grad_norm": 0.10721877217292786, "learning_rate": 0.002, "loss": 2.3191, "step": 336990 }, { "epoch": 1.3027477540164834, "grad_norm": 0.10138001292943954, "learning_rate": 0.002, "loss": 2.323, "step": 337000 }, { "epoch": 1.3027864112198668, "grad_norm": 0.09895486384630203, "learning_rate": 0.002, "loss": 2.3234, "step": 337010 }, { "epoch": 1.30282506842325, "grad_norm": 0.1078089028596878, "learning_rate": 0.002, "loss": 2.3454, "step": 337020 }, { "epoch": 1.3028637256266333, "grad_norm": 0.1079610288143158, "learning_rate": 0.002, "loss": 2.3384, "step": 337030 }, { "epoch": 1.3029023828300166, "grad_norm": 0.1307416409254074, "learning_rate": 0.002, "loss": 2.3274, "step": 337040 }, { "epoch": 1.3029410400333998, "grad_norm": 0.09234651178121567, "learning_rate": 0.002, "loss": 2.3373, "step": 337050 }, { "epoch": 1.302979697236783, "grad_norm": 0.10930173099040985, "learning_rate": 0.002, "loss": 2.33, "step": 337060 }, { "epoch": 1.3030183544401663, "grad_norm": 0.10918635129928589, "learning_rate": 0.002, "loss": 2.3287, "step": 337070 }, { "epoch": 1.3030570116435496, "grad_norm": 0.10842832177877426, "learning_rate": 0.002, "loss": 2.327, "step": 337080 }, { "epoch": 1.3030956688469328, "grad_norm": 0.11361797153949738, "learning_rate": 0.002, "loss": 2.343, "step": 337090 }, { "epoch": 1.303134326050316, "grad_norm": 0.09919606894254684, "learning_rate": 0.002, "loss": 2.3225, "step": 337100 }, { "epoch": 1.3031729832536996, "grad_norm": 0.12349893897771835, "learning_rate": 0.002, "loss": 2.3254, "step": 337110 }, { "epoch": 1.3032116404570828, "grad_norm": 0.10912661254405975, "learning_rate": 0.002, "loss": 2.3452, "step": 337120 }, { "epoch": 1.303250297660466, "grad_norm": 0.12357006967067719, "learning_rate": 0.002, "loss": 2.3311, "step": 337130 }, { "epoch": 1.3032889548638493, "grad_norm": 0.10051541775465012, "learning_rate": 0.002, "loss": 2.3264, "step": 337140 }, { "epoch": 1.3033276120672326, "grad_norm": 0.09484165161848068, "learning_rate": 0.002, "loss": 2.3265, "step": 337150 }, { "epoch": 1.3033662692706158, "grad_norm": 0.12610755860805511, "learning_rate": 0.002, "loss": 2.3345, "step": 337160 }, { "epoch": 1.3034049264739993, "grad_norm": 0.1031932383775711, "learning_rate": 0.002, "loss": 2.3333, "step": 337170 }, { "epoch": 1.3034435836773826, "grad_norm": 0.11469267308712006, "learning_rate": 0.002, "loss": 2.3282, "step": 337180 }, { "epoch": 1.3034822408807658, "grad_norm": 0.10891234874725342, "learning_rate": 0.002, "loss": 2.3346, "step": 337190 }, { "epoch": 1.303520898084149, "grad_norm": 0.12440503388643265, "learning_rate": 0.002, "loss": 2.3199, "step": 337200 }, { "epoch": 1.3035595552875323, "grad_norm": 0.12115427106618881, "learning_rate": 0.002, "loss": 2.3439, "step": 337210 }, { "epoch": 1.3035982124909156, "grad_norm": 0.1324635148048401, "learning_rate": 0.002, "loss": 2.3404, "step": 337220 }, { "epoch": 1.3036368696942988, "grad_norm": 0.10365882515907288, "learning_rate": 0.002, "loss": 2.3344, "step": 337230 }, { "epoch": 1.303675526897682, "grad_norm": 0.11532828211784363, "learning_rate": 0.002, "loss": 2.3316, "step": 337240 }, { "epoch": 1.3037141841010653, "grad_norm": 0.1240374743938446, "learning_rate": 0.002, "loss": 2.3367, "step": 337250 }, { "epoch": 1.3037528413044486, "grad_norm": 0.10586193203926086, "learning_rate": 0.002, "loss": 2.3208, "step": 337260 }, { "epoch": 1.3037914985078318, "grad_norm": 0.09692249447107315, "learning_rate": 0.002, "loss": 2.3405, "step": 337270 }, { "epoch": 1.3038301557112153, "grad_norm": 0.11600619554519653, "learning_rate": 0.002, "loss": 2.3345, "step": 337280 }, { "epoch": 1.3038688129145986, "grad_norm": 0.09627915918827057, "learning_rate": 0.002, "loss": 2.3225, "step": 337290 }, { "epoch": 1.3039074701179818, "grad_norm": 0.11454172432422638, "learning_rate": 0.002, "loss": 2.3258, "step": 337300 }, { "epoch": 1.303946127321365, "grad_norm": 0.09462732821702957, "learning_rate": 0.002, "loss": 2.3119, "step": 337310 }, { "epoch": 1.3039847845247483, "grad_norm": 0.10958430916070938, "learning_rate": 0.002, "loss": 2.3421, "step": 337320 }, { "epoch": 1.3040234417281316, "grad_norm": 0.1141655296087265, "learning_rate": 0.002, "loss": 2.3333, "step": 337330 }, { "epoch": 1.304062098931515, "grad_norm": 0.11504334211349487, "learning_rate": 0.002, "loss": 2.3369, "step": 337340 }, { "epoch": 1.3041007561348983, "grad_norm": 0.1253875344991684, "learning_rate": 0.002, "loss": 2.3267, "step": 337350 }, { "epoch": 1.3041394133382815, "grad_norm": 0.11920283734798431, "learning_rate": 0.002, "loss": 2.337, "step": 337360 }, { "epoch": 1.3041780705416648, "grad_norm": 0.1002359464764595, "learning_rate": 0.002, "loss": 2.3323, "step": 337370 }, { "epoch": 1.304216727745048, "grad_norm": 0.09919452667236328, "learning_rate": 0.002, "loss": 2.3379, "step": 337380 }, { "epoch": 1.3042553849484313, "grad_norm": 0.10681872814893723, "learning_rate": 0.002, "loss": 2.3344, "step": 337390 }, { "epoch": 1.3042940421518145, "grad_norm": 0.10820811986923218, "learning_rate": 0.002, "loss": 2.334, "step": 337400 }, { "epoch": 1.3043326993551978, "grad_norm": 0.11254443228244781, "learning_rate": 0.002, "loss": 2.342, "step": 337410 }, { "epoch": 1.304371356558581, "grad_norm": 0.0999123752117157, "learning_rate": 0.002, "loss": 2.3164, "step": 337420 }, { "epoch": 1.3044100137619643, "grad_norm": 0.10298678278923035, "learning_rate": 0.002, "loss": 2.3346, "step": 337430 }, { "epoch": 1.3044486709653476, "grad_norm": 0.11172180622816086, "learning_rate": 0.002, "loss": 2.3398, "step": 337440 }, { "epoch": 1.304487328168731, "grad_norm": 0.10319048166275024, "learning_rate": 0.002, "loss": 2.3362, "step": 337450 }, { "epoch": 1.3045259853721143, "grad_norm": 0.10047553479671478, "learning_rate": 0.002, "loss": 2.3383, "step": 337460 }, { "epoch": 1.3045646425754975, "grad_norm": 0.11037591099739075, "learning_rate": 0.002, "loss": 2.3311, "step": 337470 }, { "epoch": 1.3046032997788808, "grad_norm": 0.11529282480478287, "learning_rate": 0.002, "loss": 2.3301, "step": 337480 }, { "epoch": 1.304641956982264, "grad_norm": 0.10569468140602112, "learning_rate": 0.002, "loss": 2.3293, "step": 337490 }, { "epoch": 1.3046806141856473, "grad_norm": 0.10355425626039505, "learning_rate": 0.002, "loss": 2.3461, "step": 337500 }, { "epoch": 1.3047192713890308, "grad_norm": 0.102251797914505, "learning_rate": 0.002, "loss": 2.3364, "step": 337510 }, { "epoch": 1.304757928592414, "grad_norm": 0.09056451916694641, "learning_rate": 0.002, "loss": 2.3272, "step": 337520 }, { "epoch": 1.3047965857957973, "grad_norm": 0.13302507996559143, "learning_rate": 0.002, "loss": 2.3347, "step": 337530 }, { "epoch": 1.3048352429991805, "grad_norm": 0.09038243442773819, "learning_rate": 0.002, "loss": 2.3343, "step": 337540 }, { "epoch": 1.3048739002025638, "grad_norm": 0.09669879823923111, "learning_rate": 0.002, "loss": 2.3317, "step": 337550 }, { "epoch": 1.304912557405947, "grad_norm": 0.11306969076395035, "learning_rate": 0.002, "loss": 2.3159, "step": 337560 }, { "epoch": 1.3049512146093303, "grad_norm": 0.11504258960485458, "learning_rate": 0.002, "loss": 2.3388, "step": 337570 }, { "epoch": 1.3049898718127135, "grad_norm": 0.1048077791929245, "learning_rate": 0.002, "loss": 2.3377, "step": 337580 }, { "epoch": 1.3050285290160968, "grad_norm": 0.09368283301591873, "learning_rate": 0.002, "loss": 2.3462, "step": 337590 }, { "epoch": 1.30506718621948, "grad_norm": 0.10178250074386597, "learning_rate": 0.002, "loss": 2.3311, "step": 337600 }, { "epoch": 1.3051058434228633, "grad_norm": 0.12559524178504944, "learning_rate": 0.002, "loss": 2.3373, "step": 337610 }, { "epoch": 1.3051445006262468, "grad_norm": 0.1250172257423401, "learning_rate": 0.002, "loss": 2.3404, "step": 337620 }, { "epoch": 1.30518315782963, "grad_norm": 0.10265842080116272, "learning_rate": 0.002, "loss": 2.3333, "step": 337630 }, { "epoch": 1.3052218150330133, "grad_norm": 0.11880707740783691, "learning_rate": 0.002, "loss": 2.3421, "step": 337640 }, { "epoch": 1.3052604722363965, "grad_norm": 0.11822383105754852, "learning_rate": 0.002, "loss": 2.3255, "step": 337650 }, { "epoch": 1.3052991294397798, "grad_norm": 0.10905078053474426, "learning_rate": 0.002, "loss": 2.3355, "step": 337660 }, { "epoch": 1.305337786643163, "grad_norm": 0.10963452607393265, "learning_rate": 0.002, "loss": 2.3514, "step": 337670 }, { "epoch": 1.3053764438465465, "grad_norm": 0.12794677913188934, "learning_rate": 0.002, "loss": 2.3253, "step": 337680 }, { "epoch": 1.3054151010499297, "grad_norm": 0.10933011770248413, "learning_rate": 0.002, "loss": 2.3281, "step": 337690 }, { "epoch": 1.305453758253313, "grad_norm": 0.10404634475708008, "learning_rate": 0.002, "loss": 2.3359, "step": 337700 }, { "epoch": 1.3054924154566963, "grad_norm": 0.11655057221651077, "learning_rate": 0.002, "loss": 2.3645, "step": 337710 }, { "epoch": 1.3055310726600795, "grad_norm": 0.09879942238330841, "learning_rate": 0.002, "loss": 2.3414, "step": 337720 }, { "epoch": 1.3055697298634628, "grad_norm": 0.09191339462995529, "learning_rate": 0.002, "loss": 2.3458, "step": 337730 }, { "epoch": 1.305608387066846, "grad_norm": 0.14594559371471405, "learning_rate": 0.002, "loss": 2.3335, "step": 337740 }, { "epoch": 1.3056470442702293, "grad_norm": 0.10261886566877365, "learning_rate": 0.002, "loss": 2.3484, "step": 337750 }, { "epoch": 1.3056857014736125, "grad_norm": 0.10173392295837402, "learning_rate": 0.002, "loss": 2.3316, "step": 337760 }, { "epoch": 1.3057243586769958, "grad_norm": 0.1198262944817543, "learning_rate": 0.002, "loss": 2.3405, "step": 337770 }, { "epoch": 1.305763015880379, "grad_norm": 0.0957404300570488, "learning_rate": 0.002, "loss": 2.3414, "step": 337780 }, { "epoch": 1.3058016730837625, "grad_norm": 0.10244777798652649, "learning_rate": 0.002, "loss": 2.3423, "step": 337790 }, { "epoch": 1.3058403302871457, "grad_norm": 0.09964311122894287, "learning_rate": 0.002, "loss": 2.3313, "step": 337800 }, { "epoch": 1.305878987490529, "grad_norm": 0.1049494594335556, "learning_rate": 0.002, "loss": 2.3283, "step": 337810 }, { "epoch": 1.3059176446939122, "grad_norm": 0.09348540008068085, "learning_rate": 0.002, "loss": 2.3355, "step": 337820 }, { "epoch": 1.3059563018972955, "grad_norm": 0.12033034861087799, "learning_rate": 0.002, "loss": 2.3493, "step": 337830 }, { "epoch": 1.3059949591006788, "grad_norm": 0.10889285057783127, "learning_rate": 0.002, "loss": 2.3607, "step": 337840 }, { "epoch": 1.3060336163040622, "grad_norm": 0.1031394749879837, "learning_rate": 0.002, "loss": 2.3314, "step": 337850 }, { "epoch": 1.3060722735074455, "grad_norm": 0.10475998371839523, "learning_rate": 0.002, "loss": 2.3401, "step": 337860 }, { "epoch": 1.3061109307108287, "grad_norm": 0.11214390397071838, "learning_rate": 0.002, "loss": 2.3358, "step": 337870 }, { "epoch": 1.306149587914212, "grad_norm": 0.10629909485578537, "learning_rate": 0.002, "loss": 2.3393, "step": 337880 }, { "epoch": 1.3061882451175952, "grad_norm": 0.10484512895345688, "learning_rate": 0.002, "loss": 2.333, "step": 337890 }, { "epoch": 1.3062269023209785, "grad_norm": 0.10622546821832657, "learning_rate": 0.002, "loss": 2.3382, "step": 337900 }, { "epoch": 1.3062655595243617, "grad_norm": 0.11129456013441086, "learning_rate": 0.002, "loss": 2.3353, "step": 337910 }, { "epoch": 1.306304216727745, "grad_norm": 0.11849325895309448, "learning_rate": 0.002, "loss": 2.3353, "step": 337920 }, { "epoch": 1.3063428739311282, "grad_norm": 0.10488150268793106, "learning_rate": 0.002, "loss": 2.3331, "step": 337930 }, { "epoch": 1.3063815311345115, "grad_norm": 0.09617189317941666, "learning_rate": 0.002, "loss": 2.3311, "step": 337940 }, { "epoch": 1.3064201883378947, "grad_norm": 0.09484464675188065, "learning_rate": 0.002, "loss": 2.3292, "step": 337950 }, { "epoch": 1.3064588455412782, "grad_norm": 0.10381089895963669, "learning_rate": 0.002, "loss": 2.3385, "step": 337960 }, { "epoch": 1.3064975027446615, "grad_norm": 0.10935788601636887, "learning_rate": 0.002, "loss": 2.3456, "step": 337970 }, { "epoch": 1.3065361599480447, "grad_norm": 0.114654541015625, "learning_rate": 0.002, "loss": 2.3251, "step": 337980 }, { "epoch": 1.306574817151428, "grad_norm": 0.10088855028152466, "learning_rate": 0.002, "loss": 2.343, "step": 337990 }, { "epoch": 1.3066134743548112, "grad_norm": 0.10404116660356522, "learning_rate": 0.002, "loss": 2.3465, "step": 338000 }, { "epoch": 1.3066521315581945, "grad_norm": 0.10011463612318039, "learning_rate": 0.002, "loss": 2.3342, "step": 338010 }, { "epoch": 1.306690788761578, "grad_norm": 0.11404065042734146, "learning_rate": 0.002, "loss": 2.3467, "step": 338020 }, { "epoch": 1.3067294459649612, "grad_norm": 0.09019742161035538, "learning_rate": 0.002, "loss": 2.3285, "step": 338030 }, { "epoch": 1.3067681031683445, "grad_norm": 0.11440113186836243, "learning_rate": 0.002, "loss": 2.3333, "step": 338040 }, { "epoch": 1.3068067603717277, "grad_norm": 0.1026352122426033, "learning_rate": 0.002, "loss": 2.3287, "step": 338050 }, { "epoch": 1.306845417575111, "grad_norm": 0.10444994270801544, "learning_rate": 0.002, "loss": 2.3328, "step": 338060 }, { "epoch": 1.3068840747784942, "grad_norm": 0.09628769010305405, "learning_rate": 0.002, "loss": 2.3386, "step": 338070 }, { "epoch": 1.3069227319818775, "grad_norm": 0.09596199542284012, "learning_rate": 0.002, "loss": 2.3441, "step": 338080 }, { "epoch": 1.3069613891852607, "grad_norm": 0.11345992237329483, "learning_rate": 0.002, "loss": 2.3311, "step": 338090 }, { "epoch": 1.307000046388644, "grad_norm": 0.12929995357990265, "learning_rate": 0.002, "loss": 2.3515, "step": 338100 }, { "epoch": 1.3070387035920272, "grad_norm": 0.1082843467593193, "learning_rate": 0.002, "loss": 2.3367, "step": 338110 }, { "epoch": 1.3070773607954105, "grad_norm": 0.42977941036224365, "learning_rate": 0.002, "loss": 2.3326, "step": 338120 }, { "epoch": 1.307116017998794, "grad_norm": 0.10232046246528625, "learning_rate": 0.002, "loss": 2.3404, "step": 338130 }, { "epoch": 1.3071546752021772, "grad_norm": 0.1078309491276741, "learning_rate": 0.002, "loss": 2.3251, "step": 338140 }, { "epoch": 1.3071933324055605, "grad_norm": 0.09055358916521072, "learning_rate": 0.002, "loss": 2.3326, "step": 338150 }, { "epoch": 1.3072319896089437, "grad_norm": 0.11110270768404007, "learning_rate": 0.002, "loss": 2.3251, "step": 338160 }, { "epoch": 1.307270646812327, "grad_norm": 0.09694137424230576, "learning_rate": 0.002, "loss": 2.3427, "step": 338170 }, { "epoch": 1.3073093040157102, "grad_norm": 0.10096246004104614, "learning_rate": 0.002, "loss": 2.3263, "step": 338180 }, { "epoch": 1.3073479612190937, "grad_norm": 0.13043995201587677, "learning_rate": 0.002, "loss": 2.3485, "step": 338190 }, { "epoch": 1.307386618422477, "grad_norm": 0.10851225256919861, "learning_rate": 0.002, "loss": 2.3292, "step": 338200 }, { "epoch": 1.3074252756258602, "grad_norm": 0.09410696476697922, "learning_rate": 0.002, "loss": 2.3282, "step": 338210 }, { "epoch": 1.3074639328292434, "grad_norm": 0.09398966282606125, "learning_rate": 0.002, "loss": 2.3366, "step": 338220 }, { "epoch": 1.3075025900326267, "grad_norm": 0.12850618362426758, "learning_rate": 0.002, "loss": 2.3416, "step": 338230 }, { "epoch": 1.30754124723601, "grad_norm": 0.10962171852588654, "learning_rate": 0.002, "loss": 2.3186, "step": 338240 }, { "epoch": 1.3075799044393932, "grad_norm": 0.09918195009231567, "learning_rate": 0.002, "loss": 2.3504, "step": 338250 }, { "epoch": 1.3076185616427765, "grad_norm": 0.1096796989440918, "learning_rate": 0.002, "loss": 2.3288, "step": 338260 }, { "epoch": 1.3076572188461597, "grad_norm": 0.11385281383991241, "learning_rate": 0.002, "loss": 2.3277, "step": 338270 }, { "epoch": 1.307695876049543, "grad_norm": 0.09344173222780228, "learning_rate": 0.002, "loss": 2.3363, "step": 338280 }, { "epoch": 1.3077345332529262, "grad_norm": 0.10489895939826965, "learning_rate": 0.002, "loss": 2.3217, "step": 338290 }, { "epoch": 1.3077731904563097, "grad_norm": 0.08784829825162888, "learning_rate": 0.002, "loss": 2.3445, "step": 338300 }, { "epoch": 1.307811847659693, "grad_norm": 0.11536280065774918, "learning_rate": 0.002, "loss": 2.3339, "step": 338310 }, { "epoch": 1.3078505048630762, "grad_norm": 0.11776147037744522, "learning_rate": 0.002, "loss": 2.3401, "step": 338320 }, { "epoch": 1.3078891620664594, "grad_norm": 0.13148784637451172, "learning_rate": 0.002, "loss": 2.332, "step": 338330 }, { "epoch": 1.3079278192698427, "grad_norm": 0.10201974213123322, "learning_rate": 0.002, "loss": 2.3284, "step": 338340 }, { "epoch": 1.307966476473226, "grad_norm": 0.09577985852956772, "learning_rate": 0.002, "loss": 2.3401, "step": 338350 }, { "epoch": 1.3080051336766094, "grad_norm": 0.10251278430223465, "learning_rate": 0.002, "loss": 2.3196, "step": 338360 }, { "epoch": 1.3080437908799927, "grad_norm": 0.1167164072394371, "learning_rate": 0.002, "loss": 2.3246, "step": 338370 }, { "epoch": 1.308082448083376, "grad_norm": 0.10759881138801575, "learning_rate": 0.002, "loss": 2.324, "step": 338380 }, { "epoch": 1.3081211052867592, "grad_norm": 0.09826534241437912, "learning_rate": 0.002, "loss": 2.3322, "step": 338390 }, { "epoch": 1.3081597624901424, "grad_norm": 0.10461527854204178, "learning_rate": 0.002, "loss": 2.34, "step": 338400 }, { "epoch": 1.3081984196935257, "grad_norm": 0.10983319580554962, "learning_rate": 0.002, "loss": 2.3497, "step": 338410 }, { "epoch": 1.308237076896909, "grad_norm": 0.12366301566362381, "learning_rate": 0.002, "loss": 2.3402, "step": 338420 }, { "epoch": 1.3082757341002922, "grad_norm": 0.10852979123592377, "learning_rate": 0.002, "loss": 2.3308, "step": 338430 }, { "epoch": 1.3083143913036754, "grad_norm": 0.09494627267122269, "learning_rate": 0.002, "loss": 2.3237, "step": 338440 }, { "epoch": 1.3083530485070587, "grad_norm": 0.10899453610181808, "learning_rate": 0.002, "loss": 2.3313, "step": 338450 }, { "epoch": 1.3083917057104422, "grad_norm": 0.12259631603956223, "learning_rate": 0.002, "loss": 2.3214, "step": 338460 }, { "epoch": 1.3084303629138254, "grad_norm": 0.10453185439109802, "learning_rate": 0.002, "loss": 2.3385, "step": 338470 }, { "epoch": 1.3084690201172087, "grad_norm": 0.1154693216085434, "learning_rate": 0.002, "loss": 2.3289, "step": 338480 }, { "epoch": 1.308507677320592, "grad_norm": 0.10357803106307983, "learning_rate": 0.002, "loss": 2.3409, "step": 338490 }, { "epoch": 1.3085463345239752, "grad_norm": 0.0999743640422821, "learning_rate": 0.002, "loss": 2.3231, "step": 338500 }, { "epoch": 1.3085849917273584, "grad_norm": 0.10579682141542435, "learning_rate": 0.002, "loss": 2.3339, "step": 338510 }, { "epoch": 1.3086236489307417, "grad_norm": 0.09436147660017014, "learning_rate": 0.002, "loss": 2.3319, "step": 338520 }, { "epoch": 1.3086623061341252, "grad_norm": 0.09775619208812714, "learning_rate": 0.002, "loss": 2.3377, "step": 338530 }, { "epoch": 1.3087009633375084, "grad_norm": 0.12779007852077484, "learning_rate": 0.002, "loss": 2.3458, "step": 338540 }, { "epoch": 1.3087396205408917, "grad_norm": 0.08898934721946716, "learning_rate": 0.002, "loss": 2.3239, "step": 338550 }, { "epoch": 1.308778277744275, "grad_norm": 0.0936533585190773, "learning_rate": 0.002, "loss": 2.3339, "step": 338560 }, { "epoch": 1.3088169349476582, "grad_norm": 0.12484470009803772, "learning_rate": 0.002, "loss": 2.34, "step": 338570 }, { "epoch": 1.3088555921510414, "grad_norm": 0.08969317376613617, "learning_rate": 0.002, "loss": 2.3274, "step": 338580 }, { "epoch": 1.3088942493544247, "grad_norm": 0.09576170891523361, "learning_rate": 0.002, "loss": 2.3451, "step": 338590 }, { "epoch": 1.308932906557808, "grad_norm": 0.11190787702798843, "learning_rate": 0.002, "loss": 2.3378, "step": 338600 }, { "epoch": 1.3089715637611912, "grad_norm": 0.11296659708023071, "learning_rate": 0.002, "loss": 2.3257, "step": 338610 }, { "epoch": 1.3090102209645744, "grad_norm": 0.09852434694766998, "learning_rate": 0.002, "loss": 2.3337, "step": 338620 }, { "epoch": 1.309048878167958, "grad_norm": 0.0959552451968193, "learning_rate": 0.002, "loss": 2.3364, "step": 338630 }, { "epoch": 1.3090875353713411, "grad_norm": 0.10912944376468658, "learning_rate": 0.002, "loss": 2.3222, "step": 338640 }, { "epoch": 1.3091261925747244, "grad_norm": 0.10973319411277771, "learning_rate": 0.002, "loss": 2.3338, "step": 338650 }, { "epoch": 1.3091648497781077, "grad_norm": 0.21840856969356537, "learning_rate": 0.002, "loss": 2.3413, "step": 338660 }, { "epoch": 1.309203506981491, "grad_norm": 0.09725868701934814, "learning_rate": 0.002, "loss": 2.3202, "step": 338670 }, { "epoch": 1.3092421641848742, "grad_norm": 0.09659942239522934, "learning_rate": 0.002, "loss": 2.3334, "step": 338680 }, { "epoch": 1.3092808213882574, "grad_norm": 0.1109233871102333, "learning_rate": 0.002, "loss": 2.3352, "step": 338690 }, { "epoch": 1.3093194785916409, "grad_norm": 0.11313378065824509, "learning_rate": 0.002, "loss": 2.3305, "step": 338700 }, { "epoch": 1.3093581357950241, "grad_norm": 0.11242232471704483, "learning_rate": 0.002, "loss": 2.3313, "step": 338710 }, { "epoch": 1.3093967929984074, "grad_norm": 0.08756279200315475, "learning_rate": 0.002, "loss": 2.3312, "step": 338720 }, { "epoch": 1.3094354502017906, "grad_norm": 0.10244888067245483, "learning_rate": 0.002, "loss": 2.3379, "step": 338730 }, { "epoch": 1.309474107405174, "grad_norm": 0.10156544297933578, "learning_rate": 0.002, "loss": 2.344, "step": 338740 }, { "epoch": 1.3095127646085571, "grad_norm": 0.10466597974300385, "learning_rate": 0.002, "loss": 2.3436, "step": 338750 }, { "epoch": 1.3095514218119404, "grad_norm": 0.10462167859077454, "learning_rate": 0.002, "loss": 2.3362, "step": 338760 }, { "epoch": 1.3095900790153236, "grad_norm": 0.1474732607603073, "learning_rate": 0.002, "loss": 2.3406, "step": 338770 }, { "epoch": 1.309628736218707, "grad_norm": 0.10373160243034363, "learning_rate": 0.002, "loss": 2.338, "step": 338780 }, { "epoch": 1.3096673934220902, "grad_norm": 0.08936724811792374, "learning_rate": 0.002, "loss": 2.3313, "step": 338790 }, { "epoch": 1.3097060506254736, "grad_norm": 0.09592811018228531, "learning_rate": 0.002, "loss": 2.3249, "step": 338800 }, { "epoch": 1.3097447078288569, "grad_norm": 0.1223786324262619, "learning_rate": 0.002, "loss": 2.3215, "step": 338810 }, { "epoch": 1.3097833650322401, "grad_norm": 0.10772044211626053, "learning_rate": 0.002, "loss": 2.3411, "step": 338820 }, { "epoch": 1.3098220222356234, "grad_norm": 0.111927330493927, "learning_rate": 0.002, "loss": 2.3381, "step": 338830 }, { "epoch": 1.3098606794390066, "grad_norm": 0.10590586811304092, "learning_rate": 0.002, "loss": 2.3377, "step": 338840 }, { "epoch": 1.3098993366423899, "grad_norm": 0.11565124988555908, "learning_rate": 0.002, "loss": 2.3294, "step": 338850 }, { "epoch": 1.3099379938457731, "grad_norm": 0.09856195747852325, "learning_rate": 0.002, "loss": 2.3369, "step": 338860 }, { "epoch": 1.3099766510491566, "grad_norm": 0.10407767444849014, "learning_rate": 0.002, "loss": 2.3481, "step": 338870 }, { "epoch": 1.3100153082525399, "grad_norm": 0.11623556166887283, "learning_rate": 0.002, "loss": 2.3288, "step": 338880 }, { "epoch": 1.3100539654559231, "grad_norm": 0.12146317958831787, "learning_rate": 0.002, "loss": 2.3313, "step": 338890 }, { "epoch": 1.3100926226593064, "grad_norm": 0.09300675243139267, "learning_rate": 0.002, "loss": 2.3263, "step": 338900 }, { "epoch": 1.3101312798626896, "grad_norm": 0.0998629629611969, "learning_rate": 0.002, "loss": 2.3279, "step": 338910 }, { "epoch": 1.3101699370660729, "grad_norm": 0.09744694083929062, "learning_rate": 0.002, "loss": 2.3232, "step": 338920 }, { "epoch": 1.3102085942694561, "grad_norm": 0.09870826452970505, "learning_rate": 0.002, "loss": 2.3309, "step": 338930 }, { "epoch": 1.3102472514728394, "grad_norm": 0.11773059517145157, "learning_rate": 0.002, "loss": 2.3302, "step": 338940 }, { "epoch": 1.3102859086762226, "grad_norm": 0.10237418860197067, "learning_rate": 0.002, "loss": 2.308, "step": 338950 }, { "epoch": 1.3103245658796059, "grad_norm": 0.105544313788414, "learning_rate": 0.002, "loss": 2.3315, "step": 338960 }, { "epoch": 1.3103632230829894, "grad_norm": 0.10724999010562897, "learning_rate": 0.002, "loss": 2.3291, "step": 338970 }, { "epoch": 1.3104018802863726, "grad_norm": 0.09732942283153534, "learning_rate": 0.002, "loss": 2.3303, "step": 338980 }, { "epoch": 1.3104405374897559, "grad_norm": 0.10429544001817703, "learning_rate": 0.002, "loss": 2.3451, "step": 338990 }, { "epoch": 1.3104791946931391, "grad_norm": 0.10754740983247757, "learning_rate": 0.002, "loss": 2.3379, "step": 339000 }, { "epoch": 1.3105178518965224, "grad_norm": 0.09705036878585815, "learning_rate": 0.002, "loss": 2.318, "step": 339010 }, { "epoch": 1.3105565090999056, "grad_norm": 0.13875411450862885, "learning_rate": 0.002, "loss": 2.3217, "step": 339020 }, { "epoch": 1.3105951663032889, "grad_norm": 0.10345365107059479, "learning_rate": 0.002, "loss": 2.3126, "step": 339030 }, { "epoch": 1.3106338235066723, "grad_norm": 0.11298981308937073, "learning_rate": 0.002, "loss": 2.3274, "step": 339040 }, { "epoch": 1.3106724807100556, "grad_norm": 0.09629258513450623, "learning_rate": 0.002, "loss": 2.3437, "step": 339050 }, { "epoch": 1.3107111379134389, "grad_norm": 0.1021524965763092, "learning_rate": 0.002, "loss": 2.3352, "step": 339060 }, { "epoch": 1.310749795116822, "grad_norm": 0.10271374136209488, "learning_rate": 0.002, "loss": 2.3361, "step": 339070 }, { "epoch": 1.3107884523202054, "grad_norm": 0.12685607373714447, "learning_rate": 0.002, "loss": 2.3389, "step": 339080 }, { "epoch": 1.3108271095235886, "grad_norm": 0.1172247901558876, "learning_rate": 0.002, "loss": 2.337, "step": 339090 }, { "epoch": 1.3108657667269719, "grad_norm": 0.09642387181520462, "learning_rate": 0.002, "loss": 2.3359, "step": 339100 }, { "epoch": 1.3109044239303551, "grad_norm": 0.10917683690786362, "learning_rate": 0.002, "loss": 2.3301, "step": 339110 }, { "epoch": 1.3109430811337384, "grad_norm": 0.10852925479412079, "learning_rate": 0.002, "loss": 2.321, "step": 339120 }, { "epoch": 1.3109817383371216, "grad_norm": 0.1264217495918274, "learning_rate": 0.002, "loss": 2.323, "step": 339130 }, { "epoch": 1.311020395540505, "grad_norm": 0.09436733275651932, "learning_rate": 0.002, "loss": 2.3145, "step": 339140 }, { "epoch": 1.3110590527438883, "grad_norm": 0.1040552482008934, "learning_rate": 0.002, "loss": 2.3284, "step": 339150 }, { "epoch": 1.3110977099472716, "grad_norm": 0.10122077912092209, "learning_rate": 0.002, "loss": 2.3314, "step": 339160 }, { "epoch": 1.3111363671506548, "grad_norm": 0.08897639065980911, "learning_rate": 0.002, "loss": 2.3284, "step": 339170 }, { "epoch": 1.311175024354038, "grad_norm": 0.1053071916103363, "learning_rate": 0.002, "loss": 2.3355, "step": 339180 }, { "epoch": 1.3112136815574214, "grad_norm": 0.11119119822978973, "learning_rate": 0.002, "loss": 2.3461, "step": 339190 }, { "epoch": 1.3112523387608048, "grad_norm": 0.101591557264328, "learning_rate": 0.002, "loss": 2.3294, "step": 339200 }, { "epoch": 1.311290995964188, "grad_norm": 0.1332840770483017, "learning_rate": 0.002, "loss": 2.336, "step": 339210 }, { "epoch": 1.3113296531675713, "grad_norm": 0.1152091696858406, "learning_rate": 0.002, "loss": 2.3267, "step": 339220 }, { "epoch": 1.3113683103709546, "grad_norm": 0.12425046414136887, "learning_rate": 0.002, "loss": 2.3421, "step": 339230 }, { "epoch": 1.3114069675743378, "grad_norm": 0.09523022174835205, "learning_rate": 0.002, "loss": 2.3362, "step": 339240 }, { "epoch": 1.311445624777721, "grad_norm": 0.09999441355466843, "learning_rate": 0.002, "loss": 2.3314, "step": 339250 }, { "epoch": 1.3114842819811043, "grad_norm": 0.10136357694864273, "learning_rate": 0.002, "loss": 2.3344, "step": 339260 }, { "epoch": 1.3115229391844876, "grad_norm": 0.22549983859062195, "learning_rate": 0.002, "loss": 2.3205, "step": 339270 }, { "epoch": 1.3115615963878708, "grad_norm": 0.10386420786380768, "learning_rate": 0.002, "loss": 2.3403, "step": 339280 }, { "epoch": 1.311600253591254, "grad_norm": 0.09532750397920609, "learning_rate": 0.002, "loss": 2.3454, "step": 339290 }, { "epoch": 1.3116389107946373, "grad_norm": 0.11204537749290466, "learning_rate": 0.002, "loss": 2.3229, "step": 339300 }, { "epoch": 1.3116775679980208, "grad_norm": 0.10338576138019562, "learning_rate": 0.002, "loss": 2.3468, "step": 339310 }, { "epoch": 1.311716225201404, "grad_norm": 0.11117055267095566, "learning_rate": 0.002, "loss": 2.3341, "step": 339320 }, { "epoch": 1.3117548824047873, "grad_norm": 0.09967552870512009, "learning_rate": 0.002, "loss": 2.3235, "step": 339330 }, { "epoch": 1.3117935396081706, "grad_norm": 0.14006535708904266, "learning_rate": 0.002, "loss": 2.3284, "step": 339340 }, { "epoch": 1.3118321968115538, "grad_norm": 0.09871932864189148, "learning_rate": 0.002, "loss": 2.3331, "step": 339350 }, { "epoch": 1.311870854014937, "grad_norm": 0.09871833026409149, "learning_rate": 0.002, "loss": 2.3124, "step": 339360 }, { "epoch": 1.3119095112183206, "grad_norm": 0.09466978162527084, "learning_rate": 0.002, "loss": 2.3403, "step": 339370 }, { "epoch": 1.3119481684217038, "grad_norm": 0.10804964601993561, "learning_rate": 0.002, "loss": 2.3115, "step": 339380 }, { "epoch": 1.311986825625087, "grad_norm": 0.09714234620332718, "learning_rate": 0.002, "loss": 2.3301, "step": 339390 }, { "epoch": 1.3120254828284703, "grad_norm": 0.12230552732944489, "learning_rate": 0.002, "loss": 2.3386, "step": 339400 }, { "epoch": 1.3120641400318536, "grad_norm": 0.12029729783535004, "learning_rate": 0.002, "loss": 2.3346, "step": 339410 }, { "epoch": 1.3121027972352368, "grad_norm": 0.08929474651813507, "learning_rate": 0.002, "loss": 2.3405, "step": 339420 }, { "epoch": 1.31214145443862, "grad_norm": 0.11715824156999588, "learning_rate": 0.002, "loss": 2.3255, "step": 339430 }, { "epoch": 1.3121801116420033, "grad_norm": 0.10215269029140472, "learning_rate": 0.002, "loss": 2.3381, "step": 339440 }, { "epoch": 1.3122187688453866, "grad_norm": 0.10783392935991287, "learning_rate": 0.002, "loss": 2.3404, "step": 339450 }, { "epoch": 1.3122574260487698, "grad_norm": 0.0915178582072258, "learning_rate": 0.002, "loss": 2.3306, "step": 339460 }, { "epoch": 1.312296083252153, "grad_norm": 0.09194423258304596, "learning_rate": 0.002, "loss": 2.3406, "step": 339470 }, { "epoch": 1.3123347404555366, "grad_norm": 0.10409944504499435, "learning_rate": 0.002, "loss": 2.3501, "step": 339480 }, { "epoch": 1.3123733976589198, "grad_norm": 0.10948554426431656, "learning_rate": 0.002, "loss": 2.3317, "step": 339490 }, { "epoch": 1.312412054862303, "grad_norm": 0.10612896084785461, "learning_rate": 0.002, "loss": 2.3227, "step": 339500 }, { "epoch": 1.3124507120656863, "grad_norm": 0.09984373301267624, "learning_rate": 0.002, "loss": 2.346, "step": 339510 }, { "epoch": 1.3124893692690696, "grad_norm": 0.12539131939411163, "learning_rate": 0.002, "loss": 2.3403, "step": 339520 }, { "epoch": 1.3125280264724528, "grad_norm": 0.12929649651050568, "learning_rate": 0.002, "loss": 2.3337, "step": 339530 }, { "epoch": 1.3125666836758363, "grad_norm": 0.11457858979701996, "learning_rate": 0.002, "loss": 2.3438, "step": 339540 }, { "epoch": 1.3126053408792195, "grad_norm": 0.11001051962375641, "learning_rate": 0.002, "loss": 2.3267, "step": 339550 }, { "epoch": 1.3126439980826028, "grad_norm": 0.09543713927268982, "learning_rate": 0.002, "loss": 2.3549, "step": 339560 }, { "epoch": 1.312682655285986, "grad_norm": 0.10295376926660538, "learning_rate": 0.002, "loss": 2.331, "step": 339570 }, { "epoch": 1.3127213124893693, "grad_norm": 0.12589390575885773, "learning_rate": 0.002, "loss": 2.3335, "step": 339580 }, { "epoch": 1.3127599696927525, "grad_norm": 0.10750047862529755, "learning_rate": 0.002, "loss": 2.3253, "step": 339590 }, { "epoch": 1.3127986268961358, "grad_norm": 0.10956700891256332, "learning_rate": 0.002, "loss": 2.3375, "step": 339600 }, { "epoch": 1.312837284099519, "grad_norm": 0.09963594377040863, "learning_rate": 0.002, "loss": 2.3416, "step": 339610 }, { "epoch": 1.3128759413029023, "grad_norm": 0.11178959906101227, "learning_rate": 0.002, "loss": 2.3322, "step": 339620 }, { "epoch": 1.3129145985062856, "grad_norm": 0.09853495657444, "learning_rate": 0.002, "loss": 2.3483, "step": 339630 }, { "epoch": 1.3129532557096688, "grad_norm": 0.1059534028172493, "learning_rate": 0.002, "loss": 2.3285, "step": 339640 }, { "epoch": 1.3129919129130523, "grad_norm": 0.12822778522968292, "learning_rate": 0.002, "loss": 2.3541, "step": 339650 }, { "epoch": 1.3130305701164355, "grad_norm": 0.12121497094631195, "learning_rate": 0.002, "loss": 2.3315, "step": 339660 }, { "epoch": 1.3130692273198188, "grad_norm": 0.10710785537958145, "learning_rate": 0.002, "loss": 2.3329, "step": 339670 }, { "epoch": 1.313107884523202, "grad_norm": 0.0960579365491867, "learning_rate": 0.002, "loss": 2.3299, "step": 339680 }, { "epoch": 1.3131465417265853, "grad_norm": 0.11348490417003632, "learning_rate": 0.002, "loss": 2.3331, "step": 339690 }, { "epoch": 1.3131851989299685, "grad_norm": 0.1104402244091034, "learning_rate": 0.002, "loss": 2.3379, "step": 339700 }, { "epoch": 1.313223856133352, "grad_norm": 0.10747892409563065, "learning_rate": 0.002, "loss": 2.337, "step": 339710 }, { "epoch": 1.3132625133367353, "grad_norm": 0.11154567450284958, "learning_rate": 0.002, "loss": 2.3456, "step": 339720 }, { "epoch": 1.3133011705401185, "grad_norm": 0.10602911561727524, "learning_rate": 0.002, "loss": 2.3487, "step": 339730 }, { "epoch": 1.3133398277435018, "grad_norm": 0.11539280414581299, "learning_rate": 0.002, "loss": 2.3428, "step": 339740 }, { "epoch": 1.313378484946885, "grad_norm": 0.11169593036174774, "learning_rate": 0.002, "loss": 2.3314, "step": 339750 }, { "epoch": 1.3134171421502683, "grad_norm": 0.11478446424007416, "learning_rate": 0.002, "loss": 2.3517, "step": 339760 }, { "epoch": 1.3134557993536515, "grad_norm": 0.10598297417163849, "learning_rate": 0.002, "loss": 2.3525, "step": 339770 }, { "epoch": 1.3134944565570348, "grad_norm": 0.10229714959859848, "learning_rate": 0.002, "loss": 2.3333, "step": 339780 }, { "epoch": 1.313533113760418, "grad_norm": 0.12123233079910278, "learning_rate": 0.002, "loss": 2.3336, "step": 339790 }, { "epoch": 1.3135717709638013, "grad_norm": 0.10334493219852448, "learning_rate": 0.002, "loss": 2.3268, "step": 339800 }, { "epoch": 1.3136104281671845, "grad_norm": 0.10026278346776962, "learning_rate": 0.002, "loss": 2.322, "step": 339810 }, { "epoch": 1.313649085370568, "grad_norm": 0.11529593914747238, "learning_rate": 0.002, "loss": 2.3291, "step": 339820 }, { "epoch": 1.3136877425739513, "grad_norm": 0.11085256189107895, "learning_rate": 0.002, "loss": 2.3345, "step": 339830 }, { "epoch": 1.3137263997773345, "grad_norm": 0.0989978238940239, "learning_rate": 0.002, "loss": 2.3382, "step": 339840 }, { "epoch": 1.3137650569807178, "grad_norm": 0.09534777700901031, "learning_rate": 0.002, "loss": 2.3254, "step": 339850 }, { "epoch": 1.313803714184101, "grad_norm": 0.13268207013607025, "learning_rate": 0.002, "loss": 2.3348, "step": 339860 }, { "epoch": 1.3138423713874843, "grad_norm": 0.09691354632377625, "learning_rate": 0.002, "loss": 2.3379, "step": 339870 }, { "epoch": 1.3138810285908677, "grad_norm": 0.11440901458263397, "learning_rate": 0.002, "loss": 2.3274, "step": 339880 }, { "epoch": 1.313919685794251, "grad_norm": 0.09623962640762329, "learning_rate": 0.002, "loss": 2.3366, "step": 339890 }, { "epoch": 1.3139583429976343, "grad_norm": 0.09594407677650452, "learning_rate": 0.002, "loss": 2.3369, "step": 339900 }, { "epoch": 1.3139970002010175, "grad_norm": 0.12013139575719833, "learning_rate": 0.002, "loss": 2.3396, "step": 339910 }, { "epoch": 1.3140356574044008, "grad_norm": 0.09702719002962112, "learning_rate": 0.002, "loss": 2.327, "step": 339920 }, { "epoch": 1.314074314607784, "grad_norm": 0.12030521035194397, "learning_rate": 0.002, "loss": 2.3374, "step": 339930 }, { "epoch": 1.3141129718111673, "grad_norm": 0.10773120820522308, "learning_rate": 0.002, "loss": 2.344, "step": 339940 }, { "epoch": 1.3141516290145505, "grad_norm": 0.09532781690359116, "learning_rate": 0.002, "loss": 2.3372, "step": 339950 }, { "epoch": 1.3141902862179338, "grad_norm": 0.09022223204374313, "learning_rate": 0.002, "loss": 2.3383, "step": 339960 }, { "epoch": 1.314228943421317, "grad_norm": 0.12461259961128235, "learning_rate": 0.002, "loss": 2.3264, "step": 339970 }, { "epoch": 1.3142676006247003, "grad_norm": 0.1056964248418808, "learning_rate": 0.002, "loss": 2.3354, "step": 339980 }, { "epoch": 1.3143062578280837, "grad_norm": 0.12232209742069244, "learning_rate": 0.002, "loss": 2.3281, "step": 339990 }, { "epoch": 1.314344915031467, "grad_norm": 0.12208342552185059, "learning_rate": 0.002, "loss": 2.3228, "step": 340000 }, { "epoch": 1.3143835722348503, "grad_norm": 0.10568199306726456, "learning_rate": 0.002, "loss": 2.3407, "step": 340010 }, { "epoch": 1.3144222294382335, "grad_norm": 0.09507480263710022, "learning_rate": 0.002, "loss": 2.3332, "step": 340020 }, { "epoch": 1.3144608866416168, "grad_norm": 0.10756602883338928, "learning_rate": 0.002, "loss": 2.3556, "step": 340030 }, { "epoch": 1.314499543845, "grad_norm": 0.08802318572998047, "learning_rate": 0.002, "loss": 2.3363, "step": 340040 }, { "epoch": 1.3145382010483835, "grad_norm": 0.13032051920890808, "learning_rate": 0.002, "loss": 2.3314, "step": 340050 }, { "epoch": 1.3145768582517667, "grad_norm": 0.09277044236660004, "learning_rate": 0.002, "loss": 2.342, "step": 340060 }, { "epoch": 1.31461551545515, "grad_norm": 0.1054871678352356, "learning_rate": 0.002, "loss": 2.3202, "step": 340070 }, { "epoch": 1.3146541726585332, "grad_norm": 0.11483970284461975, "learning_rate": 0.002, "loss": 2.3335, "step": 340080 }, { "epoch": 1.3146928298619165, "grad_norm": 0.09837651997804642, "learning_rate": 0.002, "loss": 2.3332, "step": 340090 }, { "epoch": 1.3147314870652997, "grad_norm": 0.1070932149887085, "learning_rate": 0.002, "loss": 2.3274, "step": 340100 }, { "epoch": 1.314770144268683, "grad_norm": 0.09720432758331299, "learning_rate": 0.002, "loss": 2.3357, "step": 340110 }, { "epoch": 1.3148088014720662, "grad_norm": 0.11132647842168808, "learning_rate": 0.002, "loss": 2.3432, "step": 340120 }, { "epoch": 1.3148474586754495, "grad_norm": 0.10063239187002182, "learning_rate": 0.002, "loss": 2.325, "step": 340130 }, { "epoch": 1.3148861158788328, "grad_norm": 0.10028837621212006, "learning_rate": 0.002, "loss": 2.3504, "step": 340140 }, { "epoch": 1.314924773082216, "grad_norm": 0.10856592655181885, "learning_rate": 0.002, "loss": 2.337, "step": 340150 }, { "epoch": 1.3149634302855995, "grad_norm": 0.09780581295490265, "learning_rate": 0.002, "loss": 2.3453, "step": 340160 }, { "epoch": 1.3150020874889827, "grad_norm": 0.14413388073444366, "learning_rate": 0.002, "loss": 2.3204, "step": 340170 }, { "epoch": 1.315040744692366, "grad_norm": 0.11259722709655762, "learning_rate": 0.002, "loss": 2.3452, "step": 340180 }, { "epoch": 1.3150794018957492, "grad_norm": 0.09913891553878784, "learning_rate": 0.002, "loss": 2.3334, "step": 340190 }, { "epoch": 1.3151180590991325, "grad_norm": 0.10148324817419052, "learning_rate": 0.002, "loss": 2.3383, "step": 340200 }, { "epoch": 1.3151567163025157, "grad_norm": 0.13096053898334503, "learning_rate": 0.002, "loss": 2.3297, "step": 340210 }, { "epoch": 1.3151953735058992, "grad_norm": 0.10373006761074066, "learning_rate": 0.002, "loss": 2.3329, "step": 340220 }, { "epoch": 1.3152340307092825, "grad_norm": 0.12267054617404938, "learning_rate": 0.002, "loss": 2.3424, "step": 340230 }, { "epoch": 1.3152726879126657, "grad_norm": 0.0968276634812355, "learning_rate": 0.002, "loss": 2.3273, "step": 340240 }, { "epoch": 1.315311345116049, "grad_norm": 0.1325179785490036, "learning_rate": 0.002, "loss": 2.3321, "step": 340250 }, { "epoch": 1.3153500023194322, "grad_norm": 0.12446484714746475, "learning_rate": 0.002, "loss": 2.3362, "step": 340260 }, { "epoch": 1.3153886595228155, "grad_norm": 0.11135133355855942, "learning_rate": 0.002, "loss": 2.3441, "step": 340270 }, { "epoch": 1.3154273167261987, "grad_norm": 0.1034776121377945, "learning_rate": 0.002, "loss": 2.335, "step": 340280 }, { "epoch": 1.315465973929582, "grad_norm": 0.11215350031852722, "learning_rate": 0.002, "loss": 2.3137, "step": 340290 }, { "epoch": 1.3155046311329652, "grad_norm": 0.09734928607940674, "learning_rate": 0.002, "loss": 2.3396, "step": 340300 }, { "epoch": 1.3155432883363485, "grad_norm": 0.09327472746372223, "learning_rate": 0.002, "loss": 2.3284, "step": 340310 }, { "epoch": 1.315581945539732, "grad_norm": 0.12669144570827484, "learning_rate": 0.002, "loss": 2.325, "step": 340320 }, { "epoch": 1.3156206027431152, "grad_norm": 0.10268600285053253, "learning_rate": 0.002, "loss": 2.3496, "step": 340330 }, { "epoch": 1.3156592599464985, "grad_norm": 0.09908057749271393, "learning_rate": 0.002, "loss": 2.3367, "step": 340340 }, { "epoch": 1.3156979171498817, "grad_norm": 0.10985678434371948, "learning_rate": 0.002, "loss": 2.342, "step": 340350 }, { "epoch": 1.315736574353265, "grad_norm": 0.09564399719238281, "learning_rate": 0.002, "loss": 2.337, "step": 340360 }, { "epoch": 1.3157752315566482, "grad_norm": 0.1189763993024826, "learning_rate": 0.002, "loss": 2.3401, "step": 340370 }, { "epoch": 1.3158138887600315, "grad_norm": 0.10353276133537292, "learning_rate": 0.002, "loss": 2.3235, "step": 340380 }, { "epoch": 1.315852545963415, "grad_norm": 0.09505298733711243, "learning_rate": 0.002, "loss": 2.3295, "step": 340390 }, { "epoch": 1.3158912031667982, "grad_norm": 0.10912010073661804, "learning_rate": 0.002, "loss": 2.34, "step": 340400 }, { "epoch": 1.3159298603701814, "grad_norm": 0.11756809055805206, "learning_rate": 0.002, "loss": 2.3404, "step": 340410 }, { "epoch": 1.3159685175735647, "grad_norm": 0.09390808641910553, "learning_rate": 0.002, "loss": 2.3498, "step": 340420 }, { "epoch": 1.316007174776948, "grad_norm": 0.108702652156353, "learning_rate": 0.002, "loss": 2.3376, "step": 340430 }, { "epoch": 1.3160458319803312, "grad_norm": 0.09625627100467682, "learning_rate": 0.002, "loss": 2.3346, "step": 340440 }, { "epoch": 1.3160844891837145, "grad_norm": 0.09767688810825348, "learning_rate": 0.002, "loss": 2.3265, "step": 340450 }, { "epoch": 1.3161231463870977, "grad_norm": 0.11160442978143692, "learning_rate": 0.002, "loss": 2.3343, "step": 340460 }, { "epoch": 1.316161803590481, "grad_norm": 0.08791060745716095, "learning_rate": 0.002, "loss": 2.3444, "step": 340470 }, { "epoch": 1.3162004607938642, "grad_norm": 0.12832403182983398, "learning_rate": 0.002, "loss": 2.3451, "step": 340480 }, { "epoch": 1.3162391179972477, "grad_norm": 0.0986124649643898, "learning_rate": 0.002, "loss": 2.3397, "step": 340490 }, { "epoch": 1.316277775200631, "grad_norm": 0.12974348664283752, "learning_rate": 0.002, "loss": 2.3461, "step": 340500 }, { "epoch": 1.3163164324040142, "grad_norm": 0.11423316597938538, "learning_rate": 0.002, "loss": 2.335, "step": 340510 }, { "epoch": 1.3163550896073974, "grad_norm": 0.1260526031255722, "learning_rate": 0.002, "loss": 2.3328, "step": 340520 }, { "epoch": 1.3163937468107807, "grad_norm": 0.10613839328289032, "learning_rate": 0.002, "loss": 2.3492, "step": 340530 }, { "epoch": 1.316432404014164, "grad_norm": 0.10647633671760559, "learning_rate": 0.002, "loss": 2.3513, "step": 340540 }, { "epoch": 1.3164710612175472, "grad_norm": 0.09708483517169952, "learning_rate": 0.002, "loss": 2.3346, "step": 340550 }, { "epoch": 1.3165097184209307, "grad_norm": 0.10543903708457947, "learning_rate": 0.002, "loss": 2.3415, "step": 340560 }, { "epoch": 1.316548375624314, "grad_norm": 0.12418416142463684, "learning_rate": 0.002, "loss": 2.348, "step": 340570 }, { "epoch": 1.3165870328276972, "grad_norm": 0.09926290810108185, "learning_rate": 0.002, "loss": 2.3424, "step": 340580 }, { "epoch": 1.3166256900310804, "grad_norm": 0.11084403097629547, "learning_rate": 0.002, "loss": 2.3336, "step": 340590 }, { "epoch": 1.3166643472344637, "grad_norm": 0.10365424305200577, "learning_rate": 0.002, "loss": 2.3365, "step": 340600 }, { "epoch": 1.316703004437847, "grad_norm": 0.11727438867092133, "learning_rate": 0.002, "loss": 2.3463, "step": 340610 }, { "epoch": 1.3167416616412302, "grad_norm": 0.10081959515810013, "learning_rate": 0.002, "loss": 2.3355, "step": 340620 }, { "epoch": 1.3167803188446134, "grad_norm": 0.09295682609081268, "learning_rate": 0.002, "loss": 2.34, "step": 340630 }, { "epoch": 1.3168189760479967, "grad_norm": 0.11949366331100464, "learning_rate": 0.002, "loss": 2.3417, "step": 340640 }, { "epoch": 1.31685763325138, "grad_norm": 0.09315700083971024, "learning_rate": 0.002, "loss": 2.33, "step": 340650 }, { "epoch": 1.3168962904547634, "grad_norm": 0.09729575365781784, "learning_rate": 0.002, "loss": 2.3325, "step": 340660 }, { "epoch": 1.3169349476581467, "grad_norm": 0.09525522589683533, "learning_rate": 0.002, "loss": 2.3382, "step": 340670 }, { "epoch": 1.31697360486153, "grad_norm": 0.12238912284374237, "learning_rate": 0.002, "loss": 2.3417, "step": 340680 }, { "epoch": 1.3170122620649132, "grad_norm": 0.09936051815748215, "learning_rate": 0.002, "loss": 2.3317, "step": 340690 }, { "epoch": 1.3170509192682964, "grad_norm": 0.10798247903585434, "learning_rate": 0.002, "loss": 2.3518, "step": 340700 }, { "epoch": 1.3170895764716797, "grad_norm": 0.12307767570018768, "learning_rate": 0.002, "loss": 2.3387, "step": 340710 }, { "epoch": 1.317128233675063, "grad_norm": 0.11636736243963242, "learning_rate": 0.002, "loss": 2.3402, "step": 340720 }, { "epoch": 1.3171668908784464, "grad_norm": 0.12146612256765366, "learning_rate": 0.002, "loss": 2.3371, "step": 340730 }, { "epoch": 1.3172055480818297, "grad_norm": 0.09943657368421555, "learning_rate": 0.002, "loss": 2.3326, "step": 340740 }, { "epoch": 1.317244205285213, "grad_norm": 0.10115927457809448, "learning_rate": 0.002, "loss": 2.3353, "step": 340750 }, { "epoch": 1.3172828624885962, "grad_norm": 0.09202753007411957, "learning_rate": 0.002, "loss": 2.3399, "step": 340760 }, { "epoch": 1.3173215196919794, "grad_norm": 0.114414744079113, "learning_rate": 0.002, "loss": 2.3264, "step": 340770 }, { "epoch": 1.3173601768953627, "grad_norm": 0.09059332311153412, "learning_rate": 0.002, "loss": 2.3375, "step": 340780 }, { "epoch": 1.317398834098746, "grad_norm": 0.11916932463645935, "learning_rate": 0.002, "loss": 2.3502, "step": 340790 }, { "epoch": 1.3174374913021292, "grad_norm": 0.13115397095680237, "learning_rate": 0.002, "loss": 2.3304, "step": 340800 }, { "epoch": 1.3174761485055124, "grad_norm": 0.1227148175239563, "learning_rate": 0.002, "loss": 2.3416, "step": 340810 }, { "epoch": 1.3175148057088957, "grad_norm": 0.10211624950170517, "learning_rate": 0.002, "loss": 2.348, "step": 340820 }, { "epoch": 1.3175534629122791, "grad_norm": 0.1126120314002037, "learning_rate": 0.002, "loss": 2.3223, "step": 340830 }, { "epoch": 1.3175921201156624, "grad_norm": 0.10091283917427063, "learning_rate": 0.002, "loss": 2.3166, "step": 340840 }, { "epoch": 1.3176307773190457, "grad_norm": 0.10280703008174896, "learning_rate": 0.002, "loss": 2.3317, "step": 340850 }, { "epoch": 1.317669434522429, "grad_norm": 0.09712085127830505, "learning_rate": 0.002, "loss": 2.3348, "step": 340860 }, { "epoch": 1.3177080917258122, "grad_norm": 0.11485341191291809, "learning_rate": 0.002, "loss": 2.3393, "step": 340870 }, { "epoch": 1.3177467489291954, "grad_norm": 0.09300262480974197, "learning_rate": 0.002, "loss": 2.3385, "step": 340880 }, { "epoch": 1.3177854061325787, "grad_norm": 0.09692247211933136, "learning_rate": 0.002, "loss": 2.3448, "step": 340890 }, { "epoch": 1.3178240633359621, "grad_norm": 0.10225555300712585, "learning_rate": 0.002, "loss": 2.3409, "step": 340900 }, { "epoch": 1.3178627205393454, "grad_norm": 0.09950529783964157, "learning_rate": 0.002, "loss": 2.3226, "step": 340910 }, { "epoch": 1.3179013777427286, "grad_norm": 0.10115771740674973, "learning_rate": 0.002, "loss": 2.3185, "step": 340920 }, { "epoch": 1.317940034946112, "grad_norm": 0.11324211210012436, "learning_rate": 0.002, "loss": 2.3348, "step": 340930 }, { "epoch": 1.3179786921494951, "grad_norm": 0.12060297280550003, "learning_rate": 0.002, "loss": 2.3433, "step": 340940 }, { "epoch": 1.3180173493528784, "grad_norm": 0.10018421709537506, "learning_rate": 0.002, "loss": 2.3411, "step": 340950 }, { "epoch": 1.3180560065562617, "grad_norm": 0.10009391605854034, "learning_rate": 0.002, "loss": 2.3359, "step": 340960 }, { "epoch": 1.318094663759645, "grad_norm": 0.10127507150173187, "learning_rate": 0.002, "loss": 2.3272, "step": 340970 }, { "epoch": 1.3181333209630282, "grad_norm": 0.09859684854745865, "learning_rate": 0.002, "loss": 2.3283, "step": 340980 }, { "epoch": 1.3181719781664114, "grad_norm": 0.12744922935962677, "learning_rate": 0.002, "loss": 2.337, "step": 340990 }, { "epoch": 1.3182106353697949, "grad_norm": 0.11380188912153244, "learning_rate": 0.002, "loss": 2.329, "step": 341000 }, { "epoch": 1.3182492925731781, "grad_norm": 0.11590145528316498, "learning_rate": 0.002, "loss": 2.3401, "step": 341010 }, { "epoch": 1.3182879497765614, "grad_norm": 0.10371170192956924, "learning_rate": 0.002, "loss": 2.33, "step": 341020 }, { "epoch": 1.3183266069799446, "grad_norm": 0.13881763815879822, "learning_rate": 0.002, "loss": 2.3429, "step": 341030 }, { "epoch": 1.318365264183328, "grad_norm": 0.09833131730556488, "learning_rate": 0.002, "loss": 2.3534, "step": 341040 }, { "epoch": 1.3184039213867111, "grad_norm": 0.10128436982631683, "learning_rate": 0.002, "loss": 2.3292, "step": 341050 }, { "epoch": 1.3184425785900946, "grad_norm": 0.10208149254322052, "learning_rate": 0.002, "loss": 2.3172, "step": 341060 }, { "epoch": 1.3184812357934779, "grad_norm": 0.10042634606361389, "learning_rate": 0.002, "loss": 2.3299, "step": 341070 }, { "epoch": 1.3185198929968611, "grad_norm": 0.10970644652843475, "learning_rate": 0.002, "loss": 2.3396, "step": 341080 }, { "epoch": 1.3185585502002444, "grad_norm": 0.08735210448503494, "learning_rate": 0.002, "loss": 2.3278, "step": 341090 }, { "epoch": 1.3185972074036276, "grad_norm": 0.0989793911576271, "learning_rate": 0.002, "loss": 2.3416, "step": 341100 }, { "epoch": 1.3186358646070109, "grad_norm": 0.10791467875242233, "learning_rate": 0.002, "loss": 2.3248, "step": 341110 }, { "epoch": 1.3186745218103941, "grad_norm": 0.11252657324075699, "learning_rate": 0.002, "loss": 2.336, "step": 341120 }, { "epoch": 1.3187131790137774, "grad_norm": 0.10048529505729675, "learning_rate": 0.002, "loss": 2.3467, "step": 341130 }, { "epoch": 1.3187518362171606, "grad_norm": 0.11139585077762604, "learning_rate": 0.002, "loss": 2.3349, "step": 341140 }, { "epoch": 1.3187904934205439, "grad_norm": 0.09323547780513763, "learning_rate": 0.002, "loss": 2.3405, "step": 341150 }, { "epoch": 1.3188291506239271, "grad_norm": 0.10829438269138336, "learning_rate": 0.002, "loss": 2.343, "step": 341160 }, { "epoch": 1.3188678078273106, "grad_norm": 0.09060853719711304, "learning_rate": 0.002, "loss": 2.328, "step": 341170 }, { "epoch": 1.3189064650306939, "grad_norm": 0.10160361230373383, "learning_rate": 0.002, "loss": 2.3265, "step": 341180 }, { "epoch": 1.3189451222340771, "grad_norm": 0.09072021394968033, "learning_rate": 0.002, "loss": 2.3458, "step": 341190 }, { "epoch": 1.3189837794374604, "grad_norm": 0.09953247755765915, "learning_rate": 0.002, "loss": 2.3433, "step": 341200 }, { "epoch": 1.3190224366408436, "grad_norm": 0.09971699118614197, "learning_rate": 0.002, "loss": 2.3448, "step": 341210 }, { "epoch": 1.3190610938442269, "grad_norm": 0.10871617496013641, "learning_rate": 0.002, "loss": 2.3387, "step": 341220 }, { "epoch": 1.3190997510476103, "grad_norm": 0.12742142379283905, "learning_rate": 0.002, "loss": 2.3213, "step": 341230 }, { "epoch": 1.3191384082509936, "grad_norm": 0.09261079877614975, "learning_rate": 0.002, "loss": 2.3326, "step": 341240 }, { "epoch": 1.3191770654543769, "grad_norm": 0.10547822713851929, "learning_rate": 0.002, "loss": 2.319, "step": 341250 }, { "epoch": 1.31921572265776, "grad_norm": 0.11054132878780365, "learning_rate": 0.002, "loss": 2.3318, "step": 341260 }, { "epoch": 1.3192543798611434, "grad_norm": 0.10952828079462051, "learning_rate": 0.002, "loss": 2.3308, "step": 341270 }, { "epoch": 1.3192930370645266, "grad_norm": 0.09914680570363998, "learning_rate": 0.002, "loss": 2.3261, "step": 341280 }, { "epoch": 1.3193316942679099, "grad_norm": 0.10533926635980606, "learning_rate": 0.002, "loss": 2.3429, "step": 341290 }, { "epoch": 1.3193703514712931, "grad_norm": 0.10060252249240875, "learning_rate": 0.002, "loss": 2.3326, "step": 341300 }, { "epoch": 1.3194090086746764, "grad_norm": 0.09601141512393951, "learning_rate": 0.002, "loss": 2.3171, "step": 341310 }, { "epoch": 1.3194476658780596, "grad_norm": 0.09400826692581177, "learning_rate": 0.002, "loss": 2.3246, "step": 341320 }, { "epoch": 1.3194863230814429, "grad_norm": 0.13698619604110718, "learning_rate": 0.002, "loss": 2.3351, "step": 341330 }, { "epoch": 1.3195249802848263, "grad_norm": 0.10330939292907715, "learning_rate": 0.002, "loss": 2.3237, "step": 341340 }, { "epoch": 1.3195636374882096, "grad_norm": 0.12004543840885162, "learning_rate": 0.002, "loss": 2.335, "step": 341350 }, { "epoch": 1.3196022946915928, "grad_norm": 0.09568639099597931, "learning_rate": 0.002, "loss": 2.3302, "step": 341360 }, { "epoch": 1.319640951894976, "grad_norm": 0.10221485048532486, "learning_rate": 0.002, "loss": 2.3414, "step": 341370 }, { "epoch": 1.3196796090983594, "grad_norm": 0.12222684174776077, "learning_rate": 0.002, "loss": 2.3339, "step": 341380 }, { "epoch": 1.3197182663017426, "grad_norm": 0.1150476261973381, "learning_rate": 0.002, "loss": 2.3282, "step": 341390 }, { "epoch": 1.319756923505126, "grad_norm": 0.09879761189222336, "learning_rate": 0.002, "loss": 2.3444, "step": 341400 }, { "epoch": 1.3197955807085093, "grad_norm": 0.1274539828300476, "learning_rate": 0.002, "loss": 2.3329, "step": 341410 }, { "epoch": 1.3198342379118926, "grad_norm": 0.11528249830007553, "learning_rate": 0.002, "loss": 2.3316, "step": 341420 }, { "epoch": 1.3198728951152758, "grad_norm": 0.10103266686201096, "learning_rate": 0.002, "loss": 2.3199, "step": 341430 }, { "epoch": 1.319911552318659, "grad_norm": 0.12781168520450592, "learning_rate": 0.002, "loss": 2.3402, "step": 341440 }, { "epoch": 1.3199502095220423, "grad_norm": 0.10776611417531967, "learning_rate": 0.002, "loss": 2.3337, "step": 341450 }, { "epoch": 1.3199888667254256, "grad_norm": 0.10403729975223541, "learning_rate": 0.002, "loss": 2.3439, "step": 341460 }, { "epoch": 1.3200275239288088, "grad_norm": 0.10193544626235962, "learning_rate": 0.002, "loss": 2.3449, "step": 341470 }, { "epoch": 1.320066181132192, "grad_norm": 0.10320829600095749, "learning_rate": 0.002, "loss": 2.3372, "step": 341480 }, { "epoch": 1.3201048383355753, "grad_norm": 0.1006522998213768, "learning_rate": 0.002, "loss": 2.3216, "step": 341490 }, { "epoch": 1.3201434955389586, "grad_norm": 0.1228528618812561, "learning_rate": 0.002, "loss": 2.3257, "step": 341500 }, { "epoch": 1.320182152742342, "grad_norm": 0.09381001442670822, "learning_rate": 0.002, "loss": 2.3296, "step": 341510 }, { "epoch": 1.3202208099457253, "grad_norm": 0.0969909206032753, "learning_rate": 0.002, "loss": 2.3342, "step": 341520 }, { "epoch": 1.3202594671491086, "grad_norm": 0.10850610584020615, "learning_rate": 0.002, "loss": 2.3266, "step": 341530 }, { "epoch": 1.3202981243524918, "grad_norm": 0.10401296615600586, "learning_rate": 0.002, "loss": 2.3317, "step": 341540 }, { "epoch": 1.320336781555875, "grad_norm": 0.10126899927854538, "learning_rate": 0.002, "loss": 2.3247, "step": 341550 }, { "epoch": 1.3203754387592583, "grad_norm": 0.11133930832147598, "learning_rate": 0.002, "loss": 2.3306, "step": 341560 }, { "epoch": 1.3204140959626418, "grad_norm": 0.0961000993847847, "learning_rate": 0.002, "loss": 2.336, "step": 341570 }, { "epoch": 1.320452753166025, "grad_norm": 0.10838087648153305, "learning_rate": 0.002, "loss": 2.34, "step": 341580 }, { "epoch": 1.3204914103694083, "grad_norm": 0.12038574367761612, "learning_rate": 0.002, "loss": 2.3454, "step": 341590 }, { "epoch": 1.3205300675727916, "grad_norm": 0.10663647949695587, "learning_rate": 0.002, "loss": 2.3233, "step": 341600 }, { "epoch": 1.3205687247761748, "grad_norm": 0.10277918726205826, "learning_rate": 0.002, "loss": 2.3399, "step": 341610 }, { "epoch": 1.320607381979558, "grad_norm": 0.12107924371957779, "learning_rate": 0.002, "loss": 2.3524, "step": 341620 }, { "epoch": 1.3206460391829413, "grad_norm": 0.08732627332210541, "learning_rate": 0.002, "loss": 2.3423, "step": 341630 }, { "epoch": 1.3206846963863246, "grad_norm": 0.10941670089960098, "learning_rate": 0.002, "loss": 2.3449, "step": 341640 }, { "epoch": 1.3207233535897078, "grad_norm": 0.09457511454820633, "learning_rate": 0.002, "loss": 2.3346, "step": 341650 }, { "epoch": 1.320762010793091, "grad_norm": 0.09568420797586441, "learning_rate": 0.002, "loss": 2.3316, "step": 341660 }, { "epoch": 1.3208006679964743, "grad_norm": 0.10222925990819931, "learning_rate": 0.002, "loss": 2.3245, "step": 341670 }, { "epoch": 1.3208393251998578, "grad_norm": 0.10123637318611145, "learning_rate": 0.002, "loss": 2.3331, "step": 341680 }, { "epoch": 1.320877982403241, "grad_norm": 0.1014849916100502, "learning_rate": 0.002, "loss": 2.3398, "step": 341690 }, { "epoch": 1.3209166396066243, "grad_norm": 0.10959955304861069, "learning_rate": 0.002, "loss": 2.3184, "step": 341700 }, { "epoch": 1.3209552968100076, "grad_norm": 0.10240094363689423, "learning_rate": 0.002, "loss": 2.3292, "step": 341710 }, { "epoch": 1.3209939540133908, "grad_norm": 0.10184129327535629, "learning_rate": 0.002, "loss": 2.3444, "step": 341720 }, { "epoch": 1.321032611216774, "grad_norm": 0.12254176288843155, "learning_rate": 0.002, "loss": 2.3364, "step": 341730 }, { "epoch": 1.3210712684201575, "grad_norm": 0.11405491828918457, "learning_rate": 0.002, "loss": 2.343, "step": 341740 }, { "epoch": 1.3211099256235408, "grad_norm": 0.1007409393787384, "learning_rate": 0.002, "loss": 2.3323, "step": 341750 }, { "epoch": 1.321148582826924, "grad_norm": 0.11579888314008713, "learning_rate": 0.002, "loss": 2.3503, "step": 341760 }, { "epoch": 1.3211872400303073, "grad_norm": 0.09673046320676804, "learning_rate": 0.002, "loss": 2.331, "step": 341770 }, { "epoch": 1.3212258972336905, "grad_norm": 0.10171656310558319, "learning_rate": 0.002, "loss": 2.3303, "step": 341780 }, { "epoch": 1.3212645544370738, "grad_norm": 0.11281340569257736, "learning_rate": 0.002, "loss": 2.3335, "step": 341790 }, { "epoch": 1.321303211640457, "grad_norm": 0.0931347906589508, "learning_rate": 0.002, "loss": 2.325, "step": 341800 }, { "epoch": 1.3213418688438403, "grad_norm": 0.09975960850715637, "learning_rate": 0.002, "loss": 2.3373, "step": 341810 }, { "epoch": 1.3213805260472236, "grad_norm": 0.1423596441745758, "learning_rate": 0.002, "loss": 2.3418, "step": 341820 }, { "epoch": 1.3214191832506068, "grad_norm": 0.11021801829338074, "learning_rate": 0.002, "loss": 2.3425, "step": 341830 }, { "epoch": 1.32145784045399, "grad_norm": 0.10833445191383362, "learning_rate": 0.002, "loss": 2.322, "step": 341840 }, { "epoch": 1.3214964976573735, "grad_norm": 0.10257164388895035, "learning_rate": 0.002, "loss": 2.3467, "step": 341850 }, { "epoch": 1.3215351548607568, "grad_norm": 0.1051739826798439, "learning_rate": 0.002, "loss": 2.3406, "step": 341860 }, { "epoch": 1.32157381206414, "grad_norm": 0.242488294839859, "learning_rate": 0.002, "loss": 2.3434, "step": 341870 }, { "epoch": 1.3216124692675233, "grad_norm": 0.10353244841098785, "learning_rate": 0.002, "loss": 2.332, "step": 341880 }, { "epoch": 1.3216511264709065, "grad_norm": 0.11375704407691956, "learning_rate": 0.002, "loss": 2.3325, "step": 341890 }, { "epoch": 1.3216897836742898, "grad_norm": 0.09590550512075424, "learning_rate": 0.002, "loss": 2.3393, "step": 341900 }, { "epoch": 1.3217284408776733, "grad_norm": 0.0975589007139206, "learning_rate": 0.002, "loss": 2.3354, "step": 341910 }, { "epoch": 1.3217670980810565, "grad_norm": 0.11673767119646072, "learning_rate": 0.002, "loss": 2.3454, "step": 341920 }, { "epoch": 1.3218057552844398, "grad_norm": 0.10108847171068192, "learning_rate": 0.002, "loss": 2.3173, "step": 341930 }, { "epoch": 1.321844412487823, "grad_norm": 0.10159999877214432, "learning_rate": 0.002, "loss": 2.3376, "step": 341940 }, { "epoch": 1.3218830696912063, "grad_norm": 0.10191165655851364, "learning_rate": 0.002, "loss": 2.3394, "step": 341950 }, { "epoch": 1.3219217268945895, "grad_norm": 0.09570711106061935, "learning_rate": 0.002, "loss": 2.3422, "step": 341960 }, { "epoch": 1.3219603840979728, "grad_norm": 0.10358276218175888, "learning_rate": 0.002, "loss": 2.3218, "step": 341970 }, { "epoch": 1.321999041301356, "grad_norm": 0.10976678878068924, "learning_rate": 0.002, "loss": 2.329, "step": 341980 }, { "epoch": 1.3220376985047393, "grad_norm": 0.1106809452176094, "learning_rate": 0.002, "loss": 2.3365, "step": 341990 }, { "epoch": 1.3220763557081225, "grad_norm": 0.12253010272979736, "learning_rate": 0.002, "loss": 2.3313, "step": 342000 }, { "epoch": 1.3221150129115058, "grad_norm": 0.09194432944059372, "learning_rate": 0.002, "loss": 2.3343, "step": 342010 }, { "epoch": 1.3221536701148893, "grad_norm": 0.10456662625074387, "learning_rate": 0.002, "loss": 2.328, "step": 342020 }, { "epoch": 1.3221923273182725, "grad_norm": 0.12826426327228546, "learning_rate": 0.002, "loss": 2.322, "step": 342030 }, { "epoch": 1.3222309845216558, "grad_norm": 0.11366898566484451, "learning_rate": 0.002, "loss": 2.3356, "step": 342040 }, { "epoch": 1.322269641725039, "grad_norm": 0.10935889929533005, "learning_rate": 0.002, "loss": 2.3263, "step": 342050 }, { "epoch": 1.3223082989284223, "grad_norm": 0.1325126737356186, "learning_rate": 0.002, "loss": 2.3246, "step": 342060 }, { "epoch": 1.3223469561318055, "grad_norm": 0.10648973286151886, "learning_rate": 0.002, "loss": 2.3436, "step": 342070 }, { "epoch": 1.322385613335189, "grad_norm": 0.10392796248197556, "learning_rate": 0.002, "loss": 2.3323, "step": 342080 }, { "epoch": 1.3224242705385723, "grad_norm": 0.09765364974737167, "learning_rate": 0.002, "loss": 2.3105, "step": 342090 }, { "epoch": 1.3224629277419555, "grad_norm": 0.09096361696720123, "learning_rate": 0.002, "loss": 2.3408, "step": 342100 }, { "epoch": 1.3225015849453388, "grad_norm": 0.12809644639492035, "learning_rate": 0.002, "loss": 2.325, "step": 342110 }, { "epoch": 1.322540242148722, "grad_norm": 0.09639798104763031, "learning_rate": 0.002, "loss": 2.3315, "step": 342120 }, { "epoch": 1.3225788993521053, "grad_norm": 0.10138837993144989, "learning_rate": 0.002, "loss": 2.3425, "step": 342130 }, { "epoch": 1.3226175565554885, "grad_norm": 0.1052599549293518, "learning_rate": 0.002, "loss": 2.3411, "step": 342140 }, { "epoch": 1.3226562137588718, "grad_norm": 0.116051085293293, "learning_rate": 0.002, "loss": 2.3531, "step": 342150 }, { "epoch": 1.322694870962255, "grad_norm": 0.11769842356443405, "learning_rate": 0.002, "loss": 2.3341, "step": 342160 }, { "epoch": 1.3227335281656383, "grad_norm": 0.1063615009188652, "learning_rate": 0.002, "loss": 2.3333, "step": 342170 }, { "epoch": 1.3227721853690215, "grad_norm": 0.10278849303722382, "learning_rate": 0.002, "loss": 2.3129, "step": 342180 }, { "epoch": 1.322810842572405, "grad_norm": 0.0904436707496643, "learning_rate": 0.002, "loss": 2.3302, "step": 342190 }, { "epoch": 1.3228494997757883, "grad_norm": 0.10121815651655197, "learning_rate": 0.002, "loss": 2.3305, "step": 342200 }, { "epoch": 1.3228881569791715, "grad_norm": 0.08775201439857483, "learning_rate": 0.002, "loss": 2.33, "step": 342210 }, { "epoch": 1.3229268141825548, "grad_norm": 0.10893165320158005, "learning_rate": 0.002, "loss": 2.3216, "step": 342220 }, { "epoch": 1.322965471385938, "grad_norm": 0.11072231829166412, "learning_rate": 0.002, "loss": 2.3322, "step": 342230 }, { "epoch": 1.3230041285893213, "grad_norm": 0.1255996972322464, "learning_rate": 0.002, "loss": 2.3337, "step": 342240 }, { "epoch": 1.3230427857927047, "grad_norm": 0.09480229765176773, "learning_rate": 0.002, "loss": 2.3301, "step": 342250 }, { "epoch": 1.323081442996088, "grad_norm": 0.10230188071727753, "learning_rate": 0.002, "loss": 2.3329, "step": 342260 }, { "epoch": 1.3231201001994712, "grad_norm": 0.11351063847541809, "learning_rate": 0.002, "loss": 2.3431, "step": 342270 }, { "epoch": 1.3231587574028545, "grad_norm": 0.10005253553390503, "learning_rate": 0.002, "loss": 2.3468, "step": 342280 }, { "epoch": 1.3231974146062377, "grad_norm": 0.1019071415066719, "learning_rate": 0.002, "loss": 2.3346, "step": 342290 }, { "epoch": 1.323236071809621, "grad_norm": 0.10154785215854645, "learning_rate": 0.002, "loss": 2.3226, "step": 342300 }, { "epoch": 1.3232747290130042, "grad_norm": 0.13221405446529388, "learning_rate": 0.002, "loss": 2.3354, "step": 342310 }, { "epoch": 1.3233133862163875, "grad_norm": 0.09709381312131882, "learning_rate": 0.002, "loss": 2.3322, "step": 342320 }, { "epoch": 1.3233520434197708, "grad_norm": 0.09548242390155792, "learning_rate": 0.002, "loss": 2.3186, "step": 342330 }, { "epoch": 1.323390700623154, "grad_norm": 0.10958176106214523, "learning_rate": 0.002, "loss": 2.3307, "step": 342340 }, { "epoch": 1.3234293578265375, "grad_norm": 0.1257767677307129, "learning_rate": 0.002, "loss": 2.3238, "step": 342350 }, { "epoch": 1.3234680150299207, "grad_norm": 0.10943162441253662, "learning_rate": 0.002, "loss": 2.3465, "step": 342360 }, { "epoch": 1.323506672233304, "grad_norm": 0.1096360981464386, "learning_rate": 0.002, "loss": 2.3543, "step": 342370 }, { "epoch": 1.3235453294366872, "grad_norm": 0.13019192218780518, "learning_rate": 0.002, "loss": 2.3379, "step": 342380 }, { "epoch": 1.3235839866400705, "grad_norm": 0.09725780040025711, "learning_rate": 0.002, "loss": 2.3343, "step": 342390 }, { "epoch": 1.3236226438434537, "grad_norm": 0.12376907467842102, "learning_rate": 0.002, "loss": 2.3207, "step": 342400 }, { "epoch": 1.323661301046837, "grad_norm": 0.11352759599685669, "learning_rate": 0.002, "loss": 2.336, "step": 342410 }, { "epoch": 1.3236999582502205, "grad_norm": 0.1032281219959259, "learning_rate": 0.002, "loss": 2.3256, "step": 342420 }, { "epoch": 1.3237386154536037, "grad_norm": 0.09839282929897308, "learning_rate": 0.002, "loss": 2.344, "step": 342430 }, { "epoch": 1.323777272656987, "grad_norm": 0.11424583196640015, "learning_rate": 0.002, "loss": 2.3266, "step": 342440 }, { "epoch": 1.3238159298603702, "grad_norm": 0.11875501275062561, "learning_rate": 0.002, "loss": 2.3459, "step": 342450 }, { "epoch": 1.3238545870637535, "grad_norm": 0.1253490298986435, "learning_rate": 0.002, "loss": 2.3322, "step": 342460 }, { "epoch": 1.3238932442671367, "grad_norm": 0.11857567727565765, "learning_rate": 0.002, "loss": 2.3332, "step": 342470 }, { "epoch": 1.32393190147052, "grad_norm": 0.09765864163637161, "learning_rate": 0.002, "loss": 2.3417, "step": 342480 }, { "epoch": 1.3239705586739032, "grad_norm": 0.09970621764659882, "learning_rate": 0.002, "loss": 2.3277, "step": 342490 }, { "epoch": 1.3240092158772865, "grad_norm": 0.10559707880020142, "learning_rate": 0.002, "loss": 2.3221, "step": 342500 }, { "epoch": 1.3240478730806697, "grad_norm": 0.10606026649475098, "learning_rate": 0.002, "loss": 2.3298, "step": 342510 }, { "epoch": 1.3240865302840532, "grad_norm": 0.0962778627872467, "learning_rate": 0.002, "loss": 2.3438, "step": 342520 }, { "epoch": 1.3241251874874365, "grad_norm": 0.12174908816814423, "learning_rate": 0.002, "loss": 2.3412, "step": 342530 }, { "epoch": 1.3241638446908197, "grad_norm": 0.09986695647239685, "learning_rate": 0.002, "loss": 2.3278, "step": 342540 }, { "epoch": 1.324202501894203, "grad_norm": 0.09701001644134521, "learning_rate": 0.002, "loss": 2.3222, "step": 342550 }, { "epoch": 1.3242411590975862, "grad_norm": 0.11607201397418976, "learning_rate": 0.002, "loss": 2.3421, "step": 342560 }, { "epoch": 1.3242798163009695, "grad_norm": 0.10730637609958649, "learning_rate": 0.002, "loss": 2.332, "step": 342570 }, { "epoch": 1.3243184735043527, "grad_norm": 0.11313966661691666, "learning_rate": 0.002, "loss": 2.3389, "step": 342580 }, { "epoch": 1.3243571307077362, "grad_norm": 0.1085088849067688, "learning_rate": 0.002, "loss": 2.3304, "step": 342590 }, { "epoch": 1.3243957879111194, "grad_norm": 0.10107274353504181, "learning_rate": 0.002, "loss": 2.3431, "step": 342600 }, { "epoch": 1.3244344451145027, "grad_norm": 0.09417583793401718, "learning_rate": 0.002, "loss": 2.3418, "step": 342610 }, { "epoch": 1.324473102317886, "grad_norm": 0.12345251441001892, "learning_rate": 0.002, "loss": 2.3259, "step": 342620 }, { "epoch": 1.3245117595212692, "grad_norm": 0.11787278205156326, "learning_rate": 0.002, "loss": 2.3191, "step": 342630 }, { "epoch": 1.3245504167246525, "grad_norm": 0.0970306396484375, "learning_rate": 0.002, "loss": 2.3365, "step": 342640 }, { "epoch": 1.3245890739280357, "grad_norm": 0.09616800397634506, "learning_rate": 0.002, "loss": 2.3316, "step": 342650 }, { "epoch": 1.324627731131419, "grad_norm": 0.10771799832582474, "learning_rate": 0.002, "loss": 2.3508, "step": 342660 }, { "epoch": 1.3246663883348022, "grad_norm": 0.1141650378704071, "learning_rate": 0.002, "loss": 2.3202, "step": 342670 }, { "epoch": 1.3247050455381855, "grad_norm": 0.1058678925037384, "learning_rate": 0.002, "loss": 2.3424, "step": 342680 }, { "epoch": 1.324743702741569, "grad_norm": 0.09846770018339157, "learning_rate": 0.002, "loss": 2.3394, "step": 342690 }, { "epoch": 1.3247823599449522, "grad_norm": 0.10203972458839417, "learning_rate": 0.002, "loss": 2.3419, "step": 342700 }, { "epoch": 1.3248210171483354, "grad_norm": 0.12421320378780365, "learning_rate": 0.002, "loss": 2.3424, "step": 342710 }, { "epoch": 1.3248596743517187, "grad_norm": 0.09551411122083664, "learning_rate": 0.002, "loss": 2.3306, "step": 342720 }, { "epoch": 1.324898331555102, "grad_norm": 0.10334688425064087, "learning_rate": 0.002, "loss": 2.3348, "step": 342730 }, { "epoch": 1.3249369887584852, "grad_norm": 0.0986914187669754, "learning_rate": 0.002, "loss": 2.3455, "step": 342740 }, { "epoch": 1.3249756459618685, "grad_norm": 0.0937899798154831, "learning_rate": 0.002, "loss": 2.3382, "step": 342750 }, { "epoch": 1.325014303165252, "grad_norm": 0.10014919936656952, "learning_rate": 0.002, "loss": 2.3448, "step": 342760 }, { "epoch": 1.3250529603686352, "grad_norm": 0.10135438293218613, "learning_rate": 0.002, "loss": 2.3188, "step": 342770 }, { "epoch": 1.3250916175720184, "grad_norm": 0.1364315301179886, "learning_rate": 0.002, "loss": 2.3531, "step": 342780 }, { "epoch": 1.3251302747754017, "grad_norm": 0.11440681666135788, "learning_rate": 0.002, "loss": 2.3389, "step": 342790 }, { "epoch": 1.325168931978785, "grad_norm": 0.09707643836736679, "learning_rate": 0.002, "loss": 2.3384, "step": 342800 }, { "epoch": 1.3252075891821682, "grad_norm": 0.0961620956659317, "learning_rate": 0.002, "loss": 2.3458, "step": 342810 }, { "epoch": 1.3252462463855514, "grad_norm": 0.10802191495895386, "learning_rate": 0.002, "loss": 2.3456, "step": 342820 }, { "epoch": 1.3252849035889347, "grad_norm": 0.1003083810210228, "learning_rate": 0.002, "loss": 2.3209, "step": 342830 }, { "epoch": 1.325323560792318, "grad_norm": 0.10295507311820984, "learning_rate": 0.002, "loss": 2.3327, "step": 342840 }, { "epoch": 1.3253622179957012, "grad_norm": 0.11288342624902725, "learning_rate": 0.002, "loss": 2.3404, "step": 342850 }, { "epoch": 1.3254008751990847, "grad_norm": 0.09611604362726212, "learning_rate": 0.002, "loss": 2.3303, "step": 342860 }, { "epoch": 1.325439532402468, "grad_norm": 0.09539765119552612, "learning_rate": 0.002, "loss": 2.3354, "step": 342870 }, { "epoch": 1.3254781896058512, "grad_norm": 0.15975433588027954, "learning_rate": 0.002, "loss": 2.3179, "step": 342880 }, { "epoch": 1.3255168468092344, "grad_norm": 0.11723661422729492, "learning_rate": 0.002, "loss": 2.3267, "step": 342890 }, { "epoch": 1.3255555040126177, "grad_norm": 0.09128891676664352, "learning_rate": 0.002, "loss": 2.3458, "step": 342900 }, { "epoch": 1.325594161216001, "grad_norm": 0.09708354622125626, "learning_rate": 0.002, "loss": 2.3424, "step": 342910 }, { "epoch": 1.3256328184193844, "grad_norm": 0.1222986951470375, "learning_rate": 0.002, "loss": 2.335, "step": 342920 }, { "epoch": 1.3256714756227677, "grad_norm": 0.1060582846403122, "learning_rate": 0.002, "loss": 2.3296, "step": 342930 }, { "epoch": 1.325710132826151, "grad_norm": 0.08390968292951584, "learning_rate": 0.002, "loss": 2.3601, "step": 342940 }, { "epoch": 1.3257487900295342, "grad_norm": 0.09373115003108978, "learning_rate": 0.002, "loss": 2.3201, "step": 342950 }, { "epoch": 1.3257874472329174, "grad_norm": 0.11386244744062424, "learning_rate": 0.002, "loss": 2.3439, "step": 342960 }, { "epoch": 1.3258261044363007, "grad_norm": 0.0926695317029953, "learning_rate": 0.002, "loss": 2.3513, "step": 342970 }, { "epoch": 1.325864761639684, "grad_norm": 0.09657523781061172, "learning_rate": 0.002, "loss": 2.3356, "step": 342980 }, { "epoch": 1.3259034188430672, "grad_norm": 0.1014002114534378, "learning_rate": 0.002, "loss": 2.3319, "step": 342990 }, { "epoch": 1.3259420760464504, "grad_norm": 0.12703995406627655, "learning_rate": 0.002, "loss": 2.3393, "step": 343000 }, { "epoch": 1.3259807332498337, "grad_norm": 0.29733362793922424, "learning_rate": 0.002, "loss": 2.3258, "step": 343010 }, { "epoch": 1.326019390453217, "grad_norm": 0.11396072059869766, "learning_rate": 0.002, "loss": 2.3293, "step": 343020 }, { "epoch": 1.3260580476566004, "grad_norm": 0.1067725121974945, "learning_rate": 0.002, "loss": 2.3538, "step": 343030 }, { "epoch": 1.3260967048599837, "grad_norm": 0.09846443682909012, "learning_rate": 0.002, "loss": 2.3306, "step": 343040 }, { "epoch": 1.326135362063367, "grad_norm": 0.1201457604765892, "learning_rate": 0.002, "loss": 2.3382, "step": 343050 }, { "epoch": 1.3261740192667502, "grad_norm": 0.09486748278141022, "learning_rate": 0.002, "loss": 2.354, "step": 343060 }, { "epoch": 1.3262126764701334, "grad_norm": 0.09636746346950531, "learning_rate": 0.002, "loss": 2.3526, "step": 343070 }, { "epoch": 1.3262513336735167, "grad_norm": 0.10028290003538132, "learning_rate": 0.002, "loss": 2.3261, "step": 343080 }, { "epoch": 1.3262899908769001, "grad_norm": 0.11330897361040115, "learning_rate": 0.002, "loss": 2.3416, "step": 343090 }, { "epoch": 1.3263286480802834, "grad_norm": 0.13019509613513947, "learning_rate": 0.002, "loss": 2.3477, "step": 343100 }, { "epoch": 1.3263673052836666, "grad_norm": 0.10553409159183502, "learning_rate": 0.002, "loss": 2.333, "step": 343110 }, { "epoch": 1.32640596248705, "grad_norm": 0.09633781015872955, "learning_rate": 0.002, "loss": 2.3389, "step": 343120 }, { "epoch": 1.3264446196904331, "grad_norm": 0.08768466860055923, "learning_rate": 0.002, "loss": 2.3281, "step": 343130 }, { "epoch": 1.3264832768938164, "grad_norm": 0.10579250752925873, "learning_rate": 0.002, "loss": 2.3475, "step": 343140 }, { "epoch": 1.3265219340971997, "grad_norm": 0.09313761442899704, "learning_rate": 0.002, "loss": 2.3318, "step": 343150 }, { "epoch": 1.326560591300583, "grad_norm": 0.10495753586292267, "learning_rate": 0.002, "loss": 2.3324, "step": 343160 }, { "epoch": 1.3265992485039662, "grad_norm": 0.11235906183719635, "learning_rate": 0.002, "loss": 2.3412, "step": 343170 }, { "epoch": 1.3266379057073494, "grad_norm": 0.09539853781461716, "learning_rate": 0.002, "loss": 2.3322, "step": 343180 }, { "epoch": 1.3266765629107327, "grad_norm": 0.09865597635507584, "learning_rate": 0.002, "loss": 2.3313, "step": 343190 }, { "epoch": 1.3267152201141161, "grad_norm": 0.09811379760503769, "learning_rate": 0.002, "loss": 2.3388, "step": 343200 }, { "epoch": 1.3267538773174994, "grad_norm": 0.09318213909864426, "learning_rate": 0.002, "loss": 2.3322, "step": 343210 }, { "epoch": 1.3267925345208826, "grad_norm": 0.11621811240911484, "learning_rate": 0.002, "loss": 2.3344, "step": 343220 }, { "epoch": 1.326831191724266, "grad_norm": 0.11613493412733078, "learning_rate": 0.002, "loss": 2.3242, "step": 343230 }, { "epoch": 1.3268698489276491, "grad_norm": 0.12907305359840393, "learning_rate": 0.002, "loss": 2.3186, "step": 343240 }, { "epoch": 1.3269085061310324, "grad_norm": 0.1041342243552208, "learning_rate": 0.002, "loss": 2.3476, "step": 343250 }, { "epoch": 1.3269471633344159, "grad_norm": 0.09700960665941238, "learning_rate": 0.002, "loss": 2.3237, "step": 343260 }, { "epoch": 1.3269858205377991, "grad_norm": 0.11570774763822556, "learning_rate": 0.002, "loss": 2.3234, "step": 343270 }, { "epoch": 1.3270244777411824, "grad_norm": 0.08997366577386856, "learning_rate": 0.002, "loss": 2.3402, "step": 343280 }, { "epoch": 1.3270631349445656, "grad_norm": 0.11459000408649445, "learning_rate": 0.002, "loss": 2.347, "step": 343290 }, { "epoch": 1.3271017921479489, "grad_norm": 0.09992536157369614, "learning_rate": 0.002, "loss": 2.3281, "step": 343300 }, { "epoch": 1.3271404493513321, "grad_norm": 0.10141663253307343, "learning_rate": 0.002, "loss": 2.3325, "step": 343310 }, { "epoch": 1.3271791065547154, "grad_norm": 0.1016429141163826, "learning_rate": 0.002, "loss": 2.3359, "step": 343320 }, { "epoch": 1.3272177637580986, "grad_norm": 0.1101653203368187, "learning_rate": 0.002, "loss": 2.3449, "step": 343330 }, { "epoch": 1.3272564209614819, "grad_norm": 0.10410647839307785, "learning_rate": 0.002, "loss": 2.3266, "step": 343340 }, { "epoch": 1.3272950781648651, "grad_norm": 0.1013018786907196, "learning_rate": 0.002, "loss": 2.3175, "step": 343350 }, { "epoch": 1.3273337353682484, "grad_norm": 0.12418507039546967, "learning_rate": 0.002, "loss": 2.346, "step": 343360 }, { "epoch": 1.3273723925716319, "grad_norm": 0.1398543268442154, "learning_rate": 0.002, "loss": 2.3433, "step": 343370 }, { "epoch": 1.3274110497750151, "grad_norm": 0.10456975549459457, "learning_rate": 0.002, "loss": 2.3275, "step": 343380 }, { "epoch": 1.3274497069783984, "grad_norm": 0.09469009190797806, "learning_rate": 0.002, "loss": 2.3469, "step": 343390 }, { "epoch": 1.3274883641817816, "grad_norm": 0.10595841705799103, "learning_rate": 0.002, "loss": 2.3136, "step": 343400 }, { "epoch": 1.3275270213851649, "grad_norm": 0.10216021537780762, "learning_rate": 0.002, "loss": 2.3377, "step": 343410 }, { "epoch": 1.3275656785885481, "grad_norm": 0.11457862704992294, "learning_rate": 0.002, "loss": 2.33, "step": 343420 }, { "epoch": 1.3276043357919316, "grad_norm": 0.1330697238445282, "learning_rate": 0.002, "loss": 2.3332, "step": 343430 }, { "epoch": 1.3276429929953149, "grad_norm": 0.09366831928491592, "learning_rate": 0.002, "loss": 2.3397, "step": 343440 }, { "epoch": 1.327681650198698, "grad_norm": 0.11288196593523026, "learning_rate": 0.002, "loss": 2.3399, "step": 343450 }, { "epoch": 1.3277203074020814, "grad_norm": 0.10931092500686646, "learning_rate": 0.002, "loss": 2.3252, "step": 343460 }, { "epoch": 1.3277589646054646, "grad_norm": 0.11085681617259979, "learning_rate": 0.002, "loss": 2.3356, "step": 343470 }, { "epoch": 1.3277976218088479, "grad_norm": 0.11561580002307892, "learning_rate": 0.002, "loss": 2.3435, "step": 343480 }, { "epoch": 1.3278362790122311, "grad_norm": 0.09727177768945694, "learning_rate": 0.002, "loss": 2.3326, "step": 343490 }, { "epoch": 1.3278749362156144, "grad_norm": 0.09696878492832184, "learning_rate": 0.002, "loss": 2.3573, "step": 343500 }, { "epoch": 1.3279135934189976, "grad_norm": 0.10486599802970886, "learning_rate": 0.002, "loss": 2.3318, "step": 343510 }, { "epoch": 1.3279522506223809, "grad_norm": 0.10580369830131531, "learning_rate": 0.002, "loss": 2.3507, "step": 343520 }, { "epoch": 1.3279909078257641, "grad_norm": 0.0963820368051529, "learning_rate": 0.002, "loss": 2.3268, "step": 343530 }, { "epoch": 1.3280295650291476, "grad_norm": 0.10694960504770279, "learning_rate": 0.002, "loss": 2.3294, "step": 343540 }, { "epoch": 1.3280682222325308, "grad_norm": 0.11083703488111496, "learning_rate": 0.002, "loss": 2.3352, "step": 343550 }, { "epoch": 1.328106879435914, "grad_norm": 0.11412572115659714, "learning_rate": 0.002, "loss": 2.3322, "step": 343560 }, { "epoch": 1.3281455366392974, "grad_norm": 0.09420335292816162, "learning_rate": 0.002, "loss": 2.3395, "step": 343570 }, { "epoch": 1.3281841938426806, "grad_norm": 0.1165669858455658, "learning_rate": 0.002, "loss": 2.3393, "step": 343580 }, { "epoch": 1.3282228510460639, "grad_norm": 0.09835314005613327, "learning_rate": 0.002, "loss": 2.337, "step": 343590 }, { "epoch": 1.3282615082494473, "grad_norm": 0.11818283051252365, "learning_rate": 0.002, "loss": 2.3371, "step": 343600 }, { "epoch": 1.3283001654528306, "grad_norm": 0.10762883722782135, "learning_rate": 0.002, "loss": 2.3279, "step": 343610 }, { "epoch": 1.3283388226562138, "grad_norm": 0.10828974097967148, "learning_rate": 0.002, "loss": 2.3348, "step": 343620 }, { "epoch": 1.328377479859597, "grad_norm": 0.1360919326543808, "learning_rate": 0.002, "loss": 2.3412, "step": 343630 }, { "epoch": 1.3284161370629803, "grad_norm": 0.10663457214832306, "learning_rate": 0.002, "loss": 2.3261, "step": 343640 }, { "epoch": 1.3284547942663636, "grad_norm": 0.17864476144313812, "learning_rate": 0.002, "loss": 2.3291, "step": 343650 }, { "epoch": 1.3284934514697468, "grad_norm": 0.0979250967502594, "learning_rate": 0.002, "loss": 2.3281, "step": 343660 }, { "epoch": 1.32853210867313, "grad_norm": 0.09882812201976776, "learning_rate": 0.002, "loss": 2.3383, "step": 343670 }, { "epoch": 1.3285707658765133, "grad_norm": 0.09272123873233795, "learning_rate": 0.002, "loss": 2.3187, "step": 343680 }, { "epoch": 1.3286094230798966, "grad_norm": 0.10529595613479614, "learning_rate": 0.002, "loss": 2.3163, "step": 343690 }, { "epoch": 1.3286480802832799, "grad_norm": 0.10470674186944962, "learning_rate": 0.002, "loss": 2.3447, "step": 343700 }, { "epoch": 1.3286867374866633, "grad_norm": 0.11703024804592133, "learning_rate": 0.002, "loss": 2.3271, "step": 343710 }, { "epoch": 1.3287253946900466, "grad_norm": 0.100809745490551, "learning_rate": 0.002, "loss": 2.327, "step": 343720 }, { "epoch": 1.3287640518934298, "grad_norm": 0.1015714555978775, "learning_rate": 0.002, "loss": 2.3311, "step": 343730 }, { "epoch": 1.328802709096813, "grad_norm": 0.10203837603330612, "learning_rate": 0.002, "loss": 2.3313, "step": 343740 }, { "epoch": 1.3288413663001963, "grad_norm": 0.11030900478363037, "learning_rate": 0.002, "loss": 2.3258, "step": 343750 }, { "epoch": 1.3288800235035796, "grad_norm": 0.10859356820583344, "learning_rate": 0.002, "loss": 2.325, "step": 343760 }, { "epoch": 1.328918680706963, "grad_norm": 0.10833962261676788, "learning_rate": 0.002, "loss": 2.3347, "step": 343770 }, { "epoch": 1.3289573379103463, "grad_norm": 0.10143111646175385, "learning_rate": 0.002, "loss": 2.3255, "step": 343780 }, { "epoch": 1.3289959951137296, "grad_norm": 0.1062939316034317, "learning_rate": 0.002, "loss": 2.3308, "step": 343790 }, { "epoch": 1.3290346523171128, "grad_norm": 0.16646593809127808, "learning_rate": 0.002, "loss": 2.3421, "step": 343800 }, { "epoch": 1.329073309520496, "grad_norm": 0.10195731371641159, "learning_rate": 0.002, "loss": 2.3467, "step": 343810 }, { "epoch": 1.3291119667238793, "grad_norm": 0.10865110158920288, "learning_rate": 0.002, "loss": 2.3364, "step": 343820 }, { "epoch": 1.3291506239272626, "grad_norm": 0.10576749593019485, "learning_rate": 0.002, "loss": 2.3433, "step": 343830 }, { "epoch": 1.3291892811306458, "grad_norm": 0.09654872119426727, "learning_rate": 0.002, "loss": 2.3427, "step": 343840 }, { "epoch": 1.329227938334029, "grad_norm": 0.1124534159898758, "learning_rate": 0.002, "loss": 2.3337, "step": 343850 }, { "epoch": 1.3292665955374123, "grad_norm": 0.12102707475423813, "learning_rate": 0.002, "loss": 2.3439, "step": 343860 }, { "epoch": 1.3293052527407956, "grad_norm": 0.12327728420495987, "learning_rate": 0.002, "loss": 2.3374, "step": 343870 }, { "epoch": 1.329343909944179, "grad_norm": 0.086943618953228, "learning_rate": 0.002, "loss": 2.3277, "step": 343880 }, { "epoch": 1.3293825671475623, "grad_norm": 0.09946412593126297, "learning_rate": 0.002, "loss": 2.333, "step": 343890 }, { "epoch": 1.3294212243509456, "grad_norm": 0.12250570207834244, "learning_rate": 0.002, "loss": 2.3339, "step": 343900 }, { "epoch": 1.3294598815543288, "grad_norm": 0.1032809242606163, "learning_rate": 0.002, "loss": 2.3372, "step": 343910 }, { "epoch": 1.329498538757712, "grad_norm": 0.12389107048511505, "learning_rate": 0.002, "loss": 2.3268, "step": 343920 }, { "epoch": 1.3295371959610953, "grad_norm": 0.09774418920278549, "learning_rate": 0.002, "loss": 2.3387, "step": 343930 }, { "epoch": 1.3295758531644788, "grad_norm": 0.0978965312242508, "learning_rate": 0.002, "loss": 2.3335, "step": 343940 }, { "epoch": 1.329614510367862, "grad_norm": 0.09504812955856323, "learning_rate": 0.002, "loss": 2.3338, "step": 343950 }, { "epoch": 1.3296531675712453, "grad_norm": 0.10925097018480301, "learning_rate": 0.002, "loss": 2.3389, "step": 343960 }, { "epoch": 1.3296918247746286, "grad_norm": 0.11287527531385422, "learning_rate": 0.002, "loss": 2.3175, "step": 343970 }, { "epoch": 1.3297304819780118, "grad_norm": 0.09954530745744705, "learning_rate": 0.002, "loss": 2.3325, "step": 343980 }, { "epoch": 1.329769139181395, "grad_norm": 0.09281942993402481, "learning_rate": 0.002, "loss": 2.3409, "step": 343990 }, { "epoch": 1.3298077963847783, "grad_norm": 0.09409678727388382, "learning_rate": 0.002, "loss": 2.3368, "step": 344000 }, { "epoch": 1.3298464535881616, "grad_norm": 0.11386945843696594, "learning_rate": 0.002, "loss": 2.3251, "step": 344010 }, { "epoch": 1.3298851107915448, "grad_norm": 0.09691344201564789, "learning_rate": 0.002, "loss": 2.3524, "step": 344020 }, { "epoch": 1.329923767994928, "grad_norm": 0.11871121823787689, "learning_rate": 0.002, "loss": 2.3319, "step": 344030 }, { "epoch": 1.3299624251983113, "grad_norm": 0.10736346244812012, "learning_rate": 0.002, "loss": 2.3376, "step": 344040 }, { "epoch": 1.3300010824016948, "grad_norm": 0.11998474597930908, "learning_rate": 0.002, "loss": 2.3278, "step": 344050 }, { "epoch": 1.330039739605078, "grad_norm": 0.11618227511644363, "learning_rate": 0.002, "loss": 2.3355, "step": 344060 }, { "epoch": 1.3300783968084613, "grad_norm": 0.11615073680877686, "learning_rate": 0.002, "loss": 2.3383, "step": 344070 }, { "epoch": 1.3301170540118445, "grad_norm": 0.09982374310493469, "learning_rate": 0.002, "loss": 2.3314, "step": 344080 }, { "epoch": 1.3301557112152278, "grad_norm": 0.10129628330469131, "learning_rate": 0.002, "loss": 2.3413, "step": 344090 }, { "epoch": 1.330194368418611, "grad_norm": 0.10706644505262375, "learning_rate": 0.002, "loss": 2.3233, "step": 344100 }, { "epoch": 1.3302330256219945, "grad_norm": 0.11260034143924713, "learning_rate": 0.002, "loss": 2.3466, "step": 344110 }, { "epoch": 1.3302716828253778, "grad_norm": 0.11245223879814148, "learning_rate": 0.002, "loss": 2.3298, "step": 344120 }, { "epoch": 1.330310340028761, "grad_norm": 0.1035739928483963, "learning_rate": 0.002, "loss": 2.3257, "step": 344130 }, { "epoch": 1.3303489972321443, "grad_norm": 0.13902145624160767, "learning_rate": 0.002, "loss": 2.3465, "step": 344140 }, { "epoch": 1.3303876544355275, "grad_norm": 0.11603587120771408, "learning_rate": 0.002, "loss": 2.3384, "step": 344150 }, { "epoch": 1.3304263116389108, "grad_norm": 0.09771740436553955, "learning_rate": 0.002, "loss": 2.3336, "step": 344160 }, { "epoch": 1.330464968842294, "grad_norm": 0.0917857438325882, "learning_rate": 0.002, "loss": 2.3489, "step": 344170 }, { "epoch": 1.3305036260456773, "grad_norm": 0.10400768369436264, "learning_rate": 0.002, "loss": 2.3304, "step": 344180 }, { "epoch": 1.3305422832490605, "grad_norm": 0.0979352667927742, "learning_rate": 0.002, "loss": 2.3251, "step": 344190 }, { "epoch": 1.3305809404524438, "grad_norm": 0.10228806734085083, "learning_rate": 0.002, "loss": 2.3338, "step": 344200 }, { "epoch": 1.3306195976558273, "grad_norm": 0.09991668164730072, "learning_rate": 0.002, "loss": 2.3332, "step": 344210 }, { "epoch": 1.3306582548592105, "grad_norm": 0.11020371317863464, "learning_rate": 0.002, "loss": 2.3529, "step": 344220 }, { "epoch": 1.3306969120625938, "grad_norm": 0.0935186892747879, "learning_rate": 0.002, "loss": 2.3225, "step": 344230 }, { "epoch": 1.330735569265977, "grad_norm": 0.09985950589179993, "learning_rate": 0.002, "loss": 2.3148, "step": 344240 }, { "epoch": 1.3307742264693603, "grad_norm": 0.10477671027183533, "learning_rate": 0.002, "loss": 2.316, "step": 344250 }, { "epoch": 1.3308128836727435, "grad_norm": 0.10817205160856247, "learning_rate": 0.002, "loss": 2.3326, "step": 344260 }, { "epoch": 1.3308515408761268, "grad_norm": 0.10950993001461029, "learning_rate": 0.002, "loss": 2.312, "step": 344270 }, { "epoch": 1.3308901980795103, "grad_norm": 0.1234552189707756, "learning_rate": 0.002, "loss": 2.3261, "step": 344280 }, { "epoch": 1.3309288552828935, "grad_norm": 0.09903118014335632, "learning_rate": 0.002, "loss": 2.3289, "step": 344290 }, { "epoch": 1.3309675124862768, "grad_norm": 0.11547055095434189, "learning_rate": 0.002, "loss": 2.3294, "step": 344300 }, { "epoch": 1.33100616968966, "grad_norm": 0.10943587124347687, "learning_rate": 0.002, "loss": 2.342, "step": 344310 }, { "epoch": 1.3310448268930433, "grad_norm": 0.1084182858467102, "learning_rate": 0.002, "loss": 2.3375, "step": 344320 }, { "epoch": 1.3310834840964265, "grad_norm": 0.09529057145118713, "learning_rate": 0.002, "loss": 2.3324, "step": 344330 }, { "epoch": 1.3311221412998098, "grad_norm": 0.1016959473490715, "learning_rate": 0.002, "loss": 2.3469, "step": 344340 }, { "epoch": 1.331160798503193, "grad_norm": 0.11874452978372574, "learning_rate": 0.002, "loss": 2.3274, "step": 344350 }, { "epoch": 1.3311994557065763, "grad_norm": 0.10183338075876236, "learning_rate": 0.002, "loss": 2.3431, "step": 344360 }, { "epoch": 1.3312381129099595, "grad_norm": 0.11162742972373962, "learning_rate": 0.002, "loss": 2.3416, "step": 344370 }, { "epoch": 1.331276770113343, "grad_norm": 0.10805539786815643, "learning_rate": 0.002, "loss": 2.3401, "step": 344380 }, { "epoch": 1.3313154273167263, "grad_norm": 0.11087726801633835, "learning_rate": 0.002, "loss": 2.3266, "step": 344390 }, { "epoch": 1.3313540845201095, "grad_norm": 0.10322079062461853, "learning_rate": 0.002, "loss": 2.3282, "step": 344400 }, { "epoch": 1.3313927417234928, "grad_norm": 0.10526745766401291, "learning_rate": 0.002, "loss": 2.3263, "step": 344410 }, { "epoch": 1.331431398926876, "grad_norm": 0.10890132188796997, "learning_rate": 0.002, "loss": 2.3276, "step": 344420 }, { "epoch": 1.3314700561302593, "grad_norm": 0.10265296697616577, "learning_rate": 0.002, "loss": 2.3278, "step": 344430 }, { "epoch": 1.3315087133336425, "grad_norm": 0.09728667885065079, "learning_rate": 0.002, "loss": 2.3113, "step": 344440 }, { "epoch": 1.331547370537026, "grad_norm": 0.1127006933093071, "learning_rate": 0.002, "loss": 2.3138, "step": 344450 }, { "epoch": 1.3315860277404092, "grad_norm": 0.12087395042181015, "learning_rate": 0.002, "loss": 2.3133, "step": 344460 }, { "epoch": 1.3316246849437925, "grad_norm": 0.12616465985774994, "learning_rate": 0.002, "loss": 2.3297, "step": 344470 }, { "epoch": 1.3316633421471757, "grad_norm": 0.1002395749092102, "learning_rate": 0.002, "loss": 2.3386, "step": 344480 }, { "epoch": 1.331701999350559, "grad_norm": 0.10422340780496597, "learning_rate": 0.002, "loss": 2.3298, "step": 344490 }, { "epoch": 1.3317406565539422, "grad_norm": 0.09601067751646042, "learning_rate": 0.002, "loss": 2.3387, "step": 344500 }, { "epoch": 1.3317793137573255, "grad_norm": 0.11861871182918549, "learning_rate": 0.002, "loss": 2.3345, "step": 344510 }, { "epoch": 1.3318179709607088, "grad_norm": 0.09754236042499542, "learning_rate": 0.002, "loss": 2.332, "step": 344520 }, { "epoch": 1.331856628164092, "grad_norm": 0.10452759265899658, "learning_rate": 0.002, "loss": 2.345, "step": 344530 }, { "epoch": 1.3318952853674753, "grad_norm": 0.10563155263662338, "learning_rate": 0.002, "loss": 2.3379, "step": 344540 }, { "epoch": 1.3319339425708587, "grad_norm": 0.13401366770267487, "learning_rate": 0.002, "loss": 2.3371, "step": 344550 }, { "epoch": 1.331972599774242, "grad_norm": 0.10596875101327896, "learning_rate": 0.002, "loss": 2.3282, "step": 344560 }, { "epoch": 1.3320112569776252, "grad_norm": 0.11349476128816605, "learning_rate": 0.002, "loss": 2.3211, "step": 344570 }, { "epoch": 1.3320499141810085, "grad_norm": 0.1171863004565239, "learning_rate": 0.002, "loss": 2.3299, "step": 344580 }, { "epoch": 1.3320885713843917, "grad_norm": 0.09667433053255081, "learning_rate": 0.002, "loss": 2.3469, "step": 344590 }, { "epoch": 1.332127228587775, "grad_norm": 0.10653450340032578, "learning_rate": 0.002, "loss": 2.3274, "step": 344600 }, { "epoch": 1.3321658857911582, "grad_norm": 0.10144241154193878, "learning_rate": 0.002, "loss": 2.3208, "step": 344610 }, { "epoch": 1.3322045429945417, "grad_norm": 0.11736975610256195, "learning_rate": 0.002, "loss": 2.3398, "step": 344620 }, { "epoch": 1.332243200197925, "grad_norm": 0.09723929315805435, "learning_rate": 0.002, "loss": 2.3407, "step": 344630 }, { "epoch": 1.3322818574013082, "grad_norm": 0.11661922931671143, "learning_rate": 0.002, "loss": 2.3285, "step": 344640 }, { "epoch": 1.3323205146046915, "grad_norm": 0.10617439448833466, "learning_rate": 0.002, "loss": 2.3331, "step": 344650 }, { "epoch": 1.3323591718080747, "grad_norm": 0.11689261347055435, "learning_rate": 0.002, "loss": 2.3289, "step": 344660 }, { "epoch": 1.332397829011458, "grad_norm": 0.10449826717376709, "learning_rate": 0.002, "loss": 2.337, "step": 344670 }, { "epoch": 1.3324364862148412, "grad_norm": 0.09332484751939774, "learning_rate": 0.002, "loss": 2.3423, "step": 344680 }, { "epoch": 1.3324751434182245, "grad_norm": 0.11841703951358795, "learning_rate": 0.002, "loss": 2.3253, "step": 344690 }, { "epoch": 1.3325138006216077, "grad_norm": 0.09929771721363068, "learning_rate": 0.002, "loss": 2.347, "step": 344700 }, { "epoch": 1.332552457824991, "grad_norm": 0.1107405349612236, "learning_rate": 0.002, "loss": 2.3314, "step": 344710 }, { "epoch": 1.3325911150283745, "grad_norm": 0.11031211912631989, "learning_rate": 0.002, "loss": 2.3433, "step": 344720 }, { "epoch": 1.3326297722317577, "grad_norm": 0.1112285703420639, "learning_rate": 0.002, "loss": 2.3343, "step": 344730 }, { "epoch": 1.332668429435141, "grad_norm": 0.09748245030641556, "learning_rate": 0.002, "loss": 2.3495, "step": 344740 }, { "epoch": 1.3327070866385242, "grad_norm": 0.11218224465847015, "learning_rate": 0.002, "loss": 2.3304, "step": 344750 }, { "epoch": 1.3327457438419075, "grad_norm": 0.12697817385196686, "learning_rate": 0.002, "loss": 2.3345, "step": 344760 }, { "epoch": 1.3327844010452907, "grad_norm": 0.10008467733860016, "learning_rate": 0.002, "loss": 2.3279, "step": 344770 }, { "epoch": 1.332823058248674, "grad_norm": 0.09765961021184921, "learning_rate": 0.002, "loss": 2.3432, "step": 344780 }, { "epoch": 1.3328617154520574, "grad_norm": 0.09477970749139786, "learning_rate": 0.002, "loss": 2.3287, "step": 344790 }, { "epoch": 1.3329003726554407, "grad_norm": 0.09909243881702423, "learning_rate": 0.002, "loss": 2.3486, "step": 344800 }, { "epoch": 1.332939029858824, "grad_norm": 0.1287049949169159, "learning_rate": 0.002, "loss": 2.3171, "step": 344810 }, { "epoch": 1.3329776870622072, "grad_norm": 0.09282911568880081, "learning_rate": 0.002, "loss": 2.339, "step": 344820 }, { "epoch": 1.3330163442655905, "grad_norm": 0.1277105212211609, "learning_rate": 0.002, "loss": 2.3204, "step": 344830 }, { "epoch": 1.3330550014689737, "grad_norm": 0.1023770347237587, "learning_rate": 0.002, "loss": 2.3288, "step": 344840 }, { "epoch": 1.333093658672357, "grad_norm": 0.10521621257066727, "learning_rate": 0.002, "loss": 2.3453, "step": 344850 }, { "epoch": 1.3331323158757402, "grad_norm": 0.11044228821992874, "learning_rate": 0.002, "loss": 2.3392, "step": 344860 }, { "epoch": 1.3331709730791235, "grad_norm": 0.11694236099720001, "learning_rate": 0.002, "loss": 2.3412, "step": 344870 }, { "epoch": 1.3332096302825067, "grad_norm": 0.1320260465145111, "learning_rate": 0.002, "loss": 2.3501, "step": 344880 }, { "epoch": 1.3332482874858902, "grad_norm": 0.12980304658412933, "learning_rate": 0.002, "loss": 2.3279, "step": 344890 }, { "epoch": 1.3332869446892734, "grad_norm": 0.11151394248008728, "learning_rate": 0.002, "loss": 2.3408, "step": 344900 }, { "epoch": 1.3333256018926567, "grad_norm": 0.09492821246385574, "learning_rate": 0.002, "loss": 2.3198, "step": 344910 }, { "epoch": 1.33336425909604, "grad_norm": 0.11971234530210495, "learning_rate": 0.002, "loss": 2.3246, "step": 344920 }, { "epoch": 1.3334029162994232, "grad_norm": 0.10840121656656265, "learning_rate": 0.002, "loss": 2.3369, "step": 344930 }, { "epoch": 1.3334415735028065, "grad_norm": 0.10081777721643448, "learning_rate": 0.002, "loss": 2.3497, "step": 344940 }, { "epoch": 1.33348023070619, "grad_norm": 0.09995166212320328, "learning_rate": 0.002, "loss": 2.337, "step": 344950 }, { "epoch": 1.3335188879095732, "grad_norm": 0.10326774418354034, "learning_rate": 0.002, "loss": 2.3303, "step": 344960 }, { "epoch": 1.3335575451129564, "grad_norm": 0.1096605733036995, "learning_rate": 0.002, "loss": 2.3168, "step": 344970 }, { "epoch": 1.3335962023163397, "grad_norm": 0.08859793841838837, "learning_rate": 0.002, "loss": 2.3169, "step": 344980 }, { "epoch": 1.333634859519723, "grad_norm": 0.12324798107147217, "learning_rate": 0.002, "loss": 2.3396, "step": 344990 }, { "epoch": 1.3336735167231062, "grad_norm": 0.09335483610630035, "learning_rate": 0.002, "loss": 2.3445, "step": 345000 }, { "epoch": 1.3337121739264894, "grad_norm": 0.09280901402235031, "learning_rate": 0.002, "loss": 2.3107, "step": 345010 }, { "epoch": 1.3337508311298727, "grad_norm": 0.10214249044656754, "learning_rate": 0.002, "loss": 2.3311, "step": 345020 }, { "epoch": 1.333789488333256, "grad_norm": 0.09952248632907867, "learning_rate": 0.002, "loss": 2.3303, "step": 345030 }, { "epoch": 1.3338281455366392, "grad_norm": 0.10830455273389816, "learning_rate": 0.002, "loss": 2.3425, "step": 345040 }, { "epoch": 1.3338668027400225, "grad_norm": 0.10797925293445587, "learning_rate": 0.002, "loss": 2.3233, "step": 345050 }, { "epoch": 1.333905459943406, "grad_norm": 0.1023314893245697, "learning_rate": 0.002, "loss": 2.3384, "step": 345060 }, { "epoch": 1.3339441171467892, "grad_norm": 0.11088632792234421, "learning_rate": 0.002, "loss": 2.3367, "step": 345070 }, { "epoch": 1.3339827743501724, "grad_norm": 0.10849376022815704, "learning_rate": 0.002, "loss": 2.334, "step": 345080 }, { "epoch": 1.3340214315535557, "grad_norm": 0.10615264624357224, "learning_rate": 0.002, "loss": 2.3348, "step": 345090 }, { "epoch": 1.334060088756939, "grad_norm": 0.11973266303539276, "learning_rate": 0.002, "loss": 2.3458, "step": 345100 }, { "epoch": 1.3340987459603222, "grad_norm": 0.11139839887619019, "learning_rate": 0.002, "loss": 2.3288, "step": 345110 }, { "epoch": 1.3341374031637057, "grad_norm": 0.1239207535982132, "learning_rate": 0.002, "loss": 2.3585, "step": 345120 }, { "epoch": 1.334176060367089, "grad_norm": 0.09981207549571991, "learning_rate": 0.002, "loss": 2.3178, "step": 345130 }, { "epoch": 1.3342147175704722, "grad_norm": 0.1028282567858696, "learning_rate": 0.002, "loss": 2.3297, "step": 345140 }, { "epoch": 1.3342533747738554, "grad_norm": 0.11014251410961151, "learning_rate": 0.002, "loss": 2.3367, "step": 345150 }, { "epoch": 1.3342920319772387, "grad_norm": 0.14106076955795288, "learning_rate": 0.002, "loss": 2.3328, "step": 345160 }, { "epoch": 1.334330689180622, "grad_norm": 0.09924093633890152, "learning_rate": 0.002, "loss": 2.3265, "step": 345170 }, { "epoch": 1.3343693463840052, "grad_norm": 0.1121000200510025, "learning_rate": 0.002, "loss": 2.3211, "step": 345180 }, { "epoch": 1.3344080035873884, "grad_norm": 0.10735615342855453, "learning_rate": 0.002, "loss": 2.3275, "step": 345190 }, { "epoch": 1.3344466607907717, "grad_norm": 0.09548373520374298, "learning_rate": 0.002, "loss": 2.3274, "step": 345200 }, { "epoch": 1.334485317994155, "grad_norm": 0.10804015398025513, "learning_rate": 0.002, "loss": 2.3259, "step": 345210 }, { "epoch": 1.3345239751975382, "grad_norm": 0.11735550314188004, "learning_rate": 0.002, "loss": 2.3375, "step": 345220 }, { "epoch": 1.3345626324009217, "grad_norm": 0.10910956561565399, "learning_rate": 0.002, "loss": 2.3244, "step": 345230 }, { "epoch": 1.334601289604305, "grad_norm": 0.11945263296365738, "learning_rate": 0.002, "loss": 2.3416, "step": 345240 }, { "epoch": 1.3346399468076882, "grad_norm": 0.09623727947473526, "learning_rate": 0.002, "loss": 2.3269, "step": 345250 }, { "epoch": 1.3346786040110714, "grad_norm": 0.11667291074991226, "learning_rate": 0.002, "loss": 2.3342, "step": 345260 }, { "epoch": 1.3347172612144547, "grad_norm": 0.12302277237176895, "learning_rate": 0.002, "loss": 2.3355, "step": 345270 }, { "epoch": 1.334755918417838, "grad_norm": 0.10679807513952255, "learning_rate": 0.002, "loss": 2.3168, "step": 345280 }, { "epoch": 1.3347945756212214, "grad_norm": 0.09852226823568344, "learning_rate": 0.002, "loss": 2.332, "step": 345290 }, { "epoch": 1.3348332328246046, "grad_norm": 0.10933008790016174, "learning_rate": 0.002, "loss": 2.328, "step": 345300 }, { "epoch": 1.334871890027988, "grad_norm": 0.09499634057283401, "learning_rate": 0.002, "loss": 2.3354, "step": 345310 }, { "epoch": 1.3349105472313711, "grad_norm": 0.11454311013221741, "learning_rate": 0.002, "loss": 2.3338, "step": 345320 }, { "epoch": 1.3349492044347544, "grad_norm": 0.10472351312637329, "learning_rate": 0.002, "loss": 2.332, "step": 345330 }, { "epoch": 1.3349878616381377, "grad_norm": 0.10407882183790207, "learning_rate": 0.002, "loss": 2.3304, "step": 345340 }, { "epoch": 1.335026518841521, "grad_norm": 0.10216553509235382, "learning_rate": 0.002, "loss": 2.3471, "step": 345350 }, { "epoch": 1.3350651760449042, "grad_norm": 0.12331422418355942, "learning_rate": 0.002, "loss": 2.3167, "step": 345360 }, { "epoch": 1.3351038332482874, "grad_norm": 0.10996781289577484, "learning_rate": 0.002, "loss": 2.3429, "step": 345370 }, { "epoch": 1.3351424904516707, "grad_norm": 0.09760157763957977, "learning_rate": 0.002, "loss": 2.3337, "step": 345380 }, { "epoch": 1.335181147655054, "grad_norm": 0.0954732596874237, "learning_rate": 0.002, "loss": 2.3139, "step": 345390 }, { "epoch": 1.3352198048584374, "grad_norm": 0.10300255566835403, "learning_rate": 0.002, "loss": 2.3446, "step": 345400 }, { "epoch": 1.3352584620618206, "grad_norm": 0.0967373326420784, "learning_rate": 0.002, "loss": 2.3361, "step": 345410 }, { "epoch": 1.335297119265204, "grad_norm": 0.110762819647789, "learning_rate": 0.002, "loss": 2.3383, "step": 345420 }, { "epoch": 1.3353357764685871, "grad_norm": 0.10032440721988678, "learning_rate": 0.002, "loss": 2.324, "step": 345430 }, { "epoch": 1.3353744336719704, "grad_norm": 0.1212349459528923, "learning_rate": 0.002, "loss": 2.3427, "step": 345440 }, { "epoch": 1.3354130908753536, "grad_norm": 0.1062375158071518, "learning_rate": 0.002, "loss": 2.3467, "step": 345450 }, { "epoch": 1.3354517480787371, "grad_norm": 0.1182907298207283, "learning_rate": 0.002, "loss": 2.3338, "step": 345460 }, { "epoch": 1.3354904052821204, "grad_norm": 0.10918321460485458, "learning_rate": 0.002, "loss": 2.3268, "step": 345470 }, { "epoch": 1.3355290624855036, "grad_norm": 0.1067313551902771, "learning_rate": 0.002, "loss": 2.3382, "step": 345480 }, { "epoch": 1.3355677196888869, "grad_norm": 0.09972357749938965, "learning_rate": 0.002, "loss": 2.3346, "step": 345490 }, { "epoch": 1.3356063768922701, "grad_norm": 0.12399875372648239, "learning_rate": 0.002, "loss": 2.3384, "step": 345500 }, { "epoch": 1.3356450340956534, "grad_norm": 0.10412443429231644, "learning_rate": 0.002, "loss": 2.3342, "step": 345510 }, { "epoch": 1.3356836912990366, "grad_norm": 0.11216738820075989, "learning_rate": 0.002, "loss": 2.3383, "step": 345520 }, { "epoch": 1.3357223485024199, "grad_norm": 0.10026592761278152, "learning_rate": 0.002, "loss": 2.3446, "step": 345530 }, { "epoch": 1.3357610057058031, "grad_norm": 0.11337658017873764, "learning_rate": 0.002, "loss": 2.3332, "step": 345540 }, { "epoch": 1.3357996629091864, "grad_norm": 0.1043723002076149, "learning_rate": 0.002, "loss": 2.3151, "step": 345550 }, { "epoch": 1.3358383201125696, "grad_norm": 0.09790969640016556, "learning_rate": 0.002, "loss": 2.3194, "step": 345560 }, { "epoch": 1.3358769773159531, "grad_norm": 0.1073826476931572, "learning_rate": 0.002, "loss": 2.318, "step": 345570 }, { "epoch": 1.3359156345193364, "grad_norm": 0.1073329970240593, "learning_rate": 0.002, "loss": 2.3371, "step": 345580 }, { "epoch": 1.3359542917227196, "grad_norm": 0.09542405605316162, "learning_rate": 0.002, "loss": 2.3308, "step": 345590 }, { "epoch": 1.3359929489261029, "grad_norm": 0.10292265564203262, "learning_rate": 0.002, "loss": 2.3435, "step": 345600 }, { "epoch": 1.3360316061294861, "grad_norm": 0.1295763999223709, "learning_rate": 0.002, "loss": 2.3409, "step": 345610 }, { "epoch": 1.3360702633328694, "grad_norm": 0.1814815104007721, "learning_rate": 0.002, "loss": 2.3331, "step": 345620 }, { "epoch": 1.3361089205362529, "grad_norm": 0.10305207967758179, "learning_rate": 0.002, "loss": 2.3282, "step": 345630 }, { "epoch": 1.336147577739636, "grad_norm": 0.11229586601257324, "learning_rate": 0.002, "loss": 2.3351, "step": 345640 }, { "epoch": 1.3361862349430194, "grad_norm": 0.10201216489076614, "learning_rate": 0.002, "loss": 2.345, "step": 345650 }, { "epoch": 1.3362248921464026, "grad_norm": 0.10922586172819138, "learning_rate": 0.002, "loss": 2.3245, "step": 345660 }, { "epoch": 1.3362635493497859, "grad_norm": 0.09847729653120041, "learning_rate": 0.002, "loss": 2.3293, "step": 345670 }, { "epoch": 1.3363022065531691, "grad_norm": 0.10319638252258301, "learning_rate": 0.002, "loss": 2.332, "step": 345680 }, { "epoch": 1.3363408637565524, "grad_norm": 0.12661710381507874, "learning_rate": 0.002, "loss": 2.3433, "step": 345690 }, { "epoch": 1.3363795209599356, "grad_norm": 0.12502607703208923, "learning_rate": 0.002, "loss": 2.3214, "step": 345700 }, { "epoch": 1.3364181781633189, "grad_norm": 0.10091539472341537, "learning_rate": 0.002, "loss": 2.3565, "step": 345710 }, { "epoch": 1.3364568353667021, "grad_norm": 0.10177405923604965, "learning_rate": 0.002, "loss": 2.3532, "step": 345720 }, { "epoch": 1.3364954925700854, "grad_norm": 0.16185766458511353, "learning_rate": 0.002, "loss": 2.3304, "step": 345730 }, { "epoch": 1.3365341497734688, "grad_norm": 0.09906337410211563, "learning_rate": 0.002, "loss": 2.3328, "step": 345740 }, { "epoch": 1.336572806976852, "grad_norm": 0.12838874757289886, "learning_rate": 0.002, "loss": 2.3435, "step": 345750 }, { "epoch": 1.3366114641802354, "grad_norm": 0.10789935290813446, "learning_rate": 0.002, "loss": 2.3387, "step": 345760 }, { "epoch": 1.3366501213836186, "grad_norm": 0.09930015355348587, "learning_rate": 0.002, "loss": 2.3479, "step": 345770 }, { "epoch": 1.3366887785870019, "grad_norm": 0.10896844416856766, "learning_rate": 0.002, "loss": 2.3317, "step": 345780 }, { "epoch": 1.336727435790385, "grad_norm": 0.10427610576152802, "learning_rate": 0.002, "loss": 2.3327, "step": 345790 }, { "epoch": 1.3367660929937686, "grad_norm": 0.14572319388389587, "learning_rate": 0.002, "loss": 2.3313, "step": 345800 }, { "epoch": 1.3368047501971518, "grad_norm": 0.10233652591705322, "learning_rate": 0.002, "loss": 2.3302, "step": 345810 }, { "epoch": 1.336843407400535, "grad_norm": 0.107231505215168, "learning_rate": 0.002, "loss": 2.3441, "step": 345820 }, { "epoch": 1.3368820646039183, "grad_norm": 0.0979439839720726, "learning_rate": 0.002, "loss": 2.3362, "step": 345830 }, { "epoch": 1.3369207218073016, "grad_norm": 0.09307397156953812, "learning_rate": 0.002, "loss": 2.3256, "step": 345840 }, { "epoch": 1.3369593790106848, "grad_norm": 0.10109108686447144, "learning_rate": 0.002, "loss": 2.339, "step": 345850 }, { "epoch": 1.336998036214068, "grad_norm": 0.10365600138902664, "learning_rate": 0.002, "loss": 2.3252, "step": 345860 }, { "epoch": 1.3370366934174513, "grad_norm": 0.10309231281280518, "learning_rate": 0.002, "loss": 2.3385, "step": 345870 }, { "epoch": 1.3370753506208346, "grad_norm": 0.13914698362350464, "learning_rate": 0.002, "loss": 2.315, "step": 345880 }, { "epoch": 1.3371140078242179, "grad_norm": 0.1049744263291359, "learning_rate": 0.002, "loss": 2.3263, "step": 345890 }, { "epoch": 1.337152665027601, "grad_norm": 0.10597793012857437, "learning_rate": 0.002, "loss": 2.3318, "step": 345900 }, { "epoch": 1.3371913222309846, "grad_norm": 0.10046493262052536, "learning_rate": 0.002, "loss": 2.3269, "step": 345910 }, { "epoch": 1.3372299794343678, "grad_norm": 0.09887133538722992, "learning_rate": 0.002, "loss": 2.3349, "step": 345920 }, { "epoch": 1.337268636637751, "grad_norm": 0.19354459643363953, "learning_rate": 0.002, "loss": 2.3336, "step": 345930 }, { "epoch": 1.3373072938411343, "grad_norm": 0.10834994912147522, "learning_rate": 0.002, "loss": 2.3351, "step": 345940 }, { "epoch": 1.3373459510445176, "grad_norm": 0.1194472461938858, "learning_rate": 0.002, "loss": 2.3521, "step": 345950 }, { "epoch": 1.3373846082479008, "grad_norm": 0.09798796474933624, "learning_rate": 0.002, "loss": 2.3359, "step": 345960 }, { "epoch": 1.3374232654512843, "grad_norm": 0.09186162054538727, "learning_rate": 0.002, "loss": 2.3375, "step": 345970 }, { "epoch": 1.3374619226546676, "grad_norm": 0.10261370986700058, "learning_rate": 0.002, "loss": 2.3441, "step": 345980 }, { "epoch": 1.3375005798580508, "grad_norm": 0.1267152577638626, "learning_rate": 0.002, "loss": 2.3224, "step": 345990 }, { "epoch": 1.337539237061434, "grad_norm": 0.10745598375797272, "learning_rate": 0.002, "loss": 2.3301, "step": 346000 }, { "epoch": 1.3375778942648173, "grad_norm": 0.10257593542337418, "learning_rate": 0.002, "loss": 2.3197, "step": 346010 }, { "epoch": 1.3376165514682006, "grad_norm": 0.11340894550085068, "learning_rate": 0.002, "loss": 2.3295, "step": 346020 }, { "epoch": 1.3376552086715838, "grad_norm": 0.1047341376543045, "learning_rate": 0.002, "loss": 2.333, "step": 346030 }, { "epoch": 1.337693865874967, "grad_norm": 0.10924689471721649, "learning_rate": 0.002, "loss": 2.3156, "step": 346040 }, { "epoch": 1.3377325230783503, "grad_norm": 0.10795491933822632, "learning_rate": 0.002, "loss": 2.3413, "step": 346050 }, { "epoch": 1.3377711802817336, "grad_norm": 0.10510119795799255, "learning_rate": 0.002, "loss": 2.3336, "step": 346060 }, { "epoch": 1.337809837485117, "grad_norm": 0.10001824796199799, "learning_rate": 0.002, "loss": 2.3573, "step": 346070 }, { "epoch": 1.3378484946885003, "grad_norm": 0.12009944766759872, "learning_rate": 0.002, "loss": 2.3369, "step": 346080 }, { "epoch": 1.3378871518918836, "grad_norm": 0.10906676948070526, "learning_rate": 0.002, "loss": 2.341, "step": 346090 }, { "epoch": 1.3379258090952668, "grad_norm": 0.09728783369064331, "learning_rate": 0.002, "loss": 2.3278, "step": 346100 }, { "epoch": 1.33796446629865, "grad_norm": 0.10508811473846436, "learning_rate": 0.002, "loss": 2.3379, "step": 346110 }, { "epoch": 1.3380031235020333, "grad_norm": 0.09222324192523956, "learning_rate": 0.002, "loss": 2.3399, "step": 346120 }, { "epoch": 1.3380417807054166, "grad_norm": 0.10402661561965942, "learning_rate": 0.002, "loss": 2.3282, "step": 346130 }, { "epoch": 1.3380804379088, "grad_norm": 0.12433262169361115, "learning_rate": 0.002, "loss": 2.3397, "step": 346140 }, { "epoch": 1.3381190951121833, "grad_norm": 0.10922899842262268, "learning_rate": 0.002, "loss": 2.3146, "step": 346150 }, { "epoch": 1.3381577523155666, "grad_norm": 0.10978659242391586, "learning_rate": 0.002, "loss": 2.3269, "step": 346160 }, { "epoch": 1.3381964095189498, "grad_norm": 0.10603276640176773, "learning_rate": 0.002, "loss": 2.3332, "step": 346170 }, { "epoch": 1.338235066722333, "grad_norm": 0.0904986634850502, "learning_rate": 0.002, "loss": 2.318, "step": 346180 }, { "epoch": 1.3382737239257163, "grad_norm": 0.10122311115264893, "learning_rate": 0.002, "loss": 2.3446, "step": 346190 }, { "epoch": 1.3383123811290996, "grad_norm": 0.09152434021234512, "learning_rate": 0.002, "loss": 2.3452, "step": 346200 }, { "epoch": 1.3383510383324828, "grad_norm": 0.1075081005692482, "learning_rate": 0.002, "loss": 2.3324, "step": 346210 }, { "epoch": 1.338389695535866, "grad_norm": 0.11105257272720337, "learning_rate": 0.002, "loss": 2.3267, "step": 346220 }, { "epoch": 1.3384283527392493, "grad_norm": 0.0925607830286026, "learning_rate": 0.002, "loss": 2.3481, "step": 346230 }, { "epoch": 1.3384670099426328, "grad_norm": 0.11009430140256882, "learning_rate": 0.002, "loss": 2.3369, "step": 346240 }, { "epoch": 1.338505667146016, "grad_norm": 0.1331329345703125, "learning_rate": 0.002, "loss": 2.3444, "step": 346250 }, { "epoch": 1.3385443243493993, "grad_norm": 0.11093542724847794, "learning_rate": 0.002, "loss": 2.326, "step": 346260 }, { "epoch": 1.3385829815527825, "grad_norm": 0.08782430738210678, "learning_rate": 0.002, "loss": 2.3467, "step": 346270 }, { "epoch": 1.3386216387561658, "grad_norm": 0.10207182168960571, "learning_rate": 0.002, "loss": 2.3398, "step": 346280 }, { "epoch": 1.338660295959549, "grad_norm": 0.11096299439668655, "learning_rate": 0.002, "loss": 2.3401, "step": 346290 }, { "epoch": 1.3386989531629323, "grad_norm": 0.1043325737118721, "learning_rate": 0.002, "loss": 2.3372, "step": 346300 }, { "epoch": 1.3387376103663158, "grad_norm": 0.10775876045227051, "learning_rate": 0.002, "loss": 2.3231, "step": 346310 }, { "epoch": 1.338776267569699, "grad_norm": 0.11067583411931992, "learning_rate": 0.002, "loss": 2.3279, "step": 346320 }, { "epoch": 1.3388149247730823, "grad_norm": 0.11076585203409195, "learning_rate": 0.002, "loss": 2.3273, "step": 346330 }, { "epoch": 1.3388535819764655, "grad_norm": 0.11195888370275497, "learning_rate": 0.002, "loss": 2.3262, "step": 346340 }, { "epoch": 1.3388922391798488, "grad_norm": 0.0904572531580925, "learning_rate": 0.002, "loss": 2.3309, "step": 346350 }, { "epoch": 1.338930896383232, "grad_norm": 0.11926350742578506, "learning_rate": 0.002, "loss": 2.3218, "step": 346360 }, { "epoch": 1.3389695535866153, "grad_norm": 0.10238216817378998, "learning_rate": 0.002, "loss": 2.3287, "step": 346370 }, { "epoch": 1.3390082107899985, "grad_norm": 0.09457932412624359, "learning_rate": 0.002, "loss": 2.3394, "step": 346380 }, { "epoch": 1.3390468679933818, "grad_norm": 0.10814918577671051, "learning_rate": 0.002, "loss": 2.3272, "step": 346390 }, { "epoch": 1.339085525196765, "grad_norm": 0.09990043193101883, "learning_rate": 0.002, "loss": 2.3535, "step": 346400 }, { "epoch": 1.3391241824001485, "grad_norm": 0.12533685564994812, "learning_rate": 0.002, "loss": 2.3305, "step": 346410 }, { "epoch": 1.3391628396035318, "grad_norm": 0.10196174681186676, "learning_rate": 0.002, "loss": 2.3358, "step": 346420 }, { "epoch": 1.339201496806915, "grad_norm": 0.10151616483926773, "learning_rate": 0.002, "loss": 2.3401, "step": 346430 }, { "epoch": 1.3392401540102983, "grad_norm": 0.11178027838468552, "learning_rate": 0.002, "loss": 2.3245, "step": 346440 }, { "epoch": 1.3392788112136815, "grad_norm": 0.0982932522892952, "learning_rate": 0.002, "loss": 2.3296, "step": 346450 }, { "epoch": 1.3393174684170648, "grad_norm": 0.11447232961654663, "learning_rate": 0.002, "loss": 2.3281, "step": 346460 }, { "epoch": 1.339356125620448, "grad_norm": 0.1159837618470192, "learning_rate": 0.002, "loss": 2.3294, "step": 346470 }, { "epoch": 1.3393947828238315, "grad_norm": 0.1221732422709465, "learning_rate": 0.002, "loss": 2.3313, "step": 346480 }, { "epoch": 1.3394334400272148, "grad_norm": 0.11282023787498474, "learning_rate": 0.002, "loss": 2.3339, "step": 346490 }, { "epoch": 1.339472097230598, "grad_norm": 0.10463476926088333, "learning_rate": 0.002, "loss": 2.3342, "step": 346500 }, { "epoch": 1.3395107544339813, "grad_norm": 0.11097527295351028, "learning_rate": 0.002, "loss": 2.3503, "step": 346510 }, { "epoch": 1.3395494116373645, "grad_norm": 0.1008048802614212, "learning_rate": 0.002, "loss": 2.3315, "step": 346520 }, { "epoch": 1.3395880688407478, "grad_norm": 0.12519340217113495, "learning_rate": 0.002, "loss": 2.3437, "step": 346530 }, { "epoch": 1.339626726044131, "grad_norm": 0.13712844252586365, "learning_rate": 0.002, "loss": 2.3355, "step": 346540 }, { "epoch": 1.3396653832475143, "grad_norm": 0.11297990381717682, "learning_rate": 0.002, "loss": 2.3401, "step": 346550 }, { "epoch": 1.3397040404508975, "grad_norm": 0.09795337170362473, "learning_rate": 0.002, "loss": 2.3275, "step": 346560 }, { "epoch": 1.3397426976542808, "grad_norm": 0.09507597237825394, "learning_rate": 0.002, "loss": 2.344, "step": 346570 }, { "epoch": 1.3397813548576643, "grad_norm": 0.1199960857629776, "learning_rate": 0.002, "loss": 2.3379, "step": 346580 }, { "epoch": 1.3398200120610475, "grad_norm": 0.11006709933280945, "learning_rate": 0.002, "loss": 2.3367, "step": 346590 }, { "epoch": 1.3398586692644308, "grad_norm": 0.09973582625389099, "learning_rate": 0.002, "loss": 2.3199, "step": 346600 }, { "epoch": 1.339897326467814, "grad_norm": 0.09433923661708832, "learning_rate": 0.002, "loss": 2.332, "step": 346610 }, { "epoch": 1.3399359836711973, "grad_norm": 0.10537783801555634, "learning_rate": 0.002, "loss": 2.3242, "step": 346620 }, { "epoch": 1.3399746408745805, "grad_norm": 0.10902126133441925, "learning_rate": 0.002, "loss": 2.3403, "step": 346630 }, { "epoch": 1.3400132980779638, "grad_norm": 0.1410796344280243, "learning_rate": 0.002, "loss": 2.3378, "step": 346640 }, { "epoch": 1.3400519552813472, "grad_norm": 0.09471646696329117, "learning_rate": 0.002, "loss": 2.3482, "step": 346650 }, { "epoch": 1.3400906124847305, "grad_norm": 0.09297826886177063, "learning_rate": 0.002, "loss": 2.3399, "step": 346660 }, { "epoch": 1.3401292696881137, "grad_norm": 0.09805559366941452, "learning_rate": 0.002, "loss": 2.3256, "step": 346670 }, { "epoch": 1.340167926891497, "grad_norm": 0.1065869852900505, "learning_rate": 0.002, "loss": 2.3305, "step": 346680 }, { "epoch": 1.3402065840948802, "grad_norm": 0.1044825091958046, "learning_rate": 0.002, "loss": 2.3315, "step": 346690 }, { "epoch": 1.3402452412982635, "grad_norm": 0.12405242770910263, "learning_rate": 0.002, "loss": 2.3411, "step": 346700 }, { "epoch": 1.3402838985016468, "grad_norm": 0.096046082675457, "learning_rate": 0.002, "loss": 2.3288, "step": 346710 }, { "epoch": 1.34032255570503, "grad_norm": 0.11131946742534637, "learning_rate": 0.002, "loss": 2.3306, "step": 346720 }, { "epoch": 1.3403612129084133, "grad_norm": 0.10490655899047852, "learning_rate": 0.002, "loss": 2.3393, "step": 346730 }, { "epoch": 1.3403998701117965, "grad_norm": 0.09529917687177658, "learning_rate": 0.002, "loss": 2.3307, "step": 346740 }, { "epoch": 1.34043852731518, "grad_norm": 0.09577424079179764, "learning_rate": 0.002, "loss": 2.3256, "step": 346750 }, { "epoch": 1.3404771845185632, "grad_norm": 0.10369036346673965, "learning_rate": 0.002, "loss": 2.3484, "step": 346760 }, { "epoch": 1.3405158417219465, "grad_norm": 0.09698975831270218, "learning_rate": 0.002, "loss": 2.3425, "step": 346770 }, { "epoch": 1.3405544989253297, "grad_norm": 0.11344336718320847, "learning_rate": 0.002, "loss": 2.3456, "step": 346780 }, { "epoch": 1.340593156128713, "grad_norm": 0.09168387204408646, "learning_rate": 0.002, "loss": 2.3184, "step": 346790 }, { "epoch": 1.3406318133320962, "grad_norm": 0.09688182920217514, "learning_rate": 0.002, "loss": 2.3211, "step": 346800 }, { "epoch": 1.3406704705354797, "grad_norm": 0.11499480158090591, "learning_rate": 0.002, "loss": 2.3229, "step": 346810 }, { "epoch": 1.340709127738863, "grad_norm": 0.10256528854370117, "learning_rate": 0.002, "loss": 2.3342, "step": 346820 }, { "epoch": 1.3407477849422462, "grad_norm": 0.11224600672721863, "learning_rate": 0.002, "loss": 2.3186, "step": 346830 }, { "epoch": 1.3407864421456295, "grad_norm": 0.11144499480724335, "learning_rate": 0.002, "loss": 2.3335, "step": 346840 }, { "epoch": 1.3408250993490127, "grad_norm": 0.09368912875652313, "learning_rate": 0.002, "loss": 2.34, "step": 346850 }, { "epoch": 1.340863756552396, "grad_norm": 0.10852344334125519, "learning_rate": 0.002, "loss": 2.3391, "step": 346860 }, { "epoch": 1.3409024137557792, "grad_norm": 0.09394946694374084, "learning_rate": 0.002, "loss": 2.3306, "step": 346870 }, { "epoch": 1.3409410709591625, "grad_norm": 0.10553434491157532, "learning_rate": 0.002, "loss": 2.318, "step": 346880 }, { "epoch": 1.3409797281625457, "grad_norm": 0.15650472044944763, "learning_rate": 0.002, "loss": 2.3417, "step": 346890 }, { "epoch": 1.341018385365929, "grad_norm": 0.10130909085273743, "learning_rate": 0.002, "loss": 2.3398, "step": 346900 }, { "epoch": 1.3410570425693122, "grad_norm": 0.109117291867733, "learning_rate": 0.002, "loss": 2.3297, "step": 346910 }, { "epoch": 1.3410956997726957, "grad_norm": 0.11375482380390167, "learning_rate": 0.002, "loss": 2.3257, "step": 346920 }, { "epoch": 1.341134356976079, "grad_norm": 0.13961325585842133, "learning_rate": 0.002, "loss": 2.3362, "step": 346930 }, { "epoch": 1.3411730141794622, "grad_norm": 0.10730651766061783, "learning_rate": 0.002, "loss": 2.3507, "step": 346940 }, { "epoch": 1.3412116713828455, "grad_norm": 0.10354288667440414, "learning_rate": 0.002, "loss": 2.3396, "step": 346950 }, { "epoch": 1.3412503285862287, "grad_norm": 0.10738546401262283, "learning_rate": 0.002, "loss": 2.3331, "step": 346960 }, { "epoch": 1.341288985789612, "grad_norm": 0.10498213022947311, "learning_rate": 0.002, "loss": 2.3317, "step": 346970 }, { "epoch": 1.3413276429929955, "grad_norm": 0.0919991061091423, "learning_rate": 0.002, "loss": 2.3246, "step": 346980 }, { "epoch": 1.3413663001963787, "grad_norm": 0.1167188286781311, "learning_rate": 0.002, "loss": 2.3327, "step": 346990 }, { "epoch": 1.341404957399762, "grad_norm": 0.11551283299922943, "learning_rate": 0.002, "loss": 2.327, "step": 347000 }, { "epoch": 1.3414436146031452, "grad_norm": 0.11017212271690369, "learning_rate": 0.002, "loss": 2.3337, "step": 347010 }, { "epoch": 1.3414822718065285, "grad_norm": 0.1336277276277542, "learning_rate": 0.002, "loss": 2.322, "step": 347020 }, { "epoch": 1.3415209290099117, "grad_norm": 0.09945376962423325, "learning_rate": 0.002, "loss": 2.3362, "step": 347030 }, { "epoch": 1.341559586213295, "grad_norm": 0.0978284403681755, "learning_rate": 0.002, "loss": 2.3235, "step": 347040 }, { "epoch": 1.3415982434166782, "grad_norm": 0.11005248874425888, "learning_rate": 0.002, "loss": 2.3533, "step": 347050 }, { "epoch": 1.3416369006200615, "grad_norm": 0.11907824128866196, "learning_rate": 0.002, "loss": 2.3492, "step": 347060 }, { "epoch": 1.3416755578234447, "grad_norm": 0.10550861805677414, "learning_rate": 0.002, "loss": 2.3186, "step": 347070 }, { "epoch": 1.341714215026828, "grad_norm": 0.0961516946554184, "learning_rate": 0.002, "loss": 2.3302, "step": 347080 }, { "epoch": 1.3417528722302114, "grad_norm": 0.12996280193328857, "learning_rate": 0.002, "loss": 2.3353, "step": 347090 }, { "epoch": 1.3417915294335947, "grad_norm": 0.1039133295416832, "learning_rate": 0.002, "loss": 2.3371, "step": 347100 }, { "epoch": 1.341830186636978, "grad_norm": 0.09067199379205704, "learning_rate": 0.002, "loss": 2.3309, "step": 347110 }, { "epoch": 1.3418688438403612, "grad_norm": 0.12177203595638275, "learning_rate": 0.002, "loss": 2.3286, "step": 347120 }, { "epoch": 1.3419075010437445, "grad_norm": 0.12148578464984894, "learning_rate": 0.002, "loss": 2.3284, "step": 347130 }, { "epoch": 1.3419461582471277, "grad_norm": 0.10434716939926147, "learning_rate": 0.002, "loss": 2.3338, "step": 347140 }, { "epoch": 1.3419848154505112, "grad_norm": 0.10731692612171173, "learning_rate": 0.002, "loss": 2.3287, "step": 347150 }, { "epoch": 1.3420234726538944, "grad_norm": 0.0981302484869957, "learning_rate": 0.002, "loss": 2.33, "step": 347160 }, { "epoch": 1.3420621298572777, "grad_norm": 0.09808428585529327, "learning_rate": 0.002, "loss": 2.3337, "step": 347170 }, { "epoch": 1.342100787060661, "grad_norm": 0.1706089824438095, "learning_rate": 0.002, "loss": 2.3383, "step": 347180 }, { "epoch": 1.3421394442640442, "grad_norm": 0.12091352045536041, "learning_rate": 0.002, "loss": 2.3403, "step": 347190 }, { "epoch": 1.3421781014674274, "grad_norm": 0.10970164835453033, "learning_rate": 0.002, "loss": 2.333, "step": 347200 }, { "epoch": 1.3422167586708107, "grad_norm": 0.08676803857088089, "learning_rate": 0.002, "loss": 2.3366, "step": 347210 }, { "epoch": 1.342255415874194, "grad_norm": 0.14231640100479126, "learning_rate": 0.002, "loss": 2.3286, "step": 347220 }, { "epoch": 1.3422940730775772, "grad_norm": 0.0979641005396843, "learning_rate": 0.002, "loss": 2.3426, "step": 347230 }, { "epoch": 1.3423327302809605, "grad_norm": 0.10979917645454407, "learning_rate": 0.002, "loss": 2.3302, "step": 347240 }, { "epoch": 1.3423713874843437, "grad_norm": 0.10588161647319794, "learning_rate": 0.002, "loss": 2.3474, "step": 347250 }, { "epoch": 1.3424100446877272, "grad_norm": 0.10137680917978287, "learning_rate": 0.002, "loss": 2.3458, "step": 347260 }, { "epoch": 1.3424487018911104, "grad_norm": 0.10275550186634064, "learning_rate": 0.002, "loss": 2.3324, "step": 347270 }, { "epoch": 1.3424873590944937, "grad_norm": 0.10416662693023682, "learning_rate": 0.002, "loss": 2.3341, "step": 347280 }, { "epoch": 1.342526016297877, "grad_norm": 0.1014779582619667, "learning_rate": 0.002, "loss": 2.3345, "step": 347290 }, { "epoch": 1.3425646735012602, "grad_norm": 0.10870113223791122, "learning_rate": 0.002, "loss": 2.323, "step": 347300 }, { "epoch": 1.3426033307046434, "grad_norm": 0.11813866347074509, "learning_rate": 0.002, "loss": 2.3401, "step": 347310 }, { "epoch": 1.342641987908027, "grad_norm": 0.10769974440336227, "learning_rate": 0.002, "loss": 2.3246, "step": 347320 }, { "epoch": 1.3426806451114102, "grad_norm": 0.10103698074817657, "learning_rate": 0.002, "loss": 2.3488, "step": 347330 }, { "epoch": 1.3427193023147934, "grad_norm": 0.12572821974754333, "learning_rate": 0.002, "loss": 2.3415, "step": 347340 }, { "epoch": 1.3427579595181767, "grad_norm": 0.11496661603450775, "learning_rate": 0.002, "loss": 2.3579, "step": 347350 }, { "epoch": 1.34279661672156, "grad_norm": 0.09949234873056412, "learning_rate": 0.002, "loss": 2.349, "step": 347360 }, { "epoch": 1.3428352739249432, "grad_norm": 0.099911630153656, "learning_rate": 0.002, "loss": 2.329, "step": 347370 }, { "epoch": 1.3428739311283264, "grad_norm": 0.10400547832250595, "learning_rate": 0.002, "loss": 2.3393, "step": 347380 }, { "epoch": 1.3429125883317097, "grad_norm": 0.10007713735103607, "learning_rate": 0.002, "loss": 2.3302, "step": 347390 }, { "epoch": 1.342951245535093, "grad_norm": 0.10954345762729645, "learning_rate": 0.002, "loss": 2.3356, "step": 347400 }, { "epoch": 1.3429899027384762, "grad_norm": 0.09668856859207153, "learning_rate": 0.002, "loss": 2.3427, "step": 347410 }, { "epoch": 1.3430285599418594, "grad_norm": 0.12628354132175446, "learning_rate": 0.002, "loss": 2.3317, "step": 347420 }, { "epoch": 1.343067217145243, "grad_norm": 0.1271989941596985, "learning_rate": 0.002, "loss": 2.331, "step": 347430 }, { "epoch": 1.3431058743486262, "grad_norm": 0.1027301475405693, "learning_rate": 0.002, "loss": 2.334, "step": 347440 }, { "epoch": 1.3431445315520094, "grad_norm": 0.10065386444330215, "learning_rate": 0.002, "loss": 2.3425, "step": 347450 }, { "epoch": 1.3431831887553927, "grad_norm": 0.10471511632204056, "learning_rate": 0.002, "loss": 2.332, "step": 347460 }, { "epoch": 1.343221845958776, "grad_norm": 0.10855276137590408, "learning_rate": 0.002, "loss": 2.348, "step": 347470 }, { "epoch": 1.3432605031621592, "grad_norm": 0.11760521680116653, "learning_rate": 0.002, "loss": 2.3412, "step": 347480 }, { "epoch": 1.3432991603655426, "grad_norm": 0.10220196098089218, "learning_rate": 0.002, "loss": 2.3196, "step": 347490 }, { "epoch": 1.343337817568926, "grad_norm": 0.10600744187831879, "learning_rate": 0.002, "loss": 2.3302, "step": 347500 }, { "epoch": 1.3433764747723091, "grad_norm": 0.09794536978006363, "learning_rate": 0.002, "loss": 2.3387, "step": 347510 }, { "epoch": 1.3434151319756924, "grad_norm": 0.09697246551513672, "learning_rate": 0.002, "loss": 2.3148, "step": 347520 }, { "epoch": 1.3434537891790757, "grad_norm": 0.10189753025770187, "learning_rate": 0.002, "loss": 2.3297, "step": 347530 }, { "epoch": 1.343492446382459, "grad_norm": 0.11046060919761658, "learning_rate": 0.002, "loss": 2.3225, "step": 347540 }, { "epoch": 1.3435311035858422, "grad_norm": 0.1047743633389473, "learning_rate": 0.002, "loss": 2.3347, "step": 347550 }, { "epoch": 1.3435697607892254, "grad_norm": 0.10901220142841339, "learning_rate": 0.002, "loss": 2.3406, "step": 347560 }, { "epoch": 1.3436084179926087, "grad_norm": 0.10370327532291412, "learning_rate": 0.002, "loss": 2.3436, "step": 347570 }, { "epoch": 1.343647075195992, "grad_norm": 0.12953785061836243, "learning_rate": 0.002, "loss": 2.3431, "step": 347580 }, { "epoch": 1.3436857323993752, "grad_norm": 0.10009057819843292, "learning_rate": 0.002, "loss": 2.3284, "step": 347590 }, { "epoch": 1.3437243896027586, "grad_norm": 0.10040712356567383, "learning_rate": 0.002, "loss": 2.3274, "step": 347600 }, { "epoch": 1.343763046806142, "grad_norm": 0.10058463364839554, "learning_rate": 0.002, "loss": 2.34, "step": 347610 }, { "epoch": 1.3438017040095251, "grad_norm": 0.10221666097640991, "learning_rate": 0.002, "loss": 2.3445, "step": 347620 }, { "epoch": 1.3438403612129084, "grad_norm": 0.09885133802890778, "learning_rate": 0.002, "loss": 2.3315, "step": 347630 }, { "epoch": 1.3438790184162916, "grad_norm": 0.08774641901254654, "learning_rate": 0.002, "loss": 2.346, "step": 347640 }, { "epoch": 1.343917675619675, "grad_norm": 0.11479109525680542, "learning_rate": 0.002, "loss": 2.3468, "step": 347650 }, { "epoch": 1.3439563328230584, "grad_norm": 0.09676390886306763, "learning_rate": 0.002, "loss": 2.3261, "step": 347660 }, { "epoch": 1.3439949900264416, "grad_norm": 0.08994947373867035, "learning_rate": 0.002, "loss": 2.3392, "step": 347670 }, { "epoch": 1.3440336472298249, "grad_norm": 0.09621430188417435, "learning_rate": 0.002, "loss": 2.3335, "step": 347680 }, { "epoch": 1.3440723044332081, "grad_norm": 0.10796340554952621, "learning_rate": 0.002, "loss": 2.3433, "step": 347690 }, { "epoch": 1.3441109616365914, "grad_norm": 0.15210728347301483, "learning_rate": 0.002, "loss": 2.3377, "step": 347700 }, { "epoch": 1.3441496188399746, "grad_norm": 0.12892857193946838, "learning_rate": 0.002, "loss": 2.3255, "step": 347710 }, { "epoch": 1.3441882760433579, "grad_norm": 0.10418952256441116, "learning_rate": 0.002, "loss": 2.3345, "step": 347720 }, { "epoch": 1.3442269332467411, "grad_norm": 0.11700917780399323, "learning_rate": 0.002, "loss": 2.3434, "step": 347730 }, { "epoch": 1.3442655904501244, "grad_norm": 0.09763418138027191, "learning_rate": 0.002, "loss": 2.329, "step": 347740 }, { "epoch": 1.3443042476535076, "grad_norm": 0.10042642056941986, "learning_rate": 0.002, "loss": 2.3205, "step": 347750 }, { "epoch": 1.344342904856891, "grad_norm": 0.10885453969240189, "learning_rate": 0.002, "loss": 2.3273, "step": 347760 }, { "epoch": 1.3443815620602744, "grad_norm": 0.09971655905246735, "learning_rate": 0.002, "loss": 2.3428, "step": 347770 }, { "epoch": 1.3444202192636576, "grad_norm": 0.08985363692045212, "learning_rate": 0.002, "loss": 2.3221, "step": 347780 }, { "epoch": 1.3444588764670409, "grad_norm": 0.12392828613519669, "learning_rate": 0.002, "loss": 2.3381, "step": 347790 }, { "epoch": 1.3444975336704241, "grad_norm": 0.11634746938943863, "learning_rate": 0.002, "loss": 2.3346, "step": 347800 }, { "epoch": 1.3445361908738074, "grad_norm": 0.12065248936414719, "learning_rate": 0.002, "loss": 2.3492, "step": 347810 }, { "epoch": 1.3445748480771906, "grad_norm": 0.10127338021993637, "learning_rate": 0.002, "loss": 2.3305, "step": 347820 }, { "epoch": 1.344613505280574, "grad_norm": 0.10347548872232437, "learning_rate": 0.002, "loss": 2.3371, "step": 347830 }, { "epoch": 1.3446521624839574, "grad_norm": 0.11170519143342972, "learning_rate": 0.002, "loss": 2.3209, "step": 347840 }, { "epoch": 1.3446908196873406, "grad_norm": 0.09464278817176819, "learning_rate": 0.002, "loss": 2.3517, "step": 347850 }, { "epoch": 1.3447294768907239, "grad_norm": 0.10744038224220276, "learning_rate": 0.002, "loss": 2.3292, "step": 347860 }, { "epoch": 1.3447681340941071, "grad_norm": 0.1069311797618866, "learning_rate": 0.002, "loss": 2.3246, "step": 347870 }, { "epoch": 1.3448067912974904, "grad_norm": 0.11059171706438065, "learning_rate": 0.002, "loss": 2.3429, "step": 347880 }, { "epoch": 1.3448454485008736, "grad_norm": 0.09943494945764542, "learning_rate": 0.002, "loss": 2.3218, "step": 347890 }, { "epoch": 1.3448841057042569, "grad_norm": 0.10599261522293091, "learning_rate": 0.002, "loss": 2.333, "step": 347900 }, { "epoch": 1.3449227629076401, "grad_norm": 0.10283409804105759, "learning_rate": 0.002, "loss": 2.3297, "step": 347910 }, { "epoch": 1.3449614201110234, "grad_norm": 0.1135038286447525, "learning_rate": 0.002, "loss": 2.3339, "step": 347920 }, { "epoch": 1.3450000773144066, "grad_norm": 0.11863948404788971, "learning_rate": 0.002, "loss": 2.3286, "step": 347930 }, { "epoch": 1.34503873451779, "grad_norm": 0.10357358306646347, "learning_rate": 0.002, "loss": 2.3282, "step": 347940 }, { "epoch": 1.3450773917211734, "grad_norm": 0.12235188484191895, "learning_rate": 0.002, "loss": 2.3277, "step": 347950 }, { "epoch": 1.3451160489245566, "grad_norm": 0.11046276986598969, "learning_rate": 0.002, "loss": 2.3386, "step": 347960 }, { "epoch": 1.3451547061279399, "grad_norm": 0.11126643419265747, "learning_rate": 0.002, "loss": 2.3409, "step": 347970 }, { "epoch": 1.345193363331323, "grad_norm": 0.10441526770591736, "learning_rate": 0.002, "loss": 2.3106, "step": 347980 }, { "epoch": 1.3452320205347064, "grad_norm": 0.11165712773799896, "learning_rate": 0.002, "loss": 2.3433, "step": 347990 }, { "epoch": 1.3452706777380898, "grad_norm": 0.10283760726451874, "learning_rate": 0.002, "loss": 2.3395, "step": 348000 }, { "epoch": 1.345309334941473, "grad_norm": 0.12190571427345276, "learning_rate": 0.002, "loss": 2.3408, "step": 348010 }, { "epoch": 1.3453479921448563, "grad_norm": 0.10616656392812729, "learning_rate": 0.002, "loss": 2.327, "step": 348020 }, { "epoch": 1.3453866493482396, "grad_norm": 0.11338208615779877, "learning_rate": 0.002, "loss": 2.3352, "step": 348030 }, { "epoch": 1.3454253065516228, "grad_norm": 0.1024947240948677, "learning_rate": 0.002, "loss": 2.3385, "step": 348040 }, { "epoch": 1.345463963755006, "grad_norm": 0.13262341916561127, "learning_rate": 0.002, "loss": 2.3298, "step": 348050 }, { "epoch": 1.3455026209583894, "grad_norm": 0.10618232935667038, "learning_rate": 0.002, "loss": 2.3479, "step": 348060 }, { "epoch": 1.3455412781617726, "grad_norm": 0.10304050147533417, "learning_rate": 0.002, "loss": 2.3284, "step": 348070 }, { "epoch": 1.3455799353651559, "grad_norm": 0.0965445265173912, "learning_rate": 0.002, "loss": 2.3547, "step": 348080 }, { "epoch": 1.345618592568539, "grad_norm": 0.11161847412586212, "learning_rate": 0.002, "loss": 2.3398, "step": 348090 }, { "epoch": 1.3456572497719226, "grad_norm": 0.1187560185790062, "learning_rate": 0.002, "loss": 2.3222, "step": 348100 }, { "epoch": 1.3456959069753058, "grad_norm": 0.09679316729307175, "learning_rate": 0.002, "loss": 2.3324, "step": 348110 }, { "epoch": 1.345734564178689, "grad_norm": 0.12807625532150269, "learning_rate": 0.002, "loss": 2.3171, "step": 348120 }, { "epoch": 1.3457732213820723, "grad_norm": 0.10582450777292252, "learning_rate": 0.002, "loss": 2.3173, "step": 348130 }, { "epoch": 1.3458118785854556, "grad_norm": 0.10744981467723846, "learning_rate": 0.002, "loss": 2.3309, "step": 348140 }, { "epoch": 1.3458505357888388, "grad_norm": 0.10922669619321823, "learning_rate": 0.002, "loss": 2.3448, "step": 348150 }, { "epoch": 1.345889192992222, "grad_norm": 0.10119405388832092, "learning_rate": 0.002, "loss": 2.3305, "step": 348160 }, { "epoch": 1.3459278501956056, "grad_norm": 0.11563913524150848, "learning_rate": 0.002, "loss": 2.3371, "step": 348170 }, { "epoch": 1.3459665073989888, "grad_norm": 0.10904233902692795, "learning_rate": 0.002, "loss": 2.3243, "step": 348180 }, { "epoch": 1.346005164602372, "grad_norm": 0.11013088375329971, "learning_rate": 0.002, "loss": 2.3431, "step": 348190 }, { "epoch": 1.3460438218057553, "grad_norm": 0.09150010347366333, "learning_rate": 0.002, "loss": 2.3412, "step": 348200 }, { "epoch": 1.3460824790091386, "grad_norm": 0.1301971673965454, "learning_rate": 0.002, "loss": 2.3492, "step": 348210 }, { "epoch": 1.3461211362125218, "grad_norm": 0.10532008856534958, "learning_rate": 0.002, "loss": 2.3473, "step": 348220 }, { "epoch": 1.346159793415905, "grad_norm": 0.10974858701229095, "learning_rate": 0.002, "loss": 2.3217, "step": 348230 }, { "epoch": 1.3461984506192883, "grad_norm": 0.16881871223449707, "learning_rate": 0.002, "loss": 2.3462, "step": 348240 }, { "epoch": 1.3462371078226716, "grad_norm": 0.0989382266998291, "learning_rate": 0.002, "loss": 2.3198, "step": 348250 }, { "epoch": 1.3462757650260548, "grad_norm": 0.09359890222549438, "learning_rate": 0.002, "loss": 2.3303, "step": 348260 }, { "epoch": 1.3463144222294383, "grad_norm": 0.12183345854282379, "learning_rate": 0.002, "loss": 2.3455, "step": 348270 }, { "epoch": 1.3463530794328216, "grad_norm": 0.12346727401018143, "learning_rate": 0.002, "loss": 2.3445, "step": 348280 }, { "epoch": 1.3463917366362048, "grad_norm": 0.10327605903148651, "learning_rate": 0.002, "loss": 2.3307, "step": 348290 }, { "epoch": 1.346430393839588, "grad_norm": 0.10280732810497284, "learning_rate": 0.002, "loss": 2.3248, "step": 348300 }, { "epoch": 1.3464690510429713, "grad_norm": 0.11782421916723251, "learning_rate": 0.002, "loss": 2.335, "step": 348310 }, { "epoch": 1.3465077082463546, "grad_norm": 0.09593365341424942, "learning_rate": 0.002, "loss": 2.3269, "step": 348320 }, { "epoch": 1.3465463654497378, "grad_norm": 0.09912115335464478, "learning_rate": 0.002, "loss": 2.3268, "step": 348330 }, { "epoch": 1.3465850226531213, "grad_norm": 0.10711175948381424, "learning_rate": 0.002, "loss": 2.3449, "step": 348340 }, { "epoch": 1.3466236798565046, "grad_norm": 0.11354491114616394, "learning_rate": 0.002, "loss": 2.3153, "step": 348350 }, { "epoch": 1.3466623370598878, "grad_norm": 0.11319012194871902, "learning_rate": 0.002, "loss": 2.3163, "step": 348360 }, { "epoch": 1.346700994263271, "grad_norm": 0.1476483941078186, "learning_rate": 0.002, "loss": 2.3318, "step": 348370 }, { "epoch": 1.3467396514666543, "grad_norm": 0.10306962579488754, "learning_rate": 0.002, "loss": 2.3454, "step": 348380 }, { "epoch": 1.3467783086700376, "grad_norm": 0.10173012316226959, "learning_rate": 0.002, "loss": 2.3428, "step": 348390 }, { "epoch": 1.3468169658734208, "grad_norm": 0.0901515930891037, "learning_rate": 0.002, "loss": 2.323, "step": 348400 }, { "epoch": 1.346855623076804, "grad_norm": 0.11575210839509964, "learning_rate": 0.002, "loss": 2.3442, "step": 348410 }, { "epoch": 1.3468942802801873, "grad_norm": 0.11052270233631134, "learning_rate": 0.002, "loss": 2.3356, "step": 348420 }, { "epoch": 1.3469329374835706, "grad_norm": 0.1113486960530281, "learning_rate": 0.002, "loss": 2.3476, "step": 348430 }, { "epoch": 1.346971594686954, "grad_norm": 0.10080469399690628, "learning_rate": 0.002, "loss": 2.3269, "step": 348440 }, { "epoch": 1.3470102518903373, "grad_norm": 0.09699404239654541, "learning_rate": 0.002, "loss": 2.3438, "step": 348450 }, { "epoch": 1.3470489090937205, "grad_norm": 0.09992402046918869, "learning_rate": 0.002, "loss": 2.3285, "step": 348460 }, { "epoch": 1.3470875662971038, "grad_norm": 0.10376543551683426, "learning_rate": 0.002, "loss": 2.3274, "step": 348470 }, { "epoch": 1.347126223500487, "grad_norm": 0.11510486155748367, "learning_rate": 0.002, "loss": 2.3292, "step": 348480 }, { "epoch": 1.3471648807038703, "grad_norm": 0.11083221435546875, "learning_rate": 0.002, "loss": 2.3285, "step": 348490 }, { "epoch": 1.3472035379072536, "grad_norm": 0.12963175773620605, "learning_rate": 0.002, "loss": 2.3315, "step": 348500 }, { "epoch": 1.347242195110637, "grad_norm": 0.1083407923579216, "learning_rate": 0.002, "loss": 2.3316, "step": 348510 }, { "epoch": 1.3472808523140203, "grad_norm": 0.11508948355913162, "learning_rate": 0.002, "loss": 2.3427, "step": 348520 }, { "epoch": 1.3473195095174035, "grad_norm": 0.09246627986431122, "learning_rate": 0.002, "loss": 2.3341, "step": 348530 }, { "epoch": 1.3473581667207868, "grad_norm": 0.10896582156419754, "learning_rate": 0.002, "loss": 2.3436, "step": 348540 }, { "epoch": 1.34739682392417, "grad_norm": 0.10738394409418106, "learning_rate": 0.002, "loss": 2.3455, "step": 348550 }, { "epoch": 1.3474354811275533, "grad_norm": 0.09998328238725662, "learning_rate": 0.002, "loss": 2.3336, "step": 348560 }, { "epoch": 1.3474741383309365, "grad_norm": 0.10204174369573593, "learning_rate": 0.002, "loss": 2.3524, "step": 348570 }, { "epoch": 1.3475127955343198, "grad_norm": 0.11803248524665833, "learning_rate": 0.002, "loss": 2.3335, "step": 348580 }, { "epoch": 1.347551452737703, "grad_norm": 0.10468433797359467, "learning_rate": 0.002, "loss": 2.3319, "step": 348590 }, { "epoch": 1.3475901099410863, "grad_norm": 0.11109494417905807, "learning_rate": 0.002, "loss": 2.3396, "step": 348600 }, { "epoch": 1.3476287671444698, "grad_norm": 0.1056119054555893, "learning_rate": 0.002, "loss": 2.3342, "step": 348610 }, { "epoch": 1.347667424347853, "grad_norm": 0.10069143772125244, "learning_rate": 0.002, "loss": 2.3361, "step": 348620 }, { "epoch": 1.3477060815512363, "grad_norm": 0.12363746762275696, "learning_rate": 0.002, "loss": 2.323, "step": 348630 }, { "epoch": 1.3477447387546195, "grad_norm": 0.11203087866306305, "learning_rate": 0.002, "loss": 2.3405, "step": 348640 }, { "epoch": 1.3477833959580028, "grad_norm": 0.09826168417930603, "learning_rate": 0.002, "loss": 2.3428, "step": 348650 }, { "epoch": 1.347822053161386, "grad_norm": 0.10257137566804886, "learning_rate": 0.002, "loss": 2.3354, "step": 348660 }, { "epoch": 1.3478607103647695, "grad_norm": 0.10058474540710449, "learning_rate": 0.002, "loss": 2.3416, "step": 348670 }, { "epoch": 1.3478993675681528, "grad_norm": 0.10483745485544205, "learning_rate": 0.002, "loss": 2.3272, "step": 348680 }, { "epoch": 1.347938024771536, "grad_norm": 0.12295182794332504, "learning_rate": 0.002, "loss": 2.3383, "step": 348690 }, { "epoch": 1.3479766819749193, "grad_norm": 0.12495746463537216, "learning_rate": 0.002, "loss": 2.3271, "step": 348700 }, { "epoch": 1.3480153391783025, "grad_norm": 0.10037694126367569, "learning_rate": 0.002, "loss": 2.3423, "step": 348710 }, { "epoch": 1.3480539963816858, "grad_norm": 0.10300444066524506, "learning_rate": 0.002, "loss": 2.3327, "step": 348720 }, { "epoch": 1.348092653585069, "grad_norm": 0.11531723290681839, "learning_rate": 0.002, "loss": 2.3462, "step": 348730 }, { "epoch": 1.3481313107884523, "grad_norm": 0.09703969955444336, "learning_rate": 0.002, "loss": 2.3383, "step": 348740 }, { "epoch": 1.3481699679918355, "grad_norm": 0.11398639529943466, "learning_rate": 0.002, "loss": 2.3436, "step": 348750 }, { "epoch": 1.3482086251952188, "grad_norm": 0.09538202732801437, "learning_rate": 0.002, "loss": 2.3361, "step": 348760 }, { "epoch": 1.348247282398602, "grad_norm": 0.10117063671350479, "learning_rate": 0.002, "loss": 2.3383, "step": 348770 }, { "epoch": 1.3482859396019855, "grad_norm": 0.1012919619679451, "learning_rate": 0.002, "loss": 2.3477, "step": 348780 }, { "epoch": 1.3483245968053688, "grad_norm": 0.10244598984718323, "learning_rate": 0.002, "loss": 2.3193, "step": 348790 }, { "epoch": 1.348363254008752, "grad_norm": 0.11705554276704788, "learning_rate": 0.002, "loss": 2.3454, "step": 348800 }, { "epoch": 1.3484019112121353, "grad_norm": 0.10573650896549225, "learning_rate": 0.002, "loss": 2.3242, "step": 348810 }, { "epoch": 1.3484405684155185, "grad_norm": 0.16497986018657684, "learning_rate": 0.002, "loss": 2.3324, "step": 348820 }, { "epoch": 1.3484792256189018, "grad_norm": 0.09472975134849548, "learning_rate": 0.002, "loss": 2.3365, "step": 348830 }, { "epoch": 1.3485178828222852, "grad_norm": 0.12757426500320435, "learning_rate": 0.002, "loss": 2.3341, "step": 348840 }, { "epoch": 1.3485565400256685, "grad_norm": 0.10717236250638962, "learning_rate": 0.002, "loss": 2.3359, "step": 348850 }, { "epoch": 1.3485951972290517, "grad_norm": 0.09877166152000427, "learning_rate": 0.002, "loss": 2.3357, "step": 348860 }, { "epoch": 1.348633854432435, "grad_norm": 0.12250571697950363, "learning_rate": 0.002, "loss": 2.344, "step": 348870 }, { "epoch": 1.3486725116358182, "grad_norm": 0.12091425806283951, "learning_rate": 0.002, "loss": 2.3502, "step": 348880 }, { "epoch": 1.3487111688392015, "grad_norm": 0.1172761395573616, "learning_rate": 0.002, "loss": 2.3395, "step": 348890 }, { "epoch": 1.3487498260425848, "grad_norm": 0.10755904018878937, "learning_rate": 0.002, "loss": 2.3374, "step": 348900 }, { "epoch": 1.348788483245968, "grad_norm": 0.10510231554508209, "learning_rate": 0.002, "loss": 2.3419, "step": 348910 }, { "epoch": 1.3488271404493513, "grad_norm": 0.15903323888778687, "learning_rate": 0.002, "loss": 2.337, "step": 348920 }, { "epoch": 1.3488657976527345, "grad_norm": 0.11088036000728607, "learning_rate": 0.002, "loss": 2.3479, "step": 348930 }, { "epoch": 1.3489044548561178, "grad_norm": 0.16186174750328064, "learning_rate": 0.002, "loss": 2.3404, "step": 348940 }, { "epoch": 1.3489431120595012, "grad_norm": 0.09969766438007355, "learning_rate": 0.002, "loss": 2.3333, "step": 348950 }, { "epoch": 1.3489817692628845, "grad_norm": 0.10213224589824677, "learning_rate": 0.002, "loss": 2.3371, "step": 348960 }, { "epoch": 1.3490204264662677, "grad_norm": 0.10633454471826553, "learning_rate": 0.002, "loss": 2.3461, "step": 348970 }, { "epoch": 1.349059083669651, "grad_norm": 0.09949535876512527, "learning_rate": 0.002, "loss": 2.3403, "step": 348980 }, { "epoch": 1.3490977408730342, "grad_norm": 0.10401295125484467, "learning_rate": 0.002, "loss": 2.3283, "step": 348990 }, { "epoch": 1.3491363980764175, "grad_norm": 0.1099301129579544, "learning_rate": 0.002, "loss": 2.3343, "step": 349000 }, { "epoch": 1.349175055279801, "grad_norm": 0.1048002764582634, "learning_rate": 0.002, "loss": 2.34, "step": 349010 }, { "epoch": 1.3492137124831842, "grad_norm": 0.14082182943820953, "learning_rate": 0.002, "loss": 2.3221, "step": 349020 }, { "epoch": 1.3492523696865675, "grad_norm": 0.09485018253326416, "learning_rate": 0.002, "loss": 2.3452, "step": 349030 }, { "epoch": 1.3492910268899507, "grad_norm": 0.11179543286561966, "learning_rate": 0.002, "loss": 2.3451, "step": 349040 }, { "epoch": 1.349329684093334, "grad_norm": 0.1056070551276207, "learning_rate": 0.002, "loss": 2.3311, "step": 349050 }, { "epoch": 1.3493683412967172, "grad_norm": 0.09375675767660141, "learning_rate": 0.002, "loss": 2.3512, "step": 349060 }, { "epoch": 1.3494069985001005, "grad_norm": 0.09856487810611725, "learning_rate": 0.002, "loss": 2.323, "step": 349070 }, { "epoch": 1.3494456557034837, "grad_norm": 0.10716529190540314, "learning_rate": 0.002, "loss": 2.3399, "step": 349080 }, { "epoch": 1.349484312906867, "grad_norm": 0.09925325959920883, "learning_rate": 0.002, "loss": 2.3397, "step": 349090 }, { "epoch": 1.3495229701102502, "grad_norm": 0.10733172297477722, "learning_rate": 0.002, "loss": 2.3316, "step": 349100 }, { "epoch": 1.3495616273136335, "grad_norm": 0.11140187084674835, "learning_rate": 0.002, "loss": 2.3233, "step": 349110 }, { "epoch": 1.349600284517017, "grad_norm": 0.12716157734394073, "learning_rate": 0.002, "loss": 2.3359, "step": 349120 }, { "epoch": 1.3496389417204002, "grad_norm": 0.11385093629360199, "learning_rate": 0.002, "loss": 2.3286, "step": 349130 }, { "epoch": 1.3496775989237835, "grad_norm": 0.10158204287290573, "learning_rate": 0.002, "loss": 2.3482, "step": 349140 }, { "epoch": 1.3497162561271667, "grad_norm": 0.11852376908063889, "learning_rate": 0.002, "loss": 2.3176, "step": 349150 }, { "epoch": 1.34975491333055, "grad_norm": 0.10581693798303604, "learning_rate": 0.002, "loss": 2.3354, "step": 349160 }, { "epoch": 1.3497935705339332, "grad_norm": 0.10468754917383194, "learning_rate": 0.002, "loss": 2.3288, "step": 349170 }, { "epoch": 1.3498322277373167, "grad_norm": 0.09875061362981796, "learning_rate": 0.002, "loss": 2.332, "step": 349180 }, { "epoch": 1.3498708849407, "grad_norm": 0.11517211049795151, "learning_rate": 0.002, "loss": 2.3345, "step": 349190 }, { "epoch": 1.3499095421440832, "grad_norm": 0.10867352038621902, "learning_rate": 0.002, "loss": 2.3357, "step": 349200 }, { "epoch": 1.3499481993474665, "grad_norm": 0.09441282600164413, "learning_rate": 0.002, "loss": 2.3426, "step": 349210 }, { "epoch": 1.3499868565508497, "grad_norm": 0.1236996278166771, "learning_rate": 0.002, "loss": 2.3276, "step": 349220 }, { "epoch": 1.350025513754233, "grad_norm": 0.09415584057569504, "learning_rate": 0.002, "loss": 2.3414, "step": 349230 }, { "epoch": 1.3500641709576162, "grad_norm": 0.08901593089103699, "learning_rate": 0.002, "loss": 2.3219, "step": 349240 }, { "epoch": 1.3501028281609995, "grad_norm": 0.10068287700414658, "learning_rate": 0.002, "loss": 2.3232, "step": 349250 }, { "epoch": 1.3501414853643827, "grad_norm": 0.09829511493444443, "learning_rate": 0.002, "loss": 2.3252, "step": 349260 }, { "epoch": 1.350180142567766, "grad_norm": 0.10522589832544327, "learning_rate": 0.002, "loss": 2.33, "step": 349270 }, { "epoch": 1.3502187997711492, "grad_norm": 0.1001722514629364, "learning_rate": 0.002, "loss": 2.3386, "step": 349280 }, { "epoch": 1.3502574569745327, "grad_norm": 0.11357532441616058, "learning_rate": 0.002, "loss": 2.3383, "step": 349290 }, { "epoch": 1.350296114177916, "grad_norm": 0.10763426870107651, "learning_rate": 0.002, "loss": 2.3322, "step": 349300 }, { "epoch": 1.3503347713812992, "grad_norm": 0.12531541287899017, "learning_rate": 0.002, "loss": 2.3317, "step": 349310 }, { "epoch": 1.3503734285846825, "grad_norm": 0.09431840479373932, "learning_rate": 0.002, "loss": 2.3356, "step": 349320 }, { "epoch": 1.3504120857880657, "grad_norm": 0.10716231912374496, "learning_rate": 0.002, "loss": 2.3191, "step": 349330 }, { "epoch": 1.350450742991449, "grad_norm": 0.13337059319019318, "learning_rate": 0.002, "loss": 2.3341, "step": 349340 }, { "epoch": 1.3504894001948324, "grad_norm": 0.09658301621675491, "learning_rate": 0.002, "loss": 2.3278, "step": 349350 }, { "epoch": 1.3505280573982157, "grad_norm": 0.11998984217643738, "learning_rate": 0.002, "loss": 2.348, "step": 349360 }, { "epoch": 1.350566714601599, "grad_norm": 0.22490796446800232, "learning_rate": 0.002, "loss": 2.3361, "step": 349370 }, { "epoch": 1.3506053718049822, "grad_norm": 0.12542197108268738, "learning_rate": 0.002, "loss": 2.3457, "step": 349380 }, { "epoch": 1.3506440290083654, "grad_norm": 0.101040780544281, "learning_rate": 0.002, "loss": 2.3444, "step": 349390 }, { "epoch": 1.3506826862117487, "grad_norm": 0.1157328262925148, "learning_rate": 0.002, "loss": 2.3522, "step": 349400 }, { "epoch": 1.350721343415132, "grad_norm": 0.10763093084096909, "learning_rate": 0.002, "loss": 2.332, "step": 349410 }, { "epoch": 1.3507600006185152, "grad_norm": 0.10246003419160843, "learning_rate": 0.002, "loss": 2.3328, "step": 349420 }, { "epoch": 1.3507986578218985, "grad_norm": 0.09681911766529083, "learning_rate": 0.002, "loss": 2.3248, "step": 349430 }, { "epoch": 1.3508373150252817, "grad_norm": 0.10902806371450424, "learning_rate": 0.002, "loss": 2.34, "step": 349440 }, { "epoch": 1.350875972228665, "grad_norm": 0.08912651985883713, "learning_rate": 0.002, "loss": 2.3392, "step": 349450 }, { "epoch": 1.3509146294320484, "grad_norm": 0.11318572610616684, "learning_rate": 0.002, "loss": 2.3239, "step": 349460 }, { "epoch": 1.3509532866354317, "grad_norm": 0.12583884596824646, "learning_rate": 0.002, "loss": 2.321, "step": 349470 }, { "epoch": 1.350991943838815, "grad_norm": 0.10462572425603867, "learning_rate": 0.002, "loss": 2.3498, "step": 349480 }, { "epoch": 1.3510306010421982, "grad_norm": 0.1022852286696434, "learning_rate": 0.002, "loss": 2.3612, "step": 349490 }, { "epoch": 1.3510692582455814, "grad_norm": 0.11671693623065948, "learning_rate": 0.002, "loss": 2.3121, "step": 349500 }, { "epoch": 1.3511079154489647, "grad_norm": 0.10456795245409012, "learning_rate": 0.002, "loss": 2.3171, "step": 349510 }, { "epoch": 1.3511465726523482, "grad_norm": 0.10208845138549805, "learning_rate": 0.002, "loss": 2.345, "step": 349520 }, { "epoch": 1.3511852298557314, "grad_norm": 0.09636218845844269, "learning_rate": 0.002, "loss": 2.3327, "step": 349530 }, { "epoch": 1.3512238870591147, "grad_norm": 0.11306154727935791, "learning_rate": 0.002, "loss": 2.3145, "step": 349540 }, { "epoch": 1.351262544262498, "grad_norm": 0.21951696276664734, "learning_rate": 0.002, "loss": 2.3294, "step": 349550 }, { "epoch": 1.3513012014658812, "grad_norm": 0.11239375919103622, "learning_rate": 0.002, "loss": 2.3414, "step": 349560 }, { "epoch": 1.3513398586692644, "grad_norm": 0.10407473891973495, "learning_rate": 0.002, "loss": 2.3247, "step": 349570 }, { "epoch": 1.3513785158726477, "grad_norm": 0.09843426942825317, "learning_rate": 0.002, "loss": 2.338, "step": 349580 }, { "epoch": 1.351417173076031, "grad_norm": 0.1171356588602066, "learning_rate": 0.002, "loss": 2.3473, "step": 349590 }, { "epoch": 1.3514558302794142, "grad_norm": 0.10250692814588547, "learning_rate": 0.002, "loss": 2.3317, "step": 349600 }, { "epoch": 1.3514944874827974, "grad_norm": 0.12931481003761292, "learning_rate": 0.002, "loss": 2.3309, "step": 349610 }, { "epoch": 1.3515331446861807, "grad_norm": 0.11812961846590042, "learning_rate": 0.002, "loss": 2.3226, "step": 349620 }, { "epoch": 1.3515718018895642, "grad_norm": 0.09838510304689407, "learning_rate": 0.002, "loss": 2.3365, "step": 349630 }, { "epoch": 1.3516104590929474, "grad_norm": 0.08915498852729797, "learning_rate": 0.002, "loss": 2.3417, "step": 349640 }, { "epoch": 1.3516491162963307, "grad_norm": 0.11878800392150879, "learning_rate": 0.002, "loss": 2.3316, "step": 349650 }, { "epoch": 1.351687773499714, "grad_norm": 0.10394832491874695, "learning_rate": 0.002, "loss": 2.3302, "step": 349660 }, { "epoch": 1.3517264307030972, "grad_norm": 0.12385991215705872, "learning_rate": 0.002, "loss": 2.3431, "step": 349670 }, { "epoch": 1.3517650879064804, "grad_norm": 0.1123647391796112, "learning_rate": 0.002, "loss": 2.3504, "step": 349680 }, { "epoch": 1.351803745109864, "grad_norm": 0.10797573626041412, "learning_rate": 0.002, "loss": 2.3358, "step": 349690 }, { "epoch": 1.3518424023132471, "grad_norm": 0.10810215026140213, "learning_rate": 0.002, "loss": 2.3324, "step": 349700 }, { "epoch": 1.3518810595166304, "grad_norm": 0.08901609480381012, "learning_rate": 0.002, "loss": 2.3245, "step": 349710 }, { "epoch": 1.3519197167200137, "grad_norm": 0.11496555805206299, "learning_rate": 0.002, "loss": 2.3274, "step": 349720 }, { "epoch": 1.351958373923397, "grad_norm": 0.10802195221185684, "learning_rate": 0.002, "loss": 2.3258, "step": 349730 }, { "epoch": 1.3519970311267802, "grad_norm": 0.11436771601438522, "learning_rate": 0.002, "loss": 2.3452, "step": 349740 }, { "epoch": 1.3520356883301634, "grad_norm": 0.09937600791454315, "learning_rate": 0.002, "loss": 2.3317, "step": 349750 }, { "epoch": 1.3520743455335467, "grad_norm": 0.09579189121723175, "learning_rate": 0.002, "loss": 2.3352, "step": 349760 }, { "epoch": 1.35211300273693, "grad_norm": 0.11161386221647263, "learning_rate": 0.002, "loss": 2.3445, "step": 349770 }, { "epoch": 1.3521516599403132, "grad_norm": 0.10039962083101273, "learning_rate": 0.002, "loss": 2.3334, "step": 349780 }, { "epoch": 1.3521903171436964, "grad_norm": 0.11212499439716339, "learning_rate": 0.002, "loss": 2.3457, "step": 349790 }, { "epoch": 1.35222897434708, "grad_norm": 0.11101280152797699, "learning_rate": 0.002, "loss": 2.3377, "step": 349800 }, { "epoch": 1.3522676315504631, "grad_norm": 0.10846508294343948, "learning_rate": 0.002, "loss": 2.3376, "step": 349810 }, { "epoch": 1.3523062887538464, "grad_norm": 0.09751371294260025, "learning_rate": 0.002, "loss": 2.3367, "step": 349820 }, { "epoch": 1.3523449459572296, "grad_norm": 0.09648773819208145, "learning_rate": 0.002, "loss": 2.342, "step": 349830 }, { "epoch": 1.352383603160613, "grad_norm": 0.08779910206794739, "learning_rate": 0.002, "loss": 2.3292, "step": 349840 }, { "epoch": 1.3524222603639962, "grad_norm": 0.10542741417884827, "learning_rate": 0.002, "loss": 2.3242, "step": 349850 }, { "epoch": 1.3524609175673796, "grad_norm": 0.09809595346450806, "learning_rate": 0.002, "loss": 2.3201, "step": 349860 }, { "epoch": 1.3524995747707629, "grad_norm": 0.11002374440431595, "learning_rate": 0.002, "loss": 2.3364, "step": 349870 }, { "epoch": 1.3525382319741461, "grad_norm": 0.11683285981416702, "learning_rate": 0.002, "loss": 2.3265, "step": 349880 }, { "epoch": 1.3525768891775294, "grad_norm": 0.6189273595809937, "learning_rate": 0.002, "loss": 2.3243, "step": 349890 }, { "epoch": 1.3526155463809126, "grad_norm": 0.12348686903715134, "learning_rate": 0.002, "loss": 2.3372, "step": 349900 }, { "epoch": 1.3526542035842959, "grad_norm": 0.11133924126625061, "learning_rate": 0.002, "loss": 2.3446, "step": 349910 }, { "epoch": 1.3526928607876791, "grad_norm": 0.13373364508152008, "learning_rate": 0.002, "loss": 2.34, "step": 349920 }, { "epoch": 1.3527315179910624, "grad_norm": 0.10956840217113495, "learning_rate": 0.002, "loss": 2.3155, "step": 349930 }, { "epoch": 1.3527701751944456, "grad_norm": 0.09929367899894714, "learning_rate": 0.002, "loss": 2.3333, "step": 349940 }, { "epoch": 1.352808832397829, "grad_norm": 0.12658318877220154, "learning_rate": 0.002, "loss": 2.3288, "step": 349950 }, { "epoch": 1.3528474896012124, "grad_norm": 0.09387831389904022, "learning_rate": 0.002, "loss": 2.3361, "step": 349960 }, { "epoch": 1.3528861468045956, "grad_norm": 0.09798414260149002, "learning_rate": 0.002, "loss": 2.3318, "step": 349970 }, { "epoch": 1.3529248040079789, "grad_norm": 0.11518450081348419, "learning_rate": 0.002, "loss": 2.3409, "step": 349980 }, { "epoch": 1.3529634612113621, "grad_norm": 0.10312693566083908, "learning_rate": 0.002, "loss": 2.3277, "step": 349990 }, { "epoch": 1.3530021184147454, "grad_norm": 0.10474686324596405, "learning_rate": 0.002, "loss": 2.3257, "step": 350000 }, { "epoch": 1.3530407756181286, "grad_norm": 0.11365187913179398, "learning_rate": 0.002, "loss": 2.3427, "step": 350010 }, { "epoch": 1.3530794328215119, "grad_norm": 0.12246767431497574, "learning_rate": 0.002, "loss": 2.3287, "step": 350020 }, { "epoch": 1.3531180900248954, "grad_norm": 0.09271487593650818, "learning_rate": 0.002, "loss": 2.347, "step": 350030 }, { "epoch": 1.3531567472282786, "grad_norm": 0.11318147927522659, "learning_rate": 0.002, "loss": 2.3363, "step": 350040 }, { "epoch": 1.3531954044316619, "grad_norm": 0.11305706202983856, "learning_rate": 0.002, "loss": 2.3341, "step": 350050 }, { "epoch": 1.3532340616350451, "grad_norm": 0.08688071370124817, "learning_rate": 0.002, "loss": 2.3336, "step": 350060 }, { "epoch": 1.3532727188384284, "grad_norm": 0.09745533764362335, "learning_rate": 0.002, "loss": 2.3367, "step": 350070 }, { "epoch": 1.3533113760418116, "grad_norm": 0.09462425112724304, "learning_rate": 0.002, "loss": 2.336, "step": 350080 }, { "epoch": 1.3533500332451949, "grad_norm": 0.09649679064750671, "learning_rate": 0.002, "loss": 2.3311, "step": 350090 }, { "epoch": 1.3533886904485781, "grad_norm": 0.09767809510231018, "learning_rate": 0.002, "loss": 2.344, "step": 350100 }, { "epoch": 1.3534273476519614, "grad_norm": 0.09438911825418472, "learning_rate": 0.002, "loss": 2.3396, "step": 350110 }, { "epoch": 1.3534660048553446, "grad_norm": 0.10685234516859055, "learning_rate": 0.002, "loss": 2.3276, "step": 350120 }, { "epoch": 1.353504662058728, "grad_norm": 0.10139115899801254, "learning_rate": 0.002, "loss": 2.3234, "step": 350130 }, { "epoch": 1.3535433192621114, "grad_norm": 0.09849409013986588, "learning_rate": 0.002, "loss": 2.3377, "step": 350140 }, { "epoch": 1.3535819764654946, "grad_norm": 0.09482110291719437, "learning_rate": 0.002, "loss": 2.3154, "step": 350150 }, { "epoch": 1.3536206336688779, "grad_norm": 0.12369535118341446, "learning_rate": 0.002, "loss": 2.338, "step": 350160 }, { "epoch": 1.3536592908722611, "grad_norm": 0.11014200001955032, "learning_rate": 0.002, "loss": 2.3296, "step": 350170 }, { "epoch": 1.3536979480756444, "grad_norm": 0.10463564097881317, "learning_rate": 0.002, "loss": 2.3369, "step": 350180 }, { "epoch": 1.3537366052790276, "grad_norm": 0.09717349708080292, "learning_rate": 0.002, "loss": 2.3234, "step": 350190 }, { "epoch": 1.353775262482411, "grad_norm": 0.09717091917991638, "learning_rate": 0.002, "loss": 2.3395, "step": 350200 }, { "epoch": 1.3538139196857943, "grad_norm": 0.09814690798521042, "learning_rate": 0.002, "loss": 2.335, "step": 350210 }, { "epoch": 1.3538525768891776, "grad_norm": 0.11123622953891754, "learning_rate": 0.002, "loss": 2.3142, "step": 350220 }, { "epoch": 1.3538912340925608, "grad_norm": 0.11225827038288116, "learning_rate": 0.002, "loss": 2.34, "step": 350230 }, { "epoch": 1.353929891295944, "grad_norm": 0.09374217689037323, "learning_rate": 0.002, "loss": 2.3523, "step": 350240 }, { "epoch": 1.3539685484993274, "grad_norm": 0.1104871854186058, "learning_rate": 0.002, "loss": 2.3358, "step": 350250 }, { "epoch": 1.3540072057027106, "grad_norm": 0.09894196689128876, "learning_rate": 0.002, "loss": 2.3389, "step": 350260 }, { "epoch": 1.3540458629060939, "grad_norm": 0.12607525289058685, "learning_rate": 0.002, "loss": 2.3378, "step": 350270 }, { "epoch": 1.354084520109477, "grad_norm": 0.11109902709722519, "learning_rate": 0.002, "loss": 2.3312, "step": 350280 }, { "epoch": 1.3541231773128604, "grad_norm": 0.10130560398101807, "learning_rate": 0.002, "loss": 2.3347, "step": 350290 }, { "epoch": 1.3541618345162438, "grad_norm": 0.09814395010471344, "learning_rate": 0.002, "loss": 2.332, "step": 350300 }, { "epoch": 1.354200491719627, "grad_norm": 0.10805349797010422, "learning_rate": 0.002, "loss": 2.3342, "step": 350310 }, { "epoch": 1.3542391489230103, "grad_norm": 0.09742667526006699, "learning_rate": 0.002, "loss": 2.3277, "step": 350320 }, { "epoch": 1.3542778061263936, "grad_norm": 0.10429896414279938, "learning_rate": 0.002, "loss": 2.3495, "step": 350330 }, { "epoch": 1.3543164633297768, "grad_norm": 0.09611230343580246, "learning_rate": 0.002, "loss": 2.3272, "step": 350340 }, { "epoch": 1.35435512053316, "grad_norm": 0.23421809077262878, "learning_rate": 0.002, "loss": 2.3268, "step": 350350 }, { "epoch": 1.3543937777365433, "grad_norm": 0.10822147876024246, "learning_rate": 0.002, "loss": 2.3485, "step": 350360 }, { "epoch": 1.3544324349399268, "grad_norm": 0.10089278966188431, "learning_rate": 0.002, "loss": 2.335, "step": 350370 }, { "epoch": 1.35447109214331, "grad_norm": 0.11248739063739777, "learning_rate": 0.002, "loss": 2.363, "step": 350380 }, { "epoch": 1.3545097493466933, "grad_norm": 0.12073154747486115, "learning_rate": 0.002, "loss": 2.3257, "step": 350390 }, { "epoch": 1.3545484065500766, "grad_norm": 0.10004567354917526, "learning_rate": 0.002, "loss": 2.3454, "step": 350400 }, { "epoch": 1.3545870637534598, "grad_norm": 0.12040378898382187, "learning_rate": 0.002, "loss": 2.3298, "step": 350410 }, { "epoch": 1.354625720956843, "grad_norm": 0.09634535014629364, "learning_rate": 0.002, "loss": 2.338, "step": 350420 }, { "epoch": 1.3546643781602263, "grad_norm": 0.10254807770252228, "learning_rate": 0.002, "loss": 2.3324, "step": 350430 }, { "epoch": 1.3547030353636096, "grad_norm": 0.11771095544099808, "learning_rate": 0.002, "loss": 2.324, "step": 350440 }, { "epoch": 1.3547416925669928, "grad_norm": 0.10953339189291, "learning_rate": 0.002, "loss": 2.3247, "step": 350450 }, { "epoch": 1.354780349770376, "grad_norm": 0.12906228005886078, "learning_rate": 0.002, "loss": 2.332, "step": 350460 }, { "epoch": 1.3548190069737596, "grad_norm": 0.10269488394260406, "learning_rate": 0.002, "loss": 2.3295, "step": 350470 }, { "epoch": 1.3548576641771428, "grad_norm": 0.10697735846042633, "learning_rate": 0.002, "loss": 2.346, "step": 350480 }, { "epoch": 1.354896321380526, "grad_norm": 0.09774033725261688, "learning_rate": 0.002, "loss": 2.3365, "step": 350490 }, { "epoch": 1.3549349785839093, "grad_norm": 0.1603395640850067, "learning_rate": 0.002, "loss": 2.3243, "step": 350500 }, { "epoch": 1.3549736357872926, "grad_norm": 0.12372355908155441, "learning_rate": 0.002, "loss": 2.3399, "step": 350510 }, { "epoch": 1.3550122929906758, "grad_norm": 0.11275654286146164, "learning_rate": 0.002, "loss": 2.3269, "step": 350520 }, { "epoch": 1.355050950194059, "grad_norm": 0.09468483924865723, "learning_rate": 0.002, "loss": 2.3233, "step": 350530 }, { "epoch": 1.3550896073974426, "grad_norm": 0.10514874756336212, "learning_rate": 0.002, "loss": 2.3361, "step": 350540 }, { "epoch": 1.3551282646008258, "grad_norm": 0.10283022373914719, "learning_rate": 0.002, "loss": 2.3371, "step": 350550 }, { "epoch": 1.355166921804209, "grad_norm": 0.09229005128145218, "learning_rate": 0.002, "loss": 2.3272, "step": 350560 }, { "epoch": 1.3552055790075923, "grad_norm": 0.14236804842948914, "learning_rate": 0.002, "loss": 2.3415, "step": 350570 }, { "epoch": 1.3552442362109756, "grad_norm": 0.09927023947238922, "learning_rate": 0.002, "loss": 2.322, "step": 350580 }, { "epoch": 1.3552828934143588, "grad_norm": 0.09392958134412766, "learning_rate": 0.002, "loss": 2.3481, "step": 350590 }, { "epoch": 1.355321550617742, "grad_norm": 0.11411365866661072, "learning_rate": 0.002, "loss": 2.3282, "step": 350600 }, { "epoch": 1.3553602078211253, "grad_norm": 0.09241783618927002, "learning_rate": 0.002, "loss": 2.3481, "step": 350610 }, { "epoch": 1.3553988650245086, "grad_norm": 0.08613024652004242, "learning_rate": 0.002, "loss": 2.3314, "step": 350620 }, { "epoch": 1.3554375222278918, "grad_norm": 0.10598557442426682, "learning_rate": 0.002, "loss": 2.3264, "step": 350630 }, { "epoch": 1.3554761794312753, "grad_norm": 0.09892724454402924, "learning_rate": 0.002, "loss": 2.3425, "step": 350640 }, { "epoch": 1.3555148366346585, "grad_norm": 0.09699424356222153, "learning_rate": 0.002, "loss": 2.3291, "step": 350650 }, { "epoch": 1.3555534938380418, "grad_norm": 0.11693083494901657, "learning_rate": 0.002, "loss": 2.3176, "step": 350660 }, { "epoch": 1.355592151041425, "grad_norm": 0.09478957206010818, "learning_rate": 0.002, "loss": 2.3399, "step": 350670 }, { "epoch": 1.3556308082448083, "grad_norm": 0.10289830714464188, "learning_rate": 0.002, "loss": 2.3435, "step": 350680 }, { "epoch": 1.3556694654481916, "grad_norm": 0.09411660581827164, "learning_rate": 0.002, "loss": 2.3183, "step": 350690 }, { "epoch": 1.355708122651575, "grad_norm": 0.1026243194937706, "learning_rate": 0.002, "loss": 2.3442, "step": 350700 }, { "epoch": 1.3557467798549583, "grad_norm": 0.11500430107116699, "learning_rate": 0.002, "loss": 2.3358, "step": 350710 }, { "epoch": 1.3557854370583415, "grad_norm": 0.105681873857975, "learning_rate": 0.002, "loss": 2.3404, "step": 350720 }, { "epoch": 1.3558240942617248, "grad_norm": 0.08912233263254166, "learning_rate": 0.002, "loss": 2.3496, "step": 350730 }, { "epoch": 1.355862751465108, "grad_norm": 0.0909724161028862, "learning_rate": 0.002, "loss": 2.3283, "step": 350740 }, { "epoch": 1.3559014086684913, "grad_norm": 0.10319610685110092, "learning_rate": 0.002, "loss": 2.3455, "step": 350750 }, { "epoch": 1.3559400658718745, "grad_norm": 0.10236437618732452, "learning_rate": 0.002, "loss": 2.3341, "step": 350760 }, { "epoch": 1.3559787230752578, "grad_norm": 0.09389732778072357, "learning_rate": 0.002, "loss": 2.3173, "step": 350770 }, { "epoch": 1.356017380278641, "grad_norm": 0.1131930947303772, "learning_rate": 0.002, "loss": 2.3475, "step": 350780 }, { "epoch": 1.3560560374820243, "grad_norm": 0.10111220926046371, "learning_rate": 0.002, "loss": 2.3411, "step": 350790 }, { "epoch": 1.3560946946854076, "grad_norm": 0.09880499541759491, "learning_rate": 0.002, "loss": 2.3326, "step": 350800 }, { "epoch": 1.356133351888791, "grad_norm": 0.09543951600790024, "learning_rate": 0.002, "loss": 2.3104, "step": 350810 }, { "epoch": 1.3561720090921743, "grad_norm": 0.10903646051883698, "learning_rate": 0.002, "loss": 2.326, "step": 350820 }, { "epoch": 1.3562106662955575, "grad_norm": 0.09331028163433075, "learning_rate": 0.002, "loss": 2.3232, "step": 350830 }, { "epoch": 1.3562493234989408, "grad_norm": 0.13966050744056702, "learning_rate": 0.002, "loss": 2.3229, "step": 350840 }, { "epoch": 1.356287980702324, "grad_norm": 0.09700989723205566, "learning_rate": 0.002, "loss": 2.3331, "step": 350850 }, { "epoch": 1.3563266379057073, "grad_norm": 0.0926334485411644, "learning_rate": 0.002, "loss": 2.3391, "step": 350860 }, { "epoch": 1.3563652951090908, "grad_norm": 0.09399153292179108, "learning_rate": 0.002, "loss": 2.338, "step": 350870 }, { "epoch": 1.356403952312474, "grad_norm": 0.1077730804681778, "learning_rate": 0.002, "loss": 2.3422, "step": 350880 }, { "epoch": 1.3564426095158573, "grad_norm": 0.1043611541390419, "learning_rate": 0.002, "loss": 2.3338, "step": 350890 }, { "epoch": 1.3564812667192405, "grad_norm": 0.10705311596393585, "learning_rate": 0.002, "loss": 2.3356, "step": 350900 }, { "epoch": 1.3565199239226238, "grad_norm": 0.11739810556173325, "learning_rate": 0.002, "loss": 2.3411, "step": 350910 }, { "epoch": 1.356558581126007, "grad_norm": 0.10188135504722595, "learning_rate": 0.002, "loss": 2.3425, "step": 350920 }, { "epoch": 1.3565972383293903, "grad_norm": 0.11405863612890244, "learning_rate": 0.002, "loss": 2.3247, "step": 350930 }, { "epoch": 1.3566358955327735, "grad_norm": 0.10311403125524521, "learning_rate": 0.002, "loss": 2.3297, "step": 350940 }, { "epoch": 1.3566745527361568, "grad_norm": 0.11608370393514633, "learning_rate": 0.002, "loss": 2.3389, "step": 350950 }, { "epoch": 1.35671320993954, "grad_norm": 0.10479841381311417, "learning_rate": 0.002, "loss": 2.3288, "step": 350960 }, { "epoch": 1.3567518671429233, "grad_norm": 0.1082431823015213, "learning_rate": 0.002, "loss": 2.3302, "step": 350970 }, { "epoch": 1.3567905243463068, "grad_norm": 0.12166009098291397, "learning_rate": 0.002, "loss": 2.3499, "step": 350980 }, { "epoch": 1.35682918154969, "grad_norm": 0.10090559720993042, "learning_rate": 0.002, "loss": 2.3483, "step": 350990 }, { "epoch": 1.3568678387530733, "grad_norm": 0.11029312014579773, "learning_rate": 0.002, "loss": 2.3343, "step": 351000 }, { "epoch": 1.3569064959564565, "grad_norm": 0.11490152031183243, "learning_rate": 0.002, "loss": 2.3299, "step": 351010 }, { "epoch": 1.3569451531598398, "grad_norm": 0.10307451337575912, "learning_rate": 0.002, "loss": 2.326, "step": 351020 }, { "epoch": 1.356983810363223, "grad_norm": 0.14087946712970734, "learning_rate": 0.002, "loss": 2.3431, "step": 351030 }, { "epoch": 1.3570224675666065, "grad_norm": 0.09875258058309555, "learning_rate": 0.002, "loss": 2.3461, "step": 351040 }, { "epoch": 1.3570611247699897, "grad_norm": 0.10351992398500443, "learning_rate": 0.002, "loss": 2.3374, "step": 351050 }, { "epoch": 1.357099781973373, "grad_norm": 0.10319630056619644, "learning_rate": 0.002, "loss": 2.3239, "step": 351060 }, { "epoch": 1.3571384391767563, "grad_norm": 0.12239965796470642, "learning_rate": 0.002, "loss": 2.3386, "step": 351070 }, { "epoch": 1.3571770963801395, "grad_norm": 0.10036638379096985, "learning_rate": 0.002, "loss": 2.3317, "step": 351080 }, { "epoch": 1.3572157535835228, "grad_norm": 0.13001009821891785, "learning_rate": 0.002, "loss": 2.3309, "step": 351090 }, { "epoch": 1.357254410786906, "grad_norm": 0.12365800142288208, "learning_rate": 0.002, "loss": 2.3307, "step": 351100 }, { "epoch": 1.3572930679902893, "grad_norm": 0.10206878185272217, "learning_rate": 0.002, "loss": 2.3388, "step": 351110 }, { "epoch": 1.3573317251936725, "grad_norm": 0.11229734122753143, "learning_rate": 0.002, "loss": 2.325, "step": 351120 }, { "epoch": 1.3573703823970558, "grad_norm": 0.1121644452214241, "learning_rate": 0.002, "loss": 2.3288, "step": 351130 }, { "epoch": 1.357409039600439, "grad_norm": 0.10506831109523773, "learning_rate": 0.002, "loss": 2.346, "step": 351140 }, { "epoch": 1.3574476968038225, "grad_norm": 0.10747390240430832, "learning_rate": 0.002, "loss": 2.3302, "step": 351150 }, { "epoch": 1.3574863540072057, "grad_norm": 0.10540571063756943, "learning_rate": 0.002, "loss": 2.3395, "step": 351160 }, { "epoch": 1.357525011210589, "grad_norm": 0.1473851203918457, "learning_rate": 0.002, "loss": 2.3311, "step": 351170 }, { "epoch": 1.3575636684139722, "grad_norm": 0.101223424077034, "learning_rate": 0.002, "loss": 2.3302, "step": 351180 }, { "epoch": 1.3576023256173555, "grad_norm": 0.09359554946422577, "learning_rate": 0.002, "loss": 2.3191, "step": 351190 }, { "epoch": 1.3576409828207388, "grad_norm": 0.1014396995306015, "learning_rate": 0.002, "loss": 2.331, "step": 351200 }, { "epoch": 1.3576796400241222, "grad_norm": 0.11498954892158508, "learning_rate": 0.002, "loss": 2.3426, "step": 351210 }, { "epoch": 1.3577182972275055, "grad_norm": 0.11524534970521927, "learning_rate": 0.002, "loss": 2.3313, "step": 351220 }, { "epoch": 1.3577569544308887, "grad_norm": 0.13116922974586487, "learning_rate": 0.002, "loss": 2.321, "step": 351230 }, { "epoch": 1.357795611634272, "grad_norm": 0.09713736921548843, "learning_rate": 0.002, "loss": 2.3309, "step": 351240 }, { "epoch": 1.3578342688376552, "grad_norm": 0.09525876492261887, "learning_rate": 0.002, "loss": 2.3424, "step": 351250 }, { "epoch": 1.3578729260410385, "grad_norm": 0.12362363934516907, "learning_rate": 0.002, "loss": 2.3425, "step": 351260 }, { "epoch": 1.3579115832444217, "grad_norm": 0.1071556881070137, "learning_rate": 0.002, "loss": 2.3359, "step": 351270 }, { "epoch": 1.357950240447805, "grad_norm": 0.09683693200349808, "learning_rate": 0.002, "loss": 2.3413, "step": 351280 }, { "epoch": 1.3579888976511882, "grad_norm": 0.10740751773118973, "learning_rate": 0.002, "loss": 2.3289, "step": 351290 }, { "epoch": 1.3580275548545715, "grad_norm": 0.09097153693437576, "learning_rate": 0.002, "loss": 2.3352, "step": 351300 }, { "epoch": 1.3580662120579547, "grad_norm": 0.1114463284611702, "learning_rate": 0.002, "loss": 2.3206, "step": 351310 }, { "epoch": 1.3581048692613382, "grad_norm": 0.14950144290924072, "learning_rate": 0.002, "loss": 2.3312, "step": 351320 }, { "epoch": 1.3581435264647215, "grad_norm": 0.08972033858299255, "learning_rate": 0.002, "loss": 2.3173, "step": 351330 }, { "epoch": 1.3581821836681047, "grad_norm": 0.1413266509771347, "learning_rate": 0.002, "loss": 2.3444, "step": 351340 }, { "epoch": 1.358220840871488, "grad_norm": 0.11101408302783966, "learning_rate": 0.002, "loss": 2.3421, "step": 351350 }, { "epoch": 1.3582594980748712, "grad_norm": 0.09727130085229874, "learning_rate": 0.002, "loss": 2.3429, "step": 351360 }, { "epoch": 1.3582981552782545, "grad_norm": 0.11083702743053436, "learning_rate": 0.002, "loss": 2.3249, "step": 351370 }, { "epoch": 1.358336812481638, "grad_norm": 0.09823834896087646, "learning_rate": 0.002, "loss": 2.3385, "step": 351380 }, { "epoch": 1.3583754696850212, "grad_norm": 0.11632708460092545, "learning_rate": 0.002, "loss": 2.3349, "step": 351390 }, { "epoch": 1.3584141268884045, "grad_norm": 0.13511040806770325, "learning_rate": 0.002, "loss": 2.3305, "step": 351400 }, { "epoch": 1.3584527840917877, "grad_norm": 0.10550139844417572, "learning_rate": 0.002, "loss": 2.3379, "step": 351410 }, { "epoch": 1.358491441295171, "grad_norm": 0.0951557382941246, "learning_rate": 0.002, "loss": 2.33, "step": 351420 }, { "epoch": 1.3585300984985542, "grad_norm": 0.10970769822597504, "learning_rate": 0.002, "loss": 2.3355, "step": 351430 }, { "epoch": 1.3585687557019375, "grad_norm": 0.11074388772249222, "learning_rate": 0.002, "loss": 2.3459, "step": 351440 }, { "epoch": 1.3586074129053207, "grad_norm": 0.08945836871862411, "learning_rate": 0.002, "loss": 2.3487, "step": 351450 }, { "epoch": 1.358646070108704, "grad_norm": 0.10373366624116898, "learning_rate": 0.002, "loss": 2.3427, "step": 351460 }, { "epoch": 1.3586847273120872, "grad_norm": 0.09923812747001648, "learning_rate": 0.002, "loss": 2.3347, "step": 351470 }, { "epoch": 1.3587233845154705, "grad_norm": 0.11788978427648544, "learning_rate": 0.002, "loss": 2.3411, "step": 351480 }, { "epoch": 1.358762041718854, "grad_norm": 0.11119283735752106, "learning_rate": 0.002, "loss": 2.3296, "step": 351490 }, { "epoch": 1.3588006989222372, "grad_norm": 0.09925679862499237, "learning_rate": 0.002, "loss": 2.3462, "step": 351500 }, { "epoch": 1.3588393561256205, "grad_norm": 0.10013158619403839, "learning_rate": 0.002, "loss": 2.3355, "step": 351510 }, { "epoch": 1.3588780133290037, "grad_norm": 0.11524486541748047, "learning_rate": 0.002, "loss": 2.34, "step": 351520 }, { "epoch": 1.358916670532387, "grad_norm": 0.10622724145650864, "learning_rate": 0.002, "loss": 2.3445, "step": 351530 }, { "epoch": 1.3589553277357702, "grad_norm": 0.09704194217920303, "learning_rate": 0.002, "loss": 2.3352, "step": 351540 }, { "epoch": 1.3589939849391537, "grad_norm": 0.11838492751121521, "learning_rate": 0.002, "loss": 2.3284, "step": 351550 }, { "epoch": 1.359032642142537, "grad_norm": 0.0970839112997055, "learning_rate": 0.002, "loss": 2.3295, "step": 351560 }, { "epoch": 1.3590712993459202, "grad_norm": 0.10759337991476059, "learning_rate": 0.002, "loss": 2.3308, "step": 351570 }, { "epoch": 1.3591099565493034, "grad_norm": 0.0967593565583229, "learning_rate": 0.002, "loss": 2.3337, "step": 351580 }, { "epoch": 1.3591486137526867, "grad_norm": 0.10393026471138, "learning_rate": 0.002, "loss": 2.3197, "step": 351590 }, { "epoch": 1.35918727095607, "grad_norm": 0.10174359381198883, "learning_rate": 0.002, "loss": 2.3348, "step": 351600 }, { "epoch": 1.3592259281594532, "grad_norm": 0.09679939597845078, "learning_rate": 0.002, "loss": 2.3402, "step": 351610 }, { "epoch": 1.3592645853628365, "grad_norm": 0.10922277718782425, "learning_rate": 0.002, "loss": 2.329, "step": 351620 }, { "epoch": 1.3593032425662197, "grad_norm": 0.14478425681591034, "learning_rate": 0.002, "loss": 2.332, "step": 351630 }, { "epoch": 1.359341899769603, "grad_norm": 0.10015156120061874, "learning_rate": 0.002, "loss": 2.3419, "step": 351640 }, { "epoch": 1.3593805569729862, "grad_norm": 0.12160390615463257, "learning_rate": 0.002, "loss": 2.3272, "step": 351650 }, { "epoch": 1.3594192141763697, "grad_norm": 0.10180576890707016, "learning_rate": 0.002, "loss": 2.3203, "step": 351660 }, { "epoch": 1.359457871379753, "grad_norm": 0.10145292431116104, "learning_rate": 0.002, "loss": 2.331, "step": 351670 }, { "epoch": 1.3594965285831362, "grad_norm": 0.14017429947853088, "learning_rate": 0.002, "loss": 2.338, "step": 351680 }, { "epoch": 1.3595351857865194, "grad_norm": 0.10480217635631561, "learning_rate": 0.002, "loss": 2.3496, "step": 351690 }, { "epoch": 1.3595738429899027, "grad_norm": 0.0997604951262474, "learning_rate": 0.002, "loss": 2.3362, "step": 351700 }, { "epoch": 1.359612500193286, "grad_norm": 0.10736975818872452, "learning_rate": 0.002, "loss": 2.3332, "step": 351710 }, { "epoch": 1.3596511573966694, "grad_norm": 0.09866006672382355, "learning_rate": 0.002, "loss": 2.3233, "step": 351720 }, { "epoch": 1.3596898146000527, "grad_norm": 0.10628994554281235, "learning_rate": 0.002, "loss": 2.3269, "step": 351730 }, { "epoch": 1.359728471803436, "grad_norm": 0.11439824104309082, "learning_rate": 0.002, "loss": 2.3306, "step": 351740 }, { "epoch": 1.3597671290068192, "grad_norm": 0.10647710412740707, "learning_rate": 0.002, "loss": 2.3358, "step": 351750 }, { "epoch": 1.3598057862102024, "grad_norm": 0.09515728056430817, "learning_rate": 0.002, "loss": 2.3266, "step": 351760 }, { "epoch": 1.3598444434135857, "grad_norm": 0.1239086389541626, "learning_rate": 0.002, "loss": 2.3454, "step": 351770 }, { "epoch": 1.359883100616969, "grad_norm": 0.10595386475324631, "learning_rate": 0.002, "loss": 2.3283, "step": 351780 }, { "epoch": 1.3599217578203522, "grad_norm": 0.09424883872270584, "learning_rate": 0.002, "loss": 2.3321, "step": 351790 }, { "epoch": 1.3599604150237354, "grad_norm": 0.10692436248064041, "learning_rate": 0.002, "loss": 2.3305, "step": 351800 }, { "epoch": 1.3599990722271187, "grad_norm": 0.10175547748804092, "learning_rate": 0.002, "loss": 2.3337, "step": 351810 }, { "epoch": 1.3600377294305022, "grad_norm": 0.09597337990999222, "learning_rate": 0.002, "loss": 2.3261, "step": 351820 }, { "epoch": 1.3600763866338854, "grad_norm": 0.12366979569196701, "learning_rate": 0.002, "loss": 2.3436, "step": 351830 }, { "epoch": 1.3601150438372687, "grad_norm": 0.0925799235701561, "learning_rate": 0.002, "loss": 2.3301, "step": 351840 }, { "epoch": 1.360153701040652, "grad_norm": 0.10603862255811691, "learning_rate": 0.002, "loss": 2.3485, "step": 351850 }, { "epoch": 1.3601923582440352, "grad_norm": 0.12175677716732025, "learning_rate": 0.002, "loss": 2.3518, "step": 351860 }, { "epoch": 1.3602310154474184, "grad_norm": 0.10555287450551987, "learning_rate": 0.002, "loss": 2.3228, "step": 351870 }, { "epoch": 1.3602696726508017, "grad_norm": 0.10692015290260315, "learning_rate": 0.002, "loss": 2.3355, "step": 351880 }, { "epoch": 1.3603083298541851, "grad_norm": 0.10687336325645447, "learning_rate": 0.002, "loss": 2.332, "step": 351890 }, { "epoch": 1.3603469870575684, "grad_norm": 0.10911957919597626, "learning_rate": 0.002, "loss": 2.3318, "step": 351900 }, { "epoch": 1.3603856442609517, "grad_norm": 0.13761015236377716, "learning_rate": 0.002, "loss": 2.3402, "step": 351910 }, { "epoch": 1.360424301464335, "grad_norm": 0.108570896089077, "learning_rate": 0.002, "loss": 2.3321, "step": 351920 }, { "epoch": 1.3604629586677182, "grad_norm": 0.10664588212966919, "learning_rate": 0.002, "loss": 2.3388, "step": 351930 }, { "epoch": 1.3605016158711014, "grad_norm": 0.11082687228918076, "learning_rate": 0.002, "loss": 2.347, "step": 351940 }, { "epoch": 1.3605402730744847, "grad_norm": 0.11756327748298645, "learning_rate": 0.002, "loss": 2.3324, "step": 351950 }, { "epoch": 1.360578930277868, "grad_norm": 0.10725127905607224, "learning_rate": 0.002, "loss": 2.3359, "step": 351960 }, { "epoch": 1.3606175874812512, "grad_norm": 0.1116841658949852, "learning_rate": 0.002, "loss": 2.3434, "step": 351970 }, { "epoch": 1.3606562446846344, "grad_norm": 0.10605955868959427, "learning_rate": 0.002, "loss": 2.3409, "step": 351980 }, { "epoch": 1.360694901888018, "grad_norm": 0.10080024600028992, "learning_rate": 0.002, "loss": 2.3234, "step": 351990 }, { "epoch": 1.3607335590914011, "grad_norm": 0.12175345420837402, "learning_rate": 0.002, "loss": 2.3312, "step": 352000 }, { "epoch": 1.3607722162947844, "grad_norm": 0.10977376252412796, "learning_rate": 0.002, "loss": 2.3469, "step": 352010 }, { "epoch": 1.3608108734981677, "grad_norm": 0.17455539107322693, "learning_rate": 0.002, "loss": 2.3248, "step": 352020 }, { "epoch": 1.360849530701551, "grad_norm": 0.10920149087905884, "learning_rate": 0.002, "loss": 2.3291, "step": 352030 }, { "epoch": 1.3608881879049342, "grad_norm": 0.09943852573633194, "learning_rate": 0.002, "loss": 2.317, "step": 352040 }, { "epoch": 1.3609268451083174, "grad_norm": 0.09689683467149734, "learning_rate": 0.002, "loss": 2.3315, "step": 352050 }, { "epoch": 1.3609655023117009, "grad_norm": 0.13160747289657593, "learning_rate": 0.002, "loss": 2.3299, "step": 352060 }, { "epoch": 1.3610041595150841, "grad_norm": 0.11040982604026794, "learning_rate": 0.002, "loss": 2.3448, "step": 352070 }, { "epoch": 1.3610428167184674, "grad_norm": 0.10374343395233154, "learning_rate": 0.002, "loss": 2.3269, "step": 352080 }, { "epoch": 1.3610814739218506, "grad_norm": 0.10358106344938278, "learning_rate": 0.002, "loss": 2.3296, "step": 352090 }, { "epoch": 1.361120131125234, "grad_norm": 0.1339714080095291, "learning_rate": 0.002, "loss": 2.3276, "step": 352100 }, { "epoch": 1.3611587883286171, "grad_norm": 0.12838685512542725, "learning_rate": 0.002, "loss": 2.3466, "step": 352110 }, { "epoch": 1.3611974455320004, "grad_norm": 0.11431363970041275, "learning_rate": 0.002, "loss": 2.3433, "step": 352120 }, { "epoch": 1.3612361027353836, "grad_norm": 0.10388395935297012, "learning_rate": 0.002, "loss": 2.3329, "step": 352130 }, { "epoch": 1.361274759938767, "grad_norm": 0.10466761142015457, "learning_rate": 0.002, "loss": 2.3355, "step": 352140 }, { "epoch": 1.3613134171421502, "grad_norm": 0.10688184946775436, "learning_rate": 0.002, "loss": 2.3417, "step": 352150 }, { "epoch": 1.3613520743455336, "grad_norm": 0.10324890166521072, "learning_rate": 0.002, "loss": 2.3395, "step": 352160 }, { "epoch": 1.3613907315489169, "grad_norm": 0.10610228776931763, "learning_rate": 0.002, "loss": 2.3277, "step": 352170 }, { "epoch": 1.3614293887523001, "grad_norm": 0.12272738665342331, "learning_rate": 0.002, "loss": 2.33, "step": 352180 }, { "epoch": 1.3614680459556834, "grad_norm": 0.18759821355342865, "learning_rate": 0.002, "loss": 2.3437, "step": 352190 }, { "epoch": 1.3615067031590666, "grad_norm": 0.10147456079721451, "learning_rate": 0.002, "loss": 2.3392, "step": 352200 }, { "epoch": 1.3615453603624499, "grad_norm": 0.10508518666028976, "learning_rate": 0.002, "loss": 2.3425, "step": 352210 }, { "epoch": 1.3615840175658331, "grad_norm": 0.10307451337575912, "learning_rate": 0.002, "loss": 2.3318, "step": 352220 }, { "epoch": 1.3616226747692166, "grad_norm": 0.11710530519485474, "learning_rate": 0.002, "loss": 2.321, "step": 352230 }, { "epoch": 1.3616613319725999, "grad_norm": 0.10675527155399323, "learning_rate": 0.002, "loss": 2.3402, "step": 352240 }, { "epoch": 1.3616999891759831, "grad_norm": 0.09699858725070953, "learning_rate": 0.002, "loss": 2.3339, "step": 352250 }, { "epoch": 1.3617386463793664, "grad_norm": 0.11721307784318924, "learning_rate": 0.002, "loss": 2.3343, "step": 352260 }, { "epoch": 1.3617773035827496, "grad_norm": 0.10319364070892334, "learning_rate": 0.002, "loss": 2.3372, "step": 352270 }, { "epoch": 1.3618159607861329, "grad_norm": 0.09261713922023773, "learning_rate": 0.002, "loss": 2.3218, "step": 352280 }, { "epoch": 1.3618546179895161, "grad_norm": 0.09714782238006592, "learning_rate": 0.002, "loss": 2.3283, "step": 352290 }, { "epoch": 1.3618932751928994, "grad_norm": 0.11221813410520554, "learning_rate": 0.002, "loss": 2.3377, "step": 352300 }, { "epoch": 1.3619319323962826, "grad_norm": 0.10760895907878876, "learning_rate": 0.002, "loss": 2.3123, "step": 352310 }, { "epoch": 1.3619705895996659, "grad_norm": 0.10258577764034271, "learning_rate": 0.002, "loss": 2.3266, "step": 352320 }, { "epoch": 1.3620092468030494, "grad_norm": 0.11032426357269287, "learning_rate": 0.002, "loss": 2.3552, "step": 352330 }, { "epoch": 1.3620479040064326, "grad_norm": 0.10629115253686905, "learning_rate": 0.002, "loss": 2.3422, "step": 352340 }, { "epoch": 1.3620865612098159, "grad_norm": 0.11356248706579208, "learning_rate": 0.002, "loss": 2.3258, "step": 352350 }, { "epoch": 1.3621252184131991, "grad_norm": 0.1022658571600914, "learning_rate": 0.002, "loss": 2.3273, "step": 352360 }, { "epoch": 1.3621638756165824, "grad_norm": 0.13350942730903625, "learning_rate": 0.002, "loss": 2.3353, "step": 352370 }, { "epoch": 1.3622025328199656, "grad_norm": 0.10261652618646622, "learning_rate": 0.002, "loss": 2.3503, "step": 352380 }, { "epoch": 1.3622411900233489, "grad_norm": 0.1132517158985138, "learning_rate": 0.002, "loss": 2.3447, "step": 352390 }, { "epoch": 1.3622798472267323, "grad_norm": 0.09861195087432861, "learning_rate": 0.002, "loss": 2.3196, "step": 352400 }, { "epoch": 1.3623185044301156, "grad_norm": 0.10845063626766205, "learning_rate": 0.002, "loss": 2.3456, "step": 352410 }, { "epoch": 1.3623571616334988, "grad_norm": 0.13102145493030548, "learning_rate": 0.002, "loss": 2.329, "step": 352420 }, { "epoch": 1.362395818836882, "grad_norm": 0.1068115234375, "learning_rate": 0.002, "loss": 2.335, "step": 352430 }, { "epoch": 1.3624344760402654, "grad_norm": 0.10941077768802643, "learning_rate": 0.002, "loss": 2.3344, "step": 352440 }, { "epoch": 1.3624731332436486, "grad_norm": 0.11071078479290009, "learning_rate": 0.002, "loss": 2.3484, "step": 352450 }, { "epoch": 1.3625117904470319, "grad_norm": 0.1088944599032402, "learning_rate": 0.002, "loss": 2.3215, "step": 352460 }, { "epoch": 1.362550447650415, "grad_norm": 0.09720829129219055, "learning_rate": 0.002, "loss": 2.3314, "step": 352470 }, { "epoch": 1.3625891048537984, "grad_norm": 0.18038299679756165, "learning_rate": 0.002, "loss": 2.3364, "step": 352480 }, { "epoch": 1.3626277620571816, "grad_norm": 0.1162460446357727, "learning_rate": 0.002, "loss": 2.332, "step": 352490 }, { "epoch": 1.362666419260565, "grad_norm": 0.1099858358502388, "learning_rate": 0.002, "loss": 2.3478, "step": 352500 }, { "epoch": 1.3627050764639483, "grad_norm": 0.11105599254369736, "learning_rate": 0.002, "loss": 2.3369, "step": 352510 }, { "epoch": 1.3627437336673316, "grad_norm": 0.1036272943019867, "learning_rate": 0.002, "loss": 2.3341, "step": 352520 }, { "epoch": 1.3627823908707148, "grad_norm": 0.10909338295459747, "learning_rate": 0.002, "loss": 2.3364, "step": 352530 }, { "epoch": 1.362821048074098, "grad_norm": 0.12408467382192612, "learning_rate": 0.002, "loss": 2.3355, "step": 352540 }, { "epoch": 1.3628597052774813, "grad_norm": 0.11634545773267746, "learning_rate": 0.002, "loss": 2.3413, "step": 352550 }, { "epoch": 1.3628983624808648, "grad_norm": 0.10679116100072861, "learning_rate": 0.002, "loss": 2.3286, "step": 352560 }, { "epoch": 1.362937019684248, "grad_norm": 0.09850242733955383, "learning_rate": 0.002, "loss": 2.342, "step": 352570 }, { "epoch": 1.3629756768876313, "grad_norm": 0.1210622563958168, "learning_rate": 0.002, "loss": 2.3282, "step": 352580 }, { "epoch": 1.3630143340910146, "grad_norm": 0.11602368950843811, "learning_rate": 0.002, "loss": 2.3355, "step": 352590 }, { "epoch": 1.3630529912943978, "grad_norm": 0.09763062745332718, "learning_rate": 0.002, "loss": 2.3372, "step": 352600 }, { "epoch": 1.363091648497781, "grad_norm": 0.09681380540132523, "learning_rate": 0.002, "loss": 2.3299, "step": 352610 }, { "epoch": 1.3631303057011643, "grad_norm": 0.10678580403327942, "learning_rate": 0.002, "loss": 2.3374, "step": 352620 }, { "epoch": 1.3631689629045476, "grad_norm": 0.10410351306200027, "learning_rate": 0.002, "loss": 2.3487, "step": 352630 }, { "epoch": 1.3632076201079308, "grad_norm": 0.10169476270675659, "learning_rate": 0.002, "loss": 2.3405, "step": 352640 }, { "epoch": 1.363246277311314, "grad_norm": 0.11437112092971802, "learning_rate": 0.002, "loss": 2.3283, "step": 352650 }, { "epoch": 1.3632849345146973, "grad_norm": 0.1199316531419754, "learning_rate": 0.002, "loss": 2.3402, "step": 352660 }, { "epoch": 1.3633235917180808, "grad_norm": 0.09013929218053818, "learning_rate": 0.002, "loss": 2.33, "step": 352670 }, { "epoch": 1.363362248921464, "grad_norm": 0.09707517921924591, "learning_rate": 0.002, "loss": 2.3545, "step": 352680 }, { "epoch": 1.3634009061248473, "grad_norm": 0.11999980360269547, "learning_rate": 0.002, "loss": 2.3332, "step": 352690 }, { "epoch": 1.3634395633282306, "grad_norm": 0.1064855232834816, "learning_rate": 0.002, "loss": 2.3339, "step": 352700 }, { "epoch": 1.3634782205316138, "grad_norm": 0.11436594277620316, "learning_rate": 0.002, "loss": 2.3252, "step": 352710 }, { "epoch": 1.363516877734997, "grad_norm": 0.10475486516952515, "learning_rate": 0.002, "loss": 2.3276, "step": 352720 }, { "epoch": 1.3635555349383806, "grad_norm": 0.10481761395931244, "learning_rate": 0.002, "loss": 2.3291, "step": 352730 }, { "epoch": 1.3635941921417638, "grad_norm": 0.09346312284469604, "learning_rate": 0.002, "loss": 2.3292, "step": 352740 }, { "epoch": 1.363632849345147, "grad_norm": 0.10701143741607666, "learning_rate": 0.002, "loss": 2.3257, "step": 352750 }, { "epoch": 1.3636715065485303, "grad_norm": 0.10135741531848907, "learning_rate": 0.002, "loss": 2.3378, "step": 352760 }, { "epoch": 1.3637101637519136, "grad_norm": 0.13325202465057373, "learning_rate": 0.002, "loss": 2.3485, "step": 352770 }, { "epoch": 1.3637488209552968, "grad_norm": 0.09188321977853775, "learning_rate": 0.002, "loss": 2.3336, "step": 352780 }, { "epoch": 1.36378747815868, "grad_norm": 0.10058906674385071, "learning_rate": 0.002, "loss": 2.3235, "step": 352790 }, { "epoch": 1.3638261353620633, "grad_norm": 0.09714401513338089, "learning_rate": 0.002, "loss": 2.3385, "step": 352800 }, { "epoch": 1.3638647925654466, "grad_norm": 0.11655130237340927, "learning_rate": 0.002, "loss": 2.3257, "step": 352810 }, { "epoch": 1.3639034497688298, "grad_norm": 0.1096164807677269, "learning_rate": 0.002, "loss": 2.3282, "step": 352820 }, { "epoch": 1.363942106972213, "grad_norm": 0.10694403201341629, "learning_rate": 0.002, "loss": 2.3315, "step": 352830 }, { "epoch": 1.3639807641755965, "grad_norm": 0.1013718917965889, "learning_rate": 0.002, "loss": 2.3411, "step": 352840 }, { "epoch": 1.3640194213789798, "grad_norm": 0.10866200178861618, "learning_rate": 0.002, "loss": 2.3371, "step": 352850 }, { "epoch": 1.364058078582363, "grad_norm": 0.10821247845888138, "learning_rate": 0.002, "loss": 2.3286, "step": 352860 }, { "epoch": 1.3640967357857463, "grad_norm": 0.09890243411064148, "learning_rate": 0.002, "loss": 2.35, "step": 352870 }, { "epoch": 1.3641353929891296, "grad_norm": 0.11495185643434525, "learning_rate": 0.002, "loss": 2.3194, "step": 352880 }, { "epoch": 1.3641740501925128, "grad_norm": 0.09019946306943893, "learning_rate": 0.002, "loss": 2.3295, "step": 352890 }, { "epoch": 1.3642127073958963, "grad_norm": 0.11374910175800323, "learning_rate": 0.002, "loss": 2.3225, "step": 352900 }, { "epoch": 1.3642513645992795, "grad_norm": 0.10232894122600555, "learning_rate": 0.002, "loss": 2.3367, "step": 352910 }, { "epoch": 1.3642900218026628, "grad_norm": 0.10844322293996811, "learning_rate": 0.002, "loss": 2.3404, "step": 352920 }, { "epoch": 1.364328679006046, "grad_norm": 0.10210633277893066, "learning_rate": 0.002, "loss": 2.3334, "step": 352930 }, { "epoch": 1.3643673362094293, "grad_norm": 0.09644097834825516, "learning_rate": 0.002, "loss": 2.3179, "step": 352940 }, { "epoch": 1.3644059934128125, "grad_norm": 0.10927636176347733, "learning_rate": 0.002, "loss": 2.3351, "step": 352950 }, { "epoch": 1.3644446506161958, "grad_norm": 0.10157135128974915, "learning_rate": 0.002, "loss": 2.3328, "step": 352960 }, { "epoch": 1.364483307819579, "grad_norm": 0.11467833817005157, "learning_rate": 0.002, "loss": 2.3229, "step": 352970 }, { "epoch": 1.3645219650229623, "grad_norm": 0.1262151449918747, "learning_rate": 0.002, "loss": 2.3549, "step": 352980 }, { "epoch": 1.3645606222263456, "grad_norm": 0.10918152332305908, "learning_rate": 0.002, "loss": 2.3206, "step": 352990 }, { "epoch": 1.3645992794297288, "grad_norm": 0.09364178776741028, "learning_rate": 0.002, "loss": 2.3221, "step": 353000 }, { "epoch": 1.3646379366331123, "grad_norm": 0.11354406923055649, "learning_rate": 0.002, "loss": 2.3286, "step": 353010 }, { "epoch": 1.3646765938364955, "grad_norm": 0.10014007240533829, "learning_rate": 0.002, "loss": 2.3324, "step": 353020 }, { "epoch": 1.3647152510398788, "grad_norm": 0.11784714460372925, "learning_rate": 0.002, "loss": 2.3329, "step": 353030 }, { "epoch": 1.364753908243262, "grad_norm": 0.10475286841392517, "learning_rate": 0.002, "loss": 2.3369, "step": 353040 }, { "epoch": 1.3647925654466453, "grad_norm": 0.1111367717385292, "learning_rate": 0.002, "loss": 2.3297, "step": 353050 }, { "epoch": 1.3648312226500285, "grad_norm": 0.10144095867872238, "learning_rate": 0.002, "loss": 2.3271, "step": 353060 }, { "epoch": 1.364869879853412, "grad_norm": 0.10360066592693329, "learning_rate": 0.002, "loss": 2.3333, "step": 353070 }, { "epoch": 1.3649085370567953, "grad_norm": 0.11523858457803726, "learning_rate": 0.002, "loss": 2.3332, "step": 353080 }, { "epoch": 1.3649471942601785, "grad_norm": 0.09359196573495865, "learning_rate": 0.002, "loss": 2.3256, "step": 353090 }, { "epoch": 1.3649858514635618, "grad_norm": 0.11894936114549637, "learning_rate": 0.002, "loss": 2.3339, "step": 353100 }, { "epoch": 1.365024508666945, "grad_norm": 0.09457987546920776, "learning_rate": 0.002, "loss": 2.3407, "step": 353110 }, { "epoch": 1.3650631658703283, "grad_norm": 0.1078084260225296, "learning_rate": 0.002, "loss": 2.3256, "step": 353120 }, { "epoch": 1.3651018230737115, "grad_norm": 0.09385760128498077, "learning_rate": 0.002, "loss": 2.3264, "step": 353130 }, { "epoch": 1.3651404802770948, "grad_norm": 0.0927666574716568, "learning_rate": 0.002, "loss": 2.3209, "step": 353140 }, { "epoch": 1.365179137480478, "grad_norm": 0.1224551871418953, "learning_rate": 0.002, "loss": 2.3251, "step": 353150 }, { "epoch": 1.3652177946838613, "grad_norm": 0.0971512421965599, "learning_rate": 0.002, "loss": 2.3242, "step": 353160 }, { "epoch": 1.3652564518872445, "grad_norm": 0.1582522988319397, "learning_rate": 0.002, "loss": 2.3461, "step": 353170 }, { "epoch": 1.365295109090628, "grad_norm": 0.11045419424772263, "learning_rate": 0.002, "loss": 2.3534, "step": 353180 }, { "epoch": 1.3653337662940113, "grad_norm": 0.09811891615390778, "learning_rate": 0.002, "loss": 2.3407, "step": 353190 }, { "epoch": 1.3653724234973945, "grad_norm": 0.12702874839305878, "learning_rate": 0.002, "loss": 2.3295, "step": 353200 }, { "epoch": 1.3654110807007778, "grad_norm": 0.09151419252157211, "learning_rate": 0.002, "loss": 2.3337, "step": 353210 }, { "epoch": 1.365449737904161, "grad_norm": 0.157108873128891, "learning_rate": 0.002, "loss": 2.3479, "step": 353220 }, { "epoch": 1.3654883951075443, "grad_norm": 0.44005656242370605, "learning_rate": 0.002, "loss": 2.337, "step": 353230 }, { "epoch": 1.3655270523109277, "grad_norm": 0.11164695769548416, "learning_rate": 0.002, "loss": 2.3377, "step": 353240 }, { "epoch": 1.365565709514311, "grad_norm": 0.12131067365407944, "learning_rate": 0.002, "loss": 2.3412, "step": 353250 }, { "epoch": 1.3656043667176943, "grad_norm": 0.09036579728126526, "learning_rate": 0.002, "loss": 2.3247, "step": 353260 }, { "epoch": 1.3656430239210775, "grad_norm": 0.11555355042219162, "learning_rate": 0.002, "loss": 2.3382, "step": 353270 }, { "epoch": 1.3656816811244608, "grad_norm": 0.09683941304683685, "learning_rate": 0.002, "loss": 2.3268, "step": 353280 }, { "epoch": 1.365720338327844, "grad_norm": 0.09810633212327957, "learning_rate": 0.002, "loss": 2.3356, "step": 353290 }, { "epoch": 1.3657589955312273, "grad_norm": 0.1062944084405899, "learning_rate": 0.002, "loss": 2.3464, "step": 353300 }, { "epoch": 1.3657976527346105, "grad_norm": 0.1608039289712906, "learning_rate": 0.002, "loss": 2.3421, "step": 353310 }, { "epoch": 1.3658363099379938, "grad_norm": 0.10838613659143448, "learning_rate": 0.002, "loss": 2.3407, "step": 353320 }, { "epoch": 1.365874967141377, "grad_norm": 0.12178441137075424, "learning_rate": 0.002, "loss": 2.3313, "step": 353330 }, { "epoch": 1.3659136243447603, "grad_norm": 0.11368858814239502, "learning_rate": 0.002, "loss": 2.3508, "step": 353340 }, { "epoch": 1.3659522815481437, "grad_norm": 0.09420602023601532, "learning_rate": 0.002, "loss": 2.3422, "step": 353350 }, { "epoch": 1.365990938751527, "grad_norm": 0.11776195466518402, "learning_rate": 0.002, "loss": 2.3349, "step": 353360 }, { "epoch": 1.3660295959549102, "grad_norm": 0.10489777475595474, "learning_rate": 0.002, "loss": 2.3308, "step": 353370 }, { "epoch": 1.3660682531582935, "grad_norm": 0.11508101224899292, "learning_rate": 0.002, "loss": 2.3167, "step": 353380 }, { "epoch": 1.3661069103616768, "grad_norm": 0.1090180054306984, "learning_rate": 0.002, "loss": 2.3326, "step": 353390 }, { "epoch": 1.36614556756506, "grad_norm": 0.10367222130298615, "learning_rate": 0.002, "loss": 2.3398, "step": 353400 }, { "epoch": 1.3661842247684435, "grad_norm": 0.10160892456769943, "learning_rate": 0.002, "loss": 2.3272, "step": 353410 }, { "epoch": 1.3662228819718267, "grad_norm": 0.1016288623213768, "learning_rate": 0.002, "loss": 2.3337, "step": 353420 }, { "epoch": 1.36626153917521, "grad_norm": 0.09615586698055267, "learning_rate": 0.002, "loss": 2.3266, "step": 353430 }, { "epoch": 1.3663001963785932, "grad_norm": 0.09929027408361435, "learning_rate": 0.002, "loss": 2.3275, "step": 353440 }, { "epoch": 1.3663388535819765, "grad_norm": 0.11279615014791489, "learning_rate": 0.002, "loss": 2.3234, "step": 353450 }, { "epoch": 1.3663775107853597, "grad_norm": 0.10219208896160126, "learning_rate": 0.002, "loss": 2.3421, "step": 353460 }, { "epoch": 1.366416167988743, "grad_norm": 0.10220801830291748, "learning_rate": 0.002, "loss": 2.3351, "step": 353470 }, { "epoch": 1.3664548251921262, "grad_norm": 0.10270463675260544, "learning_rate": 0.002, "loss": 2.3277, "step": 353480 }, { "epoch": 1.3664934823955095, "grad_norm": 0.10466257482767105, "learning_rate": 0.002, "loss": 2.347, "step": 353490 }, { "epoch": 1.3665321395988927, "grad_norm": 0.09947629272937775, "learning_rate": 0.002, "loss": 2.3218, "step": 353500 }, { "epoch": 1.366570796802276, "grad_norm": 0.09577259421348572, "learning_rate": 0.002, "loss": 2.322, "step": 353510 }, { "epoch": 1.3666094540056595, "grad_norm": 0.0886072888970375, "learning_rate": 0.002, "loss": 2.3367, "step": 353520 }, { "epoch": 1.3666481112090427, "grad_norm": 0.11046717315912247, "learning_rate": 0.002, "loss": 2.3232, "step": 353530 }, { "epoch": 1.366686768412426, "grad_norm": 0.10688751935958862, "learning_rate": 0.002, "loss": 2.3295, "step": 353540 }, { "epoch": 1.3667254256158092, "grad_norm": 0.0929543748497963, "learning_rate": 0.002, "loss": 2.335, "step": 353550 }, { "epoch": 1.3667640828191925, "grad_norm": 0.09754712879657745, "learning_rate": 0.002, "loss": 2.3272, "step": 353560 }, { "epoch": 1.3668027400225757, "grad_norm": 0.11045172810554504, "learning_rate": 0.002, "loss": 2.3339, "step": 353570 }, { "epoch": 1.3668413972259592, "grad_norm": 0.10240747779607773, "learning_rate": 0.002, "loss": 2.3329, "step": 353580 }, { "epoch": 1.3668800544293425, "grad_norm": 0.11092318594455719, "learning_rate": 0.002, "loss": 2.349, "step": 353590 }, { "epoch": 1.3669187116327257, "grad_norm": 0.10332795232534409, "learning_rate": 0.002, "loss": 2.3225, "step": 353600 }, { "epoch": 1.366957368836109, "grad_norm": 0.10368195176124573, "learning_rate": 0.002, "loss": 2.3399, "step": 353610 }, { "epoch": 1.3669960260394922, "grad_norm": 0.11492601782083511, "learning_rate": 0.002, "loss": 2.3293, "step": 353620 }, { "epoch": 1.3670346832428755, "grad_norm": 0.10416862368583679, "learning_rate": 0.002, "loss": 2.3385, "step": 353630 }, { "epoch": 1.3670733404462587, "grad_norm": 0.10875711590051651, "learning_rate": 0.002, "loss": 2.3391, "step": 353640 }, { "epoch": 1.367111997649642, "grad_norm": 0.09621744602918625, "learning_rate": 0.002, "loss": 2.3328, "step": 353650 }, { "epoch": 1.3671506548530252, "grad_norm": 0.10564836859703064, "learning_rate": 0.002, "loss": 2.3286, "step": 353660 }, { "epoch": 1.3671893120564085, "grad_norm": 0.12837018072605133, "learning_rate": 0.002, "loss": 2.3418, "step": 353670 }, { "epoch": 1.3672279692597917, "grad_norm": 0.10778024792671204, "learning_rate": 0.002, "loss": 2.3252, "step": 353680 }, { "epoch": 1.3672666264631752, "grad_norm": 0.11407023668289185, "learning_rate": 0.002, "loss": 2.3287, "step": 353690 }, { "epoch": 1.3673052836665585, "grad_norm": 0.12429900467395782, "learning_rate": 0.002, "loss": 2.3229, "step": 353700 }, { "epoch": 1.3673439408699417, "grad_norm": 0.09666662663221359, "learning_rate": 0.002, "loss": 2.3263, "step": 353710 }, { "epoch": 1.367382598073325, "grad_norm": 0.10815172642469406, "learning_rate": 0.002, "loss": 2.3107, "step": 353720 }, { "epoch": 1.3674212552767082, "grad_norm": 0.10770297050476074, "learning_rate": 0.002, "loss": 2.3393, "step": 353730 }, { "epoch": 1.3674599124800915, "grad_norm": 0.10925965011119843, "learning_rate": 0.002, "loss": 2.3261, "step": 353740 }, { "epoch": 1.367498569683475, "grad_norm": 0.114189013838768, "learning_rate": 0.002, "loss": 2.3358, "step": 353750 }, { "epoch": 1.3675372268868582, "grad_norm": 0.11431930959224701, "learning_rate": 0.002, "loss": 2.328, "step": 353760 }, { "epoch": 1.3675758840902414, "grad_norm": 0.1838698387145996, "learning_rate": 0.002, "loss": 2.3265, "step": 353770 }, { "epoch": 1.3676145412936247, "grad_norm": 0.10204703360795975, "learning_rate": 0.002, "loss": 2.3339, "step": 353780 }, { "epoch": 1.367653198497008, "grad_norm": 0.10525187104940414, "learning_rate": 0.002, "loss": 2.3219, "step": 353790 }, { "epoch": 1.3676918557003912, "grad_norm": 0.11447043716907501, "learning_rate": 0.002, "loss": 2.33, "step": 353800 }, { "epoch": 1.3677305129037745, "grad_norm": 0.10773241519927979, "learning_rate": 0.002, "loss": 2.3288, "step": 353810 }, { "epoch": 1.3677691701071577, "grad_norm": 0.11622647196054459, "learning_rate": 0.002, "loss": 2.3289, "step": 353820 }, { "epoch": 1.367807827310541, "grad_norm": 0.13911838829517365, "learning_rate": 0.002, "loss": 2.3313, "step": 353830 }, { "epoch": 1.3678464845139242, "grad_norm": 0.11007165908813477, "learning_rate": 0.002, "loss": 2.3289, "step": 353840 }, { "epoch": 1.3678851417173077, "grad_norm": 0.09723573178052902, "learning_rate": 0.002, "loss": 2.3527, "step": 353850 }, { "epoch": 1.367923798920691, "grad_norm": 0.10949946939945221, "learning_rate": 0.002, "loss": 2.3482, "step": 353860 }, { "epoch": 1.3679624561240742, "grad_norm": 0.09649864584207535, "learning_rate": 0.002, "loss": 2.3268, "step": 353870 }, { "epoch": 1.3680011133274574, "grad_norm": 0.09300088882446289, "learning_rate": 0.002, "loss": 2.3349, "step": 353880 }, { "epoch": 1.3680397705308407, "grad_norm": 0.11034957319498062, "learning_rate": 0.002, "loss": 2.3286, "step": 353890 }, { "epoch": 1.368078427734224, "grad_norm": 0.10465884208679199, "learning_rate": 0.002, "loss": 2.3404, "step": 353900 }, { "epoch": 1.3681170849376072, "grad_norm": 0.12185883522033691, "learning_rate": 0.002, "loss": 2.3265, "step": 353910 }, { "epoch": 1.3681557421409907, "grad_norm": 0.08666864782571793, "learning_rate": 0.002, "loss": 2.3443, "step": 353920 }, { "epoch": 1.368194399344374, "grad_norm": 0.11534685641527176, "learning_rate": 0.002, "loss": 2.3422, "step": 353930 }, { "epoch": 1.3682330565477572, "grad_norm": 0.12451288104057312, "learning_rate": 0.002, "loss": 2.3326, "step": 353940 }, { "epoch": 1.3682717137511404, "grad_norm": 0.10793226212263107, "learning_rate": 0.002, "loss": 2.334, "step": 353950 }, { "epoch": 1.3683103709545237, "grad_norm": 0.09760882705450058, "learning_rate": 0.002, "loss": 2.3247, "step": 353960 }, { "epoch": 1.368349028157907, "grad_norm": 0.09534558653831482, "learning_rate": 0.002, "loss": 2.3336, "step": 353970 }, { "epoch": 1.3683876853612902, "grad_norm": 0.10450071841478348, "learning_rate": 0.002, "loss": 2.3315, "step": 353980 }, { "epoch": 1.3684263425646734, "grad_norm": 0.09383483231067657, "learning_rate": 0.002, "loss": 2.3399, "step": 353990 }, { "epoch": 1.3684649997680567, "grad_norm": 0.11411093175411224, "learning_rate": 0.002, "loss": 2.3261, "step": 354000 }, { "epoch": 1.36850365697144, "grad_norm": 0.10227649658918381, "learning_rate": 0.002, "loss": 2.3343, "step": 354010 }, { "epoch": 1.3685423141748234, "grad_norm": 0.10927411168813705, "learning_rate": 0.002, "loss": 2.3371, "step": 354020 }, { "epoch": 1.3685809713782067, "grad_norm": 0.14527519047260284, "learning_rate": 0.002, "loss": 2.3258, "step": 354030 }, { "epoch": 1.36861962858159, "grad_norm": 0.10132671892642975, "learning_rate": 0.002, "loss": 2.3211, "step": 354040 }, { "epoch": 1.3686582857849732, "grad_norm": 0.10364679992198944, "learning_rate": 0.002, "loss": 2.3413, "step": 354050 }, { "epoch": 1.3686969429883564, "grad_norm": 0.09082487225532532, "learning_rate": 0.002, "loss": 2.3208, "step": 354060 }, { "epoch": 1.3687356001917397, "grad_norm": 0.10917646437883377, "learning_rate": 0.002, "loss": 2.341, "step": 354070 }, { "epoch": 1.368774257395123, "grad_norm": 0.11109345406293869, "learning_rate": 0.002, "loss": 2.3212, "step": 354080 }, { "epoch": 1.3688129145985064, "grad_norm": 0.10947670042514801, "learning_rate": 0.002, "loss": 2.3299, "step": 354090 }, { "epoch": 1.3688515718018897, "grad_norm": 0.20453643798828125, "learning_rate": 0.002, "loss": 2.3276, "step": 354100 }, { "epoch": 1.368890229005273, "grad_norm": 0.260464608669281, "learning_rate": 0.002, "loss": 2.3408, "step": 354110 }, { "epoch": 1.3689288862086562, "grad_norm": 0.10048062354326248, "learning_rate": 0.002, "loss": 2.3398, "step": 354120 }, { "epoch": 1.3689675434120394, "grad_norm": 0.09973052889108658, "learning_rate": 0.002, "loss": 2.347, "step": 354130 }, { "epoch": 1.3690062006154227, "grad_norm": 0.10623440146446228, "learning_rate": 0.002, "loss": 2.3227, "step": 354140 }, { "epoch": 1.369044857818806, "grad_norm": 0.10756231099367142, "learning_rate": 0.002, "loss": 2.3437, "step": 354150 }, { "epoch": 1.3690835150221892, "grad_norm": 0.09614838659763336, "learning_rate": 0.002, "loss": 2.3289, "step": 354160 }, { "epoch": 1.3691221722255724, "grad_norm": 0.099722720682621, "learning_rate": 0.002, "loss": 2.3403, "step": 354170 }, { "epoch": 1.3691608294289557, "grad_norm": 0.10293753445148468, "learning_rate": 0.002, "loss": 2.3243, "step": 354180 }, { "epoch": 1.3691994866323391, "grad_norm": 0.09563116729259491, "learning_rate": 0.002, "loss": 2.3351, "step": 354190 }, { "epoch": 1.3692381438357224, "grad_norm": 0.10133104026317596, "learning_rate": 0.002, "loss": 2.3513, "step": 354200 }, { "epoch": 1.3692768010391057, "grad_norm": 0.13343344628810883, "learning_rate": 0.002, "loss": 2.3202, "step": 354210 }, { "epoch": 1.369315458242489, "grad_norm": 0.09180284291505814, "learning_rate": 0.002, "loss": 2.3216, "step": 354220 }, { "epoch": 1.3693541154458722, "grad_norm": 0.11000876873731613, "learning_rate": 0.002, "loss": 2.3254, "step": 354230 }, { "epoch": 1.3693927726492554, "grad_norm": 0.1341230273246765, "learning_rate": 0.002, "loss": 2.3317, "step": 354240 }, { "epoch": 1.3694314298526387, "grad_norm": 0.09811849147081375, "learning_rate": 0.002, "loss": 2.3231, "step": 354250 }, { "epoch": 1.3694700870560221, "grad_norm": 0.11047971248626709, "learning_rate": 0.002, "loss": 2.3248, "step": 354260 }, { "epoch": 1.3695087442594054, "grad_norm": 0.09250407665967941, "learning_rate": 0.002, "loss": 2.332, "step": 354270 }, { "epoch": 1.3695474014627886, "grad_norm": 0.11461371183395386, "learning_rate": 0.002, "loss": 2.3407, "step": 354280 }, { "epoch": 1.369586058666172, "grad_norm": 0.10704251378774643, "learning_rate": 0.002, "loss": 2.3287, "step": 354290 }, { "epoch": 1.3696247158695551, "grad_norm": 0.10264694690704346, "learning_rate": 0.002, "loss": 2.3474, "step": 354300 }, { "epoch": 1.3696633730729384, "grad_norm": 0.10288636386394501, "learning_rate": 0.002, "loss": 2.3278, "step": 354310 }, { "epoch": 1.3697020302763216, "grad_norm": 0.10215355455875397, "learning_rate": 0.002, "loss": 2.3273, "step": 354320 }, { "epoch": 1.369740687479705, "grad_norm": 0.1196942999958992, "learning_rate": 0.002, "loss": 2.3494, "step": 354330 }, { "epoch": 1.3697793446830882, "grad_norm": 0.11092463880777359, "learning_rate": 0.002, "loss": 2.3298, "step": 354340 }, { "epoch": 1.3698180018864714, "grad_norm": 0.11861385405063629, "learning_rate": 0.002, "loss": 2.3402, "step": 354350 }, { "epoch": 1.3698566590898549, "grad_norm": 0.1024026945233345, "learning_rate": 0.002, "loss": 2.3298, "step": 354360 }, { "epoch": 1.3698953162932381, "grad_norm": 0.11804378032684326, "learning_rate": 0.002, "loss": 2.3242, "step": 354370 }, { "epoch": 1.3699339734966214, "grad_norm": 0.11109177023172379, "learning_rate": 0.002, "loss": 2.3314, "step": 354380 }, { "epoch": 1.3699726307000046, "grad_norm": 0.1200912669301033, "learning_rate": 0.002, "loss": 2.3464, "step": 354390 }, { "epoch": 1.3700112879033879, "grad_norm": 0.09660663455724716, "learning_rate": 0.002, "loss": 2.3377, "step": 354400 }, { "epoch": 1.3700499451067711, "grad_norm": 0.11022187024354935, "learning_rate": 0.002, "loss": 2.3408, "step": 354410 }, { "epoch": 1.3700886023101546, "grad_norm": 0.10194243490695953, "learning_rate": 0.002, "loss": 2.3485, "step": 354420 }, { "epoch": 1.3701272595135379, "grad_norm": 0.10244203358888626, "learning_rate": 0.002, "loss": 2.3397, "step": 354430 }, { "epoch": 1.3701659167169211, "grad_norm": 0.10079459100961685, "learning_rate": 0.002, "loss": 2.3341, "step": 354440 }, { "epoch": 1.3702045739203044, "grad_norm": 0.09805863350629807, "learning_rate": 0.002, "loss": 2.3401, "step": 354450 }, { "epoch": 1.3702432311236876, "grad_norm": 0.0914018452167511, "learning_rate": 0.002, "loss": 2.3275, "step": 354460 }, { "epoch": 1.3702818883270709, "grad_norm": 0.09827131032943726, "learning_rate": 0.002, "loss": 2.3232, "step": 354470 }, { "epoch": 1.3703205455304541, "grad_norm": 0.10076434910297394, "learning_rate": 0.002, "loss": 2.333, "step": 354480 }, { "epoch": 1.3703592027338374, "grad_norm": 0.10175915062427521, "learning_rate": 0.002, "loss": 2.3354, "step": 354490 }, { "epoch": 1.3703978599372206, "grad_norm": 0.1084328219294548, "learning_rate": 0.002, "loss": 2.3335, "step": 354500 }, { "epoch": 1.3704365171406039, "grad_norm": 0.10804764926433563, "learning_rate": 0.002, "loss": 2.3452, "step": 354510 }, { "epoch": 1.3704751743439871, "grad_norm": 0.10715720057487488, "learning_rate": 0.002, "loss": 2.3198, "step": 354520 }, { "epoch": 1.3705138315473706, "grad_norm": 0.10957840830087662, "learning_rate": 0.002, "loss": 2.328, "step": 354530 }, { "epoch": 1.3705524887507539, "grad_norm": 0.09168438613414764, "learning_rate": 0.002, "loss": 2.3346, "step": 354540 }, { "epoch": 1.3705911459541371, "grad_norm": 0.10639364272356033, "learning_rate": 0.002, "loss": 2.3418, "step": 354550 }, { "epoch": 1.3706298031575204, "grad_norm": 0.09193290770053864, "learning_rate": 0.002, "loss": 2.3391, "step": 354560 }, { "epoch": 1.3706684603609036, "grad_norm": 0.11018170416355133, "learning_rate": 0.002, "loss": 2.3335, "step": 354570 }, { "epoch": 1.3707071175642869, "grad_norm": 0.15490789711475372, "learning_rate": 0.002, "loss": 2.3354, "step": 354580 }, { "epoch": 1.3707457747676703, "grad_norm": 0.09622285515069962, "learning_rate": 0.002, "loss": 2.323, "step": 354590 }, { "epoch": 1.3707844319710536, "grad_norm": 0.0934017077088356, "learning_rate": 0.002, "loss": 2.3426, "step": 354600 }, { "epoch": 1.3708230891744368, "grad_norm": 0.09973679482936859, "learning_rate": 0.002, "loss": 2.3364, "step": 354610 }, { "epoch": 1.37086174637782, "grad_norm": 0.09377430379390717, "learning_rate": 0.002, "loss": 2.3318, "step": 354620 }, { "epoch": 1.3709004035812034, "grad_norm": 0.10160693526268005, "learning_rate": 0.002, "loss": 2.3366, "step": 354630 }, { "epoch": 1.3709390607845866, "grad_norm": 0.11587730795145035, "learning_rate": 0.002, "loss": 2.3362, "step": 354640 }, { "epoch": 1.3709777179879699, "grad_norm": 0.10981699824333191, "learning_rate": 0.002, "loss": 2.3371, "step": 354650 }, { "epoch": 1.371016375191353, "grad_norm": 0.11354506760835648, "learning_rate": 0.002, "loss": 2.3343, "step": 354660 }, { "epoch": 1.3710550323947364, "grad_norm": 0.09859279543161392, "learning_rate": 0.002, "loss": 2.3416, "step": 354670 }, { "epoch": 1.3710936895981196, "grad_norm": 0.10488973557949066, "learning_rate": 0.002, "loss": 2.3534, "step": 354680 }, { "epoch": 1.3711323468015029, "grad_norm": 0.09859029203653336, "learning_rate": 0.002, "loss": 2.326, "step": 354690 }, { "epoch": 1.3711710040048863, "grad_norm": 0.11239251494407654, "learning_rate": 0.002, "loss": 2.3289, "step": 354700 }, { "epoch": 1.3712096612082696, "grad_norm": 0.11284992843866348, "learning_rate": 0.002, "loss": 2.3427, "step": 354710 }, { "epoch": 1.3712483184116528, "grad_norm": 0.12570716440677643, "learning_rate": 0.002, "loss": 2.3476, "step": 354720 }, { "epoch": 1.371286975615036, "grad_norm": 0.11355306953191757, "learning_rate": 0.002, "loss": 2.3222, "step": 354730 }, { "epoch": 1.3713256328184193, "grad_norm": 0.14632190763950348, "learning_rate": 0.002, "loss": 2.3171, "step": 354740 }, { "epoch": 1.3713642900218026, "grad_norm": 0.11878865957260132, "learning_rate": 0.002, "loss": 2.3314, "step": 354750 }, { "epoch": 1.371402947225186, "grad_norm": 0.11377181857824326, "learning_rate": 0.002, "loss": 2.3424, "step": 354760 }, { "epoch": 1.3714416044285693, "grad_norm": 0.11107166111469269, "learning_rate": 0.002, "loss": 2.3324, "step": 354770 }, { "epoch": 1.3714802616319526, "grad_norm": 0.11419668048620224, "learning_rate": 0.002, "loss": 2.3412, "step": 354780 }, { "epoch": 1.3715189188353358, "grad_norm": 0.09458251297473907, "learning_rate": 0.002, "loss": 2.3307, "step": 354790 }, { "epoch": 1.371557576038719, "grad_norm": 0.10594692826271057, "learning_rate": 0.002, "loss": 2.3303, "step": 354800 }, { "epoch": 1.3715962332421023, "grad_norm": 0.10339496284723282, "learning_rate": 0.002, "loss": 2.33, "step": 354810 }, { "epoch": 1.3716348904454856, "grad_norm": 0.09302469342947006, "learning_rate": 0.002, "loss": 2.342, "step": 354820 }, { "epoch": 1.3716735476488688, "grad_norm": 0.08864486217498779, "learning_rate": 0.002, "loss": 2.3364, "step": 354830 }, { "epoch": 1.371712204852252, "grad_norm": 0.11280500888824463, "learning_rate": 0.002, "loss": 2.3338, "step": 354840 }, { "epoch": 1.3717508620556353, "grad_norm": 0.09763745963573456, "learning_rate": 0.002, "loss": 2.3335, "step": 354850 }, { "epoch": 1.3717895192590186, "grad_norm": 0.11580676585435867, "learning_rate": 0.002, "loss": 2.3329, "step": 354860 }, { "epoch": 1.371828176462402, "grad_norm": 0.10815642029047012, "learning_rate": 0.002, "loss": 2.3183, "step": 354870 }, { "epoch": 1.3718668336657853, "grad_norm": 0.14305159449577332, "learning_rate": 0.002, "loss": 2.3521, "step": 354880 }, { "epoch": 1.3719054908691686, "grad_norm": 0.10197746008634567, "learning_rate": 0.002, "loss": 2.3324, "step": 354890 }, { "epoch": 1.3719441480725518, "grad_norm": 0.14328710734844208, "learning_rate": 0.002, "loss": 2.3306, "step": 354900 }, { "epoch": 1.371982805275935, "grad_norm": 0.11703839153051376, "learning_rate": 0.002, "loss": 2.3519, "step": 354910 }, { "epoch": 1.3720214624793183, "grad_norm": 0.11150151491165161, "learning_rate": 0.002, "loss": 2.3411, "step": 354920 }, { "epoch": 1.3720601196827018, "grad_norm": 0.11460588872432709, "learning_rate": 0.002, "loss": 2.3303, "step": 354930 }, { "epoch": 1.372098776886085, "grad_norm": 0.09653277695178986, "learning_rate": 0.002, "loss": 2.3206, "step": 354940 }, { "epoch": 1.3721374340894683, "grad_norm": 0.2815849781036377, "learning_rate": 0.002, "loss": 2.331, "step": 354950 }, { "epoch": 1.3721760912928516, "grad_norm": 0.13114270567893982, "learning_rate": 0.002, "loss": 2.3348, "step": 354960 }, { "epoch": 1.3722147484962348, "grad_norm": 0.09608525037765503, "learning_rate": 0.002, "loss": 2.3419, "step": 354970 }, { "epoch": 1.372253405699618, "grad_norm": 0.11423958837985992, "learning_rate": 0.002, "loss": 2.3567, "step": 354980 }, { "epoch": 1.3722920629030013, "grad_norm": 0.11959332227706909, "learning_rate": 0.002, "loss": 2.3337, "step": 354990 }, { "epoch": 1.3723307201063846, "grad_norm": 0.09793872386217117, "learning_rate": 0.002, "loss": 2.3483, "step": 355000 }, { "epoch": 1.3723693773097678, "grad_norm": 0.09575875848531723, "learning_rate": 0.002, "loss": 2.3538, "step": 355010 }, { "epoch": 1.372408034513151, "grad_norm": 0.1274711787700653, "learning_rate": 0.002, "loss": 2.3385, "step": 355020 }, { "epoch": 1.3724466917165343, "grad_norm": 0.10284113138914108, "learning_rate": 0.002, "loss": 2.3246, "step": 355030 }, { "epoch": 1.3724853489199178, "grad_norm": 0.12274546921253204, "learning_rate": 0.002, "loss": 2.3375, "step": 355040 }, { "epoch": 1.372524006123301, "grad_norm": 0.1329374462366104, "learning_rate": 0.002, "loss": 2.34, "step": 355050 }, { "epoch": 1.3725626633266843, "grad_norm": 0.11847500503063202, "learning_rate": 0.002, "loss": 2.338, "step": 355060 }, { "epoch": 1.3726013205300676, "grad_norm": 0.13184745609760284, "learning_rate": 0.002, "loss": 2.3303, "step": 355070 }, { "epoch": 1.3726399777334508, "grad_norm": 0.10280844569206238, "learning_rate": 0.002, "loss": 2.3416, "step": 355080 }, { "epoch": 1.372678634936834, "grad_norm": 0.10201731324195862, "learning_rate": 0.002, "loss": 2.3368, "step": 355090 }, { "epoch": 1.3727172921402175, "grad_norm": 0.09594844281673431, "learning_rate": 0.002, "loss": 2.3335, "step": 355100 }, { "epoch": 1.3727559493436008, "grad_norm": 0.1228349506855011, "learning_rate": 0.002, "loss": 2.3175, "step": 355110 }, { "epoch": 1.372794606546984, "grad_norm": 0.09336409717798233, "learning_rate": 0.002, "loss": 2.3163, "step": 355120 }, { "epoch": 1.3728332637503673, "grad_norm": 0.1344430148601532, "learning_rate": 0.002, "loss": 2.329, "step": 355130 }, { "epoch": 1.3728719209537505, "grad_norm": 0.1103283166885376, "learning_rate": 0.002, "loss": 2.3399, "step": 355140 }, { "epoch": 1.3729105781571338, "grad_norm": 0.10305920988321304, "learning_rate": 0.002, "loss": 2.323, "step": 355150 }, { "epoch": 1.372949235360517, "grad_norm": 0.12979035079479218, "learning_rate": 0.002, "loss": 2.3489, "step": 355160 }, { "epoch": 1.3729878925639003, "grad_norm": 0.10871124267578125, "learning_rate": 0.002, "loss": 2.3378, "step": 355170 }, { "epoch": 1.3730265497672836, "grad_norm": 0.09896957129240036, "learning_rate": 0.002, "loss": 2.33, "step": 355180 }, { "epoch": 1.3730652069706668, "grad_norm": 0.09285487234592438, "learning_rate": 0.002, "loss": 2.3295, "step": 355190 }, { "epoch": 1.37310386417405, "grad_norm": 0.09617581963539124, "learning_rate": 0.002, "loss": 2.3212, "step": 355200 }, { "epoch": 1.3731425213774335, "grad_norm": 0.11290497332811356, "learning_rate": 0.002, "loss": 2.3397, "step": 355210 }, { "epoch": 1.3731811785808168, "grad_norm": 0.11208027601242065, "learning_rate": 0.002, "loss": 2.3346, "step": 355220 }, { "epoch": 1.3732198357842, "grad_norm": 0.10715988278388977, "learning_rate": 0.002, "loss": 2.3328, "step": 355230 }, { "epoch": 1.3732584929875833, "grad_norm": 0.11387352645397186, "learning_rate": 0.002, "loss": 2.3262, "step": 355240 }, { "epoch": 1.3732971501909665, "grad_norm": 0.09642688184976578, "learning_rate": 0.002, "loss": 2.3415, "step": 355250 }, { "epoch": 1.3733358073943498, "grad_norm": 0.09048770368099213, "learning_rate": 0.002, "loss": 2.3273, "step": 355260 }, { "epoch": 1.3733744645977333, "grad_norm": 0.0994555652141571, "learning_rate": 0.002, "loss": 2.3395, "step": 355270 }, { "epoch": 1.3734131218011165, "grad_norm": 0.10325551778078079, "learning_rate": 0.002, "loss": 2.3491, "step": 355280 }, { "epoch": 1.3734517790044998, "grad_norm": 0.1034950464963913, "learning_rate": 0.002, "loss": 2.3371, "step": 355290 }, { "epoch": 1.373490436207883, "grad_norm": 0.10025890916585922, "learning_rate": 0.002, "loss": 2.3336, "step": 355300 }, { "epoch": 1.3735290934112663, "grad_norm": 0.09707777202129364, "learning_rate": 0.002, "loss": 2.333, "step": 355310 }, { "epoch": 1.3735677506146495, "grad_norm": 0.11595097929239273, "learning_rate": 0.002, "loss": 2.3345, "step": 355320 }, { "epoch": 1.3736064078180328, "grad_norm": 0.09489137679338455, "learning_rate": 0.002, "loss": 2.325, "step": 355330 }, { "epoch": 1.373645065021416, "grad_norm": 0.09553955495357513, "learning_rate": 0.002, "loss": 2.3251, "step": 355340 }, { "epoch": 1.3736837222247993, "grad_norm": 0.1255011111497879, "learning_rate": 0.002, "loss": 2.3508, "step": 355350 }, { "epoch": 1.3737223794281825, "grad_norm": 0.11157847940921783, "learning_rate": 0.002, "loss": 2.3406, "step": 355360 }, { "epoch": 1.3737610366315658, "grad_norm": 0.19698411226272583, "learning_rate": 0.002, "loss": 2.3216, "step": 355370 }, { "epoch": 1.3737996938349493, "grad_norm": 0.10712388902902603, "learning_rate": 0.002, "loss": 2.3248, "step": 355380 }, { "epoch": 1.3738383510383325, "grad_norm": 0.11198710650205612, "learning_rate": 0.002, "loss": 2.3463, "step": 355390 }, { "epoch": 1.3738770082417158, "grad_norm": 0.0910465344786644, "learning_rate": 0.002, "loss": 2.3362, "step": 355400 }, { "epoch": 1.373915665445099, "grad_norm": 0.11695914715528488, "learning_rate": 0.002, "loss": 2.3351, "step": 355410 }, { "epoch": 1.3739543226484823, "grad_norm": 0.09684840589761734, "learning_rate": 0.002, "loss": 2.3269, "step": 355420 }, { "epoch": 1.3739929798518655, "grad_norm": 0.0928514152765274, "learning_rate": 0.002, "loss": 2.3307, "step": 355430 }, { "epoch": 1.374031637055249, "grad_norm": 0.11242695897817612, "learning_rate": 0.002, "loss": 2.3335, "step": 355440 }, { "epoch": 1.3740702942586323, "grad_norm": 0.11720636487007141, "learning_rate": 0.002, "loss": 2.3357, "step": 355450 }, { "epoch": 1.3741089514620155, "grad_norm": 0.11983829736709595, "learning_rate": 0.002, "loss": 2.3364, "step": 355460 }, { "epoch": 1.3741476086653988, "grad_norm": 0.09531054645776749, "learning_rate": 0.002, "loss": 2.3482, "step": 355470 }, { "epoch": 1.374186265868782, "grad_norm": 0.0979180783033371, "learning_rate": 0.002, "loss": 2.3411, "step": 355480 }, { "epoch": 1.3742249230721653, "grad_norm": 0.1277099996805191, "learning_rate": 0.002, "loss": 2.3331, "step": 355490 }, { "epoch": 1.3742635802755485, "grad_norm": 0.09821344912052155, "learning_rate": 0.002, "loss": 2.3319, "step": 355500 }, { "epoch": 1.3743022374789318, "grad_norm": 0.11369414627552032, "learning_rate": 0.002, "loss": 2.338, "step": 355510 }, { "epoch": 1.374340894682315, "grad_norm": 0.10324325412511826, "learning_rate": 0.002, "loss": 2.3358, "step": 355520 }, { "epoch": 1.3743795518856983, "grad_norm": 0.11418458819389343, "learning_rate": 0.002, "loss": 2.3264, "step": 355530 }, { "epoch": 1.3744182090890815, "grad_norm": 0.09707921743392944, "learning_rate": 0.002, "loss": 2.333, "step": 355540 }, { "epoch": 1.374456866292465, "grad_norm": 0.10141601413488388, "learning_rate": 0.002, "loss": 2.3309, "step": 355550 }, { "epoch": 1.3744955234958482, "grad_norm": 0.10970473289489746, "learning_rate": 0.002, "loss": 2.3446, "step": 355560 }, { "epoch": 1.3745341806992315, "grad_norm": 0.11045132577419281, "learning_rate": 0.002, "loss": 2.3379, "step": 355570 }, { "epoch": 1.3745728379026148, "grad_norm": 0.09881190955638885, "learning_rate": 0.002, "loss": 2.328, "step": 355580 }, { "epoch": 1.374611495105998, "grad_norm": 0.09053805470466614, "learning_rate": 0.002, "loss": 2.3394, "step": 355590 }, { "epoch": 1.3746501523093813, "grad_norm": 0.10699243098497391, "learning_rate": 0.002, "loss": 2.3349, "step": 355600 }, { "epoch": 1.3746888095127647, "grad_norm": 0.11124745011329651, "learning_rate": 0.002, "loss": 2.3415, "step": 355610 }, { "epoch": 1.374727466716148, "grad_norm": 0.12792594730854034, "learning_rate": 0.002, "loss": 2.3267, "step": 355620 }, { "epoch": 1.3747661239195312, "grad_norm": 0.10298825055360794, "learning_rate": 0.002, "loss": 2.3279, "step": 355630 }, { "epoch": 1.3748047811229145, "grad_norm": 0.08798182010650635, "learning_rate": 0.002, "loss": 2.3397, "step": 355640 }, { "epoch": 1.3748434383262977, "grad_norm": 0.09169424325227737, "learning_rate": 0.002, "loss": 2.3285, "step": 355650 }, { "epoch": 1.374882095529681, "grad_norm": 0.11178061366081238, "learning_rate": 0.002, "loss": 2.325, "step": 355660 }, { "epoch": 1.3749207527330642, "grad_norm": 0.17487318813800812, "learning_rate": 0.002, "loss": 2.3416, "step": 355670 }, { "epoch": 1.3749594099364475, "grad_norm": 0.09531193971633911, "learning_rate": 0.002, "loss": 2.3148, "step": 355680 }, { "epoch": 1.3749980671398307, "grad_norm": 0.10043787211179733, "learning_rate": 0.002, "loss": 2.3284, "step": 355690 }, { "epoch": 1.375036724343214, "grad_norm": 0.09354715794324875, "learning_rate": 0.002, "loss": 2.3342, "step": 355700 }, { "epoch": 1.3750753815465975, "grad_norm": 0.10695581138134003, "learning_rate": 0.002, "loss": 2.3387, "step": 355710 }, { "epoch": 1.3751140387499807, "grad_norm": 0.10047696530818939, "learning_rate": 0.002, "loss": 2.3315, "step": 355720 }, { "epoch": 1.375152695953364, "grad_norm": 0.10591744631528854, "learning_rate": 0.002, "loss": 2.3318, "step": 355730 }, { "epoch": 1.3751913531567472, "grad_norm": 0.098115935921669, "learning_rate": 0.002, "loss": 2.3341, "step": 355740 }, { "epoch": 1.3752300103601305, "grad_norm": 0.11012642085552216, "learning_rate": 0.002, "loss": 2.3297, "step": 355750 }, { "epoch": 1.3752686675635137, "grad_norm": 0.10438597947359085, "learning_rate": 0.002, "loss": 2.3385, "step": 355760 }, { "epoch": 1.375307324766897, "grad_norm": 0.09538904577493668, "learning_rate": 0.002, "loss": 2.32, "step": 355770 }, { "epoch": 1.3753459819702805, "grad_norm": 0.09819392114877701, "learning_rate": 0.002, "loss": 2.346, "step": 355780 }, { "epoch": 1.3753846391736637, "grad_norm": 0.12182353436946869, "learning_rate": 0.002, "loss": 2.3277, "step": 355790 }, { "epoch": 1.375423296377047, "grad_norm": 0.11237728595733643, "learning_rate": 0.002, "loss": 2.3097, "step": 355800 }, { "epoch": 1.3754619535804302, "grad_norm": 0.09839800745248795, "learning_rate": 0.002, "loss": 2.3335, "step": 355810 }, { "epoch": 1.3755006107838135, "grad_norm": 0.0951833724975586, "learning_rate": 0.002, "loss": 2.3342, "step": 355820 }, { "epoch": 1.3755392679871967, "grad_norm": 0.0866539478302002, "learning_rate": 0.002, "loss": 2.3467, "step": 355830 }, { "epoch": 1.37557792519058, "grad_norm": 0.10665132105350494, "learning_rate": 0.002, "loss": 2.3616, "step": 355840 }, { "epoch": 1.3756165823939632, "grad_norm": 0.10727176815271378, "learning_rate": 0.002, "loss": 2.3339, "step": 355850 }, { "epoch": 1.3756552395973465, "grad_norm": 0.09651970863342285, "learning_rate": 0.002, "loss": 2.3322, "step": 355860 }, { "epoch": 1.3756938968007297, "grad_norm": 0.10916649550199509, "learning_rate": 0.002, "loss": 2.3389, "step": 355870 }, { "epoch": 1.3757325540041132, "grad_norm": 0.1125960648059845, "learning_rate": 0.002, "loss": 2.3264, "step": 355880 }, { "epoch": 1.3757712112074965, "grad_norm": 0.09366412460803986, "learning_rate": 0.002, "loss": 2.3336, "step": 355890 }, { "epoch": 1.3758098684108797, "grad_norm": 0.09899480640888214, "learning_rate": 0.002, "loss": 2.3124, "step": 355900 }, { "epoch": 1.375848525614263, "grad_norm": 0.09413845092058182, "learning_rate": 0.002, "loss": 2.328, "step": 355910 }, { "epoch": 1.3758871828176462, "grad_norm": 0.10501842945814133, "learning_rate": 0.002, "loss": 2.3293, "step": 355920 }, { "epoch": 1.3759258400210295, "grad_norm": 0.10373161733150482, "learning_rate": 0.002, "loss": 2.3297, "step": 355930 }, { "epoch": 1.3759644972244127, "grad_norm": 0.0969945639371872, "learning_rate": 0.002, "loss": 2.3522, "step": 355940 }, { "epoch": 1.3760031544277962, "grad_norm": 0.11098074913024902, "learning_rate": 0.002, "loss": 2.3404, "step": 355950 }, { "epoch": 1.3760418116311794, "grad_norm": 0.1038602814078331, "learning_rate": 0.002, "loss": 2.3323, "step": 355960 }, { "epoch": 1.3760804688345627, "grad_norm": 0.10629703104496002, "learning_rate": 0.002, "loss": 2.3397, "step": 355970 }, { "epoch": 1.376119126037946, "grad_norm": 0.10306701809167862, "learning_rate": 0.002, "loss": 2.3351, "step": 355980 }, { "epoch": 1.3761577832413292, "grad_norm": 0.11155600100755692, "learning_rate": 0.002, "loss": 2.3247, "step": 355990 }, { "epoch": 1.3761964404447125, "grad_norm": 0.10721419006586075, "learning_rate": 0.002, "loss": 2.3551, "step": 356000 }, { "epoch": 1.3762350976480957, "grad_norm": 0.11784236878156662, "learning_rate": 0.002, "loss": 2.3202, "step": 356010 }, { "epoch": 1.376273754851479, "grad_norm": 0.10429581254720688, "learning_rate": 0.002, "loss": 2.3379, "step": 356020 }, { "epoch": 1.3763124120548622, "grad_norm": 0.10380349308252335, "learning_rate": 0.002, "loss": 2.3243, "step": 356030 }, { "epoch": 1.3763510692582455, "grad_norm": 0.09363957494497299, "learning_rate": 0.002, "loss": 2.3433, "step": 356040 }, { "epoch": 1.376389726461629, "grad_norm": 0.10458462685346603, "learning_rate": 0.002, "loss": 2.3317, "step": 356050 }, { "epoch": 1.3764283836650122, "grad_norm": 0.12209296226501465, "learning_rate": 0.002, "loss": 2.337, "step": 356060 }, { "epoch": 1.3764670408683954, "grad_norm": 0.08871419727802277, "learning_rate": 0.002, "loss": 2.3454, "step": 356070 }, { "epoch": 1.3765056980717787, "grad_norm": 0.11016503721475601, "learning_rate": 0.002, "loss": 2.3294, "step": 356080 }, { "epoch": 1.376544355275162, "grad_norm": 0.10212699323892593, "learning_rate": 0.002, "loss": 2.345, "step": 356090 }, { "epoch": 1.3765830124785452, "grad_norm": 0.13530485332012177, "learning_rate": 0.002, "loss": 2.3344, "step": 356100 }, { "epoch": 1.3766216696819285, "grad_norm": 0.09179184585809708, "learning_rate": 0.002, "loss": 2.302, "step": 356110 }, { "epoch": 1.376660326885312, "grad_norm": 0.09778521209955215, "learning_rate": 0.002, "loss": 2.344, "step": 356120 }, { "epoch": 1.3766989840886952, "grad_norm": 0.10612337291240692, "learning_rate": 0.002, "loss": 2.3218, "step": 356130 }, { "epoch": 1.3767376412920784, "grad_norm": 0.11075197905302048, "learning_rate": 0.002, "loss": 2.3323, "step": 356140 }, { "epoch": 1.3767762984954617, "grad_norm": 0.11034604161977768, "learning_rate": 0.002, "loss": 2.3402, "step": 356150 }, { "epoch": 1.376814955698845, "grad_norm": 0.11376018822193146, "learning_rate": 0.002, "loss": 2.3281, "step": 356160 }, { "epoch": 1.3768536129022282, "grad_norm": 0.0910055935382843, "learning_rate": 0.002, "loss": 2.342, "step": 356170 }, { "epoch": 1.3768922701056114, "grad_norm": 0.09447870403528214, "learning_rate": 0.002, "loss": 2.3365, "step": 356180 }, { "epoch": 1.3769309273089947, "grad_norm": 0.10973922908306122, "learning_rate": 0.002, "loss": 2.3295, "step": 356190 }, { "epoch": 1.376969584512378, "grad_norm": 0.11534488201141357, "learning_rate": 0.002, "loss": 2.3353, "step": 356200 }, { "epoch": 1.3770082417157612, "grad_norm": 0.11993777006864548, "learning_rate": 0.002, "loss": 2.3313, "step": 356210 }, { "epoch": 1.3770468989191447, "grad_norm": 0.10797804594039917, "learning_rate": 0.002, "loss": 2.3286, "step": 356220 }, { "epoch": 1.377085556122528, "grad_norm": 0.09470128268003464, "learning_rate": 0.002, "loss": 2.3187, "step": 356230 }, { "epoch": 1.3771242133259112, "grad_norm": 0.107524573802948, "learning_rate": 0.002, "loss": 2.3235, "step": 356240 }, { "epoch": 1.3771628705292944, "grad_norm": 0.09606486558914185, "learning_rate": 0.002, "loss": 2.3429, "step": 356250 }, { "epoch": 1.3772015277326777, "grad_norm": 0.12199854850769043, "learning_rate": 0.002, "loss": 2.3344, "step": 356260 }, { "epoch": 1.377240184936061, "grad_norm": 0.0933234840631485, "learning_rate": 0.002, "loss": 2.3313, "step": 356270 }, { "epoch": 1.3772788421394442, "grad_norm": 0.09945408999919891, "learning_rate": 0.002, "loss": 2.3313, "step": 356280 }, { "epoch": 1.3773174993428277, "grad_norm": 0.1054026335477829, "learning_rate": 0.002, "loss": 2.3302, "step": 356290 }, { "epoch": 1.377356156546211, "grad_norm": 0.34597858786582947, "learning_rate": 0.002, "loss": 2.3363, "step": 356300 }, { "epoch": 1.3773948137495942, "grad_norm": 0.1390652060508728, "learning_rate": 0.002, "loss": 2.3311, "step": 356310 }, { "epoch": 1.3774334709529774, "grad_norm": 0.12162581086158752, "learning_rate": 0.002, "loss": 2.3416, "step": 356320 }, { "epoch": 1.3774721281563607, "grad_norm": 0.09069560468196869, "learning_rate": 0.002, "loss": 2.3152, "step": 356330 }, { "epoch": 1.377510785359744, "grad_norm": 0.11227056384086609, "learning_rate": 0.002, "loss": 2.3259, "step": 356340 }, { "epoch": 1.3775494425631272, "grad_norm": 0.11093819886445999, "learning_rate": 0.002, "loss": 2.3435, "step": 356350 }, { "epoch": 1.3775880997665104, "grad_norm": 0.09435505419969559, "learning_rate": 0.002, "loss": 2.3407, "step": 356360 }, { "epoch": 1.3776267569698937, "grad_norm": 0.10290578752756119, "learning_rate": 0.002, "loss": 2.3273, "step": 356370 }, { "epoch": 1.377665414173277, "grad_norm": 0.12918859720230103, "learning_rate": 0.002, "loss": 2.3419, "step": 356380 }, { "epoch": 1.3777040713766604, "grad_norm": 0.09759011119604111, "learning_rate": 0.002, "loss": 2.3397, "step": 356390 }, { "epoch": 1.3777427285800437, "grad_norm": 0.10140181332826614, "learning_rate": 0.002, "loss": 2.3333, "step": 356400 }, { "epoch": 1.377781385783427, "grad_norm": 0.0912579819560051, "learning_rate": 0.002, "loss": 2.3345, "step": 356410 }, { "epoch": 1.3778200429868102, "grad_norm": 0.10252673923969269, "learning_rate": 0.002, "loss": 2.3425, "step": 356420 }, { "epoch": 1.3778587001901934, "grad_norm": 0.11278831213712692, "learning_rate": 0.002, "loss": 2.3481, "step": 356430 }, { "epoch": 1.3778973573935767, "grad_norm": 0.1057935580611229, "learning_rate": 0.002, "loss": 2.3363, "step": 356440 }, { "epoch": 1.3779360145969601, "grad_norm": 0.09558992087841034, "learning_rate": 0.002, "loss": 2.3439, "step": 356450 }, { "epoch": 1.3779746718003434, "grad_norm": 0.09526700526475906, "learning_rate": 0.002, "loss": 2.3183, "step": 356460 }, { "epoch": 1.3780133290037266, "grad_norm": 0.10227543860673904, "learning_rate": 0.002, "loss": 2.3502, "step": 356470 }, { "epoch": 1.37805198620711, "grad_norm": 0.09496919810771942, "learning_rate": 0.002, "loss": 2.3195, "step": 356480 }, { "epoch": 1.3780906434104931, "grad_norm": 0.09326247125864029, "learning_rate": 0.002, "loss": 2.3554, "step": 356490 }, { "epoch": 1.3781293006138764, "grad_norm": 0.11264954507350922, "learning_rate": 0.002, "loss": 2.3394, "step": 356500 }, { "epoch": 1.3781679578172596, "grad_norm": 0.10654882341623306, "learning_rate": 0.002, "loss": 2.3301, "step": 356510 }, { "epoch": 1.378206615020643, "grad_norm": 0.08847260475158691, "learning_rate": 0.002, "loss": 2.3425, "step": 356520 }, { "epoch": 1.3782452722240262, "grad_norm": 0.14755184948444366, "learning_rate": 0.002, "loss": 2.318, "step": 356530 }, { "epoch": 1.3782839294274094, "grad_norm": 0.1040208637714386, "learning_rate": 0.002, "loss": 2.3391, "step": 356540 }, { "epoch": 1.3783225866307927, "grad_norm": 0.08862719684839249, "learning_rate": 0.002, "loss": 2.3228, "step": 356550 }, { "epoch": 1.3783612438341761, "grad_norm": 0.10133068263530731, "learning_rate": 0.002, "loss": 2.348, "step": 356560 }, { "epoch": 1.3783999010375594, "grad_norm": 0.09677974134683609, "learning_rate": 0.002, "loss": 2.337, "step": 356570 }, { "epoch": 1.3784385582409426, "grad_norm": 0.12741930782794952, "learning_rate": 0.002, "loss": 2.3255, "step": 356580 }, { "epoch": 1.3784772154443259, "grad_norm": 0.12969206273555756, "learning_rate": 0.002, "loss": 2.3369, "step": 356590 }, { "epoch": 1.3785158726477091, "grad_norm": 0.10940320789813995, "learning_rate": 0.002, "loss": 2.3505, "step": 356600 }, { "epoch": 1.3785545298510924, "grad_norm": 0.1125948429107666, "learning_rate": 0.002, "loss": 2.3432, "step": 356610 }, { "epoch": 1.3785931870544759, "grad_norm": 0.10383554548025131, "learning_rate": 0.002, "loss": 2.3203, "step": 356620 }, { "epoch": 1.3786318442578591, "grad_norm": 0.100620336830616, "learning_rate": 0.002, "loss": 2.3347, "step": 356630 }, { "epoch": 1.3786705014612424, "grad_norm": 0.1173454001545906, "learning_rate": 0.002, "loss": 2.3311, "step": 356640 }, { "epoch": 1.3787091586646256, "grad_norm": 0.11233677715063095, "learning_rate": 0.002, "loss": 2.341, "step": 356650 }, { "epoch": 1.3787478158680089, "grad_norm": 0.09890108555555344, "learning_rate": 0.002, "loss": 2.3371, "step": 356660 }, { "epoch": 1.3787864730713921, "grad_norm": 0.10544534772634506, "learning_rate": 0.002, "loss": 2.3306, "step": 356670 }, { "epoch": 1.3788251302747754, "grad_norm": 0.10439340025186539, "learning_rate": 0.002, "loss": 2.3378, "step": 356680 }, { "epoch": 1.3788637874781586, "grad_norm": 0.11262357234954834, "learning_rate": 0.002, "loss": 2.3333, "step": 356690 }, { "epoch": 1.3789024446815419, "grad_norm": 0.09836552292108536, "learning_rate": 0.002, "loss": 2.3357, "step": 356700 }, { "epoch": 1.3789411018849251, "grad_norm": 0.11344776302576065, "learning_rate": 0.002, "loss": 2.3282, "step": 356710 }, { "epoch": 1.3789797590883084, "grad_norm": 0.09220697730779648, "learning_rate": 0.002, "loss": 2.347, "step": 356720 }, { "epoch": 1.3790184162916919, "grad_norm": 0.09780165553092957, "learning_rate": 0.002, "loss": 2.3329, "step": 356730 }, { "epoch": 1.3790570734950751, "grad_norm": 0.09286107122898102, "learning_rate": 0.002, "loss": 2.3377, "step": 356740 }, { "epoch": 1.3790957306984584, "grad_norm": 0.1060047447681427, "learning_rate": 0.002, "loss": 2.3347, "step": 356750 }, { "epoch": 1.3791343879018416, "grad_norm": 0.11789479851722717, "learning_rate": 0.002, "loss": 2.3408, "step": 356760 }, { "epoch": 1.3791730451052249, "grad_norm": 0.09493661671876907, "learning_rate": 0.002, "loss": 2.3534, "step": 356770 }, { "epoch": 1.3792117023086081, "grad_norm": 0.13432522118091583, "learning_rate": 0.002, "loss": 2.3418, "step": 356780 }, { "epoch": 1.3792503595119916, "grad_norm": 0.10457032173871994, "learning_rate": 0.002, "loss": 2.3372, "step": 356790 }, { "epoch": 1.3792890167153748, "grad_norm": 0.12527017295360565, "learning_rate": 0.002, "loss": 2.3461, "step": 356800 }, { "epoch": 1.379327673918758, "grad_norm": 0.09906395524740219, "learning_rate": 0.002, "loss": 2.3243, "step": 356810 }, { "epoch": 1.3793663311221414, "grad_norm": 0.10826694965362549, "learning_rate": 0.002, "loss": 2.3317, "step": 356820 }, { "epoch": 1.3794049883255246, "grad_norm": 0.10286567360162735, "learning_rate": 0.002, "loss": 2.3318, "step": 356830 }, { "epoch": 1.3794436455289079, "grad_norm": 0.11156069487333298, "learning_rate": 0.002, "loss": 2.3338, "step": 356840 }, { "epoch": 1.379482302732291, "grad_norm": 0.10198958963155746, "learning_rate": 0.002, "loss": 2.3197, "step": 356850 }, { "epoch": 1.3795209599356744, "grad_norm": 0.08753270655870438, "learning_rate": 0.002, "loss": 2.3258, "step": 356860 }, { "epoch": 1.3795596171390576, "grad_norm": 0.11631825566291809, "learning_rate": 0.002, "loss": 2.3344, "step": 356870 }, { "epoch": 1.3795982743424409, "grad_norm": 0.0976443886756897, "learning_rate": 0.002, "loss": 2.3288, "step": 356880 }, { "epoch": 1.3796369315458241, "grad_norm": 0.09814205765724182, "learning_rate": 0.002, "loss": 2.337, "step": 356890 }, { "epoch": 1.3796755887492076, "grad_norm": 0.10800322145223618, "learning_rate": 0.002, "loss": 2.3227, "step": 356900 }, { "epoch": 1.3797142459525908, "grad_norm": 0.09989839792251587, "learning_rate": 0.002, "loss": 2.3289, "step": 356910 }, { "epoch": 1.379752903155974, "grad_norm": 0.10988453030586243, "learning_rate": 0.002, "loss": 2.3259, "step": 356920 }, { "epoch": 1.3797915603593573, "grad_norm": 0.14615663886070251, "learning_rate": 0.002, "loss": 2.3306, "step": 356930 }, { "epoch": 1.3798302175627406, "grad_norm": 0.09930089116096497, "learning_rate": 0.002, "loss": 2.3306, "step": 356940 }, { "epoch": 1.3798688747661239, "grad_norm": 0.10417861491441727, "learning_rate": 0.002, "loss": 2.3289, "step": 356950 }, { "epoch": 1.3799075319695073, "grad_norm": 0.1246800571680069, "learning_rate": 0.002, "loss": 2.3546, "step": 356960 }, { "epoch": 1.3799461891728906, "grad_norm": 0.1527402251958847, "learning_rate": 0.002, "loss": 2.3301, "step": 356970 }, { "epoch": 1.3799848463762738, "grad_norm": 0.1109328642487526, "learning_rate": 0.002, "loss": 2.3372, "step": 356980 }, { "epoch": 1.380023503579657, "grad_norm": 0.09175754338502884, "learning_rate": 0.002, "loss": 2.334, "step": 356990 }, { "epoch": 1.3800621607830403, "grad_norm": 0.10926374047994614, "learning_rate": 0.002, "loss": 2.3504, "step": 357000 }, { "epoch": 1.3801008179864236, "grad_norm": 0.10283466428518295, "learning_rate": 0.002, "loss": 2.3272, "step": 357010 }, { "epoch": 1.3801394751898068, "grad_norm": 0.10054881870746613, "learning_rate": 0.002, "loss": 2.3346, "step": 357020 }, { "epoch": 1.38017813239319, "grad_norm": 0.10364895313978195, "learning_rate": 0.002, "loss": 2.3427, "step": 357030 }, { "epoch": 1.3802167895965733, "grad_norm": 0.10115136951208115, "learning_rate": 0.002, "loss": 2.3215, "step": 357040 }, { "epoch": 1.3802554467999566, "grad_norm": 0.10140955448150635, "learning_rate": 0.002, "loss": 2.3407, "step": 357050 }, { "epoch": 1.3802941040033399, "grad_norm": 0.11943522095680237, "learning_rate": 0.002, "loss": 2.3424, "step": 357060 }, { "epoch": 1.3803327612067233, "grad_norm": 0.10424049943685532, "learning_rate": 0.002, "loss": 2.3506, "step": 357070 }, { "epoch": 1.3803714184101066, "grad_norm": 0.1278669834136963, "learning_rate": 0.002, "loss": 2.3128, "step": 357080 }, { "epoch": 1.3804100756134898, "grad_norm": 0.11559665203094482, "learning_rate": 0.002, "loss": 2.3262, "step": 357090 }, { "epoch": 1.380448732816873, "grad_norm": 0.10717646032571793, "learning_rate": 0.002, "loss": 2.3311, "step": 357100 }, { "epoch": 1.3804873900202563, "grad_norm": 0.0966046005487442, "learning_rate": 0.002, "loss": 2.3227, "step": 357110 }, { "epoch": 1.3805260472236396, "grad_norm": 0.09488651901483536, "learning_rate": 0.002, "loss": 2.3307, "step": 357120 }, { "epoch": 1.380564704427023, "grad_norm": 0.09918989986181259, "learning_rate": 0.002, "loss": 2.3303, "step": 357130 }, { "epoch": 1.3806033616304063, "grad_norm": 0.12172359973192215, "learning_rate": 0.002, "loss": 2.3224, "step": 357140 }, { "epoch": 1.3806420188337896, "grad_norm": 0.10451477020978928, "learning_rate": 0.002, "loss": 2.3237, "step": 357150 }, { "epoch": 1.3806806760371728, "grad_norm": 0.10900402814149857, "learning_rate": 0.002, "loss": 2.3361, "step": 357160 }, { "epoch": 1.380719333240556, "grad_norm": 0.10119115561246872, "learning_rate": 0.002, "loss": 2.349, "step": 357170 }, { "epoch": 1.3807579904439393, "grad_norm": 0.10229839384555817, "learning_rate": 0.002, "loss": 2.3301, "step": 357180 }, { "epoch": 1.3807966476473226, "grad_norm": 0.11427944153547287, "learning_rate": 0.002, "loss": 2.3237, "step": 357190 }, { "epoch": 1.3808353048507058, "grad_norm": 0.11322027444839478, "learning_rate": 0.002, "loss": 2.3324, "step": 357200 }, { "epoch": 1.380873962054089, "grad_norm": 0.10636843740940094, "learning_rate": 0.002, "loss": 2.344, "step": 357210 }, { "epoch": 1.3809126192574723, "grad_norm": 0.09347525238990784, "learning_rate": 0.002, "loss": 2.3349, "step": 357220 }, { "epoch": 1.3809512764608556, "grad_norm": 0.11774121224880219, "learning_rate": 0.002, "loss": 2.3405, "step": 357230 }, { "epoch": 1.380989933664239, "grad_norm": 0.0972491055727005, "learning_rate": 0.002, "loss": 2.3305, "step": 357240 }, { "epoch": 1.3810285908676223, "grad_norm": 0.10950019210577011, "learning_rate": 0.002, "loss": 2.3412, "step": 357250 }, { "epoch": 1.3810672480710056, "grad_norm": 0.09386036545038223, "learning_rate": 0.002, "loss": 2.3268, "step": 357260 }, { "epoch": 1.3811059052743888, "grad_norm": 0.10270935297012329, "learning_rate": 0.002, "loss": 2.3318, "step": 357270 }, { "epoch": 1.381144562477772, "grad_norm": 0.11999564617872238, "learning_rate": 0.002, "loss": 2.3409, "step": 357280 }, { "epoch": 1.3811832196811553, "grad_norm": 0.09688630700111389, "learning_rate": 0.002, "loss": 2.3535, "step": 357290 }, { "epoch": 1.3812218768845388, "grad_norm": 0.10284639149904251, "learning_rate": 0.002, "loss": 2.3494, "step": 357300 }, { "epoch": 1.381260534087922, "grad_norm": 0.09171400219202042, "learning_rate": 0.002, "loss": 2.3352, "step": 357310 }, { "epoch": 1.3812991912913053, "grad_norm": 0.09951532632112503, "learning_rate": 0.002, "loss": 2.3186, "step": 357320 }, { "epoch": 1.3813378484946885, "grad_norm": 0.08959772437810898, "learning_rate": 0.002, "loss": 2.3217, "step": 357330 }, { "epoch": 1.3813765056980718, "grad_norm": 0.12968987226486206, "learning_rate": 0.002, "loss": 2.3249, "step": 357340 }, { "epoch": 1.381415162901455, "grad_norm": 0.13226859271526337, "learning_rate": 0.002, "loss": 2.3364, "step": 357350 }, { "epoch": 1.3814538201048383, "grad_norm": 0.11036866158246994, "learning_rate": 0.002, "loss": 2.3451, "step": 357360 }, { "epoch": 1.3814924773082216, "grad_norm": 0.10080908238887787, "learning_rate": 0.002, "loss": 2.3456, "step": 357370 }, { "epoch": 1.3815311345116048, "grad_norm": 0.10770279169082642, "learning_rate": 0.002, "loss": 2.3294, "step": 357380 }, { "epoch": 1.381569791714988, "grad_norm": 0.12061317265033722, "learning_rate": 0.002, "loss": 2.3413, "step": 357390 }, { "epoch": 1.3816084489183713, "grad_norm": 0.09101912379264832, "learning_rate": 0.002, "loss": 2.3395, "step": 357400 }, { "epoch": 1.3816471061217548, "grad_norm": 0.11742638051509857, "learning_rate": 0.002, "loss": 2.348, "step": 357410 }, { "epoch": 1.381685763325138, "grad_norm": 0.10257943719625473, "learning_rate": 0.002, "loss": 2.3397, "step": 357420 }, { "epoch": 1.3817244205285213, "grad_norm": 0.10319139808416367, "learning_rate": 0.002, "loss": 2.3271, "step": 357430 }, { "epoch": 1.3817630777319045, "grad_norm": 0.09902940690517426, "learning_rate": 0.002, "loss": 2.3383, "step": 357440 }, { "epoch": 1.3818017349352878, "grad_norm": 0.125919371843338, "learning_rate": 0.002, "loss": 2.327, "step": 357450 }, { "epoch": 1.381840392138671, "grad_norm": 0.10221952944993973, "learning_rate": 0.002, "loss": 2.3128, "step": 357460 }, { "epoch": 1.3818790493420545, "grad_norm": 0.10029791295528412, "learning_rate": 0.002, "loss": 2.3275, "step": 357470 }, { "epoch": 1.3819177065454378, "grad_norm": 0.1684672236442566, "learning_rate": 0.002, "loss": 2.3414, "step": 357480 }, { "epoch": 1.381956363748821, "grad_norm": 0.10184746235609055, "learning_rate": 0.002, "loss": 2.3377, "step": 357490 }, { "epoch": 1.3819950209522043, "grad_norm": 0.10539903491735458, "learning_rate": 0.002, "loss": 2.3341, "step": 357500 }, { "epoch": 1.3820336781555875, "grad_norm": 0.10304230451583862, "learning_rate": 0.002, "loss": 2.3286, "step": 357510 }, { "epoch": 1.3820723353589708, "grad_norm": 0.11429215967655182, "learning_rate": 0.002, "loss": 2.3344, "step": 357520 }, { "epoch": 1.382110992562354, "grad_norm": 0.08892321586608887, "learning_rate": 0.002, "loss": 2.3196, "step": 357530 }, { "epoch": 1.3821496497657373, "grad_norm": 0.08578456193208694, "learning_rate": 0.002, "loss": 2.338, "step": 357540 }, { "epoch": 1.3821883069691205, "grad_norm": 0.09519366919994354, "learning_rate": 0.002, "loss": 2.3514, "step": 357550 }, { "epoch": 1.3822269641725038, "grad_norm": 0.08658157289028168, "learning_rate": 0.002, "loss": 2.3312, "step": 357560 }, { "epoch": 1.3822656213758873, "grad_norm": 0.1024087518453598, "learning_rate": 0.002, "loss": 2.3298, "step": 357570 }, { "epoch": 1.3823042785792705, "grad_norm": 0.11436939984560013, "learning_rate": 0.002, "loss": 2.3543, "step": 357580 }, { "epoch": 1.3823429357826538, "grad_norm": 0.11564888060092926, "learning_rate": 0.002, "loss": 2.3364, "step": 357590 }, { "epoch": 1.382381592986037, "grad_norm": 0.10661925375461578, "learning_rate": 0.002, "loss": 2.3467, "step": 357600 }, { "epoch": 1.3824202501894203, "grad_norm": 0.11697540432214737, "learning_rate": 0.002, "loss": 2.3457, "step": 357610 }, { "epoch": 1.3824589073928035, "grad_norm": 0.10966644436120987, "learning_rate": 0.002, "loss": 2.3223, "step": 357620 }, { "epoch": 1.3824975645961868, "grad_norm": 0.11463762074708939, "learning_rate": 0.002, "loss": 2.3161, "step": 357630 }, { "epoch": 1.3825362217995703, "grad_norm": 0.09555447101593018, "learning_rate": 0.002, "loss": 2.3275, "step": 357640 }, { "epoch": 1.3825748790029535, "grad_norm": 0.13319911062717438, "learning_rate": 0.002, "loss": 2.3397, "step": 357650 }, { "epoch": 1.3826135362063368, "grad_norm": 0.1469419002532959, "learning_rate": 0.002, "loss": 2.3287, "step": 357660 }, { "epoch": 1.38265219340972, "grad_norm": 0.11750228703022003, "learning_rate": 0.002, "loss": 2.3193, "step": 357670 }, { "epoch": 1.3826908506131033, "grad_norm": 0.1029818207025528, "learning_rate": 0.002, "loss": 2.3313, "step": 357680 }, { "epoch": 1.3827295078164865, "grad_norm": 0.1653258204460144, "learning_rate": 0.002, "loss": 2.3425, "step": 357690 }, { "epoch": 1.3827681650198698, "grad_norm": 0.09607890993356705, "learning_rate": 0.002, "loss": 2.3258, "step": 357700 }, { "epoch": 1.382806822223253, "grad_norm": 0.09096881747245789, "learning_rate": 0.002, "loss": 2.3456, "step": 357710 }, { "epoch": 1.3828454794266363, "grad_norm": 0.10659847408533096, "learning_rate": 0.002, "loss": 2.3352, "step": 357720 }, { "epoch": 1.3828841366300195, "grad_norm": 0.0992022231221199, "learning_rate": 0.002, "loss": 2.3377, "step": 357730 }, { "epoch": 1.382922793833403, "grad_norm": 0.09695616364479065, "learning_rate": 0.002, "loss": 2.3364, "step": 357740 }, { "epoch": 1.3829614510367862, "grad_norm": 0.08763127774000168, "learning_rate": 0.002, "loss": 2.3252, "step": 357750 }, { "epoch": 1.3830001082401695, "grad_norm": 0.09782769531011581, "learning_rate": 0.002, "loss": 2.3472, "step": 357760 }, { "epoch": 1.3830387654435528, "grad_norm": 0.1507205367088318, "learning_rate": 0.002, "loss": 2.3189, "step": 357770 }, { "epoch": 1.383077422646936, "grad_norm": 0.10128536820411682, "learning_rate": 0.002, "loss": 2.3248, "step": 357780 }, { "epoch": 1.3831160798503193, "grad_norm": 0.09506537765264511, "learning_rate": 0.002, "loss": 2.3256, "step": 357790 }, { "epoch": 1.3831547370537025, "grad_norm": 0.09646282345056534, "learning_rate": 0.002, "loss": 2.3384, "step": 357800 }, { "epoch": 1.383193394257086, "grad_norm": 0.10690893232822418, "learning_rate": 0.002, "loss": 2.3449, "step": 357810 }, { "epoch": 1.3832320514604692, "grad_norm": 0.11854461580514908, "learning_rate": 0.002, "loss": 2.3416, "step": 357820 }, { "epoch": 1.3832707086638525, "grad_norm": 0.11158133298158646, "learning_rate": 0.002, "loss": 2.336, "step": 357830 }, { "epoch": 1.3833093658672357, "grad_norm": 0.09612828493118286, "learning_rate": 0.002, "loss": 2.3227, "step": 357840 }, { "epoch": 1.383348023070619, "grad_norm": 0.10760582238435745, "learning_rate": 0.002, "loss": 2.3391, "step": 357850 }, { "epoch": 1.3833866802740022, "grad_norm": 0.12931786477565765, "learning_rate": 0.002, "loss": 2.3306, "step": 357860 }, { "epoch": 1.3834253374773855, "grad_norm": 0.10564881563186646, "learning_rate": 0.002, "loss": 2.341, "step": 357870 }, { "epoch": 1.3834639946807687, "grad_norm": 0.10905144363641739, "learning_rate": 0.002, "loss": 2.3354, "step": 357880 }, { "epoch": 1.383502651884152, "grad_norm": 0.09472287446260452, "learning_rate": 0.002, "loss": 2.3404, "step": 357890 }, { "epoch": 1.3835413090875353, "grad_norm": 0.1405135542154312, "learning_rate": 0.002, "loss": 2.3483, "step": 357900 }, { "epoch": 1.3835799662909187, "grad_norm": 0.11076802015304565, "learning_rate": 0.002, "loss": 2.3328, "step": 357910 }, { "epoch": 1.383618623494302, "grad_norm": 0.10980372130870819, "learning_rate": 0.002, "loss": 2.3448, "step": 357920 }, { "epoch": 1.3836572806976852, "grad_norm": 0.1058148592710495, "learning_rate": 0.002, "loss": 2.3274, "step": 357930 }, { "epoch": 1.3836959379010685, "grad_norm": 0.09596095979213715, "learning_rate": 0.002, "loss": 2.3265, "step": 357940 }, { "epoch": 1.3837345951044517, "grad_norm": 0.11825399100780487, "learning_rate": 0.002, "loss": 2.3384, "step": 357950 }, { "epoch": 1.383773252307835, "grad_norm": 0.10999801754951477, "learning_rate": 0.002, "loss": 2.3177, "step": 357960 }, { "epoch": 1.3838119095112182, "grad_norm": 0.10320445150136948, "learning_rate": 0.002, "loss": 2.3406, "step": 357970 }, { "epoch": 1.3838505667146017, "grad_norm": 0.12310822308063507, "learning_rate": 0.002, "loss": 2.3356, "step": 357980 }, { "epoch": 1.383889223917985, "grad_norm": 0.104934923350811, "learning_rate": 0.002, "loss": 2.3389, "step": 357990 }, { "epoch": 1.3839278811213682, "grad_norm": 0.10744967311620712, "learning_rate": 0.002, "loss": 2.3574, "step": 358000 }, { "epoch": 1.3839665383247515, "grad_norm": 0.11633177101612091, "learning_rate": 0.002, "loss": 2.3337, "step": 358010 }, { "epoch": 1.3840051955281347, "grad_norm": 0.12066573649644852, "learning_rate": 0.002, "loss": 2.3272, "step": 358020 }, { "epoch": 1.384043852731518, "grad_norm": 0.12387596070766449, "learning_rate": 0.002, "loss": 2.3279, "step": 358030 }, { "epoch": 1.3840825099349012, "grad_norm": 0.10241378098726273, "learning_rate": 0.002, "loss": 2.3558, "step": 358040 }, { "epoch": 1.3841211671382845, "grad_norm": 0.09486771374940872, "learning_rate": 0.002, "loss": 2.3433, "step": 358050 }, { "epoch": 1.3841598243416677, "grad_norm": 0.11558888107538223, "learning_rate": 0.002, "loss": 2.3413, "step": 358060 }, { "epoch": 1.384198481545051, "grad_norm": 0.10875887423753738, "learning_rate": 0.002, "loss": 2.3261, "step": 358070 }, { "epoch": 1.3842371387484345, "grad_norm": 0.11380153149366379, "learning_rate": 0.002, "loss": 2.343, "step": 358080 }, { "epoch": 1.3842757959518177, "grad_norm": 0.11291273683309555, "learning_rate": 0.002, "loss": 2.3249, "step": 358090 }, { "epoch": 1.384314453155201, "grad_norm": 0.12594982981681824, "learning_rate": 0.002, "loss": 2.3484, "step": 358100 }, { "epoch": 1.3843531103585842, "grad_norm": 0.10149511694908142, "learning_rate": 0.002, "loss": 2.3365, "step": 358110 }, { "epoch": 1.3843917675619675, "grad_norm": 0.09986498206853867, "learning_rate": 0.002, "loss": 2.3446, "step": 358120 }, { "epoch": 1.3844304247653507, "grad_norm": 0.09570245444774628, "learning_rate": 0.002, "loss": 2.3456, "step": 358130 }, { "epoch": 1.384469081968734, "grad_norm": 0.11640851199626923, "learning_rate": 0.002, "loss": 2.3292, "step": 358140 }, { "epoch": 1.3845077391721174, "grad_norm": 0.11998428404331207, "learning_rate": 0.002, "loss": 2.3317, "step": 358150 }, { "epoch": 1.3845463963755007, "grad_norm": 0.10228558629751205, "learning_rate": 0.002, "loss": 2.3429, "step": 358160 }, { "epoch": 1.384585053578884, "grad_norm": 0.09900269657373428, "learning_rate": 0.002, "loss": 2.3373, "step": 358170 }, { "epoch": 1.3846237107822672, "grad_norm": 0.10694219172000885, "learning_rate": 0.002, "loss": 2.3463, "step": 358180 }, { "epoch": 1.3846623679856505, "grad_norm": 0.0999547615647316, "learning_rate": 0.002, "loss": 2.3367, "step": 358190 }, { "epoch": 1.3847010251890337, "grad_norm": 0.11348982900381088, "learning_rate": 0.002, "loss": 2.3364, "step": 358200 }, { "epoch": 1.384739682392417, "grad_norm": 0.09742119908332825, "learning_rate": 0.002, "loss": 2.3288, "step": 358210 }, { "epoch": 1.3847783395958002, "grad_norm": 0.11021881550550461, "learning_rate": 0.002, "loss": 2.3202, "step": 358220 }, { "epoch": 1.3848169967991835, "grad_norm": 0.09835400432348251, "learning_rate": 0.002, "loss": 2.3225, "step": 358230 }, { "epoch": 1.3848556540025667, "grad_norm": 0.10762475430965424, "learning_rate": 0.002, "loss": 2.3412, "step": 358240 }, { "epoch": 1.3848943112059502, "grad_norm": 0.0981389731168747, "learning_rate": 0.002, "loss": 2.3202, "step": 358250 }, { "epoch": 1.3849329684093334, "grad_norm": 0.12566789984703064, "learning_rate": 0.002, "loss": 2.3271, "step": 358260 }, { "epoch": 1.3849716256127167, "grad_norm": 0.10963409394025803, "learning_rate": 0.002, "loss": 2.3275, "step": 358270 }, { "epoch": 1.3850102828161, "grad_norm": 0.10804201662540436, "learning_rate": 0.002, "loss": 2.3347, "step": 358280 }, { "epoch": 1.3850489400194832, "grad_norm": 0.10372433066368103, "learning_rate": 0.002, "loss": 2.3312, "step": 358290 }, { "epoch": 1.3850875972228665, "grad_norm": 0.10548718273639679, "learning_rate": 0.002, "loss": 2.3247, "step": 358300 }, { "epoch": 1.38512625442625, "grad_norm": 0.09937731176614761, "learning_rate": 0.002, "loss": 2.3415, "step": 358310 }, { "epoch": 1.3851649116296332, "grad_norm": 0.10259462893009186, "learning_rate": 0.002, "loss": 2.3315, "step": 358320 }, { "epoch": 1.3852035688330164, "grad_norm": 0.10795610398054123, "learning_rate": 0.002, "loss": 2.3236, "step": 358330 }, { "epoch": 1.3852422260363997, "grad_norm": 0.0990600511431694, "learning_rate": 0.002, "loss": 2.3273, "step": 358340 }, { "epoch": 1.385280883239783, "grad_norm": 0.10372160375118256, "learning_rate": 0.002, "loss": 2.3402, "step": 358350 }, { "epoch": 1.3853195404431662, "grad_norm": 0.10864154994487762, "learning_rate": 0.002, "loss": 2.3406, "step": 358360 }, { "epoch": 1.3853581976465494, "grad_norm": 0.10648924112319946, "learning_rate": 0.002, "loss": 2.3285, "step": 358370 }, { "epoch": 1.3853968548499327, "grad_norm": 0.1119183599948883, "learning_rate": 0.002, "loss": 2.3267, "step": 358380 }, { "epoch": 1.385435512053316, "grad_norm": 0.525481104850769, "learning_rate": 0.002, "loss": 2.3384, "step": 358390 }, { "epoch": 1.3854741692566992, "grad_norm": 0.1384838968515396, "learning_rate": 0.002, "loss": 2.3261, "step": 358400 }, { "epoch": 1.3855128264600824, "grad_norm": 0.09210643172264099, "learning_rate": 0.002, "loss": 2.3253, "step": 358410 }, { "epoch": 1.385551483663466, "grad_norm": 0.08706945180892944, "learning_rate": 0.002, "loss": 2.3303, "step": 358420 }, { "epoch": 1.3855901408668492, "grad_norm": 0.10544677823781967, "learning_rate": 0.002, "loss": 2.3314, "step": 358430 }, { "epoch": 1.3856287980702324, "grad_norm": 0.12038781493902206, "learning_rate": 0.002, "loss": 2.3346, "step": 358440 }, { "epoch": 1.3856674552736157, "grad_norm": 0.10919291526079178, "learning_rate": 0.002, "loss": 2.3373, "step": 358450 }, { "epoch": 1.385706112476999, "grad_norm": 0.10312280058860779, "learning_rate": 0.002, "loss": 2.3366, "step": 358460 }, { "epoch": 1.3857447696803822, "grad_norm": 0.09364982694387436, "learning_rate": 0.002, "loss": 2.3251, "step": 358470 }, { "epoch": 1.3857834268837657, "grad_norm": 0.12228646129369736, "learning_rate": 0.002, "loss": 2.3475, "step": 358480 }, { "epoch": 1.385822084087149, "grad_norm": 0.09567874670028687, "learning_rate": 0.002, "loss": 2.3216, "step": 358490 }, { "epoch": 1.3858607412905322, "grad_norm": 0.1218157559633255, "learning_rate": 0.002, "loss": 2.3299, "step": 358500 }, { "epoch": 1.3858993984939154, "grad_norm": 0.12315016239881516, "learning_rate": 0.002, "loss": 2.3248, "step": 358510 }, { "epoch": 1.3859380556972987, "grad_norm": 0.11362304538488388, "learning_rate": 0.002, "loss": 2.341, "step": 358520 }, { "epoch": 1.385976712900682, "grad_norm": 0.10522307455539703, "learning_rate": 0.002, "loss": 2.3433, "step": 358530 }, { "epoch": 1.3860153701040652, "grad_norm": 0.09985428303480148, "learning_rate": 0.002, "loss": 2.3359, "step": 358540 }, { "epoch": 1.3860540273074484, "grad_norm": 0.10328204929828644, "learning_rate": 0.002, "loss": 2.3285, "step": 358550 }, { "epoch": 1.3860926845108317, "grad_norm": 0.10569297522306442, "learning_rate": 0.002, "loss": 2.3237, "step": 358560 }, { "epoch": 1.386131341714215, "grad_norm": 0.10771090537309647, "learning_rate": 0.002, "loss": 2.3289, "step": 358570 }, { "epoch": 1.3861699989175982, "grad_norm": 0.11412060260772705, "learning_rate": 0.002, "loss": 2.3132, "step": 358580 }, { "epoch": 1.3862086561209817, "grad_norm": 0.10503178834915161, "learning_rate": 0.002, "loss": 2.3302, "step": 358590 }, { "epoch": 1.386247313324365, "grad_norm": 0.10429660230875015, "learning_rate": 0.002, "loss": 2.3359, "step": 358600 }, { "epoch": 1.3862859705277482, "grad_norm": 0.09722950309515, "learning_rate": 0.002, "loss": 2.3426, "step": 358610 }, { "epoch": 1.3863246277311314, "grad_norm": 0.10863056033849716, "learning_rate": 0.002, "loss": 2.3477, "step": 358620 }, { "epoch": 1.3863632849345147, "grad_norm": 0.09604120999574661, "learning_rate": 0.002, "loss": 2.3323, "step": 358630 }, { "epoch": 1.386401942137898, "grad_norm": 0.12591049075126648, "learning_rate": 0.002, "loss": 2.3528, "step": 358640 }, { "epoch": 1.3864405993412814, "grad_norm": 0.12707343697547913, "learning_rate": 0.002, "loss": 2.3405, "step": 358650 }, { "epoch": 1.3864792565446646, "grad_norm": 0.09873824566602707, "learning_rate": 0.002, "loss": 2.3439, "step": 358660 }, { "epoch": 1.386517913748048, "grad_norm": 0.1484958827495575, "learning_rate": 0.002, "loss": 2.3351, "step": 358670 }, { "epoch": 1.3865565709514311, "grad_norm": 0.10666509717702866, "learning_rate": 0.002, "loss": 2.3451, "step": 358680 }, { "epoch": 1.3865952281548144, "grad_norm": 0.1105133444070816, "learning_rate": 0.002, "loss": 2.3351, "step": 358690 }, { "epoch": 1.3866338853581976, "grad_norm": 0.0936323031783104, "learning_rate": 0.002, "loss": 2.3203, "step": 358700 }, { "epoch": 1.386672542561581, "grad_norm": 0.09979099780321121, "learning_rate": 0.002, "loss": 2.3351, "step": 358710 }, { "epoch": 1.3867111997649642, "grad_norm": 0.0981900617480278, "learning_rate": 0.002, "loss": 2.3436, "step": 358720 }, { "epoch": 1.3867498569683474, "grad_norm": 0.11693184822797775, "learning_rate": 0.002, "loss": 2.3455, "step": 358730 }, { "epoch": 1.3867885141717307, "grad_norm": 0.10722755640745163, "learning_rate": 0.002, "loss": 2.328, "step": 358740 }, { "epoch": 1.386827171375114, "grad_norm": 0.10744725167751312, "learning_rate": 0.002, "loss": 2.3412, "step": 358750 }, { "epoch": 1.3868658285784974, "grad_norm": 0.09629779309034348, "learning_rate": 0.002, "loss": 2.3358, "step": 358760 }, { "epoch": 1.3869044857818806, "grad_norm": 0.09576866030693054, "learning_rate": 0.002, "loss": 2.3205, "step": 358770 }, { "epoch": 1.3869431429852639, "grad_norm": 0.11830944567918777, "learning_rate": 0.002, "loss": 2.3175, "step": 358780 }, { "epoch": 1.3869818001886471, "grad_norm": 0.1023031547665596, "learning_rate": 0.002, "loss": 2.3452, "step": 358790 }, { "epoch": 1.3870204573920304, "grad_norm": 0.09811899065971375, "learning_rate": 0.002, "loss": 2.3427, "step": 358800 }, { "epoch": 1.3870591145954136, "grad_norm": 0.11076534539461136, "learning_rate": 0.002, "loss": 2.3268, "step": 358810 }, { "epoch": 1.3870977717987971, "grad_norm": 0.11109668761491776, "learning_rate": 0.002, "loss": 2.3401, "step": 358820 }, { "epoch": 1.3871364290021804, "grad_norm": 0.10413765162229538, "learning_rate": 0.002, "loss": 2.3328, "step": 358830 }, { "epoch": 1.3871750862055636, "grad_norm": 0.11741053313016891, "learning_rate": 0.002, "loss": 2.3405, "step": 358840 }, { "epoch": 1.3872137434089469, "grad_norm": 0.09895078837871552, "learning_rate": 0.002, "loss": 2.3249, "step": 358850 }, { "epoch": 1.3872524006123301, "grad_norm": 0.10803353041410446, "learning_rate": 0.002, "loss": 2.3266, "step": 358860 }, { "epoch": 1.3872910578157134, "grad_norm": 0.16418716311454773, "learning_rate": 0.002, "loss": 2.3257, "step": 358870 }, { "epoch": 1.3873297150190966, "grad_norm": 0.11566440016031265, "learning_rate": 0.002, "loss": 2.3417, "step": 358880 }, { "epoch": 1.3873683722224799, "grad_norm": 0.0987687036395073, "learning_rate": 0.002, "loss": 2.3181, "step": 358890 }, { "epoch": 1.3874070294258631, "grad_norm": 0.09825720638036728, "learning_rate": 0.002, "loss": 2.3302, "step": 358900 }, { "epoch": 1.3874456866292464, "grad_norm": 0.12824301421642303, "learning_rate": 0.002, "loss": 2.3316, "step": 358910 }, { "epoch": 1.3874843438326296, "grad_norm": 0.11537756025791168, "learning_rate": 0.002, "loss": 2.3325, "step": 358920 }, { "epoch": 1.3875230010360131, "grad_norm": 0.09872827678918839, "learning_rate": 0.002, "loss": 2.3255, "step": 358930 }, { "epoch": 1.3875616582393964, "grad_norm": 0.10614890605211258, "learning_rate": 0.002, "loss": 2.3153, "step": 358940 }, { "epoch": 1.3876003154427796, "grad_norm": 0.1058836579322815, "learning_rate": 0.002, "loss": 2.339, "step": 358950 }, { "epoch": 1.3876389726461629, "grad_norm": 0.36678752303123474, "learning_rate": 0.002, "loss": 2.3472, "step": 358960 }, { "epoch": 1.3876776298495461, "grad_norm": 0.12122151255607605, "learning_rate": 0.002, "loss": 2.324, "step": 358970 }, { "epoch": 1.3877162870529294, "grad_norm": 0.12098393589258194, "learning_rate": 0.002, "loss": 2.3328, "step": 358980 }, { "epoch": 1.3877549442563129, "grad_norm": 0.10260053724050522, "learning_rate": 0.002, "loss": 2.3394, "step": 358990 }, { "epoch": 1.387793601459696, "grad_norm": 0.1203232929110527, "learning_rate": 0.002, "loss": 2.3557, "step": 359000 }, { "epoch": 1.3878322586630794, "grad_norm": 0.10592683404684067, "learning_rate": 0.002, "loss": 2.3261, "step": 359010 }, { "epoch": 1.3878709158664626, "grad_norm": 0.09129156172275543, "learning_rate": 0.002, "loss": 2.338, "step": 359020 }, { "epoch": 1.3879095730698459, "grad_norm": 0.11400771886110306, "learning_rate": 0.002, "loss": 2.3265, "step": 359030 }, { "epoch": 1.387948230273229, "grad_norm": 0.10435612499713898, "learning_rate": 0.002, "loss": 2.3293, "step": 359040 }, { "epoch": 1.3879868874766124, "grad_norm": 0.11092580854892731, "learning_rate": 0.002, "loss": 2.335, "step": 359050 }, { "epoch": 1.3880255446799956, "grad_norm": 0.10389309376478195, "learning_rate": 0.002, "loss": 2.3313, "step": 359060 }, { "epoch": 1.3880642018833789, "grad_norm": 0.11955828219652176, "learning_rate": 0.002, "loss": 2.332, "step": 359070 }, { "epoch": 1.3881028590867621, "grad_norm": 0.0991957038640976, "learning_rate": 0.002, "loss": 2.3346, "step": 359080 }, { "epoch": 1.3881415162901454, "grad_norm": 0.11172988265752792, "learning_rate": 0.002, "loss": 2.3251, "step": 359090 }, { "epoch": 1.3881801734935288, "grad_norm": 0.10796627402305603, "learning_rate": 0.002, "loss": 2.3473, "step": 359100 }, { "epoch": 1.388218830696912, "grad_norm": 0.0978311151266098, "learning_rate": 0.002, "loss": 2.3169, "step": 359110 }, { "epoch": 1.3882574879002954, "grad_norm": 0.0951458290219307, "learning_rate": 0.002, "loss": 2.3105, "step": 359120 }, { "epoch": 1.3882961451036786, "grad_norm": 0.1115274578332901, "learning_rate": 0.002, "loss": 2.3335, "step": 359130 }, { "epoch": 1.3883348023070619, "grad_norm": 0.10567060858011246, "learning_rate": 0.002, "loss": 2.3162, "step": 359140 }, { "epoch": 1.388373459510445, "grad_norm": 0.09773063659667969, "learning_rate": 0.002, "loss": 2.3283, "step": 359150 }, { "epoch": 1.3884121167138286, "grad_norm": 0.10530774295330048, "learning_rate": 0.002, "loss": 2.3345, "step": 359160 }, { "epoch": 1.3884507739172118, "grad_norm": 0.11766993254423141, "learning_rate": 0.002, "loss": 2.3396, "step": 359170 }, { "epoch": 1.388489431120595, "grad_norm": 0.10456059873104095, "learning_rate": 0.002, "loss": 2.316, "step": 359180 }, { "epoch": 1.3885280883239783, "grad_norm": 0.11992094665765762, "learning_rate": 0.002, "loss": 2.3408, "step": 359190 }, { "epoch": 1.3885667455273616, "grad_norm": 0.10693403333425522, "learning_rate": 0.002, "loss": 2.3293, "step": 359200 }, { "epoch": 1.3886054027307448, "grad_norm": 0.11005581170320511, "learning_rate": 0.002, "loss": 2.3381, "step": 359210 }, { "epoch": 1.388644059934128, "grad_norm": 0.08628341555595398, "learning_rate": 0.002, "loss": 2.3443, "step": 359220 }, { "epoch": 1.3886827171375113, "grad_norm": 0.09540165960788727, "learning_rate": 0.002, "loss": 2.342, "step": 359230 }, { "epoch": 1.3887213743408946, "grad_norm": 0.10182127356529236, "learning_rate": 0.002, "loss": 2.3213, "step": 359240 }, { "epoch": 1.3887600315442779, "grad_norm": 0.08753172308206558, "learning_rate": 0.002, "loss": 2.326, "step": 359250 }, { "epoch": 1.388798688747661, "grad_norm": 0.0912495106458664, "learning_rate": 0.002, "loss": 2.3412, "step": 359260 }, { "epoch": 1.3888373459510446, "grad_norm": 0.1422264277935028, "learning_rate": 0.002, "loss": 2.3417, "step": 359270 }, { "epoch": 1.3888760031544278, "grad_norm": 0.0967712327837944, "learning_rate": 0.002, "loss": 2.3414, "step": 359280 }, { "epoch": 1.388914660357811, "grad_norm": 0.11819314956665039, "learning_rate": 0.002, "loss": 2.3358, "step": 359290 }, { "epoch": 1.3889533175611943, "grad_norm": 0.1206083670258522, "learning_rate": 0.002, "loss": 2.3366, "step": 359300 }, { "epoch": 1.3889919747645776, "grad_norm": 0.11072078347206116, "learning_rate": 0.002, "loss": 2.3353, "step": 359310 }, { "epoch": 1.3890306319679608, "grad_norm": 0.10273478180170059, "learning_rate": 0.002, "loss": 2.3197, "step": 359320 }, { "epoch": 1.3890692891713443, "grad_norm": 0.09189434349536896, "learning_rate": 0.002, "loss": 2.3193, "step": 359330 }, { "epoch": 1.3891079463747276, "grad_norm": 0.09685275703668594, "learning_rate": 0.002, "loss": 2.3291, "step": 359340 }, { "epoch": 1.3891466035781108, "grad_norm": 0.0961252748966217, "learning_rate": 0.002, "loss": 2.3405, "step": 359350 }, { "epoch": 1.389185260781494, "grad_norm": 0.10798119753599167, "learning_rate": 0.002, "loss": 2.3274, "step": 359360 }, { "epoch": 1.3892239179848773, "grad_norm": 0.13294091820716858, "learning_rate": 0.002, "loss": 2.338, "step": 359370 }, { "epoch": 1.3892625751882606, "grad_norm": 0.10411449521780014, "learning_rate": 0.002, "loss": 2.3493, "step": 359380 }, { "epoch": 1.3893012323916438, "grad_norm": 0.09397763758897781, "learning_rate": 0.002, "loss": 2.347, "step": 359390 }, { "epoch": 1.389339889595027, "grad_norm": 0.11398205906152725, "learning_rate": 0.002, "loss": 2.3342, "step": 359400 }, { "epoch": 1.3893785467984103, "grad_norm": 0.11523891240358353, "learning_rate": 0.002, "loss": 2.3348, "step": 359410 }, { "epoch": 1.3894172040017936, "grad_norm": 0.10378719866275787, "learning_rate": 0.002, "loss": 2.3311, "step": 359420 }, { "epoch": 1.3894558612051768, "grad_norm": 0.10406482219696045, "learning_rate": 0.002, "loss": 2.3419, "step": 359430 }, { "epoch": 1.3894945184085603, "grad_norm": 0.0997418463230133, "learning_rate": 0.002, "loss": 2.3239, "step": 359440 }, { "epoch": 1.3895331756119436, "grad_norm": 0.10954898595809937, "learning_rate": 0.002, "loss": 2.3405, "step": 359450 }, { "epoch": 1.3895718328153268, "grad_norm": 0.1074637845158577, "learning_rate": 0.002, "loss": 2.3258, "step": 359460 }, { "epoch": 1.38961049001871, "grad_norm": 0.09442642331123352, "learning_rate": 0.002, "loss": 2.3366, "step": 359470 }, { "epoch": 1.3896491472220933, "grad_norm": 0.1044730693101883, "learning_rate": 0.002, "loss": 2.3419, "step": 359480 }, { "epoch": 1.3896878044254766, "grad_norm": 0.1017158254981041, "learning_rate": 0.002, "loss": 2.3377, "step": 359490 }, { "epoch": 1.38972646162886, "grad_norm": 0.09050361067056656, "learning_rate": 0.002, "loss": 2.3386, "step": 359500 }, { "epoch": 1.3897651188322433, "grad_norm": 0.11728296428918839, "learning_rate": 0.002, "loss": 2.3324, "step": 359510 }, { "epoch": 1.3898037760356265, "grad_norm": 0.13631664216518402, "learning_rate": 0.002, "loss": 2.3309, "step": 359520 }, { "epoch": 1.3898424332390098, "grad_norm": 0.11105327308177948, "learning_rate": 0.002, "loss": 2.3389, "step": 359530 }, { "epoch": 1.389881090442393, "grad_norm": 0.10104014724493027, "learning_rate": 0.002, "loss": 2.3283, "step": 359540 }, { "epoch": 1.3899197476457763, "grad_norm": 0.10071936994791031, "learning_rate": 0.002, "loss": 2.3436, "step": 359550 }, { "epoch": 1.3899584048491596, "grad_norm": 0.10410552471876144, "learning_rate": 0.002, "loss": 2.3352, "step": 359560 }, { "epoch": 1.3899970620525428, "grad_norm": 0.10601193457841873, "learning_rate": 0.002, "loss": 2.341, "step": 359570 }, { "epoch": 1.390035719255926, "grad_norm": 0.09772384911775589, "learning_rate": 0.002, "loss": 2.3409, "step": 359580 }, { "epoch": 1.3900743764593093, "grad_norm": 0.095488540828228, "learning_rate": 0.002, "loss": 2.3372, "step": 359590 }, { "epoch": 1.3901130336626928, "grad_norm": 0.119688019156456, "learning_rate": 0.002, "loss": 2.3394, "step": 359600 }, { "epoch": 1.390151690866076, "grad_norm": 0.11805914342403412, "learning_rate": 0.002, "loss": 2.3443, "step": 359610 }, { "epoch": 1.3901903480694593, "grad_norm": 0.08788260817527771, "learning_rate": 0.002, "loss": 2.3281, "step": 359620 }, { "epoch": 1.3902290052728425, "grad_norm": 0.09670418500900269, "learning_rate": 0.002, "loss": 2.3595, "step": 359630 }, { "epoch": 1.3902676624762258, "grad_norm": 0.09463132172822952, "learning_rate": 0.002, "loss": 2.3191, "step": 359640 }, { "epoch": 1.390306319679609, "grad_norm": 0.12268111854791641, "learning_rate": 0.002, "loss": 2.3347, "step": 359650 }, { "epoch": 1.3903449768829923, "grad_norm": 0.09824145585298538, "learning_rate": 0.002, "loss": 2.3338, "step": 359660 }, { "epoch": 1.3903836340863758, "grad_norm": 0.09898674488067627, "learning_rate": 0.002, "loss": 2.3336, "step": 359670 }, { "epoch": 1.390422291289759, "grad_norm": 0.1264234334230423, "learning_rate": 0.002, "loss": 2.3334, "step": 359680 }, { "epoch": 1.3904609484931423, "grad_norm": 0.10478637367486954, "learning_rate": 0.002, "loss": 2.3349, "step": 359690 }, { "epoch": 1.3904996056965255, "grad_norm": 0.10049188882112503, "learning_rate": 0.002, "loss": 2.3298, "step": 359700 }, { "epoch": 1.3905382628999088, "grad_norm": 0.11678900569677353, "learning_rate": 0.002, "loss": 2.3439, "step": 359710 }, { "epoch": 1.390576920103292, "grad_norm": 0.09387555718421936, "learning_rate": 0.002, "loss": 2.3488, "step": 359720 }, { "epoch": 1.3906155773066753, "grad_norm": 0.13321635127067566, "learning_rate": 0.002, "loss": 2.322, "step": 359730 }, { "epoch": 1.3906542345100585, "grad_norm": 0.11627347767353058, "learning_rate": 0.002, "loss": 2.3305, "step": 359740 }, { "epoch": 1.3906928917134418, "grad_norm": 0.23211270570755005, "learning_rate": 0.002, "loss": 2.339, "step": 359750 }, { "epoch": 1.390731548916825, "grad_norm": 0.10206664353609085, "learning_rate": 0.002, "loss": 2.3368, "step": 359760 }, { "epoch": 1.3907702061202085, "grad_norm": 0.09255781769752502, "learning_rate": 0.002, "loss": 2.3397, "step": 359770 }, { "epoch": 1.3908088633235918, "grad_norm": 0.10072731226682663, "learning_rate": 0.002, "loss": 2.3455, "step": 359780 }, { "epoch": 1.390847520526975, "grad_norm": 0.09593206644058228, "learning_rate": 0.002, "loss": 2.3335, "step": 359790 }, { "epoch": 1.3908861777303583, "grad_norm": 0.1118595078587532, "learning_rate": 0.002, "loss": 2.3413, "step": 359800 }, { "epoch": 1.3909248349337415, "grad_norm": 0.10689254850149155, "learning_rate": 0.002, "loss": 2.3495, "step": 359810 }, { "epoch": 1.3909634921371248, "grad_norm": 0.10757198929786682, "learning_rate": 0.002, "loss": 2.3269, "step": 359820 }, { "epoch": 1.391002149340508, "grad_norm": 0.09484315663576126, "learning_rate": 0.002, "loss": 2.3403, "step": 359830 }, { "epoch": 1.3910408065438915, "grad_norm": 0.10556916147470474, "learning_rate": 0.002, "loss": 2.3336, "step": 359840 }, { "epoch": 1.3910794637472748, "grad_norm": 0.09785880893468857, "learning_rate": 0.002, "loss": 2.3447, "step": 359850 }, { "epoch": 1.391118120950658, "grad_norm": 0.12440207600593567, "learning_rate": 0.002, "loss": 2.3435, "step": 359860 }, { "epoch": 1.3911567781540413, "grad_norm": 0.1069926843047142, "learning_rate": 0.002, "loss": 2.3331, "step": 359870 }, { "epoch": 1.3911954353574245, "grad_norm": 0.10705050081014633, "learning_rate": 0.002, "loss": 2.3373, "step": 359880 }, { "epoch": 1.3912340925608078, "grad_norm": 0.09222240746021271, "learning_rate": 0.002, "loss": 2.3492, "step": 359890 }, { "epoch": 1.391272749764191, "grad_norm": 0.10790204256772995, "learning_rate": 0.002, "loss": 2.3416, "step": 359900 }, { "epoch": 1.3913114069675743, "grad_norm": 0.12623713910579681, "learning_rate": 0.002, "loss": 2.3343, "step": 359910 }, { "epoch": 1.3913500641709575, "grad_norm": 0.10312226414680481, "learning_rate": 0.002, "loss": 2.3312, "step": 359920 }, { "epoch": 1.3913887213743408, "grad_norm": 0.0920531153678894, "learning_rate": 0.002, "loss": 2.3391, "step": 359930 }, { "epoch": 1.3914273785777242, "grad_norm": 0.10971745103597641, "learning_rate": 0.002, "loss": 2.3345, "step": 359940 }, { "epoch": 1.3914660357811075, "grad_norm": 0.1152036041021347, "learning_rate": 0.002, "loss": 2.3254, "step": 359950 }, { "epoch": 1.3915046929844908, "grad_norm": 0.09976490586996078, "learning_rate": 0.002, "loss": 2.3389, "step": 359960 }, { "epoch": 1.391543350187874, "grad_norm": 0.10785773396492004, "learning_rate": 0.002, "loss": 2.3433, "step": 359970 }, { "epoch": 1.3915820073912573, "grad_norm": 0.09680186957120895, "learning_rate": 0.002, "loss": 2.34, "step": 359980 }, { "epoch": 1.3916206645946405, "grad_norm": 0.11459063738584518, "learning_rate": 0.002, "loss": 2.3297, "step": 359990 }, { "epoch": 1.3916593217980238, "grad_norm": 0.09679178148508072, "learning_rate": 0.002, "loss": 2.3295, "step": 360000 }, { "epoch": 1.3916979790014072, "grad_norm": 0.14078864455223083, "learning_rate": 0.002, "loss": 2.3296, "step": 360010 }, { "epoch": 1.3917366362047905, "grad_norm": 0.10744940489530563, "learning_rate": 0.002, "loss": 2.3368, "step": 360020 }, { "epoch": 1.3917752934081737, "grad_norm": 0.10858958959579468, "learning_rate": 0.002, "loss": 2.3354, "step": 360030 }, { "epoch": 1.391813950611557, "grad_norm": 0.11119197309017181, "learning_rate": 0.002, "loss": 2.3436, "step": 360040 }, { "epoch": 1.3918526078149402, "grad_norm": 0.09662743657827377, "learning_rate": 0.002, "loss": 2.3279, "step": 360050 }, { "epoch": 1.3918912650183235, "grad_norm": 0.10182948410511017, "learning_rate": 0.002, "loss": 2.3459, "step": 360060 }, { "epoch": 1.3919299222217068, "grad_norm": 0.10580704361200333, "learning_rate": 0.002, "loss": 2.3146, "step": 360070 }, { "epoch": 1.39196857942509, "grad_norm": 0.09505116194486618, "learning_rate": 0.002, "loss": 2.3218, "step": 360080 }, { "epoch": 1.3920072366284733, "grad_norm": 0.10502129048109055, "learning_rate": 0.002, "loss": 2.3312, "step": 360090 }, { "epoch": 1.3920458938318565, "grad_norm": 0.11738190054893494, "learning_rate": 0.002, "loss": 2.3392, "step": 360100 }, { "epoch": 1.39208455103524, "grad_norm": 0.09682303667068481, "learning_rate": 0.002, "loss": 2.3354, "step": 360110 }, { "epoch": 1.3921232082386232, "grad_norm": 0.10412992537021637, "learning_rate": 0.002, "loss": 2.3245, "step": 360120 }, { "epoch": 1.3921618654420065, "grad_norm": 0.08938931673765182, "learning_rate": 0.002, "loss": 2.3429, "step": 360130 }, { "epoch": 1.3922005226453897, "grad_norm": 0.10273377597332001, "learning_rate": 0.002, "loss": 2.3454, "step": 360140 }, { "epoch": 1.392239179848773, "grad_norm": 0.11520109325647354, "learning_rate": 0.002, "loss": 2.3136, "step": 360150 }, { "epoch": 1.3922778370521562, "grad_norm": 0.10236816108226776, "learning_rate": 0.002, "loss": 2.3306, "step": 360160 }, { "epoch": 1.3923164942555395, "grad_norm": 0.10257626324892044, "learning_rate": 0.002, "loss": 2.357, "step": 360170 }, { "epoch": 1.392355151458923, "grad_norm": 0.09983783960342407, "learning_rate": 0.002, "loss": 2.3319, "step": 360180 }, { "epoch": 1.3923938086623062, "grad_norm": 0.10540879517793655, "learning_rate": 0.002, "loss": 2.3301, "step": 360190 }, { "epoch": 1.3924324658656895, "grad_norm": 0.1019587367773056, "learning_rate": 0.002, "loss": 2.3477, "step": 360200 }, { "epoch": 1.3924711230690727, "grad_norm": 0.11383510380983353, "learning_rate": 0.002, "loss": 2.3359, "step": 360210 }, { "epoch": 1.392509780272456, "grad_norm": 0.10606590658426285, "learning_rate": 0.002, "loss": 2.3358, "step": 360220 }, { "epoch": 1.3925484374758392, "grad_norm": 0.1148306354880333, "learning_rate": 0.002, "loss": 2.3284, "step": 360230 }, { "epoch": 1.3925870946792225, "grad_norm": 0.10644423961639404, "learning_rate": 0.002, "loss": 2.3266, "step": 360240 }, { "epoch": 1.3926257518826057, "grad_norm": 0.117405965924263, "learning_rate": 0.002, "loss": 2.3325, "step": 360250 }, { "epoch": 1.392664409085989, "grad_norm": 0.12969310581684113, "learning_rate": 0.002, "loss": 2.3419, "step": 360260 }, { "epoch": 1.3927030662893722, "grad_norm": 0.10582044720649719, "learning_rate": 0.002, "loss": 2.3319, "step": 360270 }, { "epoch": 1.3927417234927557, "grad_norm": 0.1080719605088234, "learning_rate": 0.002, "loss": 2.3389, "step": 360280 }, { "epoch": 1.392780380696139, "grad_norm": 0.11880721896886826, "learning_rate": 0.002, "loss": 2.3241, "step": 360290 }, { "epoch": 1.3928190378995222, "grad_norm": 0.0958956629037857, "learning_rate": 0.002, "loss": 2.3447, "step": 360300 }, { "epoch": 1.3928576951029055, "grad_norm": 0.11437147110700607, "learning_rate": 0.002, "loss": 2.3324, "step": 360310 }, { "epoch": 1.3928963523062887, "grad_norm": 0.09938332438468933, "learning_rate": 0.002, "loss": 2.3349, "step": 360320 }, { "epoch": 1.392935009509672, "grad_norm": 0.10863950103521347, "learning_rate": 0.002, "loss": 2.334, "step": 360330 }, { "epoch": 1.3929736667130554, "grad_norm": 0.1936277449131012, "learning_rate": 0.002, "loss": 2.3366, "step": 360340 }, { "epoch": 1.3930123239164387, "grad_norm": 0.09787117689847946, "learning_rate": 0.002, "loss": 2.334, "step": 360350 }, { "epoch": 1.393050981119822, "grad_norm": 0.09849102050065994, "learning_rate": 0.002, "loss": 2.314, "step": 360360 }, { "epoch": 1.3930896383232052, "grad_norm": 0.10864541679620743, "learning_rate": 0.002, "loss": 2.3363, "step": 360370 }, { "epoch": 1.3931282955265885, "grad_norm": 0.11145168542861938, "learning_rate": 0.002, "loss": 2.3352, "step": 360380 }, { "epoch": 1.3931669527299717, "grad_norm": 0.13048399984836578, "learning_rate": 0.002, "loss": 2.32, "step": 360390 }, { "epoch": 1.393205609933355, "grad_norm": 0.0969235748052597, "learning_rate": 0.002, "loss": 2.3299, "step": 360400 }, { "epoch": 1.3932442671367382, "grad_norm": 0.10593166947364807, "learning_rate": 0.002, "loss": 2.3247, "step": 360410 }, { "epoch": 1.3932829243401215, "grad_norm": 0.11622758954763412, "learning_rate": 0.002, "loss": 2.3432, "step": 360420 }, { "epoch": 1.3933215815435047, "grad_norm": 0.11350737512111664, "learning_rate": 0.002, "loss": 2.325, "step": 360430 }, { "epoch": 1.393360238746888, "grad_norm": 0.10533896088600159, "learning_rate": 0.002, "loss": 2.3362, "step": 360440 }, { "epoch": 1.3933988959502714, "grad_norm": 0.11156805604696274, "learning_rate": 0.002, "loss": 2.3369, "step": 360450 }, { "epoch": 1.3934375531536547, "grad_norm": 0.09442062675952911, "learning_rate": 0.002, "loss": 2.3287, "step": 360460 }, { "epoch": 1.393476210357038, "grad_norm": 0.13392575085163116, "learning_rate": 0.002, "loss": 2.338, "step": 360470 }, { "epoch": 1.3935148675604212, "grad_norm": 0.10841104388237, "learning_rate": 0.002, "loss": 2.3414, "step": 360480 }, { "epoch": 1.3935535247638045, "grad_norm": 0.11690452694892883, "learning_rate": 0.002, "loss": 2.3294, "step": 360490 }, { "epoch": 1.3935921819671877, "grad_norm": 0.10026668012142181, "learning_rate": 0.002, "loss": 2.3392, "step": 360500 }, { "epoch": 1.3936308391705712, "grad_norm": 0.11332149803638458, "learning_rate": 0.002, "loss": 2.3356, "step": 360510 }, { "epoch": 1.3936694963739544, "grad_norm": 0.10418102890253067, "learning_rate": 0.002, "loss": 2.3263, "step": 360520 }, { "epoch": 1.3937081535773377, "grad_norm": 0.10783516615629196, "learning_rate": 0.002, "loss": 2.3472, "step": 360530 }, { "epoch": 1.393746810780721, "grad_norm": 0.11106953024864197, "learning_rate": 0.002, "loss": 2.3112, "step": 360540 }, { "epoch": 1.3937854679841042, "grad_norm": 0.11125322431325912, "learning_rate": 0.002, "loss": 2.3392, "step": 360550 }, { "epoch": 1.3938241251874874, "grad_norm": 0.11771386861801147, "learning_rate": 0.002, "loss": 2.342, "step": 360560 }, { "epoch": 1.3938627823908707, "grad_norm": 0.10649606585502625, "learning_rate": 0.002, "loss": 2.3348, "step": 360570 }, { "epoch": 1.393901439594254, "grad_norm": 0.10038311779499054, "learning_rate": 0.002, "loss": 2.3375, "step": 360580 }, { "epoch": 1.3939400967976372, "grad_norm": 0.1158284842967987, "learning_rate": 0.002, "loss": 2.3393, "step": 360590 }, { "epoch": 1.3939787540010204, "grad_norm": 0.10317362844944, "learning_rate": 0.002, "loss": 2.3401, "step": 360600 }, { "epoch": 1.3940174112044037, "grad_norm": 0.10186164081096649, "learning_rate": 0.002, "loss": 2.3365, "step": 360610 }, { "epoch": 1.3940560684077872, "grad_norm": 0.09978730976581573, "learning_rate": 0.002, "loss": 2.34, "step": 360620 }, { "epoch": 1.3940947256111704, "grad_norm": 0.10873579978942871, "learning_rate": 0.002, "loss": 2.3432, "step": 360630 }, { "epoch": 1.3941333828145537, "grad_norm": 0.09863824397325516, "learning_rate": 0.002, "loss": 2.3313, "step": 360640 }, { "epoch": 1.394172040017937, "grad_norm": 0.12052670121192932, "learning_rate": 0.002, "loss": 2.3174, "step": 360650 }, { "epoch": 1.3942106972213202, "grad_norm": 0.12854629755020142, "learning_rate": 0.002, "loss": 2.3422, "step": 360660 }, { "epoch": 1.3942493544247034, "grad_norm": 0.09690230339765549, "learning_rate": 0.002, "loss": 2.3323, "step": 360670 }, { "epoch": 1.394288011628087, "grad_norm": 0.10591220110654831, "learning_rate": 0.002, "loss": 2.325, "step": 360680 }, { "epoch": 1.3943266688314702, "grad_norm": 0.09987891465425491, "learning_rate": 0.002, "loss": 2.3309, "step": 360690 }, { "epoch": 1.3943653260348534, "grad_norm": 0.1403607428073883, "learning_rate": 0.002, "loss": 2.3277, "step": 360700 }, { "epoch": 1.3944039832382367, "grad_norm": 0.10875146090984344, "learning_rate": 0.002, "loss": 2.3434, "step": 360710 }, { "epoch": 1.39444264044162, "grad_norm": 0.10704370588064194, "learning_rate": 0.002, "loss": 2.3298, "step": 360720 }, { "epoch": 1.3944812976450032, "grad_norm": 0.09713513404130936, "learning_rate": 0.002, "loss": 2.3423, "step": 360730 }, { "epoch": 1.3945199548483864, "grad_norm": 0.12021619081497192, "learning_rate": 0.002, "loss": 2.3364, "step": 360740 }, { "epoch": 1.3945586120517697, "grad_norm": 0.10152607411146164, "learning_rate": 0.002, "loss": 2.3428, "step": 360750 }, { "epoch": 1.394597269255153, "grad_norm": 0.11064693331718445, "learning_rate": 0.002, "loss": 2.3507, "step": 360760 }, { "epoch": 1.3946359264585362, "grad_norm": 0.11342901736497879, "learning_rate": 0.002, "loss": 2.3444, "step": 360770 }, { "epoch": 1.3946745836619194, "grad_norm": 0.10161430388689041, "learning_rate": 0.002, "loss": 2.3304, "step": 360780 }, { "epoch": 1.394713240865303, "grad_norm": 0.09243433177471161, "learning_rate": 0.002, "loss": 2.3361, "step": 360790 }, { "epoch": 1.3947518980686862, "grad_norm": 0.12722088396549225, "learning_rate": 0.002, "loss": 2.3284, "step": 360800 }, { "epoch": 1.3947905552720694, "grad_norm": 0.10662361234426498, "learning_rate": 0.002, "loss": 2.3328, "step": 360810 }, { "epoch": 1.3948292124754527, "grad_norm": 0.0985378846526146, "learning_rate": 0.002, "loss": 2.3413, "step": 360820 }, { "epoch": 1.394867869678836, "grad_norm": 0.10576797276735306, "learning_rate": 0.002, "loss": 2.3344, "step": 360830 }, { "epoch": 1.3949065268822192, "grad_norm": 0.10594084113836288, "learning_rate": 0.002, "loss": 2.3233, "step": 360840 }, { "epoch": 1.3949451840856026, "grad_norm": 0.09589492529630661, "learning_rate": 0.002, "loss": 2.3326, "step": 360850 }, { "epoch": 1.394983841288986, "grad_norm": 0.1067117303609848, "learning_rate": 0.002, "loss": 2.3332, "step": 360860 }, { "epoch": 1.3950224984923691, "grad_norm": 0.10901029407978058, "learning_rate": 0.002, "loss": 2.3385, "step": 360870 }, { "epoch": 1.3950611556957524, "grad_norm": 0.11935501545667648, "learning_rate": 0.002, "loss": 2.326, "step": 360880 }, { "epoch": 1.3950998128991356, "grad_norm": 0.12973760068416595, "learning_rate": 0.002, "loss": 2.3522, "step": 360890 }, { "epoch": 1.395138470102519, "grad_norm": 0.10274084657430649, "learning_rate": 0.002, "loss": 2.3252, "step": 360900 }, { "epoch": 1.3951771273059022, "grad_norm": 0.11378609389066696, "learning_rate": 0.002, "loss": 2.3298, "step": 360910 }, { "epoch": 1.3952157845092854, "grad_norm": 0.12882862985134125, "learning_rate": 0.002, "loss": 2.3369, "step": 360920 }, { "epoch": 1.3952544417126687, "grad_norm": 0.10811634361743927, "learning_rate": 0.002, "loss": 2.3537, "step": 360930 }, { "epoch": 1.395293098916052, "grad_norm": 0.0853014886379242, "learning_rate": 0.002, "loss": 2.341, "step": 360940 }, { "epoch": 1.3953317561194352, "grad_norm": 0.09347040951251984, "learning_rate": 0.002, "loss": 2.3253, "step": 360950 }, { "epoch": 1.3953704133228186, "grad_norm": 0.10422652214765549, "learning_rate": 0.002, "loss": 2.3303, "step": 360960 }, { "epoch": 1.395409070526202, "grad_norm": 0.09387336671352386, "learning_rate": 0.002, "loss": 2.3355, "step": 360970 }, { "epoch": 1.3954477277295851, "grad_norm": 0.09877435117959976, "learning_rate": 0.002, "loss": 2.3453, "step": 360980 }, { "epoch": 1.3954863849329684, "grad_norm": 0.10496794432401657, "learning_rate": 0.002, "loss": 2.3282, "step": 360990 }, { "epoch": 1.3955250421363516, "grad_norm": 0.1177574172616005, "learning_rate": 0.002, "loss": 2.3305, "step": 361000 }, { "epoch": 1.395563699339735, "grad_norm": 0.12681037187576294, "learning_rate": 0.002, "loss": 2.3342, "step": 361010 }, { "epoch": 1.3956023565431184, "grad_norm": 0.11275091767311096, "learning_rate": 0.002, "loss": 2.3307, "step": 361020 }, { "epoch": 1.3956410137465016, "grad_norm": 0.10215546935796738, "learning_rate": 0.002, "loss": 2.3301, "step": 361030 }, { "epoch": 1.3956796709498849, "grad_norm": 0.09558103233575821, "learning_rate": 0.002, "loss": 2.3165, "step": 361040 }, { "epoch": 1.3957183281532681, "grad_norm": 0.11078792810440063, "learning_rate": 0.002, "loss": 2.3294, "step": 361050 }, { "epoch": 1.3957569853566514, "grad_norm": 0.10702776163816452, "learning_rate": 0.002, "loss": 2.3165, "step": 361060 }, { "epoch": 1.3957956425600346, "grad_norm": 0.14717505872249603, "learning_rate": 0.002, "loss": 2.3464, "step": 361070 }, { "epoch": 1.3958342997634179, "grad_norm": 0.09539902955293655, "learning_rate": 0.002, "loss": 2.3447, "step": 361080 }, { "epoch": 1.3958729569668011, "grad_norm": 0.10105487704277039, "learning_rate": 0.002, "loss": 2.3316, "step": 361090 }, { "epoch": 1.3959116141701844, "grad_norm": 0.10328929871320724, "learning_rate": 0.002, "loss": 2.3301, "step": 361100 }, { "epoch": 1.3959502713735676, "grad_norm": 0.10702311247587204, "learning_rate": 0.002, "loss": 2.3408, "step": 361110 }, { "epoch": 1.395988928576951, "grad_norm": 0.10122741758823395, "learning_rate": 0.002, "loss": 2.3252, "step": 361120 }, { "epoch": 1.3960275857803344, "grad_norm": 0.10583388060331345, "learning_rate": 0.002, "loss": 2.3151, "step": 361130 }, { "epoch": 1.3960662429837176, "grad_norm": 0.10187561064958572, "learning_rate": 0.002, "loss": 2.3239, "step": 361140 }, { "epoch": 1.3961049001871009, "grad_norm": 0.096242755651474, "learning_rate": 0.002, "loss": 2.3453, "step": 361150 }, { "epoch": 1.3961435573904841, "grad_norm": 0.1273539960384369, "learning_rate": 0.002, "loss": 2.3357, "step": 361160 }, { "epoch": 1.3961822145938674, "grad_norm": 0.12333947420120239, "learning_rate": 0.002, "loss": 2.3351, "step": 361170 }, { "epoch": 1.3962208717972506, "grad_norm": 0.09781967103481293, "learning_rate": 0.002, "loss": 2.3314, "step": 361180 }, { "epoch": 1.396259529000634, "grad_norm": 0.11135595291852951, "learning_rate": 0.002, "loss": 2.3365, "step": 361190 }, { "epoch": 1.3962981862040174, "grad_norm": 0.1160992905497551, "learning_rate": 0.002, "loss": 2.3378, "step": 361200 }, { "epoch": 1.3963368434074006, "grad_norm": 0.10552875697612762, "learning_rate": 0.002, "loss": 2.3243, "step": 361210 }, { "epoch": 1.3963755006107839, "grad_norm": 0.11450695246458054, "learning_rate": 0.002, "loss": 2.3301, "step": 361220 }, { "epoch": 1.3964141578141671, "grad_norm": 0.10146085172891617, "learning_rate": 0.002, "loss": 2.3438, "step": 361230 }, { "epoch": 1.3964528150175504, "grad_norm": 0.09327413886785507, "learning_rate": 0.002, "loss": 2.3252, "step": 361240 }, { "epoch": 1.3964914722209336, "grad_norm": 0.13081274926662445, "learning_rate": 0.002, "loss": 2.3353, "step": 361250 }, { "epoch": 1.3965301294243169, "grad_norm": 0.10812999308109283, "learning_rate": 0.002, "loss": 2.3299, "step": 361260 }, { "epoch": 1.3965687866277001, "grad_norm": 0.1000206246972084, "learning_rate": 0.002, "loss": 2.3341, "step": 361270 }, { "epoch": 1.3966074438310834, "grad_norm": 0.09798294305801392, "learning_rate": 0.002, "loss": 2.3243, "step": 361280 }, { "epoch": 1.3966461010344666, "grad_norm": 0.09942685812711716, "learning_rate": 0.002, "loss": 2.336, "step": 361290 }, { "epoch": 1.39668475823785, "grad_norm": 0.09794165939092636, "learning_rate": 0.002, "loss": 2.3336, "step": 361300 }, { "epoch": 1.3967234154412334, "grad_norm": 0.1085113063454628, "learning_rate": 0.002, "loss": 2.3342, "step": 361310 }, { "epoch": 1.3967620726446166, "grad_norm": 0.09627203643321991, "learning_rate": 0.002, "loss": 2.3414, "step": 361320 }, { "epoch": 1.3968007298479999, "grad_norm": 0.09349404275417328, "learning_rate": 0.002, "loss": 2.34, "step": 361330 }, { "epoch": 1.396839387051383, "grad_norm": 0.12713338434696198, "learning_rate": 0.002, "loss": 2.3399, "step": 361340 }, { "epoch": 1.3968780442547664, "grad_norm": 0.10971616953611374, "learning_rate": 0.002, "loss": 2.3371, "step": 361350 }, { "epoch": 1.3969167014581498, "grad_norm": 0.1190105527639389, "learning_rate": 0.002, "loss": 2.3334, "step": 361360 }, { "epoch": 1.396955358661533, "grad_norm": 0.10451462119817734, "learning_rate": 0.002, "loss": 2.3318, "step": 361370 }, { "epoch": 1.3969940158649163, "grad_norm": 0.1128426343202591, "learning_rate": 0.002, "loss": 2.3221, "step": 361380 }, { "epoch": 1.3970326730682996, "grad_norm": 0.12514308094978333, "learning_rate": 0.002, "loss": 2.3356, "step": 361390 }, { "epoch": 1.3970713302716828, "grad_norm": 0.10585001111030579, "learning_rate": 0.002, "loss": 2.3303, "step": 361400 }, { "epoch": 1.397109987475066, "grad_norm": 0.12662208080291748, "learning_rate": 0.002, "loss": 2.323, "step": 361410 }, { "epoch": 1.3971486446784493, "grad_norm": 0.08943864703178406, "learning_rate": 0.002, "loss": 2.3188, "step": 361420 }, { "epoch": 1.3971873018818326, "grad_norm": 0.09058155119419098, "learning_rate": 0.002, "loss": 2.3319, "step": 361430 }, { "epoch": 1.3972259590852159, "grad_norm": 0.1279940903186798, "learning_rate": 0.002, "loss": 2.3297, "step": 361440 }, { "epoch": 1.397264616288599, "grad_norm": 0.10139375925064087, "learning_rate": 0.002, "loss": 2.3392, "step": 361450 }, { "epoch": 1.3973032734919826, "grad_norm": 0.10489026457071304, "learning_rate": 0.002, "loss": 2.3326, "step": 361460 }, { "epoch": 1.3973419306953658, "grad_norm": 0.09946117550134659, "learning_rate": 0.002, "loss": 2.3326, "step": 361470 }, { "epoch": 1.397380587898749, "grad_norm": 0.10231003165245056, "learning_rate": 0.002, "loss": 2.3294, "step": 361480 }, { "epoch": 1.3974192451021323, "grad_norm": 0.10625860840082169, "learning_rate": 0.002, "loss": 2.3303, "step": 361490 }, { "epoch": 1.3974579023055156, "grad_norm": 0.09422457218170166, "learning_rate": 0.002, "loss": 2.3272, "step": 361500 }, { "epoch": 1.3974965595088988, "grad_norm": 0.12820343673229218, "learning_rate": 0.002, "loss": 2.3339, "step": 361510 }, { "epoch": 1.397535216712282, "grad_norm": 0.11814692616462708, "learning_rate": 0.002, "loss": 2.3436, "step": 361520 }, { "epoch": 1.3975738739156656, "grad_norm": 0.09457693994045258, "learning_rate": 0.002, "loss": 2.3338, "step": 361530 }, { "epoch": 1.3976125311190488, "grad_norm": 0.11984442174434662, "learning_rate": 0.002, "loss": 2.3386, "step": 361540 }, { "epoch": 1.397651188322432, "grad_norm": 0.11762730032205582, "learning_rate": 0.002, "loss": 2.3352, "step": 361550 }, { "epoch": 1.3976898455258153, "grad_norm": 0.10839294642210007, "learning_rate": 0.002, "loss": 2.3343, "step": 361560 }, { "epoch": 1.3977285027291986, "grad_norm": 0.09241145104169846, "learning_rate": 0.002, "loss": 2.3419, "step": 361570 }, { "epoch": 1.3977671599325818, "grad_norm": 0.10676850378513336, "learning_rate": 0.002, "loss": 2.3224, "step": 361580 }, { "epoch": 1.397805817135965, "grad_norm": 0.10594607889652252, "learning_rate": 0.002, "loss": 2.3343, "step": 361590 }, { "epoch": 1.3978444743393483, "grad_norm": 0.27866330742836, "learning_rate": 0.002, "loss": 2.3452, "step": 361600 }, { "epoch": 1.3978831315427316, "grad_norm": 0.09651079773902893, "learning_rate": 0.002, "loss": 2.3202, "step": 361610 }, { "epoch": 1.3979217887461148, "grad_norm": 0.13887152075767517, "learning_rate": 0.002, "loss": 2.3364, "step": 361620 }, { "epoch": 1.3979604459494983, "grad_norm": 0.11219456791877747, "learning_rate": 0.002, "loss": 2.3222, "step": 361630 }, { "epoch": 1.3979991031528816, "grad_norm": 0.12171217054128647, "learning_rate": 0.002, "loss": 2.3515, "step": 361640 }, { "epoch": 1.3980377603562648, "grad_norm": 0.09833841770887375, "learning_rate": 0.002, "loss": 2.3363, "step": 361650 }, { "epoch": 1.398076417559648, "grad_norm": 0.10252264142036438, "learning_rate": 0.002, "loss": 2.3325, "step": 361660 }, { "epoch": 1.3981150747630313, "grad_norm": 0.11962525546550751, "learning_rate": 0.002, "loss": 2.3193, "step": 361670 }, { "epoch": 1.3981537319664146, "grad_norm": 0.10162299871444702, "learning_rate": 0.002, "loss": 2.3302, "step": 361680 }, { "epoch": 1.3981923891697978, "grad_norm": 0.0998658686876297, "learning_rate": 0.002, "loss": 2.3342, "step": 361690 }, { "epoch": 1.3982310463731813, "grad_norm": 0.10175598412752151, "learning_rate": 0.002, "loss": 2.336, "step": 361700 }, { "epoch": 1.3982697035765645, "grad_norm": 0.09900136291980743, "learning_rate": 0.002, "loss": 2.3337, "step": 361710 }, { "epoch": 1.3983083607799478, "grad_norm": 0.10916583985090256, "learning_rate": 0.002, "loss": 2.3362, "step": 361720 }, { "epoch": 1.398347017983331, "grad_norm": 0.12456495314836502, "learning_rate": 0.002, "loss": 2.3185, "step": 361730 }, { "epoch": 1.3983856751867143, "grad_norm": 0.100282222032547, "learning_rate": 0.002, "loss": 2.314, "step": 361740 }, { "epoch": 1.3984243323900976, "grad_norm": 0.11090005934238434, "learning_rate": 0.002, "loss": 2.3241, "step": 361750 }, { "epoch": 1.3984629895934808, "grad_norm": 0.11740148812532425, "learning_rate": 0.002, "loss": 2.3435, "step": 361760 }, { "epoch": 1.398501646796864, "grad_norm": 0.12703080475330353, "learning_rate": 0.002, "loss": 2.3371, "step": 361770 }, { "epoch": 1.3985403040002473, "grad_norm": 0.11310010403394699, "learning_rate": 0.002, "loss": 2.3273, "step": 361780 }, { "epoch": 1.3985789612036306, "grad_norm": 0.11828218400478363, "learning_rate": 0.002, "loss": 2.3113, "step": 361790 }, { "epoch": 1.398617618407014, "grad_norm": 0.115929014980793, "learning_rate": 0.002, "loss": 2.3364, "step": 361800 }, { "epoch": 1.3986562756103973, "grad_norm": 0.11614751070737839, "learning_rate": 0.002, "loss": 2.3355, "step": 361810 }, { "epoch": 1.3986949328137805, "grad_norm": 0.09835601598024368, "learning_rate": 0.002, "loss": 2.342, "step": 361820 }, { "epoch": 1.3987335900171638, "grad_norm": 0.10991106182336807, "learning_rate": 0.002, "loss": 2.3294, "step": 361830 }, { "epoch": 1.398772247220547, "grad_norm": 0.10826396942138672, "learning_rate": 0.002, "loss": 2.3273, "step": 361840 }, { "epoch": 1.3988109044239303, "grad_norm": 0.11880354583263397, "learning_rate": 0.002, "loss": 2.3338, "step": 361850 }, { "epoch": 1.3988495616273136, "grad_norm": 0.09766586124897003, "learning_rate": 0.002, "loss": 2.3385, "step": 361860 }, { "epoch": 1.398888218830697, "grad_norm": 0.11801117658615112, "learning_rate": 0.002, "loss": 2.3311, "step": 361870 }, { "epoch": 1.3989268760340803, "grad_norm": 0.11416234076023102, "learning_rate": 0.002, "loss": 2.3203, "step": 361880 }, { "epoch": 1.3989655332374635, "grad_norm": 0.09224196523427963, "learning_rate": 0.002, "loss": 2.3393, "step": 361890 }, { "epoch": 1.3990041904408468, "grad_norm": 0.11558035016059875, "learning_rate": 0.002, "loss": 2.3416, "step": 361900 }, { "epoch": 1.39904284764423, "grad_norm": 0.10817205905914307, "learning_rate": 0.002, "loss": 2.3244, "step": 361910 }, { "epoch": 1.3990815048476133, "grad_norm": 0.10724747180938721, "learning_rate": 0.002, "loss": 2.333, "step": 361920 }, { "epoch": 1.3991201620509965, "grad_norm": 0.6057680249214172, "learning_rate": 0.002, "loss": 2.3283, "step": 361930 }, { "epoch": 1.3991588192543798, "grad_norm": 0.09721146523952484, "learning_rate": 0.002, "loss": 2.3409, "step": 361940 }, { "epoch": 1.399197476457763, "grad_norm": 0.11369055509567261, "learning_rate": 0.002, "loss": 2.3342, "step": 361950 }, { "epoch": 1.3992361336611463, "grad_norm": 0.10831263661384583, "learning_rate": 0.002, "loss": 2.3288, "step": 361960 }, { "epoch": 1.3992747908645298, "grad_norm": 0.09343111515045166, "learning_rate": 0.002, "loss": 2.3399, "step": 361970 }, { "epoch": 1.399313448067913, "grad_norm": 0.09698399901390076, "learning_rate": 0.002, "loss": 2.3365, "step": 361980 }, { "epoch": 1.3993521052712963, "grad_norm": 0.1151181012392044, "learning_rate": 0.002, "loss": 2.3341, "step": 361990 }, { "epoch": 1.3993907624746795, "grad_norm": 0.10255439579486847, "learning_rate": 0.002, "loss": 2.3343, "step": 362000 }, { "epoch": 1.3994294196780628, "grad_norm": 0.10207153111696243, "learning_rate": 0.002, "loss": 2.3273, "step": 362010 }, { "epoch": 1.399468076881446, "grad_norm": 0.11824337393045425, "learning_rate": 0.002, "loss": 2.3268, "step": 362020 }, { "epoch": 1.3995067340848293, "grad_norm": 0.10297773778438568, "learning_rate": 0.002, "loss": 2.3403, "step": 362030 }, { "epoch": 1.3995453912882128, "grad_norm": 0.1008480042219162, "learning_rate": 0.002, "loss": 2.3338, "step": 362040 }, { "epoch": 1.399584048491596, "grad_norm": 0.11089113354682922, "learning_rate": 0.002, "loss": 2.3389, "step": 362050 }, { "epoch": 1.3996227056949793, "grad_norm": 0.11389949172735214, "learning_rate": 0.002, "loss": 2.3291, "step": 362060 }, { "epoch": 1.3996613628983625, "grad_norm": 0.10151943564414978, "learning_rate": 0.002, "loss": 2.3336, "step": 362070 }, { "epoch": 1.3997000201017458, "grad_norm": 0.10547202080488205, "learning_rate": 0.002, "loss": 2.3242, "step": 362080 }, { "epoch": 1.399738677305129, "grad_norm": 0.12970007956027985, "learning_rate": 0.002, "loss": 2.3417, "step": 362090 }, { "epoch": 1.3997773345085123, "grad_norm": 0.09537054598331451, "learning_rate": 0.002, "loss": 2.3148, "step": 362100 }, { "epoch": 1.3998159917118955, "grad_norm": 0.10798255354166031, "learning_rate": 0.002, "loss": 2.3308, "step": 362110 }, { "epoch": 1.3998546489152788, "grad_norm": 0.12179353088140488, "learning_rate": 0.002, "loss": 2.3263, "step": 362120 }, { "epoch": 1.399893306118662, "grad_norm": 0.1268961876630783, "learning_rate": 0.002, "loss": 2.3194, "step": 362130 }, { "epoch": 1.3999319633220455, "grad_norm": 0.09666255861520767, "learning_rate": 0.002, "loss": 2.3389, "step": 362140 }, { "epoch": 1.3999706205254288, "grad_norm": 0.10677926242351532, "learning_rate": 0.002, "loss": 2.3365, "step": 362150 }, { "epoch": 1.400009277728812, "grad_norm": 0.13625779747962952, "learning_rate": 0.002, "loss": 2.3427, "step": 362160 }, { "epoch": 1.4000479349321953, "grad_norm": 0.10409170389175415, "learning_rate": 0.002, "loss": 2.3193, "step": 362170 }, { "epoch": 1.4000865921355785, "grad_norm": 0.12508094310760498, "learning_rate": 0.002, "loss": 2.3365, "step": 362180 }, { "epoch": 1.4001252493389618, "grad_norm": 0.12312871962785721, "learning_rate": 0.002, "loss": 2.3046, "step": 362190 }, { "epoch": 1.4001639065423452, "grad_norm": 0.1070362851023674, "learning_rate": 0.002, "loss": 2.3187, "step": 362200 }, { "epoch": 1.4002025637457285, "grad_norm": 0.10340160876512527, "learning_rate": 0.002, "loss": 2.3324, "step": 362210 }, { "epoch": 1.4002412209491117, "grad_norm": 0.11799286305904388, "learning_rate": 0.002, "loss": 2.3419, "step": 362220 }, { "epoch": 1.400279878152495, "grad_norm": 0.10353390127420425, "learning_rate": 0.002, "loss": 2.3503, "step": 362230 }, { "epoch": 1.4003185353558782, "grad_norm": 0.10391611605882645, "learning_rate": 0.002, "loss": 2.3357, "step": 362240 }, { "epoch": 1.4003571925592615, "grad_norm": 0.09352357685565948, "learning_rate": 0.002, "loss": 2.3319, "step": 362250 }, { "epoch": 1.4003958497626448, "grad_norm": 0.10108699649572372, "learning_rate": 0.002, "loss": 2.331, "step": 362260 }, { "epoch": 1.400434506966028, "grad_norm": 0.1228623315691948, "learning_rate": 0.002, "loss": 2.3313, "step": 362270 }, { "epoch": 1.4004731641694113, "grad_norm": 0.09886107593774796, "learning_rate": 0.002, "loss": 2.3226, "step": 362280 }, { "epoch": 1.4005118213727945, "grad_norm": 0.1335236132144928, "learning_rate": 0.002, "loss": 2.3403, "step": 362290 }, { "epoch": 1.4005504785761778, "grad_norm": 0.10030423104763031, "learning_rate": 0.002, "loss": 2.3405, "step": 362300 }, { "epoch": 1.4005891357795612, "grad_norm": 0.1142667829990387, "learning_rate": 0.002, "loss": 2.3146, "step": 362310 }, { "epoch": 1.4006277929829445, "grad_norm": 0.10299000144004822, "learning_rate": 0.002, "loss": 2.3446, "step": 362320 }, { "epoch": 1.4006664501863277, "grad_norm": 0.11370020359754562, "learning_rate": 0.002, "loss": 2.335, "step": 362330 }, { "epoch": 1.400705107389711, "grad_norm": 0.10238875448703766, "learning_rate": 0.002, "loss": 2.3346, "step": 362340 }, { "epoch": 1.4007437645930942, "grad_norm": 0.09951310604810715, "learning_rate": 0.002, "loss": 2.3326, "step": 362350 }, { "epoch": 1.4007824217964775, "grad_norm": 0.10246714949607849, "learning_rate": 0.002, "loss": 2.3283, "step": 362360 }, { "epoch": 1.400821078999861, "grad_norm": 0.11719667911529541, "learning_rate": 0.002, "loss": 2.3233, "step": 362370 }, { "epoch": 1.4008597362032442, "grad_norm": 0.10228227823972702, "learning_rate": 0.002, "loss": 2.3329, "step": 362380 }, { "epoch": 1.4008983934066275, "grad_norm": 0.10831063240766525, "learning_rate": 0.002, "loss": 2.3305, "step": 362390 }, { "epoch": 1.4009370506100107, "grad_norm": 0.09752275049686432, "learning_rate": 0.002, "loss": 2.3188, "step": 362400 }, { "epoch": 1.400975707813394, "grad_norm": 0.11090794950723648, "learning_rate": 0.002, "loss": 2.345, "step": 362410 }, { "epoch": 1.4010143650167772, "grad_norm": 0.09280040860176086, "learning_rate": 0.002, "loss": 2.3331, "step": 362420 }, { "epoch": 1.4010530222201605, "grad_norm": 0.10434489697217941, "learning_rate": 0.002, "loss": 2.3374, "step": 362430 }, { "epoch": 1.4010916794235437, "grad_norm": 0.10599362105131149, "learning_rate": 0.002, "loss": 2.3283, "step": 362440 }, { "epoch": 1.401130336626927, "grad_norm": 0.12312032282352448, "learning_rate": 0.002, "loss": 2.3439, "step": 362450 }, { "epoch": 1.4011689938303102, "grad_norm": 0.12890659272670746, "learning_rate": 0.002, "loss": 2.3341, "step": 362460 }, { "epoch": 1.4012076510336935, "grad_norm": 0.09986308217048645, "learning_rate": 0.002, "loss": 2.3292, "step": 362470 }, { "epoch": 1.401246308237077, "grad_norm": 0.09984946250915527, "learning_rate": 0.002, "loss": 2.3404, "step": 362480 }, { "epoch": 1.4012849654404602, "grad_norm": 0.109229676425457, "learning_rate": 0.002, "loss": 2.3254, "step": 362490 }, { "epoch": 1.4013236226438435, "grad_norm": 0.12256049364805222, "learning_rate": 0.002, "loss": 2.3391, "step": 362500 }, { "epoch": 1.4013622798472267, "grad_norm": 0.10545402020215988, "learning_rate": 0.002, "loss": 2.3268, "step": 362510 }, { "epoch": 1.40140093705061, "grad_norm": 0.09840114414691925, "learning_rate": 0.002, "loss": 2.3435, "step": 362520 }, { "epoch": 1.4014395942539932, "grad_norm": 0.09764132648706436, "learning_rate": 0.002, "loss": 2.3226, "step": 362530 }, { "epoch": 1.4014782514573767, "grad_norm": 0.09643712639808655, "learning_rate": 0.002, "loss": 2.3329, "step": 362540 }, { "epoch": 1.40151690866076, "grad_norm": 0.10782516002655029, "learning_rate": 0.002, "loss": 2.3304, "step": 362550 }, { "epoch": 1.4015555658641432, "grad_norm": 0.09751346707344055, "learning_rate": 0.002, "loss": 2.315, "step": 362560 }, { "epoch": 1.4015942230675265, "grad_norm": 0.13143301010131836, "learning_rate": 0.002, "loss": 2.3149, "step": 362570 }, { "epoch": 1.4016328802709097, "grad_norm": 0.10341697186231613, "learning_rate": 0.002, "loss": 2.3311, "step": 362580 }, { "epoch": 1.401671537474293, "grad_norm": 0.09820285439491272, "learning_rate": 0.002, "loss": 2.3435, "step": 362590 }, { "epoch": 1.4017101946776762, "grad_norm": 0.0969579741358757, "learning_rate": 0.002, "loss": 2.3388, "step": 362600 }, { "epoch": 1.4017488518810595, "grad_norm": 0.11445106565952301, "learning_rate": 0.002, "loss": 2.3344, "step": 362610 }, { "epoch": 1.4017875090844427, "grad_norm": 0.1103585809469223, "learning_rate": 0.002, "loss": 2.3548, "step": 362620 }, { "epoch": 1.401826166287826, "grad_norm": 0.09358467161655426, "learning_rate": 0.002, "loss": 2.3466, "step": 362630 }, { "epoch": 1.4018648234912092, "grad_norm": 0.09893258661031723, "learning_rate": 0.002, "loss": 2.3433, "step": 362640 }, { "epoch": 1.4019034806945927, "grad_norm": 0.09271512925624847, "learning_rate": 0.002, "loss": 2.3337, "step": 362650 }, { "epoch": 1.401942137897976, "grad_norm": 0.10333232581615448, "learning_rate": 0.002, "loss": 2.323, "step": 362660 }, { "epoch": 1.4019807951013592, "grad_norm": 0.10539565235376358, "learning_rate": 0.002, "loss": 2.3378, "step": 362670 }, { "epoch": 1.4020194523047425, "grad_norm": 0.10242871195077896, "learning_rate": 0.002, "loss": 2.3405, "step": 362680 }, { "epoch": 1.4020581095081257, "grad_norm": 0.09406136721372604, "learning_rate": 0.002, "loss": 2.3337, "step": 362690 }, { "epoch": 1.402096766711509, "grad_norm": 0.10209919512271881, "learning_rate": 0.002, "loss": 2.3341, "step": 362700 }, { "epoch": 1.4021354239148924, "grad_norm": 0.11888878792524338, "learning_rate": 0.002, "loss": 2.3418, "step": 362710 }, { "epoch": 1.4021740811182757, "grad_norm": 0.10515199601650238, "learning_rate": 0.002, "loss": 2.3439, "step": 362720 }, { "epoch": 1.402212738321659, "grad_norm": 0.11428892612457275, "learning_rate": 0.002, "loss": 2.3315, "step": 362730 }, { "epoch": 1.4022513955250422, "grad_norm": 0.12660294771194458, "learning_rate": 0.002, "loss": 2.3446, "step": 362740 }, { "epoch": 1.4022900527284254, "grad_norm": 0.0967639684677124, "learning_rate": 0.002, "loss": 2.3293, "step": 362750 }, { "epoch": 1.4023287099318087, "grad_norm": 0.10571729391813278, "learning_rate": 0.002, "loss": 2.3323, "step": 362760 }, { "epoch": 1.402367367135192, "grad_norm": 0.12200155854225159, "learning_rate": 0.002, "loss": 2.342, "step": 362770 }, { "epoch": 1.4024060243385752, "grad_norm": 0.10133996605873108, "learning_rate": 0.002, "loss": 2.333, "step": 362780 }, { "epoch": 1.4024446815419584, "grad_norm": 0.10742209106683731, "learning_rate": 0.002, "loss": 2.344, "step": 362790 }, { "epoch": 1.4024833387453417, "grad_norm": 0.10386163741350174, "learning_rate": 0.002, "loss": 2.3172, "step": 362800 }, { "epoch": 1.402521995948725, "grad_norm": 0.1222449317574501, "learning_rate": 0.002, "loss": 2.3339, "step": 362810 }, { "epoch": 1.4025606531521084, "grad_norm": 0.10506697744131088, "learning_rate": 0.002, "loss": 2.3131, "step": 362820 }, { "epoch": 1.4025993103554917, "grad_norm": 0.10943559557199478, "learning_rate": 0.002, "loss": 2.3266, "step": 362830 }, { "epoch": 1.402637967558875, "grad_norm": 0.1146540567278862, "learning_rate": 0.002, "loss": 2.3289, "step": 362840 }, { "epoch": 1.4026766247622582, "grad_norm": 0.10004156082868576, "learning_rate": 0.002, "loss": 2.3371, "step": 362850 }, { "epoch": 1.4027152819656414, "grad_norm": 0.11623933911323547, "learning_rate": 0.002, "loss": 2.3375, "step": 362860 }, { "epoch": 1.4027539391690247, "grad_norm": 0.12447261810302734, "learning_rate": 0.002, "loss": 2.3422, "step": 362870 }, { "epoch": 1.4027925963724082, "grad_norm": 0.11130033433437347, "learning_rate": 0.002, "loss": 2.3396, "step": 362880 }, { "epoch": 1.4028312535757914, "grad_norm": 0.10304639488458633, "learning_rate": 0.002, "loss": 2.3428, "step": 362890 }, { "epoch": 1.4028699107791747, "grad_norm": 0.1037617176771164, "learning_rate": 0.002, "loss": 2.3306, "step": 362900 }, { "epoch": 1.402908567982558, "grad_norm": 0.09812625497579575, "learning_rate": 0.002, "loss": 2.3351, "step": 362910 }, { "epoch": 1.4029472251859412, "grad_norm": 0.12238926440477371, "learning_rate": 0.002, "loss": 2.3353, "step": 362920 }, { "epoch": 1.4029858823893244, "grad_norm": 0.09530317038297653, "learning_rate": 0.002, "loss": 2.33, "step": 362930 }, { "epoch": 1.4030245395927077, "grad_norm": 0.0930962935090065, "learning_rate": 0.002, "loss": 2.3192, "step": 362940 }, { "epoch": 1.403063196796091, "grad_norm": 0.1084480956196785, "learning_rate": 0.002, "loss": 2.3336, "step": 362950 }, { "epoch": 1.4031018539994742, "grad_norm": 0.10741405189037323, "learning_rate": 0.002, "loss": 2.3338, "step": 362960 }, { "epoch": 1.4031405112028574, "grad_norm": 0.1136438176035881, "learning_rate": 0.002, "loss": 2.3383, "step": 362970 }, { "epoch": 1.4031791684062407, "grad_norm": 0.09555595368146896, "learning_rate": 0.002, "loss": 2.3427, "step": 362980 }, { "epoch": 1.4032178256096242, "grad_norm": 0.10760748386383057, "learning_rate": 0.002, "loss": 2.3288, "step": 362990 }, { "epoch": 1.4032564828130074, "grad_norm": 0.09433700144290924, "learning_rate": 0.002, "loss": 2.3299, "step": 363000 }, { "epoch": 1.4032951400163907, "grad_norm": 0.1083793193101883, "learning_rate": 0.002, "loss": 2.334, "step": 363010 }, { "epoch": 1.403333797219774, "grad_norm": 0.09169959276914597, "learning_rate": 0.002, "loss": 2.3465, "step": 363020 }, { "epoch": 1.4033724544231572, "grad_norm": 0.10351970046758652, "learning_rate": 0.002, "loss": 2.3375, "step": 363030 }, { "epoch": 1.4034111116265404, "grad_norm": 0.09821586310863495, "learning_rate": 0.002, "loss": 2.3365, "step": 363040 }, { "epoch": 1.403449768829924, "grad_norm": 0.12562578916549683, "learning_rate": 0.002, "loss": 2.3455, "step": 363050 }, { "epoch": 1.4034884260333071, "grad_norm": 0.10481049865484238, "learning_rate": 0.002, "loss": 2.3316, "step": 363060 }, { "epoch": 1.4035270832366904, "grad_norm": 0.4016679525375366, "learning_rate": 0.002, "loss": 2.3145, "step": 363070 }, { "epoch": 1.4035657404400737, "grad_norm": 0.1034378632903099, "learning_rate": 0.002, "loss": 2.3302, "step": 363080 }, { "epoch": 1.403604397643457, "grad_norm": 0.1017390713095665, "learning_rate": 0.002, "loss": 2.326, "step": 363090 }, { "epoch": 1.4036430548468402, "grad_norm": 0.1351192742586136, "learning_rate": 0.002, "loss": 2.3265, "step": 363100 }, { "epoch": 1.4036817120502234, "grad_norm": 0.1087784543633461, "learning_rate": 0.002, "loss": 2.3221, "step": 363110 }, { "epoch": 1.4037203692536067, "grad_norm": 0.11712558567523956, "learning_rate": 0.002, "loss": 2.3379, "step": 363120 }, { "epoch": 1.40375902645699, "grad_norm": 0.1175539493560791, "learning_rate": 0.002, "loss": 2.3316, "step": 363130 }, { "epoch": 1.4037976836603732, "grad_norm": 0.10677841305732727, "learning_rate": 0.002, "loss": 2.3312, "step": 363140 }, { "epoch": 1.4038363408637564, "grad_norm": 0.1184573546051979, "learning_rate": 0.002, "loss": 2.3395, "step": 363150 }, { "epoch": 1.40387499806714, "grad_norm": 0.12204091250896454, "learning_rate": 0.002, "loss": 2.3299, "step": 363160 }, { "epoch": 1.4039136552705231, "grad_norm": 0.11024865508079529, "learning_rate": 0.002, "loss": 2.3256, "step": 363170 }, { "epoch": 1.4039523124739064, "grad_norm": 0.1106250137090683, "learning_rate": 0.002, "loss": 2.3371, "step": 363180 }, { "epoch": 1.4039909696772896, "grad_norm": 0.09033829718828201, "learning_rate": 0.002, "loss": 2.3335, "step": 363190 }, { "epoch": 1.404029626880673, "grad_norm": 0.12094565480947495, "learning_rate": 0.002, "loss": 2.3403, "step": 363200 }, { "epoch": 1.4040682840840562, "grad_norm": 0.09226465225219727, "learning_rate": 0.002, "loss": 2.3396, "step": 363210 }, { "epoch": 1.4041069412874396, "grad_norm": 0.14574606716632843, "learning_rate": 0.002, "loss": 2.3279, "step": 363220 }, { "epoch": 1.4041455984908229, "grad_norm": 0.11849888414144516, "learning_rate": 0.002, "loss": 2.3447, "step": 363230 }, { "epoch": 1.4041842556942061, "grad_norm": 0.12022794783115387, "learning_rate": 0.002, "loss": 2.3158, "step": 363240 }, { "epoch": 1.4042229128975894, "grad_norm": 0.10561049729585648, "learning_rate": 0.002, "loss": 2.3221, "step": 363250 }, { "epoch": 1.4042615701009726, "grad_norm": 0.10209091007709503, "learning_rate": 0.002, "loss": 2.3322, "step": 363260 }, { "epoch": 1.4043002273043559, "grad_norm": 0.1343676745891571, "learning_rate": 0.002, "loss": 2.3381, "step": 363270 }, { "epoch": 1.4043388845077391, "grad_norm": 0.11447706818580627, "learning_rate": 0.002, "loss": 2.3185, "step": 363280 }, { "epoch": 1.4043775417111224, "grad_norm": 0.09987994283437729, "learning_rate": 0.002, "loss": 2.3322, "step": 363290 }, { "epoch": 1.4044161989145056, "grad_norm": 0.11488515883684158, "learning_rate": 0.002, "loss": 2.3303, "step": 363300 }, { "epoch": 1.404454856117889, "grad_norm": 0.09707038849592209, "learning_rate": 0.002, "loss": 2.3425, "step": 363310 }, { "epoch": 1.4044935133212724, "grad_norm": 0.10826553404331207, "learning_rate": 0.002, "loss": 2.3242, "step": 363320 }, { "epoch": 1.4045321705246556, "grad_norm": 0.09534227848052979, "learning_rate": 0.002, "loss": 2.3365, "step": 363330 }, { "epoch": 1.4045708277280389, "grad_norm": 0.11304795742034912, "learning_rate": 0.002, "loss": 2.336, "step": 363340 }, { "epoch": 1.4046094849314221, "grad_norm": 0.09416774660348892, "learning_rate": 0.002, "loss": 2.3206, "step": 363350 }, { "epoch": 1.4046481421348054, "grad_norm": 0.10979488492012024, "learning_rate": 0.002, "loss": 2.3417, "step": 363360 }, { "epoch": 1.4046867993381886, "grad_norm": 0.10304318368434906, "learning_rate": 0.002, "loss": 2.3356, "step": 363370 }, { "epoch": 1.4047254565415719, "grad_norm": 0.09270067512989044, "learning_rate": 0.002, "loss": 2.3178, "step": 363380 }, { "epoch": 1.4047641137449554, "grad_norm": 0.11495853215456009, "learning_rate": 0.002, "loss": 2.3385, "step": 363390 }, { "epoch": 1.4048027709483386, "grad_norm": 0.31204378604888916, "learning_rate": 0.002, "loss": 2.3335, "step": 363400 }, { "epoch": 1.4048414281517219, "grad_norm": 0.09133657068014145, "learning_rate": 0.002, "loss": 2.3533, "step": 363410 }, { "epoch": 1.4048800853551051, "grad_norm": 0.09207748621702194, "learning_rate": 0.002, "loss": 2.3267, "step": 363420 }, { "epoch": 1.4049187425584884, "grad_norm": 0.10387468338012695, "learning_rate": 0.002, "loss": 2.3276, "step": 363430 }, { "epoch": 1.4049573997618716, "grad_norm": 0.4840529263019562, "learning_rate": 0.002, "loss": 2.3339, "step": 363440 }, { "epoch": 1.4049960569652549, "grad_norm": 0.10210797935724258, "learning_rate": 0.002, "loss": 2.3521, "step": 363450 }, { "epoch": 1.4050347141686381, "grad_norm": 0.10467180609703064, "learning_rate": 0.002, "loss": 2.3356, "step": 363460 }, { "epoch": 1.4050733713720214, "grad_norm": 0.08897180110216141, "learning_rate": 0.002, "loss": 2.3361, "step": 363470 }, { "epoch": 1.4051120285754046, "grad_norm": 0.0998954102396965, "learning_rate": 0.002, "loss": 2.3423, "step": 363480 }, { "epoch": 1.405150685778788, "grad_norm": 0.09620607644319534, "learning_rate": 0.002, "loss": 2.3493, "step": 363490 }, { "epoch": 1.4051893429821714, "grad_norm": 0.11393487453460693, "learning_rate": 0.002, "loss": 2.3361, "step": 363500 }, { "epoch": 1.4052280001855546, "grad_norm": 0.10956080257892609, "learning_rate": 0.002, "loss": 2.3372, "step": 363510 }, { "epoch": 1.4052666573889379, "grad_norm": 0.1093173399567604, "learning_rate": 0.002, "loss": 2.3408, "step": 363520 }, { "epoch": 1.405305314592321, "grad_norm": 0.12005335092544556, "learning_rate": 0.002, "loss": 2.3301, "step": 363530 }, { "epoch": 1.4053439717957044, "grad_norm": 0.11257109045982361, "learning_rate": 0.002, "loss": 2.3502, "step": 363540 }, { "epoch": 1.4053826289990876, "grad_norm": 0.11301475763320923, "learning_rate": 0.002, "loss": 2.3455, "step": 363550 }, { "epoch": 1.405421286202471, "grad_norm": 0.11234022676944733, "learning_rate": 0.002, "loss": 2.3269, "step": 363560 }, { "epoch": 1.4054599434058543, "grad_norm": 0.10995227843523026, "learning_rate": 0.002, "loss": 2.3314, "step": 363570 }, { "epoch": 1.4054986006092376, "grad_norm": 0.10490180552005768, "learning_rate": 0.002, "loss": 2.3322, "step": 363580 }, { "epoch": 1.4055372578126208, "grad_norm": 0.10406752675771713, "learning_rate": 0.002, "loss": 2.3296, "step": 363590 }, { "epoch": 1.405575915016004, "grad_norm": 0.14566640555858612, "learning_rate": 0.002, "loss": 2.3288, "step": 363600 }, { "epoch": 1.4056145722193873, "grad_norm": 0.10529456287622452, "learning_rate": 0.002, "loss": 2.3382, "step": 363610 }, { "epoch": 1.4056532294227706, "grad_norm": 0.09102689474821091, "learning_rate": 0.002, "loss": 2.338, "step": 363620 }, { "epoch": 1.4056918866261539, "grad_norm": 0.1229463443160057, "learning_rate": 0.002, "loss": 2.3417, "step": 363630 }, { "epoch": 1.405730543829537, "grad_norm": 0.12026394158601761, "learning_rate": 0.002, "loss": 2.3169, "step": 363640 }, { "epoch": 1.4057692010329204, "grad_norm": 0.10644323378801346, "learning_rate": 0.002, "loss": 2.3312, "step": 363650 }, { "epoch": 1.4058078582363038, "grad_norm": 0.16524867713451385, "learning_rate": 0.002, "loss": 2.3393, "step": 363660 }, { "epoch": 1.405846515439687, "grad_norm": 0.09728322923183441, "learning_rate": 0.002, "loss": 2.3407, "step": 363670 }, { "epoch": 1.4058851726430703, "grad_norm": 0.098272904753685, "learning_rate": 0.002, "loss": 2.3236, "step": 363680 }, { "epoch": 1.4059238298464536, "grad_norm": 0.11414486169815063, "learning_rate": 0.002, "loss": 2.3341, "step": 363690 }, { "epoch": 1.4059624870498368, "grad_norm": 0.09679889678955078, "learning_rate": 0.002, "loss": 2.3374, "step": 363700 }, { "epoch": 1.40600114425322, "grad_norm": 0.09830894321203232, "learning_rate": 0.002, "loss": 2.3247, "step": 363710 }, { "epoch": 1.4060398014566033, "grad_norm": 0.10768172889947891, "learning_rate": 0.002, "loss": 2.3391, "step": 363720 }, { "epoch": 1.4060784586599868, "grad_norm": 0.09654033929109573, "learning_rate": 0.002, "loss": 2.3405, "step": 363730 }, { "epoch": 1.40611711586337, "grad_norm": 0.11248601973056793, "learning_rate": 0.002, "loss": 2.3241, "step": 363740 }, { "epoch": 1.4061557730667533, "grad_norm": 0.10043682903051376, "learning_rate": 0.002, "loss": 2.3436, "step": 363750 }, { "epoch": 1.4061944302701366, "grad_norm": 0.1022840142250061, "learning_rate": 0.002, "loss": 2.3416, "step": 363760 }, { "epoch": 1.4062330874735198, "grad_norm": 0.09865662455558777, "learning_rate": 0.002, "loss": 2.3415, "step": 363770 }, { "epoch": 1.406271744676903, "grad_norm": 0.10069121420383453, "learning_rate": 0.002, "loss": 2.3266, "step": 363780 }, { "epoch": 1.4063104018802863, "grad_norm": 0.10596741735935211, "learning_rate": 0.002, "loss": 2.3349, "step": 363790 }, { "epoch": 1.4063490590836696, "grad_norm": 0.09834664314985275, "learning_rate": 0.002, "loss": 2.3379, "step": 363800 }, { "epoch": 1.4063877162870528, "grad_norm": 0.1031440794467926, "learning_rate": 0.002, "loss": 2.3311, "step": 363810 }, { "epoch": 1.406426373490436, "grad_norm": 0.10697752237319946, "learning_rate": 0.002, "loss": 2.3399, "step": 363820 }, { "epoch": 1.4064650306938196, "grad_norm": 0.10356352478265762, "learning_rate": 0.002, "loss": 2.3289, "step": 363830 }, { "epoch": 1.4065036878972028, "grad_norm": 0.09256229549646378, "learning_rate": 0.002, "loss": 2.3275, "step": 363840 }, { "epoch": 1.406542345100586, "grad_norm": 0.09180346131324768, "learning_rate": 0.002, "loss": 2.336, "step": 363850 }, { "epoch": 1.4065810023039693, "grad_norm": 0.1671026051044464, "learning_rate": 0.002, "loss": 2.3154, "step": 363860 }, { "epoch": 1.4066196595073526, "grad_norm": 0.13014864921569824, "learning_rate": 0.002, "loss": 2.3278, "step": 363870 }, { "epoch": 1.4066583167107358, "grad_norm": 0.08843419700860977, "learning_rate": 0.002, "loss": 2.3479, "step": 363880 }, { "epoch": 1.406696973914119, "grad_norm": 0.097466379404068, "learning_rate": 0.002, "loss": 2.337, "step": 363890 }, { "epoch": 1.4067356311175025, "grad_norm": 0.10403325408697128, "learning_rate": 0.002, "loss": 2.3414, "step": 363900 }, { "epoch": 1.4067742883208858, "grad_norm": 0.09658373147249222, "learning_rate": 0.002, "loss": 2.3289, "step": 363910 }, { "epoch": 1.406812945524269, "grad_norm": 0.09964865446090698, "learning_rate": 0.002, "loss": 2.318, "step": 363920 }, { "epoch": 1.4068516027276523, "grad_norm": 0.11245810985565186, "learning_rate": 0.002, "loss": 2.3463, "step": 363930 }, { "epoch": 1.4068902599310356, "grad_norm": 0.09797230362892151, "learning_rate": 0.002, "loss": 2.3215, "step": 363940 }, { "epoch": 1.4069289171344188, "grad_norm": 0.10533495247364044, "learning_rate": 0.002, "loss": 2.3423, "step": 363950 }, { "epoch": 1.406967574337802, "grad_norm": 0.1286928802728653, "learning_rate": 0.002, "loss": 2.3389, "step": 363960 }, { "epoch": 1.4070062315411853, "grad_norm": 0.10884454101324081, "learning_rate": 0.002, "loss": 2.333, "step": 363970 }, { "epoch": 1.4070448887445686, "grad_norm": 0.1721695214509964, "learning_rate": 0.002, "loss": 2.3382, "step": 363980 }, { "epoch": 1.4070835459479518, "grad_norm": 0.08932375907897949, "learning_rate": 0.002, "loss": 2.3256, "step": 363990 }, { "epoch": 1.4071222031513353, "grad_norm": 0.13959693908691406, "learning_rate": 0.002, "loss": 2.3405, "step": 364000 }, { "epoch": 1.4071608603547185, "grad_norm": 0.0952187329530716, "learning_rate": 0.002, "loss": 2.3397, "step": 364010 }, { "epoch": 1.4071995175581018, "grad_norm": 0.12597648799419403, "learning_rate": 0.002, "loss": 2.3411, "step": 364020 }, { "epoch": 1.407238174761485, "grad_norm": 0.10271839797496796, "learning_rate": 0.002, "loss": 2.313, "step": 364030 }, { "epoch": 1.4072768319648683, "grad_norm": 0.10194234549999237, "learning_rate": 0.002, "loss": 2.3334, "step": 364040 }, { "epoch": 1.4073154891682516, "grad_norm": 0.11540882289409637, "learning_rate": 0.002, "loss": 2.3196, "step": 364050 }, { "epoch": 1.407354146371635, "grad_norm": 0.10244659334421158, "learning_rate": 0.002, "loss": 2.3377, "step": 364060 }, { "epoch": 1.4073928035750183, "grad_norm": 0.09166279435157776, "learning_rate": 0.002, "loss": 2.3415, "step": 364070 }, { "epoch": 1.4074314607784015, "grad_norm": 0.09806598722934723, "learning_rate": 0.002, "loss": 2.3238, "step": 364080 }, { "epoch": 1.4074701179817848, "grad_norm": 0.108644999563694, "learning_rate": 0.002, "loss": 2.3253, "step": 364090 }, { "epoch": 1.407508775185168, "grad_norm": 0.11653917282819748, "learning_rate": 0.002, "loss": 2.3326, "step": 364100 }, { "epoch": 1.4075474323885513, "grad_norm": 0.13030320405960083, "learning_rate": 0.002, "loss": 2.3309, "step": 364110 }, { "epoch": 1.4075860895919345, "grad_norm": 0.11179164052009583, "learning_rate": 0.002, "loss": 2.3232, "step": 364120 }, { "epoch": 1.4076247467953178, "grad_norm": 0.11357536166906357, "learning_rate": 0.002, "loss": 2.3262, "step": 364130 }, { "epoch": 1.407663403998701, "grad_norm": 0.11446142196655273, "learning_rate": 0.002, "loss": 2.3276, "step": 364140 }, { "epoch": 1.4077020612020843, "grad_norm": 0.11989977210760117, "learning_rate": 0.002, "loss": 2.3493, "step": 364150 }, { "epoch": 1.4077407184054676, "grad_norm": 0.11088665574789047, "learning_rate": 0.002, "loss": 2.3284, "step": 364160 }, { "epoch": 1.407779375608851, "grad_norm": 0.10767869651317596, "learning_rate": 0.002, "loss": 2.344, "step": 364170 }, { "epoch": 1.4078180328122343, "grad_norm": 0.1473172903060913, "learning_rate": 0.002, "loss": 2.3384, "step": 364180 }, { "epoch": 1.4078566900156175, "grad_norm": 0.11636603623628616, "learning_rate": 0.002, "loss": 2.3294, "step": 364190 }, { "epoch": 1.4078953472190008, "grad_norm": 0.11040837317705154, "learning_rate": 0.002, "loss": 2.3307, "step": 364200 }, { "epoch": 1.407934004422384, "grad_norm": 0.1069677472114563, "learning_rate": 0.002, "loss": 2.3383, "step": 364210 }, { "epoch": 1.4079726616257673, "grad_norm": 0.11505875736474991, "learning_rate": 0.002, "loss": 2.3313, "step": 364220 }, { "epoch": 1.4080113188291508, "grad_norm": 0.10425962507724762, "learning_rate": 0.002, "loss": 2.3406, "step": 364230 }, { "epoch": 1.408049976032534, "grad_norm": 0.11092174798250198, "learning_rate": 0.002, "loss": 2.3434, "step": 364240 }, { "epoch": 1.4080886332359173, "grad_norm": 0.11644835770130157, "learning_rate": 0.002, "loss": 2.3475, "step": 364250 }, { "epoch": 1.4081272904393005, "grad_norm": 0.0934944674372673, "learning_rate": 0.002, "loss": 2.3269, "step": 364260 }, { "epoch": 1.4081659476426838, "grad_norm": 0.09497929364442825, "learning_rate": 0.002, "loss": 2.3414, "step": 364270 }, { "epoch": 1.408204604846067, "grad_norm": 0.11320208758115768, "learning_rate": 0.002, "loss": 2.3333, "step": 364280 }, { "epoch": 1.4082432620494503, "grad_norm": 0.09991753101348877, "learning_rate": 0.002, "loss": 2.3467, "step": 364290 }, { "epoch": 1.4082819192528335, "grad_norm": 0.0956939086318016, "learning_rate": 0.002, "loss": 2.331, "step": 364300 }, { "epoch": 1.4083205764562168, "grad_norm": 0.11093162000179291, "learning_rate": 0.002, "loss": 2.3244, "step": 364310 }, { "epoch": 1.4083592336596, "grad_norm": 0.11404040455818176, "learning_rate": 0.002, "loss": 2.3399, "step": 364320 }, { "epoch": 1.4083978908629833, "grad_norm": 0.1058291420340538, "learning_rate": 0.002, "loss": 2.3194, "step": 364330 }, { "epoch": 1.4084365480663668, "grad_norm": 0.1125858873128891, "learning_rate": 0.002, "loss": 2.3363, "step": 364340 }, { "epoch": 1.40847520526975, "grad_norm": 0.1192534789443016, "learning_rate": 0.002, "loss": 2.3436, "step": 364350 }, { "epoch": 1.4085138624731333, "grad_norm": 0.09552609175443649, "learning_rate": 0.002, "loss": 2.3295, "step": 364360 }, { "epoch": 1.4085525196765165, "grad_norm": 0.11429965496063232, "learning_rate": 0.002, "loss": 2.3298, "step": 364370 }, { "epoch": 1.4085911768798998, "grad_norm": 0.14036065340042114, "learning_rate": 0.002, "loss": 2.3246, "step": 364380 }, { "epoch": 1.408629834083283, "grad_norm": 0.11110293120145798, "learning_rate": 0.002, "loss": 2.3492, "step": 364390 }, { "epoch": 1.4086684912866665, "grad_norm": 0.10643504559993744, "learning_rate": 0.002, "loss": 2.3418, "step": 364400 }, { "epoch": 1.4087071484900497, "grad_norm": 0.11138386279344559, "learning_rate": 0.002, "loss": 2.3354, "step": 364410 }, { "epoch": 1.408745805693433, "grad_norm": 0.09993134438991547, "learning_rate": 0.002, "loss": 2.3441, "step": 364420 }, { "epoch": 1.4087844628968162, "grad_norm": 0.09777616709470749, "learning_rate": 0.002, "loss": 2.3367, "step": 364430 }, { "epoch": 1.4088231201001995, "grad_norm": 0.12403468787670135, "learning_rate": 0.002, "loss": 2.3415, "step": 364440 }, { "epoch": 1.4088617773035828, "grad_norm": 0.09901606291532516, "learning_rate": 0.002, "loss": 2.3327, "step": 364450 }, { "epoch": 1.408900434506966, "grad_norm": 0.11089719086885452, "learning_rate": 0.002, "loss": 2.3373, "step": 364460 }, { "epoch": 1.4089390917103493, "grad_norm": 0.09899328649044037, "learning_rate": 0.002, "loss": 2.3577, "step": 364470 }, { "epoch": 1.4089777489137325, "grad_norm": 0.11358582228422165, "learning_rate": 0.002, "loss": 2.3154, "step": 364480 }, { "epoch": 1.4090164061171158, "grad_norm": 0.13312110304832458, "learning_rate": 0.002, "loss": 2.3369, "step": 364490 }, { "epoch": 1.409055063320499, "grad_norm": 0.11911635845899582, "learning_rate": 0.002, "loss": 2.3432, "step": 364500 }, { "epoch": 1.4090937205238825, "grad_norm": 0.12836365401744843, "learning_rate": 0.002, "loss": 2.3334, "step": 364510 }, { "epoch": 1.4091323777272657, "grad_norm": 0.0932023897767067, "learning_rate": 0.002, "loss": 2.3246, "step": 364520 }, { "epoch": 1.409171034930649, "grad_norm": 0.09835109859704971, "learning_rate": 0.002, "loss": 2.3331, "step": 364530 }, { "epoch": 1.4092096921340322, "grad_norm": 0.11520157009363174, "learning_rate": 0.002, "loss": 2.332, "step": 364540 }, { "epoch": 1.4092483493374155, "grad_norm": 0.09594057500362396, "learning_rate": 0.002, "loss": 2.3381, "step": 364550 }, { "epoch": 1.4092870065407987, "grad_norm": 0.1021113321185112, "learning_rate": 0.002, "loss": 2.3273, "step": 364560 }, { "epoch": 1.4093256637441822, "grad_norm": 0.11449400335550308, "learning_rate": 0.002, "loss": 2.3217, "step": 364570 }, { "epoch": 1.4093643209475655, "grad_norm": 0.0967157632112503, "learning_rate": 0.002, "loss": 2.3362, "step": 364580 }, { "epoch": 1.4094029781509487, "grad_norm": 0.235229030251503, "learning_rate": 0.002, "loss": 2.3344, "step": 364590 }, { "epoch": 1.409441635354332, "grad_norm": 0.11157149076461792, "learning_rate": 0.002, "loss": 2.3299, "step": 364600 }, { "epoch": 1.4094802925577152, "grad_norm": 0.10770371556282043, "learning_rate": 0.002, "loss": 2.3252, "step": 364610 }, { "epoch": 1.4095189497610985, "grad_norm": 0.10761409252882004, "learning_rate": 0.002, "loss": 2.3313, "step": 364620 }, { "epoch": 1.4095576069644817, "grad_norm": 0.12079044431447983, "learning_rate": 0.002, "loss": 2.3277, "step": 364630 }, { "epoch": 1.409596264167865, "grad_norm": 0.1049729436635971, "learning_rate": 0.002, "loss": 2.3471, "step": 364640 }, { "epoch": 1.4096349213712482, "grad_norm": 0.1049463227391243, "learning_rate": 0.002, "loss": 2.3268, "step": 364650 }, { "epoch": 1.4096735785746315, "grad_norm": 0.10356732457876205, "learning_rate": 0.002, "loss": 2.317, "step": 364660 }, { "epoch": 1.4097122357780147, "grad_norm": 0.11769746989011765, "learning_rate": 0.002, "loss": 2.3259, "step": 364670 }, { "epoch": 1.4097508929813982, "grad_norm": 0.34086525440216064, "learning_rate": 0.002, "loss": 2.327, "step": 364680 }, { "epoch": 1.4097895501847815, "grad_norm": 0.11214648932218552, "learning_rate": 0.002, "loss": 2.3417, "step": 364690 }, { "epoch": 1.4098282073881647, "grad_norm": 0.10398946702480316, "learning_rate": 0.002, "loss": 2.3392, "step": 364700 }, { "epoch": 1.409866864591548, "grad_norm": 0.10326844453811646, "learning_rate": 0.002, "loss": 2.3366, "step": 364710 }, { "epoch": 1.4099055217949312, "grad_norm": 0.11874198168516159, "learning_rate": 0.002, "loss": 2.3531, "step": 364720 }, { "epoch": 1.4099441789983145, "grad_norm": 0.1078546792268753, "learning_rate": 0.002, "loss": 2.3286, "step": 364730 }, { "epoch": 1.409982836201698, "grad_norm": 0.11323689669370651, "learning_rate": 0.002, "loss": 2.3239, "step": 364740 }, { "epoch": 1.4100214934050812, "grad_norm": 0.10939283668994904, "learning_rate": 0.002, "loss": 2.3409, "step": 364750 }, { "epoch": 1.4100601506084645, "grad_norm": 0.11744766682386398, "learning_rate": 0.002, "loss": 2.3317, "step": 364760 }, { "epoch": 1.4100988078118477, "grad_norm": 0.11937874555587769, "learning_rate": 0.002, "loss": 2.3255, "step": 364770 }, { "epoch": 1.410137465015231, "grad_norm": 0.1032233014702797, "learning_rate": 0.002, "loss": 2.3582, "step": 364780 }, { "epoch": 1.4101761222186142, "grad_norm": 0.10637333244085312, "learning_rate": 0.002, "loss": 2.3231, "step": 364790 }, { "epoch": 1.4102147794219975, "grad_norm": 0.13863548636436462, "learning_rate": 0.002, "loss": 2.3391, "step": 364800 }, { "epoch": 1.4102534366253807, "grad_norm": 0.1211773008108139, "learning_rate": 0.002, "loss": 2.338, "step": 364810 }, { "epoch": 1.410292093828764, "grad_norm": 0.10527975112199783, "learning_rate": 0.002, "loss": 2.321, "step": 364820 }, { "epoch": 1.4103307510321472, "grad_norm": 0.10959915816783905, "learning_rate": 0.002, "loss": 2.3358, "step": 364830 }, { "epoch": 1.4103694082355305, "grad_norm": 0.0984559953212738, "learning_rate": 0.002, "loss": 2.3265, "step": 364840 }, { "epoch": 1.410408065438914, "grad_norm": 0.13312171399593353, "learning_rate": 0.002, "loss": 2.3329, "step": 364850 }, { "epoch": 1.4104467226422972, "grad_norm": 0.11278527230024338, "learning_rate": 0.002, "loss": 2.3419, "step": 364860 }, { "epoch": 1.4104853798456805, "grad_norm": 0.10411836206912994, "learning_rate": 0.002, "loss": 2.322, "step": 364870 }, { "epoch": 1.4105240370490637, "grad_norm": 0.10412270575761795, "learning_rate": 0.002, "loss": 2.324, "step": 364880 }, { "epoch": 1.410562694252447, "grad_norm": 0.10608673840761185, "learning_rate": 0.002, "loss": 2.3394, "step": 364890 }, { "epoch": 1.4106013514558302, "grad_norm": 0.12006331980228424, "learning_rate": 0.002, "loss": 2.324, "step": 364900 }, { "epoch": 1.4106400086592137, "grad_norm": 0.09856969118118286, "learning_rate": 0.002, "loss": 2.3387, "step": 364910 }, { "epoch": 1.410678665862597, "grad_norm": 0.1013568788766861, "learning_rate": 0.002, "loss": 2.3342, "step": 364920 }, { "epoch": 1.4107173230659802, "grad_norm": 0.14561229944229126, "learning_rate": 0.002, "loss": 2.333, "step": 364930 }, { "epoch": 1.4107559802693634, "grad_norm": 0.12932388484477997, "learning_rate": 0.002, "loss": 2.3354, "step": 364940 }, { "epoch": 1.4107946374727467, "grad_norm": 0.10552596300840378, "learning_rate": 0.002, "loss": 2.331, "step": 364950 }, { "epoch": 1.41083329467613, "grad_norm": 0.1043313518166542, "learning_rate": 0.002, "loss": 2.3353, "step": 364960 }, { "epoch": 1.4108719518795132, "grad_norm": 0.09932465851306915, "learning_rate": 0.002, "loss": 2.3311, "step": 364970 }, { "epoch": 1.4109106090828964, "grad_norm": 0.08606306463479996, "learning_rate": 0.002, "loss": 2.3268, "step": 364980 }, { "epoch": 1.4109492662862797, "grad_norm": 0.09718270599842072, "learning_rate": 0.002, "loss": 2.3312, "step": 364990 }, { "epoch": 1.410987923489663, "grad_norm": 0.10544393956661224, "learning_rate": 0.002, "loss": 2.3389, "step": 365000 }, { "epoch": 1.4110265806930462, "grad_norm": 0.10633678734302521, "learning_rate": 0.002, "loss": 2.321, "step": 365010 }, { "epoch": 1.4110652378964297, "grad_norm": 0.09697526693344116, "learning_rate": 0.002, "loss": 2.3207, "step": 365020 }, { "epoch": 1.411103895099813, "grad_norm": 0.1041555181145668, "learning_rate": 0.002, "loss": 2.3332, "step": 365030 }, { "epoch": 1.4111425523031962, "grad_norm": 0.1068888008594513, "learning_rate": 0.002, "loss": 2.3376, "step": 365040 }, { "epoch": 1.4111812095065794, "grad_norm": 0.10143444687128067, "learning_rate": 0.002, "loss": 2.3347, "step": 365050 }, { "epoch": 1.4112198667099627, "grad_norm": 0.09093233197927475, "learning_rate": 0.002, "loss": 2.3194, "step": 365060 }, { "epoch": 1.411258523913346, "grad_norm": 0.1119607537984848, "learning_rate": 0.002, "loss": 2.3352, "step": 365070 }, { "epoch": 1.4112971811167294, "grad_norm": 0.09111730009317398, "learning_rate": 0.002, "loss": 2.3516, "step": 365080 }, { "epoch": 1.4113358383201127, "grad_norm": 0.10968484729528427, "learning_rate": 0.002, "loss": 2.3347, "step": 365090 }, { "epoch": 1.411374495523496, "grad_norm": 0.1032860204577446, "learning_rate": 0.002, "loss": 2.3302, "step": 365100 }, { "epoch": 1.4114131527268792, "grad_norm": 0.12258035689592361, "learning_rate": 0.002, "loss": 2.3098, "step": 365110 }, { "epoch": 1.4114518099302624, "grad_norm": 0.09528249502182007, "learning_rate": 0.002, "loss": 2.3266, "step": 365120 }, { "epoch": 1.4114904671336457, "grad_norm": 0.10955754667520523, "learning_rate": 0.002, "loss": 2.3565, "step": 365130 }, { "epoch": 1.411529124337029, "grad_norm": 0.10430938005447388, "learning_rate": 0.002, "loss": 2.3319, "step": 365140 }, { "epoch": 1.4115677815404122, "grad_norm": 0.1117718517780304, "learning_rate": 0.002, "loss": 2.325, "step": 365150 }, { "epoch": 1.4116064387437954, "grad_norm": 0.08946974575519562, "learning_rate": 0.002, "loss": 2.34, "step": 365160 }, { "epoch": 1.4116450959471787, "grad_norm": 0.13333655893802643, "learning_rate": 0.002, "loss": 2.3375, "step": 365170 }, { "epoch": 1.411683753150562, "grad_norm": 0.09432690590620041, "learning_rate": 0.002, "loss": 2.3318, "step": 365180 }, { "epoch": 1.4117224103539454, "grad_norm": 0.09220901131629944, "learning_rate": 0.002, "loss": 2.3331, "step": 365190 }, { "epoch": 1.4117610675573287, "grad_norm": 0.1282465159893036, "learning_rate": 0.002, "loss": 2.3298, "step": 365200 }, { "epoch": 1.411799724760712, "grad_norm": 0.0940774604678154, "learning_rate": 0.002, "loss": 2.3366, "step": 365210 }, { "epoch": 1.4118383819640952, "grad_norm": 0.09490504115819931, "learning_rate": 0.002, "loss": 2.3412, "step": 365220 }, { "epoch": 1.4118770391674784, "grad_norm": 0.107475645840168, "learning_rate": 0.002, "loss": 2.3064, "step": 365230 }, { "epoch": 1.4119156963708617, "grad_norm": 0.11461813002824783, "learning_rate": 0.002, "loss": 2.3225, "step": 365240 }, { "epoch": 1.4119543535742451, "grad_norm": 0.10359986126422882, "learning_rate": 0.002, "loss": 2.3354, "step": 365250 }, { "epoch": 1.4119930107776284, "grad_norm": 0.10789109021425247, "learning_rate": 0.002, "loss": 2.3311, "step": 365260 }, { "epoch": 1.4120316679810117, "grad_norm": 0.10059541463851929, "learning_rate": 0.002, "loss": 2.3539, "step": 365270 }, { "epoch": 1.412070325184395, "grad_norm": 0.10454394668340683, "learning_rate": 0.002, "loss": 2.317, "step": 365280 }, { "epoch": 1.4121089823877782, "grad_norm": 0.10084279626607895, "learning_rate": 0.002, "loss": 2.3462, "step": 365290 }, { "epoch": 1.4121476395911614, "grad_norm": 0.10830729454755783, "learning_rate": 0.002, "loss": 2.3235, "step": 365300 }, { "epoch": 1.4121862967945447, "grad_norm": 0.08894408494234085, "learning_rate": 0.002, "loss": 2.3249, "step": 365310 }, { "epoch": 1.412224953997928, "grad_norm": 0.10965152084827423, "learning_rate": 0.002, "loss": 2.3439, "step": 365320 }, { "epoch": 1.4122636112013112, "grad_norm": 0.09731363505125046, "learning_rate": 0.002, "loss": 2.3513, "step": 365330 }, { "epoch": 1.4123022684046944, "grad_norm": 0.10241921246051788, "learning_rate": 0.002, "loss": 2.3368, "step": 365340 }, { "epoch": 1.412340925608078, "grad_norm": 0.09518715739250183, "learning_rate": 0.002, "loss": 2.3357, "step": 365350 }, { "epoch": 1.4123795828114611, "grad_norm": 0.10591299831867218, "learning_rate": 0.002, "loss": 2.3263, "step": 365360 }, { "epoch": 1.4124182400148444, "grad_norm": 0.10860461741685867, "learning_rate": 0.002, "loss": 2.3245, "step": 365370 }, { "epoch": 1.4124568972182276, "grad_norm": 0.1118435487151146, "learning_rate": 0.002, "loss": 2.3234, "step": 365380 }, { "epoch": 1.412495554421611, "grad_norm": 0.10134666413068771, "learning_rate": 0.002, "loss": 2.3402, "step": 365390 }, { "epoch": 1.4125342116249942, "grad_norm": 0.12348821014165878, "learning_rate": 0.002, "loss": 2.3419, "step": 365400 }, { "epoch": 1.4125728688283774, "grad_norm": 0.099838025867939, "learning_rate": 0.002, "loss": 2.3501, "step": 365410 }, { "epoch": 1.4126115260317609, "grad_norm": 0.11787385493516922, "learning_rate": 0.002, "loss": 2.329, "step": 365420 }, { "epoch": 1.4126501832351441, "grad_norm": 0.12467779964208603, "learning_rate": 0.002, "loss": 2.3296, "step": 365430 }, { "epoch": 1.4126888404385274, "grad_norm": 0.12273652106523514, "learning_rate": 0.002, "loss": 2.3482, "step": 365440 }, { "epoch": 1.4127274976419106, "grad_norm": 0.0944012925028801, "learning_rate": 0.002, "loss": 2.3526, "step": 365450 }, { "epoch": 1.4127661548452939, "grad_norm": 0.09575633704662323, "learning_rate": 0.002, "loss": 2.323, "step": 365460 }, { "epoch": 1.4128048120486771, "grad_norm": 0.09414394199848175, "learning_rate": 0.002, "loss": 2.3326, "step": 365470 }, { "epoch": 1.4128434692520604, "grad_norm": 0.10007159411907196, "learning_rate": 0.002, "loss": 2.3303, "step": 365480 }, { "epoch": 1.4128821264554436, "grad_norm": 0.10673514753580093, "learning_rate": 0.002, "loss": 2.3298, "step": 365490 }, { "epoch": 1.412920783658827, "grad_norm": 0.09196875244379044, "learning_rate": 0.002, "loss": 2.3274, "step": 365500 }, { "epoch": 1.4129594408622101, "grad_norm": 0.09712628275156021, "learning_rate": 0.002, "loss": 2.3247, "step": 365510 }, { "epoch": 1.4129980980655936, "grad_norm": 0.09691095352172852, "learning_rate": 0.002, "loss": 2.3362, "step": 365520 }, { "epoch": 1.4130367552689769, "grad_norm": 0.1034843698143959, "learning_rate": 0.002, "loss": 2.3183, "step": 365530 }, { "epoch": 1.4130754124723601, "grad_norm": 0.12032202631235123, "learning_rate": 0.002, "loss": 2.3291, "step": 365540 }, { "epoch": 1.4131140696757434, "grad_norm": 0.11367922276258469, "learning_rate": 0.002, "loss": 2.3357, "step": 365550 }, { "epoch": 1.4131527268791266, "grad_norm": 0.10583038628101349, "learning_rate": 0.002, "loss": 2.333, "step": 365560 }, { "epoch": 1.4131913840825099, "grad_norm": 0.11696499586105347, "learning_rate": 0.002, "loss": 2.3206, "step": 365570 }, { "epoch": 1.4132300412858931, "grad_norm": 0.1028081476688385, "learning_rate": 0.002, "loss": 2.3307, "step": 365580 }, { "epoch": 1.4132686984892766, "grad_norm": 0.09909705817699432, "learning_rate": 0.002, "loss": 2.3412, "step": 365590 }, { "epoch": 1.4133073556926599, "grad_norm": 0.09184190630912781, "learning_rate": 0.002, "loss": 2.3445, "step": 365600 }, { "epoch": 1.4133460128960431, "grad_norm": 0.10988624393939972, "learning_rate": 0.002, "loss": 2.3246, "step": 365610 }, { "epoch": 1.4133846700994264, "grad_norm": 0.10959646850824356, "learning_rate": 0.002, "loss": 2.3298, "step": 365620 }, { "epoch": 1.4134233273028096, "grad_norm": 0.11385467648506165, "learning_rate": 0.002, "loss": 2.3246, "step": 365630 }, { "epoch": 1.4134619845061929, "grad_norm": 0.11543765664100647, "learning_rate": 0.002, "loss": 2.327, "step": 365640 }, { "epoch": 1.4135006417095761, "grad_norm": 0.0983387678861618, "learning_rate": 0.002, "loss": 2.3416, "step": 365650 }, { "epoch": 1.4135392989129594, "grad_norm": 0.10942315310239792, "learning_rate": 0.002, "loss": 2.3238, "step": 365660 }, { "epoch": 1.4135779561163426, "grad_norm": 0.09957767277956009, "learning_rate": 0.002, "loss": 2.3304, "step": 365670 }, { "epoch": 1.4136166133197259, "grad_norm": 0.11526834964752197, "learning_rate": 0.002, "loss": 2.3163, "step": 365680 }, { "epoch": 1.4136552705231094, "grad_norm": 0.10145354270935059, "learning_rate": 0.002, "loss": 2.3246, "step": 365690 }, { "epoch": 1.4136939277264926, "grad_norm": 0.09600527584552765, "learning_rate": 0.002, "loss": 2.3318, "step": 365700 }, { "epoch": 1.4137325849298759, "grad_norm": 0.0961616262793541, "learning_rate": 0.002, "loss": 2.3479, "step": 365710 }, { "epoch": 1.413771242133259, "grad_norm": 0.12369763851165771, "learning_rate": 0.002, "loss": 2.3373, "step": 365720 }, { "epoch": 1.4138098993366424, "grad_norm": 0.10327655076980591, "learning_rate": 0.002, "loss": 2.3385, "step": 365730 }, { "epoch": 1.4138485565400256, "grad_norm": 0.10974455624818802, "learning_rate": 0.002, "loss": 2.3175, "step": 365740 }, { "epoch": 1.4138872137434089, "grad_norm": 0.09836733341217041, "learning_rate": 0.002, "loss": 2.3275, "step": 365750 }, { "epoch": 1.4139258709467923, "grad_norm": 0.09874507784843445, "learning_rate": 0.002, "loss": 2.3266, "step": 365760 }, { "epoch": 1.4139645281501756, "grad_norm": 0.11693933606147766, "learning_rate": 0.002, "loss": 2.3362, "step": 365770 }, { "epoch": 1.4140031853535588, "grad_norm": 0.08909876644611359, "learning_rate": 0.002, "loss": 2.3235, "step": 365780 }, { "epoch": 1.414041842556942, "grad_norm": 0.1115279570221901, "learning_rate": 0.002, "loss": 2.3301, "step": 365790 }, { "epoch": 1.4140804997603253, "grad_norm": 0.10595174878835678, "learning_rate": 0.002, "loss": 2.3208, "step": 365800 }, { "epoch": 1.4141191569637086, "grad_norm": 0.11007455736398697, "learning_rate": 0.002, "loss": 2.3268, "step": 365810 }, { "epoch": 1.4141578141670919, "grad_norm": 0.10507000237703323, "learning_rate": 0.002, "loss": 2.3439, "step": 365820 }, { "epoch": 1.414196471370475, "grad_norm": 0.09129108488559723, "learning_rate": 0.002, "loss": 2.3315, "step": 365830 }, { "epoch": 1.4142351285738584, "grad_norm": 0.10857604444026947, "learning_rate": 0.002, "loss": 2.329, "step": 365840 }, { "epoch": 1.4142737857772416, "grad_norm": 0.13335253298282623, "learning_rate": 0.002, "loss": 2.3431, "step": 365850 }, { "epoch": 1.414312442980625, "grad_norm": 0.1100061908364296, "learning_rate": 0.002, "loss": 2.3298, "step": 365860 }, { "epoch": 1.4143511001840083, "grad_norm": 0.11533210426568985, "learning_rate": 0.002, "loss": 2.3305, "step": 365870 }, { "epoch": 1.4143897573873916, "grad_norm": 0.12122216075658798, "learning_rate": 0.002, "loss": 2.3381, "step": 365880 }, { "epoch": 1.4144284145907748, "grad_norm": 0.11135326325893402, "learning_rate": 0.002, "loss": 2.3314, "step": 365890 }, { "epoch": 1.414467071794158, "grad_norm": 0.09590534120798111, "learning_rate": 0.002, "loss": 2.3284, "step": 365900 }, { "epoch": 1.4145057289975413, "grad_norm": 0.11739147454500198, "learning_rate": 0.002, "loss": 2.3308, "step": 365910 }, { "epoch": 1.4145443862009246, "grad_norm": 0.09624744206666946, "learning_rate": 0.002, "loss": 2.3408, "step": 365920 }, { "epoch": 1.414583043404308, "grad_norm": 0.12246581166982651, "learning_rate": 0.002, "loss": 2.3351, "step": 365930 }, { "epoch": 1.4146217006076913, "grad_norm": 0.11366311460733414, "learning_rate": 0.002, "loss": 2.3329, "step": 365940 }, { "epoch": 1.4146603578110746, "grad_norm": 0.1130039170384407, "learning_rate": 0.002, "loss": 2.3307, "step": 365950 }, { "epoch": 1.4146990150144578, "grad_norm": 0.1092824786901474, "learning_rate": 0.002, "loss": 2.3515, "step": 365960 }, { "epoch": 1.414737672217841, "grad_norm": 0.10728099197149277, "learning_rate": 0.002, "loss": 2.3173, "step": 365970 }, { "epoch": 1.4147763294212243, "grad_norm": 0.12284889072179794, "learning_rate": 0.002, "loss": 2.3364, "step": 365980 }, { "epoch": 1.4148149866246076, "grad_norm": 0.11151456832885742, "learning_rate": 0.002, "loss": 2.3503, "step": 365990 }, { "epoch": 1.4148536438279908, "grad_norm": 0.10078704357147217, "learning_rate": 0.002, "loss": 2.3392, "step": 366000 }, { "epoch": 1.414892301031374, "grad_norm": 0.11053545773029327, "learning_rate": 0.002, "loss": 2.3226, "step": 366010 }, { "epoch": 1.4149309582347573, "grad_norm": 0.10661806166172028, "learning_rate": 0.002, "loss": 2.3471, "step": 366020 }, { "epoch": 1.4149696154381408, "grad_norm": 0.1044822558760643, "learning_rate": 0.002, "loss": 2.342, "step": 366030 }, { "epoch": 1.415008272641524, "grad_norm": 0.09812510758638382, "learning_rate": 0.002, "loss": 2.3395, "step": 366040 }, { "epoch": 1.4150469298449073, "grad_norm": 0.10488618165254593, "learning_rate": 0.002, "loss": 2.3186, "step": 366050 }, { "epoch": 1.4150855870482906, "grad_norm": 0.10980476438999176, "learning_rate": 0.002, "loss": 2.3445, "step": 366060 }, { "epoch": 1.4151242442516738, "grad_norm": 0.10456784814596176, "learning_rate": 0.002, "loss": 2.3369, "step": 366070 }, { "epoch": 1.415162901455057, "grad_norm": 0.09372668713331223, "learning_rate": 0.002, "loss": 2.3287, "step": 366080 }, { "epoch": 1.4152015586584406, "grad_norm": 0.12072702497243881, "learning_rate": 0.002, "loss": 2.3133, "step": 366090 }, { "epoch": 1.4152402158618238, "grad_norm": 0.10739337652921677, "learning_rate": 0.002, "loss": 2.3308, "step": 366100 }, { "epoch": 1.415278873065207, "grad_norm": 0.11193457990884781, "learning_rate": 0.002, "loss": 2.3355, "step": 366110 }, { "epoch": 1.4153175302685903, "grad_norm": 0.12541808187961578, "learning_rate": 0.002, "loss": 2.3139, "step": 366120 }, { "epoch": 1.4153561874719736, "grad_norm": 0.09714280813932419, "learning_rate": 0.002, "loss": 2.3297, "step": 366130 }, { "epoch": 1.4153948446753568, "grad_norm": 0.1137126013636589, "learning_rate": 0.002, "loss": 2.326, "step": 366140 }, { "epoch": 1.41543350187874, "grad_norm": 0.1105848103761673, "learning_rate": 0.002, "loss": 2.3267, "step": 366150 }, { "epoch": 1.4154721590821233, "grad_norm": 0.09818024933338165, "learning_rate": 0.002, "loss": 2.3524, "step": 366160 }, { "epoch": 1.4155108162855066, "grad_norm": 0.12252239137887955, "learning_rate": 0.002, "loss": 2.3373, "step": 366170 }, { "epoch": 1.4155494734888898, "grad_norm": 0.10770494490861893, "learning_rate": 0.002, "loss": 2.342, "step": 366180 }, { "epoch": 1.415588130692273, "grad_norm": 0.09204906225204468, "learning_rate": 0.002, "loss": 2.3404, "step": 366190 }, { "epoch": 1.4156267878956565, "grad_norm": 0.10548800230026245, "learning_rate": 0.002, "loss": 2.3387, "step": 366200 }, { "epoch": 1.4156654450990398, "grad_norm": 0.12471897155046463, "learning_rate": 0.002, "loss": 2.3459, "step": 366210 }, { "epoch": 1.415704102302423, "grad_norm": 0.11144419759511948, "learning_rate": 0.002, "loss": 2.3306, "step": 366220 }, { "epoch": 1.4157427595058063, "grad_norm": 0.09332024306058884, "learning_rate": 0.002, "loss": 2.3289, "step": 366230 }, { "epoch": 1.4157814167091896, "grad_norm": 0.11328937113285065, "learning_rate": 0.002, "loss": 2.3233, "step": 366240 }, { "epoch": 1.4158200739125728, "grad_norm": 0.10417918860912323, "learning_rate": 0.002, "loss": 2.3396, "step": 366250 }, { "epoch": 1.4158587311159563, "grad_norm": 0.0939640998840332, "learning_rate": 0.002, "loss": 2.3176, "step": 366260 }, { "epoch": 1.4158973883193395, "grad_norm": 0.10409481823444366, "learning_rate": 0.002, "loss": 2.3288, "step": 366270 }, { "epoch": 1.4159360455227228, "grad_norm": 0.1057303249835968, "learning_rate": 0.002, "loss": 2.3478, "step": 366280 }, { "epoch": 1.415974702726106, "grad_norm": 0.11792177706956863, "learning_rate": 0.002, "loss": 2.3342, "step": 366290 }, { "epoch": 1.4160133599294893, "grad_norm": 0.10138144344091415, "learning_rate": 0.002, "loss": 2.3296, "step": 366300 }, { "epoch": 1.4160520171328725, "grad_norm": 0.0995168387889862, "learning_rate": 0.002, "loss": 2.3476, "step": 366310 }, { "epoch": 1.4160906743362558, "grad_norm": 0.1047981008887291, "learning_rate": 0.002, "loss": 2.3429, "step": 366320 }, { "epoch": 1.416129331539639, "grad_norm": 0.2975539565086365, "learning_rate": 0.002, "loss": 2.3224, "step": 366330 }, { "epoch": 1.4161679887430223, "grad_norm": 0.09719040989875793, "learning_rate": 0.002, "loss": 2.3409, "step": 366340 }, { "epoch": 1.4162066459464056, "grad_norm": 0.10597871243953705, "learning_rate": 0.002, "loss": 2.3466, "step": 366350 }, { "epoch": 1.4162453031497888, "grad_norm": 0.132241353392601, "learning_rate": 0.002, "loss": 2.3204, "step": 366360 }, { "epoch": 1.4162839603531723, "grad_norm": 0.09745777398347855, "learning_rate": 0.002, "loss": 2.3343, "step": 366370 }, { "epoch": 1.4163226175565555, "grad_norm": 0.09836073219776154, "learning_rate": 0.002, "loss": 2.3243, "step": 366380 }, { "epoch": 1.4163612747599388, "grad_norm": 0.12061022222042084, "learning_rate": 0.002, "loss": 2.3303, "step": 366390 }, { "epoch": 1.416399931963322, "grad_norm": 0.11182274669408798, "learning_rate": 0.002, "loss": 2.332, "step": 366400 }, { "epoch": 1.4164385891667053, "grad_norm": 0.10241756588220596, "learning_rate": 0.002, "loss": 2.3246, "step": 366410 }, { "epoch": 1.4164772463700885, "grad_norm": 0.10544509440660477, "learning_rate": 0.002, "loss": 2.3298, "step": 366420 }, { "epoch": 1.416515903573472, "grad_norm": 0.10424435138702393, "learning_rate": 0.002, "loss": 2.3372, "step": 366430 }, { "epoch": 1.4165545607768553, "grad_norm": 0.10981172323226929, "learning_rate": 0.002, "loss": 2.3461, "step": 366440 }, { "epoch": 1.4165932179802385, "grad_norm": 0.11870953440666199, "learning_rate": 0.002, "loss": 2.3435, "step": 366450 }, { "epoch": 1.4166318751836218, "grad_norm": 0.1339559555053711, "learning_rate": 0.002, "loss": 2.3265, "step": 366460 }, { "epoch": 1.416670532387005, "grad_norm": 0.09286986291408539, "learning_rate": 0.002, "loss": 2.3388, "step": 366470 }, { "epoch": 1.4167091895903883, "grad_norm": 0.11012223362922668, "learning_rate": 0.002, "loss": 2.338, "step": 366480 }, { "epoch": 1.4167478467937715, "grad_norm": 0.21809683740139008, "learning_rate": 0.002, "loss": 2.3404, "step": 366490 }, { "epoch": 1.4167865039971548, "grad_norm": 0.09550125151872635, "learning_rate": 0.002, "loss": 2.3315, "step": 366500 }, { "epoch": 1.416825161200538, "grad_norm": 0.21449320018291473, "learning_rate": 0.002, "loss": 2.3206, "step": 366510 }, { "epoch": 1.4168638184039213, "grad_norm": 0.10646815598011017, "learning_rate": 0.002, "loss": 2.3385, "step": 366520 }, { "epoch": 1.4169024756073045, "grad_norm": 0.1135336235165596, "learning_rate": 0.002, "loss": 2.3349, "step": 366530 }, { "epoch": 1.416941132810688, "grad_norm": 0.09614226967096329, "learning_rate": 0.002, "loss": 2.337, "step": 366540 }, { "epoch": 1.4169797900140713, "grad_norm": 0.11009273678064346, "learning_rate": 0.002, "loss": 2.3262, "step": 366550 }, { "epoch": 1.4170184472174545, "grad_norm": 0.10821156948804855, "learning_rate": 0.002, "loss": 2.3323, "step": 366560 }, { "epoch": 1.4170571044208378, "grad_norm": 0.09521789848804474, "learning_rate": 0.002, "loss": 2.3585, "step": 366570 }, { "epoch": 1.417095761624221, "grad_norm": 0.10291599482297897, "learning_rate": 0.002, "loss": 2.3478, "step": 366580 }, { "epoch": 1.4171344188276043, "grad_norm": 0.10520762950181961, "learning_rate": 0.002, "loss": 2.3444, "step": 366590 }, { "epoch": 1.4171730760309877, "grad_norm": 0.09714417904615402, "learning_rate": 0.002, "loss": 2.3309, "step": 366600 }, { "epoch": 1.417211733234371, "grad_norm": 0.1105012446641922, "learning_rate": 0.002, "loss": 2.3268, "step": 366610 }, { "epoch": 1.4172503904377542, "grad_norm": 0.10851927101612091, "learning_rate": 0.002, "loss": 2.3363, "step": 366620 }, { "epoch": 1.4172890476411375, "grad_norm": 0.09911957383155823, "learning_rate": 0.002, "loss": 2.3307, "step": 366630 }, { "epoch": 1.4173277048445208, "grad_norm": 0.09575625509023666, "learning_rate": 0.002, "loss": 2.3304, "step": 366640 }, { "epoch": 1.417366362047904, "grad_norm": 0.10080332309007645, "learning_rate": 0.002, "loss": 2.3349, "step": 366650 }, { "epoch": 1.4174050192512873, "grad_norm": 0.10602813214063644, "learning_rate": 0.002, "loss": 2.348, "step": 366660 }, { "epoch": 1.4174436764546705, "grad_norm": 0.11299441754817963, "learning_rate": 0.002, "loss": 2.3259, "step": 366670 }, { "epoch": 1.4174823336580538, "grad_norm": 0.13318519294261932, "learning_rate": 0.002, "loss": 2.3449, "step": 366680 }, { "epoch": 1.417520990861437, "grad_norm": 0.10320179164409637, "learning_rate": 0.002, "loss": 2.3332, "step": 366690 }, { "epoch": 1.4175596480648203, "grad_norm": 0.09384647756814957, "learning_rate": 0.002, "loss": 2.3281, "step": 366700 }, { "epoch": 1.4175983052682037, "grad_norm": 0.3169427812099457, "learning_rate": 0.002, "loss": 2.3421, "step": 366710 }, { "epoch": 1.417636962471587, "grad_norm": 0.11232533305883408, "learning_rate": 0.002, "loss": 2.3203, "step": 366720 }, { "epoch": 1.4176756196749702, "grad_norm": 0.10294479131698608, "learning_rate": 0.002, "loss": 2.3336, "step": 366730 }, { "epoch": 1.4177142768783535, "grad_norm": 0.12189421057701111, "learning_rate": 0.002, "loss": 2.3268, "step": 366740 }, { "epoch": 1.4177529340817367, "grad_norm": 0.10026071965694427, "learning_rate": 0.002, "loss": 2.3334, "step": 366750 }, { "epoch": 1.41779159128512, "grad_norm": 0.10128407925367355, "learning_rate": 0.002, "loss": 2.3232, "step": 366760 }, { "epoch": 1.4178302484885035, "grad_norm": 0.11087165772914886, "learning_rate": 0.002, "loss": 2.3289, "step": 366770 }, { "epoch": 1.4178689056918867, "grad_norm": 0.10710074752569199, "learning_rate": 0.002, "loss": 2.3256, "step": 366780 }, { "epoch": 1.41790756289527, "grad_norm": 0.1305713802576065, "learning_rate": 0.002, "loss": 2.3366, "step": 366790 }, { "epoch": 1.4179462200986532, "grad_norm": 0.10092976689338684, "learning_rate": 0.002, "loss": 2.3498, "step": 366800 }, { "epoch": 1.4179848773020365, "grad_norm": 0.10149747133255005, "learning_rate": 0.002, "loss": 2.3438, "step": 366810 }, { "epoch": 1.4180235345054197, "grad_norm": 0.09356102347373962, "learning_rate": 0.002, "loss": 2.3254, "step": 366820 }, { "epoch": 1.418062191708803, "grad_norm": 0.09790142625570297, "learning_rate": 0.002, "loss": 2.3429, "step": 366830 }, { "epoch": 1.4181008489121862, "grad_norm": 0.11745724827051163, "learning_rate": 0.002, "loss": 2.3415, "step": 366840 }, { "epoch": 1.4181395061155695, "grad_norm": 0.08513977378606796, "learning_rate": 0.002, "loss": 2.3155, "step": 366850 }, { "epoch": 1.4181781633189527, "grad_norm": 0.09998525679111481, "learning_rate": 0.002, "loss": 2.3259, "step": 366860 }, { "epoch": 1.418216820522336, "grad_norm": 0.10445107519626617, "learning_rate": 0.002, "loss": 2.3438, "step": 366870 }, { "epoch": 1.4182554777257195, "grad_norm": 0.09004156291484833, "learning_rate": 0.002, "loss": 2.3339, "step": 366880 }, { "epoch": 1.4182941349291027, "grad_norm": 0.1079174131155014, "learning_rate": 0.002, "loss": 2.3433, "step": 366890 }, { "epoch": 1.418332792132486, "grad_norm": 0.10222215950489044, "learning_rate": 0.002, "loss": 2.3494, "step": 366900 }, { "epoch": 1.4183714493358692, "grad_norm": 0.10205639153718948, "learning_rate": 0.002, "loss": 2.3485, "step": 366910 }, { "epoch": 1.4184101065392525, "grad_norm": 0.13565658032894135, "learning_rate": 0.002, "loss": 2.3284, "step": 366920 }, { "epoch": 1.4184487637426357, "grad_norm": 0.10180047899484634, "learning_rate": 0.002, "loss": 2.3327, "step": 366930 }, { "epoch": 1.4184874209460192, "grad_norm": 0.1170867383480072, "learning_rate": 0.002, "loss": 2.3416, "step": 366940 }, { "epoch": 1.4185260781494025, "grad_norm": 0.11132314056158066, "learning_rate": 0.002, "loss": 2.3401, "step": 366950 }, { "epoch": 1.4185647353527857, "grad_norm": 0.11016213893890381, "learning_rate": 0.002, "loss": 2.3318, "step": 366960 }, { "epoch": 1.418603392556169, "grad_norm": 0.10189254581928253, "learning_rate": 0.002, "loss": 2.3393, "step": 366970 }, { "epoch": 1.4186420497595522, "grad_norm": 0.10096590965986252, "learning_rate": 0.002, "loss": 2.3435, "step": 366980 }, { "epoch": 1.4186807069629355, "grad_norm": 0.10366704314947128, "learning_rate": 0.002, "loss": 2.3406, "step": 366990 }, { "epoch": 1.4187193641663187, "grad_norm": 0.10980086028575897, "learning_rate": 0.002, "loss": 2.3333, "step": 367000 }, { "epoch": 1.418758021369702, "grad_norm": 0.1058797687292099, "learning_rate": 0.002, "loss": 2.334, "step": 367010 }, { "epoch": 1.4187966785730852, "grad_norm": 0.11351893842220306, "learning_rate": 0.002, "loss": 2.3275, "step": 367020 }, { "epoch": 1.4188353357764685, "grad_norm": 0.10458218306303024, "learning_rate": 0.002, "loss": 2.328, "step": 367030 }, { "epoch": 1.4188739929798517, "grad_norm": 0.11217319220304489, "learning_rate": 0.002, "loss": 2.3362, "step": 367040 }, { "epoch": 1.4189126501832352, "grad_norm": 0.09893601387739182, "learning_rate": 0.002, "loss": 2.3347, "step": 367050 }, { "epoch": 1.4189513073866185, "grad_norm": 0.1001359298825264, "learning_rate": 0.002, "loss": 2.3431, "step": 367060 }, { "epoch": 1.4189899645900017, "grad_norm": 0.12899377942085266, "learning_rate": 0.002, "loss": 2.3364, "step": 367070 }, { "epoch": 1.419028621793385, "grad_norm": 0.1014057919383049, "learning_rate": 0.002, "loss": 2.3444, "step": 367080 }, { "epoch": 1.4190672789967682, "grad_norm": 0.10943603515625, "learning_rate": 0.002, "loss": 2.3347, "step": 367090 }, { "epoch": 1.4191059362001515, "grad_norm": 0.09798979014158249, "learning_rate": 0.002, "loss": 2.3554, "step": 367100 }, { "epoch": 1.419144593403535, "grad_norm": 0.11311440914869308, "learning_rate": 0.002, "loss": 2.3444, "step": 367110 }, { "epoch": 1.4191832506069182, "grad_norm": 0.098808154463768, "learning_rate": 0.002, "loss": 2.3371, "step": 367120 }, { "epoch": 1.4192219078103014, "grad_norm": 0.10621762275695801, "learning_rate": 0.002, "loss": 2.3255, "step": 367130 }, { "epoch": 1.4192605650136847, "grad_norm": 0.11025074124336243, "learning_rate": 0.002, "loss": 2.3265, "step": 367140 }, { "epoch": 1.419299222217068, "grad_norm": 0.1080571860074997, "learning_rate": 0.002, "loss": 2.349, "step": 367150 }, { "epoch": 1.4193378794204512, "grad_norm": 0.14338891208171844, "learning_rate": 0.002, "loss": 2.3393, "step": 367160 }, { "epoch": 1.4193765366238345, "grad_norm": 0.10458295047283173, "learning_rate": 0.002, "loss": 2.3263, "step": 367170 }, { "epoch": 1.4194151938272177, "grad_norm": 0.1376844346523285, "learning_rate": 0.002, "loss": 2.3399, "step": 367180 }, { "epoch": 1.419453851030601, "grad_norm": 0.11458317935466766, "learning_rate": 0.002, "loss": 2.3386, "step": 367190 }, { "epoch": 1.4194925082339842, "grad_norm": 0.13006827235221863, "learning_rate": 0.002, "loss": 2.3285, "step": 367200 }, { "epoch": 1.4195311654373677, "grad_norm": 0.10229130834341049, "learning_rate": 0.002, "loss": 2.3413, "step": 367210 }, { "epoch": 1.419569822640751, "grad_norm": 0.20018444955348969, "learning_rate": 0.002, "loss": 2.3135, "step": 367220 }, { "epoch": 1.4196084798441342, "grad_norm": 0.09971259534358978, "learning_rate": 0.002, "loss": 2.3326, "step": 367230 }, { "epoch": 1.4196471370475174, "grad_norm": 0.08571271598339081, "learning_rate": 0.002, "loss": 2.3325, "step": 367240 }, { "epoch": 1.4196857942509007, "grad_norm": 0.1437324583530426, "learning_rate": 0.002, "loss": 2.3437, "step": 367250 }, { "epoch": 1.419724451454284, "grad_norm": 0.09790416061878204, "learning_rate": 0.002, "loss": 2.3259, "step": 367260 }, { "epoch": 1.4197631086576672, "grad_norm": 0.09812484681606293, "learning_rate": 0.002, "loss": 2.3446, "step": 367270 }, { "epoch": 1.4198017658610507, "grad_norm": 0.09241976588964462, "learning_rate": 0.002, "loss": 2.3365, "step": 367280 }, { "epoch": 1.419840423064434, "grad_norm": 0.12094639241695404, "learning_rate": 0.002, "loss": 2.3318, "step": 367290 }, { "epoch": 1.4198790802678172, "grad_norm": 0.13233411312103271, "learning_rate": 0.002, "loss": 2.3296, "step": 367300 }, { "epoch": 1.4199177374712004, "grad_norm": 0.09673107415437698, "learning_rate": 0.002, "loss": 2.344, "step": 367310 }, { "epoch": 1.4199563946745837, "grad_norm": 0.11193081736564636, "learning_rate": 0.002, "loss": 2.3377, "step": 367320 }, { "epoch": 1.419995051877967, "grad_norm": 0.09532983601093292, "learning_rate": 0.002, "loss": 2.3278, "step": 367330 }, { "epoch": 1.4200337090813502, "grad_norm": 0.09918580204248428, "learning_rate": 0.002, "loss": 2.3409, "step": 367340 }, { "epoch": 1.4200723662847334, "grad_norm": 0.11038313806056976, "learning_rate": 0.002, "loss": 2.3211, "step": 367350 }, { "epoch": 1.4201110234881167, "grad_norm": 0.0948074534535408, "learning_rate": 0.002, "loss": 2.3224, "step": 367360 }, { "epoch": 1.4201496806915, "grad_norm": 0.09734196960926056, "learning_rate": 0.002, "loss": 2.3229, "step": 367370 }, { "epoch": 1.4201883378948834, "grad_norm": 0.10987944900989532, "learning_rate": 0.002, "loss": 2.3464, "step": 367380 }, { "epoch": 1.4202269950982667, "grad_norm": 0.11198877543210983, "learning_rate": 0.002, "loss": 2.3339, "step": 367390 }, { "epoch": 1.42026565230165, "grad_norm": 0.10422197729349136, "learning_rate": 0.002, "loss": 2.3423, "step": 367400 }, { "epoch": 1.4203043095050332, "grad_norm": 0.10399855673313141, "learning_rate": 0.002, "loss": 2.3246, "step": 367410 }, { "epoch": 1.4203429667084164, "grad_norm": 0.09684939682483673, "learning_rate": 0.002, "loss": 2.3352, "step": 367420 }, { "epoch": 1.4203816239117997, "grad_norm": 0.12432610988616943, "learning_rate": 0.002, "loss": 2.3416, "step": 367430 }, { "epoch": 1.420420281115183, "grad_norm": 0.10199954360723495, "learning_rate": 0.002, "loss": 2.3371, "step": 367440 }, { "epoch": 1.4204589383185664, "grad_norm": 0.09740272164344788, "learning_rate": 0.002, "loss": 2.3362, "step": 367450 }, { "epoch": 1.4204975955219497, "grad_norm": 0.10742583125829697, "learning_rate": 0.002, "loss": 2.3355, "step": 367460 }, { "epoch": 1.420536252725333, "grad_norm": 0.11151610314846039, "learning_rate": 0.002, "loss": 2.3354, "step": 367470 }, { "epoch": 1.4205749099287162, "grad_norm": 0.08460313826799393, "learning_rate": 0.002, "loss": 2.3302, "step": 367480 }, { "epoch": 1.4206135671320994, "grad_norm": 0.10881251096725464, "learning_rate": 0.002, "loss": 2.3129, "step": 367490 }, { "epoch": 1.4206522243354827, "grad_norm": 0.1039658933877945, "learning_rate": 0.002, "loss": 2.3456, "step": 367500 }, { "epoch": 1.420690881538866, "grad_norm": 0.09391934424638748, "learning_rate": 0.002, "loss": 2.3362, "step": 367510 }, { "epoch": 1.4207295387422492, "grad_norm": 0.10123565047979355, "learning_rate": 0.002, "loss": 2.3255, "step": 367520 }, { "epoch": 1.4207681959456324, "grad_norm": 0.10684125870466232, "learning_rate": 0.002, "loss": 2.3304, "step": 367530 }, { "epoch": 1.4208068531490157, "grad_norm": 0.10670426487922668, "learning_rate": 0.002, "loss": 2.3487, "step": 367540 }, { "epoch": 1.4208455103523991, "grad_norm": 0.10557490587234497, "learning_rate": 0.002, "loss": 2.3333, "step": 367550 }, { "epoch": 1.4208841675557824, "grad_norm": 0.08854088187217712, "learning_rate": 0.002, "loss": 2.3292, "step": 367560 }, { "epoch": 1.4209228247591656, "grad_norm": 0.10692249983549118, "learning_rate": 0.002, "loss": 2.3377, "step": 367570 }, { "epoch": 1.420961481962549, "grad_norm": 0.10752949863672256, "learning_rate": 0.002, "loss": 2.3251, "step": 367580 }, { "epoch": 1.4210001391659322, "grad_norm": 0.12726660072803497, "learning_rate": 0.002, "loss": 2.3271, "step": 367590 }, { "epoch": 1.4210387963693154, "grad_norm": 0.10740122944116592, "learning_rate": 0.002, "loss": 2.3395, "step": 367600 }, { "epoch": 1.4210774535726987, "grad_norm": 0.129148468375206, "learning_rate": 0.002, "loss": 2.3161, "step": 367610 }, { "epoch": 1.4211161107760821, "grad_norm": 0.10967687517404556, "learning_rate": 0.002, "loss": 2.321, "step": 367620 }, { "epoch": 1.4211547679794654, "grad_norm": 0.09522423148155212, "learning_rate": 0.002, "loss": 2.3344, "step": 367630 }, { "epoch": 1.4211934251828486, "grad_norm": 0.10406163334846497, "learning_rate": 0.002, "loss": 2.3309, "step": 367640 }, { "epoch": 1.4212320823862319, "grad_norm": 0.09662437438964844, "learning_rate": 0.002, "loss": 2.3406, "step": 367650 }, { "epoch": 1.4212707395896151, "grad_norm": 0.09962788969278336, "learning_rate": 0.002, "loss": 2.3231, "step": 367660 }, { "epoch": 1.4213093967929984, "grad_norm": 0.0924009308218956, "learning_rate": 0.002, "loss": 2.3166, "step": 367670 }, { "epoch": 1.4213480539963816, "grad_norm": 0.09442176669836044, "learning_rate": 0.002, "loss": 2.3471, "step": 367680 }, { "epoch": 1.421386711199765, "grad_norm": 0.11521480232477188, "learning_rate": 0.002, "loss": 2.3361, "step": 367690 }, { "epoch": 1.4214253684031481, "grad_norm": 0.10343587398529053, "learning_rate": 0.002, "loss": 2.3368, "step": 367700 }, { "epoch": 1.4214640256065314, "grad_norm": 0.10477244853973389, "learning_rate": 0.002, "loss": 2.3246, "step": 367710 }, { "epoch": 1.4215026828099149, "grad_norm": 0.1124812439084053, "learning_rate": 0.002, "loss": 2.3445, "step": 367720 }, { "epoch": 1.4215413400132981, "grad_norm": 0.11153688281774521, "learning_rate": 0.002, "loss": 2.3269, "step": 367730 }, { "epoch": 1.4215799972166814, "grad_norm": 0.09929315000772476, "learning_rate": 0.002, "loss": 2.3218, "step": 367740 }, { "epoch": 1.4216186544200646, "grad_norm": 0.10471302270889282, "learning_rate": 0.002, "loss": 2.3399, "step": 367750 }, { "epoch": 1.4216573116234479, "grad_norm": 0.10565510392189026, "learning_rate": 0.002, "loss": 2.3329, "step": 367760 }, { "epoch": 1.4216959688268311, "grad_norm": 0.12525208294391632, "learning_rate": 0.002, "loss": 2.3456, "step": 367770 }, { "epoch": 1.4217346260302144, "grad_norm": 0.10688397288322449, "learning_rate": 0.002, "loss": 2.3335, "step": 367780 }, { "epoch": 1.4217732832335979, "grad_norm": 0.0988677367568016, "learning_rate": 0.002, "loss": 2.3475, "step": 367790 }, { "epoch": 1.4218119404369811, "grad_norm": 0.10281901806592941, "learning_rate": 0.002, "loss": 2.3325, "step": 367800 }, { "epoch": 1.4218505976403644, "grad_norm": 0.10552622377872467, "learning_rate": 0.002, "loss": 2.3348, "step": 367810 }, { "epoch": 1.4218892548437476, "grad_norm": 0.12458661198616028, "learning_rate": 0.002, "loss": 2.3371, "step": 367820 }, { "epoch": 1.4219279120471309, "grad_norm": 0.11455637961626053, "learning_rate": 0.002, "loss": 2.3398, "step": 367830 }, { "epoch": 1.4219665692505141, "grad_norm": 0.08602002263069153, "learning_rate": 0.002, "loss": 2.3318, "step": 367840 }, { "epoch": 1.4220052264538974, "grad_norm": 0.10147720575332642, "learning_rate": 0.002, "loss": 2.3485, "step": 367850 }, { "epoch": 1.4220438836572806, "grad_norm": 0.10182800889015198, "learning_rate": 0.002, "loss": 2.3238, "step": 367860 }, { "epoch": 1.4220825408606639, "grad_norm": 0.10629668831825256, "learning_rate": 0.002, "loss": 2.3364, "step": 367870 }, { "epoch": 1.4221211980640471, "grad_norm": 0.09327349066734314, "learning_rate": 0.002, "loss": 2.3368, "step": 367880 }, { "epoch": 1.4221598552674306, "grad_norm": 0.11980682611465454, "learning_rate": 0.002, "loss": 2.3346, "step": 367890 }, { "epoch": 1.4221985124708139, "grad_norm": 0.09699162095785141, "learning_rate": 0.002, "loss": 2.3228, "step": 367900 }, { "epoch": 1.422237169674197, "grad_norm": 0.10681085288524628, "learning_rate": 0.002, "loss": 2.3288, "step": 367910 }, { "epoch": 1.4222758268775804, "grad_norm": 0.09521599113941193, "learning_rate": 0.002, "loss": 2.3495, "step": 367920 }, { "epoch": 1.4223144840809636, "grad_norm": 0.10524185746908188, "learning_rate": 0.002, "loss": 2.3271, "step": 367930 }, { "epoch": 1.4223531412843469, "grad_norm": 0.0984361320734024, "learning_rate": 0.002, "loss": 2.3339, "step": 367940 }, { "epoch": 1.4223917984877303, "grad_norm": 0.0989803597331047, "learning_rate": 0.002, "loss": 2.3309, "step": 367950 }, { "epoch": 1.4224304556911136, "grad_norm": 0.11651834100484848, "learning_rate": 0.002, "loss": 2.3459, "step": 367960 }, { "epoch": 1.4224691128944968, "grad_norm": 0.09913109242916107, "learning_rate": 0.002, "loss": 2.3341, "step": 367970 }, { "epoch": 1.42250777009788, "grad_norm": 0.12161785364151001, "learning_rate": 0.002, "loss": 2.328, "step": 367980 }, { "epoch": 1.4225464273012633, "grad_norm": 0.1221100389957428, "learning_rate": 0.002, "loss": 2.3462, "step": 367990 }, { "epoch": 1.4225850845046466, "grad_norm": 0.10555516183376312, "learning_rate": 0.002, "loss": 2.3214, "step": 368000 }, { "epoch": 1.4226237417080299, "grad_norm": 0.21151964366436005, "learning_rate": 0.002, "loss": 2.3402, "step": 368010 }, { "epoch": 1.422662398911413, "grad_norm": 0.0941983163356781, "learning_rate": 0.002, "loss": 2.3352, "step": 368020 }, { "epoch": 1.4227010561147964, "grad_norm": 0.10658541321754456, "learning_rate": 0.002, "loss": 2.333, "step": 368030 }, { "epoch": 1.4227397133181796, "grad_norm": 0.1082150936126709, "learning_rate": 0.002, "loss": 2.3111, "step": 368040 }, { "epoch": 1.4227783705215629, "grad_norm": 0.10653243958950043, "learning_rate": 0.002, "loss": 2.3353, "step": 368050 }, { "epoch": 1.4228170277249463, "grad_norm": 0.08912888169288635, "learning_rate": 0.002, "loss": 2.3239, "step": 368060 }, { "epoch": 1.4228556849283296, "grad_norm": 0.09850385040044785, "learning_rate": 0.002, "loss": 2.3363, "step": 368070 }, { "epoch": 1.4228943421317128, "grad_norm": 0.09942353516817093, "learning_rate": 0.002, "loss": 2.3529, "step": 368080 }, { "epoch": 1.422932999335096, "grad_norm": 0.13291941583156586, "learning_rate": 0.002, "loss": 2.3331, "step": 368090 }, { "epoch": 1.4229716565384793, "grad_norm": 0.10999099165201187, "learning_rate": 0.002, "loss": 2.3494, "step": 368100 }, { "epoch": 1.4230103137418626, "grad_norm": 0.10195674747228622, "learning_rate": 0.002, "loss": 2.3411, "step": 368110 }, { "epoch": 1.423048970945246, "grad_norm": 0.12106695771217346, "learning_rate": 0.002, "loss": 2.3246, "step": 368120 }, { "epoch": 1.4230876281486293, "grad_norm": 0.09300248324871063, "learning_rate": 0.002, "loss": 2.329, "step": 368130 }, { "epoch": 1.4231262853520126, "grad_norm": 0.11889013648033142, "learning_rate": 0.002, "loss": 2.3333, "step": 368140 }, { "epoch": 1.4231649425553958, "grad_norm": 0.10581057518720627, "learning_rate": 0.002, "loss": 2.3319, "step": 368150 }, { "epoch": 1.423203599758779, "grad_norm": 0.14475677907466888, "learning_rate": 0.002, "loss": 2.3206, "step": 368160 }, { "epoch": 1.4232422569621623, "grad_norm": 0.10251044481992722, "learning_rate": 0.002, "loss": 2.3262, "step": 368170 }, { "epoch": 1.4232809141655456, "grad_norm": 0.10213552415370941, "learning_rate": 0.002, "loss": 2.3326, "step": 368180 }, { "epoch": 1.4233195713689288, "grad_norm": 0.09757188707590103, "learning_rate": 0.002, "loss": 2.3335, "step": 368190 }, { "epoch": 1.423358228572312, "grad_norm": 0.11053182929754257, "learning_rate": 0.002, "loss": 2.3168, "step": 368200 }, { "epoch": 1.4233968857756953, "grad_norm": 0.08805803209543228, "learning_rate": 0.002, "loss": 2.3372, "step": 368210 }, { "epoch": 1.4234355429790786, "grad_norm": 0.11464353650808334, "learning_rate": 0.002, "loss": 2.309, "step": 368220 }, { "epoch": 1.423474200182462, "grad_norm": 0.10245181620121002, "learning_rate": 0.002, "loss": 2.3404, "step": 368230 }, { "epoch": 1.4235128573858453, "grad_norm": 0.09253382682800293, "learning_rate": 0.002, "loss": 2.3305, "step": 368240 }, { "epoch": 1.4235515145892286, "grad_norm": 0.08895239233970642, "learning_rate": 0.002, "loss": 2.3307, "step": 368250 }, { "epoch": 1.4235901717926118, "grad_norm": 0.12682317197322845, "learning_rate": 0.002, "loss": 2.3273, "step": 368260 }, { "epoch": 1.423628828995995, "grad_norm": 0.10514622181653976, "learning_rate": 0.002, "loss": 2.3362, "step": 368270 }, { "epoch": 1.4236674861993783, "grad_norm": 0.10735808312892914, "learning_rate": 0.002, "loss": 2.3429, "step": 368280 }, { "epoch": 1.4237061434027618, "grad_norm": 0.09588049352169037, "learning_rate": 0.002, "loss": 2.3246, "step": 368290 }, { "epoch": 1.423744800606145, "grad_norm": 0.10734907537698746, "learning_rate": 0.002, "loss": 2.3205, "step": 368300 }, { "epoch": 1.4237834578095283, "grad_norm": 0.09214717894792557, "learning_rate": 0.002, "loss": 2.3393, "step": 368310 }, { "epoch": 1.4238221150129116, "grad_norm": 0.08925966918468475, "learning_rate": 0.002, "loss": 2.3354, "step": 368320 }, { "epoch": 1.4238607722162948, "grad_norm": 0.11620984971523285, "learning_rate": 0.002, "loss": 2.3174, "step": 368330 }, { "epoch": 1.423899429419678, "grad_norm": 0.13195545971393585, "learning_rate": 0.002, "loss": 2.3223, "step": 368340 }, { "epoch": 1.4239380866230613, "grad_norm": 0.10492658615112305, "learning_rate": 0.002, "loss": 2.3372, "step": 368350 }, { "epoch": 1.4239767438264446, "grad_norm": 0.1065715104341507, "learning_rate": 0.002, "loss": 2.3364, "step": 368360 }, { "epoch": 1.4240154010298278, "grad_norm": 0.09849032759666443, "learning_rate": 0.002, "loss": 2.3481, "step": 368370 }, { "epoch": 1.424054058233211, "grad_norm": 0.10577639192342758, "learning_rate": 0.002, "loss": 2.3178, "step": 368380 }, { "epoch": 1.4240927154365943, "grad_norm": 0.12574061751365662, "learning_rate": 0.002, "loss": 2.3265, "step": 368390 }, { "epoch": 1.4241313726399778, "grad_norm": 0.11132027953863144, "learning_rate": 0.002, "loss": 2.3451, "step": 368400 }, { "epoch": 1.424170029843361, "grad_norm": 0.10425229370594025, "learning_rate": 0.002, "loss": 2.3323, "step": 368410 }, { "epoch": 1.4242086870467443, "grad_norm": 0.09737608581781387, "learning_rate": 0.002, "loss": 2.3429, "step": 368420 }, { "epoch": 1.4242473442501276, "grad_norm": 0.11113197356462479, "learning_rate": 0.002, "loss": 2.3281, "step": 368430 }, { "epoch": 1.4242860014535108, "grad_norm": 0.10532024502754211, "learning_rate": 0.002, "loss": 2.3284, "step": 368440 }, { "epoch": 1.424324658656894, "grad_norm": 0.09628026187419891, "learning_rate": 0.002, "loss": 2.3298, "step": 368450 }, { "epoch": 1.4243633158602775, "grad_norm": 0.1088952049612999, "learning_rate": 0.002, "loss": 2.3394, "step": 368460 }, { "epoch": 1.4244019730636608, "grad_norm": 0.12860263884067535, "learning_rate": 0.002, "loss": 2.3451, "step": 368470 }, { "epoch": 1.424440630267044, "grad_norm": 0.10668892413377762, "learning_rate": 0.002, "loss": 2.3293, "step": 368480 }, { "epoch": 1.4244792874704273, "grad_norm": 0.1027596965432167, "learning_rate": 0.002, "loss": 2.323, "step": 368490 }, { "epoch": 1.4245179446738105, "grad_norm": 0.12930242717266083, "learning_rate": 0.002, "loss": 2.3291, "step": 368500 }, { "epoch": 1.4245566018771938, "grad_norm": 0.10408979654312134, "learning_rate": 0.002, "loss": 2.3386, "step": 368510 }, { "epoch": 1.424595259080577, "grad_norm": 0.09692550450563431, "learning_rate": 0.002, "loss": 2.3466, "step": 368520 }, { "epoch": 1.4246339162839603, "grad_norm": 0.10290974378585815, "learning_rate": 0.002, "loss": 2.3312, "step": 368530 }, { "epoch": 1.4246725734873436, "grad_norm": 0.08995725214481354, "learning_rate": 0.002, "loss": 2.3483, "step": 368540 }, { "epoch": 1.4247112306907268, "grad_norm": 0.09780704975128174, "learning_rate": 0.002, "loss": 2.3225, "step": 368550 }, { "epoch": 1.42474988789411, "grad_norm": 0.10348577052354813, "learning_rate": 0.002, "loss": 2.3406, "step": 368560 }, { "epoch": 1.4247885450974935, "grad_norm": 0.10124948620796204, "learning_rate": 0.002, "loss": 2.3308, "step": 368570 }, { "epoch": 1.4248272023008768, "grad_norm": 0.11615990102291107, "learning_rate": 0.002, "loss": 2.3218, "step": 368580 }, { "epoch": 1.42486585950426, "grad_norm": 0.11272797733545303, "learning_rate": 0.002, "loss": 2.32, "step": 368590 }, { "epoch": 1.4249045167076433, "grad_norm": 0.09842094779014587, "learning_rate": 0.002, "loss": 2.3447, "step": 368600 }, { "epoch": 1.4249431739110265, "grad_norm": 0.11287661641836166, "learning_rate": 0.002, "loss": 2.3403, "step": 368610 }, { "epoch": 1.4249818311144098, "grad_norm": 0.11041462421417236, "learning_rate": 0.002, "loss": 2.3466, "step": 368620 }, { "epoch": 1.4250204883177933, "grad_norm": 0.1251877099275589, "learning_rate": 0.002, "loss": 2.3365, "step": 368630 }, { "epoch": 1.4250591455211765, "grad_norm": 0.0997016504406929, "learning_rate": 0.002, "loss": 2.3239, "step": 368640 }, { "epoch": 1.4250978027245598, "grad_norm": 0.16072382032871246, "learning_rate": 0.002, "loss": 2.3373, "step": 368650 }, { "epoch": 1.425136459927943, "grad_norm": 0.09736277163028717, "learning_rate": 0.002, "loss": 2.3119, "step": 368660 }, { "epoch": 1.4251751171313263, "grad_norm": 0.10369211435317993, "learning_rate": 0.002, "loss": 2.3184, "step": 368670 }, { "epoch": 1.4252137743347095, "grad_norm": 0.09280867129564285, "learning_rate": 0.002, "loss": 2.344, "step": 368680 }, { "epoch": 1.4252524315380928, "grad_norm": 0.09761892259120941, "learning_rate": 0.002, "loss": 2.3269, "step": 368690 }, { "epoch": 1.425291088741476, "grad_norm": 0.13509036600589752, "learning_rate": 0.002, "loss": 2.347, "step": 368700 }, { "epoch": 1.4253297459448593, "grad_norm": 0.09694381058216095, "learning_rate": 0.002, "loss": 2.3261, "step": 368710 }, { "epoch": 1.4253684031482425, "grad_norm": 0.11534082889556885, "learning_rate": 0.002, "loss": 2.3197, "step": 368720 }, { "epoch": 1.4254070603516258, "grad_norm": 0.09117257595062256, "learning_rate": 0.002, "loss": 2.3168, "step": 368730 }, { "epoch": 1.4254457175550093, "grad_norm": 0.10641550272703171, "learning_rate": 0.002, "loss": 2.3452, "step": 368740 }, { "epoch": 1.4254843747583925, "grad_norm": 0.13109582662582397, "learning_rate": 0.002, "loss": 2.3316, "step": 368750 }, { "epoch": 1.4255230319617758, "grad_norm": 0.09147099405527115, "learning_rate": 0.002, "loss": 2.3435, "step": 368760 }, { "epoch": 1.425561689165159, "grad_norm": 0.12726427614688873, "learning_rate": 0.002, "loss": 2.3243, "step": 368770 }, { "epoch": 1.4256003463685423, "grad_norm": 0.10484074056148529, "learning_rate": 0.002, "loss": 2.3282, "step": 368780 }, { "epoch": 1.4256390035719255, "grad_norm": 0.09867366403341293, "learning_rate": 0.002, "loss": 2.3391, "step": 368790 }, { "epoch": 1.425677660775309, "grad_norm": 0.10545188188552856, "learning_rate": 0.002, "loss": 2.3242, "step": 368800 }, { "epoch": 1.4257163179786922, "grad_norm": 0.11329960078001022, "learning_rate": 0.002, "loss": 2.3237, "step": 368810 }, { "epoch": 1.4257549751820755, "grad_norm": 0.1431681513786316, "learning_rate": 0.002, "loss": 2.3254, "step": 368820 }, { "epoch": 1.4257936323854588, "grad_norm": 0.4852242171764374, "learning_rate": 0.002, "loss": 2.3297, "step": 368830 }, { "epoch": 1.425832289588842, "grad_norm": 0.10883507877588272, "learning_rate": 0.002, "loss": 2.3389, "step": 368840 }, { "epoch": 1.4258709467922253, "grad_norm": 0.08958674222230911, "learning_rate": 0.002, "loss": 2.3361, "step": 368850 }, { "epoch": 1.4259096039956085, "grad_norm": 0.11914104223251343, "learning_rate": 0.002, "loss": 2.3358, "step": 368860 }, { "epoch": 1.4259482611989918, "grad_norm": 0.08615414053201675, "learning_rate": 0.002, "loss": 2.322, "step": 368870 }, { "epoch": 1.425986918402375, "grad_norm": 0.09670282155275345, "learning_rate": 0.002, "loss": 2.3271, "step": 368880 }, { "epoch": 1.4260255756057583, "grad_norm": 0.0999981239438057, "learning_rate": 0.002, "loss": 2.3404, "step": 368890 }, { "epoch": 1.4260642328091415, "grad_norm": 0.09854158759117126, "learning_rate": 0.002, "loss": 2.3445, "step": 368900 }, { "epoch": 1.426102890012525, "grad_norm": 0.11261072754859924, "learning_rate": 0.002, "loss": 2.3327, "step": 368910 }, { "epoch": 1.4261415472159082, "grad_norm": 0.09439534693956375, "learning_rate": 0.002, "loss": 2.332, "step": 368920 }, { "epoch": 1.4261802044192915, "grad_norm": 0.10984434932470322, "learning_rate": 0.002, "loss": 2.3412, "step": 368930 }, { "epoch": 1.4262188616226747, "grad_norm": 0.1016111746430397, "learning_rate": 0.002, "loss": 2.3498, "step": 368940 }, { "epoch": 1.426257518826058, "grad_norm": 0.0948866456747055, "learning_rate": 0.002, "loss": 2.3409, "step": 368950 }, { "epoch": 1.4262961760294413, "grad_norm": 0.08846337348222733, "learning_rate": 0.002, "loss": 2.3272, "step": 368960 }, { "epoch": 1.4263348332328247, "grad_norm": 0.11411886662244797, "learning_rate": 0.002, "loss": 2.3452, "step": 368970 }, { "epoch": 1.426373490436208, "grad_norm": 0.09665101021528244, "learning_rate": 0.002, "loss": 2.3242, "step": 368980 }, { "epoch": 1.4264121476395912, "grad_norm": 0.10688138008117676, "learning_rate": 0.002, "loss": 2.3388, "step": 368990 }, { "epoch": 1.4264508048429745, "grad_norm": 0.11031637340784073, "learning_rate": 0.002, "loss": 2.3469, "step": 369000 }, { "epoch": 1.4264894620463577, "grad_norm": 0.14109115302562714, "learning_rate": 0.002, "loss": 2.3363, "step": 369010 }, { "epoch": 1.426528119249741, "grad_norm": 0.11171339452266693, "learning_rate": 0.002, "loss": 2.3209, "step": 369020 }, { "epoch": 1.4265667764531242, "grad_norm": 0.12456446141004562, "learning_rate": 0.002, "loss": 2.3341, "step": 369030 }, { "epoch": 1.4266054336565075, "grad_norm": 0.10644245147705078, "learning_rate": 0.002, "loss": 2.3277, "step": 369040 }, { "epoch": 1.4266440908598907, "grad_norm": 0.11632377654314041, "learning_rate": 0.002, "loss": 2.3263, "step": 369050 }, { "epoch": 1.426682748063274, "grad_norm": 0.09376497566699982, "learning_rate": 0.002, "loss": 2.3332, "step": 369060 }, { "epoch": 1.4267214052666575, "grad_norm": 0.1096901223063469, "learning_rate": 0.002, "loss": 2.3472, "step": 369070 }, { "epoch": 1.4267600624700407, "grad_norm": 0.09440205991268158, "learning_rate": 0.002, "loss": 2.3257, "step": 369080 }, { "epoch": 1.426798719673424, "grad_norm": 0.10393696278333664, "learning_rate": 0.002, "loss": 2.3336, "step": 369090 }, { "epoch": 1.4268373768768072, "grad_norm": 0.0952950194478035, "learning_rate": 0.002, "loss": 2.3179, "step": 369100 }, { "epoch": 1.4268760340801905, "grad_norm": 0.09243932366371155, "learning_rate": 0.002, "loss": 2.3195, "step": 369110 }, { "epoch": 1.4269146912835737, "grad_norm": 0.10410625487565994, "learning_rate": 0.002, "loss": 2.3262, "step": 369120 }, { "epoch": 1.426953348486957, "grad_norm": 0.09936773777008057, "learning_rate": 0.002, "loss": 2.3361, "step": 369130 }, { "epoch": 1.4269920056903405, "grad_norm": 0.10040454566478729, "learning_rate": 0.002, "loss": 2.344, "step": 369140 }, { "epoch": 1.4270306628937237, "grad_norm": 0.11334496736526489, "learning_rate": 0.002, "loss": 2.3488, "step": 369150 }, { "epoch": 1.427069320097107, "grad_norm": 0.09408103674650192, "learning_rate": 0.002, "loss": 2.3282, "step": 369160 }, { "epoch": 1.4271079773004902, "grad_norm": 0.09497067332267761, "learning_rate": 0.002, "loss": 2.3296, "step": 369170 }, { "epoch": 1.4271466345038735, "grad_norm": 0.10084699094295502, "learning_rate": 0.002, "loss": 2.3189, "step": 369180 }, { "epoch": 1.4271852917072567, "grad_norm": 0.1148439347743988, "learning_rate": 0.002, "loss": 2.3391, "step": 369190 }, { "epoch": 1.42722394891064, "grad_norm": 0.11306999623775482, "learning_rate": 0.002, "loss": 2.3263, "step": 369200 }, { "epoch": 1.4272626061140232, "grad_norm": 0.10049940645694733, "learning_rate": 0.002, "loss": 2.3301, "step": 369210 }, { "epoch": 1.4273012633174065, "grad_norm": 0.10067114233970642, "learning_rate": 0.002, "loss": 2.3304, "step": 369220 }, { "epoch": 1.4273399205207897, "grad_norm": 0.10959344357252121, "learning_rate": 0.002, "loss": 2.3324, "step": 369230 }, { "epoch": 1.4273785777241732, "grad_norm": 0.12859250605106354, "learning_rate": 0.002, "loss": 2.3222, "step": 369240 }, { "epoch": 1.4274172349275565, "grad_norm": 0.099395252764225, "learning_rate": 0.002, "loss": 2.3535, "step": 369250 }, { "epoch": 1.4274558921309397, "grad_norm": 0.09150033444166183, "learning_rate": 0.002, "loss": 2.3327, "step": 369260 }, { "epoch": 1.427494549334323, "grad_norm": 0.11286135017871857, "learning_rate": 0.002, "loss": 2.3309, "step": 369270 }, { "epoch": 1.4275332065377062, "grad_norm": 0.11032780259847641, "learning_rate": 0.002, "loss": 2.3321, "step": 369280 }, { "epoch": 1.4275718637410895, "grad_norm": 0.0984359011054039, "learning_rate": 0.002, "loss": 2.3246, "step": 369290 }, { "epoch": 1.4276105209444727, "grad_norm": 0.10086100548505783, "learning_rate": 0.002, "loss": 2.3293, "step": 369300 }, { "epoch": 1.4276491781478562, "grad_norm": 0.09905387461185455, "learning_rate": 0.002, "loss": 2.3313, "step": 369310 }, { "epoch": 1.4276878353512394, "grad_norm": 0.09202684462070465, "learning_rate": 0.002, "loss": 2.3303, "step": 369320 }, { "epoch": 1.4277264925546227, "grad_norm": 0.11295720189809799, "learning_rate": 0.002, "loss": 2.3358, "step": 369330 }, { "epoch": 1.427765149758006, "grad_norm": 0.13472798466682434, "learning_rate": 0.002, "loss": 2.3171, "step": 369340 }, { "epoch": 1.4278038069613892, "grad_norm": 0.1097302958369255, "learning_rate": 0.002, "loss": 2.3346, "step": 369350 }, { "epoch": 1.4278424641647725, "grad_norm": 0.1095571219921112, "learning_rate": 0.002, "loss": 2.3361, "step": 369360 }, { "epoch": 1.4278811213681557, "grad_norm": 0.09615956991910934, "learning_rate": 0.002, "loss": 2.3211, "step": 369370 }, { "epoch": 1.427919778571539, "grad_norm": 0.20836985111236572, "learning_rate": 0.002, "loss": 2.3153, "step": 369380 }, { "epoch": 1.4279584357749222, "grad_norm": 0.09848025441169739, "learning_rate": 0.002, "loss": 2.3212, "step": 369390 }, { "epoch": 1.4279970929783055, "grad_norm": 0.09631534665822983, "learning_rate": 0.002, "loss": 2.3255, "step": 369400 }, { "epoch": 1.428035750181689, "grad_norm": 0.12165029346942902, "learning_rate": 0.002, "loss": 2.324, "step": 369410 }, { "epoch": 1.4280744073850722, "grad_norm": 0.11290930956602097, "learning_rate": 0.002, "loss": 2.3275, "step": 369420 }, { "epoch": 1.4281130645884554, "grad_norm": 0.10189209133386612, "learning_rate": 0.002, "loss": 2.3423, "step": 369430 }, { "epoch": 1.4281517217918387, "grad_norm": 0.1195901483297348, "learning_rate": 0.002, "loss": 2.3473, "step": 369440 }, { "epoch": 1.428190378995222, "grad_norm": 0.1144580990076065, "learning_rate": 0.002, "loss": 2.3358, "step": 369450 }, { "epoch": 1.4282290361986052, "grad_norm": 0.10400616377592087, "learning_rate": 0.002, "loss": 2.3267, "step": 369460 }, { "epoch": 1.4282676934019884, "grad_norm": 0.09329728782176971, "learning_rate": 0.002, "loss": 2.3381, "step": 369470 }, { "epoch": 1.428306350605372, "grad_norm": 0.09955257922410965, "learning_rate": 0.002, "loss": 2.3454, "step": 369480 }, { "epoch": 1.4283450078087552, "grad_norm": 0.11704539507627487, "learning_rate": 0.002, "loss": 2.3359, "step": 369490 }, { "epoch": 1.4283836650121384, "grad_norm": 0.1107201874256134, "learning_rate": 0.002, "loss": 2.3384, "step": 369500 }, { "epoch": 1.4284223222155217, "grad_norm": 0.09703484922647476, "learning_rate": 0.002, "loss": 2.3372, "step": 369510 }, { "epoch": 1.428460979418905, "grad_norm": 0.11836137622594833, "learning_rate": 0.002, "loss": 2.3231, "step": 369520 }, { "epoch": 1.4284996366222882, "grad_norm": 0.10974926501512527, "learning_rate": 0.002, "loss": 2.3283, "step": 369530 }, { "epoch": 1.4285382938256714, "grad_norm": 0.12903998792171478, "learning_rate": 0.002, "loss": 2.3228, "step": 369540 }, { "epoch": 1.4285769510290547, "grad_norm": 0.09003593027591705, "learning_rate": 0.002, "loss": 2.3368, "step": 369550 }, { "epoch": 1.428615608232438, "grad_norm": 0.11508678644895554, "learning_rate": 0.002, "loss": 2.3415, "step": 369560 }, { "epoch": 1.4286542654358212, "grad_norm": 0.11840968579053879, "learning_rate": 0.002, "loss": 2.3418, "step": 369570 }, { "epoch": 1.4286929226392047, "grad_norm": 0.10051307082176208, "learning_rate": 0.002, "loss": 2.3251, "step": 369580 }, { "epoch": 1.428731579842588, "grad_norm": 0.1256880760192871, "learning_rate": 0.002, "loss": 2.3483, "step": 369590 }, { "epoch": 1.4287702370459712, "grad_norm": 0.11354075372219086, "learning_rate": 0.002, "loss": 2.35, "step": 369600 }, { "epoch": 1.4288088942493544, "grad_norm": 0.11274783313274384, "learning_rate": 0.002, "loss": 2.3343, "step": 369610 }, { "epoch": 1.4288475514527377, "grad_norm": 0.10234702378511429, "learning_rate": 0.002, "loss": 2.3379, "step": 369620 }, { "epoch": 1.428886208656121, "grad_norm": 0.12185943126678467, "learning_rate": 0.002, "loss": 2.3444, "step": 369630 }, { "epoch": 1.4289248658595042, "grad_norm": 0.12358567118644714, "learning_rate": 0.002, "loss": 2.3332, "step": 369640 }, { "epoch": 1.4289635230628877, "grad_norm": 0.10948533564805984, "learning_rate": 0.002, "loss": 2.3316, "step": 369650 }, { "epoch": 1.429002180266271, "grad_norm": 0.09104648977518082, "learning_rate": 0.002, "loss": 2.3296, "step": 369660 }, { "epoch": 1.4290408374696542, "grad_norm": 0.10487092286348343, "learning_rate": 0.002, "loss": 2.3277, "step": 369670 }, { "epoch": 1.4290794946730374, "grad_norm": 0.10738363116979599, "learning_rate": 0.002, "loss": 2.3316, "step": 369680 }, { "epoch": 1.4291181518764207, "grad_norm": 0.1259232610464096, "learning_rate": 0.002, "loss": 2.3485, "step": 369690 }, { "epoch": 1.429156809079804, "grad_norm": 0.09970113635063171, "learning_rate": 0.002, "loss": 2.3399, "step": 369700 }, { "epoch": 1.4291954662831872, "grad_norm": 0.10582879930734634, "learning_rate": 0.002, "loss": 2.3348, "step": 369710 }, { "epoch": 1.4292341234865704, "grad_norm": 0.10421253740787506, "learning_rate": 0.002, "loss": 2.3233, "step": 369720 }, { "epoch": 1.4292727806899537, "grad_norm": 0.10977336764335632, "learning_rate": 0.002, "loss": 2.3391, "step": 369730 }, { "epoch": 1.429311437893337, "grad_norm": 0.12770582735538483, "learning_rate": 0.002, "loss": 2.3253, "step": 369740 }, { "epoch": 1.4293500950967204, "grad_norm": 0.09583185613155365, "learning_rate": 0.002, "loss": 2.3406, "step": 369750 }, { "epoch": 1.4293887523001036, "grad_norm": 0.11037515103816986, "learning_rate": 0.002, "loss": 2.3304, "step": 369760 }, { "epoch": 1.429427409503487, "grad_norm": 0.1029893308877945, "learning_rate": 0.002, "loss": 2.3427, "step": 369770 }, { "epoch": 1.4294660667068702, "grad_norm": 0.10304558277130127, "learning_rate": 0.002, "loss": 2.3296, "step": 369780 }, { "epoch": 1.4295047239102534, "grad_norm": 0.12139219045639038, "learning_rate": 0.002, "loss": 2.3371, "step": 369790 }, { "epoch": 1.4295433811136367, "grad_norm": 0.10977718979120255, "learning_rate": 0.002, "loss": 2.3292, "step": 369800 }, { "epoch": 1.4295820383170201, "grad_norm": 0.10141117870807648, "learning_rate": 0.002, "loss": 2.3252, "step": 369810 }, { "epoch": 1.4296206955204034, "grad_norm": 0.10547979921102524, "learning_rate": 0.002, "loss": 2.3404, "step": 369820 }, { "epoch": 1.4296593527237866, "grad_norm": 0.0912381038069725, "learning_rate": 0.002, "loss": 2.32, "step": 369830 }, { "epoch": 1.4296980099271699, "grad_norm": 0.09478603303432465, "learning_rate": 0.002, "loss": 2.3329, "step": 369840 }, { "epoch": 1.4297366671305531, "grad_norm": 0.10601737350225449, "learning_rate": 0.002, "loss": 2.3401, "step": 369850 }, { "epoch": 1.4297753243339364, "grad_norm": 0.11179604381322861, "learning_rate": 0.002, "loss": 2.3353, "step": 369860 }, { "epoch": 1.4298139815373196, "grad_norm": 0.10227113217115402, "learning_rate": 0.002, "loss": 2.3323, "step": 369870 }, { "epoch": 1.429852638740703, "grad_norm": 0.0985608771443367, "learning_rate": 0.002, "loss": 2.3215, "step": 369880 }, { "epoch": 1.4298912959440861, "grad_norm": 0.11969029903411865, "learning_rate": 0.002, "loss": 2.3113, "step": 369890 }, { "epoch": 1.4299299531474694, "grad_norm": 0.10263413190841675, "learning_rate": 0.002, "loss": 2.3292, "step": 369900 }, { "epoch": 1.4299686103508527, "grad_norm": 0.12281263619661331, "learning_rate": 0.002, "loss": 2.3311, "step": 369910 }, { "epoch": 1.4300072675542361, "grad_norm": 0.10118019580841064, "learning_rate": 0.002, "loss": 2.3248, "step": 369920 }, { "epoch": 1.4300459247576194, "grad_norm": 0.0986642837524414, "learning_rate": 0.002, "loss": 2.3297, "step": 369930 }, { "epoch": 1.4300845819610026, "grad_norm": 0.10727502405643463, "learning_rate": 0.002, "loss": 2.3269, "step": 369940 }, { "epoch": 1.4301232391643859, "grad_norm": 0.09434787184000015, "learning_rate": 0.002, "loss": 2.334, "step": 369950 }, { "epoch": 1.4301618963677691, "grad_norm": 0.103243887424469, "learning_rate": 0.002, "loss": 2.3292, "step": 369960 }, { "epoch": 1.4302005535711524, "grad_norm": 0.11206620931625366, "learning_rate": 0.002, "loss": 2.3472, "step": 369970 }, { "epoch": 1.4302392107745359, "grad_norm": 0.11155019700527191, "learning_rate": 0.002, "loss": 2.3323, "step": 369980 }, { "epoch": 1.4302778679779191, "grad_norm": 0.10238495469093323, "learning_rate": 0.002, "loss": 2.3304, "step": 369990 }, { "epoch": 1.4303165251813024, "grad_norm": 0.11440207809209824, "learning_rate": 0.002, "loss": 2.3438, "step": 370000 }, { "epoch": 1.4303551823846856, "grad_norm": 0.10964350402355194, "learning_rate": 0.002, "loss": 2.3365, "step": 370010 }, { "epoch": 1.4303938395880689, "grad_norm": 0.09666993468999863, "learning_rate": 0.002, "loss": 2.3196, "step": 370020 }, { "epoch": 1.4304324967914521, "grad_norm": 0.10145600140094757, "learning_rate": 0.002, "loss": 2.3368, "step": 370030 }, { "epoch": 1.4304711539948354, "grad_norm": 0.12468435615301132, "learning_rate": 0.002, "loss": 2.3439, "step": 370040 }, { "epoch": 1.4305098111982186, "grad_norm": 0.10010754317045212, "learning_rate": 0.002, "loss": 2.3292, "step": 370050 }, { "epoch": 1.4305484684016019, "grad_norm": 0.0995609387755394, "learning_rate": 0.002, "loss": 2.3257, "step": 370060 }, { "epoch": 1.4305871256049851, "grad_norm": 0.0948537290096283, "learning_rate": 0.002, "loss": 2.3093, "step": 370070 }, { "epoch": 1.4306257828083684, "grad_norm": 0.1278780996799469, "learning_rate": 0.002, "loss": 2.3506, "step": 370080 }, { "epoch": 1.4306644400117519, "grad_norm": 0.09928832203149796, "learning_rate": 0.002, "loss": 2.3223, "step": 370090 }, { "epoch": 1.4307030972151351, "grad_norm": 0.11928533017635345, "learning_rate": 0.002, "loss": 2.3268, "step": 370100 }, { "epoch": 1.4307417544185184, "grad_norm": 0.13536189496517181, "learning_rate": 0.002, "loss": 2.3446, "step": 370110 }, { "epoch": 1.4307804116219016, "grad_norm": 0.10143552720546722, "learning_rate": 0.002, "loss": 2.3389, "step": 370120 }, { "epoch": 1.4308190688252849, "grad_norm": 0.10408347100019455, "learning_rate": 0.002, "loss": 2.3269, "step": 370130 }, { "epoch": 1.4308577260286681, "grad_norm": 0.09588273614645004, "learning_rate": 0.002, "loss": 2.3324, "step": 370140 }, { "epoch": 1.4308963832320516, "grad_norm": 0.09177899360656738, "learning_rate": 0.002, "loss": 2.3305, "step": 370150 }, { "epoch": 1.4309350404354348, "grad_norm": 0.11381170153617859, "learning_rate": 0.002, "loss": 2.3408, "step": 370160 }, { "epoch": 1.430973697638818, "grad_norm": 0.09797380119562149, "learning_rate": 0.002, "loss": 2.3411, "step": 370170 }, { "epoch": 1.4310123548422014, "grad_norm": 0.11212203651666641, "learning_rate": 0.002, "loss": 2.333, "step": 370180 }, { "epoch": 1.4310510120455846, "grad_norm": 0.10767749696969986, "learning_rate": 0.002, "loss": 2.3211, "step": 370190 }, { "epoch": 1.4310896692489679, "grad_norm": 0.09107781946659088, "learning_rate": 0.002, "loss": 2.3274, "step": 370200 }, { "epoch": 1.431128326452351, "grad_norm": 0.10297054052352905, "learning_rate": 0.002, "loss": 2.3232, "step": 370210 }, { "epoch": 1.4311669836557344, "grad_norm": 0.09875304996967316, "learning_rate": 0.002, "loss": 2.3323, "step": 370220 }, { "epoch": 1.4312056408591176, "grad_norm": 0.0951622724533081, "learning_rate": 0.002, "loss": 2.3404, "step": 370230 }, { "epoch": 1.4312442980625009, "grad_norm": 0.10550680011510849, "learning_rate": 0.002, "loss": 2.3385, "step": 370240 }, { "epoch": 1.4312829552658841, "grad_norm": 0.10152022540569305, "learning_rate": 0.002, "loss": 2.328, "step": 370250 }, { "epoch": 1.4313216124692676, "grad_norm": 0.09326834976673126, "learning_rate": 0.002, "loss": 2.3377, "step": 370260 }, { "epoch": 1.4313602696726508, "grad_norm": 0.10848992317914963, "learning_rate": 0.002, "loss": 2.3288, "step": 370270 }, { "epoch": 1.431398926876034, "grad_norm": 0.09691794961690903, "learning_rate": 0.002, "loss": 2.3606, "step": 370280 }, { "epoch": 1.4314375840794173, "grad_norm": 0.12346416711807251, "learning_rate": 0.002, "loss": 2.3291, "step": 370290 }, { "epoch": 1.4314762412828006, "grad_norm": 0.10652803629636765, "learning_rate": 0.002, "loss": 2.3496, "step": 370300 }, { "epoch": 1.4315148984861839, "grad_norm": 0.13868898153305054, "learning_rate": 0.002, "loss": 2.3231, "step": 370310 }, { "epoch": 1.4315535556895673, "grad_norm": 0.10139944404363632, "learning_rate": 0.002, "loss": 2.3264, "step": 370320 }, { "epoch": 1.4315922128929506, "grad_norm": 0.10313576459884644, "learning_rate": 0.002, "loss": 2.3426, "step": 370330 }, { "epoch": 1.4316308700963338, "grad_norm": 0.09152504056692123, "learning_rate": 0.002, "loss": 2.317, "step": 370340 }, { "epoch": 1.431669527299717, "grad_norm": 0.10575311630964279, "learning_rate": 0.002, "loss": 2.3391, "step": 370350 }, { "epoch": 1.4317081845031003, "grad_norm": 0.14318452775478363, "learning_rate": 0.002, "loss": 2.3358, "step": 370360 }, { "epoch": 1.4317468417064836, "grad_norm": 0.11087655276060104, "learning_rate": 0.002, "loss": 2.3458, "step": 370370 }, { "epoch": 1.4317854989098668, "grad_norm": 0.10831261426210403, "learning_rate": 0.002, "loss": 2.3384, "step": 370380 }, { "epoch": 1.43182415611325, "grad_norm": 0.1127522811293602, "learning_rate": 0.002, "loss": 2.3397, "step": 370390 }, { "epoch": 1.4318628133166333, "grad_norm": 0.09608136862516403, "learning_rate": 0.002, "loss": 2.3346, "step": 370400 }, { "epoch": 1.4319014705200166, "grad_norm": 0.11116312444210052, "learning_rate": 0.002, "loss": 2.3238, "step": 370410 }, { "epoch": 1.4319401277233998, "grad_norm": 0.10763781517744064, "learning_rate": 0.002, "loss": 2.3444, "step": 370420 }, { "epoch": 1.4319787849267833, "grad_norm": 0.1139378771185875, "learning_rate": 0.002, "loss": 2.3324, "step": 370430 }, { "epoch": 1.4320174421301666, "grad_norm": 0.11814486235380173, "learning_rate": 0.002, "loss": 2.3305, "step": 370440 }, { "epoch": 1.4320560993335498, "grad_norm": 0.11366909742355347, "learning_rate": 0.002, "loss": 2.328, "step": 370450 }, { "epoch": 1.432094756536933, "grad_norm": 0.10304597020149231, "learning_rate": 0.002, "loss": 2.3376, "step": 370460 }, { "epoch": 1.4321334137403163, "grad_norm": 0.09563199430704117, "learning_rate": 0.002, "loss": 2.3244, "step": 370470 }, { "epoch": 1.4321720709436996, "grad_norm": 0.09992068260908127, "learning_rate": 0.002, "loss": 2.344, "step": 370480 }, { "epoch": 1.432210728147083, "grad_norm": 0.11581120640039444, "learning_rate": 0.002, "loss": 2.3242, "step": 370490 }, { "epoch": 1.4322493853504663, "grad_norm": 0.09946183860301971, "learning_rate": 0.002, "loss": 2.3317, "step": 370500 }, { "epoch": 1.4322880425538496, "grad_norm": 0.10362624377012253, "learning_rate": 0.002, "loss": 2.3338, "step": 370510 }, { "epoch": 1.4323266997572328, "grad_norm": 0.0960688441991806, "learning_rate": 0.002, "loss": 2.3299, "step": 370520 }, { "epoch": 1.432365356960616, "grad_norm": 0.11272174119949341, "learning_rate": 0.002, "loss": 2.3241, "step": 370530 }, { "epoch": 1.4324040141639993, "grad_norm": 0.09724114090204239, "learning_rate": 0.002, "loss": 2.3303, "step": 370540 }, { "epoch": 1.4324426713673826, "grad_norm": 0.08760736137628555, "learning_rate": 0.002, "loss": 2.3381, "step": 370550 }, { "epoch": 1.4324813285707658, "grad_norm": 0.13241741061210632, "learning_rate": 0.002, "loss": 2.3424, "step": 370560 }, { "epoch": 1.432519985774149, "grad_norm": 0.11077427119016647, "learning_rate": 0.002, "loss": 2.3247, "step": 370570 }, { "epoch": 1.4325586429775323, "grad_norm": 0.09922739863395691, "learning_rate": 0.002, "loss": 2.3307, "step": 370580 }, { "epoch": 1.4325973001809156, "grad_norm": 0.11597199738025665, "learning_rate": 0.002, "loss": 2.3262, "step": 370590 }, { "epoch": 1.432635957384299, "grad_norm": 0.11011666059494019, "learning_rate": 0.002, "loss": 2.3338, "step": 370600 }, { "epoch": 1.4326746145876823, "grad_norm": 0.12466747313737869, "learning_rate": 0.002, "loss": 2.3221, "step": 370610 }, { "epoch": 1.4327132717910656, "grad_norm": 0.10244567692279816, "learning_rate": 0.002, "loss": 2.3354, "step": 370620 }, { "epoch": 1.4327519289944488, "grad_norm": 0.09919389337301254, "learning_rate": 0.002, "loss": 2.3407, "step": 370630 }, { "epoch": 1.432790586197832, "grad_norm": 0.11798831075429916, "learning_rate": 0.002, "loss": 2.3239, "step": 370640 }, { "epoch": 1.4328292434012153, "grad_norm": 0.12733817100524902, "learning_rate": 0.002, "loss": 2.3358, "step": 370650 }, { "epoch": 1.4328679006045988, "grad_norm": 0.09250468760728836, "learning_rate": 0.002, "loss": 2.3419, "step": 370660 }, { "epoch": 1.432906557807982, "grad_norm": 0.13374805450439453, "learning_rate": 0.002, "loss": 2.3394, "step": 370670 }, { "epoch": 1.4329452150113653, "grad_norm": 0.10336792469024658, "learning_rate": 0.002, "loss": 2.3239, "step": 370680 }, { "epoch": 1.4329838722147485, "grad_norm": 0.10279474407434464, "learning_rate": 0.002, "loss": 2.3289, "step": 370690 }, { "epoch": 1.4330225294181318, "grad_norm": 0.09461446106433868, "learning_rate": 0.002, "loss": 2.3316, "step": 370700 }, { "epoch": 1.433061186621515, "grad_norm": 0.14921467006206512, "learning_rate": 0.002, "loss": 2.3483, "step": 370710 }, { "epoch": 1.4330998438248983, "grad_norm": 0.16347302496433258, "learning_rate": 0.002, "loss": 2.3416, "step": 370720 }, { "epoch": 1.4331385010282816, "grad_norm": 0.11512413620948792, "learning_rate": 0.002, "loss": 2.3478, "step": 370730 }, { "epoch": 1.4331771582316648, "grad_norm": 0.08879565447568893, "learning_rate": 0.002, "loss": 2.3443, "step": 370740 }, { "epoch": 1.433215815435048, "grad_norm": 0.10276377946138382, "learning_rate": 0.002, "loss": 2.3452, "step": 370750 }, { "epoch": 1.4332544726384313, "grad_norm": 0.09875697642564774, "learning_rate": 0.002, "loss": 2.3487, "step": 370760 }, { "epoch": 1.4332931298418148, "grad_norm": 0.1068931594491005, "learning_rate": 0.002, "loss": 2.3206, "step": 370770 }, { "epoch": 1.433331787045198, "grad_norm": 0.11076817661523819, "learning_rate": 0.002, "loss": 2.3244, "step": 370780 }, { "epoch": 1.4333704442485813, "grad_norm": 0.10693301260471344, "learning_rate": 0.002, "loss": 2.3284, "step": 370790 }, { "epoch": 1.4334091014519645, "grad_norm": 0.12842826545238495, "learning_rate": 0.002, "loss": 2.3307, "step": 370800 }, { "epoch": 1.4334477586553478, "grad_norm": 0.10350693762302399, "learning_rate": 0.002, "loss": 2.3146, "step": 370810 }, { "epoch": 1.433486415858731, "grad_norm": 0.09163505584001541, "learning_rate": 0.002, "loss": 2.3326, "step": 370820 }, { "epoch": 1.4335250730621145, "grad_norm": 0.11667435616254807, "learning_rate": 0.002, "loss": 2.3269, "step": 370830 }, { "epoch": 1.4335637302654978, "grad_norm": 0.10743537545204163, "learning_rate": 0.002, "loss": 2.3297, "step": 370840 }, { "epoch": 1.433602387468881, "grad_norm": 0.1471959948539734, "learning_rate": 0.002, "loss": 2.3454, "step": 370850 }, { "epoch": 1.4336410446722643, "grad_norm": 0.10187181085348129, "learning_rate": 0.002, "loss": 2.3344, "step": 370860 }, { "epoch": 1.4336797018756475, "grad_norm": 0.09852719306945801, "learning_rate": 0.002, "loss": 2.3245, "step": 370870 }, { "epoch": 1.4337183590790308, "grad_norm": 0.10457748174667358, "learning_rate": 0.002, "loss": 2.3216, "step": 370880 }, { "epoch": 1.433757016282414, "grad_norm": 0.12135262787342072, "learning_rate": 0.002, "loss": 2.3566, "step": 370890 }, { "epoch": 1.4337956734857973, "grad_norm": 0.12262275815010071, "learning_rate": 0.002, "loss": 2.3362, "step": 370900 }, { "epoch": 1.4338343306891805, "grad_norm": 0.10123652964830399, "learning_rate": 0.002, "loss": 2.3342, "step": 370910 }, { "epoch": 1.4338729878925638, "grad_norm": 0.10346807539463043, "learning_rate": 0.002, "loss": 2.3341, "step": 370920 }, { "epoch": 1.433911645095947, "grad_norm": 0.1011311262845993, "learning_rate": 0.002, "loss": 2.3338, "step": 370930 }, { "epoch": 1.4339503022993305, "grad_norm": 0.10298784077167511, "learning_rate": 0.002, "loss": 2.324, "step": 370940 }, { "epoch": 1.4339889595027138, "grad_norm": 0.10493668168783188, "learning_rate": 0.002, "loss": 2.3322, "step": 370950 }, { "epoch": 1.434027616706097, "grad_norm": 0.10941628366708755, "learning_rate": 0.002, "loss": 2.335, "step": 370960 }, { "epoch": 1.4340662739094803, "grad_norm": 0.1208387240767479, "learning_rate": 0.002, "loss": 2.3187, "step": 370970 }, { "epoch": 1.4341049311128635, "grad_norm": 0.10983309894800186, "learning_rate": 0.002, "loss": 2.3085, "step": 370980 }, { "epoch": 1.4341435883162468, "grad_norm": 0.10467513650655746, "learning_rate": 0.002, "loss": 2.3432, "step": 370990 }, { "epoch": 1.4341822455196302, "grad_norm": 0.10286015272140503, "learning_rate": 0.002, "loss": 2.3398, "step": 371000 }, { "epoch": 1.4342209027230135, "grad_norm": 0.09991083294153214, "learning_rate": 0.002, "loss": 2.3285, "step": 371010 }, { "epoch": 1.4342595599263968, "grad_norm": 0.09633886069059372, "learning_rate": 0.002, "loss": 2.3447, "step": 371020 }, { "epoch": 1.43429821712978, "grad_norm": 0.10873128473758698, "learning_rate": 0.002, "loss": 2.3215, "step": 371030 }, { "epoch": 1.4343368743331633, "grad_norm": 0.11154817789793015, "learning_rate": 0.002, "loss": 2.3358, "step": 371040 }, { "epoch": 1.4343755315365465, "grad_norm": 0.12193711847066879, "learning_rate": 0.002, "loss": 2.3279, "step": 371050 }, { "epoch": 1.4344141887399298, "grad_norm": 0.0954829752445221, "learning_rate": 0.002, "loss": 2.3397, "step": 371060 }, { "epoch": 1.434452845943313, "grad_norm": 0.1483481079339981, "learning_rate": 0.002, "loss": 2.3415, "step": 371070 }, { "epoch": 1.4344915031466963, "grad_norm": 0.1041557714343071, "learning_rate": 0.002, "loss": 2.3365, "step": 371080 }, { "epoch": 1.4345301603500795, "grad_norm": 0.10764243453741074, "learning_rate": 0.002, "loss": 2.3235, "step": 371090 }, { "epoch": 1.434568817553463, "grad_norm": 0.100117988884449, "learning_rate": 0.002, "loss": 2.3336, "step": 371100 }, { "epoch": 1.4346074747568462, "grad_norm": 0.09908339381217957, "learning_rate": 0.002, "loss": 2.3282, "step": 371110 }, { "epoch": 1.4346461319602295, "grad_norm": 0.11333918571472168, "learning_rate": 0.002, "loss": 2.3281, "step": 371120 }, { "epoch": 1.4346847891636128, "grad_norm": 0.10722298920154572, "learning_rate": 0.002, "loss": 2.3378, "step": 371130 }, { "epoch": 1.434723446366996, "grad_norm": 0.11224247515201569, "learning_rate": 0.002, "loss": 2.336, "step": 371140 }, { "epoch": 1.4347621035703793, "grad_norm": 0.14375075697898865, "learning_rate": 0.002, "loss": 2.3538, "step": 371150 }, { "epoch": 1.4348007607737625, "grad_norm": 0.11185460537672043, "learning_rate": 0.002, "loss": 2.3448, "step": 371160 }, { "epoch": 1.434839417977146, "grad_norm": 0.10739337652921677, "learning_rate": 0.002, "loss": 2.3429, "step": 371170 }, { "epoch": 1.4348780751805292, "grad_norm": 0.108052097260952, "learning_rate": 0.002, "loss": 2.3306, "step": 371180 }, { "epoch": 1.4349167323839125, "grad_norm": 0.11570432037115097, "learning_rate": 0.002, "loss": 2.3518, "step": 371190 }, { "epoch": 1.4349553895872957, "grad_norm": 0.0992949903011322, "learning_rate": 0.002, "loss": 2.3311, "step": 371200 }, { "epoch": 1.434994046790679, "grad_norm": 0.10696582496166229, "learning_rate": 0.002, "loss": 2.3378, "step": 371210 }, { "epoch": 1.4350327039940622, "grad_norm": 0.10340410470962524, "learning_rate": 0.002, "loss": 2.3374, "step": 371220 }, { "epoch": 1.4350713611974455, "grad_norm": 0.10558187961578369, "learning_rate": 0.002, "loss": 2.3448, "step": 371230 }, { "epoch": 1.4351100184008287, "grad_norm": 0.12378218024969101, "learning_rate": 0.002, "loss": 2.3416, "step": 371240 }, { "epoch": 1.435148675604212, "grad_norm": 0.09785618633031845, "learning_rate": 0.002, "loss": 2.3268, "step": 371250 }, { "epoch": 1.4351873328075953, "grad_norm": 0.0971897691488266, "learning_rate": 0.002, "loss": 2.3336, "step": 371260 }, { "epoch": 1.4352259900109787, "grad_norm": 0.12853021919727325, "learning_rate": 0.002, "loss": 2.3299, "step": 371270 }, { "epoch": 1.435264647214362, "grad_norm": 0.0908111110329628, "learning_rate": 0.002, "loss": 2.3299, "step": 371280 }, { "epoch": 1.4353033044177452, "grad_norm": 0.1069912314414978, "learning_rate": 0.002, "loss": 2.3317, "step": 371290 }, { "epoch": 1.4353419616211285, "grad_norm": 0.10096617788076401, "learning_rate": 0.002, "loss": 2.3474, "step": 371300 }, { "epoch": 1.4353806188245117, "grad_norm": 0.10879544913768768, "learning_rate": 0.002, "loss": 2.3365, "step": 371310 }, { "epoch": 1.435419276027895, "grad_norm": 0.1102469265460968, "learning_rate": 0.002, "loss": 2.333, "step": 371320 }, { "epoch": 1.4354579332312782, "grad_norm": 0.09746189415454865, "learning_rate": 0.002, "loss": 2.3136, "step": 371330 }, { "epoch": 1.4354965904346617, "grad_norm": 0.1017862856388092, "learning_rate": 0.002, "loss": 2.3462, "step": 371340 }, { "epoch": 1.435535247638045, "grad_norm": 0.09203585982322693, "learning_rate": 0.002, "loss": 2.3485, "step": 371350 }, { "epoch": 1.4355739048414282, "grad_norm": 0.09847685694694519, "learning_rate": 0.002, "loss": 2.3402, "step": 371360 }, { "epoch": 1.4356125620448115, "grad_norm": 0.11136972904205322, "learning_rate": 0.002, "loss": 2.3349, "step": 371370 }, { "epoch": 1.4356512192481947, "grad_norm": 0.10409975051879883, "learning_rate": 0.002, "loss": 2.3375, "step": 371380 }, { "epoch": 1.435689876451578, "grad_norm": 0.10757970064878464, "learning_rate": 0.002, "loss": 2.3419, "step": 371390 }, { "epoch": 1.4357285336549612, "grad_norm": 0.11430218070745468, "learning_rate": 0.002, "loss": 2.3356, "step": 371400 }, { "epoch": 1.4357671908583445, "grad_norm": 0.11079767346382141, "learning_rate": 0.002, "loss": 2.3478, "step": 371410 }, { "epoch": 1.4358058480617277, "grad_norm": 0.0886615738272667, "learning_rate": 0.002, "loss": 2.3514, "step": 371420 }, { "epoch": 1.435844505265111, "grad_norm": 0.09973558038473129, "learning_rate": 0.002, "loss": 2.3308, "step": 371430 }, { "epoch": 1.4358831624684945, "grad_norm": 0.0972694531083107, "learning_rate": 0.002, "loss": 2.3306, "step": 371440 }, { "epoch": 1.4359218196718777, "grad_norm": 0.11059543490409851, "learning_rate": 0.002, "loss": 2.3218, "step": 371450 }, { "epoch": 1.435960476875261, "grad_norm": 0.09584806114435196, "learning_rate": 0.002, "loss": 2.3368, "step": 371460 }, { "epoch": 1.4359991340786442, "grad_norm": 0.10012396425008774, "learning_rate": 0.002, "loss": 2.3167, "step": 371470 }, { "epoch": 1.4360377912820275, "grad_norm": 0.11090181022882462, "learning_rate": 0.002, "loss": 2.3334, "step": 371480 }, { "epoch": 1.4360764484854107, "grad_norm": 0.10269306600093842, "learning_rate": 0.002, "loss": 2.3298, "step": 371490 }, { "epoch": 1.436115105688794, "grad_norm": 0.11868039518594742, "learning_rate": 0.002, "loss": 2.3259, "step": 371500 }, { "epoch": 1.4361537628921774, "grad_norm": 0.12083914875984192, "learning_rate": 0.002, "loss": 2.3493, "step": 371510 }, { "epoch": 1.4361924200955607, "grad_norm": 0.10587406903505325, "learning_rate": 0.002, "loss": 2.3175, "step": 371520 }, { "epoch": 1.436231077298944, "grad_norm": 0.10817192494869232, "learning_rate": 0.002, "loss": 2.3332, "step": 371530 }, { "epoch": 1.4362697345023272, "grad_norm": 0.09226145595312119, "learning_rate": 0.002, "loss": 2.3317, "step": 371540 }, { "epoch": 1.4363083917057105, "grad_norm": 0.09368760883808136, "learning_rate": 0.002, "loss": 2.3373, "step": 371550 }, { "epoch": 1.4363470489090937, "grad_norm": 0.10719560086727142, "learning_rate": 0.002, "loss": 2.3314, "step": 371560 }, { "epoch": 1.436385706112477, "grad_norm": 0.09072700887918472, "learning_rate": 0.002, "loss": 2.3342, "step": 371570 }, { "epoch": 1.4364243633158602, "grad_norm": 0.0942125990986824, "learning_rate": 0.002, "loss": 2.3313, "step": 371580 }, { "epoch": 1.4364630205192435, "grad_norm": 0.13440567255020142, "learning_rate": 0.002, "loss": 2.3347, "step": 371590 }, { "epoch": 1.4365016777226267, "grad_norm": 0.26764971017837524, "learning_rate": 0.002, "loss": 2.3364, "step": 371600 }, { "epoch": 1.4365403349260102, "grad_norm": 0.14455781877040863, "learning_rate": 0.002, "loss": 2.3425, "step": 371610 }, { "epoch": 1.4365789921293934, "grad_norm": 0.10112746059894562, "learning_rate": 0.002, "loss": 2.3277, "step": 371620 }, { "epoch": 1.4366176493327767, "grad_norm": 0.09363212436437607, "learning_rate": 0.002, "loss": 2.3466, "step": 371630 }, { "epoch": 1.43665630653616, "grad_norm": 0.10647587478160858, "learning_rate": 0.002, "loss": 2.3271, "step": 371640 }, { "epoch": 1.4366949637395432, "grad_norm": 0.10569415986537933, "learning_rate": 0.002, "loss": 2.3339, "step": 371650 }, { "epoch": 1.4367336209429264, "grad_norm": 0.1061614602804184, "learning_rate": 0.002, "loss": 2.3223, "step": 371660 }, { "epoch": 1.4367722781463097, "grad_norm": 0.10273336619138718, "learning_rate": 0.002, "loss": 2.32, "step": 371670 }, { "epoch": 1.4368109353496932, "grad_norm": 0.11125043779611588, "learning_rate": 0.002, "loss": 2.3344, "step": 371680 }, { "epoch": 1.4368495925530764, "grad_norm": 0.09265997260808945, "learning_rate": 0.002, "loss": 2.3194, "step": 371690 }, { "epoch": 1.4368882497564597, "grad_norm": 0.11448623239994049, "learning_rate": 0.002, "loss": 2.3463, "step": 371700 }, { "epoch": 1.436926906959843, "grad_norm": 0.12354601919651031, "learning_rate": 0.002, "loss": 2.3091, "step": 371710 }, { "epoch": 1.4369655641632262, "grad_norm": 0.10530796647071838, "learning_rate": 0.002, "loss": 2.338, "step": 371720 }, { "epoch": 1.4370042213666094, "grad_norm": 0.12017491459846497, "learning_rate": 0.002, "loss": 2.3344, "step": 371730 }, { "epoch": 1.4370428785699927, "grad_norm": 0.14511741697788239, "learning_rate": 0.002, "loss": 2.3383, "step": 371740 }, { "epoch": 1.437081535773376, "grad_norm": 0.09568100422620773, "learning_rate": 0.002, "loss": 2.3272, "step": 371750 }, { "epoch": 1.4371201929767592, "grad_norm": 0.09584043174982071, "learning_rate": 0.002, "loss": 2.3411, "step": 371760 }, { "epoch": 1.4371588501801424, "grad_norm": 0.10420922189950943, "learning_rate": 0.002, "loss": 2.3296, "step": 371770 }, { "epoch": 1.437197507383526, "grad_norm": 0.11375915259122849, "learning_rate": 0.002, "loss": 2.3334, "step": 371780 }, { "epoch": 1.4372361645869092, "grad_norm": 0.10612479597330093, "learning_rate": 0.002, "loss": 2.3438, "step": 371790 }, { "epoch": 1.4372748217902924, "grad_norm": 0.10375890135765076, "learning_rate": 0.002, "loss": 2.3383, "step": 371800 }, { "epoch": 1.4373134789936757, "grad_norm": 0.09391949325799942, "learning_rate": 0.002, "loss": 2.3363, "step": 371810 }, { "epoch": 1.437352136197059, "grad_norm": 0.10322212427854538, "learning_rate": 0.002, "loss": 2.3426, "step": 371820 }, { "epoch": 1.4373907934004422, "grad_norm": 0.10103631019592285, "learning_rate": 0.002, "loss": 2.3314, "step": 371830 }, { "epoch": 1.4374294506038257, "grad_norm": 0.11324197798967361, "learning_rate": 0.002, "loss": 2.3302, "step": 371840 }, { "epoch": 1.437468107807209, "grad_norm": 0.10173353552818298, "learning_rate": 0.002, "loss": 2.3318, "step": 371850 }, { "epoch": 1.4375067650105922, "grad_norm": 0.10180971771478653, "learning_rate": 0.002, "loss": 2.3252, "step": 371860 }, { "epoch": 1.4375454222139754, "grad_norm": 0.14309395849704742, "learning_rate": 0.002, "loss": 2.3412, "step": 371870 }, { "epoch": 1.4375840794173587, "grad_norm": 0.11053165793418884, "learning_rate": 0.002, "loss": 2.3341, "step": 371880 }, { "epoch": 1.437622736620742, "grad_norm": 0.10481183230876923, "learning_rate": 0.002, "loss": 2.3273, "step": 371890 }, { "epoch": 1.4376613938241252, "grad_norm": 0.13200727105140686, "learning_rate": 0.002, "loss": 2.3266, "step": 371900 }, { "epoch": 1.4377000510275084, "grad_norm": 0.0949479267001152, "learning_rate": 0.002, "loss": 2.3289, "step": 371910 }, { "epoch": 1.4377387082308917, "grad_norm": 0.11998516321182251, "learning_rate": 0.002, "loss": 2.3428, "step": 371920 }, { "epoch": 1.437777365434275, "grad_norm": 0.1394270956516266, "learning_rate": 0.002, "loss": 2.3361, "step": 371930 }, { "epoch": 1.4378160226376582, "grad_norm": 0.09979026019573212, "learning_rate": 0.002, "loss": 2.3167, "step": 371940 }, { "epoch": 1.4378546798410416, "grad_norm": 0.10514336824417114, "learning_rate": 0.002, "loss": 2.3366, "step": 371950 }, { "epoch": 1.437893337044425, "grad_norm": 0.09712082147598267, "learning_rate": 0.002, "loss": 2.3536, "step": 371960 }, { "epoch": 1.4379319942478082, "grad_norm": 0.10522717982530594, "learning_rate": 0.002, "loss": 2.3236, "step": 371970 }, { "epoch": 1.4379706514511914, "grad_norm": 0.10137762129306793, "learning_rate": 0.002, "loss": 2.3213, "step": 371980 }, { "epoch": 1.4380093086545747, "grad_norm": 0.1008489802479744, "learning_rate": 0.002, "loss": 2.3364, "step": 371990 }, { "epoch": 1.438047965857958, "grad_norm": 0.12621283531188965, "learning_rate": 0.002, "loss": 2.3385, "step": 372000 }, { "epoch": 1.4380866230613414, "grad_norm": 0.12713102996349335, "learning_rate": 0.002, "loss": 2.3391, "step": 372010 }, { "epoch": 1.4381252802647246, "grad_norm": 0.13091817498207092, "learning_rate": 0.002, "loss": 2.3387, "step": 372020 }, { "epoch": 1.438163937468108, "grad_norm": 0.09806538373231888, "learning_rate": 0.002, "loss": 2.3253, "step": 372030 }, { "epoch": 1.4382025946714911, "grad_norm": 0.12110111862421036, "learning_rate": 0.002, "loss": 2.337, "step": 372040 }, { "epoch": 1.4382412518748744, "grad_norm": 0.10088954120874405, "learning_rate": 0.002, "loss": 2.3385, "step": 372050 }, { "epoch": 1.4382799090782576, "grad_norm": 0.12509940564632416, "learning_rate": 0.002, "loss": 2.3374, "step": 372060 }, { "epoch": 1.438318566281641, "grad_norm": 0.11874911934137344, "learning_rate": 0.002, "loss": 2.3291, "step": 372070 }, { "epoch": 1.4383572234850242, "grad_norm": 0.12909932434558868, "learning_rate": 0.002, "loss": 2.3266, "step": 372080 }, { "epoch": 1.4383958806884074, "grad_norm": 0.09152228385210037, "learning_rate": 0.002, "loss": 2.3356, "step": 372090 }, { "epoch": 1.4384345378917907, "grad_norm": 0.1060309037566185, "learning_rate": 0.002, "loss": 2.342, "step": 372100 }, { "epoch": 1.438473195095174, "grad_norm": 0.1071513444185257, "learning_rate": 0.002, "loss": 2.3482, "step": 372110 }, { "epoch": 1.4385118522985574, "grad_norm": 0.1377035528421402, "learning_rate": 0.002, "loss": 2.3326, "step": 372120 }, { "epoch": 1.4385505095019406, "grad_norm": 0.11575419455766678, "learning_rate": 0.002, "loss": 2.339, "step": 372130 }, { "epoch": 1.4385891667053239, "grad_norm": 0.12348438054323196, "learning_rate": 0.002, "loss": 2.35, "step": 372140 }, { "epoch": 1.4386278239087071, "grad_norm": 0.09979524463415146, "learning_rate": 0.002, "loss": 2.3386, "step": 372150 }, { "epoch": 1.4386664811120904, "grad_norm": 0.10092800855636597, "learning_rate": 0.002, "loss": 2.3223, "step": 372160 }, { "epoch": 1.4387051383154736, "grad_norm": 0.09440913051366806, "learning_rate": 0.002, "loss": 2.3387, "step": 372170 }, { "epoch": 1.4387437955188571, "grad_norm": 0.12174414098262787, "learning_rate": 0.002, "loss": 2.3351, "step": 372180 }, { "epoch": 1.4387824527222404, "grad_norm": 0.10224572569131851, "learning_rate": 0.002, "loss": 2.3194, "step": 372190 }, { "epoch": 1.4388211099256236, "grad_norm": 0.1092875599861145, "learning_rate": 0.002, "loss": 2.3447, "step": 372200 }, { "epoch": 1.4388597671290069, "grad_norm": 0.10130403935909271, "learning_rate": 0.002, "loss": 2.3349, "step": 372210 }, { "epoch": 1.4388984243323901, "grad_norm": 0.12170014530420303, "learning_rate": 0.002, "loss": 2.3579, "step": 372220 }, { "epoch": 1.4389370815357734, "grad_norm": 0.1164335086941719, "learning_rate": 0.002, "loss": 2.3388, "step": 372230 }, { "epoch": 1.4389757387391566, "grad_norm": 0.10222030431032181, "learning_rate": 0.002, "loss": 2.3359, "step": 372240 }, { "epoch": 1.4390143959425399, "grad_norm": 0.11554889380931854, "learning_rate": 0.002, "loss": 2.3138, "step": 372250 }, { "epoch": 1.4390530531459231, "grad_norm": 0.11896473169326782, "learning_rate": 0.002, "loss": 2.3273, "step": 372260 }, { "epoch": 1.4390917103493064, "grad_norm": 0.09890048950910568, "learning_rate": 0.002, "loss": 2.3402, "step": 372270 }, { "epoch": 1.4391303675526896, "grad_norm": 0.10989270359277725, "learning_rate": 0.002, "loss": 2.3345, "step": 372280 }, { "epoch": 1.4391690247560731, "grad_norm": 0.10503815114498138, "learning_rate": 0.002, "loss": 2.3277, "step": 372290 }, { "epoch": 1.4392076819594564, "grad_norm": 0.10028046369552612, "learning_rate": 0.002, "loss": 2.3421, "step": 372300 }, { "epoch": 1.4392463391628396, "grad_norm": 0.10095027834177017, "learning_rate": 0.002, "loss": 2.3333, "step": 372310 }, { "epoch": 1.4392849963662229, "grad_norm": 0.11232791095972061, "learning_rate": 0.002, "loss": 2.3353, "step": 372320 }, { "epoch": 1.4393236535696061, "grad_norm": 0.11338062584400177, "learning_rate": 0.002, "loss": 2.3398, "step": 372330 }, { "epoch": 1.4393623107729894, "grad_norm": 0.12447841465473175, "learning_rate": 0.002, "loss": 2.3374, "step": 372340 }, { "epoch": 1.4394009679763728, "grad_norm": 0.10727968066930771, "learning_rate": 0.002, "loss": 2.3289, "step": 372350 }, { "epoch": 1.439439625179756, "grad_norm": 0.12589871883392334, "learning_rate": 0.002, "loss": 2.3306, "step": 372360 }, { "epoch": 1.4394782823831394, "grad_norm": 0.09049642831087112, "learning_rate": 0.002, "loss": 2.3187, "step": 372370 }, { "epoch": 1.4395169395865226, "grad_norm": 0.10203811526298523, "learning_rate": 0.002, "loss": 2.3366, "step": 372380 }, { "epoch": 1.4395555967899059, "grad_norm": 0.11543015390634537, "learning_rate": 0.002, "loss": 2.3265, "step": 372390 }, { "epoch": 1.439594253993289, "grad_norm": 0.10948812961578369, "learning_rate": 0.002, "loss": 2.3258, "step": 372400 }, { "epoch": 1.4396329111966724, "grad_norm": 0.11874116957187653, "learning_rate": 0.002, "loss": 2.335, "step": 372410 }, { "epoch": 1.4396715684000556, "grad_norm": 0.09726309031248093, "learning_rate": 0.002, "loss": 2.3535, "step": 372420 }, { "epoch": 1.4397102256034389, "grad_norm": 0.10006396472454071, "learning_rate": 0.002, "loss": 2.3336, "step": 372430 }, { "epoch": 1.4397488828068221, "grad_norm": 0.10876845568418503, "learning_rate": 0.002, "loss": 2.3474, "step": 372440 }, { "epoch": 1.4397875400102054, "grad_norm": 0.10026299953460693, "learning_rate": 0.002, "loss": 2.3279, "step": 372450 }, { "epoch": 1.4398261972135888, "grad_norm": 0.0973251610994339, "learning_rate": 0.002, "loss": 2.3359, "step": 372460 }, { "epoch": 1.439864854416972, "grad_norm": 0.11038711667060852, "learning_rate": 0.002, "loss": 2.329, "step": 372470 }, { "epoch": 1.4399035116203553, "grad_norm": 0.11058340966701508, "learning_rate": 0.002, "loss": 2.3419, "step": 372480 }, { "epoch": 1.4399421688237386, "grad_norm": 0.09188947081565857, "learning_rate": 0.002, "loss": 2.3379, "step": 372490 }, { "epoch": 1.4399808260271219, "grad_norm": 0.1101926639676094, "learning_rate": 0.002, "loss": 2.336, "step": 372500 }, { "epoch": 1.440019483230505, "grad_norm": 0.2913362681865692, "learning_rate": 0.002, "loss": 2.3484, "step": 372510 }, { "epoch": 1.4400581404338886, "grad_norm": 0.13520562648773193, "learning_rate": 0.002, "loss": 2.3509, "step": 372520 }, { "epoch": 1.4400967976372718, "grad_norm": 0.09579768031835556, "learning_rate": 0.002, "loss": 2.3318, "step": 372530 }, { "epoch": 1.440135454840655, "grad_norm": 0.1331591010093689, "learning_rate": 0.002, "loss": 2.3323, "step": 372540 }, { "epoch": 1.4401741120440383, "grad_norm": 0.10888656973838806, "learning_rate": 0.002, "loss": 2.3248, "step": 372550 }, { "epoch": 1.4402127692474216, "grad_norm": 0.09306596219539642, "learning_rate": 0.002, "loss": 2.3304, "step": 372560 }, { "epoch": 1.4402514264508048, "grad_norm": 0.104879230260849, "learning_rate": 0.002, "loss": 2.3394, "step": 372570 }, { "epoch": 1.440290083654188, "grad_norm": 0.09254723787307739, "learning_rate": 0.002, "loss": 2.3313, "step": 372580 }, { "epoch": 1.4403287408575713, "grad_norm": 0.09524793177843094, "learning_rate": 0.002, "loss": 2.3348, "step": 372590 }, { "epoch": 1.4403673980609546, "grad_norm": 0.09294036030769348, "learning_rate": 0.002, "loss": 2.342, "step": 372600 }, { "epoch": 1.4404060552643378, "grad_norm": 0.11674389243125916, "learning_rate": 0.002, "loss": 2.3211, "step": 372610 }, { "epoch": 1.440444712467721, "grad_norm": 0.10271896421909332, "learning_rate": 0.002, "loss": 2.3306, "step": 372620 }, { "epoch": 1.4404833696711046, "grad_norm": 0.11283275485038757, "learning_rate": 0.002, "loss": 2.3302, "step": 372630 }, { "epoch": 1.4405220268744878, "grad_norm": 0.1119888424873352, "learning_rate": 0.002, "loss": 2.3176, "step": 372640 }, { "epoch": 1.440560684077871, "grad_norm": 0.09449056535959244, "learning_rate": 0.002, "loss": 2.3314, "step": 372650 }, { "epoch": 1.4405993412812543, "grad_norm": 0.09541455656290054, "learning_rate": 0.002, "loss": 2.3301, "step": 372660 }, { "epoch": 1.4406379984846376, "grad_norm": 0.1262357532978058, "learning_rate": 0.002, "loss": 2.3368, "step": 372670 }, { "epoch": 1.4406766556880208, "grad_norm": 0.10903370380401611, "learning_rate": 0.002, "loss": 2.331, "step": 372680 }, { "epoch": 1.4407153128914043, "grad_norm": 0.11235390603542328, "learning_rate": 0.002, "loss": 2.3327, "step": 372690 }, { "epoch": 1.4407539700947876, "grad_norm": 0.1031249538064003, "learning_rate": 0.002, "loss": 2.3367, "step": 372700 }, { "epoch": 1.4407926272981708, "grad_norm": 0.10202572494745255, "learning_rate": 0.002, "loss": 2.3438, "step": 372710 }, { "epoch": 1.440831284501554, "grad_norm": 0.10625158995389938, "learning_rate": 0.002, "loss": 2.327, "step": 372720 }, { "epoch": 1.4408699417049373, "grad_norm": 0.10402391105890274, "learning_rate": 0.002, "loss": 2.3145, "step": 372730 }, { "epoch": 1.4409085989083206, "grad_norm": 0.09886647760868073, "learning_rate": 0.002, "loss": 2.3351, "step": 372740 }, { "epoch": 1.4409472561117038, "grad_norm": 0.10791648924350739, "learning_rate": 0.002, "loss": 2.3272, "step": 372750 }, { "epoch": 1.440985913315087, "grad_norm": 0.10573247075080872, "learning_rate": 0.002, "loss": 2.3566, "step": 372760 }, { "epoch": 1.4410245705184703, "grad_norm": 0.08799899369478226, "learning_rate": 0.002, "loss": 2.3311, "step": 372770 }, { "epoch": 1.4410632277218536, "grad_norm": 0.12291400879621506, "learning_rate": 0.002, "loss": 2.3315, "step": 372780 }, { "epoch": 1.4411018849252368, "grad_norm": 0.09974503517150879, "learning_rate": 0.002, "loss": 2.3419, "step": 372790 }, { "epoch": 1.4411405421286203, "grad_norm": 0.09110969305038452, "learning_rate": 0.002, "loss": 2.3266, "step": 372800 }, { "epoch": 1.4411791993320036, "grad_norm": 0.09680027514696121, "learning_rate": 0.002, "loss": 2.3544, "step": 372810 }, { "epoch": 1.4412178565353868, "grad_norm": 0.10247483104467392, "learning_rate": 0.002, "loss": 2.3184, "step": 372820 }, { "epoch": 1.44125651373877, "grad_norm": 0.13590359687805176, "learning_rate": 0.002, "loss": 2.3347, "step": 372830 }, { "epoch": 1.4412951709421533, "grad_norm": 0.0950893685221672, "learning_rate": 0.002, "loss": 2.3321, "step": 372840 }, { "epoch": 1.4413338281455366, "grad_norm": 0.10643938183784485, "learning_rate": 0.002, "loss": 2.3454, "step": 372850 }, { "epoch": 1.44137248534892, "grad_norm": 0.09532453864812851, "learning_rate": 0.002, "loss": 2.3329, "step": 372860 }, { "epoch": 1.4414111425523033, "grad_norm": 0.10018929094076157, "learning_rate": 0.002, "loss": 2.3295, "step": 372870 }, { "epoch": 1.4414497997556865, "grad_norm": 0.1031702384352684, "learning_rate": 0.002, "loss": 2.3468, "step": 372880 }, { "epoch": 1.4414884569590698, "grad_norm": 0.11543572694063187, "learning_rate": 0.002, "loss": 2.3474, "step": 372890 }, { "epoch": 1.441527114162453, "grad_norm": 0.08839228004217148, "learning_rate": 0.002, "loss": 2.3202, "step": 372900 }, { "epoch": 1.4415657713658363, "grad_norm": 0.11016961932182312, "learning_rate": 0.002, "loss": 2.3151, "step": 372910 }, { "epoch": 1.4416044285692196, "grad_norm": 0.10460019111633301, "learning_rate": 0.002, "loss": 2.3444, "step": 372920 }, { "epoch": 1.4416430857726028, "grad_norm": 0.14126625657081604, "learning_rate": 0.002, "loss": 2.3414, "step": 372930 }, { "epoch": 1.441681742975986, "grad_norm": 0.10394603759050369, "learning_rate": 0.002, "loss": 2.3515, "step": 372940 }, { "epoch": 1.4417204001793693, "grad_norm": 0.10750212520360947, "learning_rate": 0.002, "loss": 2.3286, "step": 372950 }, { "epoch": 1.4417590573827528, "grad_norm": 0.1336800754070282, "learning_rate": 0.002, "loss": 2.3298, "step": 372960 }, { "epoch": 1.441797714586136, "grad_norm": 0.11134114116430283, "learning_rate": 0.002, "loss": 2.3421, "step": 372970 }, { "epoch": 1.4418363717895193, "grad_norm": 0.10422998666763306, "learning_rate": 0.002, "loss": 2.3353, "step": 372980 }, { "epoch": 1.4418750289929025, "grad_norm": 0.10585644096136093, "learning_rate": 0.002, "loss": 2.34, "step": 372990 }, { "epoch": 1.4419136861962858, "grad_norm": 0.11790245026350021, "learning_rate": 0.002, "loss": 2.3297, "step": 373000 }, { "epoch": 1.441952343399669, "grad_norm": 0.10526230931282043, "learning_rate": 0.002, "loss": 2.3371, "step": 373010 }, { "epoch": 1.4419910006030523, "grad_norm": 0.0975332036614418, "learning_rate": 0.002, "loss": 2.3344, "step": 373020 }, { "epoch": 1.4420296578064358, "grad_norm": 0.10068307816982269, "learning_rate": 0.002, "loss": 2.3247, "step": 373030 }, { "epoch": 1.442068315009819, "grad_norm": 0.11506905406713486, "learning_rate": 0.002, "loss": 2.3324, "step": 373040 }, { "epoch": 1.4421069722132023, "grad_norm": 0.10308445990085602, "learning_rate": 0.002, "loss": 2.3465, "step": 373050 }, { "epoch": 1.4421456294165855, "grad_norm": 0.10953045636415482, "learning_rate": 0.002, "loss": 2.3242, "step": 373060 }, { "epoch": 1.4421842866199688, "grad_norm": 0.10586000978946686, "learning_rate": 0.002, "loss": 2.3512, "step": 373070 }, { "epoch": 1.442222943823352, "grad_norm": 0.11068696528673172, "learning_rate": 0.002, "loss": 2.3288, "step": 373080 }, { "epoch": 1.4422616010267353, "grad_norm": 0.10807348042726517, "learning_rate": 0.002, "loss": 2.3155, "step": 373090 }, { "epoch": 1.4423002582301185, "grad_norm": 0.10999464243650436, "learning_rate": 0.002, "loss": 2.3452, "step": 373100 }, { "epoch": 1.4423389154335018, "grad_norm": 0.0915793851017952, "learning_rate": 0.002, "loss": 2.3556, "step": 373110 }, { "epoch": 1.442377572636885, "grad_norm": 0.1043555811047554, "learning_rate": 0.002, "loss": 2.3262, "step": 373120 }, { "epoch": 1.4424162298402685, "grad_norm": 0.1145467758178711, "learning_rate": 0.002, "loss": 2.3281, "step": 373130 }, { "epoch": 1.4424548870436518, "grad_norm": 0.10055268555879593, "learning_rate": 0.002, "loss": 2.3286, "step": 373140 }, { "epoch": 1.442493544247035, "grad_norm": 0.11176237463951111, "learning_rate": 0.002, "loss": 2.3241, "step": 373150 }, { "epoch": 1.4425322014504183, "grad_norm": 0.10501953214406967, "learning_rate": 0.002, "loss": 2.3336, "step": 373160 }, { "epoch": 1.4425708586538015, "grad_norm": 0.09456057846546173, "learning_rate": 0.002, "loss": 2.3438, "step": 373170 }, { "epoch": 1.4426095158571848, "grad_norm": 0.09280961751937866, "learning_rate": 0.002, "loss": 2.344, "step": 373180 }, { "epoch": 1.442648173060568, "grad_norm": 0.10198284685611725, "learning_rate": 0.002, "loss": 2.3348, "step": 373190 }, { "epoch": 1.4426868302639515, "grad_norm": 0.10452098399400711, "learning_rate": 0.002, "loss": 2.3302, "step": 373200 }, { "epoch": 1.4427254874673348, "grad_norm": 0.09441589564085007, "learning_rate": 0.002, "loss": 2.3248, "step": 373210 }, { "epoch": 1.442764144670718, "grad_norm": 0.11990165710449219, "learning_rate": 0.002, "loss": 2.3231, "step": 373220 }, { "epoch": 1.4428028018741013, "grad_norm": 0.10947558283805847, "learning_rate": 0.002, "loss": 2.331, "step": 373230 }, { "epoch": 1.4428414590774845, "grad_norm": 0.1156168282032013, "learning_rate": 0.002, "loss": 2.3442, "step": 373240 }, { "epoch": 1.4428801162808678, "grad_norm": 0.09495019912719727, "learning_rate": 0.002, "loss": 2.3326, "step": 373250 }, { "epoch": 1.442918773484251, "grad_norm": 0.1100836843252182, "learning_rate": 0.002, "loss": 2.3411, "step": 373260 }, { "epoch": 1.4429574306876343, "grad_norm": 0.11163510382175446, "learning_rate": 0.002, "loss": 2.3317, "step": 373270 }, { "epoch": 1.4429960878910175, "grad_norm": 0.09736929833889008, "learning_rate": 0.002, "loss": 2.337, "step": 373280 }, { "epoch": 1.4430347450944008, "grad_norm": 0.09500592201948166, "learning_rate": 0.002, "loss": 2.337, "step": 373290 }, { "epoch": 1.4430734022977842, "grad_norm": 0.11622362583875656, "learning_rate": 0.002, "loss": 2.3403, "step": 373300 }, { "epoch": 1.4431120595011675, "grad_norm": 0.09297885000705719, "learning_rate": 0.002, "loss": 2.3331, "step": 373310 }, { "epoch": 1.4431507167045508, "grad_norm": 0.11191659420728683, "learning_rate": 0.002, "loss": 2.337, "step": 373320 }, { "epoch": 1.443189373907934, "grad_norm": 0.11442656069993973, "learning_rate": 0.002, "loss": 2.3221, "step": 373330 }, { "epoch": 1.4432280311113173, "grad_norm": 0.1114255040884018, "learning_rate": 0.002, "loss": 2.3377, "step": 373340 }, { "epoch": 1.4432666883147005, "grad_norm": 0.09715000540018082, "learning_rate": 0.002, "loss": 2.3452, "step": 373350 }, { "epoch": 1.4433053455180838, "grad_norm": 0.10942159593105316, "learning_rate": 0.002, "loss": 2.3296, "step": 373360 }, { "epoch": 1.4433440027214672, "grad_norm": 0.10923704504966736, "learning_rate": 0.002, "loss": 2.3236, "step": 373370 }, { "epoch": 1.4433826599248505, "grad_norm": 0.10843712091445923, "learning_rate": 0.002, "loss": 2.3379, "step": 373380 }, { "epoch": 1.4434213171282337, "grad_norm": 0.1049380972981453, "learning_rate": 0.002, "loss": 2.3285, "step": 373390 }, { "epoch": 1.443459974331617, "grad_norm": 0.10164526849985123, "learning_rate": 0.002, "loss": 2.3341, "step": 373400 }, { "epoch": 1.4434986315350002, "grad_norm": 0.09803440421819687, "learning_rate": 0.002, "loss": 2.3347, "step": 373410 }, { "epoch": 1.4435372887383835, "grad_norm": 0.09612613916397095, "learning_rate": 0.002, "loss": 2.3279, "step": 373420 }, { "epoch": 1.4435759459417667, "grad_norm": 0.1065620705485344, "learning_rate": 0.002, "loss": 2.3306, "step": 373430 }, { "epoch": 1.44361460314515, "grad_norm": 0.10151802748441696, "learning_rate": 0.002, "loss": 2.3358, "step": 373440 }, { "epoch": 1.4436532603485333, "grad_norm": 0.09904639422893524, "learning_rate": 0.002, "loss": 2.3175, "step": 373450 }, { "epoch": 1.4436919175519165, "grad_norm": 0.09335681051015854, "learning_rate": 0.002, "loss": 2.3418, "step": 373460 }, { "epoch": 1.4437305747553, "grad_norm": 0.10069163888692856, "learning_rate": 0.002, "loss": 2.3247, "step": 373470 }, { "epoch": 1.4437692319586832, "grad_norm": 0.15649782121181488, "learning_rate": 0.002, "loss": 2.3343, "step": 373480 }, { "epoch": 1.4438078891620665, "grad_norm": 0.10765941441059113, "learning_rate": 0.002, "loss": 2.3203, "step": 373490 }, { "epoch": 1.4438465463654497, "grad_norm": 0.10586480796337128, "learning_rate": 0.002, "loss": 2.3349, "step": 373500 }, { "epoch": 1.443885203568833, "grad_norm": 0.09421324729919434, "learning_rate": 0.002, "loss": 2.3263, "step": 373510 }, { "epoch": 1.4439238607722162, "grad_norm": 0.08989991247653961, "learning_rate": 0.002, "loss": 2.3391, "step": 373520 }, { "epoch": 1.4439625179755995, "grad_norm": 0.10273010283708572, "learning_rate": 0.002, "loss": 2.34, "step": 373530 }, { "epoch": 1.444001175178983, "grad_norm": 0.11481129378080368, "learning_rate": 0.002, "loss": 2.3334, "step": 373540 }, { "epoch": 1.4440398323823662, "grad_norm": 0.12491723895072937, "learning_rate": 0.002, "loss": 2.3349, "step": 373550 }, { "epoch": 1.4440784895857495, "grad_norm": 0.10945707559585571, "learning_rate": 0.002, "loss": 2.3253, "step": 373560 }, { "epoch": 1.4441171467891327, "grad_norm": 0.10013280808925629, "learning_rate": 0.002, "loss": 2.3438, "step": 373570 }, { "epoch": 1.444155803992516, "grad_norm": 0.10499537736177444, "learning_rate": 0.002, "loss": 2.3396, "step": 373580 }, { "epoch": 1.4441944611958992, "grad_norm": 0.08887854218482971, "learning_rate": 0.002, "loss": 2.3292, "step": 373590 }, { "epoch": 1.4442331183992825, "grad_norm": 0.10871432721614838, "learning_rate": 0.002, "loss": 2.3402, "step": 373600 }, { "epoch": 1.4442717756026657, "grad_norm": 0.1107456162571907, "learning_rate": 0.002, "loss": 2.345, "step": 373610 }, { "epoch": 1.444310432806049, "grad_norm": 0.11917014420032501, "learning_rate": 0.002, "loss": 2.334, "step": 373620 }, { "epoch": 1.4443490900094322, "grad_norm": 0.10580477118492126, "learning_rate": 0.002, "loss": 2.3358, "step": 373630 }, { "epoch": 1.4443877472128157, "grad_norm": 0.10364178568124771, "learning_rate": 0.002, "loss": 2.3317, "step": 373640 }, { "epoch": 1.444426404416199, "grad_norm": 0.11463356763124466, "learning_rate": 0.002, "loss": 2.336, "step": 373650 }, { "epoch": 1.4444650616195822, "grad_norm": 0.09823574125766754, "learning_rate": 0.002, "loss": 2.3211, "step": 373660 }, { "epoch": 1.4445037188229655, "grad_norm": 0.13183921575546265, "learning_rate": 0.002, "loss": 2.3514, "step": 373670 }, { "epoch": 1.4445423760263487, "grad_norm": 0.09379124641418457, "learning_rate": 0.002, "loss": 2.3397, "step": 373680 }, { "epoch": 1.444581033229732, "grad_norm": 0.08960752934217453, "learning_rate": 0.002, "loss": 2.349, "step": 373690 }, { "epoch": 1.4446196904331154, "grad_norm": 0.12396460771560669, "learning_rate": 0.002, "loss": 2.3359, "step": 373700 }, { "epoch": 1.4446583476364987, "grad_norm": 0.10179699957370758, "learning_rate": 0.002, "loss": 2.3238, "step": 373710 }, { "epoch": 1.444697004839882, "grad_norm": 0.10384551435709, "learning_rate": 0.002, "loss": 2.3306, "step": 373720 }, { "epoch": 1.4447356620432652, "grad_norm": 0.10939621925354004, "learning_rate": 0.002, "loss": 2.3473, "step": 373730 }, { "epoch": 1.4447743192466485, "grad_norm": 0.10100575536489487, "learning_rate": 0.002, "loss": 2.3405, "step": 373740 }, { "epoch": 1.4448129764500317, "grad_norm": 0.10216158628463745, "learning_rate": 0.002, "loss": 2.3495, "step": 373750 }, { "epoch": 1.444851633653415, "grad_norm": 0.1268494725227356, "learning_rate": 0.002, "loss": 2.3337, "step": 373760 }, { "epoch": 1.4448902908567982, "grad_norm": 0.10251862555742264, "learning_rate": 0.002, "loss": 2.3474, "step": 373770 }, { "epoch": 1.4449289480601815, "grad_norm": 0.1032252311706543, "learning_rate": 0.002, "loss": 2.3386, "step": 373780 }, { "epoch": 1.4449676052635647, "grad_norm": 0.09969396889209747, "learning_rate": 0.002, "loss": 2.334, "step": 373790 }, { "epoch": 1.445006262466948, "grad_norm": 0.11264482885599136, "learning_rate": 0.002, "loss": 2.3368, "step": 373800 }, { "epoch": 1.4450449196703314, "grad_norm": 0.10568080842494965, "learning_rate": 0.002, "loss": 2.3362, "step": 373810 }, { "epoch": 1.4450835768737147, "grad_norm": 0.09127136319875717, "learning_rate": 0.002, "loss": 2.343, "step": 373820 }, { "epoch": 1.445122234077098, "grad_norm": 0.10587592422962189, "learning_rate": 0.002, "loss": 2.3273, "step": 373830 }, { "epoch": 1.4451608912804812, "grad_norm": 0.11365770548582077, "learning_rate": 0.002, "loss": 2.3302, "step": 373840 }, { "epoch": 1.4451995484838644, "grad_norm": 0.11311656981706619, "learning_rate": 0.002, "loss": 2.3463, "step": 373850 }, { "epoch": 1.4452382056872477, "grad_norm": 0.10600202530622482, "learning_rate": 0.002, "loss": 2.3233, "step": 373860 }, { "epoch": 1.4452768628906312, "grad_norm": 0.12684810161590576, "learning_rate": 0.002, "loss": 2.3245, "step": 373870 }, { "epoch": 1.4453155200940144, "grad_norm": 0.11892486363649368, "learning_rate": 0.002, "loss": 2.3182, "step": 373880 }, { "epoch": 1.4453541772973977, "grad_norm": 0.09933153539896011, "learning_rate": 0.002, "loss": 2.3354, "step": 373890 }, { "epoch": 1.445392834500781, "grad_norm": 0.11293446272611618, "learning_rate": 0.002, "loss": 2.3471, "step": 373900 }, { "epoch": 1.4454314917041642, "grad_norm": 0.1038590744137764, "learning_rate": 0.002, "loss": 2.322, "step": 373910 }, { "epoch": 1.4454701489075474, "grad_norm": 0.11509344726800919, "learning_rate": 0.002, "loss": 2.333, "step": 373920 }, { "epoch": 1.4455088061109307, "grad_norm": 0.0943727046251297, "learning_rate": 0.002, "loss": 2.3236, "step": 373930 }, { "epoch": 1.445547463314314, "grad_norm": 0.10036980360746384, "learning_rate": 0.002, "loss": 2.3255, "step": 373940 }, { "epoch": 1.4455861205176972, "grad_norm": 0.11143596470355988, "learning_rate": 0.002, "loss": 2.3364, "step": 373950 }, { "epoch": 1.4456247777210804, "grad_norm": 0.10541340708732605, "learning_rate": 0.002, "loss": 2.3253, "step": 373960 }, { "epoch": 1.4456634349244637, "grad_norm": 0.10758457332849503, "learning_rate": 0.002, "loss": 2.3346, "step": 373970 }, { "epoch": 1.4457020921278472, "grad_norm": 0.12086843699216843, "learning_rate": 0.002, "loss": 2.3389, "step": 373980 }, { "epoch": 1.4457407493312304, "grad_norm": 0.09697914123535156, "learning_rate": 0.002, "loss": 2.3433, "step": 373990 }, { "epoch": 1.4457794065346137, "grad_norm": 0.09337199479341507, "learning_rate": 0.002, "loss": 2.3191, "step": 374000 }, { "epoch": 1.445818063737997, "grad_norm": 0.14395758509635925, "learning_rate": 0.002, "loss": 2.3344, "step": 374010 }, { "epoch": 1.4458567209413802, "grad_norm": 0.11079844832420349, "learning_rate": 0.002, "loss": 2.3355, "step": 374020 }, { "epoch": 1.4458953781447634, "grad_norm": 0.1177358403801918, "learning_rate": 0.002, "loss": 2.327, "step": 374030 }, { "epoch": 1.445934035348147, "grad_norm": 0.10449257493019104, "learning_rate": 0.002, "loss": 2.3178, "step": 374040 }, { "epoch": 1.4459726925515302, "grad_norm": 0.10623761266469955, "learning_rate": 0.002, "loss": 2.3429, "step": 374050 }, { "epoch": 1.4460113497549134, "grad_norm": 0.11113753914833069, "learning_rate": 0.002, "loss": 2.3408, "step": 374060 }, { "epoch": 1.4460500069582967, "grad_norm": 0.10962066054344177, "learning_rate": 0.002, "loss": 2.3208, "step": 374070 }, { "epoch": 1.44608866416168, "grad_norm": 0.11375898867845535, "learning_rate": 0.002, "loss": 2.3277, "step": 374080 }, { "epoch": 1.4461273213650632, "grad_norm": 0.08839374035596848, "learning_rate": 0.002, "loss": 2.3213, "step": 374090 }, { "epoch": 1.4461659785684464, "grad_norm": 0.11471235752105713, "learning_rate": 0.002, "loss": 2.3525, "step": 374100 }, { "epoch": 1.4462046357718297, "grad_norm": 0.10845217853784561, "learning_rate": 0.002, "loss": 2.3458, "step": 374110 }, { "epoch": 1.446243292975213, "grad_norm": 0.10325498133897781, "learning_rate": 0.002, "loss": 2.3433, "step": 374120 }, { "epoch": 1.4462819501785962, "grad_norm": 0.0919291228055954, "learning_rate": 0.002, "loss": 2.3454, "step": 374130 }, { "epoch": 1.4463206073819794, "grad_norm": 0.10365841537714005, "learning_rate": 0.002, "loss": 2.3486, "step": 374140 }, { "epoch": 1.446359264585363, "grad_norm": 0.0947713851928711, "learning_rate": 0.002, "loss": 2.327, "step": 374150 }, { "epoch": 1.4463979217887462, "grad_norm": 0.11603802442550659, "learning_rate": 0.002, "loss": 2.3307, "step": 374160 }, { "epoch": 1.4464365789921294, "grad_norm": 0.10967438668012619, "learning_rate": 0.002, "loss": 2.3296, "step": 374170 }, { "epoch": 1.4464752361955127, "grad_norm": 0.11252371966838837, "learning_rate": 0.002, "loss": 2.3391, "step": 374180 }, { "epoch": 1.446513893398896, "grad_norm": 0.09533124417066574, "learning_rate": 0.002, "loss": 2.3407, "step": 374190 }, { "epoch": 1.4465525506022792, "grad_norm": 0.10764352977275848, "learning_rate": 0.002, "loss": 2.3315, "step": 374200 }, { "epoch": 1.4465912078056626, "grad_norm": 0.10111424326896667, "learning_rate": 0.002, "loss": 2.3398, "step": 374210 }, { "epoch": 1.446629865009046, "grad_norm": 0.11522792279720306, "learning_rate": 0.002, "loss": 2.351, "step": 374220 }, { "epoch": 1.4466685222124291, "grad_norm": 0.09983590245246887, "learning_rate": 0.002, "loss": 2.3311, "step": 374230 }, { "epoch": 1.4467071794158124, "grad_norm": 0.10484399646520615, "learning_rate": 0.002, "loss": 2.3202, "step": 374240 }, { "epoch": 1.4467458366191956, "grad_norm": 0.12569187581539154, "learning_rate": 0.002, "loss": 2.3381, "step": 374250 }, { "epoch": 1.446784493822579, "grad_norm": 0.10629899054765701, "learning_rate": 0.002, "loss": 2.3385, "step": 374260 }, { "epoch": 1.4468231510259622, "grad_norm": 0.10669715702533722, "learning_rate": 0.002, "loss": 2.3303, "step": 374270 }, { "epoch": 1.4468618082293454, "grad_norm": 0.1442335695028305, "learning_rate": 0.002, "loss": 2.3344, "step": 374280 }, { "epoch": 1.4469004654327287, "grad_norm": 0.1721041202545166, "learning_rate": 0.002, "loss": 2.3526, "step": 374290 }, { "epoch": 1.446939122636112, "grad_norm": 0.1116456538438797, "learning_rate": 0.002, "loss": 2.3317, "step": 374300 }, { "epoch": 1.4469777798394952, "grad_norm": 0.11294256150722504, "learning_rate": 0.002, "loss": 2.3395, "step": 374310 }, { "epoch": 1.4470164370428786, "grad_norm": 0.08833135664463043, "learning_rate": 0.002, "loss": 2.3265, "step": 374320 }, { "epoch": 1.4470550942462619, "grad_norm": 0.09844326227903366, "learning_rate": 0.002, "loss": 2.329, "step": 374330 }, { "epoch": 1.4470937514496451, "grad_norm": 0.1368217021226883, "learning_rate": 0.002, "loss": 2.3388, "step": 374340 }, { "epoch": 1.4471324086530284, "grad_norm": 0.1035735234618187, "learning_rate": 0.002, "loss": 2.3357, "step": 374350 }, { "epoch": 1.4471710658564116, "grad_norm": 0.09134948998689651, "learning_rate": 0.002, "loss": 2.334, "step": 374360 }, { "epoch": 1.447209723059795, "grad_norm": 0.11816652864217758, "learning_rate": 0.002, "loss": 2.3366, "step": 374370 }, { "epoch": 1.4472483802631784, "grad_norm": 0.1168443039059639, "learning_rate": 0.002, "loss": 2.3345, "step": 374380 }, { "epoch": 1.4472870374665616, "grad_norm": 0.0934310182929039, "learning_rate": 0.002, "loss": 2.3372, "step": 374390 }, { "epoch": 1.4473256946699449, "grad_norm": 0.10161089152097702, "learning_rate": 0.002, "loss": 2.3264, "step": 374400 }, { "epoch": 1.4473643518733281, "grad_norm": 0.10577352344989777, "learning_rate": 0.002, "loss": 2.3205, "step": 374410 }, { "epoch": 1.4474030090767114, "grad_norm": 0.10527346283197403, "learning_rate": 0.002, "loss": 2.3448, "step": 374420 }, { "epoch": 1.4474416662800946, "grad_norm": 0.09274306893348694, "learning_rate": 0.002, "loss": 2.3429, "step": 374430 }, { "epoch": 1.4474803234834779, "grad_norm": 0.10285604000091553, "learning_rate": 0.002, "loss": 2.344, "step": 374440 }, { "epoch": 1.4475189806868611, "grad_norm": 0.11490141600370407, "learning_rate": 0.002, "loss": 2.3291, "step": 374450 }, { "epoch": 1.4475576378902444, "grad_norm": 0.1002807691693306, "learning_rate": 0.002, "loss": 2.3345, "step": 374460 }, { "epoch": 1.4475962950936276, "grad_norm": 0.10294970870018005, "learning_rate": 0.002, "loss": 2.3317, "step": 374470 }, { "epoch": 1.447634952297011, "grad_norm": 0.10106101632118225, "learning_rate": 0.002, "loss": 2.34, "step": 374480 }, { "epoch": 1.4476736095003944, "grad_norm": 0.12514422833919525, "learning_rate": 0.002, "loss": 2.337, "step": 374490 }, { "epoch": 1.4477122667037776, "grad_norm": 0.10771064460277557, "learning_rate": 0.002, "loss": 2.332, "step": 374500 }, { "epoch": 1.4477509239071609, "grad_norm": 0.11269853264093399, "learning_rate": 0.002, "loss": 2.3406, "step": 374510 }, { "epoch": 1.4477895811105441, "grad_norm": 0.10505794733762741, "learning_rate": 0.002, "loss": 2.3377, "step": 374520 }, { "epoch": 1.4478282383139274, "grad_norm": 0.08940539509057999, "learning_rate": 0.002, "loss": 2.3332, "step": 374530 }, { "epoch": 1.4478668955173106, "grad_norm": 0.10429862141609192, "learning_rate": 0.002, "loss": 2.3349, "step": 374540 }, { "epoch": 1.447905552720694, "grad_norm": 0.09255179017782211, "learning_rate": 0.002, "loss": 2.3493, "step": 374550 }, { "epoch": 1.4479442099240774, "grad_norm": 0.13329355418682098, "learning_rate": 0.002, "loss": 2.3428, "step": 374560 }, { "epoch": 1.4479828671274606, "grad_norm": 0.10370327532291412, "learning_rate": 0.002, "loss": 2.3466, "step": 374570 }, { "epoch": 1.4480215243308439, "grad_norm": 0.10171709209680557, "learning_rate": 0.002, "loss": 2.3521, "step": 374580 }, { "epoch": 1.448060181534227, "grad_norm": 0.1019992083311081, "learning_rate": 0.002, "loss": 2.3268, "step": 374590 }, { "epoch": 1.4480988387376104, "grad_norm": 0.10621504485607147, "learning_rate": 0.002, "loss": 2.3215, "step": 374600 }, { "epoch": 1.4481374959409936, "grad_norm": 0.0987897738814354, "learning_rate": 0.002, "loss": 2.3322, "step": 374610 }, { "epoch": 1.4481761531443769, "grad_norm": 0.11098815500736237, "learning_rate": 0.002, "loss": 2.3443, "step": 374620 }, { "epoch": 1.4482148103477601, "grad_norm": 0.09835632145404816, "learning_rate": 0.002, "loss": 2.3305, "step": 374630 }, { "epoch": 1.4482534675511434, "grad_norm": 0.11043904721736908, "learning_rate": 0.002, "loss": 2.3239, "step": 374640 }, { "epoch": 1.4482921247545266, "grad_norm": 0.1077338308095932, "learning_rate": 0.002, "loss": 2.3298, "step": 374650 }, { "epoch": 1.44833078195791, "grad_norm": 0.10682597011327744, "learning_rate": 0.002, "loss": 2.3446, "step": 374660 }, { "epoch": 1.4483694391612933, "grad_norm": 0.10108894109725952, "learning_rate": 0.002, "loss": 2.326, "step": 374670 }, { "epoch": 1.4484080963646766, "grad_norm": 0.10345392674207687, "learning_rate": 0.002, "loss": 2.3351, "step": 374680 }, { "epoch": 1.4484467535680599, "grad_norm": 0.09838715195655823, "learning_rate": 0.002, "loss": 2.3363, "step": 374690 }, { "epoch": 1.448485410771443, "grad_norm": 0.08886837959289551, "learning_rate": 0.002, "loss": 2.328, "step": 374700 }, { "epoch": 1.4485240679748264, "grad_norm": 0.09721381217241287, "learning_rate": 0.002, "loss": 2.3211, "step": 374710 }, { "epoch": 1.4485627251782098, "grad_norm": 0.09373707324266434, "learning_rate": 0.002, "loss": 2.3358, "step": 374720 }, { "epoch": 1.448601382381593, "grad_norm": 0.10279977321624756, "learning_rate": 0.002, "loss": 2.3399, "step": 374730 }, { "epoch": 1.4486400395849763, "grad_norm": 0.11030231416225433, "learning_rate": 0.002, "loss": 2.3298, "step": 374740 }, { "epoch": 1.4486786967883596, "grad_norm": 0.18042241036891937, "learning_rate": 0.002, "loss": 2.3279, "step": 374750 }, { "epoch": 1.4487173539917428, "grad_norm": 0.11614906787872314, "learning_rate": 0.002, "loss": 2.3271, "step": 374760 }, { "epoch": 1.448756011195126, "grad_norm": 0.1080334410071373, "learning_rate": 0.002, "loss": 2.3272, "step": 374770 }, { "epoch": 1.4487946683985093, "grad_norm": 0.11193375289440155, "learning_rate": 0.002, "loss": 2.3375, "step": 374780 }, { "epoch": 1.4488333256018926, "grad_norm": 0.10424870997667313, "learning_rate": 0.002, "loss": 2.338, "step": 374790 }, { "epoch": 1.4488719828052758, "grad_norm": 0.12174300104379654, "learning_rate": 0.002, "loss": 2.3271, "step": 374800 }, { "epoch": 1.448910640008659, "grad_norm": 0.09835692495107651, "learning_rate": 0.002, "loss": 2.3521, "step": 374810 }, { "epoch": 1.4489492972120426, "grad_norm": 0.09823215752840042, "learning_rate": 0.002, "loss": 2.3285, "step": 374820 }, { "epoch": 1.4489879544154258, "grad_norm": 0.10451362282037735, "learning_rate": 0.002, "loss": 2.328, "step": 374830 }, { "epoch": 1.449026611618809, "grad_norm": 0.0901821181178093, "learning_rate": 0.002, "loss": 2.3428, "step": 374840 }, { "epoch": 1.4490652688221923, "grad_norm": 0.09381404519081116, "learning_rate": 0.002, "loss": 2.3551, "step": 374850 }, { "epoch": 1.4491039260255756, "grad_norm": 0.0980507880449295, "learning_rate": 0.002, "loss": 2.3277, "step": 374860 }, { "epoch": 1.4491425832289588, "grad_norm": 0.11305723339319229, "learning_rate": 0.002, "loss": 2.3465, "step": 374870 }, { "epoch": 1.449181240432342, "grad_norm": 0.11033497005701065, "learning_rate": 0.002, "loss": 2.3241, "step": 374880 }, { "epoch": 1.4492198976357256, "grad_norm": 0.1097717210650444, "learning_rate": 0.002, "loss": 2.3401, "step": 374890 }, { "epoch": 1.4492585548391088, "grad_norm": 0.11004069447517395, "learning_rate": 0.002, "loss": 2.3276, "step": 374900 }, { "epoch": 1.449297212042492, "grad_norm": 0.10695581883192062, "learning_rate": 0.002, "loss": 2.3407, "step": 374910 }, { "epoch": 1.4493358692458753, "grad_norm": 0.1037338525056839, "learning_rate": 0.002, "loss": 2.3255, "step": 374920 }, { "epoch": 1.4493745264492586, "grad_norm": 0.1020689606666565, "learning_rate": 0.002, "loss": 2.3322, "step": 374930 }, { "epoch": 1.4494131836526418, "grad_norm": 0.11201310902833939, "learning_rate": 0.002, "loss": 2.3398, "step": 374940 }, { "epoch": 1.449451840856025, "grad_norm": 0.09727621078491211, "learning_rate": 0.002, "loss": 2.3164, "step": 374950 }, { "epoch": 1.4494904980594083, "grad_norm": 0.10292645543813705, "learning_rate": 0.002, "loss": 2.3371, "step": 374960 }, { "epoch": 1.4495291552627916, "grad_norm": 0.09569789469242096, "learning_rate": 0.002, "loss": 2.341, "step": 374970 }, { "epoch": 1.4495678124661748, "grad_norm": 0.09703312069177628, "learning_rate": 0.002, "loss": 2.3414, "step": 374980 }, { "epoch": 1.4496064696695583, "grad_norm": 0.10222285240888596, "learning_rate": 0.002, "loss": 2.3256, "step": 374990 }, { "epoch": 1.4496451268729416, "grad_norm": 0.11462683975696564, "learning_rate": 0.002, "loss": 2.3392, "step": 375000 }, { "epoch": 1.4496837840763248, "grad_norm": 0.10431713610887527, "learning_rate": 0.002, "loss": 2.3274, "step": 375010 }, { "epoch": 1.449722441279708, "grad_norm": 0.09858272969722748, "learning_rate": 0.002, "loss": 2.3478, "step": 375020 }, { "epoch": 1.4497610984830913, "grad_norm": 0.12057144939899445, "learning_rate": 0.002, "loss": 2.3345, "step": 375030 }, { "epoch": 1.4497997556864746, "grad_norm": 0.10652951151132584, "learning_rate": 0.002, "loss": 2.3385, "step": 375040 }, { "epoch": 1.4498384128898578, "grad_norm": 0.1226535513997078, "learning_rate": 0.002, "loss": 2.333, "step": 375050 }, { "epoch": 1.4498770700932413, "grad_norm": 0.09500709921121597, "learning_rate": 0.002, "loss": 2.3354, "step": 375060 }, { "epoch": 1.4499157272966245, "grad_norm": 0.10096275806427002, "learning_rate": 0.002, "loss": 2.3286, "step": 375070 }, { "epoch": 1.4499543845000078, "grad_norm": 0.09361151605844498, "learning_rate": 0.002, "loss": 2.3229, "step": 375080 }, { "epoch": 1.449993041703391, "grad_norm": 0.09544364362955093, "learning_rate": 0.002, "loss": 2.3195, "step": 375090 }, { "epoch": 1.4500316989067743, "grad_norm": 0.1043727919459343, "learning_rate": 0.002, "loss": 2.341, "step": 375100 }, { "epoch": 1.4500703561101576, "grad_norm": 0.17423953115940094, "learning_rate": 0.002, "loss": 2.3478, "step": 375110 }, { "epoch": 1.4501090133135408, "grad_norm": 0.10786591470241547, "learning_rate": 0.002, "loss": 2.3409, "step": 375120 }, { "epoch": 1.450147670516924, "grad_norm": 0.10140660405158997, "learning_rate": 0.002, "loss": 2.3429, "step": 375130 }, { "epoch": 1.4501863277203073, "grad_norm": 0.0983930379152298, "learning_rate": 0.002, "loss": 2.3311, "step": 375140 }, { "epoch": 1.4502249849236906, "grad_norm": 0.10323572158813477, "learning_rate": 0.002, "loss": 2.3385, "step": 375150 }, { "epoch": 1.450263642127074, "grad_norm": 0.10888148099184036, "learning_rate": 0.002, "loss": 2.3378, "step": 375160 }, { "epoch": 1.4503022993304573, "grad_norm": 0.12469719350337982, "learning_rate": 0.002, "loss": 2.3316, "step": 375170 }, { "epoch": 1.4503409565338405, "grad_norm": 0.08669634163379669, "learning_rate": 0.002, "loss": 2.3364, "step": 375180 }, { "epoch": 1.4503796137372238, "grad_norm": 0.09478214383125305, "learning_rate": 0.002, "loss": 2.3373, "step": 375190 }, { "epoch": 1.450418270940607, "grad_norm": 0.1561504751443863, "learning_rate": 0.002, "loss": 2.3234, "step": 375200 }, { "epoch": 1.4504569281439903, "grad_norm": 0.09788401424884796, "learning_rate": 0.002, "loss": 2.3395, "step": 375210 }, { "epoch": 1.4504955853473736, "grad_norm": 0.10349985957145691, "learning_rate": 0.002, "loss": 2.3277, "step": 375220 }, { "epoch": 1.450534242550757, "grad_norm": 0.8829423189163208, "learning_rate": 0.002, "loss": 2.3356, "step": 375230 }, { "epoch": 1.4505728997541403, "grad_norm": 0.12734265625476837, "learning_rate": 0.002, "loss": 2.3317, "step": 375240 }, { "epoch": 1.4506115569575235, "grad_norm": 0.1019926369190216, "learning_rate": 0.002, "loss": 2.3514, "step": 375250 }, { "epoch": 1.4506502141609068, "grad_norm": 0.10075262188911438, "learning_rate": 0.002, "loss": 2.3318, "step": 375260 }, { "epoch": 1.45068887136429, "grad_norm": 0.09703978151082993, "learning_rate": 0.002, "loss": 2.349, "step": 375270 }, { "epoch": 1.4507275285676733, "grad_norm": 0.10557262599468231, "learning_rate": 0.002, "loss": 2.334, "step": 375280 }, { "epoch": 1.4507661857710565, "grad_norm": 0.10136254876852036, "learning_rate": 0.002, "loss": 2.3319, "step": 375290 }, { "epoch": 1.4508048429744398, "grad_norm": 0.09263923764228821, "learning_rate": 0.002, "loss": 2.3361, "step": 375300 }, { "epoch": 1.450843500177823, "grad_norm": 0.12233812361955643, "learning_rate": 0.002, "loss": 2.3217, "step": 375310 }, { "epoch": 1.4508821573812063, "grad_norm": 0.10406205803155899, "learning_rate": 0.002, "loss": 2.3373, "step": 375320 }, { "epoch": 1.4509208145845898, "grad_norm": 0.11512085795402527, "learning_rate": 0.002, "loss": 2.3369, "step": 375330 }, { "epoch": 1.450959471787973, "grad_norm": 0.10936763137578964, "learning_rate": 0.002, "loss": 2.3313, "step": 375340 }, { "epoch": 1.4509981289913563, "grad_norm": 0.12254436314105988, "learning_rate": 0.002, "loss": 2.3378, "step": 375350 }, { "epoch": 1.4510367861947395, "grad_norm": 0.10936383157968521, "learning_rate": 0.002, "loss": 2.3359, "step": 375360 }, { "epoch": 1.4510754433981228, "grad_norm": 0.1023615226149559, "learning_rate": 0.002, "loss": 2.3214, "step": 375370 }, { "epoch": 1.451114100601506, "grad_norm": 0.0934467613697052, "learning_rate": 0.002, "loss": 2.3392, "step": 375380 }, { "epoch": 1.4511527578048893, "grad_norm": 0.09618617594242096, "learning_rate": 0.002, "loss": 2.3425, "step": 375390 }, { "epoch": 1.4511914150082728, "grad_norm": 0.12151792645454407, "learning_rate": 0.002, "loss": 2.3391, "step": 375400 }, { "epoch": 1.451230072211656, "grad_norm": 0.09098203480243683, "learning_rate": 0.002, "loss": 2.3297, "step": 375410 }, { "epoch": 1.4512687294150393, "grad_norm": 0.14736823737621307, "learning_rate": 0.002, "loss": 2.3399, "step": 375420 }, { "epoch": 1.4513073866184225, "grad_norm": 0.10443034023046494, "learning_rate": 0.002, "loss": 2.3302, "step": 375430 }, { "epoch": 1.4513460438218058, "grad_norm": 0.103264719247818, "learning_rate": 0.002, "loss": 2.3476, "step": 375440 }, { "epoch": 1.451384701025189, "grad_norm": 0.12326298654079437, "learning_rate": 0.002, "loss": 2.3257, "step": 375450 }, { "epoch": 1.4514233582285723, "grad_norm": 0.10376644879579544, "learning_rate": 0.002, "loss": 2.3363, "step": 375460 }, { "epoch": 1.4514620154319555, "grad_norm": 0.12066710740327835, "learning_rate": 0.002, "loss": 2.3354, "step": 375470 }, { "epoch": 1.4515006726353388, "grad_norm": 0.09960687160491943, "learning_rate": 0.002, "loss": 2.3308, "step": 375480 }, { "epoch": 1.451539329838722, "grad_norm": 0.10828446596860886, "learning_rate": 0.002, "loss": 2.3301, "step": 375490 }, { "epoch": 1.4515779870421055, "grad_norm": 0.09412367641925812, "learning_rate": 0.002, "loss": 2.3225, "step": 375500 }, { "epoch": 1.4516166442454888, "grad_norm": 0.11398924887180328, "learning_rate": 0.002, "loss": 2.3339, "step": 375510 }, { "epoch": 1.451655301448872, "grad_norm": 0.10298270732164383, "learning_rate": 0.002, "loss": 2.3403, "step": 375520 }, { "epoch": 1.4516939586522553, "grad_norm": 0.16039882600307465, "learning_rate": 0.002, "loss": 2.3352, "step": 375530 }, { "epoch": 1.4517326158556385, "grad_norm": 0.08908867835998535, "learning_rate": 0.002, "loss": 2.3465, "step": 375540 }, { "epoch": 1.4517712730590218, "grad_norm": 0.11916134506464005, "learning_rate": 0.002, "loss": 2.3327, "step": 375550 }, { "epoch": 1.4518099302624052, "grad_norm": 0.11261797696352005, "learning_rate": 0.002, "loss": 2.3388, "step": 375560 }, { "epoch": 1.4518485874657885, "grad_norm": 0.10552900284528732, "learning_rate": 0.002, "loss": 2.3267, "step": 375570 }, { "epoch": 1.4518872446691717, "grad_norm": 0.10971780866384506, "learning_rate": 0.002, "loss": 2.3311, "step": 375580 }, { "epoch": 1.451925901872555, "grad_norm": 0.10335514694452286, "learning_rate": 0.002, "loss": 2.3357, "step": 375590 }, { "epoch": 1.4519645590759382, "grad_norm": 0.11158467084169388, "learning_rate": 0.002, "loss": 2.3183, "step": 375600 }, { "epoch": 1.4520032162793215, "grad_norm": 0.10805515944957733, "learning_rate": 0.002, "loss": 2.3263, "step": 375610 }, { "epoch": 1.4520418734827047, "grad_norm": 0.09409262239933014, "learning_rate": 0.002, "loss": 2.3196, "step": 375620 }, { "epoch": 1.452080530686088, "grad_norm": 0.08583605289459229, "learning_rate": 0.002, "loss": 2.3344, "step": 375630 }, { "epoch": 1.4521191878894713, "grad_norm": 0.0960557758808136, "learning_rate": 0.002, "loss": 2.3273, "step": 375640 }, { "epoch": 1.4521578450928545, "grad_norm": 0.10765881836414337, "learning_rate": 0.002, "loss": 2.3264, "step": 375650 }, { "epoch": 1.4521965022962378, "grad_norm": 0.09392059594392776, "learning_rate": 0.002, "loss": 2.3335, "step": 375660 }, { "epoch": 1.4522351594996212, "grad_norm": 0.10155150294303894, "learning_rate": 0.002, "loss": 2.346, "step": 375670 }, { "epoch": 1.4522738167030045, "grad_norm": 0.09984404593706131, "learning_rate": 0.002, "loss": 2.3288, "step": 375680 }, { "epoch": 1.4523124739063877, "grad_norm": 0.10842925310134888, "learning_rate": 0.002, "loss": 2.3172, "step": 375690 }, { "epoch": 1.452351131109771, "grad_norm": 0.0975777730345726, "learning_rate": 0.002, "loss": 2.3283, "step": 375700 }, { "epoch": 1.4523897883131542, "grad_norm": 0.1043943464756012, "learning_rate": 0.002, "loss": 2.3217, "step": 375710 }, { "epoch": 1.4524284455165375, "grad_norm": 0.09681592136621475, "learning_rate": 0.002, "loss": 2.3228, "step": 375720 }, { "epoch": 1.452467102719921, "grad_norm": 0.09512980282306671, "learning_rate": 0.002, "loss": 2.3239, "step": 375730 }, { "epoch": 1.4525057599233042, "grad_norm": 0.09035705775022507, "learning_rate": 0.002, "loss": 2.3391, "step": 375740 }, { "epoch": 1.4525444171266875, "grad_norm": 0.10851699858903885, "learning_rate": 0.002, "loss": 2.3436, "step": 375750 }, { "epoch": 1.4525830743300707, "grad_norm": 0.10928475111722946, "learning_rate": 0.002, "loss": 2.337, "step": 375760 }, { "epoch": 1.452621731533454, "grad_norm": 0.08919432759284973, "learning_rate": 0.002, "loss": 2.3331, "step": 375770 }, { "epoch": 1.4526603887368372, "grad_norm": 0.10121583938598633, "learning_rate": 0.002, "loss": 2.3301, "step": 375780 }, { "epoch": 1.4526990459402205, "grad_norm": 0.10289706289768219, "learning_rate": 0.002, "loss": 2.3442, "step": 375790 }, { "epoch": 1.4527377031436037, "grad_norm": 0.1273885816335678, "learning_rate": 0.002, "loss": 2.3265, "step": 375800 }, { "epoch": 1.452776360346987, "grad_norm": 0.10660862177610397, "learning_rate": 0.002, "loss": 2.335, "step": 375810 }, { "epoch": 1.4528150175503702, "grad_norm": 0.09819815307855606, "learning_rate": 0.002, "loss": 2.3319, "step": 375820 }, { "epoch": 1.4528536747537535, "grad_norm": 0.0908627137541771, "learning_rate": 0.002, "loss": 2.3406, "step": 375830 }, { "epoch": 1.452892331957137, "grad_norm": 0.11086965352296829, "learning_rate": 0.002, "loss": 2.3295, "step": 375840 }, { "epoch": 1.4529309891605202, "grad_norm": 0.11052382737398148, "learning_rate": 0.002, "loss": 2.3502, "step": 375850 }, { "epoch": 1.4529696463639035, "grad_norm": 0.09581246227025986, "learning_rate": 0.002, "loss": 2.3305, "step": 375860 }, { "epoch": 1.4530083035672867, "grad_norm": 0.13408619165420532, "learning_rate": 0.002, "loss": 2.3236, "step": 375870 }, { "epoch": 1.45304696077067, "grad_norm": 0.10207131505012512, "learning_rate": 0.002, "loss": 2.3224, "step": 375880 }, { "epoch": 1.4530856179740532, "grad_norm": 0.10608415305614471, "learning_rate": 0.002, "loss": 2.3287, "step": 375890 }, { "epoch": 1.4531242751774367, "grad_norm": 0.10444062203168869, "learning_rate": 0.002, "loss": 2.3275, "step": 375900 }, { "epoch": 1.45316293238082, "grad_norm": 0.10438374429941177, "learning_rate": 0.002, "loss": 2.321, "step": 375910 }, { "epoch": 1.4532015895842032, "grad_norm": 0.13011299073696136, "learning_rate": 0.002, "loss": 2.3462, "step": 375920 }, { "epoch": 1.4532402467875865, "grad_norm": 0.11127454042434692, "learning_rate": 0.002, "loss": 2.3259, "step": 375930 }, { "epoch": 1.4532789039909697, "grad_norm": 0.10193561762571335, "learning_rate": 0.002, "loss": 2.3311, "step": 375940 }, { "epoch": 1.453317561194353, "grad_norm": 0.14563407003879547, "learning_rate": 0.002, "loss": 2.3425, "step": 375950 }, { "epoch": 1.4533562183977362, "grad_norm": 0.10720785707235336, "learning_rate": 0.002, "loss": 2.3273, "step": 375960 }, { "epoch": 1.4533948756011195, "grad_norm": 0.13315080106258392, "learning_rate": 0.002, "loss": 2.3284, "step": 375970 }, { "epoch": 1.4534335328045027, "grad_norm": 0.09596723318099976, "learning_rate": 0.002, "loss": 2.326, "step": 375980 }, { "epoch": 1.453472190007886, "grad_norm": 0.09336968511343002, "learning_rate": 0.002, "loss": 2.3493, "step": 375990 }, { "epoch": 1.4535108472112692, "grad_norm": 0.10079991817474365, "learning_rate": 0.002, "loss": 2.3471, "step": 376000 }, { "epoch": 1.4535495044146527, "grad_norm": 0.1005874052643776, "learning_rate": 0.002, "loss": 2.3336, "step": 376010 }, { "epoch": 1.453588161618036, "grad_norm": 0.1043470948934555, "learning_rate": 0.002, "loss": 2.3298, "step": 376020 }, { "epoch": 1.4536268188214192, "grad_norm": 0.1530982404947281, "learning_rate": 0.002, "loss": 2.3308, "step": 376030 }, { "epoch": 1.4536654760248024, "grad_norm": 0.12257708609104156, "learning_rate": 0.002, "loss": 2.3339, "step": 376040 }, { "epoch": 1.4537041332281857, "grad_norm": 0.0920775905251503, "learning_rate": 0.002, "loss": 2.3445, "step": 376050 }, { "epoch": 1.453742790431569, "grad_norm": 0.09294003248214722, "learning_rate": 0.002, "loss": 2.3423, "step": 376060 }, { "epoch": 1.4537814476349524, "grad_norm": 0.09124291688203812, "learning_rate": 0.002, "loss": 2.3189, "step": 376070 }, { "epoch": 1.4538201048383357, "grad_norm": 0.11236456781625748, "learning_rate": 0.002, "loss": 2.3252, "step": 376080 }, { "epoch": 1.453858762041719, "grad_norm": 0.0982632264494896, "learning_rate": 0.002, "loss": 2.332, "step": 376090 }, { "epoch": 1.4538974192451022, "grad_norm": 0.09487993270158768, "learning_rate": 0.002, "loss": 2.3226, "step": 376100 }, { "epoch": 1.4539360764484854, "grad_norm": 0.10584530234336853, "learning_rate": 0.002, "loss": 2.3436, "step": 376110 }, { "epoch": 1.4539747336518687, "grad_norm": 0.11138825863599777, "learning_rate": 0.002, "loss": 2.3286, "step": 376120 }, { "epoch": 1.454013390855252, "grad_norm": 0.11696814000606537, "learning_rate": 0.002, "loss": 2.3349, "step": 376130 }, { "epoch": 1.4540520480586352, "grad_norm": 0.10326026380062103, "learning_rate": 0.002, "loss": 2.3356, "step": 376140 }, { "epoch": 1.4540907052620184, "grad_norm": 0.10197250545024872, "learning_rate": 0.002, "loss": 2.3473, "step": 376150 }, { "epoch": 1.4541293624654017, "grad_norm": 0.09561163932085037, "learning_rate": 0.002, "loss": 2.3342, "step": 376160 }, { "epoch": 1.454168019668785, "grad_norm": 0.1013605147600174, "learning_rate": 0.002, "loss": 2.3364, "step": 376170 }, { "epoch": 1.4542066768721684, "grad_norm": 0.1090017557144165, "learning_rate": 0.002, "loss": 2.333, "step": 376180 }, { "epoch": 1.4542453340755517, "grad_norm": 0.09583274275064468, "learning_rate": 0.002, "loss": 2.3351, "step": 376190 }, { "epoch": 1.454283991278935, "grad_norm": 0.11330226063728333, "learning_rate": 0.002, "loss": 2.3163, "step": 376200 }, { "epoch": 1.4543226484823182, "grad_norm": 0.19393405318260193, "learning_rate": 0.002, "loss": 2.3257, "step": 376210 }, { "epoch": 1.4543613056857014, "grad_norm": 0.08783680945634842, "learning_rate": 0.002, "loss": 2.3337, "step": 376220 }, { "epoch": 1.4543999628890847, "grad_norm": 0.09217392653226852, "learning_rate": 0.002, "loss": 2.307, "step": 376230 }, { "epoch": 1.4544386200924682, "grad_norm": 0.12929874658584595, "learning_rate": 0.002, "loss": 2.3251, "step": 376240 }, { "epoch": 1.4544772772958514, "grad_norm": 0.09602414816617966, "learning_rate": 0.002, "loss": 2.3202, "step": 376250 }, { "epoch": 1.4545159344992347, "grad_norm": 0.09557639807462692, "learning_rate": 0.002, "loss": 2.3365, "step": 376260 }, { "epoch": 1.454554591702618, "grad_norm": 0.10556343197822571, "learning_rate": 0.002, "loss": 2.3344, "step": 376270 }, { "epoch": 1.4545932489060012, "grad_norm": 0.12296749651432037, "learning_rate": 0.002, "loss": 2.3209, "step": 376280 }, { "epoch": 1.4546319061093844, "grad_norm": 0.10371430218219757, "learning_rate": 0.002, "loss": 2.321, "step": 376290 }, { "epoch": 1.4546705633127677, "grad_norm": 0.09034077078104019, "learning_rate": 0.002, "loss": 2.3209, "step": 376300 }, { "epoch": 1.454709220516151, "grad_norm": 0.10609731823205948, "learning_rate": 0.002, "loss": 2.3269, "step": 376310 }, { "epoch": 1.4547478777195342, "grad_norm": 0.11761943250894547, "learning_rate": 0.002, "loss": 2.3197, "step": 376320 }, { "epoch": 1.4547865349229174, "grad_norm": 0.10954983532428741, "learning_rate": 0.002, "loss": 2.3388, "step": 376330 }, { "epoch": 1.4548251921263007, "grad_norm": 0.09743141382932663, "learning_rate": 0.002, "loss": 2.3191, "step": 376340 }, { "epoch": 1.4548638493296842, "grad_norm": 0.10274147242307663, "learning_rate": 0.002, "loss": 2.3154, "step": 376350 }, { "epoch": 1.4549025065330674, "grad_norm": 0.10671969503164291, "learning_rate": 0.002, "loss": 2.3297, "step": 376360 }, { "epoch": 1.4549411637364507, "grad_norm": 0.09649311006069183, "learning_rate": 0.002, "loss": 2.3179, "step": 376370 }, { "epoch": 1.454979820939834, "grad_norm": 0.10887061059474945, "learning_rate": 0.002, "loss": 2.3414, "step": 376380 }, { "epoch": 1.4550184781432172, "grad_norm": 0.09647853672504425, "learning_rate": 0.002, "loss": 2.3283, "step": 376390 }, { "epoch": 1.4550571353466004, "grad_norm": 0.09214019030332565, "learning_rate": 0.002, "loss": 2.3219, "step": 376400 }, { "epoch": 1.455095792549984, "grad_norm": 0.13373766839504242, "learning_rate": 0.002, "loss": 2.337, "step": 376410 }, { "epoch": 1.4551344497533671, "grad_norm": 0.09883366525173187, "learning_rate": 0.002, "loss": 2.3376, "step": 376420 }, { "epoch": 1.4551731069567504, "grad_norm": 0.08454938232898712, "learning_rate": 0.002, "loss": 2.3304, "step": 376430 }, { "epoch": 1.4552117641601336, "grad_norm": 0.09244359284639359, "learning_rate": 0.002, "loss": 2.3309, "step": 376440 }, { "epoch": 1.455250421363517, "grad_norm": 0.11290130019187927, "learning_rate": 0.002, "loss": 2.3236, "step": 376450 }, { "epoch": 1.4552890785669002, "grad_norm": 0.12210394442081451, "learning_rate": 0.002, "loss": 2.3384, "step": 376460 }, { "epoch": 1.4553277357702834, "grad_norm": 0.10246371477842331, "learning_rate": 0.002, "loss": 2.3429, "step": 376470 }, { "epoch": 1.4553663929736667, "grad_norm": 0.09917712956666946, "learning_rate": 0.002, "loss": 2.3422, "step": 376480 }, { "epoch": 1.45540505017705, "grad_norm": 0.11077070236206055, "learning_rate": 0.002, "loss": 2.3277, "step": 376490 }, { "epoch": 1.4554437073804332, "grad_norm": 0.10274385660886765, "learning_rate": 0.002, "loss": 2.3348, "step": 376500 }, { "epoch": 1.4554823645838164, "grad_norm": 0.09519847482442856, "learning_rate": 0.002, "loss": 2.3361, "step": 376510 }, { "epoch": 1.4555210217871999, "grad_norm": 0.10795559734106064, "learning_rate": 0.002, "loss": 2.3372, "step": 376520 }, { "epoch": 1.4555596789905831, "grad_norm": 0.09167300164699554, "learning_rate": 0.002, "loss": 2.3304, "step": 376530 }, { "epoch": 1.4555983361939664, "grad_norm": 0.09464503079652786, "learning_rate": 0.002, "loss": 2.3373, "step": 376540 }, { "epoch": 1.4556369933973496, "grad_norm": 0.11465884000062943, "learning_rate": 0.002, "loss": 2.335, "step": 376550 }, { "epoch": 1.455675650600733, "grad_norm": 0.08918526023626328, "learning_rate": 0.002, "loss": 2.3379, "step": 376560 }, { "epoch": 1.4557143078041161, "grad_norm": 0.11020664125680923, "learning_rate": 0.002, "loss": 2.3287, "step": 376570 }, { "epoch": 1.4557529650074996, "grad_norm": 0.09470126032829285, "learning_rate": 0.002, "loss": 2.3381, "step": 376580 }, { "epoch": 1.4557916222108829, "grad_norm": 0.09877173602581024, "learning_rate": 0.002, "loss": 2.3368, "step": 376590 }, { "epoch": 1.4558302794142661, "grad_norm": 0.08799102902412415, "learning_rate": 0.002, "loss": 2.33, "step": 376600 }, { "epoch": 1.4558689366176494, "grad_norm": 0.09482432901859283, "learning_rate": 0.002, "loss": 2.3203, "step": 376610 }, { "epoch": 1.4559075938210326, "grad_norm": 0.12014751881361008, "learning_rate": 0.002, "loss": 2.3294, "step": 376620 }, { "epoch": 1.4559462510244159, "grad_norm": 0.09875979274511337, "learning_rate": 0.002, "loss": 2.3308, "step": 376630 }, { "epoch": 1.4559849082277991, "grad_norm": 0.09889055788516998, "learning_rate": 0.002, "loss": 2.3344, "step": 376640 }, { "epoch": 1.4560235654311824, "grad_norm": 0.10025811195373535, "learning_rate": 0.002, "loss": 2.3333, "step": 376650 }, { "epoch": 1.4560622226345656, "grad_norm": 0.1114993616938591, "learning_rate": 0.002, "loss": 2.3342, "step": 376660 }, { "epoch": 1.456100879837949, "grad_norm": 0.10707735270261765, "learning_rate": 0.002, "loss": 2.3268, "step": 376670 }, { "epoch": 1.4561395370413321, "grad_norm": 0.1128287985920906, "learning_rate": 0.002, "loss": 2.3227, "step": 376680 }, { "epoch": 1.4561781942447156, "grad_norm": 0.09880905598402023, "learning_rate": 0.002, "loss": 2.3287, "step": 376690 }, { "epoch": 1.4562168514480989, "grad_norm": 0.120734304189682, "learning_rate": 0.002, "loss": 2.3348, "step": 376700 }, { "epoch": 1.4562555086514821, "grad_norm": 0.0999571681022644, "learning_rate": 0.002, "loss": 2.3418, "step": 376710 }, { "epoch": 1.4562941658548654, "grad_norm": 0.1232922375202179, "learning_rate": 0.002, "loss": 2.3266, "step": 376720 }, { "epoch": 1.4563328230582486, "grad_norm": 0.1032818928360939, "learning_rate": 0.002, "loss": 2.344, "step": 376730 }, { "epoch": 1.4563714802616319, "grad_norm": 0.0872046947479248, "learning_rate": 0.002, "loss": 2.3282, "step": 376740 }, { "epoch": 1.4564101374650154, "grad_norm": 0.09458109736442566, "learning_rate": 0.002, "loss": 2.3253, "step": 376750 }, { "epoch": 1.4564487946683986, "grad_norm": 0.11395496875047684, "learning_rate": 0.002, "loss": 2.3446, "step": 376760 }, { "epoch": 1.4564874518717819, "grad_norm": 0.09434529393911362, "learning_rate": 0.002, "loss": 2.3223, "step": 376770 }, { "epoch": 1.456526109075165, "grad_norm": 0.1036214679479599, "learning_rate": 0.002, "loss": 2.3356, "step": 376780 }, { "epoch": 1.4565647662785484, "grad_norm": 0.12814341485500336, "learning_rate": 0.002, "loss": 2.3333, "step": 376790 }, { "epoch": 1.4566034234819316, "grad_norm": 0.0936191976070404, "learning_rate": 0.002, "loss": 2.3252, "step": 376800 }, { "epoch": 1.4566420806853149, "grad_norm": 0.10926380008459091, "learning_rate": 0.002, "loss": 2.3379, "step": 376810 }, { "epoch": 1.4566807378886981, "grad_norm": 0.0996718481183052, "learning_rate": 0.002, "loss": 2.3402, "step": 376820 }, { "epoch": 1.4567193950920814, "grad_norm": 0.11856534332036972, "learning_rate": 0.002, "loss": 2.3249, "step": 376830 }, { "epoch": 1.4567580522954646, "grad_norm": 0.08964511752128601, "learning_rate": 0.002, "loss": 2.3354, "step": 376840 }, { "epoch": 1.456796709498848, "grad_norm": 0.10973100364208221, "learning_rate": 0.002, "loss": 2.3355, "step": 376850 }, { "epoch": 1.4568353667022313, "grad_norm": 0.09312520176172256, "learning_rate": 0.002, "loss": 2.3338, "step": 376860 }, { "epoch": 1.4568740239056146, "grad_norm": 0.09315123409032822, "learning_rate": 0.002, "loss": 2.3283, "step": 376870 }, { "epoch": 1.4569126811089979, "grad_norm": 0.10019497573375702, "learning_rate": 0.002, "loss": 2.3349, "step": 376880 }, { "epoch": 1.456951338312381, "grad_norm": 0.10009029507637024, "learning_rate": 0.002, "loss": 2.3237, "step": 376890 }, { "epoch": 1.4569899955157644, "grad_norm": 0.10956374555826187, "learning_rate": 0.002, "loss": 2.3247, "step": 376900 }, { "epoch": 1.4570286527191476, "grad_norm": 0.08636657893657684, "learning_rate": 0.002, "loss": 2.3317, "step": 376910 }, { "epoch": 1.457067309922531, "grad_norm": 0.10253308713436127, "learning_rate": 0.002, "loss": 2.3313, "step": 376920 }, { "epoch": 1.4571059671259143, "grad_norm": 0.12445124238729477, "learning_rate": 0.002, "loss": 2.3366, "step": 376930 }, { "epoch": 1.4571446243292976, "grad_norm": 0.10057014971971512, "learning_rate": 0.002, "loss": 2.3332, "step": 376940 }, { "epoch": 1.4571832815326808, "grad_norm": 0.09831283986568451, "learning_rate": 0.002, "loss": 2.3432, "step": 376950 }, { "epoch": 1.457221938736064, "grad_norm": 0.11100064963102341, "learning_rate": 0.002, "loss": 2.3125, "step": 376960 }, { "epoch": 1.4572605959394473, "grad_norm": 0.10230191051959991, "learning_rate": 0.002, "loss": 2.3238, "step": 376970 }, { "epoch": 1.4572992531428306, "grad_norm": 0.1209181398153305, "learning_rate": 0.002, "loss": 2.3327, "step": 376980 }, { "epoch": 1.4573379103462138, "grad_norm": 0.12005867809057236, "learning_rate": 0.002, "loss": 2.3147, "step": 376990 }, { "epoch": 1.457376567549597, "grad_norm": 0.09351613372564316, "learning_rate": 0.002, "loss": 2.3222, "step": 377000 }, { "epoch": 1.4574152247529804, "grad_norm": 0.117181695997715, "learning_rate": 0.002, "loss": 2.3441, "step": 377010 }, { "epoch": 1.4574538819563638, "grad_norm": 0.09451831877231598, "learning_rate": 0.002, "loss": 2.3238, "step": 377020 }, { "epoch": 1.457492539159747, "grad_norm": 0.10694488883018494, "learning_rate": 0.002, "loss": 2.3336, "step": 377030 }, { "epoch": 1.4575311963631303, "grad_norm": 0.10419536381959915, "learning_rate": 0.002, "loss": 2.3434, "step": 377040 }, { "epoch": 1.4575698535665136, "grad_norm": 0.09017110615968704, "learning_rate": 0.002, "loss": 2.3299, "step": 377050 }, { "epoch": 1.4576085107698968, "grad_norm": 0.09499950706958771, "learning_rate": 0.002, "loss": 2.3255, "step": 377060 }, { "epoch": 1.45764716797328, "grad_norm": 0.1026366576552391, "learning_rate": 0.002, "loss": 2.3405, "step": 377070 }, { "epoch": 1.4576858251766633, "grad_norm": 0.11473041772842407, "learning_rate": 0.002, "loss": 2.3351, "step": 377080 }, { "epoch": 1.4577244823800468, "grad_norm": 0.11454009264707565, "learning_rate": 0.002, "loss": 2.3283, "step": 377090 }, { "epoch": 1.45776313958343, "grad_norm": 0.10464174300432205, "learning_rate": 0.002, "loss": 2.3349, "step": 377100 }, { "epoch": 1.4578017967868133, "grad_norm": 0.10501186549663544, "learning_rate": 0.002, "loss": 2.3452, "step": 377110 }, { "epoch": 1.4578404539901966, "grad_norm": 0.11394108086824417, "learning_rate": 0.002, "loss": 2.3375, "step": 377120 }, { "epoch": 1.4578791111935798, "grad_norm": 0.10638362914323807, "learning_rate": 0.002, "loss": 2.3155, "step": 377130 }, { "epoch": 1.457917768396963, "grad_norm": 0.11045945435762405, "learning_rate": 0.002, "loss": 2.3194, "step": 377140 }, { "epoch": 1.4579564256003463, "grad_norm": 0.09159348905086517, "learning_rate": 0.002, "loss": 2.3333, "step": 377150 }, { "epoch": 1.4579950828037296, "grad_norm": 0.10121490061283112, "learning_rate": 0.002, "loss": 2.3437, "step": 377160 }, { "epoch": 1.4580337400071128, "grad_norm": 0.09991408884525299, "learning_rate": 0.002, "loss": 2.3377, "step": 377170 }, { "epoch": 1.458072397210496, "grad_norm": 0.11415287107229233, "learning_rate": 0.002, "loss": 2.3397, "step": 377180 }, { "epoch": 1.4581110544138796, "grad_norm": 0.10838111490011215, "learning_rate": 0.002, "loss": 2.3239, "step": 377190 }, { "epoch": 1.4581497116172628, "grad_norm": 0.3309662938117981, "learning_rate": 0.002, "loss": 2.34, "step": 377200 }, { "epoch": 1.458188368820646, "grad_norm": 0.09142623096704483, "learning_rate": 0.002, "loss": 2.3437, "step": 377210 }, { "epoch": 1.4582270260240293, "grad_norm": 0.09920763224363327, "learning_rate": 0.002, "loss": 2.3299, "step": 377220 }, { "epoch": 1.4582656832274126, "grad_norm": 0.10411565005779266, "learning_rate": 0.002, "loss": 2.3355, "step": 377230 }, { "epoch": 1.4583043404307958, "grad_norm": 0.11159609258174896, "learning_rate": 0.002, "loss": 2.3299, "step": 377240 }, { "epoch": 1.458342997634179, "grad_norm": 0.14390446245670319, "learning_rate": 0.002, "loss": 2.3407, "step": 377250 }, { "epoch": 1.4583816548375625, "grad_norm": 0.11994519084692001, "learning_rate": 0.002, "loss": 2.337, "step": 377260 }, { "epoch": 1.4584203120409458, "grad_norm": 0.09185947477817535, "learning_rate": 0.002, "loss": 2.3313, "step": 377270 }, { "epoch": 1.458458969244329, "grad_norm": 0.0885382816195488, "learning_rate": 0.002, "loss": 2.3232, "step": 377280 }, { "epoch": 1.4584976264477123, "grad_norm": 0.10349847376346588, "learning_rate": 0.002, "loss": 2.3318, "step": 377290 }, { "epoch": 1.4585362836510956, "grad_norm": 0.10722818970680237, "learning_rate": 0.002, "loss": 2.3334, "step": 377300 }, { "epoch": 1.4585749408544788, "grad_norm": 0.10624206811189651, "learning_rate": 0.002, "loss": 2.3386, "step": 377310 }, { "epoch": 1.458613598057862, "grad_norm": 0.09444423019886017, "learning_rate": 0.002, "loss": 2.3306, "step": 377320 }, { "epoch": 1.4586522552612453, "grad_norm": 0.1137901172041893, "learning_rate": 0.002, "loss": 2.3447, "step": 377330 }, { "epoch": 1.4586909124646286, "grad_norm": 0.10508893430233002, "learning_rate": 0.002, "loss": 2.3385, "step": 377340 }, { "epoch": 1.4587295696680118, "grad_norm": 0.1037166640162468, "learning_rate": 0.002, "loss": 2.351, "step": 377350 }, { "epoch": 1.4587682268713953, "grad_norm": 0.11206698417663574, "learning_rate": 0.002, "loss": 2.3208, "step": 377360 }, { "epoch": 1.4588068840747785, "grad_norm": 0.11649308353662491, "learning_rate": 0.002, "loss": 2.3298, "step": 377370 }, { "epoch": 1.4588455412781618, "grad_norm": 0.08731440454721451, "learning_rate": 0.002, "loss": 2.3413, "step": 377380 }, { "epoch": 1.458884198481545, "grad_norm": 0.09099102020263672, "learning_rate": 0.002, "loss": 2.334, "step": 377390 }, { "epoch": 1.4589228556849283, "grad_norm": 0.0877019613981247, "learning_rate": 0.002, "loss": 2.3381, "step": 377400 }, { "epoch": 1.4589615128883116, "grad_norm": 0.1126909926533699, "learning_rate": 0.002, "loss": 2.3433, "step": 377410 }, { "epoch": 1.4590001700916948, "grad_norm": 0.10737772285938263, "learning_rate": 0.002, "loss": 2.3371, "step": 377420 }, { "epoch": 1.4590388272950783, "grad_norm": 0.1171116977930069, "learning_rate": 0.002, "loss": 2.3528, "step": 377430 }, { "epoch": 1.4590774844984615, "grad_norm": 0.10207915306091309, "learning_rate": 0.002, "loss": 2.3266, "step": 377440 }, { "epoch": 1.4591161417018448, "grad_norm": 0.10968824476003647, "learning_rate": 0.002, "loss": 2.3375, "step": 377450 }, { "epoch": 1.459154798905228, "grad_norm": 0.12337537109851837, "learning_rate": 0.002, "loss": 2.3474, "step": 377460 }, { "epoch": 1.4591934561086113, "grad_norm": 0.0989091545343399, "learning_rate": 0.002, "loss": 2.337, "step": 377470 }, { "epoch": 1.4592321133119945, "grad_norm": 0.10799914598464966, "learning_rate": 0.002, "loss": 2.3236, "step": 377480 }, { "epoch": 1.4592707705153778, "grad_norm": 0.10297781229019165, "learning_rate": 0.002, "loss": 2.3407, "step": 377490 }, { "epoch": 1.459309427718761, "grad_norm": 0.09718317538499832, "learning_rate": 0.002, "loss": 2.3217, "step": 377500 }, { "epoch": 1.4593480849221443, "grad_norm": 0.1368032544851303, "learning_rate": 0.002, "loss": 2.3321, "step": 377510 }, { "epoch": 1.4593867421255275, "grad_norm": 0.10432197153568268, "learning_rate": 0.002, "loss": 2.3441, "step": 377520 }, { "epoch": 1.459425399328911, "grad_norm": 0.10635431855916977, "learning_rate": 0.002, "loss": 2.3146, "step": 377530 }, { "epoch": 1.4594640565322943, "grad_norm": 0.10219579190015793, "learning_rate": 0.002, "loss": 2.3313, "step": 377540 }, { "epoch": 1.4595027137356775, "grad_norm": 0.09423661231994629, "learning_rate": 0.002, "loss": 2.3445, "step": 377550 }, { "epoch": 1.4595413709390608, "grad_norm": 0.09498999267816544, "learning_rate": 0.002, "loss": 2.3175, "step": 377560 }, { "epoch": 1.459580028142444, "grad_norm": 0.10256657749414444, "learning_rate": 0.002, "loss": 2.3351, "step": 377570 }, { "epoch": 1.4596186853458273, "grad_norm": 0.10926370322704315, "learning_rate": 0.002, "loss": 2.3368, "step": 377580 }, { "epoch": 1.4596573425492108, "grad_norm": 0.10336575657129288, "learning_rate": 0.002, "loss": 2.3306, "step": 377590 }, { "epoch": 1.459695999752594, "grad_norm": 0.11170030385255814, "learning_rate": 0.002, "loss": 2.3331, "step": 377600 }, { "epoch": 1.4597346569559773, "grad_norm": 0.11618107557296753, "learning_rate": 0.002, "loss": 2.3358, "step": 377610 }, { "epoch": 1.4597733141593605, "grad_norm": 0.10613735765218735, "learning_rate": 0.002, "loss": 2.3315, "step": 377620 }, { "epoch": 1.4598119713627438, "grad_norm": 0.1042288988828659, "learning_rate": 0.002, "loss": 2.3226, "step": 377630 }, { "epoch": 1.459850628566127, "grad_norm": 0.09028930217027664, "learning_rate": 0.002, "loss": 2.3212, "step": 377640 }, { "epoch": 1.4598892857695103, "grad_norm": 0.11619842052459717, "learning_rate": 0.002, "loss": 2.3385, "step": 377650 }, { "epoch": 1.4599279429728935, "grad_norm": 0.24519114196300507, "learning_rate": 0.002, "loss": 2.337, "step": 377660 }, { "epoch": 1.4599666001762768, "grad_norm": 0.09756876528263092, "learning_rate": 0.002, "loss": 2.3307, "step": 377670 }, { "epoch": 1.46000525737966, "grad_norm": 0.1016656905412674, "learning_rate": 0.002, "loss": 2.3295, "step": 377680 }, { "epoch": 1.4600439145830433, "grad_norm": 0.09221766144037247, "learning_rate": 0.002, "loss": 2.3269, "step": 377690 }, { "epoch": 1.4600825717864268, "grad_norm": 0.10488689690828323, "learning_rate": 0.002, "loss": 2.3454, "step": 377700 }, { "epoch": 1.46012122898981, "grad_norm": 0.10696990042924881, "learning_rate": 0.002, "loss": 2.3362, "step": 377710 }, { "epoch": 1.4601598861931933, "grad_norm": 0.10517062246799469, "learning_rate": 0.002, "loss": 2.3251, "step": 377720 }, { "epoch": 1.4601985433965765, "grad_norm": 0.10095066577196121, "learning_rate": 0.002, "loss": 2.3392, "step": 377730 }, { "epoch": 1.4602372005999598, "grad_norm": 0.09365548193454742, "learning_rate": 0.002, "loss": 2.3176, "step": 377740 }, { "epoch": 1.460275857803343, "grad_norm": 0.11912385374307632, "learning_rate": 0.002, "loss": 2.3465, "step": 377750 }, { "epoch": 1.4603145150067265, "grad_norm": 0.1095656305551529, "learning_rate": 0.002, "loss": 2.3078, "step": 377760 }, { "epoch": 1.4603531722101097, "grad_norm": 0.0963665023446083, "learning_rate": 0.002, "loss": 2.3339, "step": 377770 }, { "epoch": 1.460391829413493, "grad_norm": 0.10575723648071289, "learning_rate": 0.002, "loss": 2.3179, "step": 377780 }, { "epoch": 1.4604304866168762, "grad_norm": 0.10483500361442566, "learning_rate": 0.002, "loss": 2.3331, "step": 377790 }, { "epoch": 1.4604691438202595, "grad_norm": 0.10449696332216263, "learning_rate": 0.002, "loss": 2.3307, "step": 377800 }, { "epoch": 1.4605078010236427, "grad_norm": 0.12368028610944748, "learning_rate": 0.002, "loss": 2.3459, "step": 377810 }, { "epoch": 1.460546458227026, "grad_norm": 0.09814267605543137, "learning_rate": 0.002, "loss": 2.3291, "step": 377820 }, { "epoch": 1.4605851154304093, "grad_norm": 0.09859205782413483, "learning_rate": 0.002, "loss": 2.3255, "step": 377830 }, { "epoch": 1.4606237726337925, "grad_norm": 0.08651961386203766, "learning_rate": 0.002, "loss": 2.333, "step": 377840 }, { "epoch": 1.4606624298371758, "grad_norm": 0.11479639261960983, "learning_rate": 0.002, "loss": 2.3283, "step": 377850 }, { "epoch": 1.460701087040559, "grad_norm": 0.10261834412813187, "learning_rate": 0.002, "loss": 2.3291, "step": 377860 }, { "epoch": 1.4607397442439425, "grad_norm": 0.09389077872037888, "learning_rate": 0.002, "loss": 2.3317, "step": 377870 }, { "epoch": 1.4607784014473257, "grad_norm": 0.10234487801790237, "learning_rate": 0.002, "loss": 2.3387, "step": 377880 }, { "epoch": 1.460817058650709, "grad_norm": 0.1089874878525734, "learning_rate": 0.002, "loss": 2.3287, "step": 377890 }, { "epoch": 1.4608557158540922, "grad_norm": 0.12259659171104431, "learning_rate": 0.002, "loss": 2.341, "step": 377900 }, { "epoch": 1.4608943730574755, "grad_norm": 0.10895906388759613, "learning_rate": 0.002, "loss": 2.3476, "step": 377910 }, { "epoch": 1.4609330302608587, "grad_norm": 0.21146343648433685, "learning_rate": 0.002, "loss": 2.3311, "step": 377920 }, { "epoch": 1.4609716874642422, "grad_norm": 0.0988226905465126, "learning_rate": 0.002, "loss": 2.3247, "step": 377930 }, { "epoch": 1.4610103446676255, "grad_norm": 0.10519076883792877, "learning_rate": 0.002, "loss": 2.3434, "step": 377940 }, { "epoch": 1.4610490018710087, "grad_norm": 0.09804467856884003, "learning_rate": 0.002, "loss": 2.3386, "step": 377950 }, { "epoch": 1.461087659074392, "grad_norm": 0.08854498714208603, "learning_rate": 0.002, "loss": 2.3334, "step": 377960 }, { "epoch": 1.4611263162777752, "grad_norm": 0.11327465623617172, "learning_rate": 0.002, "loss": 2.3375, "step": 377970 }, { "epoch": 1.4611649734811585, "grad_norm": 0.09992703795433044, "learning_rate": 0.002, "loss": 2.3385, "step": 377980 }, { "epoch": 1.4612036306845417, "grad_norm": 0.10690799355506897, "learning_rate": 0.002, "loss": 2.3184, "step": 377990 }, { "epoch": 1.461242287887925, "grad_norm": 0.11535067856311798, "learning_rate": 0.002, "loss": 2.3312, "step": 378000 }, { "epoch": 1.4612809450913082, "grad_norm": 0.11228501796722412, "learning_rate": 0.002, "loss": 2.3284, "step": 378010 }, { "epoch": 1.4613196022946915, "grad_norm": 0.1051812395453453, "learning_rate": 0.002, "loss": 2.3387, "step": 378020 }, { "epoch": 1.4613582594980747, "grad_norm": 0.09751439839601517, "learning_rate": 0.002, "loss": 2.3369, "step": 378030 }, { "epoch": 1.4613969167014582, "grad_norm": 0.12290656566619873, "learning_rate": 0.002, "loss": 2.3187, "step": 378040 }, { "epoch": 1.4614355739048415, "grad_norm": 0.11187057942152023, "learning_rate": 0.002, "loss": 2.3271, "step": 378050 }, { "epoch": 1.4614742311082247, "grad_norm": 0.11574528366327286, "learning_rate": 0.002, "loss": 2.351, "step": 378060 }, { "epoch": 1.461512888311608, "grad_norm": 0.2262689471244812, "learning_rate": 0.002, "loss": 2.3227, "step": 378070 }, { "epoch": 1.4615515455149912, "grad_norm": 0.10998236387968063, "learning_rate": 0.002, "loss": 2.354, "step": 378080 }, { "epoch": 1.4615902027183745, "grad_norm": 0.12994958460330963, "learning_rate": 0.002, "loss": 2.3474, "step": 378090 }, { "epoch": 1.461628859921758, "grad_norm": 1.093963384628296, "learning_rate": 0.002, "loss": 2.3421, "step": 378100 }, { "epoch": 1.4616675171251412, "grad_norm": 0.12165454775094986, "learning_rate": 0.002, "loss": 2.3522, "step": 378110 }, { "epoch": 1.4617061743285245, "grad_norm": 0.1254856437444687, "learning_rate": 0.002, "loss": 2.343, "step": 378120 }, { "epoch": 1.4617448315319077, "grad_norm": 0.09542275965213776, "learning_rate": 0.002, "loss": 2.3466, "step": 378130 }, { "epoch": 1.461783488735291, "grad_norm": 0.11822837591171265, "learning_rate": 0.002, "loss": 2.344, "step": 378140 }, { "epoch": 1.4618221459386742, "grad_norm": 0.10291677713394165, "learning_rate": 0.002, "loss": 2.3396, "step": 378150 }, { "epoch": 1.4618608031420575, "grad_norm": 0.09813756495714188, "learning_rate": 0.002, "loss": 2.3295, "step": 378160 }, { "epoch": 1.4618994603454407, "grad_norm": 0.15027743577957153, "learning_rate": 0.002, "loss": 2.3358, "step": 378170 }, { "epoch": 1.461938117548824, "grad_norm": 0.10020308941602707, "learning_rate": 0.002, "loss": 2.3463, "step": 378180 }, { "epoch": 1.4619767747522072, "grad_norm": 0.21416075527668, "learning_rate": 0.002, "loss": 2.3398, "step": 378190 }, { "epoch": 1.4620154319555905, "grad_norm": 0.10870862007141113, "learning_rate": 0.002, "loss": 2.3411, "step": 378200 }, { "epoch": 1.462054089158974, "grad_norm": 0.10715006291866302, "learning_rate": 0.002, "loss": 2.3345, "step": 378210 }, { "epoch": 1.4620927463623572, "grad_norm": 0.10555832833051682, "learning_rate": 0.002, "loss": 2.3386, "step": 378220 }, { "epoch": 1.4621314035657405, "grad_norm": 0.1517215073108673, "learning_rate": 0.002, "loss": 2.3349, "step": 378230 }, { "epoch": 1.4621700607691237, "grad_norm": 0.1058911606669426, "learning_rate": 0.002, "loss": 2.3191, "step": 378240 }, { "epoch": 1.462208717972507, "grad_norm": 0.09682868421077728, "learning_rate": 0.002, "loss": 2.3453, "step": 378250 }, { "epoch": 1.4622473751758902, "grad_norm": 0.09074389934539795, "learning_rate": 0.002, "loss": 2.3324, "step": 378260 }, { "epoch": 1.4622860323792737, "grad_norm": 0.11195127665996552, "learning_rate": 0.002, "loss": 2.3276, "step": 378270 }, { "epoch": 1.462324689582657, "grad_norm": 0.1032756045460701, "learning_rate": 0.002, "loss": 2.3291, "step": 378280 }, { "epoch": 1.4623633467860402, "grad_norm": 0.10251615196466446, "learning_rate": 0.002, "loss": 2.3426, "step": 378290 }, { "epoch": 1.4624020039894234, "grad_norm": 0.11106961965560913, "learning_rate": 0.002, "loss": 2.316, "step": 378300 }, { "epoch": 1.4624406611928067, "grad_norm": 0.10614031553268433, "learning_rate": 0.002, "loss": 2.3256, "step": 378310 }, { "epoch": 1.46247931839619, "grad_norm": 0.1287071406841278, "learning_rate": 0.002, "loss": 2.3155, "step": 378320 }, { "epoch": 1.4625179755995732, "grad_norm": 0.11179511249065399, "learning_rate": 0.002, "loss": 2.3564, "step": 378330 }, { "epoch": 1.4625566328029564, "grad_norm": 0.11233927309513092, "learning_rate": 0.002, "loss": 2.3202, "step": 378340 }, { "epoch": 1.4625952900063397, "grad_norm": 0.13310958445072174, "learning_rate": 0.002, "loss": 2.3406, "step": 378350 }, { "epoch": 1.462633947209723, "grad_norm": 0.11643460392951965, "learning_rate": 0.002, "loss": 2.3491, "step": 378360 }, { "epoch": 1.4626726044131062, "grad_norm": 0.115506611764431, "learning_rate": 0.002, "loss": 2.3322, "step": 378370 }, { "epoch": 1.4627112616164897, "grad_norm": 0.1059376448392868, "learning_rate": 0.002, "loss": 2.3184, "step": 378380 }, { "epoch": 1.462749918819873, "grad_norm": 0.12319108843803406, "learning_rate": 0.002, "loss": 2.3307, "step": 378390 }, { "epoch": 1.4627885760232562, "grad_norm": 0.09284790605306625, "learning_rate": 0.002, "loss": 2.3297, "step": 378400 }, { "epoch": 1.4628272332266394, "grad_norm": 0.1385800689458847, "learning_rate": 0.002, "loss": 2.3381, "step": 378410 }, { "epoch": 1.4628658904300227, "grad_norm": 0.11187001317739487, "learning_rate": 0.002, "loss": 2.3397, "step": 378420 }, { "epoch": 1.462904547633406, "grad_norm": 0.1008906215429306, "learning_rate": 0.002, "loss": 2.3287, "step": 378430 }, { "epoch": 1.4629432048367894, "grad_norm": 0.11877201497554779, "learning_rate": 0.002, "loss": 2.3217, "step": 378440 }, { "epoch": 1.4629818620401727, "grad_norm": 0.1024615615606308, "learning_rate": 0.002, "loss": 2.3368, "step": 378450 }, { "epoch": 1.463020519243556, "grad_norm": 0.10329898446798325, "learning_rate": 0.002, "loss": 2.3317, "step": 378460 }, { "epoch": 1.4630591764469392, "grad_norm": 0.1304638385772705, "learning_rate": 0.002, "loss": 2.3398, "step": 378470 }, { "epoch": 1.4630978336503224, "grad_norm": 0.09835382550954819, "learning_rate": 0.002, "loss": 2.3386, "step": 378480 }, { "epoch": 1.4631364908537057, "grad_norm": 0.11532345414161682, "learning_rate": 0.002, "loss": 2.3388, "step": 378490 }, { "epoch": 1.463175148057089, "grad_norm": 0.09121698886156082, "learning_rate": 0.002, "loss": 2.3321, "step": 378500 }, { "epoch": 1.4632138052604722, "grad_norm": 0.10565804690122604, "learning_rate": 0.002, "loss": 2.345, "step": 378510 }, { "epoch": 1.4632524624638554, "grad_norm": 0.10543417930603027, "learning_rate": 0.002, "loss": 2.3318, "step": 378520 }, { "epoch": 1.4632911196672387, "grad_norm": 0.0975838452577591, "learning_rate": 0.002, "loss": 2.3345, "step": 378530 }, { "epoch": 1.463329776870622, "grad_norm": 0.09796416759490967, "learning_rate": 0.002, "loss": 2.3302, "step": 378540 }, { "epoch": 1.4633684340740054, "grad_norm": 0.1139095202088356, "learning_rate": 0.002, "loss": 2.3296, "step": 378550 }, { "epoch": 1.4634070912773887, "grad_norm": 0.1038951724767685, "learning_rate": 0.002, "loss": 2.322, "step": 378560 }, { "epoch": 1.463445748480772, "grad_norm": 0.11512892693281174, "learning_rate": 0.002, "loss": 2.3379, "step": 378570 }, { "epoch": 1.4634844056841552, "grad_norm": 0.10317057371139526, "learning_rate": 0.002, "loss": 2.3208, "step": 378580 }, { "epoch": 1.4635230628875384, "grad_norm": 0.09820453077554703, "learning_rate": 0.002, "loss": 2.3177, "step": 378590 }, { "epoch": 1.4635617200909217, "grad_norm": 0.09886424243450165, "learning_rate": 0.002, "loss": 2.3297, "step": 378600 }, { "epoch": 1.4636003772943051, "grad_norm": 0.09394435584545135, "learning_rate": 0.002, "loss": 2.3312, "step": 378610 }, { "epoch": 1.4636390344976884, "grad_norm": 0.09522797167301178, "learning_rate": 0.002, "loss": 2.3092, "step": 378620 }, { "epoch": 1.4636776917010716, "grad_norm": 0.13810744881629944, "learning_rate": 0.002, "loss": 2.3425, "step": 378630 }, { "epoch": 1.463716348904455, "grad_norm": 0.10484451800584793, "learning_rate": 0.002, "loss": 2.3294, "step": 378640 }, { "epoch": 1.4637550061078382, "grad_norm": 0.10630256682634354, "learning_rate": 0.002, "loss": 2.3368, "step": 378650 }, { "epoch": 1.4637936633112214, "grad_norm": 0.10911909490823746, "learning_rate": 0.002, "loss": 2.348, "step": 378660 }, { "epoch": 1.4638323205146047, "grad_norm": 0.10513444989919662, "learning_rate": 0.002, "loss": 2.3133, "step": 378670 }, { "epoch": 1.463870977717988, "grad_norm": 0.12285730987787247, "learning_rate": 0.002, "loss": 2.3508, "step": 378680 }, { "epoch": 1.4639096349213712, "grad_norm": 0.10793665051460266, "learning_rate": 0.002, "loss": 2.3258, "step": 378690 }, { "epoch": 1.4639482921247544, "grad_norm": 0.12629415094852448, "learning_rate": 0.002, "loss": 2.3518, "step": 378700 }, { "epoch": 1.4639869493281379, "grad_norm": 0.10023588687181473, "learning_rate": 0.002, "loss": 2.3352, "step": 378710 }, { "epoch": 1.4640256065315211, "grad_norm": 0.13200624287128448, "learning_rate": 0.002, "loss": 2.3187, "step": 378720 }, { "epoch": 1.4640642637349044, "grad_norm": 0.1099928468465805, "learning_rate": 0.002, "loss": 2.3131, "step": 378730 }, { "epoch": 1.4641029209382876, "grad_norm": 0.11238476634025574, "learning_rate": 0.002, "loss": 2.3324, "step": 378740 }, { "epoch": 1.464141578141671, "grad_norm": 0.08446121960878372, "learning_rate": 0.002, "loss": 2.3464, "step": 378750 }, { "epoch": 1.4641802353450541, "grad_norm": 0.12352073937654495, "learning_rate": 0.002, "loss": 2.3487, "step": 378760 }, { "epoch": 1.4642188925484374, "grad_norm": 0.09753142297267914, "learning_rate": 0.002, "loss": 2.3329, "step": 378770 }, { "epoch": 1.4642575497518209, "grad_norm": 0.1746557056903839, "learning_rate": 0.002, "loss": 2.336, "step": 378780 }, { "epoch": 1.4642962069552041, "grad_norm": 0.11375793814659119, "learning_rate": 0.002, "loss": 2.3465, "step": 378790 }, { "epoch": 1.4643348641585874, "grad_norm": 0.11853129416704178, "learning_rate": 0.002, "loss": 2.3377, "step": 378800 }, { "epoch": 1.4643735213619706, "grad_norm": 0.10554022341966629, "learning_rate": 0.002, "loss": 2.3223, "step": 378810 }, { "epoch": 1.4644121785653539, "grad_norm": 0.10978643596172333, "learning_rate": 0.002, "loss": 2.3321, "step": 378820 }, { "epoch": 1.4644508357687371, "grad_norm": 0.10757063329219818, "learning_rate": 0.002, "loss": 2.3363, "step": 378830 }, { "epoch": 1.4644894929721204, "grad_norm": 0.09296415001153946, "learning_rate": 0.002, "loss": 2.3379, "step": 378840 }, { "epoch": 1.4645281501755036, "grad_norm": 0.11280360817909241, "learning_rate": 0.002, "loss": 2.3139, "step": 378850 }, { "epoch": 1.464566807378887, "grad_norm": 0.11102467775344849, "learning_rate": 0.002, "loss": 2.3323, "step": 378860 }, { "epoch": 1.4646054645822701, "grad_norm": 0.10691647231578827, "learning_rate": 0.002, "loss": 2.3425, "step": 378870 }, { "epoch": 1.4646441217856536, "grad_norm": 0.11569046974182129, "learning_rate": 0.002, "loss": 2.3342, "step": 378880 }, { "epoch": 1.4646827789890369, "grad_norm": 0.10578905045986176, "learning_rate": 0.002, "loss": 2.3361, "step": 378890 }, { "epoch": 1.4647214361924201, "grad_norm": 0.11721711605787277, "learning_rate": 0.002, "loss": 2.342, "step": 378900 }, { "epoch": 1.4647600933958034, "grad_norm": 0.10432325303554535, "learning_rate": 0.002, "loss": 2.3199, "step": 378910 }, { "epoch": 1.4647987505991866, "grad_norm": 0.09794896841049194, "learning_rate": 0.002, "loss": 2.3238, "step": 378920 }, { "epoch": 1.4648374078025699, "grad_norm": 0.10016597807407379, "learning_rate": 0.002, "loss": 2.3319, "step": 378930 }, { "epoch": 1.4648760650059531, "grad_norm": 0.09569735080003738, "learning_rate": 0.002, "loss": 2.3177, "step": 378940 }, { "epoch": 1.4649147222093366, "grad_norm": 0.11742109805345535, "learning_rate": 0.002, "loss": 2.3263, "step": 378950 }, { "epoch": 1.4649533794127199, "grad_norm": 0.13616660237312317, "learning_rate": 0.002, "loss": 2.3206, "step": 378960 }, { "epoch": 1.464992036616103, "grad_norm": 0.09571486711502075, "learning_rate": 0.002, "loss": 2.3264, "step": 378970 }, { "epoch": 1.4650306938194864, "grad_norm": 0.09083391726016998, "learning_rate": 0.002, "loss": 2.3271, "step": 378980 }, { "epoch": 1.4650693510228696, "grad_norm": 0.10643785446882248, "learning_rate": 0.002, "loss": 2.3403, "step": 378990 }, { "epoch": 1.4651080082262529, "grad_norm": 0.11423055082559586, "learning_rate": 0.002, "loss": 2.3389, "step": 379000 }, { "epoch": 1.4651466654296361, "grad_norm": 0.10717245191335678, "learning_rate": 0.002, "loss": 2.3264, "step": 379010 }, { "epoch": 1.4651853226330194, "grad_norm": 0.11718068271875381, "learning_rate": 0.002, "loss": 2.3238, "step": 379020 }, { "epoch": 1.4652239798364026, "grad_norm": 0.09448101371526718, "learning_rate": 0.002, "loss": 2.3416, "step": 379030 }, { "epoch": 1.4652626370397859, "grad_norm": 0.10101408511400223, "learning_rate": 0.002, "loss": 2.337, "step": 379040 }, { "epoch": 1.4653012942431693, "grad_norm": 0.09884758293628693, "learning_rate": 0.002, "loss": 2.3271, "step": 379050 }, { "epoch": 1.4653399514465526, "grad_norm": 0.10572511702775955, "learning_rate": 0.002, "loss": 2.3251, "step": 379060 }, { "epoch": 1.4653786086499359, "grad_norm": 0.12739481031894684, "learning_rate": 0.002, "loss": 2.336, "step": 379070 }, { "epoch": 1.465417265853319, "grad_norm": 0.09359881281852722, "learning_rate": 0.002, "loss": 2.3282, "step": 379080 }, { "epoch": 1.4654559230567024, "grad_norm": 0.17386668920516968, "learning_rate": 0.002, "loss": 2.3275, "step": 379090 }, { "epoch": 1.4654945802600856, "grad_norm": 0.11996670067310333, "learning_rate": 0.002, "loss": 2.3359, "step": 379100 }, { "epoch": 1.4655332374634689, "grad_norm": 0.1109367087483406, "learning_rate": 0.002, "loss": 2.3424, "step": 379110 }, { "epoch": 1.4655718946668523, "grad_norm": 0.10811998695135117, "learning_rate": 0.002, "loss": 2.3254, "step": 379120 }, { "epoch": 1.4656105518702356, "grad_norm": 0.10904763638973236, "learning_rate": 0.002, "loss": 2.3334, "step": 379130 }, { "epoch": 1.4656492090736188, "grad_norm": 0.10743703693151474, "learning_rate": 0.002, "loss": 2.3288, "step": 379140 }, { "epoch": 1.465687866277002, "grad_norm": 0.09433609992265701, "learning_rate": 0.002, "loss": 2.3388, "step": 379150 }, { "epoch": 1.4657265234803853, "grad_norm": 0.09775488823652267, "learning_rate": 0.002, "loss": 2.3309, "step": 379160 }, { "epoch": 1.4657651806837686, "grad_norm": 0.11015506088733673, "learning_rate": 0.002, "loss": 2.3166, "step": 379170 }, { "epoch": 1.4658038378871519, "grad_norm": 0.1063729077577591, "learning_rate": 0.002, "loss": 2.3422, "step": 379180 }, { "epoch": 1.465842495090535, "grad_norm": 0.11362296342849731, "learning_rate": 0.002, "loss": 2.3362, "step": 379190 }, { "epoch": 1.4658811522939184, "grad_norm": 0.11729138344526291, "learning_rate": 0.002, "loss": 2.3286, "step": 379200 }, { "epoch": 1.4659198094973016, "grad_norm": 0.09225037693977356, "learning_rate": 0.002, "loss": 2.3372, "step": 379210 }, { "epoch": 1.465958466700685, "grad_norm": 0.0946132019162178, "learning_rate": 0.002, "loss": 2.3478, "step": 379220 }, { "epoch": 1.4659971239040683, "grad_norm": 0.1005450114607811, "learning_rate": 0.002, "loss": 2.3558, "step": 379230 }, { "epoch": 1.4660357811074516, "grad_norm": 0.11385586857795715, "learning_rate": 0.002, "loss": 2.327, "step": 379240 }, { "epoch": 1.4660744383108348, "grad_norm": 0.09891478717327118, "learning_rate": 0.002, "loss": 2.3316, "step": 379250 }, { "epoch": 1.466113095514218, "grad_norm": 0.09788984805345535, "learning_rate": 0.002, "loss": 2.3372, "step": 379260 }, { "epoch": 1.4661517527176013, "grad_norm": 0.09577134996652603, "learning_rate": 0.002, "loss": 2.3165, "step": 379270 }, { "epoch": 1.4661904099209846, "grad_norm": 0.11400555819272995, "learning_rate": 0.002, "loss": 2.3218, "step": 379280 }, { "epoch": 1.466229067124368, "grad_norm": 0.0947289690375328, "learning_rate": 0.002, "loss": 2.335, "step": 379290 }, { "epoch": 1.4662677243277513, "grad_norm": 0.10459941625595093, "learning_rate": 0.002, "loss": 2.3174, "step": 379300 }, { "epoch": 1.4663063815311346, "grad_norm": 0.13126231729984283, "learning_rate": 0.002, "loss": 2.3279, "step": 379310 }, { "epoch": 1.4663450387345178, "grad_norm": 0.0940525233745575, "learning_rate": 0.002, "loss": 2.3126, "step": 379320 }, { "epoch": 1.466383695937901, "grad_norm": 0.09635006636381149, "learning_rate": 0.002, "loss": 2.3292, "step": 379330 }, { "epoch": 1.4664223531412843, "grad_norm": 0.11484242975711823, "learning_rate": 0.002, "loss": 2.3466, "step": 379340 }, { "epoch": 1.4664610103446676, "grad_norm": 0.10275579243898392, "learning_rate": 0.002, "loss": 2.3335, "step": 379350 }, { "epoch": 1.4664996675480508, "grad_norm": 0.1047661229968071, "learning_rate": 0.002, "loss": 2.3395, "step": 379360 }, { "epoch": 1.466538324751434, "grad_norm": 0.08567144721746445, "learning_rate": 0.002, "loss": 2.3336, "step": 379370 }, { "epoch": 1.4665769819548173, "grad_norm": 0.4120446741580963, "learning_rate": 0.002, "loss": 2.3262, "step": 379380 }, { "epoch": 1.4666156391582008, "grad_norm": 0.11681369692087173, "learning_rate": 0.002, "loss": 2.3312, "step": 379390 }, { "epoch": 1.466654296361584, "grad_norm": 0.12154824286699295, "learning_rate": 0.002, "loss": 2.3395, "step": 379400 }, { "epoch": 1.4666929535649673, "grad_norm": 0.09770403057336807, "learning_rate": 0.002, "loss": 2.3286, "step": 379410 }, { "epoch": 1.4667316107683506, "grad_norm": 0.09427760541439056, "learning_rate": 0.002, "loss": 2.3382, "step": 379420 }, { "epoch": 1.4667702679717338, "grad_norm": 0.10652682930231094, "learning_rate": 0.002, "loss": 2.3397, "step": 379430 }, { "epoch": 1.466808925175117, "grad_norm": 0.1023586243391037, "learning_rate": 0.002, "loss": 2.3236, "step": 379440 }, { "epoch": 1.4668475823785005, "grad_norm": 0.10801868885755539, "learning_rate": 0.002, "loss": 2.3257, "step": 379450 }, { "epoch": 1.4668862395818838, "grad_norm": 0.10652651637792587, "learning_rate": 0.002, "loss": 2.3424, "step": 379460 }, { "epoch": 1.466924896785267, "grad_norm": 0.1005072072148323, "learning_rate": 0.002, "loss": 2.3345, "step": 379470 }, { "epoch": 1.4669635539886503, "grad_norm": 0.1006920337677002, "learning_rate": 0.002, "loss": 2.3297, "step": 379480 }, { "epoch": 1.4670022111920336, "grad_norm": 0.12021995335817337, "learning_rate": 0.002, "loss": 2.3315, "step": 379490 }, { "epoch": 1.4670408683954168, "grad_norm": 0.11323700100183487, "learning_rate": 0.002, "loss": 2.331, "step": 379500 }, { "epoch": 1.4670795255988, "grad_norm": 0.10849696397781372, "learning_rate": 0.002, "loss": 2.3426, "step": 379510 }, { "epoch": 1.4671181828021833, "grad_norm": 0.09491071850061417, "learning_rate": 0.002, "loss": 2.3211, "step": 379520 }, { "epoch": 1.4671568400055666, "grad_norm": 0.1152421236038208, "learning_rate": 0.002, "loss": 2.3301, "step": 379530 }, { "epoch": 1.4671954972089498, "grad_norm": 0.0976879745721817, "learning_rate": 0.002, "loss": 2.3211, "step": 379540 }, { "epoch": 1.467234154412333, "grad_norm": 0.13243097066879272, "learning_rate": 0.002, "loss": 2.3419, "step": 379550 }, { "epoch": 1.4672728116157165, "grad_norm": 0.09705587476491928, "learning_rate": 0.002, "loss": 2.3253, "step": 379560 }, { "epoch": 1.4673114688190998, "grad_norm": 0.09247449040412903, "learning_rate": 0.002, "loss": 2.3322, "step": 379570 }, { "epoch": 1.467350126022483, "grad_norm": 0.14580531418323517, "learning_rate": 0.002, "loss": 2.3348, "step": 379580 }, { "epoch": 1.4673887832258663, "grad_norm": 0.11156206578016281, "learning_rate": 0.002, "loss": 2.3274, "step": 379590 }, { "epoch": 1.4674274404292496, "grad_norm": 0.11129026859998703, "learning_rate": 0.002, "loss": 2.3335, "step": 379600 }, { "epoch": 1.4674660976326328, "grad_norm": 0.1293782740831375, "learning_rate": 0.002, "loss": 2.3191, "step": 379610 }, { "epoch": 1.4675047548360163, "grad_norm": 0.10700634866952896, "learning_rate": 0.002, "loss": 2.3409, "step": 379620 }, { "epoch": 1.4675434120393995, "grad_norm": 0.09107174724340439, "learning_rate": 0.002, "loss": 2.3345, "step": 379630 }, { "epoch": 1.4675820692427828, "grad_norm": 0.0936293751001358, "learning_rate": 0.002, "loss": 2.3336, "step": 379640 }, { "epoch": 1.467620726446166, "grad_norm": 0.09288164973258972, "learning_rate": 0.002, "loss": 2.3315, "step": 379650 }, { "epoch": 1.4676593836495493, "grad_norm": 0.11261691898107529, "learning_rate": 0.002, "loss": 2.3299, "step": 379660 }, { "epoch": 1.4676980408529325, "grad_norm": 0.10153496265411377, "learning_rate": 0.002, "loss": 2.3192, "step": 379670 }, { "epoch": 1.4677366980563158, "grad_norm": 0.104087233543396, "learning_rate": 0.002, "loss": 2.3124, "step": 379680 }, { "epoch": 1.467775355259699, "grad_norm": 0.10559988766908646, "learning_rate": 0.002, "loss": 2.3323, "step": 379690 }, { "epoch": 1.4678140124630823, "grad_norm": 0.09458968043327332, "learning_rate": 0.002, "loss": 2.3391, "step": 379700 }, { "epoch": 1.4678526696664655, "grad_norm": 0.09753791242837906, "learning_rate": 0.002, "loss": 2.3343, "step": 379710 }, { "epoch": 1.4678913268698488, "grad_norm": 0.10440224409103394, "learning_rate": 0.002, "loss": 2.3321, "step": 379720 }, { "epoch": 1.4679299840732323, "grad_norm": 0.11204987019300461, "learning_rate": 0.002, "loss": 2.3235, "step": 379730 }, { "epoch": 1.4679686412766155, "grad_norm": 0.09139106422662735, "learning_rate": 0.002, "loss": 2.331, "step": 379740 }, { "epoch": 1.4680072984799988, "grad_norm": 0.09165173023939133, "learning_rate": 0.002, "loss": 2.3435, "step": 379750 }, { "epoch": 1.468045955683382, "grad_norm": 0.11202359199523926, "learning_rate": 0.002, "loss": 2.3313, "step": 379760 }, { "epoch": 1.4680846128867653, "grad_norm": 0.11179552227258682, "learning_rate": 0.002, "loss": 2.3369, "step": 379770 }, { "epoch": 1.4681232700901485, "grad_norm": 0.1006980687379837, "learning_rate": 0.002, "loss": 2.3393, "step": 379780 }, { "epoch": 1.468161927293532, "grad_norm": 0.09841970354318619, "learning_rate": 0.002, "loss": 2.3255, "step": 379790 }, { "epoch": 1.4682005844969153, "grad_norm": 0.10231813788414001, "learning_rate": 0.002, "loss": 2.3427, "step": 379800 }, { "epoch": 1.4682392417002985, "grad_norm": 0.11085063964128494, "learning_rate": 0.002, "loss": 2.3336, "step": 379810 }, { "epoch": 1.4682778989036818, "grad_norm": 0.09536705911159515, "learning_rate": 0.002, "loss": 2.334, "step": 379820 }, { "epoch": 1.468316556107065, "grad_norm": 0.10429663956165314, "learning_rate": 0.002, "loss": 2.3178, "step": 379830 }, { "epoch": 1.4683552133104483, "grad_norm": 0.10179862380027771, "learning_rate": 0.002, "loss": 2.3422, "step": 379840 }, { "epoch": 1.4683938705138315, "grad_norm": 0.10341744124889374, "learning_rate": 0.002, "loss": 2.3223, "step": 379850 }, { "epoch": 1.4684325277172148, "grad_norm": 0.10987520217895508, "learning_rate": 0.002, "loss": 2.3158, "step": 379860 }, { "epoch": 1.468471184920598, "grad_norm": 0.1209501177072525, "learning_rate": 0.002, "loss": 2.3181, "step": 379870 }, { "epoch": 1.4685098421239813, "grad_norm": 0.10270653665065765, "learning_rate": 0.002, "loss": 2.3264, "step": 379880 }, { "epoch": 1.4685484993273645, "grad_norm": 0.10785645246505737, "learning_rate": 0.002, "loss": 2.3344, "step": 379890 }, { "epoch": 1.468587156530748, "grad_norm": 0.10725623369216919, "learning_rate": 0.002, "loss": 2.3176, "step": 379900 }, { "epoch": 1.4686258137341313, "grad_norm": 0.09680724889039993, "learning_rate": 0.002, "loss": 2.3145, "step": 379910 }, { "epoch": 1.4686644709375145, "grad_norm": 0.11619780212640762, "learning_rate": 0.002, "loss": 2.3213, "step": 379920 }, { "epoch": 1.4687031281408978, "grad_norm": 0.10926903039216995, "learning_rate": 0.002, "loss": 2.3473, "step": 379930 }, { "epoch": 1.468741785344281, "grad_norm": 0.09844309836626053, "learning_rate": 0.002, "loss": 2.3403, "step": 379940 }, { "epoch": 1.4687804425476643, "grad_norm": 0.11584539711475372, "learning_rate": 0.002, "loss": 2.3426, "step": 379950 }, { "epoch": 1.4688190997510477, "grad_norm": 0.09956274181604385, "learning_rate": 0.002, "loss": 2.3216, "step": 379960 }, { "epoch": 1.468857756954431, "grad_norm": 0.10570421069860458, "learning_rate": 0.002, "loss": 2.3247, "step": 379970 }, { "epoch": 1.4688964141578142, "grad_norm": 0.09156245738267899, "learning_rate": 0.002, "loss": 2.3326, "step": 379980 }, { "epoch": 1.4689350713611975, "grad_norm": 0.11181099712848663, "learning_rate": 0.002, "loss": 2.3297, "step": 379990 }, { "epoch": 1.4689737285645807, "grad_norm": 0.09745943546295166, "learning_rate": 0.002, "loss": 2.3108, "step": 380000 }, { "epoch": 1.469012385767964, "grad_norm": 0.09387210011482239, "learning_rate": 0.002, "loss": 2.3172, "step": 380010 }, { "epoch": 1.4690510429713473, "grad_norm": 0.11295516043901443, "learning_rate": 0.002, "loss": 2.3158, "step": 380020 }, { "epoch": 1.4690897001747305, "grad_norm": 0.10584930330514908, "learning_rate": 0.002, "loss": 2.3314, "step": 380030 }, { "epoch": 1.4691283573781138, "grad_norm": 0.11459202319383621, "learning_rate": 0.002, "loss": 2.3381, "step": 380040 }, { "epoch": 1.469167014581497, "grad_norm": 0.11405713111162186, "learning_rate": 0.002, "loss": 2.3328, "step": 380050 }, { "epoch": 1.4692056717848803, "grad_norm": 0.09181715548038483, "learning_rate": 0.002, "loss": 2.3309, "step": 380060 }, { "epoch": 1.4692443289882637, "grad_norm": 0.09268059581518173, "learning_rate": 0.002, "loss": 2.3327, "step": 380070 }, { "epoch": 1.469282986191647, "grad_norm": 0.11425948888063431, "learning_rate": 0.002, "loss": 2.3475, "step": 380080 }, { "epoch": 1.4693216433950302, "grad_norm": 0.1236632838845253, "learning_rate": 0.002, "loss": 2.336, "step": 380090 }, { "epoch": 1.4693603005984135, "grad_norm": 0.0985027402639389, "learning_rate": 0.002, "loss": 2.3446, "step": 380100 }, { "epoch": 1.4693989578017967, "grad_norm": 0.10117731988430023, "learning_rate": 0.002, "loss": 2.3345, "step": 380110 }, { "epoch": 1.46943761500518, "grad_norm": 0.12691277265548706, "learning_rate": 0.002, "loss": 2.3343, "step": 380120 }, { "epoch": 1.4694762722085635, "grad_norm": 0.09519045799970627, "learning_rate": 0.002, "loss": 2.3393, "step": 380130 }, { "epoch": 1.4695149294119467, "grad_norm": 0.09453404694795609, "learning_rate": 0.002, "loss": 2.3255, "step": 380140 }, { "epoch": 1.46955358661533, "grad_norm": 0.09491042047739029, "learning_rate": 0.002, "loss": 2.3246, "step": 380150 }, { "epoch": 1.4695922438187132, "grad_norm": 0.10169180482625961, "learning_rate": 0.002, "loss": 2.3368, "step": 380160 }, { "epoch": 1.4696309010220965, "grad_norm": 0.10007622092962265, "learning_rate": 0.002, "loss": 2.3556, "step": 380170 }, { "epoch": 1.4696695582254797, "grad_norm": 0.1122666671872139, "learning_rate": 0.002, "loss": 2.3406, "step": 380180 }, { "epoch": 1.469708215428863, "grad_norm": 0.09976609796285629, "learning_rate": 0.002, "loss": 2.3362, "step": 380190 }, { "epoch": 1.4697468726322462, "grad_norm": 0.12167870253324509, "learning_rate": 0.002, "loss": 2.3462, "step": 380200 }, { "epoch": 1.4697855298356295, "grad_norm": 0.09556052088737488, "learning_rate": 0.002, "loss": 2.339, "step": 380210 }, { "epoch": 1.4698241870390127, "grad_norm": 0.11083319783210754, "learning_rate": 0.002, "loss": 2.33, "step": 380220 }, { "epoch": 1.469862844242396, "grad_norm": 0.11835497617721558, "learning_rate": 0.002, "loss": 2.331, "step": 380230 }, { "epoch": 1.4699015014457795, "grad_norm": 0.11649499833583832, "learning_rate": 0.002, "loss": 2.3345, "step": 380240 }, { "epoch": 1.4699401586491627, "grad_norm": 0.10010214895009995, "learning_rate": 0.002, "loss": 2.3374, "step": 380250 }, { "epoch": 1.469978815852546, "grad_norm": 0.10207920521497726, "learning_rate": 0.002, "loss": 2.3401, "step": 380260 }, { "epoch": 1.4700174730559292, "grad_norm": 0.14429835975170135, "learning_rate": 0.002, "loss": 2.3274, "step": 380270 }, { "epoch": 1.4700561302593125, "grad_norm": 0.09240922331809998, "learning_rate": 0.002, "loss": 2.3501, "step": 380280 }, { "epoch": 1.4700947874626957, "grad_norm": 0.0997718796133995, "learning_rate": 0.002, "loss": 2.3312, "step": 380290 }, { "epoch": 1.4701334446660792, "grad_norm": 0.11467599868774414, "learning_rate": 0.002, "loss": 2.3304, "step": 380300 }, { "epoch": 1.4701721018694625, "grad_norm": 0.09451211243867874, "learning_rate": 0.002, "loss": 2.3188, "step": 380310 }, { "epoch": 1.4702107590728457, "grad_norm": 0.1025175079703331, "learning_rate": 0.002, "loss": 2.3217, "step": 380320 }, { "epoch": 1.470249416276229, "grad_norm": 0.1079663336277008, "learning_rate": 0.002, "loss": 2.343, "step": 380330 }, { "epoch": 1.4702880734796122, "grad_norm": 0.10581180453300476, "learning_rate": 0.002, "loss": 2.3494, "step": 380340 }, { "epoch": 1.4703267306829955, "grad_norm": 0.09240443259477615, "learning_rate": 0.002, "loss": 2.3237, "step": 380350 }, { "epoch": 1.4703653878863787, "grad_norm": 0.10867279767990112, "learning_rate": 0.002, "loss": 2.3298, "step": 380360 }, { "epoch": 1.470404045089762, "grad_norm": 0.11264824122190475, "learning_rate": 0.002, "loss": 2.3378, "step": 380370 }, { "epoch": 1.4704427022931452, "grad_norm": 0.11031465977430344, "learning_rate": 0.002, "loss": 2.3427, "step": 380380 }, { "epoch": 1.4704813594965285, "grad_norm": 0.10774829238653183, "learning_rate": 0.002, "loss": 2.3395, "step": 380390 }, { "epoch": 1.4705200166999117, "grad_norm": 0.11938739567995071, "learning_rate": 0.002, "loss": 2.3374, "step": 380400 }, { "epoch": 1.4705586739032952, "grad_norm": 0.10929541289806366, "learning_rate": 0.002, "loss": 2.335, "step": 380410 }, { "epoch": 1.4705973311066785, "grad_norm": 0.10994645208120346, "learning_rate": 0.002, "loss": 2.3266, "step": 380420 }, { "epoch": 1.4706359883100617, "grad_norm": 0.11571916937828064, "learning_rate": 0.002, "loss": 2.3345, "step": 380430 }, { "epoch": 1.470674645513445, "grad_norm": 0.10880973190069199, "learning_rate": 0.002, "loss": 2.3509, "step": 380440 }, { "epoch": 1.4707133027168282, "grad_norm": 0.10868261009454727, "learning_rate": 0.002, "loss": 2.3297, "step": 380450 }, { "epoch": 1.4707519599202115, "grad_norm": 0.1179015189409256, "learning_rate": 0.002, "loss": 2.3283, "step": 380460 }, { "epoch": 1.470790617123595, "grad_norm": 0.11172597110271454, "learning_rate": 0.002, "loss": 2.3184, "step": 380470 }, { "epoch": 1.4708292743269782, "grad_norm": 0.08885904401540756, "learning_rate": 0.002, "loss": 2.3413, "step": 380480 }, { "epoch": 1.4708679315303614, "grad_norm": 0.1096772775053978, "learning_rate": 0.002, "loss": 2.3326, "step": 380490 }, { "epoch": 1.4709065887337447, "grad_norm": 0.1212785616517067, "learning_rate": 0.002, "loss": 2.3415, "step": 380500 }, { "epoch": 1.470945245937128, "grad_norm": 0.10107403248548508, "learning_rate": 0.002, "loss": 2.3286, "step": 380510 }, { "epoch": 1.4709839031405112, "grad_norm": 0.1215515285730362, "learning_rate": 0.002, "loss": 2.3229, "step": 380520 }, { "epoch": 1.4710225603438944, "grad_norm": 0.11970778554677963, "learning_rate": 0.002, "loss": 2.3432, "step": 380530 }, { "epoch": 1.4710612175472777, "grad_norm": 0.09164808690547943, "learning_rate": 0.002, "loss": 2.3249, "step": 380540 }, { "epoch": 1.471099874750661, "grad_norm": 0.09449364989995956, "learning_rate": 0.002, "loss": 2.3181, "step": 380550 }, { "epoch": 1.4711385319540442, "grad_norm": 0.0985458716750145, "learning_rate": 0.002, "loss": 2.3425, "step": 380560 }, { "epoch": 1.4711771891574277, "grad_norm": 0.10552497208118439, "learning_rate": 0.002, "loss": 2.3346, "step": 380570 }, { "epoch": 1.471215846360811, "grad_norm": 0.13049247860908508, "learning_rate": 0.002, "loss": 2.3299, "step": 380580 }, { "epoch": 1.4712545035641942, "grad_norm": 0.11239859461784363, "learning_rate": 0.002, "loss": 2.3331, "step": 380590 }, { "epoch": 1.4712931607675774, "grad_norm": 0.10022827237844467, "learning_rate": 0.002, "loss": 2.3378, "step": 380600 }, { "epoch": 1.4713318179709607, "grad_norm": 0.10231848806142807, "learning_rate": 0.002, "loss": 2.3383, "step": 380610 }, { "epoch": 1.471370475174344, "grad_norm": 0.10004410892724991, "learning_rate": 0.002, "loss": 2.3277, "step": 380620 }, { "epoch": 1.4714091323777272, "grad_norm": 0.10348884761333466, "learning_rate": 0.002, "loss": 2.3318, "step": 380630 }, { "epoch": 1.4714477895811107, "grad_norm": 0.11574351042509079, "learning_rate": 0.002, "loss": 2.3209, "step": 380640 }, { "epoch": 1.471486446784494, "grad_norm": 0.09569251537322998, "learning_rate": 0.002, "loss": 2.3325, "step": 380650 }, { "epoch": 1.4715251039878772, "grad_norm": 0.09906301647424698, "learning_rate": 0.002, "loss": 2.3248, "step": 380660 }, { "epoch": 1.4715637611912604, "grad_norm": 0.11972828209400177, "learning_rate": 0.002, "loss": 2.333, "step": 380670 }, { "epoch": 1.4716024183946437, "grad_norm": 0.11014852672815323, "learning_rate": 0.002, "loss": 2.3354, "step": 380680 }, { "epoch": 1.471641075598027, "grad_norm": 0.09081583470106125, "learning_rate": 0.002, "loss": 2.3322, "step": 380690 }, { "epoch": 1.4716797328014102, "grad_norm": 0.11451420187950134, "learning_rate": 0.002, "loss": 2.3343, "step": 380700 }, { "epoch": 1.4717183900047934, "grad_norm": 0.11507417261600494, "learning_rate": 0.002, "loss": 2.3522, "step": 380710 }, { "epoch": 1.4717570472081767, "grad_norm": 0.10528484731912613, "learning_rate": 0.002, "loss": 2.3318, "step": 380720 }, { "epoch": 1.47179570441156, "grad_norm": 0.1073676198720932, "learning_rate": 0.002, "loss": 2.3387, "step": 380730 }, { "epoch": 1.4718343616149434, "grad_norm": 0.09003012627363205, "learning_rate": 0.002, "loss": 2.3307, "step": 380740 }, { "epoch": 1.4718730188183267, "grad_norm": 0.11079955101013184, "learning_rate": 0.002, "loss": 2.3431, "step": 380750 }, { "epoch": 1.47191167602171, "grad_norm": 0.11685178428888321, "learning_rate": 0.002, "loss": 2.3289, "step": 380760 }, { "epoch": 1.4719503332250932, "grad_norm": 0.0931723341345787, "learning_rate": 0.002, "loss": 2.3477, "step": 380770 }, { "epoch": 1.4719889904284764, "grad_norm": 0.13615788519382477, "learning_rate": 0.002, "loss": 2.3375, "step": 380780 }, { "epoch": 1.4720276476318597, "grad_norm": 0.0943218395113945, "learning_rate": 0.002, "loss": 2.3191, "step": 380790 }, { "epoch": 1.472066304835243, "grad_norm": 0.09162439405918121, "learning_rate": 0.002, "loss": 2.3433, "step": 380800 }, { "epoch": 1.4721049620386264, "grad_norm": 0.12335995584726334, "learning_rate": 0.002, "loss": 2.3322, "step": 380810 }, { "epoch": 1.4721436192420096, "grad_norm": 0.09435350447893143, "learning_rate": 0.002, "loss": 2.3369, "step": 380820 }, { "epoch": 1.472182276445393, "grad_norm": 0.10461000353097916, "learning_rate": 0.002, "loss": 2.3238, "step": 380830 }, { "epoch": 1.4722209336487762, "grad_norm": 0.09914273768663406, "learning_rate": 0.002, "loss": 2.3385, "step": 380840 }, { "epoch": 1.4722595908521594, "grad_norm": 0.102699413895607, "learning_rate": 0.002, "loss": 2.321, "step": 380850 }, { "epoch": 1.4722982480555427, "grad_norm": 0.10240603238344193, "learning_rate": 0.002, "loss": 2.3344, "step": 380860 }, { "epoch": 1.472336905258926, "grad_norm": 0.11398480832576752, "learning_rate": 0.002, "loss": 2.3333, "step": 380870 }, { "epoch": 1.4723755624623092, "grad_norm": 0.10690321773290634, "learning_rate": 0.002, "loss": 2.3313, "step": 380880 }, { "epoch": 1.4724142196656924, "grad_norm": 0.10043326020240784, "learning_rate": 0.002, "loss": 2.3331, "step": 380890 }, { "epoch": 1.4724528768690757, "grad_norm": 0.1416574865579605, "learning_rate": 0.002, "loss": 2.3279, "step": 380900 }, { "epoch": 1.4724915340724591, "grad_norm": 0.09897167980670929, "learning_rate": 0.002, "loss": 2.3444, "step": 380910 }, { "epoch": 1.4725301912758424, "grad_norm": 0.10017742216587067, "learning_rate": 0.002, "loss": 2.3394, "step": 380920 }, { "epoch": 1.4725688484792256, "grad_norm": 0.09924539178609848, "learning_rate": 0.002, "loss": 2.3359, "step": 380930 }, { "epoch": 1.472607505682609, "grad_norm": 0.10560602694749832, "learning_rate": 0.002, "loss": 2.3364, "step": 380940 }, { "epoch": 1.4726461628859921, "grad_norm": 0.11045221984386444, "learning_rate": 0.002, "loss": 2.3465, "step": 380950 }, { "epoch": 1.4726848200893754, "grad_norm": 0.10794904828071594, "learning_rate": 0.002, "loss": 2.3237, "step": 380960 }, { "epoch": 1.4727234772927587, "grad_norm": 0.12349242717027664, "learning_rate": 0.002, "loss": 2.3267, "step": 380970 }, { "epoch": 1.4727621344961421, "grad_norm": 0.11813584715127945, "learning_rate": 0.002, "loss": 2.3298, "step": 380980 }, { "epoch": 1.4728007916995254, "grad_norm": 0.09766297787427902, "learning_rate": 0.002, "loss": 2.3506, "step": 380990 }, { "epoch": 1.4728394489029086, "grad_norm": 0.16462789475917816, "learning_rate": 0.002, "loss": 2.3252, "step": 381000 }, { "epoch": 1.4728781061062919, "grad_norm": 0.10614291578531265, "learning_rate": 0.002, "loss": 2.3342, "step": 381010 }, { "epoch": 1.4729167633096751, "grad_norm": 0.09676255285739899, "learning_rate": 0.002, "loss": 2.3211, "step": 381020 }, { "epoch": 1.4729554205130584, "grad_norm": 0.09638655185699463, "learning_rate": 0.002, "loss": 2.3251, "step": 381030 }, { "epoch": 1.4729940777164416, "grad_norm": 0.10229352861642838, "learning_rate": 0.002, "loss": 2.3439, "step": 381040 }, { "epoch": 1.473032734919825, "grad_norm": 0.11363990604877472, "learning_rate": 0.002, "loss": 2.3357, "step": 381050 }, { "epoch": 1.4730713921232081, "grad_norm": 0.26950469613075256, "learning_rate": 0.002, "loss": 2.3364, "step": 381060 }, { "epoch": 1.4731100493265914, "grad_norm": 0.11464790999889374, "learning_rate": 0.002, "loss": 2.3388, "step": 381070 }, { "epoch": 1.4731487065299749, "grad_norm": 0.10803456604480743, "learning_rate": 0.002, "loss": 2.323, "step": 381080 }, { "epoch": 1.4731873637333581, "grad_norm": 0.11394017934799194, "learning_rate": 0.002, "loss": 2.3277, "step": 381090 }, { "epoch": 1.4732260209367414, "grad_norm": 0.10622947663068771, "learning_rate": 0.002, "loss": 2.3268, "step": 381100 }, { "epoch": 1.4732646781401246, "grad_norm": 0.09255822747945786, "learning_rate": 0.002, "loss": 2.3255, "step": 381110 }, { "epoch": 1.4733033353435079, "grad_norm": 0.1168975681066513, "learning_rate": 0.002, "loss": 2.3226, "step": 381120 }, { "epoch": 1.4733419925468911, "grad_norm": 0.09841727465391159, "learning_rate": 0.002, "loss": 2.3291, "step": 381130 }, { "epoch": 1.4733806497502744, "grad_norm": 0.09333381801843643, "learning_rate": 0.002, "loss": 2.329, "step": 381140 }, { "epoch": 1.4734193069536579, "grad_norm": 0.09599963575601578, "learning_rate": 0.002, "loss": 2.336, "step": 381150 }, { "epoch": 1.4734579641570411, "grad_norm": 0.11897846311330795, "learning_rate": 0.002, "loss": 2.3299, "step": 381160 }, { "epoch": 1.4734966213604244, "grad_norm": 0.11470766365528107, "learning_rate": 0.002, "loss": 2.3195, "step": 381170 }, { "epoch": 1.4735352785638076, "grad_norm": 0.10256505012512207, "learning_rate": 0.002, "loss": 2.3354, "step": 381180 }, { "epoch": 1.4735739357671909, "grad_norm": 0.10822631418704987, "learning_rate": 0.002, "loss": 2.3329, "step": 381190 }, { "epoch": 1.4736125929705741, "grad_norm": 0.09520353376865387, "learning_rate": 0.002, "loss": 2.3126, "step": 381200 }, { "epoch": 1.4736512501739574, "grad_norm": 0.10728409886360168, "learning_rate": 0.002, "loss": 2.3291, "step": 381210 }, { "epoch": 1.4736899073773406, "grad_norm": 0.10907592624425888, "learning_rate": 0.002, "loss": 2.3545, "step": 381220 }, { "epoch": 1.4737285645807239, "grad_norm": 0.11383340507745743, "learning_rate": 0.002, "loss": 2.3402, "step": 381230 }, { "epoch": 1.4737672217841071, "grad_norm": 0.0988120511174202, "learning_rate": 0.002, "loss": 2.3308, "step": 381240 }, { "epoch": 1.4738058789874906, "grad_norm": 0.09999316930770874, "learning_rate": 0.002, "loss": 2.317, "step": 381250 }, { "epoch": 1.4738445361908739, "grad_norm": 0.10439565777778625, "learning_rate": 0.002, "loss": 2.3414, "step": 381260 }, { "epoch": 1.473883193394257, "grad_norm": 0.1033417209982872, "learning_rate": 0.002, "loss": 2.3434, "step": 381270 }, { "epoch": 1.4739218505976404, "grad_norm": 0.11376326531171799, "learning_rate": 0.002, "loss": 2.3322, "step": 381280 }, { "epoch": 1.4739605078010236, "grad_norm": 0.09683159738779068, "learning_rate": 0.002, "loss": 2.3518, "step": 381290 }, { "epoch": 1.4739991650044069, "grad_norm": 0.1309593915939331, "learning_rate": 0.002, "loss": 2.3362, "step": 381300 }, { "epoch": 1.4740378222077903, "grad_norm": 0.09711278229951859, "learning_rate": 0.002, "loss": 2.3351, "step": 381310 }, { "epoch": 1.4740764794111736, "grad_norm": 0.09251412004232407, "learning_rate": 0.002, "loss": 2.3375, "step": 381320 }, { "epoch": 1.4741151366145568, "grad_norm": 0.10818198323249817, "learning_rate": 0.002, "loss": 2.3318, "step": 381330 }, { "epoch": 1.47415379381794, "grad_norm": 0.17244568467140198, "learning_rate": 0.002, "loss": 2.3312, "step": 381340 }, { "epoch": 1.4741924510213233, "grad_norm": 0.10578557848930359, "learning_rate": 0.002, "loss": 2.3345, "step": 381350 }, { "epoch": 1.4742311082247066, "grad_norm": 0.10104954242706299, "learning_rate": 0.002, "loss": 2.3331, "step": 381360 }, { "epoch": 1.4742697654280899, "grad_norm": 0.10733964294195175, "learning_rate": 0.002, "loss": 2.3275, "step": 381370 }, { "epoch": 1.474308422631473, "grad_norm": 0.10672634840011597, "learning_rate": 0.002, "loss": 2.3285, "step": 381380 }, { "epoch": 1.4743470798348564, "grad_norm": 0.09510699659585953, "learning_rate": 0.002, "loss": 2.3371, "step": 381390 }, { "epoch": 1.4743857370382396, "grad_norm": 0.10608115047216415, "learning_rate": 0.002, "loss": 2.3324, "step": 381400 }, { "epoch": 1.4744243942416229, "grad_norm": 0.0924900472164154, "learning_rate": 0.002, "loss": 2.3529, "step": 381410 }, { "epoch": 1.4744630514450063, "grad_norm": 0.09588416665792465, "learning_rate": 0.002, "loss": 2.3464, "step": 381420 }, { "epoch": 1.4745017086483896, "grad_norm": 0.10708600282669067, "learning_rate": 0.002, "loss": 2.3391, "step": 381430 }, { "epoch": 1.4745403658517728, "grad_norm": 0.12301962822675705, "learning_rate": 0.002, "loss": 2.3259, "step": 381440 }, { "epoch": 1.474579023055156, "grad_norm": 0.10980133712291718, "learning_rate": 0.002, "loss": 2.3414, "step": 381450 }, { "epoch": 1.4746176802585393, "grad_norm": 0.0985681489109993, "learning_rate": 0.002, "loss": 2.335, "step": 381460 }, { "epoch": 1.4746563374619226, "grad_norm": 0.10325771570205688, "learning_rate": 0.002, "loss": 2.3406, "step": 381470 }, { "epoch": 1.474694994665306, "grad_norm": 0.11751792579889297, "learning_rate": 0.002, "loss": 2.3409, "step": 381480 }, { "epoch": 1.4747336518686893, "grad_norm": 0.11009322106838226, "learning_rate": 0.002, "loss": 2.3204, "step": 381490 }, { "epoch": 1.4747723090720726, "grad_norm": 0.11105030030012131, "learning_rate": 0.002, "loss": 2.3245, "step": 381500 }, { "epoch": 1.4748109662754558, "grad_norm": 0.11428146064281464, "learning_rate": 0.002, "loss": 2.321, "step": 381510 }, { "epoch": 1.474849623478839, "grad_norm": 0.09404455125331879, "learning_rate": 0.002, "loss": 2.3167, "step": 381520 }, { "epoch": 1.4748882806822223, "grad_norm": 0.10543165355920792, "learning_rate": 0.002, "loss": 2.3188, "step": 381530 }, { "epoch": 1.4749269378856056, "grad_norm": 0.10001705586910248, "learning_rate": 0.002, "loss": 2.3204, "step": 381540 }, { "epoch": 1.4749655950889888, "grad_norm": 0.08789706975221634, "learning_rate": 0.002, "loss": 2.3207, "step": 381550 }, { "epoch": 1.475004252292372, "grad_norm": 0.1343705952167511, "learning_rate": 0.002, "loss": 2.3336, "step": 381560 }, { "epoch": 1.4750429094957553, "grad_norm": 0.10130036622285843, "learning_rate": 0.002, "loss": 2.3302, "step": 381570 }, { "epoch": 1.4750815666991386, "grad_norm": 0.09015454351902008, "learning_rate": 0.002, "loss": 2.3364, "step": 381580 }, { "epoch": 1.475120223902522, "grad_norm": 0.12282449007034302, "learning_rate": 0.002, "loss": 2.3333, "step": 381590 }, { "epoch": 1.4751588811059053, "grad_norm": 0.09663018584251404, "learning_rate": 0.002, "loss": 2.3177, "step": 381600 }, { "epoch": 1.4751975383092886, "grad_norm": 0.11174379289150238, "learning_rate": 0.002, "loss": 2.3276, "step": 381610 }, { "epoch": 1.4752361955126718, "grad_norm": 0.11045493185520172, "learning_rate": 0.002, "loss": 2.324, "step": 381620 }, { "epoch": 1.475274852716055, "grad_norm": 0.10591579973697662, "learning_rate": 0.002, "loss": 2.324, "step": 381630 }, { "epoch": 1.4753135099194383, "grad_norm": 0.12079647928476334, "learning_rate": 0.002, "loss": 2.3308, "step": 381640 }, { "epoch": 1.4753521671228218, "grad_norm": 0.10993454605340958, "learning_rate": 0.002, "loss": 2.3449, "step": 381650 }, { "epoch": 1.475390824326205, "grad_norm": 0.10719642043113708, "learning_rate": 0.002, "loss": 2.3383, "step": 381660 }, { "epoch": 1.4754294815295883, "grad_norm": 0.10297977179288864, "learning_rate": 0.002, "loss": 2.3213, "step": 381670 }, { "epoch": 1.4754681387329716, "grad_norm": 0.10150489956140518, "learning_rate": 0.002, "loss": 2.3366, "step": 381680 }, { "epoch": 1.4755067959363548, "grad_norm": 0.10953573882579803, "learning_rate": 0.002, "loss": 2.3255, "step": 381690 }, { "epoch": 1.475545453139738, "grad_norm": 0.15350480377674103, "learning_rate": 0.002, "loss": 2.3366, "step": 381700 }, { "epoch": 1.4755841103431213, "grad_norm": 0.10827025026082993, "learning_rate": 0.002, "loss": 2.3231, "step": 381710 }, { "epoch": 1.4756227675465046, "grad_norm": 0.10280109941959381, "learning_rate": 0.002, "loss": 2.3319, "step": 381720 }, { "epoch": 1.4756614247498878, "grad_norm": 0.09801103174686432, "learning_rate": 0.002, "loss": 2.3297, "step": 381730 }, { "epoch": 1.475700081953271, "grad_norm": 0.11004986613988876, "learning_rate": 0.002, "loss": 2.3223, "step": 381740 }, { "epoch": 1.4757387391566543, "grad_norm": 0.12494536489248276, "learning_rate": 0.002, "loss": 2.3259, "step": 381750 }, { "epoch": 1.4757773963600378, "grad_norm": 0.1095493733882904, "learning_rate": 0.002, "loss": 2.3411, "step": 381760 }, { "epoch": 1.475816053563421, "grad_norm": 0.13262827694416046, "learning_rate": 0.002, "loss": 2.3516, "step": 381770 }, { "epoch": 1.4758547107668043, "grad_norm": 0.1285833716392517, "learning_rate": 0.002, "loss": 2.3504, "step": 381780 }, { "epoch": 1.4758933679701876, "grad_norm": 0.11003441363573074, "learning_rate": 0.002, "loss": 2.3266, "step": 381790 }, { "epoch": 1.4759320251735708, "grad_norm": 0.10204324126243591, "learning_rate": 0.002, "loss": 2.3418, "step": 381800 }, { "epoch": 1.475970682376954, "grad_norm": 0.10408364236354828, "learning_rate": 0.002, "loss": 2.3293, "step": 381810 }, { "epoch": 1.4760093395803375, "grad_norm": 0.12155904620885849, "learning_rate": 0.002, "loss": 2.3235, "step": 381820 }, { "epoch": 1.4760479967837208, "grad_norm": 0.09382772445678711, "learning_rate": 0.002, "loss": 2.3367, "step": 381830 }, { "epoch": 1.476086653987104, "grad_norm": 0.10493974387645721, "learning_rate": 0.002, "loss": 2.338, "step": 381840 }, { "epoch": 1.4761253111904873, "grad_norm": 0.09143518656492233, "learning_rate": 0.002, "loss": 2.3459, "step": 381850 }, { "epoch": 1.4761639683938705, "grad_norm": 0.09787338227033615, "learning_rate": 0.002, "loss": 2.3363, "step": 381860 }, { "epoch": 1.4762026255972538, "grad_norm": 0.0987553671002388, "learning_rate": 0.002, "loss": 2.3237, "step": 381870 }, { "epoch": 1.476241282800637, "grad_norm": 0.10685688257217407, "learning_rate": 0.002, "loss": 2.3319, "step": 381880 }, { "epoch": 1.4762799400040203, "grad_norm": 0.10927334427833557, "learning_rate": 0.002, "loss": 2.3475, "step": 381890 }, { "epoch": 1.4763185972074035, "grad_norm": 0.1169334352016449, "learning_rate": 0.002, "loss": 2.3355, "step": 381900 }, { "epoch": 1.4763572544107868, "grad_norm": 0.09676641970872879, "learning_rate": 0.002, "loss": 2.3318, "step": 381910 }, { "epoch": 1.47639591161417, "grad_norm": 0.10892260074615479, "learning_rate": 0.002, "loss": 2.3237, "step": 381920 }, { "epoch": 1.4764345688175535, "grad_norm": 0.12671910226345062, "learning_rate": 0.002, "loss": 2.3227, "step": 381930 }, { "epoch": 1.4764732260209368, "grad_norm": 0.12871995568275452, "learning_rate": 0.002, "loss": 2.3417, "step": 381940 }, { "epoch": 1.47651188322432, "grad_norm": 0.10366962105035782, "learning_rate": 0.002, "loss": 2.3226, "step": 381950 }, { "epoch": 1.4765505404277033, "grad_norm": 0.10168401896953583, "learning_rate": 0.002, "loss": 2.3352, "step": 381960 }, { "epoch": 1.4765891976310865, "grad_norm": 0.11718802154064178, "learning_rate": 0.002, "loss": 2.3213, "step": 381970 }, { "epoch": 1.4766278548344698, "grad_norm": 0.11141911149024963, "learning_rate": 0.002, "loss": 2.3147, "step": 381980 }, { "epoch": 1.4766665120378533, "grad_norm": 0.10923470556735992, "learning_rate": 0.002, "loss": 2.3272, "step": 381990 }, { "epoch": 1.4767051692412365, "grad_norm": 0.09513156116008759, "learning_rate": 0.002, "loss": 2.3215, "step": 382000 }, { "epoch": 1.4767438264446198, "grad_norm": 0.12428732961416245, "learning_rate": 0.002, "loss": 2.337, "step": 382010 }, { "epoch": 1.476782483648003, "grad_norm": 0.10048487782478333, "learning_rate": 0.002, "loss": 2.3183, "step": 382020 }, { "epoch": 1.4768211408513863, "grad_norm": 0.10726416856050491, "learning_rate": 0.002, "loss": 2.3177, "step": 382030 }, { "epoch": 1.4768597980547695, "grad_norm": 0.140085369348526, "learning_rate": 0.002, "loss": 2.3325, "step": 382040 }, { "epoch": 1.4768984552581528, "grad_norm": 0.10316112637519836, "learning_rate": 0.002, "loss": 2.3328, "step": 382050 }, { "epoch": 1.476937112461536, "grad_norm": 0.10740186274051666, "learning_rate": 0.002, "loss": 2.3363, "step": 382060 }, { "epoch": 1.4769757696649193, "grad_norm": 0.102906733751297, "learning_rate": 0.002, "loss": 2.3232, "step": 382070 }, { "epoch": 1.4770144268683025, "grad_norm": 0.10092508047819138, "learning_rate": 0.002, "loss": 2.3268, "step": 382080 }, { "epoch": 1.4770530840716858, "grad_norm": 0.09575673937797546, "learning_rate": 0.002, "loss": 2.3259, "step": 382090 }, { "epoch": 1.4770917412750693, "grad_norm": 0.11741238832473755, "learning_rate": 0.002, "loss": 2.3188, "step": 382100 }, { "epoch": 1.4771303984784525, "grad_norm": 0.12052875757217407, "learning_rate": 0.002, "loss": 2.3229, "step": 382110 }, { "epoch": 1.4771690556818358, "grad_norm": 0.106513611972332, "learning_rate": 0.002, "loss": 2.3474, "step": 382120 }, { "epoch": 1.477207712885219, "grad_norm": 0.1042778491973877, "learning_rate": 0.002, "loss": 2.3271, "step": 382130 }, { "epoch": 1.4772463700886023, "grad_norm": 0.10151872038841248, "learning_rate": 0.002, "loss": 2.3174, "step": 382140 }, { "epoch": 1.4772850272919855, "grad_norm": 0.12854936718940735, "learning_rate": 0.002, "loss": 2.3288, "step": 382150 }, { "epoch": 1.477323684495369, "grad_norm": 0.11349263787269592, "learning_rate": 0.002, "loss": 2.3244, "step": 382160 }, { "epoch": 1.4773623416987522, "grad_norm": 0.09675941616296768, "learning_rate": 0.002, "loss": 2.3352, "step": 382170 }, { "epoch": 1.4774009989021355, "grad_norm": 0.10710638016462326, "learning_rate": 0.002, "loss": 2.341, "step": 382180 }, { "epoch": 1.4774396561055188, "grad_norm": 0.10345502942800522, "learning_rate": 0.002, "loss": 2.3249, "step": 382190 }, { "epoch": 1.477478313308902, "grad_norm": 0.09632916003465652, "learning_rate": 0.002, "loss": 2.3262, "step": 382200 }, { "epoch": 1.4775169705122853, "grad_norm": 0.09333551675081253, "learning_rate": 0.002, "loss": 2.3265, "step": 382210 }, { "epoch": 1.4775556277156685, "grad_norm": 0.09522828459739685, "learning_rate": 0.002, "loss": 2.3452, "step": 382220 }, { "epoch": 1.4775942849190518, "grad_norm": 0.11494068056344986, "learning_rate": 0.002, "loss": 2.3313, "step": 382230 }, { "epoch": 1.477632942122435, "grad_norm": 0.1599724292755127, "learning_rate": 0.002, "loss": 2.325, "step": 382240 }, { "epoch": 1.4776715993258183, "grad_norm": 0.10708048194646835, "learning_rate": 0.002, "loss": 2.3261, "step": 382250 }, { "epoch": 1.4777102565292015, "grad_norm": 0.18249209225177765, "learning_rate": 0.002, "loss": 2.3459, "step": 382260 }, { "epoch": 1.477748913732585, "grad_norm": 0.10732556134462357, "learning_rate": 0.002, "loss": 2.3262, "step": 382270 }, { "epoch": 1.4777875709359682, "grad_norm": 0.11067913472652435, "learning_rate": 0.002, "loss": 2.3395, "step": 382280 }, { "epoch": 1.4778262281393515, "grad_norm": 0.09666985273361206, "learning_rate": 0.002, "loss": 2.3267, "step": 382290 }, { "epoch": 1.4778648853427347, "grad_norm": 0.10674726217985153, "learning_rate": 0.002, "loss": 2.3302, "step": 382300 }, { "epoch": 1.477903542546118, "grad_norm": 0.1110454648733139, "learning_rate": 0.002, "loss": 2.33, "step": 382310 }, { "epoch": 1.4779421997495013, "grad_norm": 0.10222259163856506, "learning_rate": 0.002, "loss": 2.3319, "step": 382320 }, { "epoch": 1.4779808569528847, "grad_norm": 0.11711250245571136, "learning_rate": 0.002, "loss": 2.3223, "step": 382330 }, { "epoch": 1.478019514156268, "grad_norm": 0.0934610366821289, "learning_rate": 0.002, "loss": 2.3249, "step": 382340 }, { "epoch": 1.4780581713596512, "grad_norm": 0.10337325185537338, "learning_rate": 0.002, "loss": 2.3322, "step": 382350 }, { "epoch": 1.4780968285630345, "grad_norm": 0.11120796948671341, "learning_rate": 0.002, "loss": 2.3222, "step": 382360 }, { "epoch": 1.4781354857664177, "grad_norm": 0.10233265906572342, "learning_rate": 0.002, "loss": 2.3287, "step": 382370 }, { "epoch": 1.478174142969801, "grad_norm": 0.11238359659910202, "learning_rate": 0.002, "loss": 2.348, "step": 382380 }, { "epoch": 1.4782128001731842, "grad_norm": 0.11099530011415482, "learning_rate": 0.002, "loss": 2.3421, "step": 382390 }, { "epoch": 1.4782514573765675, "grad_norm": 0.13393400609493256, "learning_rate": 0.002, "loss": 2.3399, "step": 382400 }, { "epoch": 1.4782901145799507, "grad_norm": 0.10745197534561157, "learning_rate": 0.002, "loss": 2.3103, "step": 382410 }, { "epoch": 1.478328771783334, "grad_norm": 0.09651700407266617, "learning_rate": 0.002, "loss": 2.3201, "step": 382420 }, { "epoch": 1.4783674289867172, "grad_norm": 0.13040949404239655, "learning_rate": 0.002, "loss": 2.3441, "step": 382430 }, { "epoch": 1.4784060861901007, "grad_norm": 0.1285749077796936, "learning_rate": 0.002, "loss": 2.3236, "step": 382440 }, { "epoch": 1.478444743393484, "grad_norm": 0.11416109651327133, "learning_rate": 0.002, "loss": 2.3137, "step": 382450 }, { "epoch": 1.4784834005968672, "grad_norm": 0.09473222494125366, "learning_rate": 0.002, "loss": 2.33, "step": 382460 }, { "epoch": 1.4785220578002505, "grad_norm": 0.09636333584785461, "learning_rate": 0.002, "loss": 2.3165, "step": 382470 }, { "epoch": 1.4785607150036337, "grad_norm": 0.1212683841586113, "learning_rate": 0.002, "loss": 2.3355, "step": 382480 }, { "epoch": 1.478599372207017, "grad_norm": 0.10573016852140427, "learning_rate": 0.002, "loss": 2.3362, "step": 382490 }, { "epoch": 1.4786380294104005, "grad_norm": 0.0973353236913681, "learning_rate": 0.002, "loss": 2.3342, "step": 382500 }, { "epoch": 1.4786766866137837, "grad_norm": 0.10334083437919617, "learning_rate": 0.002, "loss": 2.3261, "step": 382510 }, { "epoch": 1.478715343817167, "grad_norm": 0.11552633345127106, "learning_rate": 0.002, "loss": 2.335, "step": 382520 }, { "epoch": 1.4787540010205502, "grad_norm": 0.10685543715953827, "learning_rate": 0.002, "loss": 2.3243, "step": 382530 }, { "epoch": 1.4787926582239335, "grad_norm": 0.09678447246551514, "learning_rate": 0.002, "loss": 2.3235, "step": 382540 }, { "epoch": 1.4788313154273167, "grad_norm": 0.10663742572069168, "learning_rate": 0.002, "loss": 2.3316, "step": 382550 }, { "epoch": 1.4788699726307, "grad_norm": 0.1363559365272522, "learning_rate": 0.002, "loss": 2.3403, "step": 382560 }, { "epoch": 1.4789086298340832, "grad_norm": 0.12492944300174713, "learning_rate": 0.002, "loss": 2.3325, "step": 382570 }, { "epoch": 1.4789472870374665, "grad_norm": 0.10026931017637253, "learning_rate": 0.002, "loss": 2.3248, "step": 382580 }, { "epoch": 1.4789859442408497, "grad_norm": 0.1090857982635498, "learning_rate": 0.002, "loss": 2.3307, "step": 382590 }, { "epoch": 1.4790246014442332, "grad_norm": 0.09713274985551834, "learning_rate": 0.002, "loss": 2.3319, "step": 382600 }, { "epoch": 1.4790632586476165, "grad_norm": 0.0868186354637146, "learning_rate": 0.002, "loss": 2.3274, "step": 382610 }, { "epoch": 1.4791019158509997, "grad_norm": 0.11653818935155869, "learning_rate": 0.002, "loss": 2.337, "step": 382620 }, { "epoch": 1.479140573054383, "grad_norm": 0.10660745203495026, "learning_rate": 0.002, "loss": 2.3267, "step": 382630 }, { "epoch": 1.4791792302577662, "grad_norm": 0.10817324370145798, "learning_rate": 0.002, "loss": 2.3374, "step": 382640 }, { "epoch": 1.4792178874611495, "grad_norm": 0.11057060956954956, "learning_rate": 0.002, "loss": 2.3143, "step": 382650 }, { "epoch": 1.4792565446645327, "grad_norm": 0.10341325402259827, "learning_rate": 0.002, "loss": 2.3431, "step": 382660 }, { "epoch": 1.4792952018679162, "grad_norm": 0.11677426844835281, "learning_rate": 0.002, "loss": 2.3487, "step": 382670 }, { "epoch": 1.4793338590712994, "grad_norm": 0.15986883640289307, "learning_rate": 0.002, "loss": 2.3342, "step": 382680 }, { "epoch": 1.4793725162746827, "grad_norm": 0.10989416390657425, "learning_rate": 0.002, "loss": 2.3377, "step": 382690 }, { "epoch": 1.479411173478066, "grad_norm": 0.09436041861772537, "learning_rate": 0.002, "loss": 2.332, "step": 382700 }, { "epoch": 1.4794498306814492, "grad_norm": 0.11045808345079422, "learning_rate": 0.002, "loss": 2.3286, "step": 382710 }, { "epoch": 1.4794884878848324, "grad_norm": 0.10220617055892944, "learning_rate": 0.002, "loss": 2.3421, "step": 382720 }, { "epoch": 1.4795271450882157, "grad_norm": 0.0920969769358635, "learning_rate": 0.002, "loss": 2.3247, "step": 382730 }, { "epoch": 1.479565802291599, "grad_norm": 0.2078670710325241, "learning_rate": 0.002, "loss": 2.3137, "step": 382740 }, { "epoch": 1.4796044594949822, "grad_norm": 0.1230267584323883, "learning_rate": 0.002, "loss": 2.3464, "step": 382750 }, { "epoch": 1.4796431166983655, "grad_norm": 0.10537160933017731, "learning_rate": 0.002, "loss": 2.3306, "step": 382760 }, { "epoch": 1.479681773901749, "grad_norm": 0.33487197756767273, "learning_rate": 0.002, "loss": 2.3377, "step": 382770 }, { "epoch": 1.4797204311051322, "grad_norm": 0.10333807021379471, "learning_rate": 0.002, "loss": 2.3577, "step": 382780 }, { "epoch": 1.4797590883085154, "grad_norm": 0.09185725450515747, "learning_rate": 0.002, "loss": 2.3326, "step": 382790 }, { "epoch": 1.4797977455118987, "grad_norm": 0.11369094252586365, "learning_rate": 0.002, "loss": 2.3423, "step": 382800 }, { "epoch": 1.479836402715282, "grad_norm": 0.6589314341545105, "learning_rate": 0.002, "loss": 2.3456, "step": 382810 }, { "epoch": 1.4798750599186652, "grad_norm": 0.12008057534694672, "learning_rate": 0.002, "loss": 2.3249, "step": 382820 }, { "epoch": 1.4799137171220484, "grad_norm": 0.10496290773153305, "learning_rate": 0.002, "loss": 2.3405, "step": 382830 }, { "epoch": 1.479952374325432, "grad_norm": 0.1102655827999115, "learning_rate": 0.002, "loss": 2.354, "step": 382840 }, { "epoch": 1.4799910315288152, "grad_norm": 0.09562227129936218, "learning_rate": 0.002, "loss": 2.3055, "step": 382850 }, { "epoch": 1.4800296887321984, "grad_norm": 0.12711955606937408, "learning_rate": 0.002, "loss": 2.3473, "step": 382860 }, { "epoch": 1.4800683459355817, "grad_norm": 0.11347995698451996, "learning_rate": 0.002, "loss": 2.326, "step": 382870 }, { "epoch": 1.480107003138965, "grad_norm": 0.11589667201042175, "learning_rate": 0.002, "loss": 2.3311, "step": 382880 }, { "epoch": 1.4801456603423482, "grad_norm": 0.1088893711566925, "learning_rate": 0.002, "loss": 2.3435, "step": 382890 }, { "epoch": 1.4801843175457314, "grad_norm": 0.10397517681121826, "learning_rate": 0.002, "loss": 2.3283, "step": 382900 }, { "epoch": 1.4802229747491147, "grad_norm": 0.10150329023599625, "learning_rate": 0.002, "loss": 2.3242, "step": 382910 }, { "epoch": 1.480261631952498, "grad_norm": 0.1259625405073166, "learning_rate": 0.002, "loss": 2.3337, "step": 382920 }, { "epoch": 1.4803002891558812, "grad_norm": 0.10090825706720352, "learning_rate": 0.002, "loss": 2.3288, "step": 382930 }, { "epoch": 1.4803389463592647, "grad_norm": 0.11497049778699875, "learning_rate": 0.002, "loss": 2.3509, "step": 382940 }, { "epoch": 1.480377603562648, "grad_norm": 0.12436963617801666, "learning_rate": 0.002, "loss": 2.3387, "step": 382950 }, { "epoch": 1.4804162607660312, "grad_norm": 0.09711844474077225, "learning_rate": 0.002, "loss": 2.3359, "step": 382960 }, { "epoch": 1.4804549179694144, "grad_norm": 0.11406796425580978, "learning_rate": 0.002, "loss": 2.3286, "step": 382970 }, { "epoch": 1.4804935751727977, "grad_norm": 0.09396304190158844, "learning_rate": 0.002, "loss": 2.3305, "step": 382980 }, { "epoch": 1.480532232376181, "grad_norm": 0.10211638361215591, "learning_rate": 0.002, "loss": 2.3339, "step": 382990 }, { "epoch": 1.4805708895795642, "grad_norm": 0.08367765694856644, "learning_rate": 0.002, "loss": 2.3449, "step": 383000 }, { "epoch": 1.4806095467829476, "grad_norm": 0.13419294357299805, "learning_rate": 0.002, "loss": 2.3352, "step": 383010 }, { "epoch": 1.480648203986331, "grad_norm": 0.10420286655426025, "learning_rate": 0.002, "loss": 2.3206, "step": 383020 }, { "epoch": 1.4806868611897142, "grad_norm": 0.09555057436227798, "learning_rate": 0.002, "loss": 2.3504, "step": 383030 }, { "epoch": 1.4807255183930974, "grad_norm": 0.11132855713367462, "learning_rate": 0.002, "loss": 2.3464, "step": 383040 }, { "epoch": 1.4807641755964807, "grad_norm": 0.11654718965291977, "learning_rate": 0.002, "loss": 2.3353, "step": 383050 }, { "epoch": 1.480802832799864, "grad_norm": 0.10852136462926865, "learning_rate": 0.002, "loss": 2.339, "step": 383060 }, { "epoch": 1.4808414900032472, "grad_norm": 0.10440509021282196, "learning_rate": 0.002, "loss": 2.3263, "step": 383070 }, { "epoch": 1.4808801472066304, "grad_norm": 0.10147271305322647, "learning_rate": 0.002, "loss": 2.333, "step": 383080 }, { "epoch": 1.4809188044100137, "grad_norm": 0.11680194735527039, "learning_rate": 0.002, "loss": 2.3255, "step": 383090 }, { "epoch": 1.480957461613397, "grad_norm": 0.10291650146245956, "learning_rate": 0.002, "loss": 2.3139, "step": 383100 }, { "epoch": 1.4809961188167804, "grad_norm": 0.0920812115073204, "learning_rate": 0.002, "loss": 2.3527, "step": 383110 }, { "epoch": 1.4810347760201636, "grad_norm": 0.12201771140098572, "learning_rate": 0.002, "loss": 2.3263, "step": 383120 }, { "epoch": 1.481073433223547, "grad_norm": 0.09390729665756226, "learning_rate": 0.002, "loss": 2.3292, "step": 383130 }, { "epoch": 1.4811120904269302, "grad_norm": 0.13557425141334534, "learning_rate": 0.002, "loss": 2.3355, "step": 383140 }, { "epoch": 1.4811507476303134, "grad_norm": 0.1080607920885086, "learning_rate": 0.002, "loss": 2.3305, "step": 383150 }, { "epoch": 1.4811894048336967, "grad_norm": 0.09251314401626587, "learning_rate": 0.002, "loss": 2.3385, "step": 383160 }, { "epoch": 1.48122806203708, "grad_norm": 0.09981363266706467, "learning_rate": 0.002, "loss": 2.3322, "step": 383170 }, { "epoch": 1.4812667192404634, "grad_norm": 0.12779657542705536, "learning_rate": 0.002, "loss": 2.3362, "step": 383180 }, { "epoch": 1.4813053764438466, "grad_norm": 0.10266245901584625, "learning_rate": 0.002, "loss": 2.3257, "step": 383190 }, { "epoch": 1.4813440336472299, "grad_norm": 0.09837732464075089, "learning_rate": 0.002, "loss": 2.3359, "step": 383200 }, { "epoch": 1.4813826908506131, "grad_norm": 0.11256790161132812, "learning_rate": 0.002, "loss": 2.3518, "step": 383210 }, { "epoch": 1.4814213480539964, "grad_norm": 0.11383913457393646, "learning_rate": 0.002, "loss": 2.3279, "step": 383220 }, { "epoch": 1.4814600052573796, "grad_norm": 0.10398650914430618, "learning_rate": 0.002, "loss": 2.334, "step": 383230 }, { "epoch": 1.481498662460763, "grad_norm": 0.08707722276449203, "learning_rate": 0.002, "loss": 2.3286, "step": 383240 }, { "epoch": 1.4815373196641461, "grad_norm": 0.1149417906999588, "learning_rate": 0.002, "loss": 2.337, "step": 383250 }, { "epoch": 1.4815759768675294, "grad_norm": 0.10394676774740219, "learning_rate": 0.002, "loss": 2.3421, "step": 383260 }, { "epoch": 1.4816146340709127, "grad_norm": 0.115664541721344, "learning_rate": 0.002, "loss": 2.3418, "step": 383270 }, { "epoch": 1.4816532912742961, "grad_norm": 0.12399958074092865, "learning_rate": 0.002, "loss": 2.3444, "step": 383280 }, { "epoch": 1.4816919484776794, "grad_norm": 0.09220393002033234, "learning_rate": 0.002, "loss": 2.344, "step": 383290 }, { "epoch": 1.4817306056810626, "grad_norm": 0.10916592925786972, "learning_rate": 0.002, "loss": 2.3392, "step": 383300 }, { "epoch": 1.4817692628844459, "grad_norm": 0.09219096601009369, "learning_rate": 0.002, "loss": 2.33, "step": 383310 }, { "epoch": 1.4818079200878291, "grad_norm": 0.10437247902154922, "learning_rate": 0.002, "loss": 2.3193, "step": 383320 }, { "epoch": 1.4818465772912124, "grad_norm": 0.10224609076976776, "learning_rate": 0.002, "loss": 2.3266, "step": 383330 }, { "epoch": 1.4818852344945959, "grad_norm": 0.12552587687969208, "learning_rate": 0.002, "loss": 2.3208, "step": 383340 }, { "epoch": 1.4819238916979791, "grad_norm": 0.11228864639997482, "learning_rate": 0.002, "loss": 2.3158, "step": 383350 }, { "epoch": 1.4819625489013624, "grad_norm": 0.11492089182138443, "learning_rate": 0.002, "loss": 2.3369, "step": 383360 }, { "epoch": 1.4820012061047456, "grad_norm": 0.10515844821929932, "learning_rate": 0.002, "loss": 2.3273, "step": 383370 }, { "epoch": 1.4820398633081289, "grad_norm": 0.09570827335119247, "learning_rate": 0.002, "loss": 2.3285, "step": 383380 }, { "epoch": 1.4820785205115121, "grad_norm": 0.13947443664073944, "learning_rate": 0.002, "loss": 2.3276, "step": 383390 }, { "epoch": 1.4821171777148954, "grad_norm": 0.10615614056587219, "learning_rate": 0.002, "loss": 2.3344, "step": 383400 }, { "epoch": 1.4821558349182786, "grad_norm": 0.10525351017713547, "learning_rate": 0.002, "loss": 2.335, "step": 383410 }, { "epoch": 1.4821944921216619, "grad_norm": 0.09245917946100235, "learning_rate": 0.002, "loss": 2.3376, "step": 383420 }, { "epoch": 1.4822331493250451, "grad_norm": 0.10440874099731445, "learning_rate": 0.002, "loss": 2.3302, "step": 383430 }, { "epoch": 1.4822718065284284, "grad_norm": 0.11015748977661133, "learning_rate": 0.002, "loss": 2.3322, "step": 383440 }, { "epoch": 1.4823104637318119, "grad_norm": 0.14614816009998322, "learning_rate": 0.002, "loss": 2.3387, "step": 383450 }, { "epoch": 1.482349120935195, "grad_norm": 0.09687640517950058, "learning_rate": 0.002, "loss": 2.3482, "step": 383460 }, { "epoch": 1.4823877781385784, "grad_norm": 0.09319257736206055, "learning_rate": 0.002, "loss": 2.331, "step": 383470 }, { "epoch": 1.4824264353419616, "grad_norm": 0.10338862240314484, "learning_rate": 0.002, "loss": 2.3296, "step": 383480 }, { "epoch": 1.4824650925453449, "grad_norm": 0.10386480391025543, "learning_rate": 0.002, "loss": 2.3449, "step": 383490 }, { "epoch": 1.4825037497487281, "grad_norm": 0.10086138546466827, "learning_rate": 0.002, "loss": 2.339, "step": 383500 }, { "epoch": 1.4825424069521116, "grad_norm": 0.10217492282390594, "learning_rate": 0.002, "loss": 2.3216, "step": 383510 }, { "epoch": 1.4825810641554948, "grad_norm": 0.09466515481472015, "learning_rate": 0.002, "loss": 2.3171, "step": 383520 }, { "epoch": 1.482619721358878, "grad_norm": 0.10521284490823746, "learning_rate": 0.002, "loss": 2.3197, "step": 383530 }, { "epoch": 1.4826583785622613, "grad_norm": 0.09716833382844925, "learning_rate": 0.002, "loss": 2.3394, "step": 383540 }, { "epoch": 1.4826970357656446, "grad_norm": 0.1123156026005745, "learning_rate": 0.002, "loss": 2.3305, "step": 383550 }, { "epoch": 1.4827356929690279, "grad_norm": 0.09585864841938019, "learning_rate": 0.002, "loss": 2.3332, "step": 383560 }, { "epoch": 1.482774350172411, "grad_norm": 0.09589291363954544, "learning_rate": 0.002, "loss": 2.3481, "step": 383570 }, { "epoch": 1.4828130073757944, "grad_norm": 0.1105579063296318, "learning_rate": 0.002, "loss": 2.3414, "step": 383580 }, { "epoch": 1.4828516645791776, "grad_norm": 0.10001932829618454, "learning_rate": 0.002, "loss": 2.3264, "step": 383590 }, { "epoch": 1.4828903217825609, "grad_norm": 0.09974181652069092, "learning_rate": 0.002, "loss": 2.3416, "step": 383600 }, { "epoch": 1.4829289789859441, "grad_norm": 0.10484086722135544, "learning_rate": 0.002, "loss": 2.3581, "step": 383610 }, { "epoch": 1.4829676361893276, "grad_norm": 0.10299315303564072, "learning_rate": 0.002, "loss": 2.3453, "step": 383620 }, { "epoch": 1.4830062933927108, "grad_norm": 0.11832892894744873, "learning_rate": 0.002, "loss": 2.3399, "step": 383630 }, { "epoch": 1.483044950596094, "grad_norm": 0.12004309892654419, "learning_rate": 0.002, "loss": 2.3488, "step": 383640 }, { "epoch": 1.4830836077994773, "grad_norm": 0.14523443579673767, "learning_rate": 0.002, "loss": 2.3255, "step": 383650 }, { "epoch": 1.4831222650028606, "grad_norm": 0.11291990429162979, "learning_rate": 0.002, "loss": 2.3261, "step": 383660 }, { "epoch": 1.4831609222062438, "grad_norm": 0.10504403710365295, "learning_rate": 0.002, "loss": 2.34, "step": 383670 }, { "epoch": 1.4831995794096273, "grad_norm": 0.09405023604631424, "learning_rate": 0.002, "loss": 2.3206, "step": 383680 }, { "epoch": 1.4832382366130106, "grad_norm": 0.10415950417518616, "learning_rate": 0.002, "loss": 2.3198, "step": 383690 }, { "epoch": 1.4832768938163938, "grad_norm": 0.09688252210617065, "learning_rate": 0.002, "loss": 2.3349, "step": 383700 }, { "epoch": 1.483315551019777, "grad_norm": 0.130942702293396, "learning_rate": 0.002, "loss": 2.3381, "step": 383710 }, { "epoch": 1.4833542082231603, "grad_norm": 0.09914401918649673, "learning_rate": 0.002, "loss": 2.3313, "step": 383720 }, { "epoch": 1.4833928654265436, "grad_norm": 0.11842868477106094, "learning_rate": 0.002, "loss": 2.3358, "step": 383730 }, { "epoch": 1.4834315226299268, "grad_norm": 0.1023070365190506, "learning_rate": 0.002, "loss": 2.327, "step": 383740 }, { "epoch": 1.48347017983331, "grad_norm": 0.1171945184469223, "learning_rate": 0.002, "loss": 2.3373, "step": 383750 }, { "epoch": 1.4835088370366933, "grad_norm": 0.10847219824790955, "learning_rate": 0.002, "loss": 2.3216, "step": 383760 }, { "epoch": 1.4835474942400766, "grad_norm": 0.10732583701610565, "learning_rate": 0.002, "loss": 2.3257, "step": 383770 }, { "epoch": 1.4835861514434598, "grad_norm": 0.09026862680912018, "learning_rate": 0.002, "loss": 2.32, "step": 383780 }, { "epoch": 1.4836248086468433, "grad_norm": 0.09737653285264969, "learning_rate": 0.002, "loss": 2.3392, "step": 383790 }, { "epoch": 1.4836634658502266, "grad_norm": 0.1116415336728096, "learning_rate": 0.002, "loss": 2.3364, "step": 383800 }, { "epoch": 1.4837021230536098, "grad_norm": 0.10554317384958267, "learning_rate": 0.002, "loss": 2.3274, "step": 383810 }, { "epoch": 1.483740780256993, "grad_norm": 0.10899829119443893, "learning_rate": 0.002, "loss": 2.332, "step": 383820 }, { "epoch": 1.4837794374603763, "grad_norm": 0.09907833486795425, "learning_rate": 0.002, "loss": 2.3197, "step": 383830 }, { "epoch": 1.4838180946637596, "grad_norm": 0.10291578620672226, "learning_rate": 0.002, "loss": 2.326, "step": 383840 }, { "epoch": 1.483856751867143, "grad_norm": 0.10465496778488159, "learning_rate": 0.002, "loss": 2.3401, "step": 383850 }, { "epoch": 1.4838954090705263, "grad_norm": 0.12064478546380997, "learning_rate": 0.002, "loss": 2.3298, "step": 383860 }, { "epoch": 1.4839340662739096, "grad_norm": 0.12056712806224823, "learning_rate": 0.002, "loss": 2.3328, "step": 383870 }, { "epoch": 1.4839727234772928, "grad_norm": 0.11756856739521027, "learning_rate": 0.002, "loss": 2.3347, "step": 383880 }, { "epoch": 1.484011380680676, "grad_norm": 0.09254784137010574, "learning_rate": 0.002, "loss": 2.3434, "step": 383890 }, { "epoch": 1.4840500378840593, "grad_norm": 0.10828401148319244, "learning_rate": 0.002, "loss": 2.3179, "step": 383900 }, { "epoch": 1.4840886950874426, "grad_norm": 0.09548208862543106, "learning_rate": 0.002, "loss": 2.3337, "step": 383910 }, { "epoch": 1.4841273522908258, "grad_norm": 0.09640761464834213, "learning_rate": 0.002, "loss": 2.339, "step": 383920 }, { "epoch": 1.484166009494209, "grad_norm": 0.11068131029605865, "learning_rate": 0.002, "loss": 2.3282, "step": 383930 }, { "epoch": 1.4842046666975923, "grad_norm": 0.10378747433423996, "learning_rate": 0.002, "loss": 2.3424, "step": 383940 }, { "epoch": 1.4842433239009756, "grad_norm": 0.10128451883792877, "learning_rate": 0.002, "loss": 2.3425, "step": 383950 }, { "epoch": 1.484281981104359, "grad_norm": 0.10668580234050751, "learning_rate": 0.002, "loss": 2.3322, "step": 383960 }, { "epoch": 1.4843206383077423, "grad_norm": 0.10479380190372467, "learning_rate": 0.002, "loss": 2.3487, "step": 383970 }, { "epoch": 1.4843592955111256, "grad_norm": 0.10641813278198242, "learning_rate": 0.002, "loss": 2.341, "step": 383980 }, { "epoch": 1.4843979527145088, "grad_norm": 0.10178200155496597, "learning_rate": 0.002, "loss": 2.3197, "step": 383990 }, { "epoch": 1.484436609917892, "grad_norm": 0.10124867409467697, "learning_rate": 0.002, "loss": 2.326, "step": 384000 }, { "epoch": 1.4844752671212753, "grad_norm": 0.10476204007863998, "learning_rate": 0.002, "loss": 2.3243, "step": 384010 }, { "epoch": 1.4845139243246588, "grad_norm": 0.10116813331842422, "learning_rate": 0.002, "loss": 2.346, "step": 384020 }, { "epoch": 1.484552581528042, "grad_norm": 0.12537890672683716, "learning_rate": 0.002, "loss": 2.3424, "step": 384030 }, { "epoch": 1.4845912387314253, "grad_norm": 0.10372122377157211, "learning_rate": 0.002, "loss": 2.3294, "step": 384040 }, { "epoch": 1.4846298959348085, "grad_norm": 0.10350830852985382, "learning_rate": 0.002, "loss": 2.3329, "step": 384050 }, { "epoch": 1.4846685531381918, "grad_norm": 0.09183838963508606, "learning_rate": 0.002, "loss": 2.327, "step": 384060 }, { "epoch": 1.484707210341575, "grad_norm": 0.12090179324150085, "learning_rate": 0.002, "loss": 2.3354, "step": 384070 }, { "epoch": 1.4847458675449583, "grad_norm": 0.1174481064081192, "learning_rate": 0.002, "loss": 2.3393, "step": 384080 }, { "epoch": 1.4847845247483415, "grad_norm": 0.08650671690702438, "learning_rate": 0.002, "loss": 2.3462, "step": 384090 }, { "epoch": 1.4848231819517248, "grad_norm": 0.08859454840421677, "learning_rate": 0.002, "loss": 2.3358, "step": 384100 }, { "epoch": 1.484861839155108, "grad_norm": 0.0890578106045723, "learning_rate": 0.002, "loss": 2.3051, "step": 384110 }, { "epoch": 1.4849004963584913, "grad_norm": 0.12500403821468353, "learning_rate": 0.002, "loss": 2.3333, "step": 384120 }, { "epoch": 1.4849391535618748, "grad_norm": 0.11174352467060089, "learning_rate": 0.002, "loss": 2.329, "step": 384130 }, { "epoch": 1.484977810765258, "grad_norm": 0.12891241908073425, "learning_rate": 0.002, "loss": 2.3336, "step": 384140 }, { "epoch": 1.4850164679686413, "grad_norm": 0.1084713265299797, "learning_rate": 0.002, "loss": 2.3286, "step": 384150 }, { "epoch": 1.4850551251720245, "grad_norm": 0.10541419684886932, "learning_rate": 0.002, "loss": 2.3165, "step": 384160 }, { "epoch": 1.4850937823754078, "grad_norm": 0.11375588923692703, "learning_rate": 0.002, "loss": 2.3232, "step": 384170 }, { "epoch": 1.485132439578791, "grad_norm": 0.09189879149198532, "learning_rate": 0.002, "loss": 2.339, "step": 384180 }, { "epoch": 1.4851710967821745, "grad_norm": 0.09924867004156113, "learning_rate": 0.002, "loss": 2.3461, "step": 384190 }, { "epoch": 1.4852097539855578, "grad_norm": 0.10338280349969864, "learning_rate": 0.002, "loss": 2.3488, "step": 384200 }, { "epoch": 1.485248411188941, "grad_norm": 0.10292736440896988, "learning_rate": 0.002, "loss": 2.3305, "step": 384210 }, { "epoch": 1.4852870683923243, "grad_norm": 0.1111016720533371, "learning_rate": 0.002, "loss": 2.3404, "step": 384220 }, { "epoch": 1.4853257255957075, "grad_norm": 0.1034284457564354, "learning_rate": 0.002, "loss": 2.3347, "step": 384230 }, { "epoch": 1.4853643827990908, "grad_norm": 0.08710968494415283, "learning_rate": 0.002, "loss": 2.3233, "step": 384240 }, { "epoch": 1.485403040002474, "grad_norm": 0.10321284830570221, "learning_rate": 0.002, "loss": 2.3225, "step": 384250 }, { "epoch": 1.4854416972058573, "grad_norm": 0.10766103863716125, "learning_rate": 0.002, "loss": 2.3266, "step": 384260 }, { "epoch": 1.4854803544092405, "grad_norm": 0.10859043151140213, "learning_rate": 0.002, "loss": 2.3404, "step": 384270 }, { "epoch": 1.4855190116126238, "grad_norm": 0.09920382499694824, "learning_rate": 0.002, "loss": 2.3427, "step": 384280 }, { "epoch": 1.485557668816007, "grad_norm": 0.09533470869064331, "learning_rate": 0.002, "loss": 2.3265, "step": 384290 }, { "epoch": 1.4855963260193905, "grad_norm": 0.10594595968723297, "learning_rate": 0.002, "loss": 2.3148, "step": 384300 }, { "epoch": 1.4856349832227738, "grad_norm": 0.12473361939191818, "learning_rate": 0.002, "loss": 2.3354, "step": 384310 }, { "epoch": 1.485673640426157, "grad_norm": 0.10555955022573471, "learning_rate": 0.002, "loss": 2.3328, "step": 384320 }, { "epoch": 1.4857122976295403, "grad_norm": 0.09441909939050674, "learning_rate": 0.002, "loss": 2.3491, "step": 384330 }, { "epoch": 1.4857509548329235, "grad_norm": 0.1008702889084816, "learning_rate": 0.002, "loss": 2.3359, "step": 384340 }, { "epoch": 1.4857896120363068, "grad_norm": 0.0997135266661644, "learning_rate": 0.002, "loss": 2.3284, "step": 384350 }, { "epoch": 1.4858282692396902, "grad_norm": 0.11072135716676712, "learning_rate": 0.002, "loss": 2.3296, "step": 384360 }, { "epoch": 1.4858669264430735, "grad_norm": 0.1106172502040863, "learning_rate": 0.002, "loss": 2.3384, "step": 384370 }, { "epoch": 1.4859055836464568, "grad_norm": 0.09268863499164581, "learning_rate": 0.002, "loss": 2.3344, "step": 384380 }, { "epoch": 1.48594424084984, "grad_norm": 0.09718073159456253, "learning_rate": 0.002, "loss": 2.3434, "step": 384390 }, { "epoch": 1.4859828980532233, "grad_norm": 0.10194788128137589, "learning_rate": 0.002, "loss": 2.3311, "step": 384400 }, { "epoch": 1.4860215552566065, "grad_norm": 0.13043349981307983, "learning_rate": 0.002, "loss": 2.3508, "step": 384410 }, { "epoch": 1.4860602124599898, "grad_norm": 0.09998510032892227, "learning_rate": 0.002, "loss": 2.3344, "step": 384420 }, { "epoch": 1.486098869663373, "grad_norm": 0.1066591814160347, "learning_rate": 0.002, "loss": 2.3384, "step": 384430 }, { "epoch": 1.4861375268667563, "grad_norm": 0.11668353527784348, "learning_rate": 0.002, "loss": 2.3558, "step": 384440 }, { "epoch": 1.4861761840701395, "grad_norm": 0.1100035160779953, "learning_rate": 0.002, "loss": 2.3369, "step": 384450 }, { "epoch": 1.486214841273523, "grad_norm": 0.11471410095691681, "learning_rate": 0.002, "loss": 2.308, "step": 384460 }, { "epoch": 1.4862534984769062, "grad_norm": 0.09942575544118881, "learning_rate": 0.002, "loss": 2.3417, "step": 384470 }, { "epoch": 1.4862921556802895, "grad_norm": 0.11599720269441605, "learning_rate": 0.002, "loss": 2.3494, "step": 384480 }, { "epoch": 1.4863308128836727, "grad_norm": 0.10169614106416702, "learning_rate": 0.002, "loss": 2.318, "step": 384490 }, { "epoch": 1.486369470087056, "grad_norm": 0.12186551839113235, "learning_rate": 0.002, "loss": 2.3409, "step": 384500 }, { "epoch": 1.4864081272904393, "grad_norm": 0.10848033428192139, "learning_rate": 0.002, "loss": 2.3498, "step": 384510 }, { "epoch": 1.4864467844938225, "grad_norm": 0.1230197325348854, "learning_rate": 0.002, "loss": 2.3412, "step": 384520 }, { "epoch": 1.486485441697206, "grad_norm": 0.11099942028522491, "learning_rate": 0.002, "loss": 2.3394, "step": 384530 }, { "epoch": 1.4865240989005892, "grad_norm": 0.11096183955669403, "learning_rate": 0.002, "loss": 2.3382, "step": 384540 }, { "epoch": 1.4865627561039725, "grad_norm": 0.10845883190631866, "learning_rate": 0.002, "loss": 2.3289, "step": 384550 }, { "epoch": 1.4866014133073557, "grad_norm": 0.11498292535543442, "learning_rate": 0.002, "loss": 2.3337, "step": 384560 }, { "epoch": 1.486640070510739, "grad_norm": 0.10446729511022568, "learning_rate": 0.002, "loss": 2.3248, "step": 384570 }, { "epoch": 1.4866787277141222, "grad_norm": 0.1114344671368599, "learning_rate": 0.002, "loss": 2.3239, "step": 384580 }, { "epoch": 1.4867173849175055, "grad_norm": 0.10594511777162552, "learning_rate": 0.002, "loss": 2.3325, "step": 384590 }, { "epoch": 1.4867560421208887, "grad_norm": 0.09707600623369217, "learning_rate": 0.002, "loss": 2.3214, "step": 384600 }, { "epoch": 1.486794699324272, "grad_norm": 0.1228179857134819, "learning_rate": 0.002, "loss": 2.3239, "step": 384610 }, { "epoch": 1.4868333565276552, "grad_norm": 0.10634078830480576, "learning_rate": 0.002, "loss": 2.3361, "step": 384620 }, { "epoch": 1.4868720137310387, "grad_norm": 0.1053246408700943, "learning_rate": 0.002, "loss": 2.3222, "step": 384630 }, { "epoch": 1.486910670934422, "grad_norm": 0.10659126192331314, "learning_rate": 0.002, "loss": 2.3304, "step": 384640 }, { "epoch": 1.4869493281378052, "grad_norm": 0.10877703130245209, "learning_rate": 0.002, "loss": 2.3429, "step": 384650 }, { "epoch": 1.4869879853411885, "grad_norm": 0.11045970767736435, "learning_rate": 0.002, "loss": 2.3371, "step": 384660 }, { "epoch": 1.4870266425445717, "grad_norm": 0.09530500322580338, "learning_rate": 0.002, "loss": 2.3266, "step": 384670 }, { "epoch": 1.487065299747955, "grad_norm": 0.11263837665319443, "learning_rate": 0.002, "loss": 2.3408, "step": 384680 }, { "epoch": 1.4871039569513382, "grad_norm": 0.10159396380186081, "learning_rate": 0.002, "loss": 2.3347, "step": 384690 }, { "epoch": 1.4871426141547217, "grad_norm": 0.10858163982629776, "learning_rate": 0.002, "loss": 2.3369, "step": 384700 }, { "epoch": 1.487181271358105, "grad_norm": 0.11083827912807465, "learning_rate": 0.002, "loss": 2.3305, "step": 384710 }, { "epoch": 1.4872199285614882, "grad_norm": 0.1142810732126236, "learning_rate": 0.002, "loss": 2.3441, "step": 384720 }, { "epoch": 1.4872585857648715, "grad_norm": 0.10030544549226761, "learning_rate": 0.002, "loss": 2.3506, "step": 384730 }, { "epoch": 1.4872972429682547, "grad_norm": 0.10645314306020737, "learning_rate": 0.002, "loss": 2.3282, "step": 384740 }, { "epoch": 1.487335900171638, "grad_norm": 0.0968933179974556, "learning_rate": 0.002, "loss": 2.3216, "step": 384750 }, { "epoch": 1.4873745573750212, "grad_norm": 0.11533058434724808, "learning_rate": 0.002, "loss": 2.3199, "step": 384760 }, { "epoch": 1.4874132145784045, "grad_norm": 0.10017845034599304, "learning_rate": 0.002, "loss": 2.3522, "step": 384770 }, { "epoch": 1.4874518717817877, "grad_norm": 0.11839465796947479, "learning_rate": 0.002, "loss": 2.3273, "step": 384780 }, { "epoch": 1.487490528985171, "grad_norm": 0.11330369114875793, "learning_rate": 0.002, "loss": 2.3377, "step": 384790 }, { "epoch": 1.4875291861885545, "grad_norm": 0.1114191859960556, "learning_rate": 0.002, "loss": 2.3314, "step": 384800 }, { "epoch": 1.4875678433919377, "grad_norm": 0.10950705409049988, "learning_rate": 0.002, "loss": 2.3321, "step": 384810 }, { "epoch": 1.487606500595321, "grad_norm": 0.11004837602376938, "learning_rate": 0.002, "loss": 2.3415, "step": 384820 }, { "epoch": 1.4876451577987042, "grad_norm": 0.09497445821762085, "learning_rate": 0.002, "loss": 2.3267, "step": 384830 }, { "epoch": 1.4876838150020875, "grad_norm": 0.09108157455921173, "learning_rate": 0.002, "loss": 2.3319, "step": 384840 }, { "epoch": 1.4877224722054707, "grad_norm": 0.10038923472166061, "learning_rate": 0.002, "loss": 2.3406, "step": 384850 }, { "epoch": 1.487761129408854, "grad_norm": 0.10554396361112595, "learning_rate": 0.002, "loss": 2.3331, "step": 384860 }, { "epoch": 1.4877997866122374, "grad_norm": 0.10369051992893219, "learning_rate": 0.002, "loss": 2.3286, "step": 384870 }, { "epoch": 1.4878384438156207, "grad_norm": 0.1258969008922577, "learning_rate": 0.002, "loss": 2.3332, "step": 384880 }, { "epoch": 1.487877101019004, "grad_norm": 0.08914817869663239, "learning_rate": 0.002, "loss": 2.3268, "step": 384890 }, { "epoch": 1.4879157582223872, "grad_norm": 0.09517431259155273, "learning_rate": 0.002, "loss": 2.3223, "step": 384900 }, { "epoch": 1.4879544154257704, "grad_norm": 0.09845437854528427, "learning_rate": 0.002, "loss": 2.3452, "step": 384910 }, { "epoch": 1.4879930726291537, "grad_norm": 0.12158235162496567, "learning_rate": 0.002, "loss": 2.3309, "step": 384920 }, { "epoch": 1.488031729832537, "grad_norm": 0.10567854344844818, "learning_rate": 0.002, "loss": 2.3292, "step": 384930 }, { "epoch": 1.4880703870359202, "grad_norm": 0.09979545325040817, "learning_rate": 0.002, "loss": 2.3276, "step": 384940 }, { "epoch": 1.4881090442393035, "grad_norm": 0.1211281418800354, "learning_rate": 0.002, "loss": 2.3309, "step": 384950 }, { "epoch": 1.4881477014426867, "grad_norm": 0.09864045679569244, "learning_rate": 0.002, "loss": 2.3414, "step": 384960 }, { "epoch": 1.4881863586460702, "grad_norm": 0.1125154048204422, "learning_rate": 0.002, "loss": 2.3315, "step": 384970 }, { "epoch": 1.4882250158494534, "grad_norm": 0.11239704489707947, "learning_rate": 0.002, "loss": 2.3359, "step": 384980 }, { "epoch": 1.4882636730528367, "grad_norm": 0.10237913578748703, "learning_rate": 0.002, "loss": 2.3324, "step": 384990 }, { "epoch": 1.48830233025622, "grad_norm": 0.10310379415750504, "learning_rate": 0.002, "loss": 2.3171, "step": 385000 }, { "epoch": 1.4883409874596032, "grad_norm": 0.10797710716724396, "learning_rate": 0.002, "loss": 2.335, "step": 385010 }, { "epoch": 1.4883796446629864, "grad_norm": 0.10414762794971466, "learning_rate": 0.002, "loss": 2.3261, "step": 385020 }, { "epoch": 1.4884183018663697, "grad_norm": 0.12361656874418259, "learning_rate": 0.002, "loss": 2.3264, "step": 385030 }, { "epoch": 1.4884569590697532, "grad_norm": 0.10109461098909378, "learning_rate": 0.002, "loss": 2.3488, "step": 385040 }, { "epoch": 1.4884956162731364, "grad_norm": 0.1054452583193779, "learning_rate": 0.002, "loss": 2.3292, "step": 385050 }, { "epoch": 1.4885342734765197, "grad_norm": 0.09931855648756027, "learning_rate": 0.002, "loss": 2.3245, "step": 385060 }, { "epoch": 1.488572930679903, "grad_norm": 0.12945152819156647, "learning_rate": 0.002, "loss": 2.3278, "step": 385070 }, { "epoch": 1.4886115878832862, "grad_norm": 0.10160331428050995, "learning_rate": 0.002, "loss": 2.3369, "step": 385080 }, { "epoch": 1.4886502450866694, "grad_norm": 0.09898512065410614, "learning_rate": 0.002, "loss": 2.3266, "step": 385090 }, { "epoch": 1.4886889022900527, "grad_norm": 0.09146331250667572, "learning_rate": 0.002, "loss": 2.3232, "step": 385100 }, { "epoch": 1.488727559493436, "grad_norm": 0.10068368166685104, "learning_rate": 0.002, "loss": 2.308, "step": 385110 }, { "epoch": 1.4887662166968192, "grad_norm": 0.10230806469917297, "learning_rate": 0.002, "loss": 2.3354, "step": 385120 }, { "epoch": 1.4888048739002024, "grad_norm": 0.10139543563127518, "learning_rate": 0.002, "loss": 2.3265, "step": 385130 }, { "epoch": 1.488843531103586, "grad_norm": 0.10228900611400604, "learning_rate": 0.002, "loss": 2.3249, "step": 385140 }, { "epoch": 1.4888821883069692, "grad_norm": 0.110586978495121, "learning_rate": 0.002, "loss": 2.32, "step": 385150 }, { "epoch": 1.4889208455103524, "grad_norm": 0.10921701043844223, "learning_rate": 0.002, "loss": 2.332, "step": 385160 }, { "epoch": 1.4889595027137357, "grad_norm": 0.10913387686014175, "learning_rate": 0.002, "loss": 2.3273, "step": 385170 }, { "epoch": 1.488998159917119, "grad_norm": 0.10443458706140518, "learning_rate": 0.002, "loss": 2.344, "step": 385180 }, { "epoch": 1.4890368171205022, "grad_norm": 0.11597229540348053, "learning_rate": 0.002, "loss": 2.3358, "step": 385190 }, { "epoch": 1.4890754743238857, "grad_norm": 0.09303038567304611, "learning_rate": 0.002, "loss": 2.3315, "step": 385200 }, { "epoch": 1.489114131527269, "grad_norm": 0.0993490144610405, "learning_rate": 0.002, "loss": 2.3232, "step": 385210 }, { "epoch": 1.4891527887306522, "grad_norm": 0.10018926113843918, "learning_rate": 0.002, "loss": 2.324, "step": 385220 }, { "epoch": 1.4891914459340354, "grad_norm": 0.1158030554652214, "learning_rate": 0.002, "loss": 2.3299, "step": 385230 }, { "epoch": 1.4892301031374187, "grad_norm": 0.10592015087604523, "learning_rate": 0.002, "loss": 2.3371, "step": 385240 }, { "epoch": 1.489268760340802, "grad_norm": 0.10533588379621506, "learning_rate": 0.002, "loss": 2.3285, "step": 385250 }, { "epoch": 1.4893074175441852, "grad_norm": 0.10306498408317566, "learning_rate": 0.002, "loss": 2.3378, "step": 385260 }, { "epoch": 1.4893460747475684, "grad_norm": 0.1061411127448082, "learning_rate": 0.002, "loss": 2.3377, "step": 385270 }, { "epoch": 1.4893847319509517, "grad_norm": 0.0947585478425026, "learning_rate": 0.002, "loss": 2.3329, "step": 385280 }, { "epoch": 1.489423389154335, "grad_norm": 0.13337740302085876, "learning_rate": 0.002, "loss": 2.3428, "step": 385290 }, { "epoch": 1.4894620463577182, "grad_norm": 0.11710366606712341, "learning_rate": 0.002, "loss": 2.3316, "step": 385300 }, { "epoch": 1.4895007035611016, "grad_norm": 0.11103884130716324, "learning_rate": 0.002, "loss": 2.3265, "step": 385310 }, { "epoch": 1.489539360764485, "grad_norm": 0.1108136996626854, "learning_rate": 0.002, "loss": 2.3184, "step": 385320 }, { "epoch": 1.4895780179678682, "grad_norm": 0.11765240877866745, "learning_rate": 0.002, "loss": 2.3393, "step": 385330 }, { "epoch": 1.4896166751712514, "grad_norm": 0.09105757623910904, "learning_rate": 0.002, "loss": 2.314, "step": 385340 }, { "epoch": 1.4896553323746347, "grad_norm": 0.09805571287870407, "learning_rate": 0.002, "loss": 2.3362, "step": 385350 }, { "epoch": 1.489693989578018, "grad_norm": 0.11820369213819504, "learning_rate": 0.002, "loss": 2.3193, "step": 385360 }, { "epoch": 1.4897326467814014, "grad_norm": 0.09663347899913788, "learning_rate": 0.002, "loss": 2.324, "step": 385370 }, { "epoch": 1.4897713039847846, "grad_norm": 0.10458124428987503, "learning_rate": 0.002, "loss": 2.3355, "step": 385380 }, { "epoch": 1.4898099611881679, "grad_norm": 0.10823596268892288, "learning_rate": 0.002, "loss": 2.3413, "step": 385390 }, { "epoch": 1.4898486183915511, "grad_norm": 0.09211437404155731, "learning_rate": 0.002, "loss": 2.3451, "step": 385400 }, { "epoch": 1.4898872755949344, "grad_norm": 0.1029345691204071, "learning_rate": 0.002, "loss": 2.3279, "step": 385410 }, { "epoch": 1.4899259327983176, "grad_norm": 0.09940619766712189, "learning_rate": 0.002, "loss": 2.3313, "step": 385420 }, { "epoch": 1.489964590001701, "grad_norm": 0.0912894532084465, "learning_rate": 0.002, "loss": 2.318, "step": 385430 }, { "epoch": 1.4900032472050841, "grad_norm": 0.10252334922552109, "learning_rate": 0.002, "loss": 2.3122, "step": 385440 }, { "epoch": 1.4900419044084674, "grad_norm": 0.11161807179450989, "learning_rate": 0.002, "loss": 2.344, "step": 385450 }, { "epoch": 1.4900805616118507, "grad_norm": 0.11407460272312164, "learning_rate": 0.002, "loss": 2.3175, "step": 385460 }, { "epoch": 1.490119218815234, "grad_norm": 0.10580111294984818, "learning_rate": 0.002, "loss": 2.3188, "step": 385470 }, { "epoch": 1.4901578760186174, "grad_norm": 0.10977879911661148, "learning_rate": 0.002, "loss": 2.3388, "step": 385480 }, { "epoch": 1.4901965332220006, "grad_norm": 0.11768648028373718, "learning_rate": 0.002, "loss": 2.3403, "step": 385490 }, { "epoch": 1.4902351904253839, "grad_norm": 0.09398915618658066, "learning_rate": 0.002, "loss": 2.3172, "step": 385500 }, { "epoch": 1.4902738476287671, "grad_norm": 0.09989374130964279, "learning_rate": 0.002, "loss": 2.3215, "step": 385510 }, { "epoch": 1.4903125048321504, "grad_norm": 0.1387077122926712, "learning_rate": 0.002, "loss": 2.3345, "step": 385520 }, { "epoch": 1.4903511620355336, "grad_norm": 0.11738712340593338, "learning_rate": 0.002, "loss": 2.3348, "step": 385530 }, { "epoch": 1.4903898192389171, "grad_norm": 0.10570180416107178, "learning_rate": 0.002, "loss": 2.3383, "step": 385540 }, { "epoch": 1.4904284764423004, "grad_norm": 0.12425248324871063, "learning_rate": 0.002, "loss": 2.3622, "step": 385550 }, { "epoch": 1.4904671336456836, "grad_norm": 0.11239184439182281, "learning_rate": 0.002, "loss": 2.3365, "step": 385560 }, { "epoch": 1.4905057908490669, "grad_norm": 0.11231973767280579, "learning_rate": 0.002, "loss": 2.3413, "step": 385570 }, { "epoch": 1.4905444480524501, "grad_norm": 0.10970614850521088, "learning_rate": 0.002, "loss": 2.3367, "step": 385580 }, { "epoch": 1.4905831052558334, "grad_norm": 0.09998834133148193, "learning_rate": 0.002, "loss": 2.3265, "step": 385590 }, { "epoch": 1.4906217624592166, "grad_norm": 0.10915309935808182, "learning_rate": 0.002, "loss": 2.3298, "step": 385600 }, { "epoch": 1.4906604196625999, "grad_norm": 0.1345834732055664, "learning_rate": 0.002, "loss": 2.3397, "step": 385610 }, { "epoch": 1.4906990768659831, "grad_norm": 0.09776283800601959, "learning_rate": 0.002, "loss": 2.343, "step": 385620 }, { "epoch": 1.4907377340693664, "grad_norm": 0.10310234874486923, "learning_rate": 0.002, "loss": 2.342, "step": 385630 }, { "epoch": 1.4907763912727496, "grad_norm": 0.11326014995574951, "learning_rate": 0.002, "loss": 2.3398, "step": 385640 }, { "epoch": 1.490815048476133, "grad_norm": 0.09007581323385239, "learning_rate": 0.002, "loss": 2.3335, "step": 385650 }, { "epoch": 1.4908537056795164, "grad_norm": 0.09305016696453094, "learning_rate": 0.002, "loss": 2.3281, "step": 385660 }, { "epoch": 1.4908923628828996, "grad_norm": 0.12081436812877655, "learning_rate": 0.002, "loss": 2.3405, "step": 385670 }, { "epoch": 1.4909310200862829, "grad_norm": 0.1021009087562561, "learning_rate": 0.002, "loss": 2.3432, "step": 385680 }, { "epoch": 1.4909696772896661, "grad_norm": 0.11606641858816147, "learning_rate": 0.002, "loss": 2.3401, "step": 385690 }, { "epoch": 1.4910083344930494, "grad_norm": 0.10912982374429703, "learning_rate": 0.002, "loss": 2.3414, "step": 385700 }, { "epoch": 1.4910469916964328, "grad_norm": 0.11273853480815887, "learning_rate": 0.002, "loss": 2.3291, "step": 385710 }, { "epoch": 1.491085648899816, "grad_norm": 0.10597539693117142, "learning_rate": 0.002, "loss": 2.3267, "step": 385720 }, { "epoch": 1.4911243061031993, "grad_norm": 0.10982988774776459, "learning_rate": 0.002, "loss": 2.331, "step": 385730 }, { "epoch": 1.4911629633065826, "grad_norm": 0.10974101722240448, "learning_rate": 0.002, "loss": 2.3269, "step": 385740 }, { "epoch": 1.4912016205099659, "grad_norm": 0.09794044494628906, "learning_rate": 0.002, "loss": 2.3235, "step": 385750 }, { "epoch": 1.491240277713349, "grad_norm": 0.09805993735790253, "learning_rate": 0.002, "loss": 2.3275, "step": 385760 }, { "epoch": 1.4912789349167324, "grad_norm": 0.11157072335481644, "learning_rate": 0.002, "loss": 2.3338, "step": 385770 }, { "epoch": 1.4913175921201156, "grad_norm": 0.09369926154613495, "learning_rate": 0.002, "loss": 2.3234, "step": 385780 }, { "epoch": 1.4913562493234989, "grad_norm": 0.10773736238479614, "learning_rate": 0.002, "loss": 2.3214, "step": 385790 }, { "epoch": 1.4913949065268821, "grad_norm": 0.09465586394071579, "learning_rate": 0.002, "loss": 2.3331, "step": 385800 }, { "epoch": 1.4914335637302654, "grad_norm": 0.09090068191289902, "learning_rate": 0.002, "loss": 2.3322, "step": 385810 }, { "epoch": 1.4914722209336488, "grad_norm": 0.14364342391490936, "learning_rate": 0.002, "loss": 2.3244, "step": 385820 }, { "epoch": 1.491510878137032, "grad_norm": 0.09867400676012039, "learning_rate": 0.002, "loss": 2.3314, "step": 385830 }, { "epoch": 1.4915495353404153, "grad_norm": 0.1080484688282013, "learning_rate": 0.002, "loss": 2.3491, "step": 385840 }, { "epoch": 1.4915881925437986, "grad_norm": 0.09689328819513321, "learning_rate": 0.002, "loss": 2.3323, "step": 385850 }, { "epoch": 1.4916268497471818, "grad_norm": 0.11398959159851074, "learning_rate": 0.002, "loss": 2.3317, "step": 385860 }, { "epoch": 1.491665506950565, "grad_norm": 0.09113884717226028, "learning_rate": 0.002, "loss": 2.3114, "step": 385870 }, { "epoch": 1.4917041641539486, "grad_norm": 0.10891013592481613, "learning_rate": 0.002, "loss": 2.343, "step": 385880 }, { "epoch": 1.4917428213573318, "grad_norm": 0.09847282618284225, "learning_rate": 0.002, "loss": 2.3341, "step": 385890 }, { "epoch": 1.491781478560715, "grad_norm": 0.13759636878967285, "learning_rate": 0.002, "loss": 2.3336, "step": 385900 }, { "epoch": 1.4918201357640983, "grad_norm": 0.10775196552276611, "learning_rate": 0.002, "loss": 2.3368, "step": 385910 }, { "epoch": 1.4918587929674816, "grad_norm": 0.10851259529590607, "learning_rate": 0.002, "loss": 2.3349, "step": 385920 }, { "epoch": 1.4918974501708648, "grad_norm": 0.10537663847208023, "learning_rate": 0.002, "loss": 2.3361, "step": 385930 }, { "epoch": 1.491936107374248, "grad_norm": 0.09828297793865204, "learning_rate": 0.002, "loss": 2.3404, "step": 385940 }, { "epoch": 1.4919747645776313, "grad_norm": 0.09515740722417831, "learning_rate": 0.002, "loss": 2.3305, "step": 385950 }, { "epoch": 1.4920134217810146, "grad_norm": 0.1303916722536087, "learning_rate": 0.002, "loss": 2.3239, "step": 385960 }, { "epoch": 1.4920520789843978, "grad_norm": 0.09538701176643372, "learning_rate": 0.002, "loss": 2.3426, "step": 385970 }, { "epoch": 1.492090736187781, "grad_norm": 0.10584598779678345, "learning_rate": 0.002, "loss": 2.3441, "step": 385980 }, { "epoch": 1.4921293933911646, "grad_norm": 0.1097467914223671, "learning_rate": 0.002, "loss": 2.3301, "step": 385990 }, { "epoch": 1.4921680505945478, "grad_norm": 0.10740872472524643, "learning_rate": 0.002, "loss": 2.3445, "step": 386000 }, { "epoch": 1.492206707797931, "grad_norm": 0.09600348770618439, "learning_rate": 0.002, "loss": 2.3159, "step": 386010 }, { "epoch": 1.4922453650013143, "grad_norm": 0.1136220321059227, "learning_rate": 0.002, "loss": 2.3169, "step": 386020 }, { "epoch": 1.4922840222046976, "grad_norm": 0.10824783891439438, "learning_rate": 0.002, "loss": 2.345, "step": 386030 }, { "epoch": 1.4923226794080808, "grad_norm": 0.09880796074867249, "learning_rate": 0.002, "loss": 2.351, "step": 386040 }, { "epoch": 1.4923613366114643, "grad_norm": 0.10647264122962952, "learning_rate": 0.002, "loss": 2.3283, "step": 386050 }, { "epoch": 1.4923999938148476, "grad_norm": 0.08816510438919067, "learning_rate": 0.002, "loss": 2.3295, "step": 386060 }, { "epoch": 1.4924386510182308, "grad_norm": 0.10047812014818192, "learning_rate": 0.002, "loss": 2.3385, "step": 386070 }, { "epoch": 1.492477308221614, "grad_norm": 0.12441463768482208, "learning_rate": 0.002, "loss": 2.317, "step": 386080 }, { "epoch": 1.4925159654249973, "grad_norm": 0.10287167876958847, "learning_rate": 0.002, "loss": 2.3268, "step": 386090 }, { "epoch": 1.4925546226283806, "grad_norm": 0.11641228199005127, "learning_rate": 0.002, "loss": 2.3333, "step": 386100 }, { "epoch": 1.4925932798317638, "grad_norm": 0.11843828856945038, "learning_rate": 0.002, "loss": 2.332, "step": 386110 }, { "epoch": 1.492631937035147, "grad_norm": 0.10662206262350082, "learning_rate": 0.002, "loss": 2.3545, "step": 386120 }, { "epoch": 1.4926705942385303, "grad_norm": 0.10384664684534073, "learning_rate": 0.002, "loss": 2.3231, "step": 386130 }, { "epoch": 1.4927092514419136, "grad_norm": 0.10603508353233337, "learning_rate": 0.002, "loss": 2.3306, "step": 386140 }, { "epoch": 1.4927479086452968, "grad_norm": 0.11298702657222748, "learning_rate": 0.002, "loss": 2.3275, "step": 386150 }, { "epoch": 1.4927865658486803, "grad_norm": 0.08943584561347961, "learning_rate": 0.002, "loss": 2.345, "step": 386160 }, { "epoch": 1.4928252230520636, "grad_norm": 0.11151974648237228, "learning_rate": 0.002, "loss": 2.3348, "step": 386170 }, { "epoch": 1.4928638802554468, "grad_norm": 0.10686797648668289, "learning_rate": 0.002, "loss": 2.3493, "step": 386180 }, { "epoch": 1.49290253745883, "grad_norm": 0.1035001203417778, "learning_rate": 0.002, "loss": 2.3212, "step": 386190 }, { "epoch": 1.4929411946622133, "grad_norm": 0.10172740370035172, "learning_rate": 0.002, "loss": 2.3236, "step": 386200 }, { "epoch": 1.4929798518655966, "grad_norm": 0.11338132619857788, "learning_rate": 0.002, "loss": 2.3377, "step": 386210 }, { "epoch": 1.49301850906898, "grad_norm": 0.11095208674669266, "learning_rate": 0.002, "loss": 2.3464, "step": 386220 }, { "epoch": 1.4930571662723633, "grad_norm": 0.111575648188591, "learning_rate": 0.002, "loss": 2.3215, "step": 386230 }, { "epoch": 1.4930958234757465, "grad_norm": 0.09547489881515503, "learning_rate": 0.002, "loss": 2.3359, "step": 386240 }, { "epoch": 1.4931344806791298, "grad_norm": 0.09410912543535233, "learning_rate": 0.002, "loss": 2.3461, "step": 386250 }, { "epoch": 1.493173137882513, "grad_norm": 0.1237073466181755, "learning_rate": 0.002, "loss": 2.3396, "step": 386260 }, { "epoch": 1.4932117950858963, "grad_norm": 0.11605266481637955, "learning_rate": 0.002, "loss": 2.3279, "step": 386270 }, { "epoch": 1.4932504522892796, "grad_norm": 0.10717470943927765, "learning_rate": 0.002, "loss": 2.3431, "step": 386280 }, { "epoch": 1.4932891094926628, "grad_norm": 0.10141540318727493, "learning_rate": 0.002, "loss": 2.3378, "step": 386290 }, { "epoch": 1.493327766696046, "grad_norm": 0.1000044122338295, "learning_rate": 0.002, "loss": 2.3278, "step": 386300 }, { "epoch": 1.4933664238994293, "grad_norm": 0.0962546244263649, "learning_rate": 0.002, "loss": 2.3284, "step": 386310 }, { "epoch": 1.4934050811028126, "grad_norm": 0.09642874449491501, "learning_rate": 0.002, "loss": 2.3372, "step": 386320 }, { "epoch": 1.493443738306196, "grad_norm": 0.08658089488744736, "learning_rate": 0.002, "loss": 2.3432, "step": 386330 }, { "epoch": 1.4934823955095793, "grad_norm": 0.14702236652374268, "learning_rate": 0.002, "loss": 2.338, "step": 386340 }, { "epoch": 1.4935210527129625, "grad_norm": 0.10530516505241394, "learning_rate": 0.002, "loss": 2.3384, "step": 386350 }, { "epoch": 1.4935597099163458, "grad_norm": 0.0912630707025528, "learning_rate": 0.002, "loss": 2.3364, "step": 386360 }, { "epoch": 1.493598367119729, "grad_norm": 0.12375325709581375, "learning_rate": 0.002, "loss": 2.3366, "step": 386370 }, { "epoch": 1.4936370243231123, "grad_norm": 0.0921991690993309, "learning_rate": 0.002, "loss": 2.3339, "step": 386380 }, { "epoch": 1.4936756815264958, "grad_norm": 0.11260882019996643, "learning_rate": 0.002, "loss": 2.3272, "step": 386390 }, { "epoch": 1.493714338729879, "grad_norm": 0.11492011696100235, "learning_rate": 0.002, "loss": 2.3319, "step": 386400 }, { "epoch": 1.4937529959332623, "grad_norm": 0.11711040884256363, "learning_rate": 0.002, "loss": 2.3426, "step": 386410 }, { "epoch": 1.4937916531366455, "grad_norm": 0.1272195279598236, "learning_rate": 0.002, "loss": 2.3275, "step": 386420 }, { "epoch": 1.4938303103400288, "grad_norm": 0.09441424161195755, "learning_rate": 0.002, "loss": 2.3181, "step": 386430 }, { "epoch": 1.493868967543412, "grad_norm": 0.09789464622735977, "learning_rate": 0.002, "loss": 2.3281, "step": 386440 }, { "epoch": 1.4939076247467953, "grad_norm": 0.10984098166227341, "learning_rate": 0.002, "loss": 2.3275, "step": 386450 }, { "epoch": 1.4939462819501785, "grad_norm": 0.0962597206234932, "learning_rate": 0.002, "loss": 2.3385, "step": 386460 }, { "epoch": 1.4939849391535618, "grad_norm": 0.10608778893947601, "learning_rate": 0.002, "loss": 2.3204, "step": 386470 }, { "epoch": 1.494023596356945, "grad_norm": 0.10052426904439926, "learning_rate": 0.002, "loss": 2.3303, "step": 386480 }, { "epoch": 1.4940622535603285, "grad_norm": 0.10627039521932602, "learning_rate": 0.002, "loss": 2.3462, "step": 386490 }, { "epoch": 1.4941009107637118, "grad_norm": 0.10974103212356567, "learning_rate": 0.002, "loss": 2.3262, "step": 386500 }, { "epoch": 1.494139567967095, "grad_norm": 0.10237738490104675, "learning_rate": 0.002, "loss": 2.3485, "step": 386510 }, { "epoch": 1.4941782251704783, "grad_norm": 0.11119193583726883, "learning_rate": 0.002, "loss": 2.3392, "step": 386520 }, { "epoch": 1.4942168823738615, "grad_norm": 0.12733414769172668, "learning_rate": 0.002, "loss": 2.3335, "step": 386530 }, { "epoch": 1.4942555395772448, "grad_norm": 0.11605283617973328, "learning_rate": 0.002, "loss": 2.3162, "step": 386540 }, { "epoch": 1.494294196780628, "grad_norm": 0.1292264312505722, "learning_rate": 0.002, "loss": 2.323, "step": 386550 }, { "epoch": 1.4943328539840115, "grad_norm": 0.11857552826404572, "learning_rate": 0.002, "loss": 2.331, "step": 386560 }, { "epoch": 1.4943715111873948, "grad_norm": 0.0968799740076065, "learning_rate": 0.002, "loss": 2.3457, "step": 386570 }, { "epoch": 1.494410168390778, "grad_norm": 0.2860582768917084, "learning_rate": 0.002, "loss": 2.3238, "step": 386580 }, { "epoch": 1.4944488255941613, "grad_norm": 0.11790402233600616, "learning_rate": 0.002, "loss": 2.3282, "step": 386590 }, { "epoch": 1.4944874827975445, "grad_norm": 0.1172618493437767, "learning_rate": 0.002, "loss": 2.3282, "step": 386600 }, { "epoch": 1.4945261400009278, "grad_norm": 0.08819235861301422, "learning_rate": 0.002, "loss": 2.3276, "step": 386610 }, { "epoch": 1.494564797204311, "grad_norm": 0.11440522223711014, "learning_rate": 0.002, "loss": 2.332, "step": 386620 }, { "epoch": 1.4946034544076943, "grad_norm": 0.11602221429347992, "learning_rate": 0.002, "loss": 2.3343, "step": 386630 }, { "epoch": 1.4946421116110775, "grad_norm": 0.08647876977920532, "learning_rate": 0.002, "loss": 2.3312, "step": 386640 }, { "epoch": 1.4946807688144608, "grad_norm": 0.12004893273115158, "learning_rate": 0.002, "loss": 2.3505, "step": 386650 }, { "epoch": 1.4947194260178442, "grad_norm": 0.1069132536649704, "learning_rate": 0.002, "loss": 2.3426, "step": 386660 }, { "epoch": 1.4947580832212275, "grad_norm": 0.09967175871133804, "learning_rate": 0.002, "loss": 2.3374, "step": 386670 }, { "epoch": 1.4947967404246107, "grad_norm": 0.11542779952287674, "learning_rate": 0.002, "loss": 2.3335, "step": 386680 }, { "epoch": 1.494835397627994, "grad_norm": 0.10156113654375076, "learning_rate": 0.002, "loss": 2.3276, "step": 386690 }, { "epoch": 1.4948740548313773, "grad_norm": 0.10142820328474045, "learning_rate": 0.002, "loss": 2.3445, "step": 386700 }, { "epoch": 1.4949127120347605, "grad_norm": 0.10076209157705307, "learning_rate": 0.002, "loss": 2.3295, "step": 386710 }, { "epoch": 1.4949513692381438, "grad_norm": 0.10062038898468018, "learning_rate": 0.002, "loss": 2.3322, "step": 386720 }, { "epoch": 1.4949900264415272, "grad_norm": 0.0967542752623558, "learning_rate": 0.002, "loss": 2.3334, "step": 386730 }, { "epoch": 1.4950286836449105, "grad_norm": 0.11722204834222794, "learning_rate": 0.002, "loss": 2.3472, "step": 386740 }, { "epoch": 1.4950673408482937, "grad_norm": 0.10882771760225296, "learning_rate": 0.002, "loss": 2.3367, "step": 386750 }, { "epoch": 1.495105998051677, "grad_norm": 0.109149269759655, "learning_rate": 0.002, "loss": 2.3373, "step": 386760 }, { "epoch": 1.4951446552550602, "grad_norm": 0.10933854430913925, "learning_rate": 0.002, "loss": 2.3268, "step": 386770 }, { "epoch": 1.4951833124584435, "grad_norm": 0.10297413170337677, "learning_rate": 0.002, "loss": 2.3289, "step": 386780 }, { "epoch": 1.4952219696618267, "grad_norm": 0.10061938315629959, "learning_rate": 0.002, "loss": 2.3117, "step": 386790 }, { "epoch": 1.49526062686521, "grad_norm": 0.10365668684244156, "learning_rate": 0.002, "loss": 2.3221, "step": 386800 }, { "epoch": 1.4952992840685932, "grad_norm": 0.10860082507133484, "learning_rate": 0.002, "loss": 2.3277, "step": 386810 }, { "epoch": 1.4953379412719765, "grad_norm": 0.1317634880542755, "learning_rate": 0.002, "loss": 2.3093, "step": 386820 }, { "epoch": 1.49537659847536, "grad_norm": 0.12806427478790283, "learning_rate": 0.002, "loss": 2.3311, "step": 386830 }, { "epoch": 1.4954152556787432, "grad_norm": 0.11585642397403717, "learning_rate": 0.002, "loss": 2.3366, "step": 386840 }, { "epoch": 1.4954539128821265, "grad_norm": 0.09713537245988846, "learning_rate": 0.002, "loss": 2.3362, "step": 386850 }, { "epoch": 1.4954925700855097, "grad_norm": 0.0927940234541893, "learning_rate": 0.002, "loss": 2.3191, "step": 386860 }, { "epoch": 1.495531227288893, "grad_norm": 0.09935614466667175, "learning_rate": 0.002, "loss": 2.3247, "step": 386870 }, { "epoch": 1.4955698844922762, "grad_norm": 0.10377293825149536, "learning_rate": 0.002, "loss": 2.3416, "step": 386880 }, { "epoch": 1.4956085416956595, "grad_norm": 0.11830423027276993, "learning_rate": 0.002, "loss": 2.3371, "step": 386890 }, { "epoch": 1.495647198899043, "grad_norm": 0.10357511788606644, "learning_rate": 0.002, "loss": 2.3328, "step": 386900 }, { "epoch": 1.4956858561024262, "grad_norm": 0.10574238002300262, "learning_rate": 0.002, "loss": 2.3383, "step": 386910 }, { "epoch": 1.4957245133058095, "grad_norm": 0.11580254137516022, "learning_rate": 0.002, "loss": 2.3197, "step": 386920 }, { "epoch": 1.4957631705091927, "grad_norm": 0.10394071042537689, "learning_rate": 0.002, "loss": 2.3362, "step": 386930 }, { "epoch": 1.495801827712576, "grad_norm": 0.14849720895290375, "learning_rate": 0.002, "loss": 2.341, "step": 386940 }, { "epoch": 1.4958404849159592, "grad_norm": 0.0992945060133934, "learning_rate": 0.002, "loss": 2.3214, "step": 386950 }, { "epoch": 1.4958791421193425, "grad_norm": 0.09691601246595383, "learning_rate": 0.002, "loss": 2.3191, "step": 386960 }, { "epoch": 1.4959177993227257, "grad_norm": 0.09586106985807419, "learning_rate": 0.002, "loss": 2.3302, "step": 386970 }, { "epoch": 1.495956456526109, "grad_norm": 0.10614022612571716, "learning_rate": 0.002, "loss": 2.3253, "step": 386980 }, { "epoch": 1.4959951137294922, "grad_norm": 0.10315316915512085, "learning_rate": 0.002, "loss": 2.319, "step": 386990 }, { "epoch": 1.4960337709328757, "grad_norm": 0.11343605816364288, "learning_rate": 0.002, "loss": 2.3248, "step": 387000 }, { "epoch": 1.496072428136259, "grad_norm": 0.11363035440444946, "learning_rate": 0.002, "loss": 2.3309, "step": 387010 }, { "epoch": 1.4961110853396422, "grad_norm": 0.09986026585102081, "learning_rate": 0.002, "loss": 2.3203, "step": 387020 }, { "epoch": 1.4961497425430255, "grad_norm": 0.11407621949911118, "learning_rate": 0.002, "loss": 2.318, "step": 387030 }, { "epoch": 1.4961883997464087, "grad_norm": 0.11337780207395554, "learning_rate": 0.002, "loss": 2.3407, "step": 387040 }, { "epoch": 1.496227056949792, "grad_norm": 0.0949147418141365, "learning_rate": 0.002, "loss": 2.3273, "step": 387050 }, { "epoch": 1.4962657141531754, "grad_norm": 0.10442779213190079, "learning_rate": 0.002, "loss": 2.3425, "step": 387060 }, { "epoch": 1.4963043713565587, "grad_norm": 0.1179739385843277, "learning_rate": 0.002, "loss": 2.3222, "step": 387070 }, { "epoch": 1.496343028559942, "grad_norm": 0.09415707737207413, "learning_rate": 0.002, "loss": 2.3384, "step": 387080 }, { "epoch": 1.4963816857633252, "grad_norm": 0.10857003182172775, "learning_rate": 0.002, "loss": 2.314, "step": 387090 }, { "epoch": 1.4964203429667084, "grad_norm": 0.11177841573953629, "learning_rate": 0.002, "loss": 2.3279, "step": 387100 }, { "epoch": 1.4964590001700917, "grad_norm": 0.1178489550948143, "learning_rate": 0.002, "loss": 2.3356, "step": 387110 }, { "epoch": 1.496497657373475, "grad_norm": 0.11042745411396027, "learning_rate": 0.002, "loss": 2.328, "step": 387120 }, { "epoch": 1.4965363145768582, "grad_norm": 0.09763308614492416, "learning_rate": 0.002, "loss": 2.3362, "step": 387130 }, { "epoch": 1.4965749717802415, "grad_norm": 0.11204870790243149, "learning_rate": 0.002, "loss": 2.3284, "step": 387140 }, { "epoch": 1.4966136289836247, "grad_norm": 0.09637466073036194, "learning_rate": 0.002, "loss": 2.3304, "step": 387150 }, { "epoch": 1.496652286187008, "grad_norm": 0.11716560274362564, "learning_rate": 0.002, "loss": 2.3404, "step": 387160 }, { "epoch": 1.4966909433903914, "grad_norm": 0.10886909812688828, "learning_rate": 0.002, "loss": 2.3421, "step": 387170 }, { "epoch": 1.4967296005937747, "grad_norm": 0.11475536227226257, "learning_rate": 0.002, "loss": 2.3371, "step": 387180 }, { "epoch": 1.496768257797158, "grad_norm": 0.08800867199897766, "learning_rate": 0.002, "loss": 2.3388, "step": 387190 }, { "epoch": 1.4968069150005412, "grad_norm": 0.11351599544286728, "learning_rate": 0.002, "loss": 2.3432, "step": 387200 }, { "epoch": 1.4968455722039244, "grad_norm": 0.10470616072416306, "learning_rate": 0.002, "loss": 2.3401, "step": 387210 }, { "epoch": 1.4968842294073077, "grad_norm": 0.11616797000169754, "learning_rate": 0.002, "loss": 2.325, "step": 387220 }, { "epoch": 1.4969228866106912, "grad_norm": 0.10372336953878403, "learning_rate": 0.002, "loss": 2.3218, "step": 387230 }, { "epoch": 1.4969615438140744, "grad_norm": 0.14390446245670319, "learning_rate": 0.002, "loss": 2.3422, "step": 387240 }, { "epoch": 1.4970002010174577, "grad_norm": 0.10057645291090012, "learning_rate": 0.002, "loss": 2.3221, "step": 387250 }, { "epoch": 1.497038858220841, "grad_norm": 0.09963632375001907, "learning_rate": 0.002, "loss": 2.3471, "step": 387260 }, { "epoch": 1.4970775154242242, "grad_norm": 0.1073041632771492, "learning_rate": 0.002, "loss": 2.3252, "step": 387270 }, { "epoch": 1.4971161726276074, "grad_norm": 0.10896219313144684, "learning_rate": 0.002, "loss": 2.3305, "step": 387280 }, { "epoch": 1.4971548298309907, "grad_norm": 0.10329505801200867, "learning_rate": 0.002, "loss": 2.3192, "step": 387290 }, { "epoch": 1.497193487034374, "grad_norm": 0.10008035600185394, "learning_rate": 0.002, "loss": 2.3285, "step": 387300 }, { "epoch": 1.4972321442377572, "grad_norm": 0.09460999816656113, "learning_rate": 0.002, "loss": 2.3379, "step": 387310 }, { "epoch": 1.4972708014411404, "grad_norm": 0.11439158022403717, "learning_rate": 0.002, "loss": 2.3458, "step": 387320 }, { "epoch": 1.4973094586445237, "grad_norm": 0.09630564600229263, "learning_rate": 0.002, "loss": 2.3363, "step": 387330 }, { "epoch": 1.4973481158479072, "grad_norm": 0.12669190764427185, "learning_rate": 0.002, "loss": 2.336, "step": 387340 }, { "epoch": 1.4973867730512904, "grad_norm": 0.2987734377384186, "learning_rate": 0.002, "loss": 2.3258, "step": 387350 }, { "epoch": 1.4974254302546737, "grad_norm": 0.10269183665513992, "learning_rate": 0.002, "loss": 2.3467, "step": 387360 }, { "epoch": 1.497464087458057, "grad_norm": 0.2718169093132019, "learning_rate": 0.002, "loss": 2.327, "step": 387370 }, { "epoch": 1.4975027446614402, "grad_norm": 0.09955942630767822, "learning_rate": 0.002, "loss": 2.3393, "step": 387380 }, { "epoch": 1.4975414018648234, "grad_norm": 0.13938391208648682, "learning_rate": 0.002, "loss": 2.3316, "step": 387390 }, { "epoch": 1.497580059068207, "grad_norm": 0.10868766158819199, "learning_rate": 0.002, "loss": 2.3247, "step": 387400 }, { "epoch": 1.4976187162715902, "grad_norm": 0.10905612260103226, "learning_rate": 0.002, "loss": 2.3274, "step": 387410 }, { "epoch": 1.4976573734749734, "grad_norm": 0.10838504135608673, "learning_rate": 0.002, "loss": 2.3191, "step": 387420 }, { "epoch": 1.4976960306783567, "grad_norm": 0.11081354320049286, "learning_rate": 0.002, "loss": 2.3264, "step": 387430 }, { "epoch": 1.49773468788174, "grad_norm": 0.11287319660186768, "learning_rate": 0.002, "loss": 2.3407, "step": 387440 }, { "epoch": 1.4977733450851232, "grad_norm": 0.11388552933931351, "learning_rate": 0.002, "loss": 2.3292, "step": 387450 }, { "epoch": 1.4978120022885064, "grad_norm": 0.09228865057229996, "learning_rate": 0.002, "loss": 2.32, "step": 387460 }, { "epoch": 1.4978506594918897, "grad_norm": 0.10086563974618912, "learning_rate": 0.002, "loss": 2.3545, "step": 387470 }, { "epoch": 1.497889316695273, "grad_norm": 0.1103566586971283, "learning_rate": 0.002, "loss": 2.3274, "step": 387480 }, { "epoch": 1.4979279738986562, "grad_norm": 0.10129038989543915, "learning_rate": 0.002, "loss": 2.3351, "step": 387490 }, { "epoch": 1.4979666311020394, "grad_norm": 0.10978537052869797, "learning_rate": 0.002, "loss": 2.3292, "step": 387500 }, { "epoch": 1.498005288305423, "grad_norm": 0.09721244871616364, "learning_rate": 0.002, "loss": 2.3158, "step": 387510 }, { "epoch": 1.4980439455088062, "grad_norm": 0.10568886995315552, "learning_rate": 0.002, "loss": 2.3453, "step": 387520 }, { "epoch": 1.4980826027121894, "grad_norm": 0.10114246606826782, "learning_rate": 0.002, "loss": 2.336, "step": 387530 }, { "epoch": 1.4981212599155727, "grad_norm": 0.08565915375947952, "learning_rate": 0.002, "loss": 2.3427, "step": 387540 }, { "epoch": 1.498159917118956, "grad_norm": 0.08483897149562836, "learning_rate": 0.002, "loss": 2.3262, "step": 387550 }, { "epoch": 1.4981985743223392, "grad_norm": 0.08971007168292999, "learning_rate": 0.002, "loss": 2.3408, "step": 387560 }, { "epoch": 1.4982372315257226, "grad_norm": 0.13412101566791534, "learning_rate": 0.002, "loss": 2.3345, "step": 387570 }, { "epoch": 1.4982758887291059, "grad_norm": 0.10396832227706909, "learning_rate": 0.002, "loss": 2.331, "step": 387580 }, { "epoch": 1.4983145459324891, "grad_norm": 0.1287054866552353, "learning_rate": 0.002, "loss": 2.3321, "step": 387590 }, { "epoch": 1.4983532031358724, "grad_norm": 0.09784513711929321, "learning_rate": 0.002, "loss": 2.3173, "step": 387600 }, { "epoch": 1.4983918603392556, "grad_norm": 0.10984116792678833, "learning_rate": 0.002, "loss": 2.3398, "step": 387610 }, { "epoch": 1.498430517542639, "grad_norm": 0.10213734209537506, "learning_rate": 0.002, "loss": 2.3466, "step": 387620 }, { "epoch": 1.4984691747460221, "grad_norm": 0.11160784214735031, "learning_rate": 0.002, "loss": 2.3447, "step": 387630 }, { "epoch": 1.4985078319494054, "grad_norm": 0.10323362797498703, "learning_rate": 0.002, "loss": 2.3244, "step": 387640 }, { "epoch": 1.4985464891527887, "grad_norm": 0.11253419518470764, "learning_rate": 0.002, "loss": 2.3244, "step": 387650 }, { "epoch": 1.498585146356172, "grad_norm": 0.1090523898601532, "learning_rate": 0.002, "loss": 2.3427, "step": 387660 }, { "epoch": 1.4986238035595552, "grad_norm": 0.11632861196994781, "learning_rate": 0.002, "loss": 2.3441, "step": 387670 }, { "epoch": 1.4986624607629386, "grad_norm": 0.09930185228586197, "learning_rate": 0.002, "loss": 2.3407, "step": 387680 }, { "epoch": 1.4987011179663219, "grad_norm": 0.09560258686542511, "learning_rate": 0.002, "loss": 2.3371, "step": 387690 }, { "epoch": 1.4987397751697051, "grad_norm": 0.10566894710063934, "learning_rate": 0.002, "loss": 2.3276, "step": 387700 }, { "epoch": 1.4987784323730884, "grad_norm": 0.11293548345565796, "learning_rate": 0.002, "loss": 2.3314, "step": 387710 }, { "epoch": 1.4988170895764716, "grad_norm": 0.10041575133800507, "learning_rate": 0.002, "loss": 2.3289, "step": 387720 }, { "epoch": 1.498855746779855, "grad_norm": 0.10108038783073425, "learning_rate": 0.002, "loss": 2.3373, "step": 387730 }, { "epoch": 1.4988944039832384, "grad_norm": 0.1080123633146286, "learning_rate": 0.002, "loss": 2.3314, "step": 387740 }, { "epoch": 1.4989330611866216, "grad_norm": 0.0993170365691185, "learning_rate": 0.002, "loss": 2.3281, "step": 387750 }, { "epoch": 1.4989717183900049, "grad_norm": 0.11185912042856216, "learning_rate": 0.002, "loss": 2.3497, "step": 387760 }, { "epoch": 1.4990103755933881, "grad_norm": 0.10889036953449249, "learning_rate": 0.002, "loss": 2.3291, "step": 387770 }, { "epoch": 1.4990490327967714, "grad_norm": 0.09817574918270111, "learning_rate": 0.002, "loss": 2.3373, "step": 387780 }, { "epoch": 1.4990876900001546, "grad_norm": 0.10236465930938721, "learning_rate": 0.002, "loss": 2.3391, "step": 387790 }, { "epoch": 1.4991263472035379, "grad_norm": 0.09964855760335922, "learning_rate": 0.002, "loss": 2.3434, "step": 387800 }, { "epoch": 1.4991650044069211, "grad_norm": 0.10301323235034943, "learning_rate": 0.002, "loss": 2.3352, "step": 387810 }, { "epoch": 1.4992036616103044, "grad_norm": 0.11032721400260925, "learning_rate": 0.002, "loss": 2.344, "step": 387820 }, { "epoch": 1.4992423188136876, "grad_norm": 0.09334992617368698, "learning_rate": 0.002, "loss": 2.3399, "step": 387830 }, { "epoch": 1.4992809760170709, "grad_norm": 0.0951884463429451, "learning_rate": 0.002, "loss": 2.3318, "step": 387840 }, { "epoch": 1.4993196332204544, "grad_norm": 0.09448539465665817, "learning_rate": 0.002, "loss": 2.3305, "step": 387850 }, { "epoch": 1.4993582904238376, "grad_norm": 0.10757434368133545, "learning_rate": 0.002, "loss": 2.3177, "step": 387860 }, { "epoch": 1.4993969476272209, "grad_norm": 0.11576647311449051, "learning_rate": 0.002, "loss": 2.331, "step": 387870 }, { "epoch": 1.4994356048306041, "grad_norm": 0.09661038964986801, "learning_rate": 0.002, "loss": 2.3304, "step": 387880 }, { "epoch": 1.4994742620339874, "grad_norm": 0.11678434163331985, "learning_rate": 0.002, "loss": 2.3382, "step": 387890 }, { "epoch": 1.4995129192373706, "grad_norm": 0.09969460219144821, "learning_rate": 0.002, "loss": 2.3481, "step": 387900 }, { "epoch": 1.499551576440754, "grad_norm": 0.11960793286561966, "learning_rate": 0.002, "loss": 2.3375, "step": 387910 }, { "epoch": 1.4995902336441373, "grad_norm": 0.11385287344455719, "learning_rate": 0.002, "loss": 2.3349, "step": 387920 }, { "epoch": 1.4996288908475206, "grad_norm": 0.09566864371299744, "learning_rate": 0.002, "loss": 2.3557, "step": 387930 }, { "epoch": 1.4996675480509039, "grad_norm": 0.09156309813261032, "learning_rate": 0.002, "loss": 2.3351, "step": 387940 }, { "epoch": 1.499706205254287, "grad_norm": 0.11333674937486649, "learning_rate": 0.002, "loss": 2.3424, "step": 387950 }, { "epoch": 1.4997448624576704, "grad_norm": 0.10646463185548782, "learning_rate": 0.002, "loss": 2.3224, "step": 387960 }, { "epoch": 1.4997835196610536, "grad_norm": 0.09774358570575714, "learning_rate": 0.002, "loss": 2.3405, "step": 387970 }, { "epoch": 1.4998221768644369, "grad_norm": 0.10020807385444641, "learning_rate": 0.002, "loss": 2.3413, "step": 387980 }, { "epoch": 1.4998608340678201, "grad_norm": 0.1096162423491478, "learning_rate": 0.002, "loss": 2.3326, "step": 387990 }, { "epoch": 1.4998994912712034, "grad_norm": 0.12573811411857605, "learning_rate": 0.002, "loss": 2.3275, "step": 388000 }, { "epoch": 1.4999381484745866, "grad_norm": 0.08667264133691788, "learning_rate": 0.002, "loss": 2.3262, "step": 388010 }, { "epoch": 1.49997680567797, "grad_norm": 0.10994665324687958, "learning_rate": 0.002, "loss": 2.3236, "step": 388020 }, { "epoch": 1.5000154628813533, "grad_norm": 0.10760389268398285, "learning_rate": 0.002, "loss": 2.3385, "step": 388030 }, { "epoch": 1.5000541200847366, "grad_norm": 0.10332886129617691, "learning_rate": 0.002, "loss": 2.3099, "step": 388040 }, { "epoch": 1.5000927772881198, "grad_norm": 0.10724914819002151, "learning_rate": 0.002, "loss": 2.3369, "step": 388050 }, { "epoch": 1.500131434491503, "grad_norm": 0.10977024585008621, "learning_rate": 0.002, "loss": 2.3442, "step": 388060 }, { "epoch": 1.5001700916948866, "grad_norm": 0.10817869752645493, "learning_rate": 0.002, "loss": 2.3326, "step": 388070 }, { "epoch": 1.5002087488982698, "grad_norm": 0.11673028767108917, "learning_rate": 0.002, "loss": 2.3278, "step": 388080 }, { "epoch": 1.500247406101653, "grad_norm": 0.10535792261362076, "learning_rate": 0.002, "loss": 2.3338, "step": 388090 }, { "epoch": 1.5002860633050363, "grad_norm": 0.10003501921892166, "learning_rate": 0.002, "loss": 2.3516, "step": 388100 }, { "epoch": 1.5003247205084196, "grad_norm": 0.1060367152094841, "learning_rate": 0.002, "loss": 2.3209, "step": 388110 }, { "epoch": 1.5003633777118028, "grad_norm": 0.1156829223036766, "learning_rate": 0.002, "loss": 2.3307, "step": 388120 }, { "epoch": 1.500402034915186, "grad_norm": 0.11129516363143921, "learning_rate": 0.002, "loss": 2.3389, "step": 388130 }, { "epoch": 1.5004406921185693, "grad_norm": 0.44771406054496765, "learning_rate": 0.002, "loss": 2.3221, "step": 388140 }, { "epoch": 1.5004793493219526, "grad_norm": 0.10634361952543259, "learning_rate": 0.002, "loss": 2.3473, "step": 388150 }, { "epoch": 1.5005180065253358, "grad_norm": 0.0964827761054039, "learning_rate": 0.002, "loss": 2.3386, "step": 388160 }, { "epoch": 1.500556663728719, "grad_norm": 0.0940362885594368, "learning_rate": 0.002, "loss": 2.3415, "step": 388170 }, { "epoch": 1.5005953209321024, "grad_norm": 0.10004458576440811, "learning_rate": 0.002, "loss": 2.3331, "step": 388180 }, { "epoch": 1.5006339781354856, "grad_norm": 0.10141144692897797, "learning_rate": 0.002, "loss": 2.3196, "step": 388190 }, { "epoch": 1.500672635338869, "grad_norm": 0.10415209829807281, "learning_rate": 0.002, "loss": 2.3205, "step": 388200 }, { "epoch": 1.5007112925422523, "grad_norm": 0.12072297185659409, "learning_rate": 0.002, "loss": 2.3137, "step": 388210 }, { "epoch": 1.5007499497456356, "grad_norm": 0.120872363448143, "learning_rate": 0.002, "loss": 2.3202, "step": 388220 }, { "epoch": 1.5007886069490188, "grad_norm": 0.11353632062673569, "learning_rate": 0.002, "loss": 2.3263, "step": 388230 }, { "epoch": 1.5008272641524023, "grad_norm": 0.10656017065048218, "learning_rate": 0.002, "loss": 2.3331, "step": 388240 }, { "epoch": 1.5008659213557856, "grad_norm": 0.10381469875574112, "learning_rate": 0.002, "loss": 2.3405, "step": 388250 }, { "epoch": 1.5009045785591688, "grad_norm": 0.11682920902967453, "learning_rate": 0.002, "loss": 2.3372, "step": 388260 }, { "epoch": 1.500943235762552, "grad_norm": 0.109528087079525, "learning_rate": 0.002, "loss": 2.3401, "step": 388270 }, { "epoch": 1.5009818929659353, "grad_norm": 0.08958955109119415, "learning_rate": 0.002, "loss": 2.3345, "step": 388280 }, { "epoch": 1.5010205501693186, "grad_norm": 0.1280771642923355, "learning_rate": 0.002, "loss": 2.3278, "step": 388290 }, { "epoch": 1.5010592073727018, "grad_norm": 0.10596900433301926, "learning_rate": 0.002, "loss": 2.3283, "step": 388300 }, { "epoch": 1.501097864576085, "grad_norm": 0.10658132284879684, "learning_rate": 0.002, "loss": 2.3419, "step": 388310 }, { "epoch": 1.5011365217794683, "grad_norm": 0.11552879959344864, "learning_rate": 0.002, "loss": 2.3306, "step": 388320 }, { "epoch": 1.5011751789828516, "grad_norm": 0.11456441134214401, "learning_rate": 0.002, "loss": 2.3454, "step": 388330 }, { "epoch": 1.5012138361862348, "grad_norm": 0.12030810117721558, "learning_rate": 0.002, "loss": 2.3362, "step": 388340 }, { "epoch": 1.501252493389618, "grad_norm": 0.0922018438577652, "learning_rate": 0.002, "loss": 2.3303, "step": 388350 }, { "epoch": 1.5012911505930013, "grad_norm": 0.106600321829319, "learning_rate": 0.002, "loss": 2.3382, "step": 388360 }, { "epoch": 1.5013298077963848, "grad_norm": 0.10056577622890472, "learning_rate": 0.002, "loss": 2.3328, "step": 388370 }, { "epoch": 1.501368464999768, "grad_norm": 0.09994447231292725, "learning_rate": 0.002, "loss": 2.3315, "step": 388380 }, { "epoch": 1.5014071222031513, "grad_norm": 0.09490270912647247, "learning_rate": 0.002, "loss": 2.3317, "step": 388390 }, { "epoch": 1.5014457794065346, "grad_norm": 0.10684575140476227, "learning_rate": 0.002, "loss": 2.3401, "step": 388400 }, { "epoch": 1.501484436609918, "grad_norm": 0.11132889240980148, "learning_rate": 0.002, "loss": 2.328, "step": 388410 }, { "epoch": 1.5015230938133013, "grad_norm": 0.11740515381097794, "learning_rate": 0.002, "loss": 2.3386, "step": 388420 }, { "epoch": 1.5015617510166845, "grad_norm": 0.12547734379768372, "learning_rate": 0.002, "loss": 2.3255, "step": 388430 }, { "epoch": 1.5016004082200678, "grad_norm": 0.0951928198337555, "learning_rate": 0.002, "loss": 2.328, "step": 388440 }, { "epoch": 1.501639065423451, "grad_norm": 0.09670039266347885, "learning_rate": 0.002, "loss": 2.3371, "step": 388450 }, { "epoch": 1.5016777226268343, "grad_norm": 0.08767662197351456, "learning_rate": 0.002, "loss": 2.3275, "step": 388460 }, { "epoch": 1.5017163798302176, "grad_norm": 0.08616555482149124, "learning_rate": 0.002, "loss": 2.3223, "step": 388470 }, { "epoch": 1.5017550370336008, "grad_norm": 0.10425017029047012, "learning_rate": 0.002, "loss": 2.3368, "step": 388480 }, { "epoch": 1.501793694236984, "grad_norm": 0.11210408806800842, "learning_rate": 0.002, "loss": 2.3268, "step": 388490 }, { "epoch": 1.5018323514403673, "grad_norm": 0.10728723555803299, "learning_rate": 0.002, "loss": 2.3322, "step": 388500 }, { "epoch": 1.5018710086437506, "grad_norm": 0.09564007073640823, "learning_rate": 0.002, "loss": 2.335, "step": 388510 }, { "epoch": 1.5019096658471338, "grad_norm": 0.11068980395793915, "learning_rate": 0.002, "loss": 2.3238, "step": 388520 }, { "epoch": 1.5019483230505173, "grad_norm": 0.11430082470178604, "learning_rate": 0.002, "loss": 2.3243, "step": 388530 }, { "epoch": 1.5019869802539005, "grad_norm": 0.10830912739038467, "learning_rate": 0.002, "loss": 2.3416, "step": 388540 }, { "epoch": 1.5020256374572838, "grad_norm": 0.11302468180656433, "learning_rate": 0.002, "loss": 2.3261, "step": 388550 }, { "epoch": 1.502064294660667, "grad_norm": 0.09648089855909348, "learning_rate": 0.002, "loss": 2.3239, "step": 388560 }, { "epoch": 1.5021029518640503, "grad_norm": 0.09799264371395111, "learning_rate": 0.002, "loss": 2.3312, "step": 388570 }, { "epoch": 1.5021416090674338, "grad_norm": 0.10546073317527771, "learning_rate": 0.002, "loss": 2.3339, "step": 388580 }, { "epoch": 1.502180266270817, "grad_norm": 0.1253112256526947, "learning_rate": 0.002, "loss": 2.3302, "step": 388590 }, { "epoch": 1.5022189234742003, "grad_norm": 0.10033942759037018, "learning_rate": 0.002, "loss": 2.319, "step": 388600 }, { "epoch": 1.5022575806775835, "grad_norm": 0.09231418371200562, "learning_rate": 0.002, "loss": 2.3202, "step": 388610 }, { "epoch": 1.5022962378809668, "grad_norm": 0.11819812655448914, "learning_rate": 0.002, "loss": 2.3253, "step": 388620 }, { "epoch": 1.50233489508435, "grad_norm": 0.10543312877416611, "learning_rate": 0.002, "loss": 2.3324, "step": 388630 }, { "epoch": 1.5023735522877333, "grad_norm": 0.10718866437673569, "learning_rate": 0.002, "loss": 2.3377, "step": 388640 }, { "epoch": 1.5024122094911165, "grad_norm": 0.11451803147792816, "learning_rate": 0.002, "loss": 2.3345, "step": 388650 }, { "epoch": 1.5024508666944998, "grad_norm": 0.09051160514354706, "learning_rate": 0.002, "loss": 2.3321, "step": 388660 }, { "epoch": 1.502489523897883, "grad_norm": 0.6592245697975159, "learning_rate": 0.002, "loss": 2.3316, "step": 388670 }, { "epoch": 1.5025281811012663, "grad_norm": 0.1896670162677765, "learning_rate": 0.002, "loss": 2.3285, "step": 388680 }, { "epoch": 1.5025668383046495, "grad_norm": 0.09931972622871399, "learning_rate": 0.002, "loss": 2.3222, "step": 388690 }, { "epoch": 1.502605495508033, "grad_norm": 0.10684159398078918, "learning_rate": 0.002, "loss": 2.3342, "step": 388700 }, { "epoch": 1.5026441527114163, "grad_norm": 0.09554118663072586, "learning_rate": 0.002, "loss": 2.3254, "step": 388710 }, { "epoch": 1.5026828099147995, "grad_norm": 0.10077395290136337, "learning_rate": 0.002, "loss": 2.3367, "step": 388720 }, { "epoch": 1.5027214671181828, "grad_norm": 0.09313789755105972, "learning_rate": 0.002, "loss": 2.315, "step": 388730 }, { "epoch": 1.502760124321566, "grad_norm": 0.10598669201135635, "learning_rate": 0.002, "loss": 2.3451, "step": 388740 }, { "epoch": 1.5027987815249495, "grad_norm": 0.10307098925113678, "learning_rate": 0.002, "loss": 2.3389, "step": 388750 }, { "epoch": 1.5028374387283328, "grad_norm": 0.08475697040557861, "learning_rate": 0.002, "loss": 2.3284, "step": 388760 }, { "epoch": 1.502876095931716, "grad_norm": 0.15715770423412323, "learning_rate": 0.002, "loss": 2.3269, "step": 388770 }, { "epoch": 1.5029147531350993, "grad_norm": 0.0934685617685318, "learning_rate": 0.002, "loss": 2.3345, "step": 388780 }, { "epoch": 1.5029534103384825, "grad_norm": 0.1194806918501854, "learning_rate": 0.002, "loss": 2.3273, "step": 388790 }, { "epoch": 1.5029920675418658, "grad_norm": 0.09156420081853867, "learning_rate": 0.002, "loss": 2.3232, "step": 388800 }, { "epoch": 1.503030724745249, "grad_norm": 0.12355215847492218, "learning_rate": 0.002, "loss": 2.3264, "step": 388810 }, { "epoch": 1.5030693819486323, "grad_norm": 0.1338273584842682, "learning_rate": 0.002, "loss": 2.3241, "step": 388820 }, { "epoch": 1.5031080391520155, "grad_norm": 0.10630382597446442, "learning_rate": 0.002, "loss": 2.336, "step": 388830 }, { "epoch": 1.5031466963553988, "grad_norm": 0.090221107006073, "learning_rate": 0.002, "loss": 2.3329, "step": 388840 }, { "epoch": 1.503185353558782, "grad_norm": 0.09536974132061005, "learning_rate": 0.002, "loss": 2.3292, "step": 388850 }, { "epoch": 1.5032240107621653, "grad_norm": 0.09492926299571991, "learning_rate": 0.002, "loss": 2.3303, "step": 388860 }, { "epoch": 1.5032626679655487, "grad_norm": 0.10294625908136368, "learning_rate": 0.002, "loss": 2.3281, "step": 388870 }, { "epoch": 1.503301325168932, "grad_norm": 0.12637640535831451, "learning_rate": 0.002, "loss": 2.33, "step": 388880 }, { "epoch": 1.5033399823723153, "grad_norm": 0.10367640107870102, "learning_rate": 0.002, "loss": 2.32, "step": 388890 }, { "epoch": 1.5033786395756985, "grad_norm": 0.10841293632984161, "learning_rate": 0.002, "loss": 2.3338, "step": 388900 }, { "epoch": 1.503417296779082, "grad_norm": 0.09370643645524979, "learning_rate": 0.002, "loss": 2.3454, "step": 388910 }, { "epoch": 1.5034559539824652, "grad_norm": 0.11507739871740341, "learning_rate": 0.002, "loss": 2.3389, "step": 388920 }, { "epoch": 1.5034946111858485, "grad_norm": 0.11928284913301468, "learning_rate": 0.002, "loss": 2.3406, "step": 388930 }, { "epoch": 1.5035332683892317, "grad_norm": 0.1128237247467041, "learning_rate": 0.002, "loss": 2.3405, "step": 388940 }, { "epoch": 1.503571925592615, "grad_norm": 0.09719345718622208, "learning_rate": 0.002, "loss": 2.3196, "step": 388950 }, { "epoch": 1.5036105827959982, "grad_norm": 0.10155039280653, "learning_rate": 0.002, "loss": 2.3351, "step": 388960 }, { "epoch": 1.5036492399993815, "grad_norm": 0.1289808750152588, "learning_rate": 0.002, "loss": 2.3286, "step": 388970 }, { "epoch": 1.5036878972027647, "grad_norm": 0.10769186913967133, "learning_rate": 0.002, "loss": 2.3177, "step": 388980 }, { "epoch": 1.503726554406148, "grad_norm": 0.09606771916151047, "learning_rate": 0.002, "loss": 2.3327, "step": 388990 }, { "epoch": 1.5037652116095312, "grad_norm": 0.0984034314751625, "learning_rate": 0.002, "loss": 2.3452, "step": 389000 }, { "epoch": 1.5038038688129145, "grad_norm": 0.10618776828050613, "learning_rate": 0.002, "loss": 2.32, "step": 389010 }, { "epoch": 1.5038425260162978, "grad_norm": 0.10699150711297989, "learning_rate": 0.002, "loss": 2.339, "step": 389020 }, { "epoch": 1.503881183219681, "grad_norm": 0.09430860728025436, "learning_rate": 0.002, "loss": 2.3308, "step": 389030 }, { "epoch": 1.5039198404230645, "grad_norm": 0.0932871401309967, "learning_rate": 0.002, "loss": 2.3406, "step": 389040 }, { "epoch": 1.5039584976264477, "grad_norm": 0.10846278071403503, "learning_rate": 0.002, "loss": 2.3451, "step": 389050 }, { "epoch": 1.503997154829831, "grad_norm": 0.10263998061418533, "learning_rate": 0.002, "loss": 2.3319, "step": 389060 }, { "epoch": 1.5040358120332142, "grad_norm": 0.11393225938081741, "learning_rate": 0.002, "loss": 2.3364, "step": 389070 }, { "epoch": 1.5040744692365977, "grad_norm": 0.10867276042699814, "learning_rate": 0.002, "loss": 2.3116, "step": 389080 }, { "epoch": 1.504113126439981, "grad_norm": 0.10557813942432404, "learning_rate": 0.002, "loss": 2.3281, "step": 389090 }, { "epoch": 1.5041517836433642, "grad_norm": 0.0988297313451767, "learning_rate": 0.002, "loss": 2.3266, "step": 389100 }, { "epoch": 1.5041904408467475, "grad_norm": 0.13613122701644897, "learning_rate": 0.002, "loss": 2.3352, "step": 389110 }, { "epoch": 1.5042290980501307, "grad_norm": 0.10214740037918091, "learning_rate": 0.002, "loss": 2.3321, "step": 389120 }, { "epoch": 1.504267755253514, "grad_norm": 0.10097360610961914, "learning_rate": 0.002, "loss": 2.3305, "step": 389130 }, { "epoch": 1.5043064124568972, "grad_norm": 0.10136395692825317, "learning_rate": 0.002, "loss": 2.3351, "step": 389140 }, { "epoch": 1.5043450696602805, "grad_norm": 0.10534156858921051, "learning_rate": 0.002, "loss": 2.3402, "step": 389150 }, { "epoch": 1.5043837268636637, "grad_norm": 0.10916571319103241, "learning_rate": 0.002, "loss": 2.3239, "step": 389160 }, { "epoch": 1.504422384067047, "grad_norm": 0.11262747645378113, "learning_rate": 0.002, "loss": 2.3309, "step": 389170 }, { "epoch": 1.5044610412704302, "grad_norm": 0.11168365180492401, "learning_rate": 0.002, "loss": 2.3303, "step": 389180 }, { "epoch": 1.5044996984738135, "grad_norm": 0.10077814012765884, "learning_rate": 0.002, "loss": 2.3288, "step": 389190 }, { "epoch": 1.5045383556771967, "grad_norm": 0.10950718075037003, "learning_rate": 0.002, "loss": 2.3419, "step": 389200 }, { "epoch": 1.5045770128805802, "grad_norm": 0.1116180568933487, "learning_rate": 0.002, "loss": 2.3486, "step": 389210 }, { "epoch": 1.5046156700839635, "grad_norm": 0.10967687517404556, "learning_rate": 0.002, "loss": 2.3128, "step": 389220 }, { "epoch": 1.5046543272873467, "grad_norm": 0.10847678780555725, "learning_rate": 0.002, "loss": 2.344, "step": 389230 }, { "epoch": 1.50469298449073, "grad_norm": 0.11077504605054855, "learning_rate": 0.002, "loss": 2.3114, "step": 389240 }, { "epoch": 1.5047316416941134, "grad_norm": 0.12560157477855682, "learning_rate": 0.002, "loss": 2.3486, "step": 389250 }, { "epoch": 1.5047702988974967, "grad_norm": 0.10352113842964172, "learning_rate": 0.002, "loss": 2.3325, "step": 389260 }, { "epoch": 1.50480895610088, "grad_norm": 0.09635389596223831, "learning_rate": 0.002, "loss": 2.3353, "step": 389270 }, { "epoch": 1.5048476133042632, "grad_norm": 0.10735803097486496, "learning_rate": 0.002, "loss": 2.3263, "step": 389280 }, { "epoch": 1.5048862705076465, "grad_norm": 0.12441831827163696, "learning_rate": 0.002, "loss": 2.3348, "step": 389290 }, { "epoch": 1.5049249277110297, "grad_norm": 0.10879199206829071, "learning_rate": 0.002, "loss": 2.3265, "step": 389300 }, { "epoch": 1.504963584914413, "grad_norm": 0.0954468846321106, "learning_rate": 0.002, "loss": 2.3178, "step": 389310 }, { "epoch": 1.5050022421177962, "grad_norm": 0.09573718160390854, "learning_rate": 0.002, "loss": 2.3161, "step": 389320 }, { "epoch": 1.5050408993211795, "grad_norm": 0.10864290595054626, "learning_rate": 0.002, "loss": 2.3123, "step": 389330 }, { "epoch": 1.5050795565245627, "grad_norm": 0.09222857654094696, "learning_rate": 0.002, "loss": 2.3296, "step": 389340 }, { "epoch": 1.505118213727946, "grad_norm": 0.09859822690486908, "learning_rate": 0.002, "loss": 2.3317, "step": 389350 }, { "epoch": 1.5051568709313292, "grad_norm": 0.12920109927654266, "learning_rate": 0.002, "loss": 2.3319, "step": 389360 }, { "epoch": 1.5051955281347125, "grad_norm": 0.09436677396297455, "learning_rate": 0.002, "loss": 2.3308, "step": 389370 }, { "epoch": 1.505234185338096, "grad_norm": 0.10126109421253204, "learning_rate": 0.002, "loss": 2.3441, "step": 389380 }, { "epoch": 1.5052728425414792, "grad_norm": 0.09820156544446945, "learning_rate": 0.002, "loss": 2.3358, "step": 389390 }, { "epoch": 1.5053114997448624, "grad_norm": 0.16399995982646942, "learning_rate": 0.002, "loss": 2.3482, "step": 389400 }, { "epoch": 1.5053501569482457, "grad_norm": 0.11499249935150146, "learning_rate": 0.002, "loss": 2.3379, "step": 389410 }, { "epoch": 1.5053888141516292, "grad_norm": 0.19883383810520172, "learning_rate": 0.002, "loss": 2.3144, "step": 389420 }, { "epoch": 1.5054274713550124, "grad_norm": 0.09507088363170624, "learning_rate": 0.002, "loss": 2.3342, "step": 389430 }, { "epoch": 1.5054661285583957, "grad_norm": 0.12007997930049896, "learning_rate": 0.002, "loss": 2.3438, "step": 389440 }, { "epoch": 1.505504785761779, "grad_norm": 0.09999562054872513, "learning_rate": 0.002, "loss": 2.3235, "step": 389450 }, { "epoch": 1.5055434429651622, "grad_norm": 0.10346845537424088, "learning_rate": 0.002, "loss": 2.334, "step": 389460 }, { "epoch": 1.5055821001685454, "grad_norm": 0.09966317564249039, "learning_rate": 0.002, "loss": 2.3255, "step": 389470 }, { "epoch": 1.5056207573719287, "grad_norm": 0.3257620930671692, "learning_rate": 0.002, "loss": 2.3274, "step": 389480 }, { "epoch": 1.505659414575312, "grad_norm": 0.10240617394447327, "learning_rate": 0.002, "loss": 2.3228, "step": 389490 }, { "epoch": 1.5056980717786952, "grad_norm": 0.10182981193065643, "learning_rate": 0.002, "loss": 2.3244, "step": 389500 }, { "epoch": 1.5057367289820784, "grad_norm": 0.09937752783298492, "learning_rate": 0.002, "loss": 2.3202, "step": 389510 }, { "epoch": 1.5057753861854617, "grad_norm": 0.11027289927005768, "learning_rate": 0.002, "loss": 2.3504, "step": 389520 }, { "epoch": 1.505814043388845, "grad_norm": 0.10598906129598618, "learning_rate": 0.002, "loss": 2.337, "step": 389530 }, { "epoch": 1.5058527005922282, "grad_norm": 0.09967856109142303, "learning_rate": 0.002, "loss": 2.3454, "step": 389540 }, { "epoch": 1.5058913577956117, "grad_norm": 0.12473419308662415, "learning_rate": 0.002, "loss": 2.3355, "step": 389550 }, { "epoch": 1.505930014998995, "grad_norm": 0.09534024447202682, "learning_rate": 0.002, "loss": 2.3202, "step": 389560 }, { "epoch": 1.5059686722023782, "grad_norm": 0.1028229296207428, "learning_rate": 0.002, "loss": 2.3253, "step": 389570 }, { "epoch": 1.5060073294057614, "grad_norm": 0.09971043467521667, "learning_rate": 0.002, "loss": 2.3346, "step": 389580 }, { "epoch": 1.506045986609145, "grad_norm": 0.09162665158510208, "learning_rate": 0.002, "loss": 2.3357, "step": 389590 }, { "epoch": 1.5060846438125282, "grad_norm": 0.13003022968769073, "learning_rate": 0.002, "loss": 2.3315, "step": 389600 }, { "epoch": 1.5061233010159114, "grad_norm": 0.11770327389240265, "learning_rate": 0.002, "loss": 2.3431, "step": 389610 }, { "epoch": 1.5061619582192947, "grad_norm": 0.09348583221435547, "learning_rate": 0.002, "loss": 2.3332, "step": 389620 }, { "epoch": 1.506200615422678, "grad_norm": 0.11873598396778107, "learning_rate": 0.002, "loss": 2.3465, "step": 389630 }, { "epoch": 1.5062392726260612, "grad_norm": 0.11417189985513687, "learning_rate": 0.002, "loss": 2.3413, "step": 389640 }, { "epoch": 1.5062779298294444, "grad_norm": 0.12200142443180084, "learning_rate": 0.002, "loss": 2.3217, "step": 389650 }, { "epoch": 1.5063165870328277, "grad_norm": 0.11383046954870224, "learning_rate": 0.002, "loss": 2.3309, "step": 389660 }, { "epoch": 1.506355244236211, "grad_norm": 0.1026448905467987, "learning_rate": 0.002, "loss": 2.3223, "step": 389670 }, { "epoch": 1.5063939014395942, "grad_norm": 0.09607816487550735, "learning_rate": 0.002, "loss": 2.3238, "step": 389680 }, { "epoch": 1.5064325586429774, "grad_norm": 0.10966033488512039, "learning_rate": 0.002, "loss": 2.3276, "step": 389690 }, { "epoch": 1.5064712158463607, "grad_norm": 0.0915357917547226, "learning_rate": 0.002, "loss": 2.3303, "step": 389700 }, { "epoch": 1.506509873049744, "grad_norm": 0.10979775339365005, "learning_rate": 0.002, "loss": 2.3291, "step": 389710 }, { "epoch": 1.5065485302531274, "grad_norm": 0.10046348720788956, "learning_rate": 0.002, "loss": 2.321, "step": 389720 }, { "epoch": 1.5065871874565107, "grad_norm": 0.09790916740894318, "learning_rate": 0.002, "loss": 2.3146, "step": 389730 }, { "epoch": 1.506625844659894, "grad_norm": 0.09642428904771805, "learning_rate": 0.002, "loss": 2.3158, "step": 389740 }, { "epoch": 1.5066645018632772, "grad_norm": 0.09451036900281906, "learning_rate": 0.002, "loss": 2.3212, "step": 389750 }, { "epoch": 1.5067031590666606, "grad_norm": 0.10986850410699844, "learning_rate": 0.002, "loss": 2.3384, "step": 389760 }, { "epoch": 1.5067418162700439, "grad_norm": 0.08941185474395752, "learning_rate": 0.002, "loss": 2.341, "step": 389770 }, { "epoch": 1.5067804734734271, "grad_norm": 0.09333249181509018, "learning_rate": 0.002, "loss": 2.3272, "step": 389780 }, { "epoch": 1.5068191306768104, "grad_norm": 0.10135620832443237, "learning_rate": 0.002, "loss": 2.3275, "step": 389790 }, { "epoch": 1.5068577878801936, "grad_norm": 0.09368425607681274, "learning_rate": 0.002, "loss": 2.3169, "step": 389800 }, { "epoch": 1.506896445083577, "grad_norm": 0.1552366465330124, "learning_rate": 0.002, "loss": 2.3335, "step": 389810 }, { "epoch": 1.5069351022869601, "grad_norm": 0.09722664952278137, "learning_rate": 0.002, "loss": 2.3272, "step": 389820 }, { "epoch": 1.5069737594903434, "grad_norm": 0.11551640927791595, "learning_rate": 0.002, "loss": 2.34, "step": 389830 }, { "epoch": 1.5070124166937267, "grad_norm": 0.11581946909427643, "learning_rate": 0.002, "loss": 2.3437, "step": 389840 }, { "epoch": 1.50705107389711, "grad_norm": 0.09836060553789139, "learning_rate": 0.002, "loss": 2.3251, "step": 389850 }, { "epoch": 1.5070897311004932, "grad_norm": 0.12190762907266617, "learning_rate": 0.002, "loss": 2.3461, "step": 389860 }, { "epoch": 1.5071283883038764, "grad_norm": 0.11505207419395447, "learning_rate": 0.002, "loss": 2.3359, "step": 389870 }, { "epoch": 1.5071670455072597, "grad_norm": 0.0856177881360054, "learning_rate": 0.002, "loss": 2.3357, "step": 389880 }, { "epoch": 1.5072057027106431, "grad_norm": 0.09246792644262314, "learning_rate": 0.002, "loss": 2.3259, "step": 389890 }, { "epoch": 1.5072443599140264, "grad_norm": 0.10114166885614395, "learning_rate": 0.002, "loss": 2.3471, "step": 389900 }, { "epoch": 1.5072830171174096, "grad_norm": 0.09140849113464355, "learning_rate": 0.002, "loss": 2.3346, "step": 389910 }, { "epoch": 1.507321674320793, "grad_norm": 0.10203917324542999, "learning_rate": 0.002, "loss": 2.3371, "step": 389920 }, { "epoch": 1.5073603315241764, "grad_norm": 0.10773786902427673, "learning_rate": 0.002, "loss": 2.3291, "step": 389930 }, { "epoch": 1.5073989887275596, "grad_norm": 0.09965097904205322, "learning_rate": 0.002, "loss": 2.3311, "step": 389940 }, { "epoch": 1.5074376459309429, "grad_norm": 0.13344642519950867, "learning_rate": 0.002, "loss": 2.324, "step": 389950 }, { "epoch": 1.5074763031343261, "grad_norm": 0.0945010781288147, "learning_rate": 0.002, "loss": 2.3348, "step": 389960 }, { "epoch": 1.5075149603377094, "grad_norm": 0.09982645511627197, "learning_rate": 0.002, "loss": 2.3309, "step": 389970 }, { "epoch": 1.5075536175410926, "grad_norm": 0.10593358427286148, "learning_rate": 0.002, "loss": 2.34, "step": 389980 }, { "epoch": 1.5075922747444759, "grad_norm": 0.14716258645057678, "learning_rate": 0.002, "loss": 2.3361, "step": 389990 }, { "epoch": 1.5076309319478591, "grad_norm": 0.1291695535182953, "learning_rate": 0.002, "loss": 2.3303, "step": 390000 }, { "epoch": 1.5076695891512424, "grad_norm": 0.09642203152179718, "learning_rate": 0.002, "loss": 2.3214, "step": 390010 }, { "epoch": 1.5077082463546256, "grad_norm": 0.09814012795686722, "learning_rate": 0.002, "loss": 2.3295, "step": 390020 }, { "epoch": 1.5077469035580089, "grad_norm": 0.09167808294296265, "learning_rate": 0.002, "loss": 2.3269, "step": 390030 }, { "epoch": 1.5077855607613921, "grad_norm": 0.10567633807659149, "learning_rate": 0.002, "loss": 2.3363, "step": 390040 }, { "epoch": 1.5078242179647754, "grad_norm": 0.10716531425714493, "learning_rate": 0.002, "loss": 2.3405, "step": 390050 }, { "epoch": 1.5078628751681589, "grad_norm": 0.11084076017141342, "learning_rate": 0.002, "loss": 2.3313, "step": 390060 }, { "epoch": 1.5079015323715421, "grad_norm": 0.11794474720954895, "learning_rate": 0.002, "loss": 2.3252, "step": 390070 }, { "epoch": 1.5079401895749254, "grad_norm": 0.10359127074480057, "learning_rate": 0.002, "loss": 2.3373, "step": 390080 }, { "epoch": 1.5079788467783086, "grad_norm": 0.09916546940803528, "learning_rate": 0.002, "loss": 2.3276, "step": 390090 }, { "epoch": 1.508017503981692, "grad_norm": 0.0951470211148262, "learning_rate": 0.002, "loss": 2.3501, "step": 390100 }, { "epoch": 1.5080561611850754, "grad_norm": 0.10114279389381409, "learning_rate": 0.002, "loss": 2.3155, "step": 390110 }, { "epoch": 1.5080948183884586, "grad_norm": 0.1029725968837738, "learning_rate": 0.002, "loss": 2.3492, "step": 390120 }, { "epoch": 1.5081334755918419, "grad_norm": 0.11488725990056992, "learning_rate": 0.002, "loss": 2.3388, "step": 390130 }, { "epoch": 1.508172132795225, "grad_norm": 0.11002848297357559, "learning_rate": 0.002, "loss": 2.3131, "step": 390140 }, { "epoch": 1.5082107899986084, "grad_norm": 0.11786045879125595, "learning_rate": 0.002, "loss": 2.3228, "step": 390150 }, { "epoch": 1.5082494472019916, "grad_norm": 0.12056462466716766, "learning_rate": 0.002, "loss": 2.3234, "step": 390160 }, { "epoch": 1.5082881044053749, "grad_norm": 0.13139678537845612, "learning_rate": 0.002, "loss": 2.3138, "step": 390170 }, { "epoch": 1.5083267616087581, "grad_norm": 0.1203528419137001, "learning_rate": 0.002, "loss": 2.3287, "step": 390180 }, { "epoch": 1.5083654188121414, "grad_norm": 0.10807789117097855, "learning_rate": 0.002, "loss": 2.333, "step": 390190 }, { "epoch": 1.5084040760155246, "grad_norm": 0.08393408358097076, "learning_rate": 0.002, "loss": 2.3377, "step": 390200 }, { "epoch": 1.5084427332189079, "grad_norm": 0.11528384685516357, "learning_rate": 0.002, "loss": 2.3344, "step": 390210 }, { "epoch": 1.5084813904222911, "grad_norm": 0.10376711189746857, "learning_rate": 0.002, "loss": 2.3418, "step": 390220 }, { "epoch": 1.5085200476256746, "grad_norm": 0.0873163640499115, "learning_rate": 0.002, "loss": 2.3379, "step": 390230 }, { "epoch": 1.5085587048290579, "grad_norm": 0.11890695244073868, "learning_rate": 0.002, "loss": 2.3386, "step": 390240 }, { "epoch": 1.508597362032441, "grad_norm": 0.11122336238622665, "learning_rate": 0.002, "loss": 2.3361, "step": 390250 }, { "epoch": 1.5086360192358244, "grad_norm": 0.12941014766693115, "learning_rate": 0.002, "loss": 2.3244, "step": 390260 }, { "epoch": 1.5086746764392078, "grad_norm": 0.10037616640329361, "learning_rate": 0.002, "loss": 2.3292, "step": 390270 }, { "epoch": 1.508713333642591, "grad_norm": 0.1099221259355545, "learning_rate": 0.002, "loss": 2.345, "step": 390280 }, { "epoch": 1.5087519908459743, "grad_norm": 0.10333079099655151, "learning_rate": 0.002, "loss": 2.3373, "step": 390290 }, { "epoch": 1.5087906480493576, "grad_norm": 0.11339517682790756, "learning_rate": 0.002, "loss": 2.3516, "step": 390300 }, { "epoch": 1.5088293052527408, "grad_norm": 0.0993972197175026, "learning_rate": 0.002, "loss": 2.3474, "step": 390310 }, { "epoch": 1.508867962456124, "grad_norm": 0.14479485154151917, "learning_rate": 0.002, "loss": 2.3284, "step": 390320 }, { "epoch": 1.5089066196595073, "grad_norm": 0.10708092153072357, "learning_rate": 0.002, "loss": 2.3461, "step": 390330 }, { "epoch": 1.5089452768628906, "grad_norm": 0.11167121678590775, "learning_rate": 0.002, "loss": 2.342, "step": 390340 }, { "epoch": 1.5089839340662738, "grad_norm": 0.09349657595157623, "learning_rate": 0.002, "loss": 2.3347, "step": 390350 }, { "epoch": 1.509022591269657, "grad_norm": 0.0951918363571167, "learning_rate": 0.002, "loss": 2.3391, "step": 390360 }, { "epoch": 1.5090612484730404, "grad_norm": 0.10206221789121628, "learning_rate": 0.002, "loss": 2.3241, "step": 390370 }, { "epoch": 1.5090999056764236, "grad_norm": 0.13456571102142334, "learning_rate": 0.002, "loss": 2.3384, "step": 390380 }, { "epoch": 1.509138562879807, "grad_norm": 0.10076375305652618, "learning_rate": 0.002, "loss": 2.3187, "step": 390390 }, { "epoch": 1.5091772200831903, "grad_norm": 0.11521826684474945, "learning_rate": 0.002, "loss": 2.3346, "step": 390400 }, { "epoch": 1.5092158772865736, "grad_norm": 0.10876718908548355, "learning_rate": 0.002, "loss": 2.333, "step": 390410 }, { "epoch": 1.5092545344899568, "grad_norm": 0.09847059100866318, "learning_rate": 0.002, "loss": 2.332, "step": 390420 }, { "epoch": 1.50929319169334, "grad_norm": 0.11620467156171799, "learning_rate": 0.002, "loss": 2.3425, "step": 390430 }, { "epoch": 1.5093318488967236, "grad_norm": 0.10162986814975739, "learning_rate": 0.002, "loss": 2.3518, "step": 390440 }, { "epoch": 1.5093705061001068, "grad_norm": 0.1363476663827896, "learning_rate": 0.002, "loss": 2.3375, "step": 390450 }, { "epoch": 1.50940916330349, "grad_norm": 0.10407718271017075, "learning_rate": 0.002, "loss": 2.329, "step": 390460 }, { "epoch": 1.5094478205068733, "grad_norm": 0.10302364081144333, "learning_rate": 0.002, "loss": 2.324, "step": 390470 }, { "epoch": 1.5094864777102566, "grad_norm": 0.10686130076646805, "learning_rate": 0.002, "loss": 2.356, "step": 390480 }, { "epoch": 1.5095251349136398, "grad_norm": 0.10023646801710129, "learning_rate": 0.002, "loss": 2.3389, "step": 390490 }, { "epoch": 1.509563792117023, "grad_norm": 0.11389179527759552, "learning_rate": 0.002, "loss": 2.3295, "step": 390500 }, { "epoch": 1.5096024493204063, "grad_norm": 0.12031827121973038, "learning_rate": 0.002, "loss": 2.3444, "step": 390510 }, { "epoch": 1.5096411065237896, "grad_norm": 0.09122280776500702, "learning_rate": 0.002, "loss": 2.3339, "step": 390520 }, { "epoch": 1.5096797637271728, "grad_norm": 0.11488986760377884, "learning_rate": 0.002, "loss": 2.3337, "step": 390530 }, { "epoch": 1.509718420930556, "grad_norm": 0.10780785232782364, "learning_rate": 0.002, "loss": 2.3289, "step": 390540 }, { "epoch": 1.5097570781339393, "grad_norm": 0.08984780311584473, "learning_rate": 0.002, "loss": 2.3288, "step": 390550 }, { "epoch": 1.5097957353373228, "grad_norm": 0.12073812633752823, "learning_rate": 0.002, "loss": 2.3298, "step": 390560 }, { "epoch": 1.509834392540706, "grad_norm": 0.11867832392454147, "learning_rate": 0.002, "loss": 2.3345, "step": 390570 }, { "epoch": 1.5098730497440893, "grad_norm": 0.11204609274864197, "learning_rate": 0.002, "loss": 2.3374, "step": 390580 }, { "epoch": 1.5099117069474726, "grad_norm": 0.1025635302066803, "learning_rate": 0.002, "loss": 2.3414, "step": 390590 }, { "epoch": 1.5099503641508558, "grad_norm": 0.09087958931922913, "learning_rate": 0.002, "loss": 2.3203, "step": 390600 }, { "epoch": 1.5099890213542393, "grad_norm": 0.09487861394882202, "learning_rate": 0.002, "loss": 2.3331, "step": 390610 }, { "epoch": 1.5100276785576225, "grad_norm": 0.10458683222532272, "learning_rate": 0.002, "loss": 2.343, "step": 390620 }, { "epoch": 1.5100663357610058, "grad_norm": 0.11646603047847748, "learning_rate": 0.002, "loss": 2.3306, "step": 390630 }, { "epoch": 1.510104992964389, "grad_norm": 0.106773741543293, "learning_rate": 0.002, "loss": 2.3359, "step": 390640 }, { "epoch": 1.5101436501677723, "grad_norm": 0.09964160621166229, "learning_rate": 0.002, "loss": 2.3227, "step": 390650 }, { "epoch": 1.5101823073711556, "grad_norm": 0.09841714799404144, "learning_rate": 0.002, "loss": 2.3049, "step": 390660 }, { "epoch": 1.5102209645745388, "grad_norm": 0.0869053304195404, "learning_rate": 0.002, "loss": 2.3376, "step": 390670 }, { "epoch": 1.510259621777922, "grad_norm": 0.09558701515197754, "learning_rate": 0.002, "loss": 2.3306, "step": 390680 }, { "epoch": 1.5102982789813053, "grad_norm": 0.13511855900287628, "learning_rate": 0.002, "loss": 2.3309, "step": 390690 }, { "epoch": 1.5103369361846886, "grad_norm": 0.12006426602602005, "learning_rate": 0.002, "loss": 2.3249, "step": 390700 }, { "epoch": 1.5103755933880718, "grad_norm": 0.09411891549825668, "learning_rate": 0.002, "loss": 2.3327, "step": 390710 }, { "epoch": 1.510414250591455, "grad_norm": 0.1079292893409729, "learning_rate": 0.002, "loss": 2.3319, "step": 390720 }, { "epoch": 1.5104529077948385, "grad_norm": 0.09245080500841141, "learning_rate": 0.002, "loss": 2.3383, "step": 390730 }, { "epoch": 1.5104915649982218, "grad_norm": 0.10134102404117584, "learning_rate": 0.002, "loss": 2.3323, "step": 390740 }, { "epoch": 1.510530222201605, "grad_norm": 0.11004859209060669, "learning_rate": 0.002, "loss": 2.3362, "step": 390750 }, { "epoch": 1.5105688794049883, "grad_norm": 0.10117950290441513, "learning_rate": 0.002, "loss": 2.336, "step": 390760 }, { "epoch": 1.5106075366083718, "grad_norm": 0.1033184751868248, "learning_rate": 0.002, "loss": 2.3264, "step": 390770 }, { "epoch": 1.510646193811755, "grad_norm": 0.11043199896812439, "learning_rate": 0.002, "loss": 2.3292, "step": 390780 }, { "epoch": 1.5106848510151383, "grad_norm": 0.21440497040748596, "learning_rate": 0.002, "loss": 2.319, "step": 390790 }, { "epoch": 1.5107235082185215, "grad_norm": 0.10097640007734299, "learning_rate": 0.002, "loss": 2.3319, "step": 390800 }, { "epoch": 1.5107621654219048, "grad_norm": 0.11292038857936859, "learning_rate": 0.002, "loss": 2.3463, "step": 390810 }, { "epoch": 1.510800822625288, "grad_norm": 0.12639853358268738, "learning_rate": 0.002, "loss": 2.3185, "step": 390820 }, { "epoch": 1.5108394798286713, "grad_norm": 0.12269312143325806, "learning_rate": 0.002, "loss": 2.3294, "step": 390830 }, { "epoch": 1.5108781370320545, "grad_norm": 0.10088943690061569, "learning_rate": 0.002, "loss": 2.328, "step": 390840 }, { "epoch": 1.5109167942354378, "grad_norm": 0.0912739560008049, "learning_rate": 0.002, "loss": 2.3234, "step": 390850 }, { "epoch": 1.510955451438821, "grad_norm": 0.1321217268705368, "learning_rate": 0.002, "loss": 2.3343, "step": 390860 }, { "epoch": 1.5109941086422043, "grad_norm": 0.3824848532676697, "learning_rate": 0.002, "loss": 2.3477, "step": 390870 }, { "epoch": 1.5110327658455875, "grad_norm": 0.10984724014997482, "learning_rate": 0.002, "loss": 2.3232, "step": 390880 }, { "epoch": 1.5110714230489708, "grad_norm": 0.09105230122804642, "learning_rate": 0.002, "loss": 2.3311, "step": 390890 }, { "epoch": 1.5111100802523543, "grad_norm": 0.10240257531404495, "learning_rate": 0.002, "loss": 2.3222, "step": 390900 }, { "epoch": 1.5111487374557375, "grad_norm": 0.1014326810836792, "learning_rate": 0.002, "loss": 2.3271, "step": 390910 }, { "epoch": 1.5111873946591208, "grad_norm": 0.0920289158821106, "learning_rate": 0.002, "loss": 2.3443, "step": 390920 }, { "epoch": 1.511226051862504, "grad_norm": 0.10732376575469971, "learning_rate": 0.002, "loss": 2.3349, "step": 390930 }, { "epoch": 1.5112647090658875, "grad_norm": 0.1001841351389885, "learning_rate": 0.002, "loss": 2.318, "step": 390940 }, { "epoch": 1.5113033662692708, "grad_norm": 0.10073614120483398, "learning_rate": 0.002, "loss": 2.3338, "step": 390950 }, { "epoch": 1.511342023472654, "grad_norm": 0.09710587561130524, "learning_rate": 0.002, "loss": 2.3304, "step": 390960 }, { "epoch": 1.5113806806760373, "grad_norm": 0.1239316314458847, "learning_rate": 0.002, "loss": 2.3427, "step": 390970 }, { "epoch": 1.5114193378794205, "grad_norm": 0.10505892336368561, "learning_rate": 0.002, "loss": 2.3349, "step": 390980 }, { "epoch": 1.5114579950828038, "grad_norm": 0.10494199395179749, "learning_rate": 0.002, "loss": 2.3374, "step": 390990 }, { "epoch": 1.511496652286187, "grad_norm": 0.10229931026697159, "learning_rate": 0.002, "loss": 2.3264, "step": 391000 }, { "epoch": 1.5115353094895703, "grad_norm": 0.09727927297353745, "learning_rate": 0.002, "loss": 2.3327, "step": 391010 }, { "epoch": 1.5115739666929535, "grad_norm": 0.09767213463783264, "learning_rate": 0.002, "loss": 2.3228, "step": 391020 }, { "epoch": 1.5116126238963368, "grad_norm": 0.10535608232021332, "learning_rate": 0.002, "loss": 2.3251, "step": 391030 }, { "epoch": 1.51165128109972, "grad_norm": 0.12566813826560974, "learning_rate": 0.002, "loss": 2.3404, "step": 391040 }, { "epoch": 1.5116899383031033, "grad_norm": 0.10017907619476318, "learning_rate": 0.002, "loss": 2.3338, "step": 391050 }, { "epoch": 1.5117285955064865, "grad_norm": 0.10999967157840729, "learning_rate": 0.002, "loss": 2.3335, "step": 391060 }, { "epoch": 1.51176725270987, "grad_norm": 0.11590699851512909, "learning_rate": 0.002, "loss": 2.3288, "step": 391070 }, { "epoch": 1.5118059099132533, "grad_norm": 0.09183014929294586, "learning_rate": 0.002, "loss": 2.3199, "step": 391080 }, { "epoch": 1.5118445671166365, "grad_norm": 0.09417787194252014, "learning_rate": 0.002, "loss": 2.3142, "step": 391090 }, { "epoch": 1.5118832243200198, "grad_norm": 0.12490889430046082, "learning_rate": 0.002, "loss": 2.3519, "step": 391100 }, { "epoch": 1.5119218815234032, "grad_norm": 0.09948685765266418, "learning_rate": 0.002, "loss": 2.3389, "step": 391110 }, { "epoch": 1.5119605387267865, "grad_norm": 0.10057361423969269, "learning_rate": 0.002, "loss": 2.3306, "step": 391120 }, { "epoch": 1.5119991959301697, "grad_norm": 0.10864268243312836, "learning_rate": 0.002, "loss": 2.329, "step": 391130 }, { "epoch": 1.512037853133553, "grad_norm": 0.11731230467557907, "learning_rate": 0.002, "loss": 2.3465, "step": 391140 }, { "epoch": 1.5120765103369362, "grad_norm": 0.10575708001852036, "learning_rate": 0.002, "loss": 2.337, "step": 391150 }, { "epoch": 1.5121151675403195, "grad_norm": 0.12365453690290451, "learning_rate": 0.002, "loss": 2.3378, "step": 391160 }, { "epoch": 1.5121538247437027, "grad_norm": 0.12001720815896988, "learning_rate": 0.002, "loss": 2.3524, "step": 391170 }, { "epoch": 1.512192481947086, "grad_norm": 0.12133695185184479, "learning_rate": 0.002, "loss": 2.3432, "step": 391180 }, { "epoch": 1.5122311391504693, "grad_norm": 0.10062947124242783, "learning_rate": 0.002, "loss": 2.3605, "step": 391190 }, { "epoch": 1.5122697963538525, "grad_norm": 0.10657189786434174, "learning_rate": 0.002, "loss": 2.3238, "step": 391200 }, { "epoch": 1.5123084535572358, "grad_norm": 0.10898979008197784, "learning_rate": 0.002, "loss": 2.3226, "step": 391210 }, { "epoch": 1.512347110760619, "grad_norm": 0.09677448123693466, "learning_rate": 0.002, "loss": 2.3303, "step": 391220 }, { "epoch": 1.5123857679640023, "grad_norm": 0.09534822404384613, "learning_rate": 0.002, "loss": 2.3181, "step": 391230 }, { "epoch": 1.5124244251673857, "grad_norm": 0.12047450989484787, "learning_rate": 0.002, "loss": 2.3181, "step": 391240 }, { "epoch": 1.512463082370769, "grad_norm": 0.10149037837982178, "learning_rate": 0.002, "loss": 2.3331, "step": 391250 }, { "epoch": 1.5125017395741522, "grad_norm": 0.11668939143419266, "learning_rate": 0.002, "loss": 2.3259, "step": 391260 }, { "epoch": 1.5125403967775355, "grad_norm": 0.09661033749580383, "learning_rate": 0.002, "loss": 2.3365, "step": 391270 }, { "epoch": 1.512579053980919, "grad_norm": 0.10099072754383087, "learning_rate": 0.002, "loss": 2.3431, "step": 391280 }, { "epoch": 1.5126177111843022, "grad_norm": 0.09791026264429092, "learning_rate": 0.002, "loss": 2.322, "step": 391290 }, { "epoch": 1.5126563683876855, "grad_norm": 0.10209483653306961, "learning_rate": 0.002, "loss": 2.329, "step": 391300 }, { "epoch": 1.5126950255910687, "grad_norm": 0.0877443328499794, "learning_rate": 0.002, "loss": 2.3162, "step": 391310 }, { "epoch": 1.512733682794452, "grad_norm": 0.11895333230495453, "learning_rate": 0.002, "loss": 2.3359, "step": 391320 }, { "epoch": 1.5127723399978352, "grad_norm": 0.11299550533294678, "learning_rate": 0.002, "loss": 2.3336, "step": 391330 }, { "epoch": 1.5128109972012185, "grad_norm": 0.09337560087442398, "learning_rate": 0.002, "loss": 2.3188, "step": 391340 }, { "epoch": 1.5128496544046017, "grad_norm": 0.11867666244506836, "learning_rate": 0.002, "loss": 2.3276, "step": 391350 }, { "epoch": 1.512888311607985, "grad_norm": 0.09304529428482056, "learning_rate": 0.002, "loss": 2.3341, "step": 391360 }, { "epoch": 1.5129269688113682, "grad_norm": 0.10020868480205536, "learning_rate": 0.002, "loss": 2.3357, "step": 391370 }, { "epoch": 1.5129656260147515, "grad_norm": 0.1085643619298935, "learning_rate": 0.002, "loss": 2.3351, "step": 391380 }, { "epoch": 1.5130042832181347, "grad_norm": 0.1056111752986908, "learning_rate": 0.002, "loss": 2.3273, "step": 391390 }, { "epoch": 1.513042940421518, "grad_norm": 0.10014656186103821, "learning_rate": 0.002, "loss": 2.3333, "step": 391400 }, { "epoch": 1.5130815976249015, "grad_norm": 0.09461404383182526, "learning_rate": 0.002, "loss": 2.3337, "step": 391410 }, { "epoch": 1.5131202548282847, "grad_norm": 0.12414918839931488, "learning_rate": 0.002, "loss": 2.3333, "step": 391420 }, { "epoch": 1.513158912031668, "grad_norm": 0.10402705520391464, "learning_rate": 0.002, "loss": 2.3399, "step": 391430 }, { "epoch": 1.5131975692350512, "grad_norm": 0.09142938256263733, "learning_rate": 0.002, "loss": 2.323, "step": 391440 }, { "epoch": 1.5132362264384347, "grad_norm": 0.09494287520647049, "learning_rate": 0.002, "loss": 2.34, "step": 391450 }, { "epoch": 1.513274883641818, "grad_norm": 0.11556043475866318, "learning_rate": 0.002, "loss": 2.3421, "step": 391460 }, { "epoch": 1.5133135408452012, "grad_norm": 0.08956260234117508, "learning_rate": 0.002, "loss": 2.3413, "step": 391470 }, { "epoch": 1.5133521980485845, "grad_norm": 0.11527398973703384, "learning_rate": 0.002, "loss": 2.3405, "step": 391480 }, { "epoch": 1.5133908552519677, "grad_norm": 0.1307714432477951, "learning_rate": 0.002, "loss": 2.3289, "step": 391490 }, { "epoch": 1.513429512455351, "grad_norm": 0.09584154933691025, "learning_rate": 0.002, "loss": 2.3295, "step": 391500 }, { "epoch": 1.5134681696587342, "grad_norm": 0.10020260512828827, "learning_rate": 0.002, "loss": 2.3471, "step": 391510 }, { "epoch": 1.5135068268621175, "grad_norm": 0.09970816969871521, "learning_rate": 0.002, "loss": 2.3266, "step": 391520 }, { "epoch": 1.5135454840655007, "grad_norm": 0.10133900493383408, "learning_rate": 0.002, "loss": 2.3228, "step": 391530 }, { "epoch": 1.513584141268884, "grad_norm": 0.09789116680622101, "learning_rate": 0.002, "loss": 2.3375, "step": 391540 }, { "epoch": 1.5136227984722672, "grad_norm": 0.09601163864135742, "learning_rate": 0.002, "loss": 2.3385, "step": 391550 }, { "epoch": 1.5136614556756505, "grad_norm": 0.12760160863399506, "learning_rate": 0.002, "loss": 2.3449, "step": 391560 }, { "epoch": 1.5137001128790337, "grad_norm": 0.13389061391353607, "learning_rate": 0.002, "loss": 2.3321, "step": 391570 }, { "epoch": 1.5137387700824172, "grad_norm": 0.10897321254014969, "learning_rate": 0.002, "loss": 2.3222, "step": 391580 }, { "epoch": 1.5137774272858004, "grad_norm": 0.10551299899816513, "learning_rate": 0.002, "loss": 2.3501, "step": 391590 }, { "epoch": 1.5138160844891837, "grad_norm": 0.09069501608610153, "learning_rate": 0.002, "loss": 2.3293, "step": 391600 }, { "epoch": 1.513854741692567, "grad_norm": 0.09852441400289536, "learning_rate": 0.002, "loss": 2.3186, "step": 391610 }, { "epoch": 1.5138933988959504, "grad_norm": 0.10836633294820786, "learning_rate": 0.002, "loss": 2.3458, "step": 391620 }, { "epoch": 1.5139320560993337, "grad_norm": 0.09398753941059113, "learning_rate": 0.002, "loss": 2.3424, "step": 391630 }, { "epoch": 1.513970713302717, "grad_norm": 0.10845766961574554, "learning_rate": 0.002, "loss": 2.3332, "step": 391640 }, { "epoch": 1.5140093705061002, "grad_norm": 0.1267317682504654, "learning_rate": 0.002, "loss": 2.3543, "step": 391650 }, { "epoch": 1.5140480277094834, "grad_norm": 0.096676304936409, "learning_rate": 0.002, "loss": 2.3222, "step": 391660 }, { "epoch": 1.5140866849128667, "grad_norm": 0.10404936224222183, "learning_rate": 0.002, "loss": 2.3345, "step": 391670 }, { "epoch": 1.51412534211625, "grad_norm": 0.12528268992900848, "learning_rate": 0.002, "loss": 2.3432, "step": 391680 }, { "epoch": 1.5141639993196332, "grad_norm": 0.09098008275032043, "learning_rate": 0.002, "loss": 2.3357, "step": 391690 }, { "epoch": 1.5142026565230164, "grad_norm": 0.09539708495140076, "learning_rate": 0.002, "loss": 2.3379, "step": 391700 }, { "epoch": 1.5142413137263997, "grad_norm": 0.1156436949968338, "learning_rate": 0.002, "loss": 2.3379, "step": 391710 }, { "epoch": 1.514279970929783, "grad_norm": 0.0968218669295311, "learning_rate": 0.002, "loss": 2.3243, "step": 391720 }, { "epoch": 1.5143186281331662, "grad_norm": 0.10383110493421555, "learning_rate": 0.002, "loss": 2.3304, "step": 391730 }, { "epoch": 1.5143572853365495, "grad_norm": 0.09785936027765274, "learning_rate": 0.002, "loss": 2.3315, "step": 391740 }, { "epoch": 1.514395942539933, "grad_norm": 0.12443191558122635, "learning_rate": 0.002, "loss": 2.3236, "step": 391750 }, { "epoch": 1.5144345997433162, "grad_norm": 0.09383685141801834, "learning_rate": 0.002, "loss": 2.3289, "step": 391760 }, { "epoch": 1.5144732569466994, "grad_norm": 0.09789196401834488, "learning_rate": 0.002, "loss": 2.3283, "step": 391770 }, { "epoch": 1.5145119141500827, "grad_norm": 0.09333156794309616, "learning_rate": 0.002, "loss": 2.343, "step": 391780 }, { "epoch": 1.5145505713534662, "grad_norm": 0.4236663281917572, "learning_rate": 0.002, "loss": 2.3251, "step": 391790 }, { "epoch": 1.5145892285568494, "grad_norm": 0.10581768304109573, "learning_rate": 0.002, "loss": 2.3412, "step": 391800 }, { "epoch": 1.5146278857602327, "grad_norm": 0.10425762087106705, "learning_rate": 0.002, "loss": 2.3338, "step": 391810 }, { "epoch": 1.514666542963616, "grad_norm": 0.11275427788496017, "learning_rate": 0.002, "loss": 2.3405, "step": 391820 }, { "epoch": 1.5147052001669992, "grad_norm": 0.09525428712368011, "learning_rate": 0.002, "loss": 2.3411, "step": 391830 }, { "epoch": 1.5147438573703824, "grad_norm": 0.11205447465181351, "learning_rate": 0.002, "loss": 2.3315, "step": 391840 }, { "epoch": 1.5147825145737657, "grad_norm": 0.1030084639787674, "learning_rate": 0.002, "loss": 2.3458, "step": 391850 }, { "epoch": 1.514821171777149, "grad_norm": 0.0894642099738121, "learning_rate": 0.002, "loss": 2.3332, "step": 391860 }, { "epoch": 1.5148598289805322, "grad_norm": 0.14282263815402985, "learning_rate": 0.002, "loss": 2.3319, "step": 391870 }, { "epoch": 1.5148984861839154, "grad_norm": 0.15263572335243225, "learning_rate": 0.002, "loss": 2.3309, "step": 391880 }, { "epoch": 1.5149371433872987, "grad_norm": 0.09568876773118973, "learning_rate": 0.002, "loss": 2.338, "step": 391890 }, { "epoch": 1.514975800590682, "grad_norm": 0.11003223806619644, "learning_rate": 0.002, "loss": 2.3271, "step": 391900 }, { "epoch": 1.5150144577940652, "grad_norm": 0.09703919291496277, "learning_rate": 0.002, "loss": 2.3228, "step": 391910 }, { "epoch": 1.5150531149974487, "grad_norm": 0.10533502697944641, "learning_rate": 0.002, "loss": 2.3418, "step": 391920 }, { "epoch": 1.515091772200832, "grad_norm": 0.09649696201086044, "learning_rate": 0.002, "loss": 2.3454, "step": 391930 }, { "epoch": 1.5151304294042152, "grad_norm": 0.12361782789230347, "learning_rate": 0.002, "loss": 2.3359, "step": 391940 }, { "epoch": 1.5151690866075984, "grad_norm": 0.09518574178218842, "learning_rate": 0.002, "loss": 2.3305, "step": 391950 }, { "epoch": 1.5152077438109819, "grad_norm": 0.10137498378753662, "learning_rate": 0.002, "loss": 2.3306, "step": 391960 }, { "epoch": 1.5152464010143651, "grad_norm": 0.1020670160651207, "learning_rate": 0.002, "loss": 2.331, "step": 391970 }, { "epoch": 1.5152850582177484, "grad_norm": 0.11717270314693451, "learning_rate": 0.002, "loss": 2.3349, "step": 391980 }, { "epoch": 1.5153237154211316, "grad_norm": 0.10038861632347107, "learning_rate": 0.002, "loss": 2.3231, "step": 391990 }, { "epoch": 1.515362372624515, "grad_norm": 0.0989314541220665, "learning_rate": 0.002, "loss": 2.3231, "step": 392000 }, { "epoch": 1.5154010298278981, "grad_norm": 0.09754546731710434, "learning_rate": 0.002, "loss": 2.3419, "step": 392010 }, { "epoch": 1.5154396870312814, "grad_norm": 0.10955166816711426, "learning_rate": 0.002, "loss": 2.3427, "step": 392020 }, { "epoch": 1.5154783442346647, "grad_norm": 0.10340434312820435, "learning_rate": 0.002, "loss": 2.3323, "step": 392030 }, { "epoch": 1.515517001438048, "grad_norm": 0.11696350574493408, "learning_rate": 0.002, "loss": 2.3343, "step": 392040 }, { "epoch": 1.5155556586414312, "grad_norm": 0.11961662024259567, "learning_rate": 0.002, "loss": 2.3347, "step": 392050 }, { "epoch": 1.5155943158448144, "grad_norm": 0.09642060101032257, "learning_rate": 0.002, "loss": 2.3189, "step": 392060 }, { "epoch": 1.5156329730481977, "grad_norm": 0.09937610477209091, "learning_rate": 0.002, "loss": 2.3336, "step": 392070 }, { "epoch": 1.515671630251581, "grad_norm": 0.09241262823343277, "learning_rate": 0.002, "loss": 2.3299, "step": 392080 }, { "epoch": 1.5157102874549644, "grad_norm": 0.10047882050275803, "learning_rate": 0.002, "loss": 2.3295, "step": 392090 }, { "epoch": 1.5157489446583476, "grad_norm": 0.09795857965946198, "learning_rate": 0.002, "loss": 2.3303, "step": 392100 }, { "epoch": 1.515787601861731, "grad_norm": 0.10768328607082367, "learning_rate": 0.002, "loss": 2.3397, "step": 392110 }, { "epoch": 1.5158262590651141, "grad_norm": 0.10577775537967682, "learning_rate": 0.002, "loss": 2.3311, "step": 392120 }, { "epoch": 1.5158649162684976, "grad_norm": 0.09501665830612183, "learning_rate": 0.002, "loss": 2.3402, "step": 392130 }, { "epoch": 1.5159035734718809, "grad_norm": 0.09370652586221695, "learning_rate": 0.002, "loss": 2.3362, "step": 392140 }, { "epoch": 1.5159422306752641, "grad_norm": 0.11130591481924057, "learning_rate": 0.002, "loss": 2.3344, "step": 392150 }, { "epoch": 1.5159808878786474, "grad_norm": 0.09607101231813431, "learning_rate": 0.002, "loss": 2.3482, "step": 392160 }, { "epoch": 1.5160195450820306, "grad_norm": 0.09282945096492767, "learning_rate": 0.002, "loss": 2.3336, "step": 392170 }, { "epoch": 1.5160582022854139, "grad_norm": 0.09593275189399719, "learning_rate": 0.002, "loss": 2.3248, "step": 392180 }, { "epoch": 1.5160968594887971, "grad_norm": 0.1060456782579422, "learning_rate": 0.002, "loss": 2.3401, "step": 392190 }, { "epoch": 1.5161355166921804, "grad_norm": 0.08822032809257507, "learning_rate": 0.002, "loss": 2.3325, "step": 392200 }, { "epoch": 1.5161741738955636, "grad_norm": 0.1242724061012268, "learning_rate": 0.002, "loss": 2.3281, "step": 392210 }, { "epoch": 1.516212831098947, "grad_norm": 0.15245239436626434, "learning_rate": 0.002, "loss": 2.352, "step": 392220 }, { "epoch": 1.5162514883023301, "grad_norm": 0.11181936413049698, "learning_rate": 0.002, "loss": 2.3429, "step": 392230 }, { "epoch": 1.5162901455057134, "grad_norm": 0.10632267594337463, "learning_rate": 0.002, "loss": 2.3328, "step": 392240 }, { "epoch": 1.5163288027090969, "grad_norm": 0.11932023614645004, "learning_rate": 0.002, "loss": 2.3341, "step": 392250 }, { "epoch": 1.5163674599124801, "grad_norm": 0.09645699709653854, "learning_rate": 0.002, "loss": 2.3432, "step": 392260 }, { "epoch": 1.5164061171158634, "grad_norm": 0.09505350142717361, "learning_rate": 0.002, "loss": 2.3178, "step": 392270 }, { "epoch": 1.5164447743192466, "grad_norm": 0.10155574977397919, "learning_rate": 0.002, "loss": 2.3438, "step": 392280 }, { "epoch": 1.5164834315226299, "grad_norm": 0.09071482717990875, "learning_rate": 0.002, "loss": 2.3448, "step": 392290 }, { "epoch": 1.5165220887260134, "grad_norm": 0.10598849505186081, "learning_rate": 0.002, "loss": 2.3332, "step": 392300 }, { "epoch": 1.5165607459293966, "grad_norm": 0.09445805847644806, "learning_rate": 0.002, "loss": 2.3408, "step": 392310 }, { "epoch": 1.5165994031327799, "grad_norm": 0.11329188942909241, "learning_rate": 0.002, "loss": 2.3273, "step": 392320 }, { "epoch": 1.516638060336163, "grad_norm": 0.12282945215702057, "learning_rate": 0.002, "loss": 2.3343, "step": 392330 }, { "epoch": 1.5166767175395464, "grad_norm": 0.11929178982973099, "learning_rate": 0.002, "loss": 2.3172, "step": 392340 }, { "epoch": 1.5167153747429296, "grad_norm": 0.0997723639011383, "learning_rate": 0.002, "loss": 2.341, "step": 392350 }, { "epoch": 1.5167540319463129, "grad_norm": 0.10382483899593353, "learning_rate": 0.002, "loss": 2.3325, "step": 392360 }, { "epoch": 1.5167926891496961, "grad_norm": 0.09186827391386032, "learning_rate": 0.002, "loss": 2.3452, "step": 392370 }, { "epoch": 1.5168313463530794, "grad_norm": 0.11631743609905243, "learning_rate": 0.002, "loss": 2.3442, "step": 392380 }, { "epoch": 1.5168700035564626, "grad_norm": 0.11780108511447906, "learning_rate": 0.002, "loss": 2.332, "step": 392390 }, { "epoch": 1.5169086607598459, "grad_norm": 0.11376313865184784, "learning_rate": 0.002, "loss": 2.3441, "step": 392400 }, { "epoch": 1.5169473179632291, "grad_norm": 0.11365222185850143, "learning_rate": 0.002, "loss": 2.3394, "step": 392410 }, { "epoch": 1.5169859751666126, "grad_norm": 0.09479023516178131, "learning_rate": 0.002, "loss": 2.3352, "step": 392420 }, { "epoch": 1.5170246323699959, "grad_norm": 0.09805808961391449, "learning_rate": 0.002, "loss": 2.3415, "step": 392430 }, { "epoch": 1.517063289573379, "grad_norm": 0.0988486185669899, "learning_rate": 0.002, "loss": 2.3393, "step": 392440 }, { "epoch": 1.5171019467767624, "grad_norm": 0.14714302122592926, "learning_rate": 0.002, "loss": 2.3366, "step": 392450 }, { "epoch": 1.5171406039801456, "grad_norm": 0.10289258509874344, "learning_rate": 0.002, "loss": 2.3324, "step": 392460 }, { "epoch": 1.517179261183529, "grad_norm": 0.11664602160453796, "learning_rate": 0.002, "loss": 2.3298, "step": 392470 }, { "epoch": 1.5172179183869123, "grad_norm": 0.10490322858095169, "learning_rate": 0.002, "loss": 2.3232, "step": 392480 }, { "epoch": 1.5172565755902956, "grad_norm": 0.1358417272567749, "learning_rate": 0.002, "loss": 2.3206, "step": 392490 }, { "epoch": 1.5172952327936788, "grad_norm": 0.11767023056745529, "learning_rate": 0.002, "loss": 2.3337, "step": 392500 }, { "epoch": 1.517333889997062, "grad_norm": 0.1056869849562645, "learning_rate": 0.002, "loss": 2.334, "step": 392510 }, { "epoch": 1.5173725472004453, "grad_norm": 0.09077652543783188, "learning_rate": 0.002, "loss": 2.3299, "step": 392520 }, { "epoch": 1.5174112044038286, "grad_norm": 0.11978007107973099, "learning_rate": 0.002, "loss": 2.333, "step": 392530 }, { "epoch": 1.5174498616072118, "grad_norm": 0.09781042486429214, "learning_rate": 0.002, "loss": 2.3207, "step": 392540 }, { "epoch": 1.517488518810595, "grad_norm": 0.1132543534040451, "learning_rate": 0.002, "loss": 2.3387, "step": 392550 }, { "epoch": 1.5175271760139784, "grad_norm": 0.10332779586315155, "learning_rate": 0.002, "loss": 2.3297, "step": 392560 }, { "epoch": 1.5175658332173616, "grad_norm": 0.0938902273774147, "learning_rate": 0.002, "loss": 2.3347, "step": 392570 }, { "epoch": 1.5176044904207449, "grad_norm": 0.10210373252630234, "learning_rate": 0.002, "loss": 2.3257, "step": 392580 }, { "epoch": 1.5176431476241283, "grad_norm": 0.10769516974687576, "learning_rate": 0.002, "loss": 2.3388, "step": 392590 }, { "epoch": 1.5176818048275116, "grad_norm": 0.11146010458469391, "learning_rate": 0.002, "loss": 2.3328, "step": 392600 }, { "epoch": 1.5177204620308948, "grad_norm": 0.0972304567694664, "learning_rate": 0.002, "loss": 2.3281, "step": 392610 }, { "epoch": 1.517759119234278, "grad_norm": 0.09830550849437714, "learning_rate": 0.002, "loss": 2.3315, "step": 392620 }, { "epoch": 1.5177977764376613, "grad_norm": 0.11365142464637756, "learning_rate": 0.002, "loss": 2.3249, "step": 392630 }, { "epoch": 1.5178364336410448, "grad_norm": 0.09081844240427017, "learning_rate": 0.002, "loss": 2.3308, "step": 392640 }, { "epoch": 1.517875090844428, "grad_norm": 0.09644568711519241, "learning_rate": 0.002, "loss": 2.3244, "step": 392650 }, { "epoch": 1.5179137480478113, "grad_norm": 0.11101473867893219, "learning_rate": 0.002, "loss": 2.3288, "step": 392660 }, { "epoch": 1.5179524052511946, "grad_norm": 0.10434384644031525, "learning_rate": 0.002, "loss": 2.3265, "step": 392670 }, { "epoch": 1.5179910624545778, "grad_norm": 0.095488540828228, "learning_rate": 0.002, "loss": 2.3263, "step": 392680 }, { "epoch": 1.518029719657961, "grad_norm": 0.11795365810394287, "learning_rate": 0.002, "loss": 2.3469, "step": 392690 }, { "epoch": 1.5180683768613443, "grad_norm": 0.11635580658912659, "learning_rate": 0.002, "loss": 2.3334, "step": 392700 }, { "epoch": 1.5181070340647276, "grad_norm": 0.10605958849191666, "learning_rate": 0.002, "loss": 2.3206, "step": 392710 }, { "epoch": 1.5181456912681108, "grad_norm": 0.11753251403570175, "learning_rate": 0.002, "loss": 2.3415, "step": 392720 }, { "epoch": 1.518184348471494, "grad_norm": 0.10806383937597275, "learning_rate": 0.002, "loss": 2.3407, "step": 392730 }, { "epoch": 1.5182230056748773, "grad_norm": 0.1053234115242958, "learning_rate": 0.002, "loss": 2.3261, "step": 392740 }, { "epoch": 1.5182616628782606, "grad_norm": 0.09364716708660126, "learning_rate": 0.002, "loss": 2.3267, "step": 392750 }, { "epoch": 1.518300320081644, "grad_norm": 0.09259460121393204, "learning_rate": 0.002, "loss": 2.3417, "step": 392760 }, { "epoch": 1.5183389772850273, "grad_norm": 0.10869884490966797, "learning_rate": 0.002, "loss": 2.3142, "step": 392770 }, { "epoch": 1.5183776344884106, "grad_norm": 0.10413431376218796, "learning_rate": 0.002, "loss": 2.3283, "step": 392780 }, { "epoch": 1.5184162916917938, "grad_norm": 0.09960522502660751, "learning_rate": 0.002, "loss": 2.3531, "step": 392790 }, { "epoch": 1.5184549488951773, "grad_norm": 0.10901309549808502, "learning_rate": 0.002, "loss": 2.3233, "step": 392800 }, { "epoch": 1.5184936060985605, "grad_norm": 0.09266559034585953, "learning_rate": 0.002, "loss": 2.3306, "step": 392810 }, { "epoch": 1.5185322633019438, "grad_norm": 0.12511612474918365, "learning_rate": 0.002, "loss": 2.343, "step": 392820 }, { "epoch": 1.518570920505327, "grad_norm": 0.0981638953089714, "learning_rate": 0.002, "loss": 2.3327, "step": 392830 }, { "epoch": 1.5186095777087103, "grad_norm": 0.12114229053258896, "learning_rate": 0.002, "loss": 2.3355, "step": 392840 }, { "epoch": 1.5186482349120936, "grad_norm": 0.1143832877278328, "learning_rate": 0.002, "loss": 2.3327, "step": 392850 }, { "epoch": 1.5186868921154768, "grad_norm": 0.0972919762134552, "learning_rate": 0.002, "loss": 2.3485, "step": 392860 }, { "epoch": 1.51872554931886, "grad_norm": 0.10748854279518127, "learning_rate": 0.002, "loss": 2.3346, "step": 392870 }, { "epoch": 1.5187642065222433, "grad_norm": 0.12689507007598877, "learning_rate": 0.002, "loss": 2.3295, "step": 392880 }, { "epoch": 1.5188028637256266, "grad_norm": 0.09913776814937592, "learning_rate": 0.002, "loss": 2.3178, "step": 392890 }, { "epoch": 1.5188415209290098, "grad_norm": 0.10747122764587402, "learning_rate": 0.002, "loss": 2.3297, "step": 392900 }, { "epoch": 1.518880178132393, "grad_norm": 0.12248285859823227, "learning_rate": 0.002, "loss": 2.3562, "step": 392910 }, { "epoch": 1.5189188353357763, "grad_norm": 0.0993824154138565, "learning_rate": 0.002, "loss": 2.3399, "step": 392920 }, { "epoch": 1.5189574925391598, "grad_norm": 0.10456233471632004, "learning_rate": 0.002, "loss": 2.3454, "step": 392930 }, { "epoch": 1.518996149742543, "grad_norm": 0.10220813006162643, "learning_rate": 0.002, "loss": 2.3466, "step": 392940 }, { "epoch": 1.5190348069459263, "grad_norm": 0.12464220821857452, "learning_rate": 0.002, "loss": 2.3355, "step": 392950 }, { "epoch": 1.5190734641493095, "grad_norm": 0.12892554700374603, "learning_rate": 0.002, "loss": 2.3377, "step": 392960 }, { "epoch": 1.519112121352693, "grad_norm": 0.09762685000896454, "learning_rate": 0.002, "loss": 2.3376, "step": 392970 }, { "epoch": 1.5191507785560763, "grad_norm": 0.1277654469013214, "learning_rate": 0.002, "loss": 2.3315, "step": 392980 }, { "epoch": 1.5191894357594595, "grad_norm": 0.09789907187223434, "learning_rate": 0.002, "loss": 2.3336, "step": 392990 }, { "epoch": 1.5192280929628428, "grad_norm": 0.10237940400838852, "learning_rate": 0.002, "loss": 2.3398, "step": 393000 }, { "epoch": 1.519266750166226, "grad_norm": 0.11480651795864105, "learning_rate": 0.002, "loss": 2.3354, "step": 393010 }, { "epoch": 1.5193054073696093, "grad_norm": 0.09746021777391434, "learning_rate": 0.002, "loss": 2.3205, "step": 393020 }, { "epoch": 1.5193440645729925, "grad_norm": 0.10675664246082306, "learning_rate": 0.002, "loss": 2.3284, "step": 393030 }, { "epoch": 1.5193827217763758, "grad_norm": 0.09594815224409103, "learning_rate": 0.002, "loss": 2.3173, "step": 393040 }, { "epoch": 1.519421378979759, "grad_norm": 0.11100509762763977, "learning_rate": 0.002, "loss": 2.3115, "step": 393050 }, { "epoch": 1.5194600361831423, "grad_norm": 0.11012808233499527, "learning_rate": 0.002, "loss": 2.3274, "step": 393060 }, { "epoch": 1.5194986933865255, "grad_norm": 0.10745590925216675, "learning_rate": 0.002, "loss": 2.3328, "step": 393070 }, { "epoch": 1.5195373505899088, "grad_norm": 0.10260172188282013, "learning_rate": 0.002, "loss": 2.3308, "step": 393080 }, { "epoch": 1.519576007793292, "grad_norm": 0.10885608941316605, "learning_rate": 0.002, "loss": 2.3307, "step": 393090 }, { "epoch": 1.5196146649966755, "grad_norm": 0.11682126671075821, "learning_rate": 0.002, "loss": 2.3396, "step": 393100 }, { "epoch": 1.5196533222000588, "grad_norm": 0.10573320090770721, "learning_rate": 0.002, "loss": 2.3319, "step": 393110 }, { "epoch": 1.519691979403442, "grad_norm": 0.10078450292348862, "learning_rate": 0.002, "loss": 2.3198, "step": 393120 }, { "epoch": 1.5197306366068253, "grad_norm": 0.1034998968243599, "learning_rate": 0.002, "loss": 2.324, "step": 393130 }, { "epoch": 1.5197692938102088, "grad_norm": 0.09276104718446732, "learning_rate": 0.002, "loss": 2.3336, "step": 393140 }, { "epoch": 1.519807951013592, "grad_norm": 0.10877040028572083, "learning_rate": 0.002, "loss": 2.3336, "step": 393150 }, { "epoch": 1.5198466082169753, "grad_norm": 0.10812745988368988, "learning_rate": 0.002, "loss": 2.3185, "step": 393160 }, { "epoch": 1.5198852654203585, "grad_norm": 0.12653662264347076, "learning_rate": 0.002, "loss": 2.3452, "step": 393170 }, { "epoch": 1.5199239226237418, "grad_norm": 0.10978109389543533, "learning_rate": 0.002, "loss": 2.3469, "step": 393180 }, { "epoch": 1.519962579827125, "grad_norm": 0.10237731039524078, "learning_rate": 0.002, "loss": 2.3383, "step": 393190 }, { "epoch": 1.5200012370305083, "grad_norm": 0.10582784563302994, "learning_rate": 0.002, "loss": 2.3182, "step": 393200 }, { "epoch": 1.5200398942338915, "grad_norm": 0.10746137797832489, "learning_rate": 0.002, "loss": 2.3364, "step": 393210 }, { "epoch": 1.5200785514372748, "grad_norm": 0.1339419037103653, "learning_rate": 0.002, "loss": 2.3354, "step": 393220 }, { "epoch": 1.520117208640658, "grad_norm": 0.1077442392706871, "learning_rate": 0.002, "loss": 2.3264, "step": 393230 }, { "epoch": 1.5201558658440413, "grad_norm": 0.11086498200893402, "learning_rate": 0.002, "loss": 2.3296, "step": 393240 }, { "epoch": 1.5201945230474245, "grad_norm": 0.11237195134162903, "learning_rate": 0.002, "loss": 2.3318, "step": 393250 }, { "epoch": 1.5202331802508078, "grad_norm": 0.12104542553424835, "learning_rate": 0.002, "loss": 2.3319, "step": 393260 }, { "epoch": 1.5202718374541913, "grad_norm": 0.10996460914611816, "learning_rate": 0.002, "loss": 2.3316, "step": 393270 }, { "epoch": 1.5203104946575745, "grad_norm": 0.09453155845403671, "learning_rate": 0.002, "loss": 2.3289, "step": 393280 }, { "epoch": 1.5203491518609578, "grad_norm": 0.10569145530462265, "learning_rate": 0.002, "loss": 2.3281, "step": 393290 }, { "epoch": 1.520387809064341, "grad_norm": 0.1049167811870575, "learning_rate": 0.002, "loss": 2.3166, "step": 393300 }, { "epoch": 1.5204264662677245, "grad_norm": 0.10674849152565002, "learning_rate": 0.002, "loss": 2.3257, "step": 393310 }, { "epoch": 1.5204651234711077, "grad_norm": 0.09160462021827698, "learning_rate": 0.002, "loss": 2.3247, "step": 393320 }, { "epoch": 1.520503780674491, "grad_norm": 0.11467369645833969, "learning_rate": 0.002, "loss": 2.3119, "step": 393330 }, { "epoch": 1.5205424378778742, "grad_norm": 0.11105100810527802, "learning_rate": 0.002, "loss": 2.3299, "step": 393340 }, { "epoch": 1.5205810950812575, "grad_norm": 0.09533059597015381, "learning_rate": 0.002, "loss": 2.3415, "step": 393350 }, { "epoch": 1.5206197522846407, "grad_norm": 0.10581649094820023, "learning_rate": 0.002, "loss": 2.325, "step": 393360 }, { "epoch": 1.520658409488024, "grad_norm": 0.12146256864070892, "learning_rate": 0.002, "loss": 2.3393, "step": 393370 }, { "epoch": 1.5206970666914073, "grad_norm": 0.14656734466552734, "learning_rate": 0.002, "loss": 2.3504, "step": 393380 }, { "epoch": 1.5207357238947905, "grad_norm": 0.10592775046825409, "learning_rate": 0.002, "loss": 2.3383, "step": 393390 }, { "epoch": 1.5207743810981738, "grad_norm": 0.10509035736322403, "learning_rate": 0.002, "loss": 2.3376, "step": 393400 }, { "epoch": 1.520813038301557, "grad_norm": 0.1095312312245369, "learning_rate": 0.002, "loss": 2.3341, "step": 393410 }, { "epoch": 1.5208516955049403, "grad_norm": 0.0920720025897026, "learning_rate": 0.002, "loss": 2.326, "step": 393420 }, { "epoch": 1.5208903527083235, "grad_norm": 0.10379816591739655, "learning_rate": 0.002, "loss": 2.3522, "step": 393430 }, { "epoch": 1.520929009911707, "grad_norm": 0.8264697790145874, "learning_rate": 0.002, "loss": 2.3277, "step": 393440 }, { "epoch": 1.5209676671150902, "grad_norm": 0.10276079922914505, "learning_rate": 0.002, "loss": 2.3318, "step": 393450 }, { "epoch": 1.5210063243184735, "grad_norm": 0.13902220129966736, "learning_rate": 0.002, "loss": 2.3421, "step": 393460 }, { "epoch": 1.5210449815218567, "grad_norm": 0.12761332094669342, "learning_rate": 0.002, "loss": 2.3361, "step": 393470 }, { "epoch": 1.5210836387252402, "grad_norm": 0.09847622364759445, "learning_rate": 0.002, "loss": 2.3278, "step": 393480 }, { "epoch": 1.5211222959286235, "grad_norm": 0.10445865988731384, "learning_rate": 0.002, "loss": 2.3281, "step": 393490 }, { "epoch": 1.5211609531320067, "grad_norm": 0.10974553227424622, "learning_rate": 0.002, "loss": 2.3313, "step": 393500 }, { "epoch": 1.52119961033539, "grad_norm": 0.09774612635374069, "learning_rate": 0.002, "loss": 2.3318, "step": 393510 }, { "epoch": 1.5212382675387732, "grad_norm": 0.10803424566984177, "learning_rate": 0.002, "loss": 2.3439, "step": 393520 }, { "epoch": 1.5212769247421565, "grad_norm": 0.10066314786672592, "learning_rate": 0.002, "loss": 2.3264, "step": 393530 }, { "epoch": 1.5213155819455397, "grad_norm": 0.10231117159128189, "learning_rate": 0.002, "loss": 2.3474, "step": 393540 }, { "epoch": 1.521354239148923, "grad_norm": 0.12019024789333344, "learning_rate": 0.002, "loss": 2.3311, "step": 393550 }, { "epoch": 1.5213928963523062, "grad_norm": 0.10460830479860306, "learning_rate": 0.002, "loss": 2.3415, "step": 393560 }, { "epoch": 1.5214315535556895, "grad_norm": 0.11702663451433182, "learning_rate": 0.002, "loss": 2.3401, "step": 393570 }, { "epoch": 1.5214702107590727, "grad_norm": 0.10864273458719254, "learning_rate": 0.002, "loss": 2.3324, "step": 393580 }, { "epoch": 1.521508867962456, "grad_norm": 0.09753713756799698, "learning_rate": 0.002, "loss": 2.323, "step": 393590 }, { "epoch": 1.5215475251658392, "grad_norm": 0.11186327785253525, "learning_rate": 0.002, "loss": 2.3278, "step": 393600 }, { "epoch": 1.5215861823692227, "grad_norm": 0.10732456296682358, "learning_rate": 0.002, "loss": 2.3292, "step": 393610 }, { "epoch": 1.521624839572606, "grad_norm": 0.10576072335243225, "learning_rate": 0.002, "loss": 2.3367, "step": 393620 }, { "epoch": 1.5216634967759892, "grad_norm": 0.10033950954675674, "learning_rate": 0.002, "loss": 2.3157, "step": 393630 }, { "epoch": 1.5217021539793725, "grad_norm": 0.11310286819934845, "learning_rate": 0.002, "loss": 2.3336, "step": 393640 }, { "epoch": 1.521740811182756, "grad_norm": 0.09552452713251114, "learning_rate": 0.002, "loss": 2.3303, "step": 393650 }, { "epoch": 1.5217794683861392, "grad_norm": 0.11281601339578629, "learning_rate": 0.002, "loss": 2.3263, "step": 393660 }, { "epoch": 1.5218181255895225, "grad_norm": 0.10923857986927032, "learning_rate": 0.002, "loss": 2.3371, "step": 393670 }, { "epoch": 1.5218567827929057, "grad_norm": 0.10855615139007568, "learning_rate": 0.002, "loss": 2.3393, "step": 393680 }, { "epoch": 1.521895439996289, "grad_norm": 0.11590652912855148, "learning_rate": 0.002, "loss": 2.3243, "step": 393690 }, { "epoch": 1.5219340971996722, "grad_norm": 0.10057484358549118, "learning_rate": 0.002, "loss": 2.3366, "step": 393700 }, { "epoch": 1.5219727544030555, "grad_norm": 0.10506322979927063, "learning_rate": 0.002, "loss": 2.3358, "step": 393710 }, { "epoch": 1.5220114116064387, "grad_norm": 0.11757268756628036, "learning_rate": 0.002, "loss": 2.3421, "step": 393720 }, { "epoch": 1.522050068809822, "grad_norm": 0.1272018998861313, "learning_rate": 0.002, "loss": 2.3516, "step": 393730 }, { "epoch": 1.5220887260132052, "grad_norm": 0.10142607241868973, "learning_rate": 0.002, "loss": 2.3563, "step": 393740 }, { "epoch": 1.5221273832165885, "grad_norm": 0.12956005334854126, "learning_rate": 0.002, "loss": 2.3225, "step": 393750 }, { "epoch": 1.5221660404199717, "grad_norm": 0.0910429060459137, "learning_rate": 0.002, "loss": 2.3312, "step": 393760 }, { "epoch": 1.522204697623355, "grad_norm": 0.1021728664636612, "learning_rate": 0.002, "loss": 2.3437, "step": 393770 }, { "epoch": 1.5222433548267384, "grad_norm": 0.1251039206981659, "learning_rate": 0.002, "loss": 2.3273, "step": 393780 }, { "epoch": 1.5222820120301217, "grad_norm": 0.10015079379081726, "learning_rate": 0.002, "loss": 2.3309, "step": 393790 }, { "epoch": 1.522320669233505, "grad_norm": 0.09192286431789398, "learning_rate": 0.002, "loss": 2.3318, "step": 393800 }, { "epoch": 1.5223593264368882, "grad_norm": 0.13426172733306885, "learning_rate": 0.002, "loss": 2.3378, "step": 393810 }, { "epoch": 1.5223979836402717, "grad_norm": 0.11347789317369461, "learning_rate": 0.002, "loss": 2.328, "step": 393820 }, { "epoch": 1.522436640843655, "grad_norm": 0.13141945004463196, "learning_rate": 0.002, "loss": 2.3208, "step": 393830 }, { "epoch": 1.5224752980470382, "grad_norm": 0.09779248386621475, "learning_rate": 0.002, "loss": 2.3418, "step": 393840 }, { "epoch": 1.5225139552504214, "grad_norm": 0.1317087858915329, "learning_rate": 0.002, "loss": 2.3105, "step": 393850 }, { "epoch": 1.5225526124538047, "grad_norm": 0.11914301663637161, "learning_rate": 0.002, "loss": 2.3223, "step": 393860 }, { "epoch": 1.522591269657188, "grad_norm": 0.10186275094747543, "learning_rate": 0.002, "loss": 2.3352, "step": 393870 }, { "epoch": 1.5226299268605712, "grad_norm": 0.09285727143287659, "learning_rate": 0.002, "loss": 2.3411, "step": 393880 }, { "epoch": 1.5226685840639544, "grad_norm": 0.11961885541677475, "learning_rate": 0.002, "loss": 2.335, "step": 393890 }, { "epoch": 1.5227072412673377, "grad_norm": 0.09913287311792374, "learning_rate": 0.002, "loss": 2.3243, "step": 393900 }, { "epoch": 1.522745898470721, "grad_norm": 0.10889461636543274, "learning_rate": 0.002, "loss": 2.3531, "step": 393910 }, { "epoch": 1.5227845556741042, "grad_norm": 0.10375667363405228, "learning_rate": 0.002, "loss": 2.3342, "step": 393920 }, { "epoch": 1.5228232128774875, "grad_norm": 0.11258301883935928, "learning_rate": 0.002, "loss": 2.3128, "step": 393930 }, { "epoch": 1.5228618700808707, "grad_norm": 0.10796305537223816, "learning_rate": 0.002, "loss": 2.3244, "step": 393940 }, { "epoch": 1.5229005272842542, "grad_norm": 0.11623138934373856, "learning_rate": 0.002, "loss": 2.32, "step": 393950 }, { "epoch": 1.5229391844876374, "grad_norm": 0.10926652699708939, "learning_rate": 0.002, "loss": 2.3238, "step": 393960 }, { "epoch": 1.5229778416910207, "grad_norm": 0.1066916286945343, "learning_rate": 0.002, "loss": 2.3325, "step": 393970 }, { "epoch": 1.523016498894404, "grad_norm": 0.10033965855836868, "learning_rate": 0.002, "loss": 2.3383, "step": 393980 }, { "epoch": 1.5230551560977874, "grad_norm": 0.09625542908906937, "learning_rate": 0.002, "loss": 2.3429, "step": 393990 }, { "epoch": 1.5230938133011707, "grad_norm": 0.10865096747875214, "learning_rate": 0.002, "loss": 2.3345, "step": 394000 }, { "epoch": 1.523132470504554, "grad_norm": 0.09583982825279236, "learning_rate": 0.002, "loss": 2.3494, "step": 394010 }, { "epoch": 1.5231711277079372, "grad_norm": 0.1044599786400795, "learning_rate": 0.002, "loss": 2.337, "step": 394020 }, { "epoch": 1.5232097849113204, "grad_norm": 0.11785249412059784, "learning_rate": 0.002, "loss": 2.3396, "step": 394030 }, { "epoch": 1.5232484421147037, "grad_norm": 0.20341509580612183, "learning_rate": 0.002, "loss": 2.326, "step": 394040 }, { "epoch": 1.523287099318087, "grad_norm": 0.11942635476589203, "learning_rate": 0.002, "loss": 2.3336, "step": 394050 }, { "epoch": 1.5233257565214702, "grad_norm": 0.10689658671617508, "learning_rate": 0.002, "loss": 2.3294, "step": 394060 }, { "epoch": 1.5233644137248534, "grad_norm": 0.1341332197189331, "learning_rate": 0.002, "loss": 2.3336, "step": 394070 }, { "epoch": 1.5234030709282367, "grad_norm": 0.10913897305727005, "learning_rate": 0.002, "loss": 2.3268, "step": 394080 }, { "epoch": 1.52344172813162, "grad_norm": 0.10185789316892624, "learning_rate": 0.002, "loss": 2.3294, "step": 394090 }, { "epoch": 1.5234803853350032, "grad_norm": 0.12783731520175934, "learning_rate": 0.002, "loss": 2.3263, "step": 394100 }, { "epoch": 1.5235190425383864, "grad_norm": 0.10477516800165176, "learning_rate": 0.002, "loss": 2.3481, "step": 394110 }, { "epoch": 1.52355769974177, "grad_norm": 0.10059119760990143, "learning_rate": 0.002, "loss": 2.3345, "step": 394120 }, { "epoch": 1.5235963569451532, "grad_norm": 0.09582138806581497, "learning_rate": 0.002, "loss": 2.3363, "step": 394130 }, { "epoch": 1.5236350141485364, "grad_norm": 0.09843972325325012, "learning_rate": 0.002, "loss": 2.3495, "step": 394140 }, { "epoch": 1.5236736713519197, "grad_norm": 0.10571901500225067, "learning_rate": 0.002, "loss": 2.3319, "step": 394150 }, { "epoch": 1.5237123285553031, "grad_norm": 0.12133859097957611, "learning_rate": 0.002, "loss": 2.3438, "step": 394160 }, { "epoch": 1.5237509857586864, "grad_norm": 0.11906891316175461, "learning_rate": 0.002, "loss": 2.3366, "step": 394170 }, { "epoch": 1.5237896429620696, "grad_norm": 0.11050893366336823, "learning_rate": 0.002, "loss": 2.344, "step": 394180 }, { "epoch": 1.523828300165453, "grad_norm": 0.0925726592540741, "learning_rate": 0.002, "loss": 2.309, "step": 394190 }, { "epoch": 1.5238669573688362, "grad_norm": 0.09126723557710648, "learning_rate": 0.002, "loss": 2.3419, "step": 394200 }, { "epoch": 1.5239056145722194, "grad_norm": 0.12772385776042938, "learning_rate": 0.002, "loss": 2.3206, "step": 394210 }, { "epoch": 1.5239442717756027, "grad_norm": 0.10459309816360474, "learning_rate": 0.002, "loss": 2.3418, "step": 394220 }, { "epoch": 1.523982928978986, "grad_norm": 0.13920532166957855, "learning_rate": 0.002, "loss": 2.3244, "step": 394230 }, { "epoch": 1.5240215861823692, "grad_norm": 0.11099079251289368, "learning_rate": 0.002, "loss": 2.3278, "step": 394240 }, { "epoch": 1.5240602433857524, "grad_norm": 0.11004325747489929, "learning_rate": 0.002, "loss": 2.3302, "step": 394250 }, { "epoch": 1.5240989005891357, "grad_norm": 0.10719511657953262, "learning_rate": 0.002, "loss": 2.3308, "step": 394260 }, { "epoch": 1.524137557792519, "grad_norm": 0.092842236161232, "learning_rate": 0.002, "loss": 2.3349, "step": 394270 }, { "epoch": 1.5241762149959024, "grad_norm": 0.1256948709487915, "learning_rate": 0.002, "loss": 2.3393, "step": 394280 }, { "epoch": 1.5242148721992856, "grad_norm": 0.11076880246400833, "learning_rate": 0.002, "loss": 2.3338, "step": 394290 }, { "epoch": 1.524253529402669, "grad_norm": 0.09827834367752075, "learning_rate": 0.002, "loss": 2.3436, "step": 394300 }, { "epoch": 1.5242921866060521, "grad_norm": 0.10175015777349472, "learning_rate": 0.002, "loss": 2.3331, "step": 394310 }, { "epoch": 1.5243308438094354, "grad_norm": 0.09242283552885056, "learning_rate": 0.002, "loss": 2.3296, "step": 394320 }, { "epoch": 1.5243695010128189, "grad_norm": 0.12954500317573547, "learning_rate": 0.002, "loss": 2.3239, "step": 394330 }, { "epoch": 1.5244081582162021, "grad_norm": 0.10394249111413956, "learning_rate": 0.002, "loss": 2.3368, "step": 394340 }, { "epoch": 1.5244468154195854, "grad_norm": 0.09540977329015732, "learning_rate": 0.002, "loss": 2.3321, "step": 394350 }, { "epoch": 1.5244854726229686, "grad_norm": 0.1039431244134903, "learning_rate": 0.002, "loss": 2.3245, "step": 394360 }, { "epoch": 1.5245241298263519, "grad_norm": 0.09400424361228943, "learning_rate": 0.002, "loss": 2.3418, "step": 394370 }, { "epoch": 1.5245627870297351, "grad_norm": 0.10155437886714935, "learning_rate": 0.002, "loss": 2.3257, "step": 394380 }, { "epoch": 1.5246014442331184, "grad_norm": 0.16589802503585815, "learning_rate": 0.002, "loss": 2.3333, "step": 394390 }, { "epoch": 1.5246401014365016, "grad_norm": 0.1328371912240982, "learning_rate": 0.002, "loss": 2.3269, "step": 394400 }, { "epoch": 1.524678758639885, "grad_norm": 0.12050023674964905, "learning_rate": 0.002, "loss": 2.3408, "step": 394410 }, { "epoch": 1.5247174158432681, "grad_norm": 0.1132076308131218, "learning_rate": 0.002, "loss": 2.3432, "step": 394420 }, { "epoch": 1.5247560730466514, "grad_norm": 0.09805141389369965, "learning_rate": 0.002, "loss": 2.3418, "step": 394430 }, { "epoch": 1.5247947302500346, "grad_norm": 0.25652357935905457, "learning_rate": 0.002, "loss": 2.3667, "step": 394440 }, { "epoch": 1.5248333874534181, "grad_norm": 0.11934979259967804, "learning_rate": 0.002, "loss": 2.3501, "step": 394450 }, { "epoch": 1.5248720446568014, "grad_norm": 0.09216290712356567, "learning_rate": 0.002, "loss": 2.3308, "step": 394460 }, { "epoch": 1.5249107018601846, "grad_norm": 0.09738843142986298, "learning_rate": 0.002, "loss": 2.324, "step": 394470 }, { "epoch": 1.5249493590635679, "grad_norm": 0.10043909400701523, "learning_rate": 0.002, "loss": 2.3403, "step": 394480 }, { "epoch": 1.5249880162669511, "grad_norm": 0.09155519306659698, "learning_rate": 0.002, "loss": 2.3225, "step": 394490 }, { "epoch": 1.5250266734703346, "grad_norm": 0.10652053356170654, "learning_rate": 0.002, "loss": 2.3303, "step": 394500 }, { "epoch": 1.5250653306737179, "grad_norm": 0.10862810164690018, "learning_rate": 0.002, "loss": 2.3402, "step": 394510 }, { "epoch": 1.525103987877101, "grad_norm": 0.11527835577726364, "learning_rate": 0.002, "loss": 2.3254, "step": 394520 }, { "epoch": 1.5251426450804844, "grad_norm": 0.09903129935264587, "learning_rate": 0.002, "loss": 2.3336, "step": 394530 }, { "epoch": 1.5251813022838676, "grad_norm": 0.10090334713459015, "learning_rate": 0.002, "loss": 2.3353, "step": 394540 }, { "epoch": 1.5252199594872509, "grad_norm": 0.11012939363718033, "learning_rate": 0.002, "loss": 2.3371, "step": 394550 }, { "epoch": 1.5252586166906341, "grad_norm": 0.13572582602500916, "learning_rate": 0.002, "loss": 2.3291, "step": 394560 }, { "epoch": 1.5252972738940174, "grad_norm": 0.0971146821975708, "learning_rate": 0.002, "loss": 2.3276, "step": 394570 }, { "epoch": 1.5253359310974006, "grad_norm": 0.10251978039741516, "learning_rate": 0.002, "loss": 2.3303, "step": 394580 }, { "epoch": 1.5253745883007839, "grad_norm": 0.1334100067615509, "learning_rate": 0.002, "loss": 2.3349, "step": 394590 }, { "epoch": 1.5254132455041671, "grad_norm": 0.10141900181770325, "learning_rate": 0.002, "loss": 2.3537, "step": 394600 }, { "epoch": 1.5254519027075504, "grad_norm": 0.10914376378059387, "learning_rate": 0.002, "loss": 2.3291, "step": 394610 }, { "epoch": 1.5254905599109339, "grad_norm": 0.1215430423617363, "learning_rate": 0.002, "loss": 2.3323, "step": 394620 }, { "epoch": 1.525529217114317, "grad_norm": 0.0969533771276474, "learning_rate": 0.002, "loss": 2.3323, "step": 394630 }, { "epoch": 1.5255678743177004, "grad_norm": 0.12010926753282547, "learning_rate": 0.002, "loss": 2.3267, "step": 394640 }, { "epoch": 1.5256065315210836, "grad_norm": 0.10832379013299942, "learning_rate": 0.002, "loss": 2.3348, "step": 394650 }, { "epoch": 1.525645188724467, "grad_norm": 0.11077246814966202, "learning_rate": 0.002, "loss": 2.3488, "step": 394660 }, { "epoch": 1.5256838459278503, "grad_norm": 0.09438929706811905, "learning_rate": 0.002, "loss": 2.3407, "step": 394670 }, { "epoch": 1.5257225031312336, "grad_norm": 0.09369917958974838, "learning_rate": 0.002, "loss": 2.3264, "step": 394680 }, { "epoch": 1.5257611603346168, "grad_norm": 0.09520909935235977, "learning_rate": 0.002, "loss": 2.3472, "step": 394690 }, { "epoch": 1.525799817538, "grad_norm": 0.10132263600826263, "learning_rate": 0.002, "loss": 2.3342, "step": 394700 }, { "epoch": 1.5258384747413833, "grad_norm": 0.10649420320987701, "learning_rate": 0.002, "loss": 2.34, "step": 394710 }, { "epoch": 1.5258771319447666, "grad_norm": 0.09204483777284622, "learning_rate": 0.002, "loss": 2.3321, "step": 394720 }, { "epoch": 1.5259157891481498, "grad_norm": 0.0947391539812088, "learning_rate": 0.002, "loss": 2.3313, "step": 394730 }, { "epoch": 1.525954446351533, "grad_norm": 0.11095141619443893, "learning_rate": 0.002, "loss": 2.3486, "step": 394740 }, { "epoch": 1.5259931035549164, "grad_norm": 0.11917775869369507, "learning_rate": 0.002, "loss": 2.3364, "step": 394750 }, { "epoch": 1.5260317607582996, "grad_norm": 0.09193374961614609, "learning_rate": 0.002, "loss": 2.3268, "step": 394760 }, { "epoch": 1.5260704179616829, "grad_norm": 0.10913031548261642, "learning_rate": 0.002, "loss": 2.3369, "step": 394770 }, { "epoch": 1.526109075165066, "grad_norm": 0.08642298728227615, "learning_rate": 0.002, "loss": 2.3411, "step": 394780 }, { "epoch": 1.5261477323684496, "grad_norm": 0.10410486906766891, "learning_rate": 0.002, "loss": 2.3514, "step": 394790 }, { "epoch": 1.5261863895718328, "grad_norm": 0.10142649710178375, "learning_rate": 0.002, "loss": 2.3354, "step": 394800 }, { "epoch": 1.526225046775216, "grad_norm": 0.1226276382803917, "learning_rate": 0.002, "loss": 2.3336, "step": 394810 }, { "epoch": 1.5262637039785993, "grad_norm": 0.09792998433113098, "learning_rate": 0.002, "loss": 2.3338, "step": 394820 }, { "epoch": 1.5263023611819828, "grad_norm": 0.1416461020708084, "learning_rate": 0.002, "loss": 2.3263, "step": 394830 }, { "epoch": 1.526341018385366, "grad_norm": 0.1003195121884346, "learning_rate": 0.002, "loss": 2.3281, "step": 394840 }, { "epoch": 1.5263796755887493, "grad_norm": 0.11260812729597092, "learning_rate": 0.002, "loss": 2.3349, "step": 394850 }, { "epoch": 1.5264183327921326, "grad_norm": 0.09097535163164139, "learning_rate": 0.002, "loss": 2.3408, "step": 394860 }, { "epoch": 1.5264569899955158, "grad_norm": 0.10058214515447617, "learning_rate": 0.002, "loss": 2.33, "step": 394870 }, { "epoch": 1.526495647198899, "grad_norm": 0.11305031180381775, "learning_rate": 0.002, "loss": 2.3287, "step": 394880 }, { "epoch": 1.5265343044022823, "grad_norm": 0.11100471764802933, "learning_rate": 0.002, "loss": 2.3256, "step": 394890 }, { "epoch": 1.5265729616056656, "grad_norm": 0.09917481243610382, "learning_rate": 0.002, "loss": 2.3162, "step": 394900 }, { "epoch": 1.5266116188090488, "grad_norm": 0.16251282393932343, "learning_rate": 0.002, "loss": 2.3175, "step": 394910 }, { "epoch": 1.526650276012432, "grad_norm": 0.09185020625591278, "learning_rate": 0.002, "loss": 2.3368, "step": 394920 }, { "epoch": 1.5266889332158153, "grad_norm": 0.09810732305049896, "learning_rate": 0.002, "loss": 2.3478, "step": 394930 }, { "epoch": 1.5267275904191986, "grad_norm": 0.09562589228153229, "learning_rate": 0.002, "loss": 2.3206, "step": 394940 }, { "epoch": 1.5267662476225818, "grad_norm": 0.0937427207827568, "learning_rate": 0.002, "loss": 2.3359, "step": 394950 }, { "epoch": 1.5268049048259653, "grad_norm": 0.10749828815460205, "learning_rate": 0.002, "loss": 2.3365, "step": 394960 }, { "epoch": 1.5268435620293486, "grad_norm": 0.10349355638027191, "learning_rate": 0.002, "loss": 2.3345, "step": 394970 }, { "epoch": 1.5268822192327318, "grad_norm": 0.08923173695802689, "learning_rate": 0.002, "loss": 2.3214, "step": 394980 }, { "epoch": 1.526920876436115, "grad_norm": 0.09006751328706741, "learning_rate": 0.002, "loss": 2.3401, "step": 394990 }, { "epoch": 1.5269595336394985, "grad_norm": 0.0997283011674881, "learning_rate": 0.002, "loss": 2.3246, "step": 395000 }, { "epoch": 1.5269981908428818, "grad_norm": 0.106237031519413, "learning_rate": 0.002, "loss": 2.336, "step": 395010 }, { "epoch": 1.527036848046265, "grad_norm": 0.1262180507183075, "learning_rate": 0.002, "loss": 2.3576, "step": 395020 }, { "epoch": 1.5270755052496483, "grad_norm": 0.09994278103113174, "learning_rate": 0.002, "loss": 2.3478, "step": 395030 }, { "epoch": 1.5271141624530316, "grad_norm": 0.12491146475076675, "learning_rate": 0.002, "loss": 2.3326, "step": 395040 }, { "epoch": 1.5271528196564148, "grad_norm": 0.09989090263843536, "learning_rate": 0.002, "loss": 2.3278, "step": 395050 }, { "epoch": 1.527191476859798, "grad_norm": 0.09762978553771973, "learning_rate": 0.002, "loss": 2.3354, "step": 395060 }, { "epoch": 1.5272301340631813, "grad_norm": 0.10708542168140411, "learning_rate": 0.002, "loss": 2.336, "step": 395070 }, { "epoch": 1.5272687912665646, "grad_norm": 0.10440186411142349, "learning_rate": 0.002, "loss": 2.3224, "step": 395080 }, { "epoch": 1.5273074484699478, "grad_norm": 0.10143192112445831, "learning_rate": 0.002, "loss": 2.3298, "step": 395090 }, { "epoch": 1.527346105673331, "grad_norm": 0.10683754086494446, "learning_rate": 0.002, "loss": 2.3265, "step": 395100 }, { "epoch": 1.5273847628767143, "grad_norm": 0.10886263102293015, "learning_rate": 0.002, "loss": 2.339, "step": 395110 }, { "epoch": 1.5274234200800976, "grad_norm": 0.10051087290048599, "learning_rate": 0.002, "loss": 2.3403, "step": 395120 }, { "epoch": 1.527462077283481, "grad_norm": 0.10115080326795578, "learning_rate": 0.002, "loss": 2.346, "step": 395130 }, { "epoch": 1.5275007344868643, "grad_norm": 0.09336403757333755, "learning_rate": 0.002, "loss": 2.3333, "step": 395140 }, { "epoch": 1.5275393916902475, "grad_norm": 0.12161771953105927, "learning_rate": 0.002, "loss": 2.3341, "step": 395150 }, { "epoch": 1.5275780488936308, "grad_norm": 0.10998454689979553, "learning_rate": 0.002, "loss": 2.3388, "step": 395160 }, { "epoch": 1.5276167060970143, "grad_norm": 0.10109839588403702, "learning_rate": 0.002, "loss": 2.3322, "step": 395170 }, { "epoch": 1.5276553633003975, "grad_norm": 0.13047313690185547, "learning_rate": 0.002, "loss": 2.3192, "step": 395180 }, { "epoch": 1.5276940205037808, "grad_norm": 0.09682628512382507, "learning_rate": 0.002, "loss": 2.3265, "step": 395190 }, { "epoch": 1.527732677707164, "grad_norm": 0.1216890811920166, "learning_rate": 0.002, "loss": 2.3199, "step": 395200 }, { "epoch": 1.5277713349105473, "grad_norm": 0.10351161658763885, "learning_rate": 0.002, "loss": 2.3375, "step": 395210 }, { "epoch": 1.5278099921139305, "grad_norm": 0.10402938723564148, "learning_rate": 0.002, "loss": 2.3179, "step": 395220 }, { "epoch": 1.5278486493173138, "grad_norm": 0.1108585074543953, "learning_rate": 0.002, "loss": 2.3418, "step": 395230 }, { "epoch": 1.527887306520697, "grad_norm": 0.10756135731935501, "learning_rate": 0.002, "loss": 2.324, "step": 395240 }, { "epoch": 1.5279259637240803, "grad_norm": 0.10325378179550171, "learning_rate": 0.002, "loss": 2.3268, "step": 395250 }, { "epoch": 1.5279646209274635, "grad_norm": 0.09820621460676193, "learning_rate": 0.002, "loss": 2.3284, "step": 395260 }, { "epoch": 1.5280032781308468, "grad_norm": 0.17582575976848602, "learning_rate": 0.002, "loss": 2.3366, "step": 395270 }, { "epoch": 1.52804193533423, "grad_norm": 0.0954713225364685, "learning_rate": 0.002, "loss": 2.3368, "step": 395280 }, { "epoch": 1.5280805925376133, "grad_norm": 0.10267660766839981, "learning_rate": 0.002, "loss": 2.3452, "step": 395290 }, { "epoch": 1.5281192497409968, "grad_norm": 0.16547106206417084, "learning_rate": 0.002, "loss": 2.318, "step": 395300 }, { "epoch": 1.52815790694438, "grad_norm": 0.11562098562717438, "learning_rate": 0.002, "loss": 2.3301, "step": 395310 }, { "epoch": 1.5281965641477633, "grad_norm": 0.09530088305473328, "learning_rate": 0.002, "loss": 2.335, "step": 395320 }, { "epoch": 1.5282352213511465, "grad_norm": 0.10034076869487762, "learning_rate": 0.002, "loss": 2.3392, "step": 395330 }, { "epoch": 1.52827387855453, "grad_norm": 0.11918345093727112, "learning_rate": 0.002, "loss": 2.311, "step": 395340 }, { "epoch": 1.5283125357579133, "grad_norm": 0.11170210689306259, "learning_rate": 0.002, "loss": 2.3352, "step": 395350 }, { "epoch": 1.5283511929612965, "grad_norm": 0.09193076193332672, "learning_rate": 0.002, "loss": 2.3334, "step": 395360 }, { "epoch": 1.5283898501646798, "grad_norm": 0.10835543274879456, "learning_rate": 0.002, "loss": 2.3263, "step": 395370 }, { "epoch": 1.528428507368063, "grad_norm": 0.1091921404004097, "learning_rate": 0.002, "loss": 2.3214, "step": 395380 }, { "epoch": 1.5284671645714463, "grad_norm": 0.11564579606056213, "learning_rate": 0.002, "loss": 2.3215, "step": 395390 }, { "epoch": 1.5285058217748295, "grad_norm": 0.09592635929584503, "learning_rate": 0.002, "loss": 2.3311, "step": 395400 }, { "epoch": 1.5285444789782128, "grad_norm": 0.1146683543920517, "learning_rate": 0.002, "loss": 2.3424, "step": 395410 }, { "epoch": 1.528583136181596, "grad_norm": 0.10991352051496506, "learning_rate": 0.002, "loss": 2.3398, "step": 395420 }, { "epoch": 1.5286217933849793, "grad_norm": 0.08941232413053513, "learning_rate": 0.002, "loss": 2.3322, "step": 395430 }, { "epoch": 1.5286604505883625, "grad_norm": 0.10512373596429825, "learning_rate": 0.002, "loss": 2.3322, "step": 395440 }, { "epoch": 1.5286991077917458, "grad_norm": 0.10848497599363327, "learning_rate": 0.002, "loss": 2.3355, "step": 395450 }, { "epoch": 1.528737764995129, "grad_norm": 0.1271876096725464, "learning_rate": 0.002, "loss": 2.3384, "step": 395460 }, { "epoch": 1.5287764221985125, "grad_norm": 0.11504293978214264, "learning_rate": 0.002, "loss": 2.3413, "step": 395470 }, { "epoch": 1.5288150794018958, "grad_norm": 0.11468256264925003, "learning_rate": 0.002, "loss": 2.3468, "step": 395480 }, { "epoch": 1.528853736605279, "grad_norm": 0.1417941004037857, "learning_rate": 0.002, "loss": 2.3292, "step": 395490 }, { "epoch": 1.5288923938086623, "grad_norm": 0.09736809879541397, "learning_rate": 0.002, "loss": 2.3311, "step": 395500 }, { "epoch": 1.5289310510120457, "grad_norm": 0.09892872720956802, "learning_rate": 0.002, "loss": 2.3439, "step": 395510 }, { "epoch": 1.528969708215429, "grad_norm": 0.09419476240873337, "learning_rate": 0.002, "loss": 2.3539, "step": 395520 }, { "epoch": 1.5290083654188122, "grad_norm": 0.10254430770874023, "learning_rate": 0.002, "loss": 2.3259, "step": 395530 }, { "epoch": 1.5290470226221955, "grad_norm": 0.10115151852369308, "learning_rate": 0.002, "loss": 2.3387, "step": 395540 }, { "epoch": 1.5290856798255787, "grad_norm": 0.09870092570781708, "learning_rate": 0.002, "loss": 2.331, "step": 395550 }, { "epoch": 1.529124337028962, "grad_norm": 0.09946366399526596, "learning_rate": 0.002, "loss": 2.325, "step": 395560 }, { "epoch": 1.5291629942323453, "grad_norm": 0.09750016778707504, "learning_rate": 0.002, "loss": 2.335, "step": 395570 }, { "epoch": 1.5292016514357285, "grad_norm": 0.09980180114507675, "learning_rate": 0.002, "loss": 2.34, "step": 395580 }, { "epoch": 1.5292403086391118, "grad_norm": 0.10483616590499878, "learning_rate": 0.002, "loss": 2.3413, "step": 395590 }, { "epoch": 1.529278965842495, "grad_norm": 0.09471538662910461, "learning_rate": 0.002, "loss": 2.3278, "step": 395600 }, { "epoch": 1.5293176230458783, "grad_norm": 0.1223699301481247, "learning_rate": 0.002, "loss": 2.3241, "step": 395610 }, { "epoch": 1.5293562802492615, "grad_norm": 0.09756605327129364, "learning_rate": 0.002, "loss": 2.3329, "step": 395620 }, { "epoch": 1.5293949374526448, "grad_norm": 0.10068816691637039, "learning_rate": 0.002, "loss": 2.3332, "step": 395630 }, { "epoch": 1.5294335946560282, "grad_norm": 0.10722284764051437, "learning_rate": 0.002, "loss": 2.3284, "step": 395640 }, { "epoch": 1.5294722518594115, "grad_norm": 0.08979428559541702, "learning_rate": 0.002, "loss": 2.3303, "step": 395650 }, { "epoch": 1.5295109090627947, "grad_norm": 0.09775793552398682, "learning_rate": 0.002, "loss": 2.3328, "step": 395660 }, { "epoch": 1.529549566266178, "grad_norm": 0.10150935500860214, "learning_rate": 0.002, "loss": 2.3454, "step": 395670 }, { "epoch": 1.5295882234695615, "grad_norm": 0.1311400681734085, "learning_rate": 0.002, "loss": 2.3317, "step": 395680 }, { "epoch": 1.5296268806729447, "grad_norm": 0.11002326011657715, "learning_rate": 0.002, "loss": 2.3224, "step": 395690 }, { "epoch": 1.529665537876328, "grad_norm": 0.11130055040121078, "learning_rate": 0.002, "loss": 2.3252, "step": 395700 }, { "epoch": 1.5297041950797112, "grad_norm": 0.10423920303583145, "learning_rate": 0.002, "loss": 2.3325, "step": 395710 }, { "epoch": 1.5297428522830945, "grad_norm": 0.10642270743846893, "learning_rate": 0.002, "loss": 2.3191, "step": 395720 }, { "epoch": 1.5297815094864777, "grad_norm": 0.09561317414045334, "learning_rate": 0.002, "loss": 2.3335, "step": 395730 }, { "epoch": 1.529820166689861, "grad_norm": 0.10052410513162613, "learning_rate": 0.002, "loss": 2.3283, "step": 395740 }, { "epoch": 1.5298588238932442, "grad_norm": 0.12037631869316101, "learning_rate": 0.002, "loss": 2.342, "step": 395750 }, { "epoch": 1.5298974810966275, "grad_norm": 0.08717092871665955, "learning_rate": 0.002, "loss": 2.3287, "step": 395760 }, { "epoch": 1.5299361383000107, "grad_norm": 0.10462912172079086, "learning_rate": 0.002, "loss": 2.3141, "step": 395770 }, { "epoch": 1.529974795503394, "grad_norm": 0.09321984648704529, "learning_rate": 0.002, "loss": 2.3391, "step": 395780 }, { "epoch": 1.5300134527067772, "grad_norm": 0.39973539113998413, "learning_rate": 0.002, "loss": 2.3354, "step": 395790 }, { "epoch": 1.5300521099101605, "grad_norm": 0.1333955079317093, "learning_rate": 0.002, "loss": 2.3516, "step": 395800 }, { "epoch": 1.530090767113544, "grad_norm": 0.10980807989835739, "learning_rate": 0.002, "loss": 2.3544, "step": 395810 }, { "epoch": 1.5301294243169272, "grad_norm": 0.09195207804441452, "learning_rate": 0.002, "loss": 2.3324, "step": 395820 }, { "epoch": 1.5301680815203105, "grad_norm": 0.0882343202829361, "learning_rate": 0.002, "loss": 2.3318, "step": 395830 }, { "epoch": 1.5302067387236937, "grad_norm": 0.11924585700035095, "learning_rate": 0.002, "loss": 2.3281, "step": 395840 }, { "epoch": 1.5302453959270772, "grad_norm": 0.10046351701021194, "learning_rate": 0.002, "loss": 2.3186, "step": 395850 }, { "epoch": 1.5302840531304605, "grad_norm": 0.09576009958982468, "learning_rate": 0.002, "loss": 2.3441, "step": 395860 }, { "epoch": 1.5303227103338437, "grad_norm": 0.09291372448205948, "learning_rate": 0.002, "loss": 2.3273, "step": 395870 }, { "epoch": 1.530361367537227, "grad_norm": 0.12952889502048492, "learning_rate": 0.002, "loss": 2.3269, "step": 395880 }, { "epoch": 1.5304000247406102, "grad_norm": 0.11749241501092911, "learning_rate": 0.002, "loss": 2.3406, "step": 395890 }, { "epoch": 1.5304386819439935, "grad_norm": 0.10607244074344635, "learning_rate": 0.002, "loss": 2.3348, "step": 395900 }, { "epoch": 1.5304773391473767, "grad_norm": 0.1116856187582016, "learning_rate": 0.002, "loss": 2.337, "step": 395910 }, { "epoch": 1.53051599635076, "grad_norm": 0.10862328857183456, "learning_rate": 0.002, "loss": 2.3393, "step": 395920 }, { "epoch": 1.5305546535541432, "grad_norm": 0.10131102055311203, "learning_rate": 0.002, "loss": 2.3243, "step": 395930 }, { "epoch": 1.5305933107575265, "grad_norm": 0.17241306602954865, "learning_rate": 0.002, "loss": 2.3374, "step": 395940 }, { "epoch": 1.5306319679609097, "grad_norm": 0.09801255911588669, "learning_rate": 0.002, "loss": 2.3293, "step": 395950 }, { "epoch": 1.530670625164293, "grad_norm": 0.10967551171779633, "learning_rate": 0.002, "loss": 2.3186, "step": 395960 }, { "epoch": 1.5307092823676762, "grad_norm": 0.11190198361873627, "learning_rate": 0.002, "loss": 2.3369, "step": 395970 }, { "epoch": 1.5307479395710597, "grad_norm": 0.10620824247598648, "learning_rate": 0.002, "loss": 2.3214, "step": 395980 }, { "epoch": 1.530786596774443, "grad_norm": 0.09396962076425552, "learning_rate": 0.002, "loss": 2.3216, "step": 395990 }, { "epoch": 1.5308252539778262, "grad_norm": 0.10517092794179916, "learning_rate": 0.002, "loss": 2.3473, "step": 396000 }, { "epoch": 1.5308639111812095, "grad_norm": 0.09492594748735428, "learning_rate": 0.002, "loss": 2.3504, "step": 396010 }, { "epoch": 1.530902568384593, "grad_norm": 0.09977784752845764, "learning_rate": 0.002, "loss": 2.3251, "step": 396020 }, { "epoch": 1.5309412255879762, "grad_norm": 0.11173679679632187, "learning_rate": 0.002, "loss": 2.3263, "step": 396030 }, { "epoch": 1.5309798827913594, "grad_norm": 0.0971636101603508, "learning_rate": 0.002, "loss": 2.3328, "step": 396040 }, { "epoch": 1.5310185399947427, "grad_norm": 0.0979008898139, "learning_rate": 0.002, "loss": 2.3216, "step": 396050 }, { "epoch": 1.531057197198126, "grad_norm": 0.1309940665960312, "learning_rate": 0.002, "loss": 2.3365, "step": 396060 }, { "epoch": 1.5310958544015092, "grad_norm": 0.10882627218961716, "learning_rate": 0.002, "loss": 2.3186, "step": 396070 }, { "epoch": 1.5311345116048924, "grad_norm": 0.09825605899095535, "learning_rate": 0.002, "loss": 2.335, "step": 396080 }, { "epoch": 1.5311731688082757, "grad_norm": 0.10115810483694077, "learning_rate": 0.002, "loss": 2.3236, "step": 396090 }, { "epoch": 1.531211826011659, "grad_norm": 0.10364280641078949, "learning_rate": 0.002, "loss": 2.3226, "step": 396100 }, { "epoch": 1.5312504832150422, "grad_norm": 0.09675593674182892, "learning_rate": 0.002, "loss": 2.3346, "step": 396110 }, { "epoch": 1.5312891404184255, "grad_norm": 0.12637415528297424, "learning_rate": 0.002, "loss": 2.3596, "step": 396120 }, { "epoch": 1.5313277976218087, "grad_norm": 0.12695544958114624, "learning_rate": 0.002, "loss": 2.3424, "step": 396130 }, { "epoch": 1.5313664548251922, "grad_norm": 0.11234147101640701, "learning_rate": 0.002, "loss": 2.3344, "step": 396140 }, { "epoch": 1.5314051120285754, "grad_norm": 0.09313137084245682, "learning_rate": 0.002, "loss": 2.3353, "step": 396150 }, { "epoch": 1.5314437692319587, "grad_norm": 0.09089941531419754, "learning_rate": 0.002, "loss": 2.3221, "step": 396160 }, { "epoch": 1.531482426435342, "grad_norm": 0.09164874255657196, "learning_rate": 0.002, "loss": 2.3237, "step": 396170 }, { "epoch": 1.5315210836387252, "grad_norm": 0.10642282664775848, "learning_rate": 0.002, "loss": 2.3341, "step": 396180 }, { "epoch": 1.5315597408421087, "grad_norm": 0.11467402428388596, "learning_rate": 0.002, "loss": 2.3171, "step": 396190 }, { "epoch": 1.531598398045492, "grad_norm": 0.12948563694953918, "learning_rate": 0.002, "loss": 2.3101, "step": 396200 }, { "epoch": 1.5316370552488752, "grad_norm": 0.1263953000307083, "learning_rate": 0.002, "loss": 2.318, "step": 396210 }, { "epoch": 1.5316757124522584, "grad_norm": 0.10095341503620148, "learning_rate": 0.002, "loss": 2.329, "step": 396220 }, { "epoch": 1.5317143696556417, "grad_norm": 0.09533122181892395, "learning_rate": 0.002, "loss": 2.3314, "step": 396230 }, { "epoch": 1.531753026859025, "grad_norm": 0.10737673193216324, "learning_rate": 0.002, "loss": 2.3253, "step": 396240 }, { "epoch": 1.5317916840624082, "grad_norm": 0.12329886853694916, "learning_rate": 0.002, "loss": 2.3336, "step": 396250 }, { "epoch": 1.5318303412657914, "grad_norm": 0.11738262325525284, "learning_rate": 0.002, "loss": 2.3253, "step": 396260 }, { "epoch": 1.5318689984691747, "grad_norm": 0.1225343570113182, "learning_rate": 0.002, "loss": 2.3151, "step": 396270 }, { "epoch": 1.531907655672558, "grad_norm": 0.10273466259241104, "learning_rate": 0.002, "loss": 2.337, "step": 396280 }, { "epoch": 1.5319463128759412, "grad_norm": 0.10513811558485031, "learning_rate": 0.002, "loss": 2.3268, "step": 396290 }, { "epoch": 1.5319849700793244, "grad_norm": 0.12400317937135696, "learning_rate": 0.002, "loss": 2.339, "step": 396300 }, { "epoch": 1.532023627282708, "grad_norm": 0.12455243617296219, "learning_rate": 0.002, "loss": 2.3204, "step": 396310 }, { "epoch": 1.5320622844860912, "grad_norm": 0.20735898613929749, "learning_rate": 0.002, "loss": 2.3305, "step": 396320 }, { "epoch": 1.5321009416894744, "grad_norm": 0.10270749777555466, "learning_rate": 0.002, "loss": 2.3107, "step": 396330 }, { "epoch": 1.5321395988928577, "grad_norm": 0.12237662076950073, "learning_rate": 0.002, "loss": 2.3396, "step": 396340 }, { "epoch": 1.532178256096241, "grad_norm": 0.10366332530975342, "learning_rate": 0.002, "loss": 2.3257, "step": 396350 }, { "epoch": 1.5322169132996244, "grad_norm": 0.09420368820428848, "learning_rate": 0.002, "loss": 2.3404, "step": 396360 }, { "epoch": 1.5322555705030076, "grad_norm": 0.0979878306388855, "learning_rate": 0.002, "loss": 2.3387, "step": 396370 }, { "epoch": 1.532294227706391, "grad_norm": 0.10520835965871811, "learning_rate": 0.002, "loss": 2.3383, "step": 396380 }, { "epoch": 1.5323328849097742, "grad_norm": 0.0948127955198288, "learning_rate": 0.002, "loss": 2.3301, "step": 396390 }, { "epoch": 1.5323715421131574, "grad_norm": 0.11359903216362, "learning_rate": 0.002, "loss": 2.3412, "step": 396400 }, { "epoch": 1.5324101993165407, "grad_norm": 0.11875665932893753, "learning_rate": 0.002, "loss": 2.3371, "step": 396410 }, { "epoch": 1.532448856519924, "grad_norm": 0.1266757994890213, "learning_rate": 0.002, "loss": 2.3313, "step": 396420 }, { "epoch": 1.5324875137233072, "grad_norm": 0.10371547937393188, "learning_rate": 0.002, "loss": 2.3413, "step": 396430 }, { "epoch": 1.5325261709266904, "grad_norm": 0.10963073372840881, "learning_rate": 0.002, "loss": 2.3185, "step": 396440 }, { "epoch": 1.5325648281300737, "grad_norm": 0.0958590880036354, "learning_rate": 0.002, "loss": 2.3505, "step": 396450 }, { "epoch": 1.532603485333457, "grad_norm": 0.11888827383518219, "learning_rate": 0.002, "loss": 2.3289, "step": 396460 }, { "epoch": 1.5326421425368402, "grad_norm": 0.09254291653633118, "learning_rate": 0.002, "loss": 2.3416, "step": 396470 }, { "epoch": 1.5326807997402236, "grad_norm": 0.10755075514316559, "learning_rate": 0.002, "loss": 2.3271, "step": 396480 }, { "epoch": 1.532719456943607, "grad_norm": 0.1045212596654892, "learning_rate": 0.002, "loss": 2.3319, "step": 396490 }, { "epoch": 1.5327581141469901, "grad_norm": 0.1087140217423439, "learning_rate": 0.002, "loss": 2.3185, "step": 396500 }, { "epoch": 1.5327967713503734, "grad_norm": 0.0878644585609436, "learning_rate": 0.002, "loss": 2.3353, "step": 396510 }, { "epoch": 1.5328354285537569, "grad_norm": 0.12269987165927887, "learning_rate": 0.002, "loss": 2.3212, "step": 396520 }, { "epoch": 1.5328740857571401, "grad_norm": 0.11972922086715698, "learning_rate": 0.002, "loss": 2.3334, "step": 396530 }, { "epoch": 1.5329127429605234, "grad_norm": 0.11032579094171524, "learning_rate": 0.002, "loss": 2.3416, "step": 396540 }, { "epoch": 1.5329514001639066, "grad_norm": 0.11419215053319931, "learning_rate": 0.002, "loss": 2.3297, "step": 396550 }, { "epoch": 1.5329900573672899, "grad_norm": 0.10795775800943375, "learning_rate": 0.002, "loss": 2.3424, "step": 396560 }, { "epoch": 1.5330287145706731, "grad_norm": 0.105446957051754, "learning_rate": 0.002, "loss": 2.3336, "step": 396570 }, { "epoch": 1.5330673717740564, "grad_norm": 0.1169656291604042, "learning_rate": 0.002, "loss": 2.3237, "step": 396580 }, { "epoch": 1.5331060289774396, "grad_norm": 0.09742467850446701, "learning_rate": 0.002, "loss": 2.3275, "step": 396590 }, { "epoch": 1.533144686180823, "grad_norm": 0.12050811201334, "learning_rate": 0.002, "loss": 2.3372, "step": 396600 }, { "epoch": 1.5331833433842061, "grad_norm": 0.10963039100170135, "learning_rate": 0.002, "loss": 2.3236, "step": 396610 }, { "epoch": 1.5332220005875894, "grad_norm": 0.09053657948970795, "learning_rate": 0.002, "loss": 2.3218, "step": 396620 }, { "epoch": 1.5332606577909726, "grad_norm": 0.3095700442790985, "learning_rate": 0.002, "loss": 2.3344, "step": 396630 }, { "epoch": 1.533299314994356, "grad_norm": 0.09944092482328415, "learning_rate": 0.002, "loss": 2.3394, "step": 396640 }, { "epoch": 1.5333379721977394, "grad_norm": 0.1099468246102333, "learning_rate": 0.002, "loss": 2.3295, "step": 396650 }, { "epoch": 1.5333766294011226, "grad_norm": 0.0962558463215828, "learning_rate": 0.002, "loss": 2.3494, "step": 396660 }, { "epoch": 1.5334152866045059, "grad_norm": 0.12188946455717087, "learning_rate": 0.002, "loss": 2.3162, "step": 396670 }, { "epoch": 1.5334539438078891, "grad_norm": 0.1150522455573082, "learning_rate": 0.002, "loss": 2.3416, "step": 396680 }, { "epoch": 1.5334926010112726, "grad_norm": 0.10602287948131561, "learning_rate": 0.002, "loss": 2.3376, "step": 396690 }, { "epoch": 1.5335312582146559, "grad_norm": 0.09643089771270752, "learning_rate": 0.002, "loss": 2.3338, "step": 396700 }, { "epoch": 1.533569915418039, "grad_norm": 0.12320626527070999, "learning_rate": 0.002, "loss": 2.3198, "step": 396710 }, { "epoch": 1.5336085726214224, "grad_norm": 0.10964152216911316, "learning_rate": 0.002, "loss": 2.3442, "step": 396720 }, { "epoch": 1.5336472298248056, "grad_norm": 0.10293164104223251, "learning_rate": 0.002, "loss": 2.3398, "step": 396730 }, { "epoch": 1.5336858870281889, "grad_norm": 0.0910605862736702, "learning_rate": 0.002, "loss": 2.3399, "step": 396740 }, { "epoch": 1.5337245442315721, "grad_norm": 0.10125657171010971, "learning_rate": 0.002, "loss": 2.3496, "step": 396750 }, { "epoch": 1.5337632014349554, "grad_norm": 0.09557554870843887, "learning_rate": 0.002, "loss": 2.3323, "step": 396760 }, { "epoch": 1.5338018586383386, "grad_norm": 0.09974601864814758, "learning_rate": 0.002, "loss": 2.3329, "step": 396770 }, { "epoch": 1.5338405158417219, "grad_norm": 0.09233753383159637, "learning_rate": 0.002, "loss": 2.3413, "step": 396780 }, { "epoch": 1.5338791730451051, "grad_norm": 0.10616888850927353, "learning_rate": 0.002, "loss": 2.3331, "step": 396790 }, { "epoch": 1.5339178302484884, "grad_norm": 0.1244562566280365, "learning_rate": 0.002, "loss": 2.3411, "step": 396800 }, { "epoch": 1.5339564874518716, "grad_norm": 0.11564454436302185, "learning_rate": 0.002, "loss": 2.3332, "step": 396810 }, { "epoch": 1.533995144655255, "grad_norm": 0.12104111909866333, "learning_rate": 0.002, "loss": 2.3284, "step": 396820 }, { "epoch": 1.5340338018586384, "grad_norm": 0.10878870636224747, "learning_rate": 0.002, "loss": 2.3204, "step": 396830 }, { "epoch": 1.5340724590620216, "grad_norm": 0.10166596621274948, "learning_rate": 0.002, "loss": 2.3294, "step": 396840 }, { "epoch": 1.5341111162654049, "grad_norm": 0.10973954945802689, "learning_rate": 0.002, "loss": 2.3304, "step": 396850 }, { "epoch": 1.5341497734687883, "grad_norm": 0.09237612038850784, "learning_rate": 0.002, "loss": 2.3279, "step": 396860 }, { "epoch": 1.5341884306721716, "grad_norm": 0.0972457304596901, "learning_rate": 0.002, "loss": 2.3242, "step": 396870 }, { "epoch": 1.5342270878755548, "grad_norm": 0.10720162093639374, "learning_rate": 0.002, "loss": 2.3404, "step": 396880 }, { "epoch": 1.534265745078938, "grad_norm": 0.10426491498947144, "learning_rate": 0.002, "loss": 2.3194, "step": 396890 }, { "epoch": 1.5343044022823213, "grad_norm": 0.1059766411781311, "learning_rate": 0.002, "loss": 2.332, "step": 396900 }, { "epoch": 1.5343430594857046, "grad_norm": 0.09205926954746246, "learning_rate": 0.002, "loss": 2.3233, "step": 396910 }, { "epoch": 1.5343817166890878, "grad_norm": 0.09527570009231567, "learning_rate": 0.002, "loss": 2.3376, "step": 396920 }, { "epoch": 1.534420373892471, "grad_norm": 0.11123289912939072, "learning_rate": 0.002, "loss": 2.328, "step": 396930 }, { "epoch": 1.5344590310958544, "grad_norm": 0.12551504373550415, "learning_rate": 0.002, "loss": 2.3441, "step": 396940 }, { "epoch": 1.5344976882992376, "grad_norm": 0.09078148007392883, "learning_rate": 0.002, "loss": 2.3457, "step": 396950 }, { "epoch": 1.5345363455026209, "grad_norm": 0.09621188044548035, "learning_rate": 0.002, "loss": 2.3494, "step": 396960 }, { "epoch": 1.534575002706004, "grad_norm": 0.12395072728395462, "learning_rate": 0.002, "loss": 2.3269, "step": 396970 }, { "epoch": 1.5346136599093874, "grad_norm": 0.10399714857339859, "learning_rate": 0.002, "loss": 2.3392, "step": 396980 }, { "epoch": 1.5346523171127708, "grad_norm": 0.12199089676141739, "learning_rate": 0.002, "loss": 2.32, "step": 396990 }, { "epoch": 1.534690974316154, "grad_norm": 0.12163523584604263, "learning_rate": 0.002, "loss": 2.3284, "step": 397000 }, { "epoch": 1.5347296315195373, "grad_norm": 0.10492004454135895, "learning_rate": 0.002, "loss": 2.3449, "step": 397010 }, { "epoch": 1.5347682887229206, "grad_norm": 0.0947810560464859, "learning_rate": 0.002, "loss": 2.3591, "step": 397020 }, { "epoch": 1.534806945926304, "grad_norm": 0.10641422867774963, "learning_rate": 0.002, "loss": 2.317, "step": 397030 }, { "epoch": 1.5348456031296873, "grad_norm": 0.1084209531545639, "learning_rate": 0.002, "loss": 2.3268, "step": 397040 }, { "epoch": 1.5348842603330706, "grad_norm": 0.11186496913433075, "learning_rate": 0.002, "loss": 2.3306, "step": 397050 }, { "epoch": 1.5349229175364538, "grad_norm": 0.11108078062534332, "learning_rate": 0.002, "loss": 2.3267, "step": 397060 }, { "epoch": 1.534961574739837, "grad_norm": 0.08751664310693741, "learning_rate": 0.002, "loss": 2.3257, "step": 397070 }, { "epoch": 1.5350002319432203, "grad_norm": 0.13249637186527252, "learning_rate": 0.002, "loss": 2.3313, "step": 397080 }, { "epoch": 1.5350388891466036, "grad_norm": 0.1015789583325386, "learning_rate": 0.002, "loss": 2.3338, "step": 397090 }, { "epoch": 1.5350775463499868, "grad_norm": 0.10492010414600372, "learning_rate": 0.002, "loss": 2.3276, "step": 397100 }, { "epoch": 1.53511620355337, "grad_norm": 0.11029554158449173, "learning_rate": 0.002, "loss": 2.3258, "step": 397110 }, { "epoch": 1.5351548607567533, "grad_norm": 0.0864063948392868, "learning_rate": 0.002, "loss": 2.327, "step": 397120 }, { "epoch": 1.5351935179601366, "grad_norm": 0.11806897073984146, "learning_rate": 0.002, "loss": 2.3249, "step": 397130 }, { "epoch": 1.5352321751635198, "grad_norm": 0.11673596501350403, "learning_rate": 0.002, "loss": 2.3503, "step": 397140 }, { "epoch": 1.535270832366903, "grad_norm": 0.10795406252145767, "learning_rate": 0.002, "loss": 2.3231, "step": 397150 }, { "epoch": 1.5353094895702866, "grad_norm": 0.11805335432291031, "learning_rate": 0.002, "loss": 2.3347, "step": 397160 }, { "epoch": 1.5353481467736698, "grad_norm": 0.10892137140035629, "learning_rate": 0.002, "loss": 2.3262, "step": 397170 }, { "epoch": 1.535386803977053, "grad_norm": 0.1233871653676033, "learning_rate": 0.002, "loss": 2.3197, "step": 397180 }, { "epoch": 1.5354254611804363, "grad_norm": 0.26964542269706726, "learning_rate": 0.002, "loss": 2.3348, "step": 397190 }, { "epoch": 1.5354641183838198, "grad_norm": 0.09856158494949341, "learning_rate": 0.002, "loss": 2.3415, "step": 397200 }, { "epoch": 1.535502775587203, "grad_norm": 0.12949031591415405, "learning_rate": 0.002, "loss": 2.3326, "step": 397210 }, { "epoch": 1.5355414327905863, "grad_norm": 0.11520420014858246, "learning_rate": 0.002, "loss": 2.3325, "step": 397220 }, { "epoch": 1.5355800899939696, "grad_norm": 0.1231561005115509, "learning_rate": 0.002, "loss": 2.3224, "step": 397230 }, { "epoch": 1.5356187471973528, "grad_norm": 0.1025674045085907, "learning_rate": 0.002, "loss": 2.3317, "step": 397240 }, { "epoch": 1.535657404400736, "grad_norm": 0.10563423484563828, "learning_rate": 0.002, "loss": 2.3384, "step": 397250 }, { "epoch": 1.5356960616041193, "grad_norm": 0.11561419069766998, "learning_rate": 0.002, "loss": 2.3288, "step": 397260 }, { "epoch": 1.5357347188075026, "grad_norm": 0.09945831447839737, "learning_rate": 0.002, "loss": 2.3488, "step": 397270 }, { "epoch": 1.5357733760108858, "grad_norm": 0.11000964045524597, "learning_rate": 0.002, "loss": 2.3285, "step": 397280 }, { "epoch": 1.535812033214269, "grad_norm": 0.11010735481977463, "learning_rate": 0.002, "loss": 2.3411, "step": 397290 }, { "epoch": 1.5358506904176523, "grad_norm": 0.11759929358959198, "learning_rate": 0.002, "loss": 2.3264, "step": 397300 }, { "epoch": 1.5358893476210356, "grad_norm": 0.10051693022251129, "learning_rate": 0.002, "loss": 2.3348, "step": 397310 }, { "epoch": 1.5359280048244188, "grad_norm": 0.10334565490484238, "learning_rate": 0.002, "loss": 2.3275, "step": 397320 }, { "epoch": 1.5359666620278023, "grad_norm": 0.11563760787248611, "learning_rate": 0.002, "loss": 2.3299, "step": 397330 }, { "epoch": 1.5360053192311856, "grad_norm": 0.1098022386431694, "learning_rate": 0.002, "loss": 2.3434, "step": 397340 }, { "epoch": 1.5360439764345688, "grad_norm": 0.10105536133050919, "learning_rate": 0.002, "loss": 2.3279, "step": 397350 }, { "epoch": 1.536082633637952, "grad_norm": 0.09732984006404877, "learning_rate": 0.002, "loss": 2.3397, "step": 397360 }, { "epoch": 1.5361212908413355, "grad_norm": 0.10375712811946869, "learning_rate": 0.002, "loss": 2.3327, "step": 397370 }, { "epoch": 1.5361599480447188, "grad_norm": 0.13661228120326996, "learning_rate": 0.002, "loss": 2.3512, "step": 397380 }, { "epoch": 1.536198605248102, "grad_norm": 0.09348346292972565, "learning_rate": 0.002, "loss": 2.3414, "step": 397390 }, { "epoch": 1.5362372624514853, "grad_norm": 0.10297410190105438, "learning_rate": 0.002, "loss": 2.3275, "step": 397400 }, { "epoch": 1.5362759196548685, "grad_norm": 0.10874597728252411, "learning_rate": 0.002, "loss": 2.3299, "step": 397410 }, { "epoch": 1.5363145768582518, "grad_norm": 0.12305815517902374, "learning_rate": 0.002, "loss": 2.3135, "step": 397420 }, { "epoch": 1.536353234061635, "grad_norm": 0.09961279481649399, "learning_rate": 0.002, "loss": 2.3346, "step": 397430 }, { "epoch": 1.5363918912650183, "grad_norm": 0.0989786684513092, "learning_rate": 0.002, "loss": 2.3328, "step": 397440 }, { "epoch": 1.5364305484684015, "grad_norm": 0.10146909207105637, "learning_rate": 0.002, "loss": 2.3355, "step": 397450 }, { "epoch": 1.5364692056717848, "grad_norm": 0.10440550744533539, "learning_rate": 0.002, "loss": 2.3449, "step": 397460 }, { "epoch": 1.536507862875168, "grad_norm": 0.11641864478588104, "learning_rate": 0.002, "loss": 2.3211, "step": 397470 }, { "epoch": 1.5365465200785513, "grad_norm": 0.0965433195233345, "learning_rate": 0.002, "loss": 2.3154, "step": 397480 }, { "epoch": 1.5365851772819346, "grad_norm": 0.1075579971075058, "learning_rate": 0.002, "loss": 2.3489, "step": 397490 }, { "epoch": 1.536623834485318, "grad_norm": 0.09854072332382202, "learning_rate": 0.002, "loss": 2.3349, "step": 397500 }, { "epoch": 1.5366624916887013, "grad_norm": 0.08956795185804367, "learning_rate": 0.002, "loss": 2.3406, "step": 397510 }, { "epoch": 1.5367011488920845, "grad_norm": 0.11217138916254044, "learning_rate": 0.002, "loss": 2.3356, "step": 397520 }, { "epoch": 1.5367398060954678, "grad_norm": 0.11101838946342468, "learning_rate": 0.002, "loss": 2.319, "step": 397530 }, { "epoch": 1.5367784632988513, "grad_norm": 0.10932407528162003, "learning_rate": 0.002, "loss": 2.3344, "step": 397540 }, { "epoch": 1.5368171205022345, "grad_norm": 0.10210220515727997, "learning_rate": 0.002, "loss": 2.3275, "step": 397550 }, { "epoch": 1.5368557777056178, "grad_norm": 0.11524409055709839, "learning_rate": 0.002, "loss": 2.3266, "step": 397560 }, { "epoch": 1.536894434909001, "grad_norm": 0.1281406432390213, "learning_rate": 0.002, "loss": 2.3242, "step": 397570 }, { "epoch": 1.5369330921123843, "grad_norm": 0.09939172863960266, "learning_rate": 0.002, "loss": 2.3295, "step": 397580 }, { "epoch": 1.5369717493157675, "grad_norm": 0.09247737377882004, "learning_rate": 0.002, "loss": 2.337, "step": 397590 }, { "epoch": 1.5370104065191508, "grad_norm": 0.09294746816158295, "learning_rate": 0.002, "loss": 2.3258, "step": 397600 }, { "epoch": 1.537049063722534, "grad_norm": 0.13566049933433533, "learning_rate": 0.002, "loss": 2.3339, "step": 397610 }, { "epoch": 1.5370877209259173, "grad_norm": 0.10132917016744614, "learning_rate": 0.002, "loss": 2.3267, "step": 397620 }, { "epoch": 1.5371263781293005, "grad_norm": 0.09911826252937317, "learning_rate": 0.002, "loss": 2.3391, "step": 397630 }, { "epoch": 1.5371650353326838, "grad_norm": 0.0962437316775322, "learning_rate": 0.002, "loss": 2.3153, "step": 397640 }, { "epoch": 1.537203692536067, "grad_norm": 0.11543872952461243, "learning_rate": 0.002, "loss": 2.3324, "step": 397650 }, { "epoch": 1.5372423497394503, "grad_norm": 0.10791458934545517, "learning_rate": 0.002, "loss": 2.3399, "step": 397660 }, { "epoch": 1.5372810069428338, "grad_norm": 0.09911391139030457, "learning_rate": 0.002, "loss": 2.3562, "step": 397670 }, { "epoch": 1.537319664146217, "grad_norm": 0.10981152206659317, "learning_rate": 0.002, "loss": 2.3265, "step": 397680 }, { "epoch": 1.5373583213496003, "grad_norm": 0.10167837142944336, "learning_rate": 0.002, "loss": 2.3394, "step": 397690 }, { "epoch": 1.5373969785529835, "grad_norm": 0.09902778267860413, "learning_rate": 0.002, "loss": 2.3432, "step": 397700 }, { "epoch": 1.537435635756367, "grad_norm": 0.09959257394075394, "learning_rate": 0.002, "loss": 2.3342, "step": 397710 }, { "epoch": 1.5374742929597502, "grad_norm": 0.1057918518781662, "learning_rate": 0.002, "loss": 2.3369, "step": 397720 }, { "epoch": 1.5375129501631335, "grad_norm": 0.13278275728225708, "learning_rate": 0.002, "loss": 2.3358, "step": 397730 }, { "epoch": 1.5375516073665167, "grad_norm": 0.11418651789426804, "learning_rate": 0.002, "loss": 2.3437, "step": 397740 }, { "epoch": 1.5375902645699, "grad_norm": 0.10052768886089325, "learning_rate": 0.002, "loss": 2.3315, "step": 397750 }, { "epoch": 1.5376289217732833, "grad_norm": 0.09811482578516006, "learning_rate": 0.002, "loss": 2.3265, "step": 397760 }, { "epoch": 1.5376675789766665, "grad_norm": 0.15639714896678925, "learning_rate": 0.002, "loss": 2.3134, "step": 397770 }, { "epoch": 1.5377062361800498, "grad_norm": 0.10803427547216415, "learning_rate": 0.002, "loss": 2.3346, "step": 397780 }, { "epoch": 1.537744893383433, "grad_norm": 0.09680962562561035, "learning_rate": 0.002, "loss": 2.3347, "step": 397790 }, { "epoch": 1.5377835505868163, "grad_norm": 0.10290376096963882, "learning_rate": 0.002, "loss": 2.3289, "step": 397800 }, { "epoch": 1.5378222077901995, "grad_norm": 0.09839563071727753, "learning_rate": 0.002, "loss": 2.3365, "step": 397810 }, { "epoch": 1.5378608649935828, "grad_norm": 0.08913205564022064, "learning_rate": 0.002, "loss": 2.3155, "step": 397820 }, { "epoch": 1.537899522196966, "grad_norm": 0.10999598354101181, "learning_rate": 0.002, "loss": 2.3319, "step": 397830 }, { "epoch": 1.5379381794003495, "grad_norm": 0.13350215554237366, "learning_rate": 0.002, "loss": 2.3356, "step": 397840 }, { "epoch": 1.5379768366037327, "grad_norm": 0.10599138587713242, "learning_rate": 0.002, "loss": 2.3391, "step": 397850 }, { "epoch": 1.538015493807116, "grad_norm": 0.12027101218700409, "learning_rate": 0.002, "loss": 2.3459, "step": 397860 }, { "epoch": 1.5380541510104992, "grad_norm": 0.09793560951948166, "learning_rate": 0.002, "loss": 2.3278, "step": 397870 }, { "epoch": 1.5380928082138827, "grad_norm": 0.09150359034538269, "learning_rate": 0.002, "loss": 2.3541, "step": 397880 }, { "epoch": 1.538131465417266, "grad_norm": 0.10976289212703705, "learning_rate": 0.002, "loss": 2.3281, "step": 397890 }, { "epoch": 1.5381701226206492, "grad_norm": 0.10149134695529938, "learning_rate": 0.002, "loss": 2.3348, "step": 397900 }, { "epoch": 1.5382087798240325, "grad_norm": 0.10179386287927628, "learning_rate": 0.002, "loss": 2.3501, "step": 397910 }, { "epoch": 1.5382474370274157, "grad_norm": 0.09850426018238068, "learning_rate": 0.002, "loss": 2.3328, "step": 397920 }, { "epoch": 1.538286094230799, "grad_norm": 0.11354361474514008, "learning_rate": 0.002, "loss": 2.3258, "step": 397930 }, { "epoch": 1.5383247514341822, "grad_norm": 0.11222802102565765, "learning_rate": 0.002, "loss": 2.331, "step": 397940 }, { "epoch": 1.5383634086375655, "grad_norm": 0.10113963484764099, "learning_rate": 0.002, "loss": 2.3443, "step": 397950 }, { "epoch": 1.5384020658409487, "grad_norm": 0.09058237820863724, "learning_rate": 0.002, "loss": 2.3398, "step": 397960 }, { "epoch": 1.538440723044332, "grad_norm": 0.09591413289308548, "learning_rate": 0.002, "loss": 2.3405, "step": 397970 }, { "epoch": 1.5384793802477152, "grad_norm": 0.10284294188022614, "learning_rate": 0.002, "loss": 2.3436, "step": 397980 }, { "epoch": 1.5385180374510985, "grad_norm": 0.11103425920009613, "learning_rate": 0.002, "loss": 2.328, "step": 397990 }, { "epoch": 1.538556694654482, "grad_norm": 0.10229439288377762, "learning_rate": 0.002, "loss": 2.3475, "step": 398000 }, { "epoch": 1.5385953518578652, "grad_norm": 0.10271711647510529, "learning_rate": 0.002, "loss": 2.3454, "step": 398010 }, { "epoch": 1.5386340090612485, "grad_norm": 0.11383017897605896, "learning_rate": 0.002, "loss": 2.3114, "step": 398020 }, { "epoch": 1.5386726662646317, "grad_norm": 0.09810294210910797, "learning_rate": 0.002, "loss": 2.3401, "step": 398030 }, { "epoch": 1.538711323468015, "grad_norm": 0.10992488265037537, "learning_rate": 0.002, "loss": 2.322, "step": 398040 }, { "epoch": 1.5387499806713985, "grad_norm": 0.12305788695812225, "learning_rate": 0.002, "loss": 2.3269, "step": 398050 }, { "epoch": 1.5387886378747817, "grad_norm": 0.10567454993724823, "learning_rate": 0.002, "loss": 2.3249, "step": 398060 }, { "epoch": 1.538827295078165, "grad_norm": 0.10405563563108444, "learning_rate": 0.002, "loss": 2.3474, "step": 398070 }, { "epoch": 1.5388659522815482, "grad_norm": 0.115367092192173, "learning_rate": 0.002, "loss": 2.3236, "step": 398080 }, { "epoch": 1.5389046094849315, "grad_norm": 0.11122714728116989, "learning_rate": 0.002, "loss": 2.3223, "step": 398090 }, { "epoch": 1.5389432666883147, "grad_norm": 0.09935643523931503, "learning_rate": 0.002, "loss": 2.3298, "step": 398100 }, { "epoch": 1.538981923891698, "grad_norm": 0.10723993182182312, "learning_rate": 0.002, "loss": 2.3223, "step": 398110 }, { "epoch": 1.5390205810950812, "grad_norm": 0.09951582551002502, "learning_rate": 0.002, "loss": 2.3373, "step": 398120 }, { "epoch": 1.5390592382984645, "grad_norm": 0.12732456624507904, "learning_rate": 0.002, "loss": 2.3147, "step": 398130 }, { "epoch": 1.5390978955018477, "grad_norm": 0.1021832600235939, "learning_rate": 0.002, "loss": 2.3248, "step": 398140 }, { "epoch": 1.539136552705231, "grad_norm": 0.11529269069433212, "learning_rate": 0.002, "loss": 2.336, "step": 398150 }, { "epoch": 1.5391752099086142, "grad_norm": 0.14958423376083374, "learning_rate": 0.002, "loss": 2.3253, "step": 398160 }, { "epoch": 1.5392138671119977, "grad_norm": 0.09263918548822403, "learning_rate": 0.002, "loss": 2.3396, "step": 398170 }, { "epoch": 1.539252524315381, "grad_norm": 0.1017642617225647, "learning_rate": 0.002, "loss": 2.3296, "step": 398180 }, { "epoch": 1.5392911815187642, "grad_norm": 0.09107334166765213, "learning_rate": 0.002, "loss": 2.3184, "step": 398190 }, { "epoch": 1.5393298387221475, "grad_norm": 0.1224852204322815, "learning_rate": 0.002, "loss": 2.3286, "step": 398200 }, { "epoch": 1.5393684959255307, "grad_norm": 0.10259949415922165, "learning_rate": 0.002, "loss": 2.3319, "step": 398210 }, { "epoch": 1.5394071531289142, "grad_norm": 0.1043364554643631, "learning_rate": 0.002, "loss": 2.3301, "step": 398220 }, { "epoch": 1.5394458103322974, "grad_norm": 0.1084597185254097, "learning_rate": 0.002, "loss": 2.3208, "step": 398230 }, { "epoch": 1.5394844675356807, "grad_norm": 0.10298971086740494, "learning_rate": 0.002, "loss": 2.3357, "step": 398240 }, { "epoch": 1.539523124739064, "grad_norm": 0.10925693809986115, "learning_rate": 0.002, "loss": 2.3168, "step": 398250 }, { "epoch": 1.5395617819424472, "grad_norm": 0.09399773925542831, "learning_rate": 0.002, "loss": 2.3268, "step": 398260 }, { "epoch": 1.5396004391458304, "grad_norm": 0.15015877783298492, "learning_rate": 0.002, "loss": 2.3459, "step": 398270 }, { "epoch": 1.5396390963492137, "grad_norm": 0.0967710092663765, "learning_rate": 0.002, "loss": 2.3244, "step": 398280 }, { "epoch": 1.539677753552597, "grad_norm": 0.09569672495126724, "learning_rate": 0.002, "loss": 2.3211, "step": 398290 }, { "epoch": 1.5397164107559802, "grad_norm": 0.09085290133953094, "learning_rate": 0.002, "loss": 2.3334, "step": 398300 }, { "epoch": 1.5397550679593635, "grad_norm": 0.109792560338974, "learning_rate": 0.002, "loss": 2.3402, "step": 398310 }, { "epoch": 1.5397937251627467, "grad_norm": 0.11429277807474136, "learning_rate": 0.002, "loss": 2.3374, "step": 398320 }, { "epoch": 1.53983238236613, "grad_norm": 0.10654623806476593, "learning_rate": 0.002, "loss": 2.3293, "step": 398330 }, { "epoch": 1.5398710395695134, "grad_norm": 0.09431030601263046, "learning_rate": 0.002, "loss": 2.3198, "step": 398340 }, { "epoch": 1.5399096967728967, "grad_norm": 0.10289266705513, "learning_rate": 0.002, "loss": 2.3355, "step": 398350 }, { "epoch": 1.53994835397628, "grad_norm": 0.0963275283575058, "learning_rate": 0.002, "loss": 2.3384, "step": 398360 }, { "epoch": 1.5399870111796632, "grad_norm": 0.12232542783021927, "learning_rate": 0.002, "loss": 2.3416, "step": 398370 }, { "epoch": 1.5400256683830464, "grad_norm": 0.09303968399763107, "learning_rate": 0.002, "loss": 2.3432, "step": 398380 }, { "epoch": 1.54006432558643, "grad_norm": 0.10578244924545288, "learning_rate": 0.002, "loss": 2.3317, "step": 398390 }, { "epoch": 1.5401029827898132, "grad_norm": 0.0913250744342804, "learning_rate": 0.002, "loss": 2.3339, "step": 398400 }, { "epoch": 1.5401416399931964, "grad_norm": 0.10749612748622894, "learning_rate": 0.002, "loss": 2.3188, "step": 398410 }, { "epoch": 1.5401802971965797, "grad_norm": 0.09793798625469208, "learning_rate": 0.002, "loss": 2.3232, "step": 398420 }, { "epoch": 1.540218954399963, "grad_norm": 0.09848188608884811, "learning_rate": 0.002, "loss": 2.3244, "step": 398430 }, { "epoch": 1.5402576116033462, "grad_norm": 0.10011995583772659, "learning_rate": 0.002, "loss": 2.3371, "step": 398440 }, { "epoch": 1.5402962688067294, "grad_norm": 0.08867098391056061, "learning_rate": 0.002, "loss": 2.3331, "step": 398450 }, { "epoch": 1.5403349260101127, "grad_norm": 0.08888798952102661, "learning_rate": 0.002, "loss": 2.3189, "step": 398460 }, { "epoch": 1.540373583213496, "grad_norm": 0.09818118065595627, "learning_rate": 0.002, "loss": 2.3452, "step": 398470 }, { "epoch": 1.5404122404168792, "grad_norm": 0.09578051418066025, "learning_rate": 0.002, "loss": 2.3285, "step": 398480 }, { "epoch": 1.5404508976202624, "grad_norm": 0.10496256500482559, "learning_rate": 0.002, "loss": 2.3167, "step": 398490 }, { "epoch": 1.5404895548236457, "grad_norm": 0.09453893452882767, "learning_rate": 0.002, "loss": 2.3329, "step": 398500 }, { "epoch": 1.5405282120270292, "grad_norm": 0.09974279999732971, "learning_rate": 0.002, "loss": 2.32, "step": 398510 }, { "epoch": 1.5405668692304124, "grad_norm": 0.09207332134246826, "learning_rate": 0.002, "loss": 2.3227, "step": 398520 }, { "epoch": 1.5406055264337957, "grad_norm": 0.15341641008853912, "learning_rate": 0.002, "loss": 2.3243, "step": 398530 }, { "epoch": 1.540644183637179, "grad_norm": 0.10787559300661087, "learning_rate": 0.002, "loss": 2.3523, "step": 398540 }, { "epoch": 1.5406828408405624, "grad_norm": 0.09427941590547562, "learning_rate": 0.002, "loss": 2.3246, "step": 398550 }, { "epoch": 1.5407214980439456, "grad_norm": 0.13333727419376373, "learning_rate": 0.002, "loss": 2.3325, "step": 398560 }, { "epoch": 1.540760155247329, "grad_norm": 0.11192180216312408, "learning_rate": 0.002, "loss": 2.3254, "step": 398570 }, { "epoch": 1.5407988124507122, "grad_norm": 0.10843715071678162, "learning_rate": 0.002, "loss": 2.3125, "step": 398580 }, { "epoch": 1.5408374696540954, "grad_norm": 0.10502120107412338, "learning_rate": 0.002, "loss": 2.3368, "step": 398590 }, { "epoch": 1.5408761268574787, "grad_norm": 0.10552159696817398, "learning_rate": 0.002, "loss": 2.3223, "step": 398600 }, { "epoch": 1.540914784060862, "grad_norm": 0.11767709255218506, "learning_rate": 0.002, "loss": 2.3246, "step": 398610 }, { "epoch": 1.5409534412642452, "grad_norm": 0.12128245830535889, "learning_rate": 0.002, "loss": 2.3441, "step": 398620 }, { "epoch": 1.5409920984676284, "grad_norm": 0.10676980763673782, "learning_rate": 0.002, "loss": 2.3164, "step": 398630 }, { "epoch": 1.5410307556710117, "grad_norm": 0.08841355890035629, "learning_rate": 0.002, "loss": 2.3399, "step": 398640 }, { "epoch": 1.541069412874395, "grad_norm": 0.11226193606853485, "learning_rate": 0.002, "loss": 2.3419, "step": 398650 }, { "epoch": 1.5411080700777782, "grad_norm": 0.13144473731517792, "learning_rate": 0.002, "loss": 2.3453, "step": 398660 }, { "epoch": 1.5411467272811614, "grad_norm": 0.12197545170783997, "learning_rate": 0.002, "loss": 2.3466, "step": 398670 }, { "epoch": 1.541185384484545, "grad_norm": 0.10014840960502625, "learning_rate": 0.002, "loss": 2.3252, "step": 398680 }, { "epoch": 1.5412240416879281, "grad_norm": 0.11811138689517975, "learning_rate": 0.002, "loss": 2.3298, "step": 398690 }, { "epoch": 1.5412626988913114, "grad_norm": 0.10400564223527908, "learning_rate": 0.002, "loss": 2.3255, "step": 398700 }, { "epoch": 1.5413013560946947, "grad_norm": 0.1099502369761467, "learning_rate": 0.002, "loss": 2.3338, "step": 398710 }, { "epoch": 1.5413400132980781, "grad_norm": 0.10907261818647385, "learning_rate": 0.002, "loss": 2.3279, "step": 398720 }, { "epoch": 1.5413786705014614, "grad_norm": 0.11199428141117096, "learning_rate": 0.002, "loss": 2.3119, "step": 398730 }, { "epoch": 1.5414173277048446, "grad_norm": 0.10097459703683853, "learning_rate": 0.002, "loss": 2.3248, "step": 398740 }, { "epoch": 1.5414559849082279, "grad_norm": 0.09460058063268661, "learning_rate": 0.002, "loss": 2.3223, "step": 398750 }, { "epoch": 1.5414946421116111, "grad_norm": 0.13177794218063354, "learning_rate": 0.002, "loss": 2.3392, "step": 398760 }, { "epoch": 1.5415332993149944, "grad_norm": 0.11397556215524673, "learning_rate": 0.002, "loss": 2.3207, "step": 398770 }, { "epoch": 1.5415719565183776, "grad_norm": 0.09486106038093567, "learning_rate": 0.002, "loss": 2.317, "step": 398780 }, { "epoch": 1.541610613721761, "grad_norm": 0.11118344962596893, "learning_rate": 0.002, "loss": 2.3234, "step": 398790 }, { "epoch": 1.5416492709251441, "grad_norm": 0.13064731657505035, "learning_rate": 0.002, "loss": 2.3297, "step": 398800 }, { "epoch": 1.5416879281285274, "grad_norm": 0.10388616472482681, "learning_rate": 0.002, "loss": 2.3415, "step": 398810 }, { "epoch": 1.5417265853319106, "grad_norm": 0.12096244096755981, "learning_rate": 0.002, "loss": 2.3358, "step": 398820 }, { "epoch": 1.541765242535294, "grad_norm": 0.10955790430307388, "learning_rate": 0.002, "loss": 2.3306, "step": 398830 }, { "epoch": 1.5418038997386772, "grad_norm": 0.13898706436157227, "learning_rate": 0.002, "loss": 2.331, "step": 398840 }, { "epoch": 1.5418425569420606, "grad_norm": 0.09149462729692459, "learning_rate": 0.002, "loss": 2.3139, "step": 398850 }, { "epoch": 1.5418812141454439, "grad_norm": 0.12002673745155334, "learning_rate": 0.002, "loss": 2.3387, "step": 398860 }, { "epoch": 1.5419198713488271, "grad_norm": 0.09715425968170166, "learning_rate": 0.002, "loss": 2.3403, "step": 398870 }, { "epoch": 1.5419585285522104, "grad_norm": 0.11434023827314377, "learning_rate": 0.002, "loss": 2.327, "step": 398880 }, { "epoch": 1.5419971857555939, "grad_norm": 0.10705401003360748, "learning_rate": 0.002, "loss": 2.3436, "step": 398890 }, { "epoch": 1.542035842958977, "grad_norm": 0.10347422957420349, "learning_rate": 0.002, "loss": 2.3118, "step": 398900 }, { "epoch": 1.5420745001623604, "grad_norm": 0.10077045857906342, "learning_rate": 0.002, "loss": 2.3259, "step": 398910 }, { "epoch": 1.5421131573657436, "grad_norm": 0.11980032175779343, "learning_rate": 0.002, "loss": 2.3213, "step": 398920 }, { "epoch": 1.5421518145691269, "grad_norm": 0.08976440131664276, "learning_rate": 0.002, "loss": 2.328, "step": 398930 }, { "epoch": 1.5421904717725101, "grad_norm": 0.08498923480510712, "learning_rate": 0.002, "loss": 2.3384, "step": 398940 }, { "epoch": 1.5422291289758934, "grad_norm": 0.10932603478431702, "learning_rate": 0.002, "loss": 2.3112, "step": 398950 }, { "epoch": 1.5422677861792766, "grad_norm": 0.10000132769346237, "learning_rate": 0.002, "loss": 2.3389, "step": 398960 }, { "epoch": 1.5423064433826599, "grad_norm": 0.09836205095052719, "learning_rate": 0.002, "loss": 2.3269, "step": 398970 }, { "epoch": 1.5423451005860431, "grad_norm": 0.10029017180204391, "learning_rate": 0.002, "loss": 2.34, "step": 398980 }, { "epoch": 1.5423837577894264, "grad_norm": 0.10624997317790985, "learning_rate": 0.002, "loss": 2.3305, "step": 398990 }, { "epoch": 1.5424224149928096, "grad_norm": 0.1110740602016449, "learning_rate": 0.002, "loss": 2.3293, "step": 399000 }, { "epoch": 1.5424610721961929, "grad_norm": 0.09967368841171265, "learning_rate": 0.002, "loss": 2.3513, "step": 399010 }, { "epoch": 1.5424997293995764, "grad_norm": 0.08127487450838089, "learning_rate": 0.002, "loss": 2.3261, "step": 399020 }, { "epoch": 1.5425383866029596, "grad_norm": 0.11044812202453613, "learning_rate": 0.002, "loss": 2.3247, "step": 399030 }, { "epoch": 1.5425770438063429, "grad_norm": 0.10314445942640305, "learning_rate": 0.002, "loss": 2.3422, "step": 399040 }, { "epoch": 1.5426157010097261, "grad_norm": 0.24173246324062347, "learning_rate": 0.002, "loss": 2.3288, "step": 399050 }, { "epoch": 1.5426543582131096, "grad_norm": 0.1080498993396759, "learning_rate": 0.002, "loss": 2.335, "step": 399060 }, { "epoch": 1.5426930154164928, "grad_norm": 0.12131674587726593, "learning_rate": 0.002, "loss": 2.3251, "step": 399070 }, { "epoch": 1.542731672619876, "grad_norm": 0.09877556562423706, "learning_rate": 0.002, "loss": 2.3221, "step": 399080 }, { "epoch": 1.5427703298232593, "grad_norm": 0.10076315701007843, "learning_rate": 0.002, "loss": 2.3318, "step": 399090 }, { "epoch": 1.5428089870266426, "grad_norm": 0.08925936371088028, "learning_rate": 0.002, "loss": 2.3218, "step": 399100 }, { "epoch": 1.5428476442300258, "grad_norm": 0.12738700211048126, "learning_rate": 0.002, "loss": 2.3305, "step": 399110 }, { "epoch": 1.542886301433409, "grad_norm": 0.10984507948160172, "learning_rate": 0.002, "loss": 2.3205, "step": 399120 }, { "epoch": 1.5429249586367924, "grad_norm": 0.18421679735183716, "learning_rate": 0.002, "loss": 2.3249, "step": 399130 }, { "epoch": 1.5429636158401756, "grad_norm": 0.11120090633630753, "learning_rate": 0.002, "loss": 2.3409, "step": 399140 }, { "epoch": 1.5430022730435589, "grad_norm": 0.09735604375600815, "learning_rate": 0.002, "loss": 2.3463, "step": 399150 }, { "epoch": 1.543040930246942, "grad_norm": 0.09944828599691391, "learning_rate": 0.002, "loss": 2.3432, "step": 399160 }, { "epoch": 1.5430795874503254, "grad_norm": 0.14178155362606049, "learning_rate": 0.002, "loss": 2.3545, "step": 399170 }, { "epoch": 1.5431182446537086, "grad_norm": 0.09563390910625458, "learning_rate": 0.002, "loss": 2.3227, "step": 399180 }, { "epoch": 1.543156901857092, "grad_norm": 0.12702666223049164, "learning_rate": 0.002, "loss": 2.3496, "step": 399190 }, { "epoch": 1.5431955590604753, "grad_norm": 0.1233954131603241, "learning_rate": 0.002, "loss": 2.3362, "step": 399200 }, { "epoch": 1.5432342162638586, "grad_norm": 0.09861844778060913, "learning_rate": 0.002, "loss": 2.3483, "step": 399210 }, { "epoch": 1.5432728734672418, "grad_norm": 0.37939682602882385, "learning_rate": 0.002, "loss": 2.3676, "step": 399220 }, { "epoch": 1.5433115306706253, "grad_norm": 0.12103582173585892, "learning_rate": 0.002, "loss": 2.3678, "step": 399230 }, { "epoch": 1.5433501878740086, "grad_norm": 0.08898939937353134, "learning_rate": 0.002, "loss": 2.3522, "step": 399240 }, { "epoch": 1.5433888450773918, "grad_norm": 0.1117500364780426, "learning_rate": 0.002, "loss": 2.3441, "step": 399250 }, { "epoch": 1.543427502280775, "grad_norm": 0.1187075525522232, "learning_rate": 0.002, "loss": 2.3271, "step": 399260 }, { "epoch": 1.5434661594841583, "grad_norm": 0.10672876983880997, "learning_rate": 0.002, "loss": 2.3404, "step": 399270 }, { "epoch": 1.5435048166875416, "grad_norm": 0.12173470854759216, "learning_rate": 0.002, "loss": 2.3284, "step": 399280 }, { "epoch": 1.5435434738909248, "grad_norm": 0.10364934802055359, "learning_rate": 0.002, "loss": 2.3351, "step": 399290 }, { "epoch": 1.543582131094308, "grad_norm": 0.09502959996461868, "learning_rate": 0.002, "loss": 2.329, "step": 399300 }, { "epoch": 1.5436207882976913, "grad_norm": 0.11555942893028259, "learning_rate": 0.002, "loss": 2.3228, "step": 399310 }, { "epoch": 1.5436594455010746, "grad_norm": 0.10680690407752991, "learning_rate": 0.002, "loss": 2.3372, "step": 399320 }, { "epoch": 1.5436981027044578, "grad_norm": 0.11268538981676102, "learning_rate": 0.002, "loss": 2.3365, "step": 399330 }, { "epoch": 1.543736759907841, "grad_norm": 0.09227351099252701, "learning_rate": 0.002, "loss": 2.3301, "step": 399340 }, { "epoch": 1.5437754171112243, "grad_norm": 0.09874910861253738, "learning_rate": 0.002, "loss": 2.3328, "step": 399350 }, { "epoch": 1.5438140743146078, "grad_norm": 0.12444418668746948, "learning_rate": 0.002, "loss": 2.3232, "step": 399360 }, { "epoch": 1.543852731517991, "grad_norm": 0.10006395727396011, "learning_rate": 0.002, "loss": 2.3535, "step": 399370 }, { "epoch": 1.5438913887213743, "grad_norm": 0.10121716558933258, "learning_rate": 0.002, "loss": 2.3307, "step": 399380 }, { "epoch": 1.5439300459247576, "grad_norm": 0.32989785075187683, "learning_rate": 0.002, "loss": 2.3359, "step": 399390 }, { "epoch": 1.543968703128141, "grad_norm": 0.10850565880537033, "learning_rate": 0.002, "loss": 2.3344, "step": 399400 }, { "epoch": 1.5440073603315243, "grad_norm": 0.10429178178310394, "learning_rate": 0.002, "loss": 2.3259, "step": 399410 }, { "epoch": 1.5440460175349076, "grad_norm": 0.09502607583999634, "learning_rate": 0.002, "loss": 2.3354, "step": 399420 }, { "epoch": 1.5440846747382908, "grad_norm": 0.09839571267366409, "learning_rate": 0.002, "loss": 2.3239, "step": 399430 }, { "epoch": 1.544123331941674, "grad_norm": 0.1268584430217743, "learning_rate": 0.002, "loss": 2.3434, "step": 399440 }, { "epoch": 1.5441619891450573, "grad_norm": 0.10402169078588486, "learning_rate": 0.002, "loss": 2.3471, "step": 399450 }, { "epoch": 1.5442006463484406, "grad_norm": 0.11737069487571716, "learning_rate": 0.002, "loss": 2.3291, "step": 399460 }, { "epoch": 1.5442393035518238, "grad_norm": 0.09239500761032104, "learning_rate": 0.002, "loss": 2.3337, "step": 399470 }, { "epoch": 1.544277960755207, "grad_norm": 0.11623097956180573, "learning_rate": 0.002, "loss": 2.346, "step": 399480 }, { "epoch": 1.5443166179585903, "grad_norm": 0.09053388983011246, "learning_rate": 0.002, "loss": 2.3283, "step": 399490 }, { "epoch": 1.5443552751619736, "grad_norm": 0.10397826880216599, "learning_rate": 0.002, "loss": 2.329, "step": 399500 }, { "epoch": 1.5443939323653568, "grad_norm": 0.11161120235919952, "learning_rate": 0.002, "loss": 2.3302, "step": 399510 }, { "epoch": 1.54443258956874, "grad_norm": 0.11719498038291931, "learning_rate": 0.002, "loss": 2.3298, "step": 399520 }, { "epoch": 1.5444712467721236, "grad_norm": 0.1244446188211441, "learning_rate": 0.002, "loss": 2.3275, "step": 399530 }, { "epoch": 1.5445099039755068, "grad_norm": 0.10088226944208145, "learning_rate": 0.002, "loss": 2.338, "step": 399540 }, { "epoch": 1.54454856117889, "grad_norm": 0.10080454498529434, "learning_rate": 0.002, "loss": 2.3314, "step": 399550 }, { "epoch": 1.5445872183822733, "grad_norm": 0.09458685666322708, "learning_rate": 0.002, "loss": 2.3274, "step": 399560 }, { "epoch": 1.5446258755856568, "grad_norm": 0.1011287122964859, "learning_rate": 0.002, "loss": 2.3316, "step": 399570 }, { "epoch": 1.54466453278904, "grad_norm": 0.1155065968632698, "learning_rate": 0.002, "loss": 2.3448, "step": 399580 }, { "epoch": 1.5447031899924233, "grad_norm": 0.0928688570857048, "learning_rate": 0.002, "loss": 2.3263, "step": 399590 }, { "epoch": 1.5447418471958065, "grad_norm": 0.13152991235256195, "learning_rate": 0.002, "loss": 2.3456, "step": 399600 }, { "epoch": 1.5447805043991898, "grad_norm": 0.09968362748622894, "learning_rate": 0.002, "loss": 2.3306, "step": 399610 }, { "epoch": 1.544819161602573, "grad_norm": 0.10470478981733322, "learning_rate": 0.002, "loss": 2.3394, "step": 399620 }, { "epoch": 1.5448578188059563, "grad_norm": 0.1123451516032219, "learning_rate": 0.002, "loss": 2.3224, "step": 399630 }, { "epoch": 1.5448964760093395, "grad_norm": 0.11159180849790573, "learning_rate": 0.002, "loss": 2.325, "step": 399640 }, { "epoch": 1.5449351332127228, "grad_norm": 0.0886734277009964, "learning_rate": 0.002, "loss": 2.3418, "step": 399650 }, { "epoch": 1.544973790416106, "grad_norm": 0.10552497208118439, "learning_rate": 0.002, "loss": 2.3302, "step": 399660 }, { "epoch": 1.5450124476194893, "grad_norm": 0.10136623680591583, "learning_rate": 0.002, "loss": 2.3274, "step": 399670 }, { "epoch": 1.5450511048228726, "grad_norm": 0.09759674221277237, "learning_rate": 0.002, "loss": 2.3377, "step": 399680 }, { "epoch": 1.5450897620262558, "grad_norm": 0.1186317577958107, "learning_rate": 0.002, "loss": 2.3482, "step": 399690 }, { "epoch": 1.5451284192296393, "grad_norm": 0.10864102095365524, "learning_rate": 0.002, "loss": 2.326, "step": 399700 }, { "epoch": 1.5451670764330225, "grad_norm": 0.11893132328987122, "learning_rate": 0.002, "loss": 2.3152, "step": 399710 }, { "epoch": 1.5452057336364058, "grad_norm": 0.109439916908741, "learning_rate": 0.002, "loss": 2.3408, "step": 399720 }, { "epoch": 1.545244390839789, "grad_norm": 0.10437440127134323, "learning_rate": 0.002, "loss": 2.3165, "step": 399730 }, { "epoch": 1.5452830480431725, "grad_norm": 0.09925030171871185, "learning_rate": 0.002, "loss": 2.3278, "step": 399740 }, { "epoch": 1.5453217052465558, "grad_norm": 0.09538068622350693, "learning_rate": 0.002, "loss": 2.3163, "step": 399750 }, { "epoch": 1.545360362449939, "grad_norm": 0.09987644851207733, "learning_rate": 0.002, "loss": 2.3424, "step": 399760 }, { "epoch": 1.5453990196533223, "grad_norm": 0.2534315586090088, "learning_rate": 0.002, "loss": 2.3256, "step": 399770 }, { "epoch": 1.5454376768567055, "grad_norm": 0.10734555870294571, "learning_rate": 0.002, "loss": 2.3369, "step": 399780 }, { "epoch": 1.5454763340600888, "grad_norm": 0.09567726403474808, "learning_rate": 0.002, "loss": 2.3348, "step": 399790 }, { "epoch": 1.545514991263472, "grad_norm": 0.10363011807203293, "learning_rate": 0.002, "loss": 2.3337, "step": 399800 }, { "epoch": 1.5455536484668553, "grad_norm": 0.10830151289701462, "learning_rate": 0.002, "loss": 2.338, "step": 399810 }, { "epoch": 1.5455923056702385, "grad_norm": 0.09303846955299377, "learning_rate": 0.002, "loss": 2.3353, "step": 399820 }, { "epoch": 1.5456309628736218, "grad_norm": 0.08819768577814102, "learning_rate": 0.002, "loss": 2.3363, "step": 399830 }, { "epoch": 1.545669620077005, "grad_norm": 0.09079156070947647, "learning_rate": 0.002, "loss": 2.3328, "step": 399840 }, { "epoch": 1.5457082772803883, "grad_norm": 0.11149446666240692, "learning_rate": 0.002, "loss": 2.3422, "step": 399850 }, { "epoch": 1.5457469344837715, "grad_norm": 0.5256873965263367, "learning_rate": 0.002, "loss": 2.3259, "step": 399860 }, { "epoch": 1.545785591687155, "grad_norm": 0.1418309509754181, "learning_rate": 0.002, "loss": 2.3457, "step": 399870 }, { "epoch": 1.5458242488905383, "grad_norm": 0.3079683482646942, "learning_rate": 0.002, "loss": 2.3274, "step": 399880 }, { "epoch": 1.5458629060939215, "grad_norm": 0.1174551472067833, "learning_rate": 0.002, "loss": 2.3382, "step": 399890 }, { "epoch": 1.5459015632973048, "grad_norm": 0.10708323121070862, "learning_rate": 0.002, "loss": 2.3283, "step": 399900 }, { "epoch": 1.5459402205006882, "grad_norm": 0.18589606881141663, "learning_rate": 0.002, "loss": 2.3578, "step": 399910 }, { "epoch": 1.5459788777040715, "grad_norm": 0.12244658917188644, "learning_rate": 0.002, "loss": 2.3288, "step": 399920 }, { "epoch": 1.5460175349074547, "grad_norm": 0.10290535539388657, "learning_rate": 0.002, "loss": 2.3307, "step": 399930 }, { "epoch": 1.546056192110838, "grad_norm": 0.10942217707633972, "learning_rate": 0.002, "loss": 2.3483, "step": 399940 }, { "epoch": 1.5460948493142213, "grad_norm": 0.10202532261610031, "learning_rate": 0.002, "loss": 2.3316, "step": 399950 }, { "epoch": 1.5461335065176045, "grad_norm": 0.09840740263462067, "learning_rate": 0.002, "loss": 2.3317, "step": 399960 }, { "epoch": 1.5461721637209878, "grad_norm": 0.11503361165523529, "learning_rate": 0.002, "loss": 2.3333, "step": 399970 }, { "epoch": 1.546210820924371, "grad_norm": 0.10702019184827805, "learning_rate": 0.002, "loss": 2.3352, "step": 399980 }, { "epoch": 1.5462494781277543, "grad_norm": 0.4364302456378937, "learning_rate": 0.002, "loss": 2.3415, "step": 399990 }, { "epoch": 1.5462881353311375, "grad_norm": 0.1672603040933609, "learning_rate": 0.002, "loss": 2.3276, "step": 400000 }, { "epoch": 1.5463267925345208, "grad_norm": 0.09613791108131409, "learning_rate": 0.00198, "loss": 2.3347, "step": 400010 }, { "epoch": 1.546365449737904, "grad_norm": 0.12125419080257416, "learning_rate": 0.001971715728752538, "loss": 2.3203, "step": 400020 }, { "epoch": 1.5464041069412875, "grad_norm": 0.10514193773269653, "learning_rate": 0.0019653589838486227, "loss": 2.3491, "step": 400030 }, { "epoch": 1.5464427641446707, "grad_norm": 0.1063866913318634, "learning_rate": 0.00196, "loss": 2.3327, "step": 400040 }, { "epoch": 1.546481421348054, "grad_norm": 0.09683094173669815, "learning_rate": 0.001955278640450004, "loss": 2.3387, "step": 400050 }, { "epoch": 1.5465200785514372, "grad_norm": 0.10151323676109314, "learning_rate": 0.0019510102051443366, "loss": 2.3366, "step": 400060 }, { "epoch": 1.5465587357548205, "grad_norm": 0.10045294463634491, "learning_rate": 0.0019470849737787082, "loss": 2.3392, "step": 400070 }, { "epoch": 1.546597392958204, "grad_norm": 0.10340237617492676, "learning_rate": 0.0019434314575050762, "loss": 2.3339, "step": 400080 }, { "epoch": 1.5466360501615872, "grad_norm": 0.1004454717040062, "learning_rate": 0.0019399999999999999, "loss": 2.326, "step": 400090 }, { "epoch": 1.5466747073649705, "grad_norm": 0.09741810709238052, "learning_rate": 0.0019367544467966324, "loss": 2.3313, "step": 400100 }, { "epoch": 1.5467133645683537, "grad_norm": 0.10408639907836914, "learning_rate": 0.0019336675041928919, "loss": 2.3397, "step": 400110 }, { "epoch": 1.546752021771737, "grad_norm": 0.11467229574918747, "learning_rate": 0.001930717967697245, "loss": 2.3468, "step": 400120 }, { "epoch": 1.5467906789751202, "grad_norm": 0.102940134704113, "learning_rate": 0.00192788897449072, "loss": 2.3373, "step": 400130 }, { "epoch": 1.5468293361785035, "grad_norm": 0.09272021800279617, "learning_rate": 0.0019251668522645212, "loss": 2.3421, "step": 400140 }, { "epoch": 1.5468679933818867, "grad_norm": 0.09395978599786758, "learning_rate": 0.0019225403330758518, "loss": 2.3284, "step": 400150 }, { "epoch": 1.54690665058527, "grad_norm": 0.10013695806264877, "learning_rate": 0.00192, "loss": 2.3276, "step": 400160 }, { "epoch": 1.5469453077886532, "grad_norm": 0.10074548423290253, "learning_rate": 0.0019175378874876467, "loss": 2.3231, "step": 400170 }, { "epoch": 1.5469839649920365, "grad_norm": 0.10383658856153488, "learning_rate": 0.0019151471862576144, "loss": 2.3379, "step": 400180 }, { "epoch": 1.5470226221954197, "grad_norm": 0.11273407936096191, "learning_rate": 0.0019128220211291867, "loss": 2.3302, "step": 400190 }, { "epoch": 1.5470612793988032, "grad_norm": 0.10116290301084518, "learning_rate": 0.0019105572809000086, "loss": 2.336, "step": 400200 }, { "epoch": 1.5470999366021865, "grad_norm": 0.0944078117609024, "learning_rate": 0.0019083484861008833, "loss": 2.3357, "step": 400210 }, { "epoch": 1.5471385938055697, "grad_norm": 0.09677086025476456, "learning_rate": 0.0019061916848035314, "loss": 2.3129, "step": 400220 }, { "epoch": 1.547177251008953, "grad_norm": 0.09968365728855133, "learning_rate": 0.0019040833695337456, "loss": 2.3417, "step": 400230 }, { "epoch": 1.5472159082123362, "grad_norm": 0.10656339675188065, "learning_rate": 0.001902020410288673, "loss": 2.3373, "step": 400240 }, { "epoch": 1.5472545654157197, "grad_norm": 0.11012217402458191, "learning_rate": 0.0019, "loss": 2.3491, "step": 400250 }, { "epoch": 1.547293222619103, "grad_norm": 0.08270913362503052, "learning_rate": 0.0018980196097281444, "loss": 2.3177, "step": 400260 }, { "epoch": 1.5473318798224862, "grad_norm": 0.09015677869319916, "learning_rate": 0.0018960769515458673, "loss": 2.3248, "step": 400270 }, { "epoch": 1.5473705370258695, "grad_norm": 0.10345932841300964, "learning_rate": 0.0018941699475574164, "loss": 2.3395, "step": 400280 }, { "epoch": 1.5474091942292527, "grad_norm": 0.10158301144838333, "learning_rate": 0.00189229670385731, "loss": 2.3251, "step": 400290 }, { "epoch": 1.547447851432636, "grad_norm": 0.09819726645946503, "learning_rate": 0.0018904554884989668, "loss": 2.3319, "step": 400300 }, { "epoch": 1.5474865086360192, "grad_norm": 0.10514269769191742, "learning_rate": 0.0018886447127433998, "loss": 2.3271, "step": 400310 }, { "epoch": 1.5475251658394025, "grad_norm": 0.11034499853849411, "learning_rate": 0.0018868629150101525, "loss": 2.3264, "step": 400320 }, { "epoch": 1.5475638230427857, "grad_norm": 0.09889666736125946, "learning_rate": 0.0018851087470692395, "loss": 2.3236, "step": 400330 }, { "epoch": 1.547602480246169, "grad_norm": 0.09816837310791016, "learning_rate": 0.001883380962103094, "loss": 2.3293, "step": 400340 }, { "epoch": 1.5476411374495522, "grad_norm": 0.10355667024850845, "learning_rate": 0.0018816784043380076, "loss": 2.3193, "step": 400350 }, { "epoch": 1.5476797946529355, "grad_norm": 0.10169237107038498, "learning_rate": 0.00188, "loss": 2.3178, "step": 400360 }, { "epoch": 1.547718451856319, "grad_norm": 0.10110478848218918, "learning_rate": 0.0018783447493940356, "loss": 2.3239, "step": 400370 }, { "epoch": 1.5477571090597022, "grad_norm": 0.10995251685380936, "learning_rate": 0.0018767117199406204, "loss": 2.3166, "step": 400380 }, { "epoch": 1.5477957662630855, "grad_norm": 0.10122602432966232, "learning_rate": 0.0018751000400320321, "loss": 2.3247, "step": 400390 }, { "epoch": 1.5478344234664687, "grad_norm": 0.11095928400754929, "learning_rate": 0.001873508893593265, "loss": 2.3257, "step": 400400 }, { "epoch": 1.5478730806698522, "grad_norm": 0.11088376492261887, "learning_rate": 0.001871937515251343, "loss": 2.3242, "step": 400410 }, { "epoch": 1.5479117378732354, "grad_norm": 0.10559550672769547, "learning_rate": 0.0018703851860318427, "loss": 2.3357, "step": 400420 }, { "epoch": 1.5479503950766187, "grad_norm": 0.0967123806476593, "learning_rate": 0.00186885122951396, "loss": 2.31, "step": 400430 }, { "epoch": 1.547989052280002, "grad_norm": 0.10819984972476959, "learning_rate": 0.0018673350083857842, "loss": 2.3237, "step": 400440 }, { "epoch": 1.5480277094833852, "grad_norm": 0.08791578561067581, "learning_rate": 0.0018658359213500127, "loss": 2.3294, "step": 400450 }, { "epoch": 1.5480663666867684, "grad_norm": 0.10057539492845535, "learning_rate": 0.0018643534003374947, "loss": 2.3184, "step": 400460 }, { "epoch": 1.5481050238901517, "grad_norm": 0.10410565137863159, "learning_rate": 0.0018628869079919791, "loss": 2.3487, "step": 400470 }, { "epoch": 1.548143681093535, "grad_norm": 0.10269663482904434, "learning_rate": 0.0018614359353944898, "loss": 2.3307, "step": 400480 }, { "epoch": 1.5481823382969182, "grad_norm": 0.10686153173446655, "learning_rate": 0.00186, "loss": 2.3123, "step": 400490 }, { "epoch": 1.5482209955003015, "grad_norm": 0.0893033891916275, "learning_rate": 0.0018585786437626906, "loss": 2.3257, "step": 400500 }, { "epoch": 1.5482596527036847, "grad_norm": 0.10048429667949677, "learning_rate": 0.001857171431429143, "loss": 2.3426, "step": 400510 }, { "epoch": 1.548298309907068, "grad_norm": 0.1200726330280304, "learning_rate": 0.0018557779489814404, "loss": 2.3116, "step": 400520 }, { "epoch": 1.5483369671104512, "grad_norm": 0.09477726370096207, "learning_rate": 0.0018543978022143898, "loss": 2.3486, "step": 400530 }, { "epoch": 1.5483756243138347, "grad_norm": 0.09153016656637192, "learning_rate": 0.0018530306154330095, "loss": 2.3381, "step": 400540 }, { "epoch": 1.548414281517218, "grad_norm": 0.09223145991563797, "learning_rate": 0.0018516760302580869, "loss": 2.3146, "step": 400550 }, { "epoch": 1.5484529387206012, "grad_norm": 0.09365306049585342, "learning_rate": 0.0018503337045290423, "loss": 2.3223, "step": 400560 }, { "epoch": 1.5484915959239844, "grad_norm": 0.09258891642093658, "learning_rate": 0.001849003311294585, "loss": 2.3144, "step": 400570 }, { "epoch": 1.548530253127368, "grad_norm": 0.09590215235948563, "learning_rate": 0.001847684537882722, "loss": 2.3272, "step": 400580 }, { "epoch": 1.5485689103307512, "grad_norm": 0.11163296550512314, "learning_rate": 0.0018463770850426278, "loss": 2.3233, "step": 400590 }, { "epoch": 1.5486075675341344, "grad_norm": 0.08850311487913132, "learning_rate": 0.0018450806661517035, "loss": 2.3218, "step": 400600 }, { "epoch": 1.5486462247375177, "grad_norm": 0.1200372651219368, "learning_rate": 0.001843795006481867, "loss": 2.3257, "step": 400610 }, { "epoch": 1.548684881940901, "grad_norm": 0.10512597113847733, "learning_rate": 0.0018425198425197637, "loss": 2.3318, "step": 400620 }, { "epoch": 1.5487235391442842, "grad_norm": 0.0931624174118042, "learning_rate": 0.0018412549213361244, "loss": 2.3145, "step": 400630 }, { "epoch": 1.5487621963476674, "grad_norm": 0.08724930137395859, "learning_rate": 0.00184, "loss": 2.3231, "step": 400640 }, { "epoch": 1.5488008535510507, "grad_norm": 0.13431008160114288, "learning_rate": 0.001838754845034029, "loss": 2.3179, "step": 400650 }, { "epoch": 1.548839510754434, "grad_norm": 0.11374661326408386, "learning_rate": 0.0018375192319072808, "loss": 2.3164, "step": 400660 }, { "epoch": 1.5488781679578172, "grad_norm": 0.08780115097761154, "learning_rate": 0.0018362929445625512, "loss": 2.3131, "step": 400670 }, { "epoch": 1.5489168251612004, "grad_norm": 0.11381842941045761, "learning_rate": 0.0018350757749752936, "loss": 2.324, "step": 400680 }, { "epoch": 1.5489554823645837, "grad_norm": 0.09610319882631302, "learning_rate": 0.0018338675227416387, "loss": 2.3059, "step": 400690 }, { "epoch": 1.548994139567967, "grad_norm": 0.10623107850551605, "learning_rate": 0.001832667994693185, "loss": 2.3182, "step": 400700 }, { "epoch": 1.5490327967713504, "grad_norm": 0.10675205290317535, "learning_rate": 0.0018314770045364727, "loss": 2.3274, "step": 400710 }, { "epoch": 1.5490714539747337, "grad_norm": 0.09334469586610794, "learning_rate": 0.0018302943725152286, "loss": 2.3179, "step": 400720 }, { "epoch": 1.549110111178117, "grad_norm": 0.08830467611551285, "learning_rate": 0.0018291199250936494, "loss": 2.3191, "step": 400730 }, { "epoch": 1.5491487683815002, "grad_norm": 0.09757346659898758, "learning_rate": 0.0018279534946591474, "loss": 2.3117, "step": 400740 }, { "epoch": 1.5491874255848836, "grad_norm": 0.09814561903476715, "learning_rate": 0.0018267949192431123, "loss": 2.313, "step": 400750 }, { "epoch": 1.549226082788267, "grad_norm": 0.11958609521389008, "learning_rate": 0.0018256440422583732, "loss": 2.3135, "step": 400760 }, { "epoch": 1.5492647399916502, "grad_norm": 0.09650695323944092, "learning_rate": 0.0018245007122521576, "loss": 2.3139, "step": 400770 }, { "epoch": 1.5493033971950334, "grad_norm": 0.09163734316825867, "learning_rate": 0.0018233647826734433, "loss": 2.3148, "step": 400780 }, { "epoch": 1.5493420543984167, "grad_norm": 0.08900466561317444, "learning_rate": 0.0018222361116536883, "loss": 2.322, "step": 400790 }, { "epoch": 1.5493807116018, "grad_norm": 0.12508372962474823, "learning_rate": 0.0018211145618000169, "loss": 2.3183, "step": 400800 }, { "epoch": 1.5494193688051832, "grad_norm": 0.09552543610334396, "learning_rate": 0.00182, "loss": 2.3093, "step": 400810 }, { "epoch": 1.5494580260085664, "grad_norm": 0.10733716189861298, "learning_rate": 0.0018188922972372516, "loss": 2.3158, "step": 400820 }, { "epoch": 1.5494966832119497, "grad_norm": 0.09862062335014343, "learning_rate": 0.001817791328417114, "loss": 2.3221, "step": 400830 }, { "epoch": 1.549535340415333, "grad_norm": 0.08959076553583145, "learning_rate": 0.0018166969722017666, "loss": 2.3046, "step": 400840 }, { "epoch": 1.5495739976187162, "grad_norm": 0.0865064188838005, "learning_rate": 0.0018156091108541424, "loss": 2.3107, "step": 400850 }, { "epoch": 1.5496126548220994, "grad_norm": 0.10689177364110947, "learning_rate": 0.0018145276300900858, "loss": 2.3257, "step": 400860 }, { "epoch": 1.5496513120254827, "grad_norm": 0.10806325078010559, "learning_rate": 0.0018134524189382239, "loss": 2.3276, "step": 400870 }, { "epoch": 1.5496899692288661, "grad_norm": 0.09787989407777786, "learning_rate": 0.001812383369607063, "loss": 2.315, "step": 400880 }, { "epoch": 1.5497286264322494, "grad_norm": 0.0919385477900505, "learning_rate": 0.001811320377358868, "loss": 2.3366, "step": 400890 }, { "epoch": 1.5497672836356327, "grad_norm": 0.09771880507469177, "learning_rate": 0.0018102633403898974, "loss": 2.3307, "step": 400900 }, { "epoch": 1.549805940839016, "grad_norm": 0.1106455996632576, "learning_rate": 0.001809212159716611, "loss": 2.324, "step": 400910 }, { "epoch": 1.5498445980423994, "grad_norm": 0.09291300177574158, "learning_rate": 0.0018081667390674912, "loss": 2.3126, "step": 400920 }, { "epoch": 1.5498832552457826, "grad_norm": 0.11154467612504959, "learning_rate": 0.001807126984780141, "loss": 2.3218, "step": 400930 }, { "epoch": 1.5499219124491659, "grad_norm": 0.11702819168567657, "learning_rate": 0.0018060928057033467, "loss": 2.3198, "step": 400940 }, { "epoch": 1.5499605696525491, "grad_norm": 0.08991272002458572, "learning_rate": 0.001805064113103821, "loss": 2.3291, "step": 400950 }, { "epoch": 1.5499992268559324, "grad_norm": 0.10776031762361526, "learning_rate": 0.0018040408205773457, "loss": 2.3171, "step": 400960 }, { "epoch": 1.5500378840593156, "grad_norm": 0.09182223677635193, "learning_rate": 0.001803022843964078, "loss": 2.3203, "step": 400970 }, { "epoch": 1.550076541262699, "grad_norm": 0.11256624758243561, "learning_rate": 0.0018020101012677667, "loss": 2.3326, "step": 400980 }, { "epoch": 1.5501151984660821, "grad_norm": 0.0917619988322258, "learning_rate": 0.001801002512578676, "loss": 2.3226, "step": 400990 }, { "epoch": 1.5501538556694654, "grad_norm": 0.10037446767091751, "learning_rate": 0.0018000000000000002, "loss": 2.3168, "step": 401000 }, { "epoch": 1.5501925128728486, "grad_norm": 0.09557545930147171, "learning_rate": 0.001799002487577582, "loss": 2.3071, "step": 401010 }, { "epoch": 1.550231170076232, "grad_norm": 0.08818859606981277, "learning_rate": 0.0017980099012327585, "loss": 2.3239, "step": 401020 }, { "epoch": 1.5502698272796152, "grad_norm": 0.10895640403032303, "learning_rate": 0.0017970221686981556, "loss": 2.3325, "step": 401030 }, { "epoch": 1.5503084844829984, "grad_norm": 0.10136058926582336, "learning_rate": 0.0017960392194562886, "loss": 2.3291, "step": 401040 }, { "epoch": 1.5503471416863819, "grad_norm": 0.09887836873531342, "learning_rate": 0.001795060984680808, "loss": 2.3346, "step": 401050 }, { "epoch": 1.5503857988897651, "grad_norm": 0.1036849245429039, "learning_rate": 0.00179408739718026, "loss": 2.3176, "step": 401060 }, { "epoch": 1.5504244560931484, "grad_norm": 0.09475167095661163, "learning_rate": 0.0017931183913442279, "loss": 2.3148, "step": 401070 }, { "epoch": 1.5504631132965316, "grad_norm": 0.10519945621490479, "learning_rate": 0.0017921539030917347, "loss": 2.3172, "step": 401080 }, { "epoch": 1.550501770499915, "grad_norm": 0.09560395032167435, "learning_rate": 0.0017911938698217892, "loss": 2.3086, "step": 401090 }, { "epoch": 1.5505404277032984, "grad_norm": 0.09103363007307053, "learning_rate": 0.0017902382303659696, "loss": 2.3198, "step": 401100 }, { "epoch": 1.5505790849066816, "grad_norm": 0.09273833781480789, "learning_rate": 0.0017892869249429453, "loss": 2.3329, "step": 401110 }, { "epoch": 1.5506177421100649, "grad_norm": 0.10024043917655945, "learning_rate": 0.0017883398951148329, "loss": 2.3195, "step": 401120 }, { "epoch": 1.5506563993134481, "grad_norm": 0.1332581788301468, "learning_rate": 0.0017873970837453072, "loss": 2.32, "step": 401130 }, { "epoch": 1.5506950565168314, "grad_norm": 0.12426207214593887, "learning_rate": 0.0017864584349593737, "loss": 2.3221, "step": 401140 }, { "epoch": 1.5507337137202146, "grad_norm": 0.10845503956079483, "learning_rate": 0.0017855238941047278, "loss": 2.3191, "step": 401150 }, { "epoch": 1.5507723709235979, "grad_norm": 0.1018427163362503, "learning_rate": 0.00178459340771462, "loss": 2.3215, "step": 401160 }, { "epoch": 1.5508110281269811, "grad_norm": 0.13322055339813232, "learning_rate": 0.0017836669234721607, "loss": 2.3301, "step": 401170 }, { "epoch": 1.5508496853303644, "grad_norm": 0.11589889228343964, "learning_rate": 0.0017827443901759956, "loss": 2.3184, "step": 401180 }, { "epoch": 1.5508883425337476, "grad_norm": 0.11078567057847977, "learning_rate": 0.001781825757707286, "loss": 2.3031, "step": 401190 }, { "epoch": 1.5509269997371309, "grad_norm": 0.09361839294433594, "learning_rate": 0.0017809109769979336, "loss": 2.3139, "step": 401200 }, { "epoch": 1.5509656569405141, "grad_norm": 0.09666917473077774, "learning_rate": 0.0017800000000000001, "loss": 2.3225, "step": 401210 }, { "epoch": 1.5510043141438976, "grad_norm": 0.1282287836074829, "learning_rate": 0.0017790927796562548, "loss": 2.3288, "step": 401220 }, { "epoch": 1.5510429713472809, "grad_norm": 0.09642815589904785, "learning_rate": 0.0017781892698718116, "loss": 2.31, "step": 401230 }, { "epoch": 1.5510816285506641, "grad_norm": 0.09528058767318726, "learning_rate": 0.0017772894254867993, "loss": 2.3134, "step": 401240 }, { "epoch": 1.5511202857540474, "grad_norm": 0.12327845394611359, "learning_rate": 0.001776393202250021, "loss": 2.3202, "step": 401250 }, { "epoch": 1.5511589429574308, "grad_norm": 0.09685596823692322, "learning_rate": 0.0017755005567935637, "loss": 2.3177, "step": 401260 }, { "epoch": 1.551197600160814, "grad_norm": 0.09323841333389282, "learning_rate": 0.0017746114466083071, "loss": 2.3288, "step": 401270 }, { "epoch": 1.5512362573641973, "grad_norm": 0.09714601933956146, "learning_rate": 0.0017737258300203047, "loss": 2.3082, "step": 401280 }, { "epoch": 1.5512749145675806, "grad_norm": 0.11459461599588394, "learning_rate": 0.0017728436661679891, "loss": 2.3257, "step": 401290 }, { "epoch": 1.5513135717709639, "grad_norm": 0.09478688985109329, "learning_rate": 0.0017719649149801724, "loss": 2.3119, "step": 401300 }, { "epoch": 1.551352228974347, "grad_norm": 0.09680263698101044, "learning_rate": 0.001771089537154808, "loss": 2.3094, "step": 401310 }, { "epoch": 1.5513908861777304, "grad_norm": 0.09631063044071198, "learning_rate": 0.0017702174941384788, "loss": 2.3171, "step": 401320 }, { "epoch": 1.5514295433811136, "grad_norm": 0.10044778883457184, "learning_rate": 0.0017693487481065843, "loss": 2.3217, "step": 401330 }, { "epoch": 1.5514682005844969, "grad_norm": 0.0928507149219513, "learning_rate": 0.0017684832619441954, "loss": 2.3125, "step": 401340 }, { "epoch": 1.5515068577878801, "grad_norm": 0.08930382877588272, "learning_rate": 0.001767620999227555, "loss": 2.3282, "step": 401350 }, { "epoch": 1.5515455149912634, "grad_norm": 0.10376087576150894, "learning_rate": 0.001766761924206188, "loss": 2.3218, "step": 401360 }, { "epoch": 1.5515841721946466, "grad_norm": 0.09389341622591019, "learning_rate": 0.0017659060017856075, "loss": 2.3314, "step": 401370 }, { "epoch": 1.5516228293980299, "grad_norm": 0.10632017254829407, "learning_rate": 0.0017650531975105855, "loss": 2.3161, "step": 401380 }, { "epoch": 1.5516614866014133, "grad_norm": 0.10752441734075546, "learning_rate": 0.0017642034775489682, "loss": 2.313, "step": 401390 }, { "epoch": 1.5517001438047966, "grad_norm": 0.09111400693655014, "learning_rate": 0.0017633568086760155, "loss": 2.3151, "step": 401400 }, { "epoch": 1.5517388010081798, "grad_norm": 0.11407702416181564, "learning_rate": 0.0017625131582592417, "loss": 2.3116, "step": 401410 }, { "epoch": 1.551777458211563, "grad_norm": 0.09957265853881836, "learning_rate": 0.0017616724942437403, "loss": 2.3201, "step": 401420 }, { "epoch": 1.5518161154149466, "grad_norm": 0.11485077440738678, "learning_rate": 0.001760834785137972, "loss": 2.3202, "step": 401430 }, { "epoch": 1.5518547726183298, "grad_norm": 0.09502989798784256, "learning_rate": 0.00176, "loss": 2.3158, "step": 401440 }, { "epoch": 1.551893429821713, "grad_norm": 0.10360467433929443, "learning_rate": 0.0017591681084241542, "loss": 2.3093, "step": 401450 }, { "epoch": 1.5519320870250963, "grad_norm": 0.08504238724708557, "learning_rate": 0.0017583390805281085, "loss": 2.3005, "step": 401460 }, { "epoch": 1.5519707442284796, "grad_norm": 0.0989072248339653, "learning_rate": 0.0017575128869403572, "loss": 2.3084, "step": 401470 }, { "epoch": 1.5520094014318628, "grad_norm": 0.08768634498119354, "learning_rate": 0.0017566894987880713, "loss": 2.31, "step": 401480 }, { "epoch": 1.552048058635246, "grad_norm": 0.11494536697864532, "learning_rate": 0.001755868887685326, "loss": 2.3079, "step": 401490 }, { "epoch": 1.5520867158386293, "grad_norm": 0.0976579487323761, "learning_rate": 0.0017550510257216823, "loss": 2.2971, "step": 401500 }, { "epoch": 1.5521253730420126, "grad_norm": 0.09919919073581696, "learning_rate": 0.0017542358854511098, "loss": 2.3158, "step": 401510 }, { "epoch": 1.5521640302453958, "grad_norm": 0.45339933037757874, "learning_rate": 0.001753423439881241, "loss": 2.3116, "step": 401520 }, { "epoch": 1.552202687448779, "grad_norm": 0.09647250175476074, "learning_rate": 0.0017526136624629403, "loss": 2.3251, "step": 401530 }, { "epoch": 1.5522413446521623, "grad_norm": 0.11359827220439911, "learning_rate": 0.001751806527080183, "loss": 2.3095, "step": 401540 }, { "epoch": 1.5522800018555456, "grad_norm": 0.10807959735393524, "learning_rate": 0.0017510020080402254, "loss": 2.3172, "step": 401550 }, { "epoch": 1.552318659058929, "grad_norm": 0.09357649832963943, "learning_rate": 0.001750200080064064, "loss": 2.3051, "step": 401560 }, { "epoch": 1.5523573162623123, "grad_norm": 0.10182651877403259, "learning_rate": 0.0017494007182771668, "loss": 2.3103, "step": 401570 }, { "epoch": 1.5523959734656956, "grad_norm": 0.09746528416872025, "learning_rate": 0.0017486038982004693, "loss": 2.3086, "step": 401580 }, { "epoch": 1.5524346306690788, "grad_norm": 0.10169055312871933, "learning_rate": 0.0017478095957416302, "loss": 2.3067, "step": 401590 }, { "epoch": 1.5524732878724623, "grad_norm": 0.102170929312706, "learning_rate": 0.0017470177871865297, "loss": 2.325, "step": 401600 }, { "epoch": 1.5525119450758456, "grad_norm": 0.09123319387435913, "learning_rate": 0.0017462284491910098, "loss": 2.3178, "step": 401610 }, { "epoch": 1.5525506022792288, "grad_norm": 0.09148348122835159, "learning_rate": 0.001745441558772843, "loss": 2.31, "step": 401620 }, { "epoch": 1.552589259482612, "grad_norm": 0.28315749764442444, "learning_rate": 0.0017446570933039262, "loss": 2.324, "step": 401630 }, { "epoch": 1.5526279166859953, "grad_norm": 0.09796846657991409, "learning_rate": 0.001743875030502686, "loss": 2.3082, "step": 401640 }, { "epoch": 1.5526665738893786, "grad_norm": 0.09901988506317139, "learning_rate": 0.0017430953484266973, "loss": 2.3334, "step": 401650 }, { "epoch": 1.5527052310927618, "grad_norm": 0.08878893405199051, "learning_rate": 0.0017423180254654976, "loss": 2.3042, "step": 401660 }, { "epoch": 1.552743888296145, "grad_norm": 0.13966940343379974, "learning_rate": 0.0017415430403335984, "loss": 2.3115, "step": 401670 }, { "epoch": 1.5527825454995283, "grad_norm": 0.10997126996517181, "learning_rate": 0.0017407703720636856, "loss": 2.3204, "step": 401680 }, { "epoch": 1.5528212027029116, "grad_norm": 0.09582191705703735, "learning_rate": 0.00174, "loss": 2.2998, "step": 401690 }, { "epoch": 1.5528598599062948, "grad_norm": 0.0954718291759491, "learning_rate": 0.001739231903791894, "loss": 2.3069, "step": 401700 }, { "epoch": 1.552898517109678, "grad_norm": 0.08937995135784149, "learning_rate": 0.0017384660633875595, "loss": 2.3267, "step": 401710 }, { "epoch": 1.5529371743130613, "grad_norm": 0.09006324410438538, "learning_rate": 0.00173770245902792, "loss": 2.3027, "step": 401720 }, { "epoch": 1.5529758315164448, "grad_norm": 0.09693530946969986, "learning_rate": 0.0017369410712406818, "loss": 2.3182, "step": 401730 }, { "epoch": 1.553014488719828, "grad_norm": 0.11013215780258179, "learning_rate": 0.0017361818808345416, "loss": 2.3176, "step": 401740 }, { "epoch": 1.5530531459232113, "grad_norm": 0.09539856761693954, "learning_rate": 0.001735424868893541, "loss": 2.3102, "step": 401750 }, { "epoch": 1.5530918031265946, "grad_norm": 0.10650390386581421, "learning_rate": 0.001734670016771568, "loss": 2.3264, "step": 401760 }, { "epoch": 1.553130460329978, "grad_norm": 0.10164553672075272, "learning_rate": 0.0017339173060869986, "loss": 2.3162, "step": 401770 }, { "epoch": 1.5531691175333613, "grad_norm": 0.10432730615139008, "learning_rate": 0.0017331667187174733, "loss": 2.2991, "step": 401780 }, { "epoch": 1.5532077747367445, "grad_norm": 0.08654264360666275, "learning_rate": 0.001732418236794807, "loss": 2.316, "step": 401790 }, { "epoch": 1.5532464319401278, "grad_norm": 0.09503353387117386, "learning_rate": 0.0017316718427000254, "loss": 2.3102, "step": 401800 }, { "epoch": 1.553285089143511, "grad_norm": 0.09561841189861298, "learning_rate": 0.001730927519058526, "loss": 2.3267, "step": 401810 }, { "epoch": 1.5533237463468943, "grad_norm": 0.11583434045314789, "learning_rate": 0.001730185248735359, "loss": 2.306, "step": 401820 }, { "epoch": 1.5533624035502775, "grad_norm": 0.1029180958867073, "learning_rate": 0.0017294450148306263, "loss": 2.2991, "step": 401830 }, { "epoch": 1.5534010607536608, "grad_norm": 0.09783989191055298, "learning_rate": 0.0017287068006749894, "loss": 2.32, "step": 401840 }, { "epoch": 1.553439717957044, "grad_norm": 0.09479359537363052, "learning_rate": 0.0017279705898252912, "loss": 2.305, "step": 401850 }, { "epoch": 1.5534783751604273, "grad_norm": 0.08666594326496124, "learning_rate": 0.001727236366060283, "loss": 2.3039, "step": 401860 }, { "epoch": 1.5535170323638106, "grad_norm": 0.0965995267033577, "learning_rate": 0.0017265041133764532, "loss": 2.3118, "step": 401870 }, { "epoch": 1.5535556895671938, "grad_norm": 0.10142888128757477, "learning_rate": 0.0017257738159839582, "loss": 2.2966, "step": 401880 }, { "epoch": 1.5535943467705773, "grad_norm": 0.09101786464452744, "learning_rate": 0.0017250454583026497, "loss": 2.3118, "step": 401890 }, { "epoch": 1.5536330039739605, "grad_norm": 0.10528852790594101, "learning_rate": 0.0017243190249581954, "loss": 2.3094, "step": 401900 }, { "epoch": 1.5536716611773438, "grad_norm": 0.09671095758676529, "learning_rate": 0.0017235945007782949, "loss": 2.3117, "step": 401910 }, { "epoch": 1.553710318380727, "grad_norm": 0.12018919736146927, "learning_rate": 0.0017228718707889797, "loss": 2.3165, "step": 401920 }, { "epoch": 1.5537489755841103, "grad_norm": 0.10087112337350845, "learning_rate": 0.001722151120211004, "loss": 2.3122, "step": 401930 }, { "epoch": 1.5537876327874938, "grad_norm": 0.08700679242610931, "learning_rate": 0.0017214322344563176, "loss": 2.2984, "step": 401940 }, { "epoch": 1.553826289990877, "grad_norm": 0.10135027021169662, "learning_rate": 0.0017207151991246214, "loss": 2.3121, "step": 401950 }, { "epoch": 1.5538649471942603, "grad_norm": 0.0919208973646164, "learning_rate": 0.00172, "loss": 2.3116, "step": 401960 }, { "epoch": 1.5539036043976435, "grad_norm": 0.08301553875207901, "learning_rate": 0.001719286623047636, "loss": 2.3051, "step": 401970 }, { "epoch": 1.5539422616010268, "grad_norm": 0.09961026906967163, "learning_rate": 0.0017185750544105943, "loss": 2.3124, "step": 401980 }, { "epoch": 1.55398091880441, "grad_norm": 0.1093689426779747, "learning_rate": 0.0017178652804066823, "loss": 2.3203, "step": 401990 }, { "epoch": 1.5540195760077933, "grad_norm": 0.08834511041641235, "learning_rate": 0.001717157287525381, "loss": 2.3168, "step": 402000 }, { "epoch": 1.5540582332111765, "grad_norm": 0.09967672824859619, "learning_rate": 0.0017164510624248434, "loss": 2.3181, "step": 402010 }, { "epoch": 1.5540968904145598, "grad_norm": 0.09733570367097855, "learning_rate": 0.001715746591928962, "loss": 2.3099, "step": 402020 }, { "epoch": 1.554135547617943, "grad_norm": 0.08811885118484497, "learning_rate": 0.0017150438630245, "loss": 2.2935, "step": 402030 }, { "epoch": 1.5541742048213263, "grad_norm": 0.10241816192865372, "learning_rate": 0.001714342862858286, "loss": 2.3091, "step": 402040 }, { "epoch": 1.5542128620247095, "grad_norm": 0.09512423723936081, "learning_rate": 0.001713643578734473, "loss": 2.3217, "step": 402050 }, { "epoch": 1.554251519228093, "grad_norm": 0.0858379676938057, "learning_rate": 0.0017129459981118536, "loss": 2.3169, "step": 402060 }, { "epoch": 1.5542901764314763, "grad_norm": 0.08720643073320389, "learning_rate": 0.001712250108601237, "loss": 2.3249, "step": 402070 }, { "epoch": 1.5543288336348595, "grad_norm": 0.08493568003177643, "learning_rate": 0.001711555897962881, "loss": 2.314, "step": 402080 }, { "epoch": 1.5543674908382428, "grad_norm": 0.09646010398864746, "learning_rate": 0.001710863354103981, "loss": 2.2924, "step": 402090 }, { "epoch": 1.554406148041626, "grad_norm": 0.10783302783966064, "learning_rate": 0.0017101724650762113, "loss": 2.3187, "step": 402100 }, { "epoch": 1.5544448052450095, "grad_norm": 0.09830918163061142, "learning_rate": 0.001709483219073321, "loss": 2.3102, "step": 402110 }, { "epoch": 1.5544834624483927, "grad_norm": 0.09604904055595398, "learning_rate": 0.0017087956044287793, "loss": 2.3204, "step": 402120 }, { "epoch": 1.554522119651776, "grad_norm": 0.09845247119665146, "learning_rate": 0.0017081096096134717, "loss": 2.3242, "step": 402130 }, { "epoch": 1.5545607768551593, "grad_norm": 0.09527433663606644, "learning_rate": 0.0017074252232334441, "loss": 2.2961, "step": 402140 }, { "epoch": 1.5545994340585425, "grad_norm": 0.10510462522506714, "learning_rate": 0.0017067424340276964, "loss": 2.3101, "step": 402150 }, { "epoch": 1.5546380912619258, "grad_norm": 0.10998314619064331, "learning_rate": 0.0017060612308660186, "loss": 2.3157, "step": 402160 }, { "epoch": 1.554676748465309, "grad_norm": 0.09259533137083054, "learning_rate": 0.0017053816027468754, "loss": 2.3101, "step": 402170 }, { "epoch": 1.5547154056686923, "grad_norm": 0.11178915947675705, "learning_rate": 0.001704703538795332, "loss": 2.306, "step": 402180 }, { "epoch": 1.5547540628720755, "grad_norm": 0.09446456283330917, "learning_rate": 0.0017040270282610251, "loss": 2.2987, "step": 402190 }, { "epoch": 1.5547927200754588, "grad_norm": 0.1069209948182106, "learning_rate": 0.0017033520605161737, "loss": 2.3044, "step": 402200 }, { "epoch": 1.554831377278842, "grad_norm": 0.10097237676382065, "learning_rate": 0.00170267862505363, "loss": 2.2995, "step": 402210 }, { "epoch": 1.5548700344822253, "grad_norm": 0.09666384011507034, "learning_rate": 0.0017020067114849732, "loss": 2.3085, "step": 402220 }, { "epoch": 1.5549086916856087, "grad_norm": 0.09053724259138107, "learning_rate": 0.0017013363095386386, "loss": 2.313, "step": 402230 }, { "epoch": 1.554947348888992, "grad_norm": 0.08768399804830551, "learning_rate": 0.0017006674090580846, "loss": 2.2969, "step": 402240 }, { "epoch": 1.5549860060923753, "grad_norm": 0.10341167449951172, "learning_rate": 0.0017, "loss": 2.3079, "step": 402250 }, { "epoch": 1.5550246632957585, "grad_norm": 0.11017939448356628, "learning_rate": 0.001699334072432542, "loss": 2.3145, "step": 402260 }, { "epoch": 1.5550633204991418, "grad_norm": 0.11902227252721786, "learning_rate": 0.0016986696165336129, "loss": 2.3129, "step": 402270 }, { "epoch": 1.5551019777025252, "grad_norm": 0.12658636271953583, "learning_rate": 0.0016980066225891698, "loss": 2.328, "step": 402280 }, { "epoch": 1.5551406349059085, "grad_norm": 0.08775745332241058, "learning_rate": 0.001697345080991569, "loss": 2.313, "step": 402290 }, { "epoch": 1.5551792921092917, "grad_norm": 0.11420045793056488, "learning_rate": 0.001696684982237938, "loss": 2.3107, "step": 402300 }, { "epoch": 1.555217949312675, "grad_norm": 0.09320458769798279, "learning_rate": 0.0016960263169285867, "loss": 2.3109, "step": 402310 }, { "epoch": 1.5552566065160582, "grad_norm": 0.11391550302505493, "learning_rate": 0.0016953690757654436, "loss": 2.3109, "step": 402320 }, { "epoch": 1.5552952637194415, "grad_norm": 0.09274116158485413, "learning_rate": 0.001694713249550525, "loss": 2.3179, "step": 402330 }, { "epoch": 1.5553339209228247, "grad_norm": 0.09113335609436035, "learning_rate": 0.001694058829184433, "loss": 2.3207, "step": 402340 }, { "epoch": 1.555372578126208, "grad_norm": 0.09407570213079453, "learning_rate": 0.0016934058056648823, "loss": 2.3197, "step": 402350 }, { "epoch": 1.5554112353295912, "grad_norm": 0.11472621560096741, "learning_rate": 0.0016927541700852559, "loss": 2.3086, "step": 402360 }, { "epoch": 1.5554498925329745, "grad_norm": 0.09536908566951752, "learning_rate": 0.001692103913633187, "loss": 2.3043, "step": 402370 }, { "epoch": 1.5554885497363578, "grad_norm": 0.09856908768415451, "learning_rate": 0.0016914550275891698, "loss": 2.3104, "step": 402380 }, { "epoch": 1.555527206939741, "grad_norm": 0.09119411557912827, "learning_rate": 0.001690807503325194, "loss": 2.3038, "step": 402390 }, { "epoch": 1.5555658641431245, "grad_norm": 0.08942999690771103, "learning_rate": 0.0016901613323034067, "loss": 2.3136, "step": 402400 }, { "epoch": 1.5556045213465077, "grad_norm": 0.08663064986467361, "learning_rate": 0.0016895165060747998, "loss": 2.3072, "step": 402410 }, { "epoch": 1.555643178549891, "grad_norm": 0.0936921089887619, "learning_rate": 0.0016888730162779191, "loss": 2.3079, "step": 402420 }, { "epoch": 1.5556818357532742, "grad_norm": 0.09534984081983566, "learning_rate": 0.0016882308546376022, "loss": 2.3013, "step": 402430 }, { "epoch": 1.5557204929566577, "grad_norm": 0.10839767009019852, "learning_rate": 0.001687590012963734, "loss": 2.299, "step": 402440 }, { "epoch": 1.555759150160041, "grad_norm": 0.13321153819561005, "learning_rate": 0.0016869504831500295, "loss": 2.3115, "step": 402450 }, { "epoch": 1.5557978073634242, "grad_norm": 0.10143892467021942, "learning_rate": 0.0016863122571728376, "loss": 2.3137, "step": 402460 }, { "epoch": 1.5558364645668075, "grad_norm": 0.11658959090709686, "learning_rate": 0.0016856753270899658, "loss": 2.3132, "step": 402470 }, { "epoch": 1.5558751217701907, "grad_norm": 0.09141705185174942, "learning_rate": 0.0016850396850395274, "loss": 2.317, "step": 402480 }, { "epoch": 1.555913778973574, "grad_norm": 0.10423143208026886, "learning_rate": 0.00168440532323881, "loss": 2.3036, "step": 402490 }, { "epoch": 1.5559524361769572, "grad_norm": 0.11280666291713715, "learning_rate": 0.001683772233983162, "loss": 2.31, "step": 402500 }, { "epoch": 1.5559910933803405, "grad_norm": 0.11399994045495987, "learning_rate": 0.0016831404096449027, "loss": 2.323, "step": 402510 }, { "epoch": 1.5560297505837237, "grad_norm": 0.09138819575309753, "learning_rate": 0.0016825098426722493, "loss": 2.3094, "step": 402520 }, { "epoch": 1.556068407787107, "grad_norm": 0.10838840901851654, "learning_rate": 0.0016818805255882628, "loss": 2.3267, "step": 402530 }, { "epoch": 1.5561070649904902, "grad_norm": 0.10054635256528854, "learning_rate": 0.0016812524509898154, "loss": 2.3192, "step": 402540 }, { "epoch": 1.5561457221938735, "grad_norm": 0.1000504270195961, "learning_rate": 0.0016806256115465738, "loss": 2.2949, "step": 402550 }, { "epoch": 1.5561843793972567, "grad_norm": 0.09790179878473282, "learning_rate": 0.00168, "loss": 2.3207, "step": 402560 }, { "epoch": 1.5562230366006402, "grad_norm": 0.09651616215705872, "learning_rate": 0.001679375609162372, "loss": 2.3064, "step": 402570 }, { "epoch": 1.5562616938040235, "grad_norm": 0.09436143189668655, "learning_rate": 0.0016787524319158199, "loss": 2.3141, "step": 402580 }, { "epoch": 1.5563003510074067, "grad_norm": 0.1127728670835495, "learning_rate": 0.0016781304612113784, "loss": 2.3029, "step": 402590 }, { "epoch": 1.55633900821079, "grad_norm": 0.10509685426950455, "learning_rate": 0.001677509690068058, "loss": 2.3241, "step": 402600 }, { "epoch": 1.5563776654141734, "grad_norm": 0.09460047632455826, "learning_rate": 0.0016768901115719298, "loss": 2.2929, "step": 402610 }, { "epoch": 1.5564163226175567, "grad_norm": 0.09158284217119217, "learning_rate": 0.0016762717188752272, "loss": 2.3087, "step": 402620 }, { "epoch": 1.55645497982094, "grad_norm": 0.09809747338294983, "learning_rate": 0.001675654505195463, "loss": 2.3213, "step": 402630 }, { "epoch": 1.5564936370243232, "grad_norm": 0.10416768491268158, "learning_rate": 0.0016750384638145616, "loss": 2.3046, "step": 402640 }, { "epoch": 1.5565322942277064, "grad_norm": 0.10024569928646088, "learning_rate": 0.001674423588078006, "loss": 2.3101, "step": 402650 }, { "epoch": 1.5565709514310897, "grad_norm": 0.10588731616735458, "learning_rate": 0.0016738098713939983, "loss": 2.3095, "step": 402660 }, { "epoch": 1.556609608634473, "grad_norm": 0.12064171582460403, "learning_rate": 0.0016731973072326362, "loss": 2.3084, "step": 402670 }, { "epoch": 1.5566482658378562, "grad_norm": 0.09845302999019623, "learning_rate": 0.001672585889125102, "loss": 2.3035, "step": 402680 }, { "epoch": 1.5566869230412395, "grad_norm": 0.10145915299654007, "learning_rate": 0.0016719756106628654, "loss": 2.2949, "step": 402690 }, { "epoch": 1.5567255802446227, "grad_norm": 0.13840682804584503, "learning_rate": 0.0016713664654969002, "loss": 2.3077, "step": 402700 }, { "epoch": 1.556764237448006, "grad_norm": 0.09022045880556107, "learning_rate": 0.0016707584473369134, "loss": 2.2954, "step": 402710 }, { "epoch": 1.5568028946513892, "grad_norm": 0.09547263383865356, "learning_rate": 0.0016701515499505872, "loss": 2.2929, "step": 402720 }, { "epoch": 1.5568415518547725, "grad_norm": 0.11516858637332916, "learning_rate": 0.001669545767162834, "loss": 2.3237, "step": 402730 }, { "epoch": 1.556880209058156, "grad_norm": 0.10102009028196335, "learning_rate": 0.001668941092855063, "loss": 2.31, "step": 402740 }, { "epoch": 1.5569188662615392, "grad_norm": 0.11381220817565918, "learning_rate": 0.00166833752096446, "loss": 2.2922, "step": 402750 }, { "epoch": 1.5569575234649224, "grad_norm": 0.0834990069270134, "learning_rate": 0.0016677350454832771, "loss": 2.2958, "step": 402760 }, { "epoch": 1.5569961806683057, "grad_norm": 0.08868042379617691, "learning_rate": 0.0016671336604581353, "loss": 2.2997, "step": 402770 }, { "epoch": 1.5570348378716892, "grad_norm": 0.09132802486419678, "learning_rate": 0.0016665333599893387, "loss": 2.3004, "step": 402780 }, { "epoch": 1.5570734950750724, "grad_norm": 0.12312108278274536, "learning_rate": 0.0016659341382301988, "loss": 2.2988, "step": 402790 }, { "epoch": 1.5571121522784557, "grad_norm": 0.09049829840660095, "learning_rate": 0.0016653359893863697, "loss": 2.3147, "step": 402800 }, { "epoch": 1.557150809481839, "grad_norm": 0.08735433220863342, "learning_rate": 0.0016647389077151958, "loss": 2.2921, "step": 402810 }, { "epoch": 1.5571894666852222, "grad_norm": 0.08642420172691345, "learning_rate": 0.0016641428875250669, "loss": 2.2984, "step": 402820 }, { "epoch": 1.5572281238886054, "grad_norm": 0.10535074025392532, "learning_rate": 0.0016635479231747856, "loss": 2.3132, "step": 402830 }, { "epoch": 1.5572667810919887, "grad_norm": 0.09041352570056915, "learning_rate": 0.0016629540090729456, "loss": 2.312, "step": 402840 }, { "epoch": 1.557305438295372, "grad_norm": 0.09955234080553055, "learning_rate": 0.0016623611396773175, "loss": 2.2881, "step": 402850 }, { "epoch": 1.5573440954987552, "grad_norm": 0.08979614078998566, "learning_rate": 0.0016617693094942447, "loss": 2.3217, "step": 402860 }, { "epoch": 1.5573827527021384, "grad_norm": 0.08955507725477219, "learning_rate": 0.0016611785130780517, "loss": 2.3222, "step": 402870 }, { "epoch": 1.5574214099055217, "grad_norm": 0.0945705994963646, "learning_rate": 0.0016605887450304572, "loss": 2.3083, "step": 402880 }, { "epoch": 1.557460067108905, "grad_norm": 0.10606098920106888, "learning_rate": 0.0016600000000000002, "loss": 2.3259, "step": 402890 }, { "epoch": 1.5574987243122882, "grad_norm": 0.09341254830360413, "learning_rate": 0.0016594122726814719, "loss": 2.3099, "step": 402900 }, { "epoch": 1.5575373815156717, "grad_norm": 0.11154890060424805, "learning_rate": 0.0016588255578153604, "loss": 2.3172, "step": 402910 }, { "epoch": 1.557576038719055, "grad_norm": 0.08776730298995972, "learning_rate": 0.0016582398501872988, "loss": 2.3059, "step": 402920 }, { "epoch": 1.5576146959224382, "grad_norm": 0.0916043370962143, "learning_rate": 0.0016576551446275264, "loss": 2.3114, "step": 402930 }, { "epoch": 1.5576533531258214, "grad_norm": 0.10796567797660828, "learning_rate": 0.0016570714360103552, "loss": 2.3113, "step": 402940 }, { "epoch": 1.557692010329205, "grad_norm": 0.11403820663690567, "learning_rate": 0.0016564887192536467, "loss": 2.3049, "step": 402950 }, { "epoch": 1.5577306675325882, "grad_norm": 0.1020413413643837, "learning_rate": 0.001655906989318295, "loss": 2.311, "step": 402960 }, { "epoch": 1.5577693247359714, "grad_norm": 0.10035323351621628, "learning_rate": 0.0016553262412077184, "loss": 2.2995, "step": 402970 }, { "epoch": 1.5578079819393547, "grad_norm": 0.09566937386989594, "learning_rate": 0.0016547464699673588, "loss": 2.3201, "step": 402980 }, { "epoch": 1.557846639142738, "grad_norm": 0.09444167464971542, "learning_rate": 0.0016541676706841883, "loss": 2.3061, "step": 402990 }, { "epoch": 1.5578852963461212, "grad_norm": 0.10356861352920532, "learning_rate": 0.0016535898384862247, "loss": 2.3177, "step": 403000 }, { "epoch": 1.5579239535495044, "grad_norm": 0.11803413927555084, "learning_rate": 0.0016530129685420505, "loss": 2.3072, "step": 403010 }, { "epoch": 1.5579626107528877, "grad_norm": 0.09005914628505707, "learning_rate": 0.0016524370560603448, "loss": 2.3037, "step": 403020 }, { "epoch": 1.558001267956271, "grad_norm": 0.10417237877845764, "learning_rate": 0.0016518620962894157, "loss": 2.2866, "step": 403030 }, { "epoch": 1.5580399251596542, "grad_norm": 0.10798921436071396, "learning_rate": 0.0016512880845167461, "loss": 2.3025, "step": 403040 }, { "epoch": 1.5580785823630374, "grad_norm": 0.10037175565958023, "learning_rate": 0.0016507150160685405, "loss": 2.3261, "step": 403050 }, { "epoch": 1.5581172395664207, "grad_norm": 0.1046285331249237, "learning_rate": 0.001650142886309282, "loss": 2.303, "step": 403060 }, { "epoch": 1.558155896769804, "grad_norm": 0.09166203439235687, "learning_rate": 0.0016495716906412955, "loss": 2.2987, "step": 403070 }, { "epoch": 1.5581945539731874, "grad_norm": 0.10187604278326035, "learning_rate": 0.0016490014245043151, "loss": 2.3015, "step": 403080 }, { "epoch": 1.5582332111765707, "grad_norm": 0.10249179601669312, "learning_rate": 0.0016484320833750612, "loss": 2.3023, "step": 403090 }, { "epoch": 1.558271868379954, "grad_norm": 0.10240791738033295, "learning_rate": 0.0016478636627668198, "loss": 2.3049, "step": 403100 }, { "epoch": 1.5583105255833372, "grad_norm": 0.09928867965936661, "learning_rate": 0.001647296158229032, "loss": 2.3009, "step": 403110 }, { "epoch": 1.5583491827867206, "grad_norm": 0.11111550778150558, "learning_rate": 0.001646729565346886, "loss": 2.3116, "step": 403120 }, { "epoch": 1.5583878399901039, "grad_norm": 0.10868023335933685, "learning_rate": 0.0016461638797409172, "loss": 2.3134, "step": 403130 }, { "epoch": 1.5584264971934871, "grad_norm": 0.09917468577623367, "learning_rate": 0.001645599097066613, "loss": 2.2945, "step": 403140 }, { "epoch": 1.5584651543968704, "grad_norm": 0.10844592750072479, "learning_rate": 0.001645035213014023, "loss": 2.3096, "step": 403150 }, { "epoch": 1.5585038116002536, "grad_norm": 0.0975717306137085, "learning_rate": 0.0016444722233073766, "loss": 2.2947, "step": 403160 }, { "epoch": 1.558542468803637, "grad_norm": 0.12086108326911926, "learning_rate": 0.001643910123704703, "loss": 2.3201, "step": 403170 }, { "epoch": 1.5585811260070201, "grad_norm": 0.1903742551803589, "learning_rate": 0.00164334890999746, "loss": 2.3063, "step": 403180 }, { "epoch": 1.5586197832104034, "grad_norm": 0.1074889600276947, "learning_rate": 0.001642788578010165, "loss": 2.2973, "step": 403190 }, { "epoch": 1.5586584404137866, "grad_norm": 0.0952894538640976, "learning_rate": 0.0016422291236000337, "loss": 2.3008, "step": 403200 }, { "epoch": 1.55869709761717, "grad_norm": 0.08857908099889755, "learning_rate": 0.0016416705426566217, "loss": 2.3148, "step": 403210 }, { "epoch": 1.5587357548205532, "grad_norm": 0.09984030574560165, "learning_rate": 0.0016411128311014727, "loss": 2.3017, "step": 403220 }, { "epoch": 1.5587744120239364, "grad_norm": 0.10996051877737045, "learning_rate": 0.0016405559848877713, "loss": 2.3167, "step": 403230 }, { "epoch": 1.5588130692273197, "grad_norm": 0.09792662411928177, "learning_rate": 0.0016400000000000002, "loss": 2.3018, "step": 403240 }, { "epoch": 1.5588517264307031, "grad_norm": 0.09244784712791443, "learning_rate": 0.001639444872453601, "loss": 2.2989, "step": 403250 }, { "epoch": 1.5588903836340864, "grad_norm": 0.09633370488882065, "learning_rate": 0.0016388905982946443, "loss": 2.3134, "step": 403260 }, { "epoch": 1.5589290408374696, "grad_norm": 0.09883268177509308, "learning_rate": 0.0016383371735994973, "loss": 2.2933, "step": 403270 }, { "epoch": 1.558967698040853, "grad_norm": 0.09502178430557251, "learning_rate": 0.0016377845944745032, "loss": 2.2998, "step": 403280 }, { "epoch": 1.5590063552442364, "grad_norm": 0.0832071527838707, "learning_rate": 0.001637232857055659, "loss": 2.305, "step": 403290 }, { "epoch": 1.5590450124476196, "grad_norm": 0.08292759954929352, "learning_rate": 0.001636681957508301, "loss": 2.2914, "step": 403300 }, { "epoch": 1.5590836696510029, "grad_norm": 0.1084103211760521, "learning_rate": 0.001636131892026795, "loss": 2.3133, "step": 403310 }, { "epoch": 1.5591223268543861, "grad_norm": 0.09909511357545853, "learning_rate": 0.001635582656834228, "loss": 2.2919, "step": 403320 }, { "epoch": 1.5591609840577694, "grad_norm": 0.10739119350910187, "learning_rate": 0.0016350342481821069, "loss": 2.3024, "step": 403330 }, { "epoch": 1.5591996412611526, "grad_norm": 0.10292712599039078, "learning_rate": 0.0016344866623500588, "loss": 2.3043, "step": 403340 }, { "epoch": 1.5592382984645359, "grad_norm": 0.09883809834718704, "learning_rate": 0.0016339398956455376, "loss": 2.3082, "step": 403350 }, { "epoch": 1.5592769556679191, "grad_norm": 0.08739059418439865, "learning_rate": 0.001633393944403533, "loss": 2.2986, "step": 403360 }, { "epoch": 1.5593156128713024, "grad_norm": 0.12030746042728424, "learning_rate": 0.0016328488049862835, "loss": 2.3061, "step": 403370 }, { "epoch": 1.5593542700746856, "grad_norm": 0.09531065076589584, "learning_rate": 0.0016323044737829953, "loss": 2.3006, "step": 403380 }, { "epoch": 1.5593929272780689, "grad_norm": 0.12471679598093033, "learning_rate": 0.0016317609472095607, "loss": 2.3053, "step": 403390 }, { "epoch": 1.5594315844814521, "grad_norm": 0.11966842412948608, "learning_rate": 0.0016312182217082846, "loss": 2.31, "step": 403400 }, { "epoch": 1.5594702416848354, "grad_norm": 0.08434103429317474, "learning_rate": 0.0016306762937476123, "loss": 2.2828, "step": 403410 }, { "epoch": 1.5595088988882189, "grad_norm": 0.09526640921831131, "learning_rate": 0.0016301351598218615, "loss": 2.3021, "step": 403420 }, { "epoch": 1.5595475560916021, "grad_norm": 0.08554691821336746, "learning_rate": 0.0016295948164509572, "loss": 2.3047, "step": 403430 }, { "epoch": 1.5595862132949854, "grad_norm": 0.08580779284238815, "learning_rate": 0.0016290552601801718, "loss": 2.3107, "step": 403440 }, { "epoch": 1.5596248704983686, "grad_norm": 0.10723388195037842, "learning_rate": 0.0016285164875798658, "loss": 2.3114, "step": 403450 }, { "epoch": 1.559663527701752, "grad_norm": 0.09583356231451035, "learning_rate": 0.0016279784952452346, "loss": 2.3199, "step": 403460 }, { "epoch": 1.5597021849051353, "grad_norm": 0.08936525881290436, "learning_rate": 0.0016274412797960569, "loss": 2.3109, "step": 403470 }, { "epoch": 1.5597408421085186, "grad_norm": 0.08797308802604675, "learning_rate": 0.0016269048378764475, "loss": 2.3036, "step": 403480 }, { "epoch": 1.5597794993119019, "grad_norm": 0.08992758393287659, "learning_rate": 0.001626369166154612, "loss": 2.3087, "step": 403490 }, { "epoch": 1.559818156515285, "grad_norm": 0.08879110962152481, "learning_rate": 0.0016258342613226057, "loss": 2.3088, "step": 403500 }, { "epoch": 1.5598568137186684, "grad_norm": 0.09392860531806946, "learning_rate": 0.0016253001200960961, "loss": 2.3107, "step": 403510 }, { "epoch": 1.5598954709220516, "grad_norm": 0.11898693442344666, "learning_rate": 0.0016247667392141256, "loss": 2.3222, "step": 403520 }, { "epoch": 1.5599341281254349, "grad_norm": 0.08376120030879974, "learning_rate": 0.0016242341154388813, "loss": 2.3027, "step": 403530 }, { "epoch": 1.5599727853288181, "grad_norm": 0.11127032339572906, "learning_rate": 0.0016237022455554645, "loss": 2.2968, "step": 403540 }, { "epoch": 1.5600114425322014, "grad_norm": 0.08576207607984543, "learning_rate": 0.0016231711263716647, "loss": 2.3012, "step": 403550 }, { "epoch": 1.5600500997355846, "grad_norm": 0.10351715236902237, "learning_rate": 0.001622640754717736, "loss": 2.3086, "step": 403560 }, { "epoch": 1.5600887569389679, "grad_norm": 0.08792705088853836, "learning_rate": 0.0016221111274461763, "loss": 2.3092, "step": 403570 }, { "epoch": 1.5601274141423511, "grad_norm": 0.1035882756114006, "learning_rate": 0.00162158224143151, "loss": 2.2993, "step": 403580 }, { "epoch": 1.5601660713457346, "grad_norm": 0.09966867417097092, "learning_rate": 0.0016210540935700716, "loss": 2.3229, "step": 403590 }, { "epoch": 1.5602047285491178, "grad_norm": 0.09071838110685349, "learning_rate": 0.0016205266807797945, "loss": 2.2982, "step": 403600 }, { "epoch": 1.560243385752501, "grad_norm": 0.09316880255937576, "learning_rate": 0.0016200000000000001, "loss": 2.3015, "step": 403610 }, { "epoch": 1.5602820429558844, "grad_norm": 0.08824972063302994, "learning_rate": 0.001619474048191191, "loss": 2.2916, "step": 403620 }, { "epoch": 1.5603207001592678, "grad_norm": 0.09666429460048676, "learning_rate": 0.001618948822334847, "loss": 2.3021, "step": 403630 }, { "epoch": 1.560359357362651, "grad_norm": 0.09371677786111832, "learning_rate": 0.0016184243194332217, "loss": 2.3096, "step": 403640 }, { "epoch": 1.5603980145660343, "grad_norm": 0.08939579129219055, "learning_rate": 0.001617900536509144, "loss": 2.2947, "step": 403650 }, { "epoch": 1.5604366717694176, "grad_norm": 0.09367606788873672, "learning_rate": 0.0016173774706058203, "loss": 2.32, "step": 403660 }, { "epoch": 1.5604753289728008, "grad_norm": 0.08723913878202438, "learning_rate": 0.0016168551187866395, "loss": 2.2983, "step": 403670 }, { "epoch": 1.560513986176184, "grad_norm": 0.09485051780939102, "learning_rate": 0.0016163334781349825, "loss": 2.2985, "step": 403680 }, { "epoch": 1.5605526433795673, "grad_norm": 0.09703823179006577, "learning_rate": 0.001615812545754029, "loss": 2.2967, "step": 403690 }, { "epoch": 1.5605913005829506, "grad_norm": 0.10443862527608871, "learning_rate": 0.0016152923187665732, "loss": 2.3232, "step": 403700 }, { "epoch": 1.5606299577863338, "grad_norm": 0.09260427206754684, "learning_rate": 0.0016147727943148357, "loss": 2.2957, "step": 403710 }, { "epoch": 1.560668614989717, "grad_norm": 0.0898246020078659, "learning_rate": 0.0016142539695602818, "loss": 2.2967, "step": 403720 }, { "epoch": 1.5607072721931003, "grad_norm": 0.10553883761167526, "learning_rate": 0.0016137358416834406, "loss": 2.3074, "step": 403730 }, { "epoch": 1.5607459293964836, "grad_norm": 0.10429342091083527, "learning_rate": 0.0016132184078837257, "loss": 2.3027, "step": 403740 }, { "epoch": 1.560784586599867, "grad_norm": 0.09854055196046829, "learning_rate": 0.0016127016653792582, "loss": 2.2968, "step": 403750 }, { "epoch": 1.5608232438032503, "grad_norm": 0.14768493175506592, "learning_rate": 0.0016121856114066936, "loss": 2.3109, "step": 403760 }, { "epoch": 1.5608619010066336, "grad_norm": 0.09202374517917633, "learning_rate": 0.0016116702432210478, "loss": 2.2951, "step": 403770 }, { "epoch": 1.5609005582100168, "grad_norm": 0.0918547585606575, "learning_rate": 0.0016111555580955283, "loss": 2.3131, "step": 403780 }, { "epoch": 1.5609392154134, "grad_norm": 0.08800943940877914, "learning_rate": 0.0016106415533213643, "loss": 2.2971, "step": 403790 }, { "epoch": 1.5609778726167836, "grad_norm": 0.10626718401908875, "learning_rate": 0.0016101282262076415, "loss": 2.2902, "step": 403800 }, { "epoch": 1.5610165298201668, "grad_norm": 0.10609113425016403, "learning_rate": 0.0016096155740811374, "loss": 2.3076, "step": 403810 }, { "epoch": 1.56105518702355, "grad_norm": 0.09052567183971405, "learning_rate": 0.0016091035942861587, "loss": 2.3103, "step": 403820 }, { "epoch": 1.5610938442269333, "grad_norm": 0.13472731411457062, "learning_rate": 0.0016085922841843814, "loss": 2.3097, "step": 403830 }, { "epoch": 1.5611325014303166, "grad_norm": 0.10542362183332443, "learning_rate": 0.0016080816411546915, "loss": 2.3079, "step": 403840 }, { "epoch": 1.5611711586336998, "grad_norm": 0.1079033687710762, "learning_rate": 0.0016075716625930283, "loss": 2.3053, "step": 403850 }, { "epoch": 1.561209815837083, "grad_norm": 0.09278237819671631, "learning_rate": 0.00160706234591223, "loss": 2.3149, "step": 403860 }, { "epoch": 1.5612484730404663, "grad_norm": 0.08637858927249908, "learning_rate": 0.00160655368854188, "loss": 2.2866, "step": 403870 }, { "epoch": 1.5612871302438496, "grad_norm": 0.10525112599134445, "learning_rate": 0.0016060456879281558, "loss": 2.2973, "step": 403880 }, { "epoch": 1.5613257874472328, "grad_norm": 0.10020878165960312, "learning_rate": 0.0016055383415336797, "loss": 2.303, "step": 403890 }, { "epoch": 1.561364444650616, "grad_norm": 0.08318115025758743, "learning_rate": 0.00160503164683737, "loss": 2.3193, "step": 403900 }, { "epoch": 1.5614031018539993, "grad_norm": 0.11012984067201614, "learning_rate": 0.0016045256013342965, "loss": 2.3035, "step": 403910 }, { "epoch": 1.5614417590573828, "grad_norm": 0.08762725442647934, "learning_rate": 0.0016040202025355335, "loss": 2.3057, "step": 403920 }, { "epoch": 1.561480416260766, "grad_norm": 0.09949534386396408, "learning_rate": 0.00160351544796802, "loss": 2.2934, "step": 403930 }, { "epoch": 1.5615190734641493, "grad_norm": 0.0957961231470108, "learning_rate": 0.0016030113351744157, "loss": 2.3151, "step": 403940 }, { "epoch": 1.5615577306675326, "grad_norm": 0.08565373718738556, "learning_rate": 0.0016025078617129642, "loss": 2.3107, "step": 403950 }, { "epoch": 1.5615963878709158, "grad_norm": 0.09716659784317017, "learning_rate": 0.001602005025157352, "loss": 2.2956, "step": 403960 }, { "epoch": 1.5616350450742993, "grad_norm": 0.10047255456447601, "learning_rate": 0.0016015028230965745, "loss": 2.2965, "step": 403970 }, { "epoch": 1.5616737022776825, "grad_norm": 0.08772067725658417, "learning_rate": 0.0016010012531348, "loss": 2.3126, "step": 403980 }, { "epoch": 1.5617123594810658, "grad_norm": 0.09811752289533615, "learning_rate": 0.0016005003128912365, "loss": 2.2973, "step": 403990 }, { "epoch": 1.561751016684449, "grad_norm": 0.08804940432310104, "learning_rate": 0.0016, "loss": 2.3035, "step": 404000 }, { "epoch": 1.5617896738878323, "grad_norm": 0.0887695848941803, "learning_rate": 0.0015995003121099844, "loss": 2.2913, "step": 404010 }, { "epoch": 1.5618283310912155, "grad_norm": 0.09368077665567398, "learning_rate": 0.0015990012468847317, "loss": 2.306, "step": 404020 }, { "epoch": 1.5618669882945988, "grad_norm": 0.09503257274627686, "learning_rate": 0.0015985028020023054, "loss": 2.3125, "step": 404030 }, { "epoch": 1.561905645497982, "grad_norm": 0.09424030780792236, "learning_rate": 0.0015980049751551644, "loss": 2.3032, "step": 404040 }, { "epoch": 1.5619443027013653, "grad_norm": 0.09154917299747467, "learning_rate": 0.001597507764050038, "loss": 2.304, "step": 404050 }, { "epoch": 1.5619829599047486, "grad_norm": 0.08909681439399719, "learning_rate": 0.0015970111664078024, "loss": 2.3203, "step": 404060 }, { "epoch": 1.5620216171081318, "grad_norm": 0.09056919813156128, "learning_rate": 0.0015965151799633597, "loss": 2.3096, "step": 404070 }, { "epoch": 1.562060274311515, "grad_norm": 0.10641703754663467, "learning_rate": 0.0015960198024655167, "loss": 2.296, "step": 404080 }, { "epoch": 1.5620989315148985, "grad_norm": 0.10420147329568863, "learning_rate": 0.0015955250316768664, "loss": 2.2943, "step": 404090 }, { "epoch": 1.5621375887182818, "grad_norm": 0.10419346392154694, "learning_rate": 0.0015950308653736682, "loss": 2.2908, "step": 404100 }, { "epoch": 1.562176245921665, "grad_norm": 0.08935526013374329, "learning_rate": 0.0015945373013457342, "loss": 2.3078, "step": 404110 }, { "epoch": 1.5622149031250483, "grad_norm": 0.10160025209188461, "learning_rate": 0.0015940443373963112, "loss": 2.3096, "step": 404120 }, { "epoch": 1.5622535603284315, "grad_norm": 0.11097364872694016, "learning_rate": 0.0015935519713419684, "loss": 2.3054, "step": 404130 }, { "epoch": 1.562292217531815, "grad_norm": 0.08406781405210495, "learning_rate": 0.0015930602010124839, "loss": 2.317, "step": 404140 }, { "epoch": 1.5623308747351983, "grad_norm": 0.09016028046607971, "learning_rate": 0.0015925690242507328, "loss": 2.3145, "step": 404150 }, { "epoch": 1.5623695319385815, "grad_norm": 0.0907144844532013, "learning_rate": 0.0015920784389125773, "loss": 2.2806, "step": 404160 }, { "epoch": 1.5624081891419648, "grad_norm": 0.09960740059614182, "learning_rate": 0.0015915884428667573, "loss": 2.2886, "step": 404170 }, { "epoch": 1.562446846345348, "grad_norm": 0.09331320971250534, "learning_rate": 0.0015910990339947826, "loss": 2.3056, "step": 404180 }, { "epoch": 1.5624855035487313, "grad_norm": 0.10006804764270782, "learning_rate": 0.0015906102101908256, "loss": 2.2976, "step": 404190 }, { "epoch": 1.5625241607521145, "grad_norm": 0.11300943791866302, "learning_rate": 0.001590121969361616, "loss": 2.3179, "step": 404200 }, { "epoch": 1.5625628179554978, "grad_norm": 0.09404987841844559, "learning_rate": 0.0015896343094263363, "loss": 2.2953, "step": 404210 }, { "epoch": 1.562601475158881, "grad_norm": 0.10447025299072266, "learning_rate": 0.0015891472283165173, "loss": 2.291, "step": 404220 }, { "epoch": 1.5626401323622643, "grad_norm": 0.09680704772472382, "learning_rate": 0.0015886607239759373, "loss": 2.2923, "step": 404230 }, { "epoch": 1.5626787895656475, "grad_norm": 0.1104864552617073, "learning_rate": 0.00158817479436052, "loss": 2.2877, "step": 404240 }, { "epoch": 1.5627174467690308, "grad_norm": 0.09455239772796631, "learning_rate": 0.001587689437438234, "loss": 2.3164, "step": 404250 }, { "epoch": 1.5627561039724143, "grad_norm": 0.09873755276203156, "learning_rate": 0.001587204651188994, "loss": 2.2793, "step": 404260 }, { "epoch": 1.5627947611757975, "grad_norm": 0.09644533693790436, "learning_rate": 0.0015867204336045635, "loss": 2.3147, "step": 404270 }, { "epoch": 1.5628334183791808, "grad_norm": 0.10555064678192139, "learning_rate": 0.0015862367826884561, "loss": 2.316, "step": 404280 }, { "epoch": 1.562872075582564, "grad_norm": 0.09051340818405151, "learning_rate": 0.0015857536964558406, "loss": 2.2949, "step": 404290 }, { "epoch": 1.5629107327859475, "grad_norm": 0.0887477844953537, "learning_rate": 0.0015852711729334456, "loss": 2.3257, "step": 404300 }, { "epoch": 1.5629493899893308, "grad_norm": 0.09938947856426239, "learning_rate": 0.001584789210159466, "loss": 2.2996, "step": 404310 }, { "epoch": 1.562988047192714, "grad_norm": 0.1027822345495224, "learning_rate": 0.0015843078061834695, "loss": 2.2955, "step": 404320 }, { "epoch": 1.5630267043960973, "grad_norm": 0.09696999937295914, "learning_rate": 0.001583826959066304, "loss": 2.3087, "step": 404330 }, { "epoch": 1.5630653615994805, "grad_norm": 0.09085328131914139, "learning_rate": 0.001583346666880007, "loss": 2.3112, "step": 404340 }, { "epoch": 1.5631040188028638, "grad_norm": 0.10693728923797607, "learning_rate": 0.0015828669277077158, "loss": 2.3, "step": 404350 }, { "epoch": 1.563142676006247, "grad_norm": 0.10152090340852737, "learning_rate": 0.001582387739643578, "loss": 2.2922, "step": 404360 }, { "epoch": 1.5631813332096303, "grad_norm": 0.11221075803041458, "learning_rate": 0.0015819091007926627, "loss": 2.2943, "step": 404370 }, { "epoch": 1.5632199904130135, "grad_norm": 0.1135498657822609, "learning_rate": 0.001581431009270873, "loss": 2.2907, "step": 404380 }, { "epoch": 1.5632586476163968, "grad_norm": 0.10113508999347687, "learning_rate": 0.0015809534632048607, "loss": 2.2966, "step": 404390 }, { "epoch": 1.56329730481978, "grad_norm": 0.09676952660083771, "learning_rate": 0.0015804764607319393, "loss": 2.3061, "step": 404400 }, { "epoch": 1.5633359620231633, "grad_norm": 0.10081905871629715, "learning_rate": 0.00158, "loss": 2.2943, "step": 404410 }, { "epoch": 1.5633746192265465, "grad_norm": 0.11143055558204651, "learning_rate": 0.0015795240791674273, "loss": 2.3154, "step": 404420 }, { "epoch": 1.56341327642993, "grad_norm": 0.10502201318740845, "learning_rate": 0.0015790486964030164, "loss": 2.3043, "step": 404430 }, { "epoch": 1.5634519336333133, "grad_norm": 0.08794165402650833, "learning_rate": 0.0015785738498858906, "loss": 2.2958, "step": 404440 }, { "epoch": 1.5634905908366965, "grad_norm": 0.09427875280380249, "learning_rate": 0.0015780995378054203, "loss": 2.2992, "step": 404450 }, { "epoch": 1.5635292480400798, "grad_norm": 0.09788183867931366, "learning_rate": 0.0015776257583611426, "loss": 2.2962, "step": 404460 }, { "epoch": 1.5635679052434632, "grad_norm": 0.11642619967460632, "learning_rate": 0.0015771525097626805, "loss": 2.3116, "step": 404470 }, { "epoch": 1.5636065624468465, "grad_norm": 0.09682761877775192, "learning_rate": 0.0015766797902296657, "loss": 2.3041, "step": 404480 }, { "epoch": 1.5636452196502297, "grad_norm": 0.08750241249799728, "learning_rate": 0.0015762075979916582, "loss": 2.3025, "step": 404490 }, { "epoch": 1.563683876853613, "grad_norm": 0.09719724208116531, "learning_rate": 0.0015757359312880714, "loss": 2.3009, "step": 404500 }, { "epoch": 1.5637225340569962, "grad_norm": 0.09916465729475021, "learning_rate": 0.001575264788368094, "loss": 2.3055, "step": 404510 }, { "epoch": 1.5637611912603795, "grad_norm": 0.09297320246696472, "learning_rate": 0.001574794167490614, "loss": 2.2828, "step": 404520 }, { "epoch": 1.5637998484637627, "grad_norm": 0.10248459875583649, "learning_rate": 0.0015743240669241448, "loss": 2.2927, "step": 404530 }, { "epoch": 1.563838505667146, "grad_norm": 0.11836646497249603, "learning_rate": 0.0015738544849467498, "loss": 2.3037, "step": 404540 }, { "epoch": 1.5638771628705292, "grad_norm": 0.10327929258346558, "learning_rate": 0.0015733854198459692, "loss": 2.2934, "step": 404550 }, { "epoch": 1.5639158200739125, "grad_norm": 0.0815177783370018, "learning_rate": 0.0015729168699187474, "loss": 2.3026, "step": 404560 }, { "epoch": 1.5639544772772958, "grad_norm": 0.10899542272090912, "learning_rate": 0.0015724488334713609, "loss": 2.3002, "step": 404570 }, { "epoch": 1.563993134480679, "grad_norm": 0.10903110355138779, "learning_rate": 0.0015719813088193463, "loss": 2.3106, "step": 404580 }, { "epoch": 1.5640317916840623, "grad_norm": 0.09604477882385254, "learning_rate": 0.001571514294287429, "loss": 2.2954, "step": 404590 }, { "epoch": 1.5640704488874457, "grad_norm": 0.09511998295783997, "learning_rate": 0.0015710477882094555, "loss": 2.2929, "step": 404600 }, { "epoch": 1.564109106090829, "grad_norm": 0.08982515335083008, "learning_rate": 0.0015705817889283223, "loss": 2.2926, "step": 404610 }, { "epoch": 1.5641477632942122, "grad_norm": 0.08804116398096085, "learning_rate": 0.0015701162947959065, "loss": 2.307, "step": 404620 }, { "epoch": 1.5641864204975955, "grad_norm": 0.0896340161561966, "learning_rate": 0.0015696513041729998, "loss": 2.3074, "step": 404630 }, { "epoch": 1.564225077700979, "grad_norm": 0.12138937413692474, "learning_rate": 0.0015691868154292397, "loss": 2.3095, "step": 404640 }, { "epoch": 1.5642637349043622, "grad_norm": 0.10645350068807602, "learning_rate": 0.0015687228269430436, "loss": 2.3073, "step": 404650 }, { "epoch": 1.5643023921077455, "grad_norm": 0.12402874231338501, "learning_rate": 0.0015682593371015418, "loss": 2.2938, "step": 404660 }, { "epoch": 1.5643410493111287, "grad_norm": 0.08726049959659576, "learning_rate": 0.0015677963443005139, "loss": 2.2876, "step": 404670 }, { "epoch": 1.564379706514512, "grad_norm": 0.11048829555511475, "learning_rate": 0.0015673338469443213, "loss": 2.2954, "step": 404680 }, { "epoch": 1.5644183637178952, "grad_norm": 0.09738937020301819, "learning_rate": 0.0015668718434458458, "loss": 2.3104, "step": 404690 }, { "epoch": 1.5644570209212785, "grad_norm": 0.09116674214601517, "learning_rate": 0.001566410332226424, "loss": 2.2856, "step": 404700 }, { "epoch": 1.5644956781246617, "grad_norm": 0.08973516523838043, "learning_rate": 0.0015659493117157859, "loss": 2.277, "step": 404710 }, { "epoch": 1.564534335328045, "grad_norm": 0.09033355116844177, "learning_rate": 0.0015654887803519912, "loss": 2.3133, "step": 404720 }, { "epoch": 1.5645729925314282, "grad_norm": 0.1006704568862915, "learning_rate": 0.001565028736581369, "loss": 2.2991, "step": 404730 }, { "epoch": 1.5646116497348115, "grad_norm": 0.1322348564863205, "learning_rate": 0.0015645691788584552, "loss": 2.3152, "step": 404740 }, { "epoch": 1.5646503069381947, "grad_norm": 0.08889655768871307, "learning_rate": 0.0015641101056459328, "loss": 2.301, "step": 404750 }, { "epoch": 1.564688964141578, "grad_norm": 0.09167850762605667, "learning_rate": 0.0015636515154145714, "loss": 2.3004, "step": 404760 }, { "epoch": 1.5647276213449615, "grad_norm": 0.09554719924926758, "learning_rate": 0.001563193406643169, "loss": 2.3018, "step": 404770 }, { "epoch": 1.5647662785483447, "grad_norm": 0.09353359788656235, "learning_rate": 0.001562735777818491, "loss": 2.2896, "step": 404780 }, { "epoch": 1.564804935751728, "grad_norm": 0.1024642139673233, "learning_rate": 0.0015622786274352143, "loss": 2.3121, "step": 404790 }, { "epoch": 1.5648435929551112, "grad_norm": 0.09727098792791367, "learning_rate": 0.001561821953995867, "loss": 2.2878, "step": 404800 }, { "epoch": 1.5648822501584947, "grad_norm": 0.09108156710863113, "learning_rate": 0.0015613657560107738, "loss": 2.2889, "step": 404810 }, { "epoch": 1.564920907361878, "grad_norm": 0.09672145545482635, "learning_rate": 0.001560910031997997, "loss": 2.2887, "step": 404820 }, { "epoch": 1.5649595645652612, "grad_norm": 0.09327162802219391, "learning_rate": 0.001560454780483282, "loss": 2.297, "step": 404830 }, { "epoch": 1.5649982217686444, "grad_norm": 0.11216502636671066, "learning_rate": 0.0015600000000000002, "loss": 2.2957, "step": 404840 }, { "epoch": 1.5650368789720277, "grad_norm": 0.09545884281396866, "learning_rate": 0.001559545689089095, "loss": 2.2991, "step": 404850 }, { "epoch": 1.565075536175411, "grad_norm": 0.10925882309675217, "learning_rate": 0.001559091846299028, "loss": 2.3099, "step": 404860 }, { "epoch": 1.5651141933787942, "grad_norm": 0.10249479860067368, "learning_rate": 0.0015586384701857217, "loss": 2.2967, "step": 404870 }, { "epoch": 1.5651528505821775, "grad_norm": 0.08673015236854553, "learning_rate": 0.0015581855593125096, "loss": 2.3004, "step": 404880 }, { "epoch": 1.5651915077855607, "grad_norm": 0.09049186110496521, "learning_rate": 0.0015577331122500805, "loss": 2.2948, "step": 404890 }, { "epoch": 1.565230164988944, "grad_norm": 0.09595070779323578, "learning_rate": 0.0015572811275764268, "loss": 2.2978, "step": 404900 }, { "epoch": 1.5652688221923272, "grad_norm": 0.09924677759408951, "learning_rate": 0.0015568296038767932, "loss": 2.2994, "step": 404910 }, { "epoch": 1.5653074793957105, "grad_norm": 0.12818053364753723, "learning_rate": 0.0015563785397436235, "loss": 2.2861, "step": 404920 }, { "epoch": 1.5653461365990937, "grad_norm": 0.09655303508043289, "learning_rate": 0.0015559279337765098, "loss": 2.308, "step": 404930 }, { "epoch": 1.5653847938024772, "grad_norm": 0.09840988367795944, "learning_rate": 0.0015554777845821425, "loss": 2.3117, "step": 404940 }, { "epoch": 1.5654234510058604, "grad_norm": 0.09422668814659119, "learning_rate": 0.0015550280907742603, "loss": 2.2959, "step": 404950 }, { "epoch": 1.5654621082092437, "grad_norm": 0.09979169815778732, "learning_rate": 0.0015545788509735983, "loss": 2.3184, "step": 404960 }, { "epoch": 1.565500765412627, "grad_norm": 0.09588532149791718, "learning_rate": 0.001554130063807841, "loss": 2.3002, "step": 404970 }, { "epoch": 1.5655394226160104, "grad_norm": 0.10011453926563263, "learning_rate": 0.001553681727911572, "loss": 2.3092, "step": 404980 }, { "epoch": 1.5655780798193937, "grad_norm": 0.09356016665697098, "learning_rate": 0.0015532338419262267, "loss": 2.2948, "step": 404990 }, { "epoch": 1.565616737022777, "grad_norm": 0.3818550109863281, "learning_rate": 0.0015527864045000422, "loss": 2.2887, "step": 405000 }, { "epoch": 1.5656553942261602, "grad_norm": 0.08383811265230179, "learning_rate": 0.0015523394142880123, "loss": 2.3076, "step": 405010 }, { "epoch": 1.5656940514295434, "grad_norm": 0.09441009163856506, "learning_rate": 0.0015518928699518385, "loss": 2.3064, "step": 405020 }, { "epoch": 1.5657327086329267, "grad_norm": 0.09960923343896866, "learning_rate": 0.001551446770159884, "loss": 2.3078, "step": 405030 }, { "epoch": 1.56577136583631, "grad_norm": 0.0911037027835846, "learning_rate": 0.001551001113587127, "loss": 2.3047, "step": 405040 }, { "epoch": 1.5658100230396932, "grad_norm": 0.08394049108028412, "learning_rate": 0.0015505558989151154, "loss": 2.3029, "step": 405050 }, { "epoch": 1.5658486802430764, "grad_norm": 0.09003666043281555, "learning_rate": 0.0015501111248319205, "loss": 2.2986, "step": 405060 }, { "epoch": 1.5658873374464597, "grad_norm": 0.1031719297170639, "learning_rate": 0.001549666790032092, "loss": 2.3044, "step": 405070 }, { "epoch": 1.565925994649843, "grad_norm": 0.10729104280471802, "learning_rate": 0.0015492228932166144, "loss": 2.2966, "step": 405080 }, { "epoch": 1.5659646518532262, "grad_norm": 0.09601952135562897, "learning_rate": 0.0015487794330928607, "loss": 2.3064, "step": 405090 }, { "epoch": 1.5660033090566094, "grad_norm": 0.09457894414663315, "learning_rate": 0.0015483364083745514, "loss": 2.2983, "step": 405100 }, { "epoch": 1.566041966259993, "grad_norm": 0.09483607113361359, "learning_rate": 0.0015478938177817074, "loss": 2.3162, "step": 405110 }, { "epoch": 1.5660806234633762, "grad_norm": 0.08963212370872498, "learning_rate": 0.0015474516600406097, "loss": 2.2948, "step": 405120 }, { "epoch": 1.5661192806667594, "grad_norm": 0.10295980423688889, "learning_rate": 0.001547009933883755, "loss": 2.2943, "step": 405130 }, { "epoch": 1.5661579378701427, "grad_norm": 0.08700409531593323, "learning_rate": 0.0015465686380498147, "loss": 2.299, "step": 405140 }, { "epoch": 1.5661965950735262, "grad_norm": 0.10050175338983536, "learning_rate": 0.0015461277712835913, "loss": 2.319, "step": 405150 }, { "epoch": 1.5662352522769094, "grad_norm": 0.09328875690698624, "learning_rate": 0.0015456873323359782, "loss": 2.2914, "step": 405160 }, { "epoch": 1.5662739094802927, "grad_norm": 0.09559834003448486, "learning_rate": 0.001545247319963917, "loss": 2.317, "step": 405170 }, { "epoch": 1.566312566683676, "grad_norm": 0.10605069994926453, "learning_rate": 0.0015448077329303582, "loss": 2.3129, "step": 405180 }, { "epoch": 1.5663512238870592, "grad_norm": 0.09179332107305527, "learning_rate": 0.0015443685700042193, "loss": 2.3062, "step": 405190 }, { "epoch": 1.5663898810904424, "grad_norm": 0.09346537292003632, "learning_rate": 0.001543929829960345, "loss": 2.2932, "step": 405200 }, { "epoch": 1.5664285382938257, "grad_norm": 0.09841105341911316, "learning_rate": 0.001543491511579467, "loss": 2.3092, "step": 405210 }, { "epoch": 1.566467195497209, "grad_norm": 0.09264381974935532, "learning_rate": 0.0015430536136481657, "loss": 2.2978, "step": 405220 }, { "epoch": 1.5665058527005922, "grad_norm": 0.10439188033342361, "learning_rate": 0.0015426161349588292, "loss": 2.3002, "step": 405230 }, { "epoch": 1.5665445099039754, "grad_norm": 0.09768210351467133, "learning_rate": 0.0015421790743096163, "loss": 2.2982, "step": 405240 }, { "epoch": 1.5665831671073587, "grad_norm": 0.11853209882974625, "learning_rate": 0.001541742430504416, "loss": 2.3093, "step": 405250 }, { "epoch": 1.566621824310742, "grad_norm": 0.10544905066490173, "learning_rate": 0.0015413062023528115, "loss": 2.291, "step": 405260 }, { "epoch": 1.5666604815141252, "grad_norm": 0.10261768102645874, "learning_rate": 0.0015408703886700401, "loss": 2.307, "step": 405270 }, { "epoch": 1.5666991387175087, "grad_norm": 0.10163643211126328, "learning_rate": 0.0015404349882769577, "loss": 2.3067, "step": 405280 }, { "epoch": 1.566737795920892, "grad_norm": 0.10879616439342499, "learning_rate": 0.0015400000000000001, "loss": 2.2961, "step": 405290 }, { "epoch": 1.5667764531242752, "grad_norm": 0.09426324814558029, "learning_rate": 0.0015395654226711467, "loss": 2.2886, "step": 405300 }, { "epoch": 1.5668151103276584, "grad_norm": 0.09058332443237305, "learning_rate": 0.0015391312551278835, "loss": 2.2944, "step": 405310 }, { "epoch": 1.5668537675310419, "grad_norm": 0.09346079081296921, "learning_rate": 0.0015386974962131683, "loss": 2.2927, "step": 405320 }, { "epoch": 1.5668924247344251, "grad_norm": 0.09960110485553741, "learning_rate": 0.0015382641447753923, "loss": 2.3021, "step": 405330 }, { "epoch": 1.5669310819378084, "grad_norm": 0.09202473610639572, "learning_rate": 0.0015378311996683462, "loss": 2.295, "step": 405340 }, { "epoch": 1.5669697391411916, "grad_norm": 0.09738468378782272, "learning_rate": 0.0015373986597511848, "loss": 2.2838, "step": 405350 }, { "epoch": 1.567008396344575, "grad_norm": 0.0873396024107933, "learning_rate": 0.001536966523888391, "loss": 2.3042, "step": 405360 }, { "epoch": 1.5670470535479581, "grad_norm": 0.0920250341296196, "learning_rate": 0.0015365347909497415, "loss": 2.2925, "step": 405370 }, { "epoch": 1.5670857107513414, "grad_norm": 0.09826283156871796, "learning_rate": 0.001536103459810272, "loss": 2.3059, "step": 405380 }, { "epoch": 1.5671243679547247, "grad_norm": 0.08695420622825623, "learning_rate": 0.001535672529350244, "loss": 2.3007, "step": 405390 }, { "epoch": 1.567163025158108, "grad_norm": 0.10699399560689926, "learning_rate": 0.00153524199845511, "loss": 2.2854, "step": 405400 }, { "epoch": 1.5672016823614912, "grad_norm": 0.10169906914234161, "learning_rate": 0.0015348118660154797, "loss": 2.2965, "step": 405410 }, { "epoch": 1.5672403395648744, "grad_norm": 0.08803411573171616, "learning_rate": 0.0015343821309270872, "loss": 2.2862, "step": 405420 }, { "epoch": 1.5672789967682577, "grad_norm": 0.11092033982276917, "learning_rate": 0.0015339527920907583, "loss": 2.2922, "step": 405430 }, { "epoch": 1.567317653971641, "grad_norm": 0.10854346305131912, "learning_rate": 0.0015335238484123761, "loss": 2.3048, "step": 405440 }, { "epoch": 1.5673563111750244, "grad_norm": 0.09341815859079361, "learning_rate": 0.00153309529880285, "loss": 2.3061, "step": 405450 }, { "epoch": 1.5673949683784076, "grad_norm": 0.0901259183883667, "learning_rate": 0.001532667142178083, "loss": 2.3048, "step": 405460 }, { "epoch": 1.567433625581791, "grad_norm": 0.09512338042259216, "learning_rate": 0.00153223937745894, "loss": 2.3011, "step": 405470 }, { "epoch": 1.5674722827851741, "grad_norm": 0.09469451010227203, "learning_rate": 0.0015318120035712152, "loss": 2.2995, "step": 405480 }, { "epoch": 1.5675109399885576, "grad_norm": 0.10558336973190308, "learning_rate": 0.0015313850194456006, "loss": 2.2988, "step": 405490 }, { "epoch": 1.5675495971919409, "grad_norm": 0.08973246067762375, "learning_rate": 0.0015309584240176572, "loss": 2.291, "step": 405500 }, { "epoch": 1.5675882543953241, "grad_norm": 0.08959843218326569, "learning_rate": 0.0015305322162277799, "loss": 2.2912, "step": 405510 }, { "epoch": 1.5676269115987074, "grad_norm": 0.12382600456476212, "learning_rate": 0.001530106395021171, "loss": 2.3053, "step": 405520 }, { "epoch": 1.5676655688020906, "grad_norm": 0.09585974365472794, "learning_rate": 0.0015296809593478061, "loss": 2.2898, "step": 405530 }, { "epoch": 1.5677042260054739, "grad_norm": 0.09502732008695602, "learning_rate": 0.001529255908162407, "loss": 2.3122, "step": 405540 }, { "epoch": 1.5677428832088571, "grad_norm": 0.0948452278971672, "learning_rate": 0.0015288312404244102, "loss": 2.2958, "step": 405550 }, { "epoch": 1.5677815404122404, "grad_norm": 0.11344671249389648, "learning_rate": 0.0015284069550979362, "loss": 2.3004, "step": 405560 }, { "epoch": 1.5678201976156236, "grad_norm": 0.09710563719272614, "learning_rate": 0.0015279830511517622, "loss": 2.2906, "step": 405570 }, { "epoch": 1.5678588548190069, "grad_norm": 0.09709560871124268, "learning_rate": 0.0015275595275592913, "loss": 2.3023, "step": 405580 }, { "epoch": 1.5678975120223901, "grad_norm": 0.11424267292022705, "learning_rate": 0.0015271363832985245, "loss": 2.2982, "step": 405590 }, { "epoch": 1.5679361692257734, "grad_norm": 0.10697871446609497, "learning_rate": 0.001526713617352031, "loss": 2.3051, "step": 405600 }, { "epoch": 1.5679748264291566, "grad_norm": 0.09038961678743362, "learning_rate": 0.0015262912287069197, "loss": 2.2992, "step": 405610 }, { "epoch": 1.5680134836325401, "grad_norm": 0.10185104608535767, "learning_rate": 0.0015258692163548121, "loss": 2.304, "step": 405620 }, { "epoch": 1.5680521408359234, "grad_norm": 0.09717609733343124, "learning_rate": 0.0015254475792918132, "loss": 2.3047, "step": 405630 }, { "epoch": 1.5680907980393066, "grad_norm": 0.09980986267328262, "learning_rate": 0.0015250263165184834, "loss": 2.2991, "step": 405640 }, { "epoch": 1.5681294552426899, "grad_norm": 0.08376102149486542, "learning_rate": 0.0015246054270398116, "loss": 2.2921, "step": 405650 }, { "epoch": 1.5681681124460733, "grad_norm": 0.10156821459531784, "learning_rate": 0.0015241849098651871, "loss": 2.3028, "step": 405660 }, { "epoch": 1.5682067696494566, "grad_norm": 0.1134498193860054, "learning_rate": 0.0015237647640083739, "loss": 2.2907, "step": 405670 }, { "epoch": 1.5682454268528399, "grad_norm": 0.0972135066986084, "learning_rate": 0.0015233449884874805, "loss": 2.301, "step": 405680 }, { "epoch": 1.568284084056223, "grad_norm": 0.09666749089956284, "learning_rate": 0.0015229255823249375, "loss": 2.2921, "step": 405690 }, { "epoch": 1.5683227412596064, "grad_norm": 0.10058042407035828, "learning_rate": 0.001522506544547467, "loss": 2.3025, "step": 405700 }, { "epoch": 1.5683613984629896, "grad_norm": 0.10395345836877823, "learning_rate": 0.0015220878741860591, "loss": 2.2992, "step": 405710 }, { "epoch": 1.5684000556663729, "grad_norm": 0.10118017345666885, "learning_rate": 0.0015216695702759439, "loss": 2.2953, "step": 405720 }, { "epoch": 1.5684387128697561, "grad_norm": 0.11035631597042084, "learning_rate": 0.001521251631856567, "loss": 2.2899, "step": 405730 }, { "epoch": 1.5684773700731394, "grad_norm": 0.1244610846042633, "learning_rate": 0.0015208340579715624, "loss": 2.2977, "step": 405740 }, { "epoch": 1.5685160272765226, "grad_norm": 0.08581045269966125, "learning_rate": 0.001520416847668728, "loss": 2.2982, "step": 405750 }, { "epoch": 1.5685546844799059, "grad_norm": 0.09502839297056198, "learning_rate": 0.00152, "loss": 2.2957, "step": 405760 }, { "epoch": 1.5685933416832891, "grad_norm": 0.08598793298006058, "learning_rate": 0.0015195835140214276, "loss": 2.2969, "step": 405770 }, { "epoch": 1.5686319988866726, "grad_norm": 0.10448497533798218, "learning_rate": 0.001519167388793148, "loss": 2.2906, "step": 405780 }, { "epoch": 1.5686706560900558, "grad_norm": 0.10341082513332367, "learning_rate": 0.0015187516233793614, "loss": 2.2806, "step": 405790 }, { "epoch": 1.568709313293439, "grad_norm": 0.08865875750780106, "learning_rate": 0.001518336216848308, "loss": 2.29, "step": 405800 }, { "epoch": 1.5687479704968224, "grad_norm": 0.09579867124557495, "learning_rate": 0.001517921168272242, "loss": 2.3151, "step": 405810 }, { "epoch": 1.5687866277002056, "grad_norm": 0.09985391795635223, "learning_rate": 0.0015175064767274074, "loss": 2.3067, "step": 405820 }, { "epoch": 1.568825284903589, "grad_norm": 0.0938592478632927, "learning_rate": 0.0015170921412940147, "loss": 2.2875, "step": 405830 }, { "epoch": 1.5688639421069723, "grad_norm": 0.09133358299732208, "learning_rate": 0.001516678161056217, "loss": 2.3052, "step": 405840 }, { "epoch": 1.5689025993103556, "grad_norm": 0.23263610899448395, "learning_rate": 0.0015162645351020872, "loss": 2.2854, "step": 405850 }, { "epoch": 1.5689412565137388, "grad_norm": 0.09544631838798523, "learning_rate": 0.0015158512625235918, "loss": 2.2988, "step": 405860 }, { "epoch": 1.568979913717122, "grad_norm": 0.0995941236615181, "learning_rate": 0.0015154383424165713, "loss": 2.2911, "step": 405870 }, { "epoch": 1.5690185709205053, "grad_norm": 0.09293455630540848, "learning_rate": 0.0015150257738807142, "loss": 2.3112, "step": 405880 }, { "epoch": 1.5690572281238886, "grad_norm": 0.10342123359441757, "learning_rate": 0.0015146135560195361, "loss": 2.307, "step": 405890 }, { "epoch": 1.5690958853272718, "grad_norm": 0.10460405796766281, "learning_rate": 0.0015142016879403552, "loss": 2.2901, "step": 405900 }, { "epoch": 1.569134542530655, "grad_norm": 0.09743546694517136, "learning_rate": 0.0015137901687542713, "loss": 2.2987, "step": 405910 }, { "epoch": 1.5691731997340383, "grad_norm": 0.11557811498641968, "learning_rate": 0.0015133789975761425, "loss": 2.3131, "step": 405920 }, { "epoch": 1.5692118569374216, "grad_norm": 0.11989576369524002, "learning_rate": 0.0015129681735245634, "loss": 2.3038, "step": 405930 }, { "epoch": 1.5692505141408049, "grad_norm": 0.09218642115592957, "learning_rate": 0.0015125576957218426, "loss": 2.2926, "step": 405940 }, { "epoch": 1.5692891713441883, "grad_norm": 0.11180219054222107, "learning_rate": 0.0015121475632939815, "loss": 2.2939, "step": 405950 }, { "epoch": 1.5693278285475716, "grad_norm": 0.09809017926454544, "learning_rate": 0.0015117377753706518, "loss": 2.2901, "step": 405960 }, { "epoch": 1.5693664857509548, "grad_norm": 0.08998557180166245, "learning_rate": 0.0015113283310851753, "loss": 2.2973, "step": 405970 }, { "epoch": 1.569405142954338, "grad_norm": 0.10271146148443222, "learning_rate": 0.0015109192295745006, "loss": 2.2886, "step": 405980 }, { "epoch": 1.5694438001577213, "grad_norm": 0.10330662876367569, "learning_rate": 0.0015105104699791835, "loss": 2.2929, "step": 405990 }, { "epoch": 1.5694824573611048, "grad_norm": 0.0977010577917099, "learning_rate": 0.0015101020514433645, "loss": 2.3121, "step": 406000 }, { "epoch": 1.569521114564488, "grad_norm": 0.12290336936712265, "learning_rate": 0.0015096939731147497, "loss": 2.2993, "step": 406010 }, { "epoch": 1.5695597717678713, "grad_norm": 0.0925399586558342, "learning_rate": 0.0015092862341445881, "loss": 2.2953, "step": 406020 }, { "epoch": 1.5695984289712546, "grad_norm": 0.09576588124036789, "learning_rate": 0.0015088788336876532, "loss": 2.2992, "step": 406030 }, { "epoch": 1.5696370861746378, "grad_norm": 0.1092677116394043, "learning_rate": 0.0015084717709022198, "loss": 2.2854, "step": 406040 }, { "epoch": 1.569675743378021, "grad_norm": 0.1083604022860527, "learning_rate": 0.0015080650449500463, "loss": 2.2948, "step": 406050 }, { "epoch": 1.5697144005814043, "grad_norm": 0.09485524892807007, "learning_rate": 0.0015076586549963532, "loss": 2.2844, "step": 406060 }, { "epoch": 1.5697530577847876, "grad_norm": 0.09845910221338272, "learning_rate": 0.0015072526002098032, "loss": 2.2903, "step": 406070 }, { "epoch": 1.5697917149881708, "grad_norm": 0.09440339356660843, "learning_rate": 0.0015068468797624819, "loss": 2.283, "step": 406080 }, { "epoch": 1.569830372191554, "grad_norm": 0.10253866016864777, "learning_rate": 0.0015064414928298772, "loss": 2.2678, "step": 406090 }, { "epoch": 1.5698690293949373, "grad_norm": 0.09150514751672745, "learning_rate": 0.0015060364385908614, "loss": 2.288, "step": 406100 }, { "epoch": 1.5699076865983206, "grad_norm": 0.10575828701257706, "learning_rate": 0.0015056317162276689, "loss": 2.2965, "step": 406110 }, { "epoch": 1.569946343801704, "grad_norm": 0.1001557931303978, "learning_rate": 0.0015052273249258807, "loss": 2.3123, "step": 406120 }, { "epoch": 1.5699850010050873, "grad_norm": 0.09956227242946625, "learning_rate": 0.0015048232638744021, "loss": 2.287, "step": 406130 }, { "epoch": 1.5700236582084706, "grad_norm": 0.0972357839345932, "learning_rate": 0.0015044195322654452, "loss": 2.2986, "step": 406140 }, { "epoch": 1.5700623154118538, "grad_norm": 0.09053231030702591, "learning_rate": 0.0015040161292945102, "loss": 2.2851, "step": 406150 }, { "epoch": 1.5701009726152373, "grad_norm": 0.0977761521935463, "learning_rate": 0.0015036130541603656, "loss": 2.3055, "step": 406160 }, { "epoch": 1.5701396298186205, "grad_norm": 0.08875074982643127, "learning_rate": 0.0015032103060650312, "loss": 2.3, "step": 406170 }, { "epoch": 1.5701782870220038, "grad_norm": 0.10786014795303345, "learning_rate": 0.001502807884213758, "loss": 2.3062, "step": 406180 }, { "epoch": 1.570216944225387, "grad_norm": 0.09021253883838654, "learning_rate": 0.001502405787815011, "loss": 2.2884, "step": 406190 }, { "epoch": 1.5702556014287703, "grad_norm": 0.1020994558930397, "learning_rate": 0.0015020040160804507, "loss": 2.3191, "step": 406200 }, { "epoch": 1.5702942586321536, "grad_norm": 0.08927156031131744, "learning_rate": 0.0015016025682249156, "loss": 2.3037, "step": 406210 }, { "epoch": 1.5703329158355368, "grad_norm": 0.09674308449029922, "learning_rate": 0.0015012014434664029, "loss": 2.2899, "step": 406220 }, { "epoch": 1.57037157303892, "grad_norm": 0.10415749251842499, "learning_rate": 0.0015008006410260526, "loss": 2.2893, "step": 406230 }, { "epoch": 1.5704102302423033, "grad_norm": 0.08874905109405518, "learning_rate": 0.001500400160128128, "loss": 2.29, "step": 406240 }, { "epoch": 1.5704488874456866, "grad_norm": 0.09588893502950668, "learning_rate": 0.0015, "loss": 2.3002, "step": 406250 }, { "epoch": 1.5704875446490698, "grad_norm": 0.10821720212697983, "learning_rate": 0.001499600159872128, "loss": 2.2989, "step": 406260 }, { "epoch": 1.570526201852453, "grad_norm": 0.095454141497612, "learning_rate": 0.0014992006389780434, "loss": 2.2943, "step": 406270 }, { "epoch": 1.5705648590558363, "grad_norm": 0.0938018187880516, "learning_rate": 0.0014988014365543334, "loss": 2.2917, "step": 406280 }, { "epoch": 1.5706035162592198, "grad_norm": 0.09858124703168869, "learning_rate": 0.0014984025518406218, "loss": 2.2879, "step": 406290 }, { "epoch": 1.570642173462603, "grad_norm": 0.10998082906007767, "learning_rate": 0.0014980039840795547, "loss": 2.297, "step": 406300 }, { "epoch": 1.5706808306659863, "grad_norm": 0.10663913190364838, "learning_rate": 0.0014976057325167811, "loss": 2.2791, "step": 406310 }, { "epoch": 1.5707194878693695, "grad_norm": 0.08741891384124756, "learning_rate": 0.0014972077964009384, "loss": 2.2934, "step": 406320 }, { "epoch": 1.570758145072753, "grad_norm": 0.09392169117927551, "learning_rate": 0.0014968101749836351, "loss": 2.3063, "step": 406330 }, { "epoch": 1.5707968022761363, "grad_norm": 0.09536357969045639, "learning_rate": 0.0014964128675194332, "loss": 2.3099, "step": 406340 }, { "epoch": 1.5708354594795195, "grad_norm": 0.09603865444660187, "learning_rate": 0.0014960158732658338, "loss": 2.3019, "step": 406350 }, { "epoch": 1.5708741166829028, "grad_norm": 0.11046824604272842, "learning_rate": 0.0014956191914832604, "loss": 2.2862, "step": 406360 }, { "epoch": 1.570912773886286, "grad_norm": 0.1022503674030304, "learning_rate": 0.0014952228214350413, "loss": 2.2962, "step": 406370 }, { "epoch": 1.5709514310896693, "grad_norm": 0.08685960620641708, "learning_rate": 0.0014948267623873966, "loss": 2.3009, "step": 406380 }, { "epoch": 1.5709900882930525, "grad_norm": 0.10193188488483429, "learning_rate": 0.0014944310136094186, "loss": 2.3034, "step": 406390 }, { "epoch": 1.5710287454964358, "grad_norm": 0.10660498589277267, "learning_rate": 0.0014940355743730594, "loss": 2.304, "step": 406400 }, { "epoch": 1.571067402699819, "grad_norm": 0.09901173412799835, "learning_rate": 0.0014936404439531135, "loss": 2.3006, "step": 406410 }, { "epoch": 1.5711060599032023, "grad_norm": 0.09949599951505661, "learning_rate": 0.001493245621627203, "loss": 2.2982, "step": 406420 }, { "epoch": 1.5711447171065855, "grad_norm": 0.09687718749046326, "learning_rate": 0.0014928511066757614, "loss": 2.2955, "step": 406430 }, { "epoch": 1.5711833743099688, "grad_norm": 0.08860356360673904, "learning_rate": 0.0014924568983820192, "loss": 2.2951, "step": 406440 }, { "epoch": 1.571222031513352, "grad_norm": 0.09513945877552032, "learning_rate": 0.0014920629960319882, "loss": 2.2821, "step": 406450 }, { "epoch": 1.5712606887167355, "grad_norm": 0.08987179398536682, "learning_rate": 0.0014916693989144468, "loss": 2.2918, "step": 406460 }, { "epoch": 1.5712993459201188, "grad_norm": 0.08767048269510269, "learning_rate": 0.001491276106320924, "loss": 2.3, "step": 406470 }, { "epoch": 1.571338003123502, "grad_norm": 0.09782981127500534, "learning_rate": 0.0014908831175456858, "loss": 2.2984, "step": 406480 }, { "epoch": 1.5713766603268853, "grad_norm": 0.1317455768585205, "learning_rate": 0.0014904904318857202, "loss": 2.2922, "step": 406490 }, { "epoch": 1.5714153175302688, "grad_norm": 0.0975729376077652, "learning_rate": 0.0014900980486407215, "loss": 2.2993, "step": 406500 }, { "epoch": 1.571453974733652, "grad_norm": 0.10746076703071594, "learning_rate": 0.0014897059671130771, "loss": 2.2949, "step": 406510 }, { "epoch": 1.5714926319370353, "grad_norm": 0.10250459611415863, "learning_rate": 0.0014893141866078518, "loss": 2.2775, "step": 406520 }, { "epoch": 1.5715312891404185, "grad_norm": 0.10624974966049194, "learning_rate": 0.0014889227064327746, "loss": 2.2846, "step": 406530 }, { "epoch": 1.5715699463438018, "grad_norm": 0.09488651156425476, "learning_rate": 0.0014885315258982234, "loss": 2.2932, "step": 406540 }, { "epoch": 1.571608603547185, "grad_norm": 0.09613313525915146, "learning_rate": 0.001488140644317211, "loss": 2.2969, "step": 406550 }, { "epoch": 1.5716472607505683, "grad_norm": 0.10298208147287369, "learning_rate": 0.0014877500610053721, "loss": 2.2823, "step": 406560 }, { "epoch": 1.5716859179539515, "grad_norm": 0.10946042090654373, "learning_rate": 0.0014873597752809483, "loss": 2.3014, "step": 406570 }, { "epoch": 1.5717245751573348, "grad_norm": 0.09858959168195724, "learning_rate": 0.0014869697864647736, "loss": 2.2867, "step": 406580 }, { "epoch": 1.571763232360718, "grad_norm": 0.10874930769205093, "learning_rate": 0.0014865800938802627, "loss": 2.3001, "step": 406590 }, { "epoch": 1.5718018895641013, "grad_norm": 0.09290513396263123, "learning_rate": 0.0014861906968533947, "loss": 2.2731, "step": 406600 }, { "epoch": 1.5718405467674845, "grad_norm": 0.09719132632017136, "learning_rate": 0.0014858015947127024, "loss": 2.2942, "step": 406610 }, { "epoch": 1.5718792039708678, "grad_norm": 0.0922248438000679, "learning_rate": 0.0014854127867892557, "loss": 2.2888, "step": 406620 }, { "epoch": 1.5719178611742513, "grad_norm": 0.10382457077503204, "learning_rate": 0.0014850242724166508, "loss": 2.2822, "step": 406630 }, { "epoch": 1.5719565183776345, "grad_norm": 0.10628269612789154, "learning_rate": 0.001484636050930995, "loss": 2.2953, "step": 406640 }, { "epoch": 1.5719951755810178, "grad_norm": 0.0957474410533905, "learning_rate": 0.001484248121670895, "loss": 2.2953, "step": 406650 }, { "epoch": 1.572033832784401, "grad_norm": 0.10400521755218506, "learning_rate": 0.0014838604839774426, "loss": 2.2836, "step": 406660 }, { "epoch": 1.5720724899877845, "grad_norm": 0.09058831632137299, "learning_rate": 0.0014834731371942019, "loss": 2.2958, "step": 406670 }, { "epoch": 1.5721111471911677, "grad_norm": 0.11464519053697586, "learning_rate": 0.0014830860806671967, "loss": 2.3005, "step": 406680 }, { "epoch": 1.572149804394551, "grad_norm": 0.09459482133388519, "learning_rate": 0.0014826993137448973, "loss": 2.2827, "step": 406690 }, { "epoch": 1.5721884615979342, "grad_norm": 0.09405256062746048, "learning_rate": 0.0014823128357782087, "loss": 2.2952, "step": 406700 }, { "epoch": 1.5722271188013175, "grad_norm": 0.09099555015563965, "learning_rate": 0.0014819266461204553, "loss": 2.2966, "step": 406710 }, { "epoch": 1.5722657760047007, "grad_norm": 0.11007675528526306, "learning_rate": 0.0014815407441273712, "loss": 2.2839, "step": 406720 }, { "epoch": 1.572304433208084, "grad_norm": 0.09731370210647583, "learning_rate": 0.0014811551291570861, "loss": 2.294, "step": 406730 }, { "epoch": 1.5723430904114672, "grad_norm": 0.10459351539611816, "learning_rate": 0.0014807698005701133, "loss": 2.2937, "step": 406740 }, { "epoch": 1.5723817476148505, "grad_norm": 0.10094854235649109, "learning_rate": 0.0014803847577293367, "loss": 2.2839, "step": 406750 }, { "epoch": 1.5724204048182338, "grad_norm": 0.0960117056965828, "learning_rate": 0.00148, "loss": 2.289, "step": 406760 }, { "epoch": 1.572459062021617, "grad_norm": 0.09705298393964767, "learning_rate": 0.0014796155267496928, "loss": 2.3085, "step": 406770 }, { "epoch": 1.5724977192250003, "grad_norm": 0.11112324148416519, "learning_rate": 0.0014792313373483386, "loss": 2.3118, "step": 406780 }, { "epoch": 1.5725363764283835, "grad_norm": 0.09867529571056366, "learning_rate": 0.0014788474311681849, "loss": 2.3043, "step": 406790 }, { "epoch": 1.572575033631767, "grad_norm": 0.09210442006587982, "learning_rate": 0.001478463807583788, "loss": 2.3029, "step": 406800 }, { "epoch": 1.5726136908351502, "grad_norm": 0.1095161885023117, "learning_rate": 0.0014780804659720044, "loss": 2.2978, "step": 406810 }, { "epoch": 1.5726523480385335, "grad_norm": 0.10121491551399231, "learning_rate": 0.001477697405711976, "loss": 2.2927, "step": 406820 }, { "epoch": 1.5726910052419167, "grad_norm": 0.0996260941028595, "learning_rate": 0.0014773146261851209, "loss": 2.3018, "step": 406830 }, { "epoch": 1.5727296624453002, "grad_norm": 0.0989765152335167, "learning_rate": 0.0014769321267751193, "loss": 2.3013, "step": 406840 }, { "epoch": 1.5727683196486835, "grad_norm": 0.08970838785171509, "learning_rate": 0.001476549906867904, "loss": 2.3049, "step": 406850 }, { "epoch": 1.5728069768520667, "grad_norm": 0.08717630058526993, "learning_rate": 0.0014761679658516482, "loss": 2.2886, "step": 406860 }, { "epoch": 1.57284563405545, "grad_norm": 0.09678196161985397, "learning_rate": 0.0014757863031167537, "loss": 2.3171, "step": 406870 }, { "epoch": 1.5728842912588332, "grad_norm": 0.09176228940486908, "learning_rate": 0.00147540491805584, "loss": 2.2973, "step": 406880 }, { "epoch": 1.5729229484622165, "grad_norm": 0.10987565666437149, "learning_rate": 0.0014750238100637325, "loss": 2.2988, "step": 406890 }, { "epoch": 1.5729616056655997, "grad_norm": 0.10397421568632126, "learning_rate": 0.0014746429785374522, "loss": 2.2854, "step": 406900 }, { "epoch": 1.573000262868983, "grad_norm": 0.08612383902072906, "learning_rate": 0.0014742624228762033, "loss": 2.2989, "step": 406910 }, { "epoch": 1.5730389200723662, "grad_norm": 0.0959596112370491, "learning_rate": 0.0014738821424813637, "loss": 2.3031, "step": 406920 }, { "epoch": 1.5730775772757495, "grad_norm": 0.11785572022199631, "learning_rate": 0.0014735021367564727, "loss": 2.2857, "step": 406930 }, { "epoch": 1.5731162344791327, "grad_norm": 0.08782157301902771, "learning_rate": 0.0014731224051072202, "loss": 2.3039, "step": 406940 }, { "epoch": 1.573154891682516, "grad_norm": 0.0940522849559784, "learning_rate": 0.0014727429469414373, "loss": 2.2877, "step": 406950 }, { "epoch": 1.5731935488858992, "grad_norm": 0.09437448531389236, "learning_rate": 0.0014723637616690832, "loss": 2.3009, "step": 406960 }, { "epoch": 1.5732322060892827, "grad_norm": 0.08396671712398529, "learning_rate": 0.0014719848487022365, "loss": 2.2748, "step": 406970 }, { "epoch": 1.573270863292666, "grad_norm": 0.10707056522369385, "learning_rate": 0.0014716062074550838, "loss": 2.2777, "step": 406980 }, { "epoch": 1.5733095204960492, "grad_norm": 0.09134868532419205, "learning_rate": 0.0014712278373439087, "loss": 2.2942, "step": 406990 }, { "epoch": 1.5733481776994325, "grad_norm": 0.0936884954571724, "learning_rate": 0.001470849737787082, "loss": 2.2889, "step": 407000 }, { "epoch": 1.573386834902816, "grad_norm": 0.11258005350828171, "learning_rate": 0.001470471908205051, "loss": 2.2963, "step": 407010 }, { "epoch": 1.5734254921061992, "grad_norm": 0.10835757851600647, "learning_rate": 0.0014700943480203293, "loss": 2.2981, "step": 407020 }, { "epoch": 1.5734641493095824, "grad_norm": 0.10515619814395905, "learning_rate": 0.001469717056657486, "loss": 2.2831, "step": 407030 }, { "epoch": 1.5735028065129657, "grad_norm": 0.1021227315068245, "learning_rate": 0.0014693400335431361, "loss": 2.2988, "step": 407040 }, { "epoch": 1.573541463716349, "grad_norm": 0.08599024266004562, "learning_rate": 0.00146896327810593, "loss": 2.2934, "step": 407050 }, { "epoch": 1.5735801209197322, "grad_norm": 0.12350116670131683, "learning_rate": 0.001468586789776543, "loss": 2.2927, "step": 407060 }, { "epoch": 1.5736187781231155, "grad_norm": 0.09155186265707016, "learning_rate": 0.0014682105679876666, "loss": 2.2795, "step": 407070 }, { "epoch": 1.5736574353264987, "grad_norm": 0.0983167216181755, "learning_rate": 0.0014678346121739972, "loss": 2.2943, "step": 407080 }, { "epoch": 1.573696092529882, "grad_norm": 0.10952533036470413, "learning_rate": 0.001467458921772226, "loss": 2.2762, "step": 407090 }, { "epoch": 1.5737347497332652, "grad_norm": 0.09205260127782822, "learning_rate": 0.0014670834962210312, "loss": 2.3022, "step": 407100 }, { "epoch": 1.5737734069366485, "grad_norm": 0.10734140127897263, "learning_rate": 0.001466708334961065, "loss": 2.3061, "step": 407110 }, { "epoch": 1.5738120641400317, "grad_norm": 0.10416164994239807, "learning_rate": 0.0014663334374349466, "loss": 2.3031, "step": 407120 }, { "epoch": 1.573850721343415, "grad_norm": 0.1047743633389473, "learning_rate": 0.0014659588030872524, "loss": 2.2928, "step": 407130 }, { "epoch": 1.5738893785467984, "grad_norm": 0.11782661080360413, "learning_rate": 0.0014655844313645045, "loss": 2.3004, "step": 407140 }, { "epoch": 1.5739280357501817, "grad_norm": 0.1171165481209755, "learning_rate": 0.0014652103217151625, "loss": 2.2923, "step": 407150 }, { "epoch": 1.573966692953565, "grad_norm": 0.09023956954479218, "learning_rate": 0.0014648364735896139, "loss": 2.2792, "step": 407160 }, { "epoch": 1.5740053501569482, "grad_norm": 0.10098619759082794, "learning_rate": 0.0014644628864401645, "loss": 2.3014, "step": 407170 }, { "epoch": 1.5740440073603317, "grad_norm": 0.1009693369269371, "learning_rate": 0.0014640895597210295, "loss": 2.2983, "step": 407180 }, { "epoch": 1.574082664563715, "grad_norm": 0.1049528419971466, "learning_rate": 0.0014637164928883231, "loss": 2.2988, "step": 407190 }, { "epoch": 1.5741213217670982, "grad_norm": 0.09989991039037704, "learning_rate": 0.0014633436854000505, "loss": 2.2972, "step": 407200 }, { "epoch": 1.5741599789704814, "grad_norm": 0.1667187511920929, "learning_rate": 0.001462971136716098, "loss": 2.2917, "step": 407210 }, { "epoch": 1.5741986361738647, "grad_norm": 0.09764105081558228, "learning_rate": 0.0014625988462982238, "loss": 2.2825, "step": 407220 }, { "epoch": 1.574237293377248, "grad_norm": 0.10742088407278061, "learning_rate": 0.0014622268136100498, "loss": 2.2903, "step": 407230 }, { "epoch": 1.5742759505806312, "grad_norm": 0.09315811842679977, "learning_rate": 0.0014618550381170516, "loss": 2.2988, "step": 407240 }, { "epoch": 1.5743146077840144, "grad_norm": 0.10519949346780777, "learning_rate": 0.0014614835192865497, "loss": 2.2998, "step": 407250 }, { "epoch": 1.5743532649873977, "grad_norm": 0.09778866916894913, "learning_rate": 0.0014611122565877008, "loss": 2.2996, "step": 407260 }, { "epoch": 1.574391922190781, "grad_norm": 0.09602082520723343, "learning_rate": 0.0014607412494914893, "loss": 2.2962, "step": 407270 }, { "epoch": 1.5744305793941642, "grad_norm": 0.09230657666921616, "learning_rate": 0.0014603704974707184, "loss": 2.2843, "step": 407280 }, { "epoch": 1.5744692365975475, "grad_norm": 0.09628087282180786, "learning_rate": 0.00146, "loss": 2.3029, "step": 407290 }, { "epoch": 1.5745078938009307, "grad_norm": 0.08596788346767426, "learning_rate": 0.0014596297565557484, "loss": 2.2843, "step": 407300 }, { "epoch": 1.5745465510043142, "grad_norm": 0.0886722058057785, "learning_rate": 0.001459259766616169, "loss": 2.287, "step": 407310 }, { "epoch": 1.5745852082076974, "grad_norm": 0.10400716960430145, "learning_rate": 0.0014588900296612529, "loss": 2.2906, "step": 407320 }, { "epoch": 1.5746238654110807, "grad_norm": 0.10699665546417236, "learning_rate": 0.0014585205451727648, "loss": 2.2916, "step": 407330 }, { "epoch": 1.574662522614464, "grad_norm": 0.08873599767684937, "learning_rate": 0.0014581513126342375, "loss": 2.2822, "step": 407340 }, { "epoch": 1.5747011798178474, "grad_norm": 0.092472143471241, "learning_rate": 0.0014577823315309617, "loss": 2.2867, "step": 407350 }, { "epoch": 1.5747398370212307, "grad_norm": 0.09492716938257217, "learning_rate": 0.0014574136013499786, "loss": 2.2963, "step": 407360 }, { "epoch": 1.574778494224614, "grad_norm": 0.111127570271492, "learning_rate": 0.0014570451215800708, "loss": 2.3091, "step": 407370 }, { "epoch": 1.5748171514279972, "grad_norm": 0.09847905486822128, "learning_rate": 0.001456676891711755, "loss": 2.2934, "step": 407380 }, { "epoch": 1.5748558086313804, "grad_norm": 0.10108734667301178, "learning_rate": 0.0014563089112372726, "loss": 2.2872, "step": 407390 }, { "epoch": 1.5748944658347637, "grad_norm": 0.10206575691699982, "learning_rate": 0.001455941179650582, "loss": 2.2894, "step": 407400 }, { "epoch": 1.574933123038147, "grad_norm": 0.09543667733669281, "learning_rate": 0.001455573696447352, "loss": 2.2996, "step": 407410 }, { "epoch": 1.5749717802415302, "grad_norm": 0.09524298459291458, "learning_rate": 0.0014552064611249506, "loss": 2.2928, "step": 407420 }, { "epoch": 1.5750104374449134, "grad_norm": 0.10034104436635971, "learning_rate": 0.0014548394731824397, "loss": 2.2875, "step": 407430 }, { "epoch": 1.5750490946482967, "grad_norm": 0.25245344638824463, "learning_rate": 0.001454472732120566, "loss": 2.2905, "step": 407440 }, { "epoch": 1.57508775185168, "grad_norm": 0.10932547599077225, "learning_rate": 0.0014541062374417528, "loss": 2.2833, "step": 407450 }, { "epoch": 1.5751264090550632, "grad_norm": 0.10273553431034088, "learning_rate": 0.0014537399886500935, "loss": 2.298, "step": 407460 }, { "epoch": 1.5751650662584464, "grad_norm": 0.09069519490003586, "learning_rate": 0.001453373985251342, "loss": 2.3058, "step": 407470 }, { "epoch": 1.57520372346183, "grad_norm": 0.10794001072645187, "learning_rate": 0.0014530082267529064, "loss": 2.2977, "step": 407480 }, { "epoch": 1.5752423806652132, "grad_norm": 0.08729327470064163, "learning_rate": 0.0014526427126638397, "loss": 2.2933, "step": 407490 }, { "epoch": 1.5752810378685964, "grad_norm": 0.10574730485677719, "learning_rate": 0.001452277442494834, "loss": 2.2885, "step": 407500 }, { "epoch": 1.5753196950719797, "grad_norm": 0.09718071669340134, "learning_rate": 0.0014519124157582111, "loss": 2.2903, "step": 407510 }, { "epoch": 1.5753583522753631, "grad_norm": 0.09844125062227249, "learning_rate": 0.0014515476319679164, "loss": 2.2993, "step": 407520 }, { "epoch": 1.5753970094787464, "grad_norm": 0.095311738550663, "learning_rate": 0.0014511830906395102, "loss": 2.2791, "step": 407530 }, { "epoch": 1.5754356666821296, "grad_norm": 0.10125110298395157, "learning_rate": 0.001450818791290161, "loss": 2.2881, "step": 407540 }, { "epoch": 1.575474323885513, "grad_norm": 0.10146543383598328, "learning_rate": 0.0014504547334386368, "loss": 2.2922, "step": 407550 }, { "epoch": 1.5755129810888961, "grad_norm": 0.46111249923706055, "learning_rate": 0.001450090916605299, "loss": 2.2808, "step": 407560 }, { "epoch": 1.5755516382922794, "grad_norm": 0.09353338181972504, "learning_rate": 0.0014497273403120958, "loss": 2.2934, "step": 407570 }, { "epoch": 1.5755902954956627, "grad_norm": 0.1222909465432167, "learning_rate": 0.0014493640040825516, "loss": 2.312, "step": 407580 }, { "epoch": 1.575628952699046, "grad_norm": 0.11227816343307495, "learning_rate": 0.0014490009074417636, "loss": 2.2977, "step": 407590 }, { "epoch": 1.5756676099024292, "grad_norm": 0.10007626563310623, "learning_rate": 0.001448638049916391, "loss": 2.287, "step": 407600 }, { "epoch": 1.5757062671058124, "grad_norm": 0.09337951242923737, "learning_rate": 0.0014482754310346513, "loss": 2.2769, "step": 407610 }, { "epoch": 1.5757449243091957, "grad_norm": 0.10422071069478989, "learning_rate": 0.0014479130503263096, "loss": 2.2881, "step": 407620 }, { "epoch": 1.575783581512579, "grad_norm": 0.11618789285421371, "learning_rate": 0.0014475509073226746, "loss": 2.298, "step": 407630 }, { "epoch": 1.5758222387159624, "grad_norm": 0.09515954554080963, "learning_rate": 0.00144718900155659, "loss": 2.301, "step": 407640 }, { "epoch": 1.5758608959193456, "grad_norm": 0.10632378607988358, "learning_rate": 0.0014468273325624267, "loss": 2.2911, "step": 407650 }, { "epoch": 1.575899553122729, "grad_norm": 0.09790574014186859, "learning_rate": 0.001446465899876078, "loss": 2.2898, "step": 407660 }, { "epoch": 1.5759382103261121, "grad_norm": 0.115874744951725, "learning_rate": 0.0014461047030349507, "loss": 2.2863, "step": 407670 }, { "epoch": 1.5759768675294954, "grad_norm": 0.1001160591840744, "learning_rate": 0.0014457437415779594, "loss": 2.3023, "step": 407680 }, { "epoch": 1.5760155247328789, "grad_norm": 0.2014889270067215, "learning_rate": 0.0014453830150455182, "loss": 2.2921, "step": 407690 }, { "epoch": 1.5760541819362621, "grad_norm": 0.08689361810684204, "learning_rate": 0.0014450225229795358, "loss": 2.2836, "step": 407700 }, { "epoch": 1.5760928391396454, "grad_norm": 0.10067226737737656, "learning_rate": 0.0014446622649234074, "loss": 2.2921, "step": 407710 }, { "epoch": 1.5761314963430286, "grad_norm": 0.10704955458641052, "learning_rate": 0.0014443022404220078, "loss": 2.288, "step": 407720 }, { "epoch": 1.5761701535464119, "grad_norm": 0.09658665955066681, "learning_rate": 0.0014439424490216863, "loss": 2.2724, "step": 407730 }, { "epoch": 1.5762088107497951, "grad_norm": 0.11580196022987366, "learning_rate": 0.0014435828902702576, "loss": 2.2839, "step": 407740 }, { "epoch": 1.5762474679531784, "grad_norm": 0.09257831424474716, "learning_rate": 0.0014432235637169978, "loss": 2.2959, "step": 407750 }, { "epoch": 1.5762861251565616, "grad_norm": 0.0986739918589592, "learning_rate": 0.0014428644689126351, "loss": 2.2858, "step": 407760 }, { "epoch": 1.5763247823599449, "grad_norm": 0.10795556008815765, "learning_rate": 0.0014425056054093459, "loss": 2.3015, "step": 407770 }, { "epoch": 1.5763634395633281, "grad_norm": 0.10407692193984985, "learning_rate": 0.001442146972760746, "loss": 2.3058, "step": 407780 }, { "epoch": 1.5764020967667114, "grad_norm": 0.09564802050590515, "learning_rate": 0.0014417885705218856, "loss": 2.2994, "step": 407790 }, { "epoch": 1.5764407539700946, "grad_norm": 0.11026576906442642, "learning_rate": 0.0014414303982492424, "loss": 2.2939, "step": 407800 }, { "epoch": 1.5764794111734781, "grad_norm": 0.09176908433437347, "learning_rate": 0.0014410724555007151, "loss": 2.2954, "step": 407810 }, { "epoch": 1.5765180683768614, "grad_norm": 0.10155240446329117, "learning_rate": 0.0014407147418356175, "loss": 2.275, "step": 407820 }, { "epoch": 1.5765567255802446, "grad_norm": 0.10282189399003983, "learning_rate": 0.001440357256814671, "loss": 2.2997, "step": 407830 }, { "epoch": 1.5765953827836279, "grad_norm": 0.12512604892253876, "learning_rate": 0.0014399999999999999, "loss": 2.2979, "step": 407840 }, { "epoch": 1.5766340399870111, "grad_norm": 0.09527765214443207, "learning_rate": 0.001439642970955124, "loss": 2.2951, "step": 407850 }, { "epoch": 1.5766726971903946, "grad_norm": 0.09358199685811996, "learning_rate": 0.0014392861692449526, "loss": 2.3014, "step": 407860 }, { "epoch": 1.5767113543937779, "grad_norm": 0.1274263858795166, "learning_rate": 0.0014389295944357786, "loss": 2.2863, "step": 407870 }, { "epoch": 1.576750011597161, "grad_norm": 0.09673239290714264, "learning_rate": 0.001438573246095272, "loss": 2.2956, "step": 407880 }, { "epoch": 1.5767886688005444, "grad_norm": 0.09876884520053864, "learning_rate": 0.0014382171237924744, "loss": 2.2907, "step": 407890 }, { "epoch": 1.5768273260039276, "grad_norm": 0.09205851703882217, "learning_rate": 0.0014378612270977923, "loss": 2.2921, "step": 407900 }, { "epoch": 1.5768659832073109, "grad_norm": 0.09754933416843414, "learning_rate": 0.0014375055555829906, "loss": 2.298, "step": 407910 }, { "epoch": 1.5769046404106941, "grad_norm": 0.08937043696641922, "learning_rate": 0.0014371501088211885, "loss": 2.2888, "step": 407920 }, { "epoch": 1.5769432976140774, "grad_norm": 0.09569250047206879, "learning_rate": 0.001436794886386851, "loss": 2.286, "step": 407930 }, { "epoch": 1.5769819548174606, "grad_norm": 0.0856354683637619, "learning_rate": 0.001436439887855785, "loss": 2.2919, "step": 407940 }, { "epoch": 1.5770206120208439, "grad_norm": 0.10027945786714554, "learning_rate": 0.0014360851128051326, "loss": 2.2967, "step": 407950 }, { "epoch": 1.5770592692242271, "grad_norm": 0.0965295284986496, "learning_rate": 0.0014357305608133646, "loss": 2.2972, "step": 407960 }, { "epoch": 1.5770979264276104, "grad_norm": 0.10624401271343231, "learning_rate": 0.0014353762314602758, "loss": 2.2902, "step": 407970 }, { "epoch": 1.5771365836309938, "grad_norm": 0.1368337869644165, "learning_rate": 0.0014350221243269786, "loss": 2.2711, "step": 407980 }, { "epoch": 1.577175240834377, "grad_norm": 0.09370030462741852, "learning_rate": 0.0014346682389958971, "loss": 2.3014, "step": 407990 }, { "epoch": 1.5772138980377604, "grad_norm": 0.09466104954481125, "learning_rate": 0.0014343145750507621, "loss": 2.2876, "step": 408000 }, { "epoch": 1.5772525552411436, "grad_norm": 0.08923348039388657, "learning_rate": 0.0014339611320766039, "loss": 2.3028, "step": 408010 }, { "epoch": 1.5772912124445269, "grad_norm": 0.093915194272995, "learning_rate": 0.0014336079096597483, "loss": 2.3049, "step": 408020 }, { "epoch": 1.5773298696479103, "grad_norm": 0.09254784882068634, "learning_rate": 0.0014332549073878098, "loss": 2.2877, "step": 408030 }, { "epoch": 1.5773685268512936, "grad_norm": 0.09824127703905106, "learning_rate": 0.001432902124849687, "loss": 2.3099, "step": 408040 }, { "epoch": 1.5774071840546768, "grad_norm": 0.10029337555170059, "learning_rate": 0.0014325495616355557, "loss": 2.2993, "step": 408050 }, { "epoch": 1.57744584125806, "grad_norm": 0.11360572278499603, "learning_rate": 0.0014321972173368645, "loss": 2.3052, "step": 408060 }, { "epoch": 1.5774844984614433, "grad_norm": 0.10877586901187897, "learning_rate": 0.0014318450915463284, "loss": 2.3072, "step": 408070 }, { "epoch": 1.5775231556648266, "grad_norm": 0.11041352897882462, "learning_rate": 0.0014314931838579242, "loss": 2.288, "step": 408080 }, { "epoch": 1.5775618128682098, "grad_norm": 0.10026522725820541, "learning_rate": 0.0014311414938668846, "loss": 2.2958, "step": 408090 }, { "epoch": 1.577600470071593, "grad_norm": 0.09852489084005356, "learning_rate": 0.0014307900211696918, "loss": 2.298, "step": 408100 }, { "epoch": 1.5776391272749763, "grad_norm": 0.08970458805561066, "learning_rate": 0.0014304387653640744, "loss": 2.2928, "step": 408110 }, { "epoch": 1.5776777844783596, "grad_norm": 0.11003737896680832, "learning_rate": 0.0014300877260489998, "loss": 2.302, "step": 408120 }, { "epoch": 1.5777164416817429, "grad_norm": 0.11057864129543304, "learning_rate": 0.0014297369028246699, "loss": 2.294, "step": 408130 }, { "epoch": 1.577755098885126, "grad_norm": 0.0905907079577446, "learning_rate": 0.0014293862952925159, "loss": 2.2921, "step": 408140 }, { "epoch": 1.5777937560885096, "grad_norm": 0.09885738790035248, "learning_rate": 0.0014290359030551922, "loss": 2.3063, "step": 408150 }, { "epoch": 1.5778324132918928, "grad_norm": 0.10116096585988998, "learning_rate": 0.001428685725716572, "loss": 2.2841, "step": 408160 }, { "epoch": 1.577871070495276, "grad_norm": 0.0916747897863388, "learning_rate": 0.001428335762881742, "loss": 2.2867, "step": 408170 }, { "epoch": 1.5779097276986593, "grad_norm": 0.09478262811899185, "learning_rate": 0.0014279860141569963, "loss": 2.2881, "step": 408180 }, { "epoch": 1.5779483849020428, "grad_norm": 0.11914552003145218, "learning_rate": 0.0014276364791498326, "loss": 2.2943, "step": 408190 }, { "epoch": 1.577987042105426, "grad_norm": 0.10792306065559387, "learning_rate": 0.0014272871574689458, "loss": 2.2968, "step": 408200 }, { "epoch": 1.5780256993088093, "grad_norm": 0.0978836938738823, "learning_rate": 0.0014269380487242239, "loss": 2.3056, "step": 408210 }, { "epoch": 1.5780643565121926, "grad_norm": 0.08757525682449341, "learning_rate": 0.0014265891525267421, "loss": 2.3039, "step": 408220 }, { "epoch": 1.5781030137155758, "grad_norm": 0.12824532389640808, "learning_rate": 0.001426240468488758, "loss": 2.2852, "step": 408230 }, { "epoch": 1.578141670918959, "grad_norm": 0.11376450955867767, "learning_rate": 0.0014258919962237071, "loss": 2.2895, "step": 408240 }, { "epoch": 1.5781803281223423, "grad_norm": 0.09492777287960052, "learning_rate": 0.0014255437353461972, "loss": 2.2876, "step": 408250 }, { "epoch": 1.5782189853257256, "grad_norm": 0.09079192578792572, "learning_rate": 0.0014251956854720034, "loss": 2.2903, "step": 408260 }, { "epoch": 1.5782576425291088, "grad_norm": 0.10566417127847672, "learning_rate": 0.0014248478462180639, "loss": 2.2921, "step": 408270 }, { "epoch": 1.578296299732492, "grad_norm": 0.09935545921325684, "learning_rate": 0.0014245002172024737, "loss": 2.2963, "step": 408280 }, { "epoch": 1.5783349569358753, "grad_norm": 0.1073034405708313, "learning_rate": 0.0014241527980444813, "loss": 2.3041, "step": 408290 }, { "epoch": 1.5783736141392586, "grad_norm": 0.10224999487400055, "learning_rate": 0.0014238055883644828, "loss": 2.2713, "step": 408300 }, { "epoch": 1.5784122713426418, "grad_norm": 0.11747825890779495, "learning_rate": 0.0014234585877840173, "loss": 2.2992, "step": 408310 }, { "epoch": 1.5784509285460253, "grad_norm": 0.09979795664548874, "learning_rate": 0.0014231117959257618, "loss": 2.2943, "step": 408320 }, { "epoch": 1.5784895857494086, "grad_norm": 0.084749236702919, "learning_rate": 0.0014227652124135276, "loss": 2.3049, "step": 408330 }, { "epoch": 1.5785282429527918, "grad_norm": 0.10331593453884125, "learning_rate": 0.001422418836872254, "loss": 2.2902, "step": 408340 }, { "epoch": 1.578566900156175, "grad_norm": 0.1007453128695488, "learning_rate": 0.0014220726689280045, "loss": 2.2821, "step": 408350 }, { "epoch": 1.5786055573595585, "grad_norm": 0.09320829808712006, "learning_rate": 0.0014217267082079618, "loss": 2.2779, "step": 408360 }, { "epoch": 1.5786442145629418, "grad_norm": 0.09189892560243607, "learning_rate": 0.0014213809543404227, "loss": 2.2908, "step": 408370 }, { "epoch": 1.578682871766325, "grad_norm": 0.11019908636808395, "learning_rate": 0.0014210354069547951, "loss": 2.2777, "step": 408380 }, { "epoch": 1.5787215289697083, "grad_norm": 0.10784642398357391, "learning_rate": 0.0014206900656815904, "loss": 2.2997, "step": 408390 }, { "epoch": 1.5787601861730916, "grad_norm": 0.10673866420984268, "learning_rate": 0.0014203449301524226, "loss": 2.284, "step": 408400 }, { "epoch": 1.5787988433764748, "grad_norm": 0.10720127075910568, "learning_rate": 0.00142, "loss": 2.2901, "step": 408410 }, { "epoch": 1.578837500579858, "grad_norm": 0.09876653552055359, "learning_rate": 0.0014196552748581238, "loss": 2.2898, "step": 408420 }, { "epoch": 1.5788761577832413, "grad_norm": 0.11836722493171692, "learning_rate": 0.001419310754361681, "loss": 2.2762, "step": 408430 }, { "epoch": 1.5789148149866246, "grad_norm": 0.10221022367477417, "learning_rate": 0.001418966438146642, "loss": 2.309, "step": 408440 }, { "epoch": 1.5789534721900078, "grad_norm": 0.11221145838499069, "learning_rate": 0.0014186223258500546, "loss": 2.2921, "step": 408450 }, { "epoch": 1.578992129393391, "grad_norm": 0.10702753812074661, "learning_rate": 0.0014182784171100408, "loss": 2.2923, "step": 408460 }, { "epoch": 1.5790307865967743, "grad_norm": 0.09933892637491226, "learning_rate": 0.00141793471156579, "loss": 2.2882, "step": 408470 }, { "epoch": 1.5790694438001576, "grad_norm": 0.09902329742908478, "learning_rate": 0.0014175912088575586, "loss": 2.2871, "step": 408480 }, { "epoch": 1.579108101003541, "grad_norm": 0.10397355258464813, "learning_rate": 0.0014172479086266615, "loss": 2.2816, "step": 408490 }, { "epoch": 1.5791467582069243, "grad_norm": 0.10957317054271698, "learning_rate": 0.0014169048105154701, "loss": 2.2812, "step": 408500 }, { "epoch": 1.5791854154103075, "grad_norm": 0.1058499738574028, "learning_rate": 0.0014165619141674073, "loss": 2.2871, "step": 408510 }, { "epoch": 1.5792240726136908, "grad_norm": 0.11140215396881104, "learning_rate": 0.0014162192192269433, "loss": 2.277, "step": 408520 }, { "epoch": 1.5792627298170743, "grad_norm": 0.0999557301402092, "learning_rate": 0.0014158767253395906, "loss": 2.283, "step": 408530 }, { "epoch": 1.5793013870204575, "grad_norm": 0.09867963194847107, "learning_rate": 0.0014155344321519017, "loss": 2.2882, "step": 408540 }, { "epoch": 1.5793400442238408, "grad_norm": 0.09761340171098709, "learning_rate": 0.0014151923393114624, "loss": 2.2788, "step": 408550 }, { "epoch": 1.579378701427224, "grad_norm": 0.10846646130084991, "learning_rate": 0.0014148504464668882, "loss": 2.2889, "step": 408560 }, { "epoch": 1.5794173586306073, "grad_norm": 0.1139591857790947, "learning_rate": 0.0014145087532678221, "loss": 2.292, "step": 408570 }, { "epoch": 1.5794560158339905, "grad_norm": 0.0976622998714447, "learning_rate": 0.0014141672593649278, "loss": 2.2901, "step": 408580 }, { "epoch": 1.5794946730373738, "grad_norm": 0.08976734429597855, "learning_rate": 0.0014138259644098862, "loss": 2.2818, "step": 408590 }, { "epoch": 1.579533330240757, "grad_norm": 0.09310808777809143, "learning_rate": 0.001413484868055393, "loss": 2.2754, "step": 408600 }, { "epoch": 1.5795719874441403, "grad_norm": 0.09846369922161102, "learning_rate": 0.0014131439699551515, "loss": 2.2906, "step": 408610 }, { "epoch": 1.5796106446475235, "grad_norm": 0.08984551578760147, "learning_rate": 0.001412803269763872, "loss": 2.2803, "step": 408620 }, { "epoch": 1.5796493018509068, "grad_norm": 0.10137151181697845, "learning_rate": 0.0014124627671372647, "loss": 2.3008, "step": 408630 }, { "epoch": 1.57968795905429, "grad_norm": 0.11374299973249435, "learning_rate": 0.0014121224617320372, "loss": 2.2979, "step": 408640 }, { "epoch": 1.5797266162576733, "grad_norm": 0.08813643455505371, "learning_rate": 0.0014117823532058904, "loss": 2.3102, "step": 408650 }, { "epoch": 1.5797652734610568, "grad_norm": 0.0898527130484581, "learning_rate": 0.0014114424412175136, "loss": 2.2905, "step": 408660 }, { "epoch": 1.57980393066444, "grad_norm": 0.10921207070350647, "learning_rate": 0.0014111027254265819, "loss": 2.2815, "step": 408670 }, { "epoch": 1.5798425878678233, "grad_norm": 0.09411787986755371, "learning_rate": 0.0014107632054937507, "loss": 2.2744, "step": 408680 }, { "epoch": 1.5798812450712065, "grad_norm": 0.09054916352033615, "learning_rate": 0.0014104238810806532, "loss": 2.2873, "step": 408690 }, { "epoch": 1.57991990227459, "grad_norm": 0.1259244829416275, "learning_rate": 0.001410084751849895, "loss": 2.2749, "step": 408700 }, { "epoch": 1.5799585594779733, "grad_norm": 0.10252106934785843, "learning_rate": 0.0014097458174650516, "loss": 2.2847, "step": 408710 }, { "epoch": 1.5799972166813565, "grad_norm": 0.12871307134628296, "learning_rate": 0.001409407077590664, "loss": 2.2955, "step": 408720 }, { "epoch": 1.5800358738847398, "grad_norm": 0.10662032663822174, "learning_rate": 0.001409068531892234, "loss": 2.284, "step": 408730 }, { "epoch": 1.580074531088123, "grad_norm": 0.09653251618146896, "learning_rate": 0.0014087301800362207, "loss": 2.277, "step": 408740 }, { "epoch": 1.5801131882915063, "grad_norm": 0.10052668303251266, "learning_rate": 0.0014083920216900383, "loss": 2.2755, "step": 408750 }, { "epoch": 1.5801518454948895, "grad_norm": 0.11152070760726929, "learning_rate": 0.0014080540565220502, "loss": 2.2881, "step": 408760 }, { "epoch": 1.5801905026982728, "grad_norm": 0.11330562084913254, "learning_rate": 0.001407716284201566, "loss": 2.3062, "step": 408770 }, { "epoch": 1.580229159901656, "grad_norm": 0.09445352852344513, "learning_rate": 0.001407378704398838, "loss": 2.2682, "step": 408780 }, { "epoch": 1.5802678171050393, "grad_norm": 0.09261759370565414, "learning_rate": 0.0014070413167850565, "loss": 2.293, "step": 408790 }, { "epoch": 1.5803064743084225, "grad_norm": 0.1160760149359703, "learning_rate": 0.001406704121032347, "loss": 2.3078, "step": 408800 }, { "epoch": 1.5803451315118058, "grad_norm": 0.093442402780056, "learning_rate": 0.001406367116813767, "loss": 2.284, "step": 408810 }, { "epoch": 1.580383788715189, "grad_norm": 0.09904216229915619, "learning_rate": 0.0014060303038033002, "loss": 2.2989, "step": 408820 }, { "epoch": 1.5804224459185725, "grad_norm": 0.09945961833000183, "learning_rate": 0.001405693681675855, "loss": 2.2893, "step": 408830 }, { "epoch": 1.5804611031219558, "grad_norm": 0.09675027430057526, "learning_rate": 0.0014053572501072599, "loss": 2.2987, "step": 408840 }, { "epoch": 1.580499760325339, "grad_norm": 0.09257011860609055, "learning_rate": 0.0014050210087742593, "loss": 2.2906, "step": 408850 }, { "epoch": 1.5805384175287223, "grad_norm": 0.08864189684391022, "learning_rate": 0.0014046849573545112, "loss": 2.279, "step": 408860 }, { "epoch": 1.5805770747321057, "grad_norm": 0.0994875431060791, "learning_rate": 0.001404349095526583, "loss": 2.2808, "step": 408870 }, { "epoch": 1.580615731935489, "grad_norm": 0.09842891246080399, "learning_rate": 0.0014040134229699464, "loss": 2.2925, "step": 408880 }, { "epoch": 1.5806543891388722, "grad_norm": 0.10454820096492767, "learning_rate": 0.001403677939364977, "loss": 2.2945, "step": 408890 }, { "epoch": 1.5806930463422555, "grad_norm": 0.09167510271072388, "learning_rate": 0.0014033426443929482, "loss": 2.2758, "step": 408900 }, { "epoch": 1.5807317035456387, "grad_norm": 0.09263806790113449, "learning_rate": 0.001403007537736028, "loss": 2.2821, "step": 408910 }, { "epoch": 1.580770360749022, "grad_norm": 0.09206433594226837, "learning_rate": 0.0014026726190772768, "loss": 2.2835, "step": 408920 }, { "epoch": 1.5808090179524052, "grad_norm": 0.10548428446054459, "learning_rate": 0.0014023378881006425, "loss": 2.2998, "step": 408930 }, { "epoch": 1.5808476751557885, "grad_norm": 0.10245232284069061, "learning_rate": 0.0014020033444909579, "loss": 2.2964, "step": 408940 }, { "epoch": 1.5808863323591718, "grad_norm": 0.09368015080690384, "learning_rate": 0.0014016689879339363, "loss": 2.2897, "step": 408950 }, { "epoch": 1.580924989562555, "grad_norm": 0.10228240489959717, "learning_rate": 0.0014013348181161694, "loss": 2.2901, "step": 408960 }, { "epoch": 1.5809636467659383, "grad_norm": 0.08766623586416245, "learning_rate": 0.0014010008347251227, "loss": 2.2727, "step": 408970 }, { "epoch": 1.5810023039693215, "grad_norm": 0.11325518786907196, "learning_rate": 0.001400667037449132, "loss": 2.2948, "step": 408980 }, { "epoch": 1.5810409611727048, "grad_norm": 0.0871184691786766, "learning_rate": 0.0014003334259774021, "loss": 2.2982, "step": 408990 }, { "epoch": 1.5810796183760882, "grad_norm": 0.17203271389007568, "learning_rate": 0.0014, "loss": 2.2945, "step": 409000 }, { "epoch": 1.5811182755794715, "grad_norm": 0.11297266185283661, "learning_rate": 0.0013996667592078547, "loss": 2.2977, "step": 409010 }, { "epoch": 1.5811569327828547, "grad_norm": 0.11587999761104584, "learning_rate": 0.0013993337032927517, "loss": 2.294, "step": 409020 }, { "epoch": 1.581195589986238, "grad_norm": 0.10281231999397278, "learning_rate": 0.001399000831947331, "loss": 2.2813, "step": 409030 }, { "epoch": 1.5812342471896215, "grad_norm": 0.10410250723361969, "learning_rate": 0.0013986681448650837, "loss": 2.2818, "step": 409040 }, { "epoch": 1.5812729043930047, "grad_norm": 0.09489083290100098, "learning_rate": 0.001398335641740347, "loss": 2.2981, "step": 409050 }, { "epoch": 1.581311561596388, "grad_norm": 0.09412014484405518, "learning_rate": 0.0013980033222683037, "loss": 2.2875, "step": 409060 }, { "epoch": 1.5813502187997712, "grad_norm": 0.11623632162809372, "learning_rate": 0.0013976711861449762, "loss": 2.2739, "step": 409070 }, { "epoch": 1.5813888760031545, "grad_norm": 0.09715640544891357, "learning_rate": 0.0013973392330672255, "loss": 2.2781, "step": 409080 }, { "epoch": 1.5814275332065377, "grad_norm": 0.09456169605255127, "learning_rate": 0.0013970074627327467, "loss": 2.2757, "step": 409090 }, { "epoch": 1.581466190409921, "grad_norm": 0.11020629107952118, "learning_rate": 0.0013966758748400658, "loss": 2.2932, "step": 409100 }, { "epoch": 1.5815048476133042, "grad_norm": 0.11536996811628342, "learning_rate": 0.0013963444690885374, "loss": 2.291, "step": 409110 }, { "epoch": 1.5815435048166875, "grad_norm": 0.1016986295580864, "learning_rate": 0.00139601324517834, "loss": 2.2991, "step": 409120 }, { "epoch": 1.5815821620200707, "grad_norm": 0.09667601436376572, "learning_rate": 0.0013956822028104749, "loss": 2.2762, "step": 409130 }, { "epoch": 1.581620819223454, "grad_norm": 0.10011686384677887, "learning_rate": 0.0013953513416867609, "loss": 2.292, "step": 409140 }, { "epoch": 1.5816594764268372, "grad_norm": 0.09894595295190811, "learning_rate": 0.001395020661509833, "loss": 2.2953, "step": 409150 }, { "epoch": 1.5816981336302205, "grad_norm": 0.09758591651916504, "learning_rate": 0.0013946901619831377, "loss": 2.2848, "step": 409160 }, { "epoch": 1.581736790833604, "grad_norm": 0.12541015446186066, "learning_rate": 0.0013943598428109313, "loss": 2.2877, "step": 409170 }, { "epoch": 1.5817754480369872, "grad_norm": 0.09081506729125977, "learning_rate": 0.0013940297036982754, "loss": 2.3061, "step": 409180 }, { "epoch": 1.5818141052403705, "grad_norm": 0.09523743391036987, "learning_rate": 0.0013936997443510352, "loss": 2.2783, "step": 409190 }, { "epoch": 1.5818527624437537, "grad_norm": 0.09542723000049591, "learning_rate": 0.0013933699644758759, "loss": 2.2789, "step": 409200 }, { "epoch": 1.5818914196471372, "grad_norm": 0.11139500141143799, "learning_rate": 0.0013930403637802593, "loss": 2.2724, "step": 409210 }, { "epoch": 1.5819300768505205, "grad_norm": 0.08887927234172821, "learning_rate": 0.0013927109419724411, "loss": 2.3028, "step": 409220 }, { "epoch": 1.5819687340539037, "grad_norm": 0.09690530598163605, "learning_rate": 0.0013923816987614678, "loss": 2.2806, "step": 409230 }, { "epoch": 1.582007391257287, "grad_norm": 0.09255881607532501, "learning_rate": 0.0013920526338571735, "loss": 2.2911, "step": 409240 }, { "epoch": 1.5820460484606702, "grad_norm": 0.1247158870100975, "learning_rate": 0.001391723746970178, "loss": 2.2646, "step": 409250 }, { "epoch": 1.5820847056640535, "grad_norm": 0.08813361078500748, "learning_rate": 0.0013913950378118824, "loss": 2.2785, "step": 409260 }, { "epoch": 1.5821233628674367, "grad_norm": 0.09330994635820389, "learning_rate": 0.0013910665060944667, "loss": 2.2819, "step": 409270 }, { "epoch": 1.58216202007082, "grad_norm": 0.2143334448337555, "learning_rate": 0.0013907381515308873, "loss": 2.2856, "step": 409280 }, { "epoch": 1.5822006772742032, "grad_norm": 0.11374535411596298, "learning_rate": 0.0013904099738348732, "loss": 2.2919, "step": 409290 }, { "epoch": 1.5822393344775865, "grad_norm": 0.0991811528801918, "learning_rate": 0.0013900819727209238, "loss": 2.2894, "step": 409300 }, { "epoch": 1.5822779916809697, "grad_norm": 0.0955258384346962, "learning_rate": 0.0013897541479043058, "loss": 2.3012, "step": 409310 }, { "epoch": 1.582316648884353, "grad_norm": 0.09650768339633942, "learning_rate": 0.0013894264991010502, "loss": 2.2867, "step": 409320 }, { "epoch": 1.5823553060877362, "grad_norm": 0.09201765805482864, "learning_rate": 0.0013890990260279495, "loss": 2.2851, "step": 409330 }, { "epoch": 1.5823939632911197, "grad_norm": 0.0908934623003006, "learning_rate": 0.0013887717284025549, "loss": 2.2791, "step": 409340 }, { "epoch": 1.582432620494503, "grad_norm": 0.10206924378871918, "learning_rate": 0.0013884446059431736, "loss": 2.2748, "step": 409350 }, { "epoch": 1.5824712776978862, "grad_norm": 0.09511981904506683, "learning_rate": 0.0013881176583688658, "loss": 2.2912, "step": 409360 }, { "epoch": 1.5825099349012695, "grad_norm": 0.08898471295833588, "learning_rate": 0.0013877908853994411, "loss": 2.3048, "step": 409370 }, { "epoch": 1.582548592104653, "grad_norm": 0.09409672766923904, "learning_rate": 0.0013874642867554578, "loss": 2.2753, "step": 409380 }, { "epoch": 1.5825872493080362, "grad_norm": 0.1006675586104393, "learning_rate": 0.0013871378621582175, "loss": 2.2878, "step": 409390 }, { "epoch": 1.5826259065114194, "grad_norm": 0.11397848278284073, "learning_rate": 0.0013868116113297643, "loss": 2.2598, "step": 409400 }, { "epoch": 1.5826645637148027, "grad_norm": 0.10111487656831741, "learning_rate": 0.0013864855339928814, "loss": 2.2876, "step": 409410 }, { "epoch": 1.582703220918186, "grad_norm": 0.11789622157812119, "learning_rate": 0.0013861596298710879, "loss": 2.2848, "step": 409420 }, { "epoch": 1.5827418781215692, "grad_norm": 0.0989585891366005, "learning_rate": 0.0013858338986886365, "loss": 2.2922, "step": 409430 }, { "epoch": 1.5827805353249524, "grad_norm": 0.10369249433279037, "learning_rate": 0.0013855083401705115, "loss": 2.2769, "step": 409440 }, { "epoch": 1.5828191925283357, "grad_norm": 0.09713876992464066, "learning_rate": 0.001385182954042424, "loss": 2.2898, "step": 409450 }, { "epoch": 1.582857849731719, "grad_norm": 0.11114867031574249, "learning_rate": 0.001384857740030812, "loss": 2.2846, "step": 409460 }, { "epoch": 1.5828965069351022, "grad_norm": 0.1006435751914978, "learning_rate": 0.0013845326978628352, "loss": 2.2703, "step": 409470 }, { "epoch": 1.5829351641384855, "grad_norm": 0.11165027320384979, "learning_rate": 0.001384207827266374, "loss": 2.2909, "step": 409480 }, { "epoch": 1.5829738213418687, "grad_norm": 0.09109925478696823, "learning_rate": 0.0013838831279700254, "loss": 2.3026, "step": 409490 }, { "epoch": 1.5830124785452522, "grad_norm": 0.10089856386184692, "learning_rate": 0.0013835585997031023, "loss": 2.2982, "step": 409500 }, { "epoch": 1.5830511357486354, "grad_norm": 0.114852674305439, "learning_rate": 0.001383234242195629, "loss": 2.2853, "step": 409510 }, { "epoch": 1.5830897929520187, "grad_norm": 0.09972550719976425, "learning_rate": 0.0013829100551783395, "loss": 2.2905, "step": 409520 }, { "epoch": 1.583128450155402, "grad_norm": 0.11271734535694122, "learning_rate": 0.0013825860383826747, "loss": 2.2872, "step": 409530 }, { "epoch": 1.5831671073587852, "grad_norm": 0.10400784015655518, "learning_rate": 0.00138226219154078, "loss": 2.2949, "step": 409540 }, { "epoch": 1.5832057645621687, "grad_norm": 0.08777232468128204, "learning_rate": 0.0013819385143855024, "loss": 2.2789, "step": 409550 }, { "epoch": 1.583244421765552, "grad_norm": 0.1051279604434967, "learning_rate": 0.0013816150066503878, "loss": 2.2968, "step": 409560 }, { "epoch": 1.5832830789689352, "grad_norm": 0.10784800350666046, "learning_rate": 0.0013812916680696792, "loss": 2.2763, "step": 409570 }, { "epoch": 1.5833217361723184, "grad_norm": 0.09149152040481567, "learning_rate": 0.0013809684983783134, "loss": 2.2875, "step": 409580 }, { "epoch": 1.5833603933757017, "grad_norm": 0.10545124113559723, "learning_rate": 0.001380645497311919, "loss": 2.2985, "step": 409590 }, { "epoch": 1.583399050579085, "grad_norm": 0.09649606794118881, "learning_rate": 0.0013803226646068132, "loss": 2.2923, "step": 409600 }, { "epoch": 1.5834377077824682, "grad_norm": 0.09470316767692566, "learning_rate": 0.00138, "loss": 2.2941, "step": 409610 }, { "epoch": 1.5834763649858514, "grad_norm": 0.10625059902667999, "learning_rate": 0.0013796775032291673, "loss": 2.2805, "step": 409620 }, { "epoch": 1.5835150221892347, "grad_norm": 0.09362804144620895, "learning_rate": 0.001379355174032684, "loss": 2.2774, "step": 409630 }, { "epoch": 1.583553679392618, "grad_norm": 0.09844432771205902, "learning_rate": 0.001379033012149599, "loss": 2.2887, "step": 409640 }, { "epoch": 1.5835923365960012, "grad_norm": 0.10673537850379944, "learning_rate": 0.0013787110173196374, "loss": 2.2756, "step": 409650 }, { "epoch": 1.5836309937993844, "grad_norm": 0.09506750851869583, "learning_rate": 0.0013783891892831979, "loss": 2.2761, "step": 409660 }, { "epoch": 1.583669651002768, "grad_norm": 0.10086861997842789, "learning_rate": 0.0013780675277813518, "loss": 2.2968, "step": 409670 }, { "epoch": 1.5837083082061512, "grad_norm": 0.1022254228591919, "learning_rate": 0.0013777460325558382, "loss": 2.2836, "step": 409680 }, { "epoch": 1.5837469654095344, "grad_norm": 0.09821146726608276, "learning_rate": 0.0013774247033490647, "loss": 2.2702, "step": 409690 }, { "epoch": 1.5837856226129177, "grad_norm": 0.10450205951929092, "learning_rate": 0.0013771035399041025, "loss": 2.2992, "step": 409700 }, { "epoch": 1.583824279816301, "grad_norm": 0.10975898802280426, "learning_rate": 0.0013767825419646847, "loss": 2.2944, "step": 409710 }, { "epoch": 1.5838629370196844, "grad_norm": 0.10892923921346664, "learning_rate": 0.0013764617092752044, "loss": 2.3034, "step": 409720 }, { "epoch": 1.5839015942230676, "grad_norm": 0.10283569991588593, "learning_rate": 0.001376141041580711, "loss": 2.285, "step": 409730 }, { "epoch": 1.583940251426451, "grad_norm": 0.10661765933036804, "learning_rate": 0.0013758205386269107, "loss": 2.2894, "step": 409740 }, { "epoch": 1.5839789086298341, "grad_norm": 0.10816767066717148, "learning_rate": 0.0013755002001601601, "loss": 2.2795, "step": 409750 }, { "epoch": 1.5840175658332174, "grad_norm": 0.106157585978508, "learning_rate": 0.0013751800259274676, "loss": 2.2982, "step": 409760 }, { "epoch": 1.5840562230366007, "grad_norm": 0.09719347953796387, "learning_rate": 0.0013748600156764886, "loss": 2.2863, "step": 409770 }, { "epoch": 1.584094880239984, "grad_norm": 0.11564947664737701, "learning_rate": 0.0013745401691555243, "loss": 2.2661, "step": 409780 }, { "epoch": 1.5841335374433672, "grad_norm": 0.09925565123558044, "learning_rate": 0.0013742204861135194, "loss": 2.2797, "step": 409790 }, { "epoch": 1.5841721946467504, "grad_norm": 0.09485266357660294, "learning_rate": 0.001373900966300059, "loss": 2.2858, "step": 409800 }, { "epoch": 1.5842108518501337, "grad_norm": 0.09301532804965973, "learning_rate": 0.001373581609465367, "loss": 2.2859, "step": 409810 }, { "epoch": 1.584249509053517, "grad_norm": 0.13172714412212372, "learning_rate": 0.0013732624153603042, "loss": 2.2829, "step": 409820 }, { "epoch": 1.5842881662569002, "grad_norm": 0.1107850968837738, "learning_rate": 0.0013729433837363647, "loss": 2.2935, "step": 409830 }, { "epoch": 1.5843268234602836, "grad_norm": 0.09742755442857742, "learning_rate": 0.001372624514345675, "loss": 2.2824, "step": 409840 }, { "epoch": 1.584365480663667, "grad_norm": 0.10189301520586014, "learning_rate": 0.0013723058069409913, "loss": 2.29, "step": 409850 }, { "epoch": 1.5844041378670501, "grad_norm": 0.10698041319847107, "learning_rate": 0.0013719872612756967, "loss": 2.2877, "step": 409860 }, { "epoch": 1.5844427950704334, "grad_norm": 0.10542524605989456, "learning_rate": 0.0013716688771037997, "loss": 2.2889, "step": 409870 }, { "epoch": 1.5844814522738166, "grad_norm": 0.1208849772810936, "learning_rate": 0.0013713506541799317, "loss": 2.2875, "step": 409880 }, { "epoch": 1.5845201094772001, "grad_norm": 0.09475509077310562, "learning_rate": 0.0013710325922593445, "loss": 2.2951, "step": 409890 }, { "epoch": 1.5845587666805834, "grad_norm": 0.09734809398651123, "learning_rate": 0.0013707146910979092, "loss": 2.289, "step": 409900 }, { "epoch": 1.5845974238839666, "grad_norm": 0.0958809107542038, "learning_rate": 0.0013703969504521123, "loss": 2.2839, "step": 409910 }, { "epoch": 1.5846360810873499, "grad_norm": 0.10502952337265015, "learning_rate": 0.001370079370079055, "loss": 2.2833, "step": 409920 }, { "epoch": 1.5846747382907331, "grad_norm": 0.10497497767210007, "learning_rate": 0.0013697619497364509, "loss": 2.2751, "step": 409930 }, { "epoch": 1.5847133954941164, "grad_norm": 0.09893753379583359, "learning_rate": 0.0013694446891826223, "loss": 2.2889, "step": 409940 }, { "epoch": 1.5847520526974996, "grad_norm": 0.10483641177415848, "learning_rate": 0.0013691275881764998, "loss": 2.2961, "step": 409950 }, { "epoch": 1.5847907099008829, "grad_norm": 0.09729575365781784, "learning_rate": 0.00136881064647762, "loss": 2.2839, "step": 409960 }, { "epoch": 1.5848293671042661, "grad_norm": 0.11494239419698715, "learning_rate": 0.0013684938638461222, "loss": 2.2947, "step": 409970 }, { "epoch": 1.5848680243076494, "grad_norm": 0.108024001121521, "learning_rate": 0.0013681772400427475, "loss": 2.2968, "step": 409980 }, { "epoch": 1.5849066815110326, "grad_norm": 0.10436026751995087, "learning_rate": 0.0013678607748288357, "loss": 2.2759, "step": 409990 }, { "epoch": 1.584945338714416, "grad_norm": 0.10196762531995773, "learning_rate": 0.001367544467966324, "loss": 2.2939, "step": 410000 }, { "epoch": 1.5849839959177994, "grad_norm": 0.10346586257219315, "learning_rate": 0.001367228319217745, "loss": 2.2845, "step": 410010 }, { "epoch": 1.5850226531211826, "grad_norm": 0.09651974588632584, "learning_rate": 0.0013669123283462235, "loss": 2.2963, "step": 410020 }, { "epoch": 1.5850613103245659, "grad_norm": 0.09264250844717026, "learning_rate": 0.0013665964951154754, "loss": 2.2735, "step": 410030 }, { "epoch": 1.5850999675279491, "grad_norm": 0.09752018004655838, "learning_rate": 0.0013662808192898056, "loss": 2.2901, "step": 410040 }, { "epoch": 1.5851386247313326, "grad_norm": 0.10400497913360596, "learning_rate": 0.0013659653006341057, "loss": 2.2782, "step": 410050 }, { "epoch": 1.5851772819347159, "grad_norm": 0.11008419096469879, "learning_rate": 0.0013656499389138519, "loss": 2.2888, "step": 410060 }, { "epoch": 1.585215939138099, "grad_norm": 0.11386552453041077, "learning_rate": 0.0013653347338951028, "loss": 2.2865, "step": 410070 }, { "epoch": 1.5852545963414824, "grad_norm": 0.18960657715797424, "learning_rate": 0.0013650196853444983, "loss": 2.2879, "step": 410080 }, { "epoch": 1.5852932535448656, "grad_norm": 0.09678232669830322, "learning_rate": 0.0013647047930292564, "loss": 2.2783, "step": 410090 }, { "epoch": 1.5853319107482489, "grad_norm": 0.096040740609169, "learning_rate": 0.0013643900567171718, "loss": 2.2924, "step": 410100 }, { "epoch": 1.5853705679516321, "grad_norm": 0.20530691742897034, "learning_rate": 0.0013640754761766144, "loss": 2.2836, "step": 410110 }, { "epoch": 1.5854092251550154, "grad_norm": 0.10444552451372147, "learning_rate": 0.0013637610511765255, "loss": 2.2679, "step": 410120 }, { "epoch": 1.5854478823583986, "grad_norm": 0.132937952876091, "learning_rate": 0.001363446781486418, "loss": 2.2928, "step": 410130 }, { "epoch": 1.5854865395617819, "grad_norm": 0.09830232709646225, "learning_rate": 0.0013631326668763738, "loss": 2.2902, "step": 410140 }, { "epoch": 1.5855251967651651, "grad_norm": 0.09446366876363754, "learning_rate": 0.0013628187071170403, "loss": 2.2991, "step": 410150 }, { "epoch": 1.5855638539685484, "grad_norm": 0.0938754454255104, "learning_rate": 0.001362504901979631, "loss": 2.2895, "step": 410160 }, { "epoch": 1.5856025111719316, "grad_norm": 0.1105690747499466, "learning_rate": 0.0013621912512359212, "loss": 2.2879, "step": 410170 }, { "epoch": 1.585641168375315, "grad_norm": 0.09766722470521927, "learning_rate": 0.0013618777546582472, "loss": 2.2927, "step": 410180 }, { "epoch": 1.5856798255786984, "grad_norm": 0.1109689325094223, "learning_rate": 0.001361564412019505, "loss": 2.2862, "step": 410190 }, { "epoch": 1.5857184827820816, "grad_norm": 0.10538505017757416, "learning_rate": 0.0013612512230931474, "loss": 2.2708, "step": 410200 }, { "epoch": 1.5857571399854649, "grad_norm": 0.09841452538967133, "learning_rate": 0.001360938187653182, "loss": 2.2811, "step": 410210 }, { "epoch": 1.5857957971888483, "grad_norm": 0.09635888785123825, "learning_rate": 0.0013606253054741688, "loss": 2.2762, "step": 410220 }, { "epoch": 1.5858344543922316, "grad_norm": 0.09994711726903915, "learning_rate": 0.001360312576331221, "loss": 2.2843, "step": 410230 }, { "epoch": 1.5858731115956148, "grad_norm": 0.11057305335998535, "learning_rate": 0.0013599999999999999, "loss": 2.2859, "step": 410240 }, { "epoch": 1.585911768798998, "grad_norm": 0.10168281197547913, "learning_rate": 0.0013596875762567151, "loss": 2.3044, "step": 410250 }, { "epoch": 1.5859504260023813, "grad_norm": 0.10190840065479279, "learning_rate": 0.0013593753048781214, "loss": 2.2784, "step": 410260 }, { "epoch": 1.5859890832057646, "grad_norm": 0.09425269812345505, "learning_rate": 0.0013590631856415173, "loss": 2.2899, "step": 410270 }, { "epoch": 1.5860277404091478, "grad_norm": 0.11108811944723129, "learning_rate": 0.0013587512183247442, "loss": 2.2865, "step": 410280 }, { "epoch": 1.586066397612531, "grad_norm": 0.12007686495780945, "learning_rate": 0.0013584394027061823, "loss": 2.2944, "step": 410290 }, { "epoch": 1.5861050548159144, "grad_norm": 0.11422277241945267, "learning_rate": 0.0013581277385647517, "loss": 2.2944, "step": 410300 }, { "epoch": 1.5861437120192976, "grad_norm": 0.10217493772506714, "learning_rate": 0.0013578162256799072, "loss": 2.2854, "step": 410310 }, { "epoch": 1.5861823692226809, "grad_norm": 0.10135794430971146, "learning_rate": 0.0013575048638316397, "loss": 2.2994, "step": 410320 }, { "epoch": 1.586221026426064, "grad_norm": 0.10746722668409348, "learning_rate": 0.0013571936528004721, "loss": 2.2899, "step": 410330 }, { "epoch": 1.5862596836294474, "grad_norm": 0.09228982776403427, "learning_rate": 0.001356882592367459, "loss": 2.291, "step": 410340 }, { "epoch": 1.5862983408328308, "grad_norm": 0.12576334178447723, "learning_rate": 0.0013565716823141837, "loss": 2.2806, "step": 410350 }, { "epoch": 1.586336998036214, "grad_norm": 0.11180675029754639, "learning_rate": 0.0013562609224227566, "loss": 2.3051, "step": 410360 }, { "epoch": 1.5863756552395973, "grad_norm": 0.0983600988984108, "learning_rate": 0.0013559503124758153, "loss": 2.2917, "step": 410370 }, { "epoch": 1.5864143124429806, "grad_norm": 0.09851762652397156, "learning_rate": 0.0013556398522565195, "loss": 2.2819, "step": 410380 }, { "epoch": 1.586452969646364, "grad_norm": 0.09128008782863617, "learning_rate": 0.001355329541548552, "loss": 2.2816, "step": 410390 }, { "epoch": 1.5864916268497473, "grad_norm": 0.09307295829057693, "learning_rate": 0.001355019380136116, "loss": 2.2751, "step": 410400 }, { "epoch": 1.5865302840531306, "grad_norm": 0.1199588030576706, "learning_rate": 0.0013547093678039329, "loss": 2.2911, "step": 410410 }, { "epoch": 1.5865689412565138, "grad_norm": 0.10031304508447647, "learning_rate": 0.0013543995043372413, "loss": 2.2987, "step": 410420 }, { "epoch": 1.586607598459897, "grad_norm": 0.1117083951830864, "learning_rate": 0.0013540897895217942, "loss": 2.2861, "step": 410430 }, { "epoch": 1.5866462556632803, "grad_norm": 0.0932147353887558, "learning_rate": 0.0013537802231438595, "loss": 2.2821, "step": 410440 }, { "epoch": 1.5866849128666636, "grad_norm": 0.10109856724739075, "learning_rate": 0.0013534708049902156, "loss": 2.2933, "step": 410450 }, { "epoch": 1.5867235700700468, "grad_norm": 0.09498569369316101, "learning_rate": 0.0013531615348481507, "loss": 2.2964, "step": 410460 }, { "epoch": 1.58676222727343, "grad_norm": 0.10946396738290787, "learning_rate": 0.0013528524125054626, "loss": 2.2835, "step": 410470 }, { "epoch": 1.5868008844768133, "grad_norm": 0.09790240973234177, "learning_rate": 0.0013525434377504543, "loss": 2.2838, "step": 410480 }, { "epoch": 1.5868395416801966, "grad_norm": 0.10081296414136887, "learning_rate": 0.001352234610371934, "loss": 2.2906, "step": 410490 }, { "epoch": 1.5868781988835798, "grad_norm": 0.11622887849807739, "learning_rate": 0.001351925930159214, "loss": 2.2996, "step": 410500 }, { "epoch": 1.586916856086963, "grad_norm": 0.09005551785230637, "learning_rate": 0.001351617396902107, "loss": 2.2913, "step": 410510 }, { "epoch": 1.5869555132903466, "grad_norm": 0.10132360458374023, "learning_rate": 0.0013513090103909257, "loss": 2.2762, "step": 410520 }, { "epoch": 1.5869941704937298, "grad_norm": 0.09307138621807098, "learning_rate": 0.001351000770416482, "loss": 2.2842, "step": 410530 }, { "epoch": 1.587032827697113, "grad_norm": 0.19938762485980988, "learning_rate": 0.001350692676770083, "loss": 2.2708, "step": 410540 }, { "epoch": 1.5870714849004963, "grad_norm": 0.09920799732208252, "learning_rate": 0.0013503847292435315, "loss": 2.291, "step": 410550 }, { "epoch": 1.5871101421038798, "grad_norm": 0.09430687129497528, "learning_rate": 0.0013500769276291232, "loss": 2.2867, "step": 410560 }, { "epoch": 1.587148799307263, "grad_norm": 0.1142541691660881, "learning_rate": 0.0013497692717196457, "loss": 2.3058, "step": 410570 }, { "epoch": 1.5871874565106463, "grad_norm": 0.09234173595905304, "learning_rate": 0.001349461761308376, "loss": 2.2839, "step": 410580 }, { "epoch": 1.5872261137140296, "grad_norm": 0.12318877130746841, "learning_rate": 0.0013491543961890809, "loss": 2.2912, "step": 410590 }, { "epoch": 1.5872647709174128, "grad_norm": 0.11205442249774933, "learning_rate": 0.0013488471761560117, "loss": 2.2894, "step": 410600 }, { "epoch": 1.587303428120796, "grad_norm": 0.11573982238769531, "learning_rate": 0.001348540101003907, "loss": 2.2769, "step": 410610 }, { "epoch": 1.5873420853241793, "grad_norm": 0.12379911541938782, "learning_rate": 0.001348233170527987, "loss": 2.2867, "step": 410620 }, { "epoch": 1.5873807425275626, "grad_norm": 0.09459318220615387, "learning_rate": 0.0013479263845239558, "loss": 2.2827, "step": 410630 }, { "epoch": 1.5874193997309458, "grad_norm": 0.09499311447143555, "learning_rate": 0.0013476197427879965, "loss": 2.2919, "step": 410640 }, { "epoch": 1.587458056934329, "grad_norm": 0.09781172126531601, "learning_rate": 0.0013473132451167712, "loss": 2.2859, "step": 410650 }, { "epoch": 1.5874967141377123, "grad_norm": 0.09685497730970383, "learning_rate": 0.0013470068913074198, "loss": 2.2797, "step": 410660 }, { "epoch": 1.5875353713410956, "grad_norm": 0.0856289267539978, "learning_rate": 0.0013467006811575572, "loss": 2.2763, "step": 410670 }, { "epoch": 1.5875740285444788, "grad_norm": 0.0968082994222641, "learning_rate": 0.0013463946144652724, "loss": 2.2785, "step": 410680 }, { "epoch": 1.5876126857478623, "grad_norm": 0.11211162805557251, "learning_rate": 0.0013460886910291274, "loss": 2.2963, "step": 410690 }, { "epoch": 1.5876513429512455, "grad_norm": 0.26160678267478943, "learning_rate": 0.001345782910648155, "loss": 2.2934, "step": 410700 }, { "epoch": 1.5876900001546288, "grad_norm": 0.09896879643201828, "learning_rate": 0.0013454772731218572, "loss": 2.2792, "step": 410710 }, { "epoch": 1.587728657358012, "grad_norm": 0.10798089951276779, "learning_rate": 0.001345171778250204, "loss": 2.3055, "step": 410720 }, { "epoch": 1.5877673145613955, "grad_norm": 0.10475075244903564, "learning_rate": 0.0013448664258336321, "loss": 2.2867, "step": 410730 }, { "epoch": 1.5878059717647788, "grad_norm": 0.108814537525177, "learning_rate": 0.0013445612156730426, "loss": 2.2868, "step": 410740 }, { "epoch": 1.587844628968162, "grad_norm": 0.11034875363111496, "learning_rate": 0.0013442561475698002, "loss": 2.2904, "step": 410750 }, { "epoch": 1.5878832861715453, "grad_norm": 0.0990104079246521, "learning_rate": 0.001343951221325731, "loss": 2.3047, "step": 410760 }, { "epoch": 1.5879219433749285, "grad_norm": 0.0992002859711647, "learning_rate": 0.0013436464367431226, "loss": 2.2871, "step": 410770 }, { "epoch": 1.5879606005783118, "grad_norm": 0.10469389706850052, "learning_rate": 0.0013433417936247198, "loss": 2.2893, "step": 410780 }, { "epoch": 1.587999257781695, "grad_norm": 0.10755585879087448, "learning_rate": 0.0013430372917737263, "loss": 2.2819, "step": 410790 }, { "epoch": 1.5880379149850783, "grad_norm": 0.16936883330345154, "learning_rate": 0.0013427329309938006, "loss": 2.2872, "step": 410800 }, { "epoch": 1.5880765721884615, "grad_norm": 0.10266046226024628, "learning_rate": 0.001342428711089056, "loss": 2.2775, "step": 410810 }, { "epoch": 1.5881152293918448, "grad_norm": 0.09038469195365906, "learning_rate": 0.001342124631864059, "loss": 2.2807, "step": 410820 }, { "epoch": 1.588153886595228, "grad_norm": 0.10589548945426941, "learning_rate": 0.0013418206931238266, "loss": 2.2908, "step": 410830 }, { "epoch": 1.5881925437986113, "grad_norm": 0.08776730298995972, "learning_rate": 0.001341516894673827, "loss": 2.2934, "step": 410840 }, { "epoch": 1.5882312010019946, "grad_norm": 0.10342015326023102, "learning_rate": 0.001341213236319976, "loss": 2.2711, "step": 410850 }, { "epoch": 1.588269858205378, "grad_norm": 0.09962315857410431, "learning_rate": 0.0013409097178686369, "loss": 2.2739, "step": 410860 }, { "epoch": 1.5883085154087613, "grad_norm": 0.12369918823242188, "learning_rate": 0.0013406063391266185, "loss": 2.2901, "step": 410870 }, { "epoch": 1.5883471726121445, "grad_norm": 0.09774774312973022, "learning_rate": 0.0013403030999011745, "loss": 2.273, "step": 410880 }, { "epoch": 1.5883858298155278, "grad_norm": 0.10486874729394913, "learning_rate": 0.0013399999999999998, "loss": 2.2856, "step": 410890 }, { "epoch": 1.5884244870189113, "grad_norm": 0.12358658760786057, "learning_rate": 0.0013396970392312328, "loss": 2.2988, "step": 410900 }, { "epoch": 1.5884631442222945, "grad_norm": 0.10743393003940582, "learning_rate": 0.0013393942174034503, "loss": 2.2783, "step": 410910 }, { "epoch": 1.5885018014256778, "grad_norm": 0.10001206398010254, "learning_rate": 0.0013390915343256678, "loss": 2.2816, "step": 410920 }, { "epoch": 1.588540458629061, "grad_norm": 0.10591772943735123, "learning_rate": 0.0013387889898073383, "loss": 2.2981, "step": 410930 }, { "epoch": 1.5885791158324443, "grad_norm": 0.10507027804851532, "learning_rate": 0.001338486583658351, "loss": 2.293, "step": 410940 }, { "epoch": 1.5886177730358275, "grad_norm": 0.13516682386398315, "learning_rate": 0.0013381843156890282, "loss": 2.2735, "step": 410950 }, { "epoch": 1.5886564302392108, "grad_norm": 0.10282962769269943, "learning_rate": 0.001337882185710126, "loss": 2.2705, "step": 410960 }, { "epoch": 1.588695087442594, "grad_norm": 0.10241902619600296, "learning_rate": 0.0013375801935328322, "loss": 2.2836, "step": 410970 }, { "epoch": 1.5887337446459773, "grad_norm": 0.09397570788860321, "learning_rate": 0.0013372783389687644, "loss": 2.2804, "step": 410980 }, { "epoch": 1.5887724018493605, "grad_norm": 0.09933995455503464, "learning_rate": 0.001336976621829969, "loss": 2.2846, "step": 410990 }, { "epoch": 1.5888110590527438, "grad_norm": 0.23156529664993286, "learning_rate": 0.00133667504192892, "loss": 2.2898, "step": 411000 }, { "epoch": 1.588849716256127, "grad_norm": 0.09423259645700455, "learning_rate": 0.0013363735990785178, "loss": 2.2851, "step": 411010 }, { "epoch": 1.5888883734595103, "grad_norm": 0.10134994238615036, "learning_rate": 0.0013360722930920868, "loss": 2.2957, "step": 411020 }, { "epoch": 1.5889270306628938, "grad_norm": 0.1059289425611496, "learning_rate": 0.0013357711237833754, "loss": 2.2816, "step": 411030 }, { "epoch": 1.588965687866277, "grad_norm": 0.2090606540441513, "learning_rate": 0.001335470090966554, "loss": 2.3035, "step": 411040 }, { "epoch": 1.5890043450696603, "grad_norm": 0.15536662936210632, "learning_rate": 0.0013351691944562135, "loss": 2.2997, "step": 411050 }, { "epoch": 1.5890430022730435, "grad_norm": 0.10510016977787018, "learning_rate": 0.0013348684340673644, "loss": 2.2988, "step": 411060 }, { "epoch": 1.589081659476427, "grad_norm": 0.4065359830856323, "learning_rate": 0.001334567809615435, "loss": 2.2891, "step": 411070 }, { "epoch": 1.5891203166798102, "grad_norm": 0.09470411390066147, "learning_rate": 0.0013342673209162706, "loss": 2.3082, "step": 411080 }, { "epoch": 1.5891589738831935, "grad_norm": 0.09722394496202469, "learning_rate": 0.0013339669677861314, "loss": 2.2815, "step": 411090 }, { "epoch": 1.5891976310865767, "grad_norm": 0.10346391797065735, "learning_rate": 0.0013336667500416929, "loss": 2.2876, "step": 411100 }, { "epoch": 1.58923628828996, "grad_norm": 0.09680181741714478, "learning_rate": 0.0013333666675000417, "loss": 2.2978, "step": 411110 }, { "epoch": 1.5892749454933432, "grad_norm": 0.1011996865272522, "learning_rate": 0.0013330667199786773, "loss": 2.289, "step": 411120 }, { "epoch": 1.5893136026967265, "grad_norm": 0.1092514917254448, "learning_rate": 0.0013327669072955089, "loss": 2.2919, "step": 411130 }, { "epoch": 1.5893522599001098, "grad_norm": 0.09436289221048355, "learning_rate": 0.0013324672292688545, "loss": 2.2845, "step": 411140 }, { "epoch": 1.589390917103493, "grad_norm": 0.09714016318321228, "learning_rate": 0.00133216768571744, "loss": 2.2978, "step": 411150 }, { "epoch": 1.5894295743068763, "grad_norm": 0.10386151075363159, "learning_rate": 0.0013318682764603973, "loss": 2.2672, "step": 411160 }, { "epoch": 1.5894682315102595, "grad_norm": 0.09310182183980942, "learning_rate": 0.001331569001317264, "loss": 2.2861, "step": 411170 }, { "epoch": 1.5895068887136428, "grad_norm": 0.10692202299833298, "learning_rate": 0.0013312698601079807, "loss": 2.2751, "step": 411180 }, { "epoch": 1.589545545917026, "grad_norm": 0.09382941573858261, "learning_rate": 0.0013309708526528907, "loss": 2.2891, "step": 411190 }, { "epoch": 1.5895842031204095, "grad_norm": 0.10170342028141022, "learning_rate": 0.0013306719787727396, "loss": 2.2846, "step": 411200 }, { "epoch": 1.5896228603237927, "grad_norm": 0.11549654603004456, "learning_rate": 0.0013303732382886717, "loss": 2.2846, "step": 411210 }, { "epoch": 1.589661517527176, "grad_norm": 0.11813855916261673, "learning_rate": 0.001330074631022231, "loss": 2.263, "step": 411220 }, { "epoch": 1.5897001747305592, "grad_norm": 0.09434445947408676, "learning_rate": 0.0013297761567953583, "loss": 2.276, "step": 411230 }, { "epoch": 1.5897388319339427, "grad_norm": 0.09521712362766266, "learning_rate": 0.0013294778154303915, "loss": 2.2783, "step": 411240 }, { "epoch": 1.589777489137326, "grad_norm": 0.10191566497087479, "learning_rate": 0.001329179606750063, "loss": 2.2815, "step": 411250 }, { "epoch": 1.5898161463407092, "grad_norm": 0.10600905120372772, "learning_rate": 0.0013288815305774993, "loss": 2.2789, "step": 411260 }, { "epoch": 1.5898548035440925, "grad_norm": 0.10898283123970032, "learning_rate": 0.0013285835867362192, "loss": 2.2853, "step": 411270 }, { "epoch": 1.5898934607474757, "grad_norm": 0.4227880835533142, "learning_rate": 0.0013282857750501337, "loss": 2.2716, "step": 411280 }, { "epoch": 1.589932117950859, "grad_norm": 0.10346785932779312, "learning_rate": 0.0013279880953435423, "loss": 2.2924, "step": 411290 }, { "epoch": 1.5899707751542422, "grad_norm": 0.10272988677024841, "learning_rate": 0.0013276905474411357, "loss": 2.2859, "step": 411300 }, { "epoch": 1.5900094323576255, "grad_norm": 0.17632514238357544, "learning_rate": 0.0013273931311679906, "loss": 2.2894, "step": 411310 }, { "epoch": 1.5900480895610087, "grad_norm": 0.10075627267360687, "learning_rate": 0.0013270958463495713, "loss": 2.2829, "step": 411320 }, { "epoch": 1.590086746764392, "grad_norm": 0.10793676227331161, "learning_rate": 0.0013267986928117267, "loss": 2.2933, "step": 411330 }, { "epoch": 1.5901254039677752, "grad_norm": 0.10403290390968323, "learning_rate": 0.0013265016703806905, "loss": 2.2683, "step": 411340 }, { "epoch": 1.5901640611711585, "grad_norm": 0.08908011019229889, "learning_rate": 0.0013262047788830792, "loss": 2.2938, "step": 411350 }, { "epoch": 1.5902027183745417, "grad_norm": 0.09441845118999481, "learning_rate": 0.0013259080181458912, "loss": 2.2854, "step": 411360 }, { "epoch": 1.5902413755779252, "grad_norm": 0.10546926409006119, "learning_rate": 0.0013256113879965056, "loss": 2.2922, "step": 411370 }, { "epoch": 1.5902800327813085, "grad_norm": 0.1103266179561615, "learning_rate": 0.00132531488826268, "loss": 2.277, "step": 411380 }, { "epoch": 1.5903186899846917, "grad_norm": 0.0980391874909401, "learning_rate": 0.001325018518772552, "loss": 2.3028, "step": 411390 }, { "epoch": 1.590357347188075, "grad_norm": 0.1028299555182457, "learning_rate": 0.0013247222793546347, "loss": 2.3042, "step": 411400 }, { "epoch": 1.5903960043914585, "grad_norm": 0.0907234251499176, "learning_rate": 0.0013244261698378186, "loss": 2.2924, "step": 411410 }, { "epoch": 1.5904346615948417, "grad_norm": 0.10806559771299362, "learning_rate": 0.001324130190051368, "loss": 2.2932, "step": 411420 }, { "epoch": 1.590473318798225, "grad_norm": 0.0910990834236145, "learning_rate": 0.0013238343398249213, "loss": 2.2821, "step": 411430 }, { "epoch": 1.5905119760016082, "grad_norm": 0.09880875796079636, "learning_rate": 0.0013235386189884896, "loss": 2.2785, "step": 411440 }, { "epoch": 1.5905506332049915, "grad_norm": 0.09702501446008682, "learning_rate": 0.0013232430273724548, "loss": 2.2757, "step": 411450 }, { "epoch": 1.5905892904083747, "grad_norm": 0.09511641412973404, "learning_rate": 0.0013229475648075697, "loss": 2.2857, "step": 411460 }, { "epoch": 1.590627947611758, "grad_norm": 0.10028154402971268, "learning_rate": 0.001322652231124956, "loss": 2.2678, "step": 411470 }, { "epoch": 1.5906666048151412, "grad_norm": 0.10853612422943115, "learning_rate": 0.0013223570261561032, "loss": 2.2888, "step": 411480 }, { "epoch": 1.5907052620185245, "grad_norm": 0.3342318832874298, "learning_rate": 0.0013220619497328683, "loss": 2.2837, "step": 411490 }, { "epoch": 1.5907439192219077, "grad_norm": 0.092925526201725, "learning_rate": 0.0013217670016874732, "loss": 2.2834, "step": 411500 }, { "epoch": 1.590782576425291, "grad_norm": 0.09471990168094635, "learning_rate": 0.001321472181852505, "loss": 2.2605, "step": 411510 }, { "epoch": 1.5908212336286742, "grad_norm": 0.10889468342065811, "learning_rate": 0.0013211774900609144, "loss": 2.2817, "step": 411520 }, { "epoch": 1.5908598908320577, "grad_norm": 0.10209710150957108, "learning_rate": 0.0013208829261460143, "loss": 2.2977, "step": 411530 }, { "epoch": 1.590898548035441, "grad_norm": 0.09678677469491959, "learning_rate": 0.0013205884899414788, "loss": 2.2805, "step": 411540 }, { "epoch": 1.5909372052388242, "grad_norm": 0.12248372286558151, "learning_rate": 0.0013202941812813429, "loss": 2.2852, "step": 411550 }, { "epoch": 1.5909758624422075, "grad_norm": 0.09738996624946594, "learning_rate": 0.00132, "loss": 2.3028, "step": 411560 }, { "epoch": 1.5910145196455907, "grad_norm": 0.09860974550247192, "learning_rate": 0.0013197059459322022, "loss": 2.2742, "step": 411570 }, { "epoch": 1.5910531768489742, "grad_norm": 0.11459953337907791, "learning_rate": 0.0013194120189130578, "loss": 2.2892, "step": 411580 }, { "epoch": 1.5910918340523574, "grad_norm": 0.11038514971733093, "learning_rate": 0.001319118218778032, "loss": 2.2809, "step": 411590 }, { "epoch": 1.5911304912557407, "grad_norm": 0.09345544129610062, "learning_rate": 0.001318824545362944, "loss": 2.2818, "step": 411600 }, { "epoch": 1.591169148459124, "grad_norm": 0.08866851031780243, "learning_rate": 0.001318530998503967, "loss": 2.2901, "step": 411610 }, { "epoch": 1.5912078056625072, "grad_norm": 0.09870915859937668, "learning_rate": 0.0013182375780376274, "loss": 2.2757, "step": 411620 }, { "epoch": 1.5912464628658904, "grad_norm": 0.09726520627737045, "learning_rate": 0.0013179442838008027, "loss": 2.2711, "step": 411630 }, { "epoch": 1.5912851200692737, "grad_norm": 0.11688517779111862, "learning_rate": 0.0013176511156307207, "loss": 2.276, "step": 411640 }, { "epoch": 1.591323777272657, "grad_norm": 0.11012086272239685, "learning_rate": 0.0013173580733649595, "loss": 2.289, "step": 411650 }, { "epoch": 1.5913624344760402, "grad_norm": 0.08703190833330154, "learning_rate": 0.001317065156841445, "loss": 2.2957, "step": 411660 }, { "epoch": 1.5914010916794235, "grad_norm": 0.15332461893558502, "learning_rate": 0.0013167723658984512, "loss": 2.3, "step": 411670 }, { "epoch": 1.5914397488828067, "grad_norm": 0.09997732192277908, "learning_rate": 0.0013164797003745977, "loss": 2.2693, "step": 411680 }, { "epoch": 1.59147840608619, "grad_norm": 0.10567644983530045, "learning_rate": 0.0013161871601088495, "loss": 2.291, "step": 411690 }, { "epoch": 1.5915170632895734, "grad_norm": 0.09762544929981232, "learning_rate": 0.0013158947449405172, "loss": 2.2918, "step": 411700 }, { "epoch": 1.5915557204929567, "grad_norm": 0.10543996840715408, "learning_rate": 0.001315602454709253, "loss": 2.2713, "step": 411710 }, { "epoch": 1.59159437769634, "grad_norm": 0.09037619829177856, "learning_rate": 0.0013153102892550525, "loss": 2.2706, "step": 411720 }, { "epoch": 1.5916330348997232, "grad_norm": 0.10732545703649521, "learning_rate": 0.0013150182484182515, "loss": 2.2967, "step": 411730 }, { "epoch": 1.5916716921031064, "grad_norm": 0.09629809856414795, "learning_rate": 0.0013147263320395274, "loss": 2.2672, "step": 411740 }, { "epoch": 1.59171034930649, "grad_norm": 0.10680815577507019, "learning_rate": 0.0013144345399598955, "loss": 2.2666, "step": 411750 }, { "epoch": 1.5917490065098732, "grad_norm": 0.09527281671762466, "learning_rate": 0.0013141428720207103, "loss": 2.2815, "step": 411760 }, { "epoch": 1.5917876637132564, "grad_norm": 0.10793960839509964, "learning_rate": 0.0013138513280636624, "loss": 2.2803, "step": 411770 }, { "epoch": 1.5918263209166397, "grad_norm": 0.10628779232501984, "learning_rate": 0.0013135599079307796, "loss": 2.287, "step": 411780 }, { "epoch": 1.591864978120023, "grad_norm": 0.10582350194454193, "learning_rate": 0.0013132686114644243, "loss": 2.2832, "step": 411790 }, { "epoch": 1.5919036353234062, "grad_norm": 0.11519595235586166, "learning_rate": 0.0013129774385072933, "loss": 2.2925, "step": 411800 }, { "epoch": 1.5919422925267894, "grad_norm": 0.09863905608654022, "learning_rate": 0.0013126863889024168, "loss": 2.2756, "step": 411810 }, { "epoch": 1.5919809497301727, "grad_norm": 0.10615672171115875, "learning_rate": 0.0013123954624931567, "loss": 2.2781, "step": 411820 }, { "epoch": 1.592019606933556, "grad_norm": 0.09898418933153152, "learning_rate": 0.0013121046591232065, "loss": 2.2748, "step": 411830 }, { "epoch": 1.5920582641369392, "grad_norm": 0.09342687577009201, "learning_rate": 0.00131181397863659, "loss": 2.2863, "step": 411840 }, { "epoch": 1.5920969213403224, "grad_norm": 0.0946342870593071, "learning_rate": 0.0013115234208776597, "loss": 2.274, "step": 411850 }, { "epoch": 1.5921355785437057, "grad_norm": 0.09903334826231003, "learning_rate": 0.0013112329856910976, "loss": 2.2742, "step": 411860 }, { "epoch": 1.5921742357470892, "grad_norm": 0.10328184813261032, "learning_rate": 0.0013109426729219114, "loss": 2.285, "step": 411870 }, { "epoch": 1.5922128929504724, "grad_norm": 0.09361221641302109, "learning_rate": 0.0013106524824154366, "loss": 2.2823, "step": 411880 }, { "epoch": 1.5922515501538557, "grad_norm": 0.0951387882232666, "learning_rate": 0.0013103624140173335, "loss": 2.284, "step": 411890 }, { "epoch": 1.592290207357239, "grad_norm": 0.10114741325378418, "learning_rate": 0.0013100724675735864, "loss": 2.2861, "step": 411900 }, { "epoch": 1.5923288645606224, "grad_norm": 0.10498538613319397, "learning_rate": 0.0013097826429305042, "loss": 2.2775, "step": 411910 }, { "epoch": 1.5923675217640056, "grad_norm": 0.092182457447052, "learning_rate": 0.0013094929399347173, "loss": 2.2895, "step": 411920 }, { "epoch": 1.592406178967389, "grad_norm": 0.11279166489839554, "learning_rate": 0.0013092033584331784, "loss": 2.2872, "step": 411930 }, { "epoch": 1.5924448361707721, "grad_norm": 0.09732379764318466, "learning_rate": 0.0013089138982731604, "loss": 2.2741, "step": 411940 }, { "epoch": 1.5924834933741554, "grad_norm": 0.09594089537858963, "learning_rate": 0.001308624559302256, "loss": 2.2718, "step": 411950 }, { "epoch": 1.5925221505775387, "grad_norm": 0.1188618466258049, "learning_rate": 0.0013083353413683768, "loss": 2.2821, "step": 411960 }, { "epoch": 1.592560807780922, "grad_norm": 0.11669903993606567, "learning_rate": 0.0013080462443197524, "loss": 2.287, "step": 411970 }, { "epoch": 1.5925994649843052, "grad_norm": 0.10351302474737167, "learning_rate": 0.0013077572680049287, "loss": 2.2905, "step": 411980 }, { "epoch": 1.5926381221876884, "grad_norm": 0.3517273962497711, "learning_rate": 0.0013074684122727684, "loss": 2.2672, "step": 411990 }, { "epoch": 1.5926767793910717, "grad_norm": 0.11604971438646317, "learning_rate": 0.0013071796769724491, "loss": 2.2852, "step": 412000 }, { "epoch": 1.592715436594455, "grad_norm": 0.09867674112319946, "learning_rate": 0.0013068910619534618, "loss": 2.2912, "step": 412010 }, { "epoch": 1.5927540937978382, "grad_norm": 0.09733185172080994, "learning_rate": 0.0013066025670656115, "loss": 2.2827, "step": 412020 }, { "epoch": 1.5927927510012214, "grad_norm": 0.10976596921682358, "learning_rate": 0.0013063141921590149, "loss": 2.2807, "step": 412030 }, { "epoch": 1.592831408204605, "grad_norm": 0.10603948682546616, "learning_rate": 0.001306025937084101, "loss": 2.2792, "step": 412040 }, { "epoch": 1.5928700654079881, "grad_norm": 0.10735761374235153, "learning_rate": 0.0013057378016916089, "loss": 2.2661, "step": 412050 }, { "epoch": 1.5929087226113714, "grad_norm": 0.09300761669874191, "learning_rate": 0.0013054497858325865, "loss": 2.2894, "step": 412060 }, { "epoch": 1.5929473798147546, "grad_norm": 0.10261404514312744, "learning_rate": 0.0013051618893583916, "loss": 2.2913, "step": 412070 }, { "epoch": 1.5929860370181381, "grad_norm": 0.1160757839679718, "learning_rate": 0.0013048741121206894, "loss": 2.2692, "step": 412080 }, { "epoch": 1.5930246942215214, "grad_norm": 0.11559846252202988, "learning_rate": 0.0013045864539714515, "loss": 2.2747, "step": 412090 }, { "epoch": 1.5930633514249046, "grad_norm": 0.10721922665834427, "learning_rate": 0.0013042989147629567, "loss": 2.2882, "step": 412100 }, { "epoch": 1.5931020086282879, "grad_norm": 0.10484657436609268, "learning_rate": 0.0013040114943477874, "loss": 2.2707, "step": 412110 }, { "epoch": 1.5931406658316711, "grad_norm": 0.1271674782037735, "learning_rate": 0.0013037241925788314, "loss": 2.2859, "step": 412120 }, { "epoch": 1.5931793230350544, "grad_norm": 0.09756782650947571, "learning_rate": 0.0013034370093092803, "loss": 2.2738, "step": 412130 }, { "epoch": 1.5932179802384376, "grad_norm": 0.09413274377584457, "learning_rate": 0.0013031499443926261, "loss": 2.2885, "step": 412140 }, { "epoch": 1.5932566374418209, "grad_norm": 0.09922010451555252, "learning_rate": 0.0013028629976826651, "loss": 2.2891, "step": 412150 }, { "epoch": 1.5932952946452041, "grad_norm": 0.10199158638715744, "learning_rate": 0.0013025761690334922, "loss": 2.2676, "step": 412160 }, { "epoch": 1.5933339518485874, "grad_norm": 0.11042314022779465, "learning_rate": 0.0013022894582995037, "loss": 2.2784, "step": 412170 }, { "epoch": 1.5933726090519706, "grad_norm": 0.0946459174156189, "learning_rate": 0.001302002865335394, "loss": 2.266, "step": 412180 }, { "epoch": 1.593411266255354, "grad_norm": 0.09742408990859985, "learning_rate": 0.0013017163899961563, "loss": 2.28, "step": 412190 }, { "epoch": 1.5934499234587371, "grad_norm": 0.10281988233327866, "learning_rate": 0.0013014300321370809, "loss": 2.2715, "step": 412200 }, { "epoch": 1.5934885806621206, "grad_norm": 0.09957725554704666, "learning_rate": 0.001301143791613754, "loss": 2.2696, "step": 412210 }, { "epoch": 1.5935272378655039, "grad_norm": 0.10443780571222305, "learning_rate": 0.0013008576682820587, "loss": 2.2815, "step": 412220 }, { "epoch": 1.5935658950688871, "grad_norm": 0.12150289118289948, "learning_rate": 0.0013005716619981715, "loss": 2.2797, "step": 412230 }, { "epoch": 1.5936045522722704, "grad_norm": 0.09694153815507889, "learning_rate": 0.001300285772618564, "loss": 2.2784, "step": 412240 }, { "epoch": 1.5936432094756539, "grad_norm": 0.10975735634565353, "learning_rate": 0.0013000000000000002, "loss": 2.2676, "step": 412250 }, { "epoch": 1.593681866679037, "grad_norm": 0.11566482484340668, "learning_rate": 0.0012997143439995363, "loss": 2.2765, "step": 412260 }, { "epoch": 1.5937205238824204, "grad_norm": 0.0927523523569107, "learning_rate": 0.0012994288044745202, "loss": 2.2901, "step": 412270 }, { "epoch": 1.5937591810858036, "grad_norm": 0.10696090012788773, "learning_rate": 0.0012991433812825908, "loss": 2.2845, "step": 412280 }, { "epoch": 1.5937978382891869, "grad_norm": 0.10401918739080429, "learning_rate": 0.001298858074281676, "loss": 2.2804, "step": 412290 }, { "epoch": 1.5938364954925701, "grad_norm": 0.09173014760017395, "learning_rate": 0.0012985728833299927, "loss": 2.2877, "step": 412300 }, { "epoch": 1.5938751526959534, "grad_norm": 0.0979364663362503, "learning_rate": 0.0012982878082860468, "loss": 2.2779, "step": 412310 }, { "epoch": 1.5939138098993366, "grad_norm": 0.10152798146009445, "learning_rate": 0.0012980028490086305, "loss": 2.2735, "step": 412320 }, { "epoch": 1.5939524671027199, "grad_norm": 0.09956847876310349, "learning_rate": 0.0012977180053568224, "loss": 2.2834, "step": 412330 }, { "epoch": 1.5939911243061031, "grad_norm": 0.115941122174263, "learning_rate": 0.0012974332771899882, "loss": 2.284, "step": 412340 }, { "epoch": 1.5940297815094864, "grad_norm": 0.11504446715116501, "learning_rate": 0.0012971486643677769, "loss": 2.2872, "step": 412350 }, { "epoch": 1.5940684387128696, "grad_norm": 0.12059466540813446, "learning_rate": 0.0012968641667501222, "loss": 2.2706, "step": 412360 }, { "epoch": 1.5941070959162529, "grad_norm": 0.11342114210128784, "learning_rate": 0.0012965797841972412, "loss": 2.2714, "step": 412370 }, { "epoch": 1.5941457531196364, "grad_norm": 0.09073950350284576, "learning_rate": 0.0012962955165696328, "loss": 2.2844, "step": 412380 }, { "epoch": 1.5941844103230196, "grad_norm": 0.0957561805844307, "learning_rate": 0.0012960113637280784, "loss": 2.2792, "step": 412390 }, { "epoch": 1.5942230675264029, "grad_norm": 0.11272169649600983, "learning_rate": 0.0012957273255336397, "loss": 2.2842, "step": 412400 }, { "epoch": 1.5942617247297861, "grad_norm": 0.10272194445133209, "learning_rate": 0.0012954434018476583, "loss": 2.2899, "step": 412410 }, { "epoch": 1.5943003819331696, "grad_norm": 0.0964650809764862, "learning_rate": 0.001295159592531756, "loss": 2.2863, "step": 412420 }, { "epoch": 1.5943390391365528, "grad_norm": 0.09082885831594467, "learning_rate": 0.0012948758974478322, "loss": 2.2722, "step": 412430 }, { "epoch": 1.594377696339936, "grad_norm": 0.10105688869953156, "learning_rate": 0.001294592316458064, "loss": 2.2836, "step": 412440 }, { "epoch": 1.5944163535433193, "grad_norm": 0.11495474725961685, "learning_rate": 0.001294308849424906, "loss": 2.2826, "step": 412450 }, { "epoch": 1.5944550107467026, "grad_norm": 0.09655379503965378, "learning_rate": 0.0012940254962110881, "loss": 2.265, "step": 412460 }, { "epoch": 1.5944936679500858, "grad_norm": 0.10862415283918381, "learning_rate": 0.001293742256679617, "loss": 2.305, "step": 412470 }, { "epoch": 1.594532325153469, "grad_norm": 0.10821930319070816, "learning_rate": 0.0012934591306937723, "loss": 2.2805, "step": 412480 }, { "epoch": 1.5945709823568524, "grad_norm": 0.10074169933795929, "learning_rate": 0.0012931761181171084, "loss": 2.2778, "step": 412490 }, { "epoch": 1.5946096395602356, "grad_norm": 0.10636480897665024, "learning_rate": 0.0012928932188134526, "loss": 2.2731, "step": 412500 }, { "epoch": 1.5946482967636189, "grad_norm": 0.09842386096715927, "learning_rate": 0.0012926104326469044, "loss": 2.2806, "step": 412510 }, { "epoch": 1.594686953967002, "grad_norm": 0.10499393194913864, "learning_rate": 0.0012923277594818346, "loss": 2.2826, "step": 412520 }, { "epoch": 1.5947256111703854, "grad_norm": 0.0923650711774826, "learning_rate": 0.0012920451991828856, "loss": 2.2768, "step": 412530 }, { "epoch": 1.5947642683737686, "grad_norm": 0.09121539443731308, "learning_rate": 0.001291762751614969, "loss": 2.2732, "step": 412540 }, { "epoch": 1.594802925577152, "grad_norm": 0.12280796468257904, "learning_rate": 0.0012914804166432661, "loss": 2.2729, "step": 412550 }, { "epoch": 1.5948415827805353, "grad_norm": 0.10645350813865662, "learning_rate": 0.0012911981941332261, "loss": 2.2759, "step": 412560 }, { "epoch": 1.5948802399839186, "grad_norm": 0.08899696171283722, "learning_rate": 0.0012909160839505668, "loss": 2.2877, "step": 412570 }, { "epoch": 1.5949188971873018, "grad_norm": 0.08800100535154343, "learning_rate": 0.001290634085961272, "loss": 2.2736, "step": 412580 }, { "epoch": 1.5949575543906853, "grad_norm": 0.10100112110376358, "learning_rate": 0.001290352200031593, "loss": 2.2794, "step": 412590 }, { "epoch": 1.5949962115940686, "grad_norm": 0.1053532063961029, "learning_rate": 0.0012900704260280463, "loss": 2.2769, "step": 412600 }, { "epoch": 1.5950348687974518, "grad_norm": 0.11090871691703796, "learning_rate": 0.001289788763817412, "loss": 2.2763, "step": 412610 }, { "epoch": 1.595073526000835, "grad_norm": 0.0996427908539772, "learning_rate": 0.0012895072132667355, "loss": 2.2865, "step": 412620 }, { "epoch": 1.5951121832042183, "grad_norm": 0.10796479880809784, "learning_rate": 0.0012892257742433254, "loss": 2.2742, "step": 412630 }, { "epoch": 1.5951508404076016, "grad_norm": 0.11622989177703857, "learning_rate": 0.0012889444466147528, "loss": 2.2768, "step": 412640 }, { "epoch": 1.5951894976109848, "grad_norm": 0.09687823057174683, "learning_rate": 0.0012886632302488504, "loss": 2.2826, "step": 412650 }, { "epoch": 1.595228154814368, "grad_norm": 0.107315793633461, "learning_rate": 0.0012883821250137123, "loss": 2.2936, "step": 412660 }, { "epoch": 1.5952668120177513, "grad_norm": 0.11312927305698395, "learning_rate": 0.0012881011307776923, "loss": 2.2905, "step": 412670 }, { "epoch": 1.5953054692211346, "grad_norm": 0.09998205304145813, "learning_rate": 0.0012878202474094057, "loss": 2.2802, "step": 412680 }, { "epoch": 1.5953441264245178, "grad_norm": 0.10146909207105637, "learning_rate": 0.001287539474777725, "loss": 2.2754, "step": 412690 }, { "epoch": 1.595382783627901, "grad_norm": 0.09956265985965729, "learning_rate": 0.0012872588127517816, "loss": 2.2828, "step": 412700 }, { "epoch": 1.5954214408312843, "grad_norm": 0.1043073907494545, "learning_rate": 0.0012869782612009643, "loss": 2.2712, "step": 412710 }, { "epoch": 1.5954600980346678, "grad_norm": 0.11289764195680618, "learning_rate": 0.0012866978199949197, "loss": 2.2858, "step": 412720 }, { "epoch": 1.595498755238051, "grad_norm": 0.10688609629869461, "learning_rate": 0.0012864174890035492, "loss": 2.26, "step": 412730 }, { "epoch": 1.5955374124414343, "grad_norm": 0.1139904335141182, "learning_rate": 0.00128613726809701, "loss": 2.2785, "step": 412740 }, { "epoch": 1.5955760696448176, "grad_norm": 0.10517367720603943, "learning_rate": 0.0012858571571457151, "loss": 2.2816, "step": 412750 }, { "epoch": 1.595614726848201, "grad_norm": 0.10349394381046295, "learning_rate": 0.0012855771560203299, "loss": 2.2903, "step": 412760 }, { "epoch": 1.5956533840515843, "grad_norm": 0.09697014838457108, "learning_rate": 0.0012852972645917746, "loss": 2.2865, "step": 412770 }, { "epoch": 1.5956920412549676, "grad_norm": 0.10311219096183777, "learning_rate": 0.0012850174827312212, "loss": 2.2963, "step": 412780 }, { "epoch": 1.5957306984583508, "grad_norm": 0.10511083900928497, "learning_rate": 0.0012847378103100933, "loss": 2.2981, "step": 412790 }, { "epoch": 1.595769355661734, "grad_norm": 0.09088359028100967, "learning_rate": 0.0012844582472000675, "loss": 2.2919, "step": 412800 }, { "epoch": 1.5958080128651173, "grad_norm": 0.10324206948280334, "learning_rate": 0.001284178793273069, "loss": 2.2829, "step": 412810 }, { "epoch": 1.5958466700685006, "grad_norm": 0.09309279173612595, "learning_rate": 0.0012838994484012738, "loss": 2.2731, "step": 412820 }, { "epoch": 1.5958853272718838, "grad_norm": 0.17614911496639252, "learning_rate": 0.0012836202124571075, "loss": 2.268, "step": 412830 }, { "epoch": 1.595923984475267, "grad_norm": 0.10486125200986862, "learning_rate": 0.0012833410853132431, "loss": 2.2921, "step": 412840 }, { "epoch": 1.5959626416786503, "grad_norm": 0.11186165362596512, "learning_rate": 0.0012830620668426032, "loss": 2.2847, "step": 412850 }, { "epoch": 1.5960012988820336, "grad_norm": 0.12183138728141785, "learning_rate": 0.001282783156918356, "loss": 2.2755, "step": 412860 }, { "epoch": 1.5960399560854168, "grad_norm": 0.0917850136756897, "learning_rate": 0.001282504355413916, "loss": 2.2678, "step": 412870 }, { "epoch": 1.5960786132888, "grad_norm": 0.10441316664218903, "learning_rate": 0.0012822256622029456, "loss": 2.2848, "step": 412880 }, { "epoch": 1.5961172704921835, "grad_norm": 0.09781082719564438, "learning_rate": 0.0012819470771593504, "loss": 2.2803, "step": 412890 }, { "epoch": 1.5961559276955668, "grad_norm": 0.09336867183446884, "learning_rate": 0.001281668600157281, "loss": 2.2777, "step": 412900 }, { "epoch": 1.59619458489895, "grad_norm": 0.11104751378297806, "learning_rate": 0.001281390231071133, "loss": 2.2826, "step": 412910 }, { "epoch": 1.5962332421023333, "grad_norm": 0.09125860035419464, "learning_rate": 0.001281111969775543, "loss": 2.2865, "step": 412920 }, { "epoch": 1.5962718993057168, "grad_norm": 0.10214760154485703, "learning_rate": 0.001280833816145392, "loss": 2.2782, "step": 412930 }, { "epoch": 1.5963105565091, "grad_norm": 0.10561887919902802, "learning_rate": 0.0012805557700558022, "loss": 2.2736, "step": 412940 }, { "epoch": 1.5963492137124833, "grad_norm": 0.10510638356208801, "learning_rate": 0.0012802778313821368, "loss": 2.2831, "step": 412950 }, { "epoch": 1.5963878709158665, "grad_norm": 0.11140754073858261, "learning_rate": 0.00128, "loss": 2.279, "step": 412960 }, { "epoch": 1.5964265281192498, "grad_norm": 0.10227959603071213, "learning_rate": 0.0012797222757852356, "loss": 2.2798, "step": 412970 }, { "epoch": 1.596465185322633, "grad_norm": 0.10155359655618668, "learning_rate": 0.0012794446586139273, "loss": 2.2788, "step": 412980 }, { "epoch": 1.5965038425260163, "grad_norm": 0.09626945108175278, "learning_rate": 0.001279167148362396, "loss": 2.2714, "step": 412990 }, { "epoch": 1.5965424997293995, "grad_norm": 0.1154874861240387, "learning_rate": 0.0012788897449072022, "loss": 2.2803, "step": 413000 }, { "epoch": 1.5965811569327828, "grad_norm": 0.09753113240003586, "learning_rate": 0.0012786124481251426, "loss": 2.2998, "step": 413010 }, { "epoch": 1.596619814136166, "grad_norm": 0.10309763252735138, "learning_rate": 0.0012783352578932515, "loss": 2.2726, "step": 413020 }, { "epoch": 1.5966584713395493, "grad_norm": 0.10636594146490097, "learning_rate": 0.0012780581740887982, "loss": 2.2824, "step": 413030 }, { "epoch": 1.5966971285429326, "grad_norm": 0.11068262904882431, "learning_rate": 0.0012777811965892884, "loss": 2.2909, "step": 413040 }, { "epoch": 1.5967357857463158, "grad_norm": 0.10145969688892365, "learning_rate": 0.0012775043252724622, "loss": 2.2845, "step": 413050 }, { "epoch": 1.5967744429496993, "grad_norm": 0.10044920444488525, "learning_rate": 0.0012772275600162939, "loss": 2.2767, "step": 413060 }, { "epoch": 1.5968131001530825, "grad_norm": 0.10166584700345993, "learning_rate": 0.0012769509006989913, "loss": 2.2787, "step": 413070 }, { "epoch": 1.5968517573564658, "grad_norm": 0.1131749376654625, "learning_rate": 0.001276674347198995, "loss": 2.287, "step": 413080 }, { "epoch": 1.596890414559849, "grad_norm": 0.09794627130031586, "learning_rate": 0.0012763978993949783, "loss": 2.2853, "step": 413090 }, { "epoch": 1.5969290717632325, "grad_norm": 0.10854090750217438, "learning_rate": 0.0012761215571658459, "loss": 2.2833, "step": 413100 }, { "epoch": 1.5969677289666158, "grad_norm": 0.09260708838701248, "learning_rate": 0.0012758453203907329, "loss": 2.2825, "step": 413110 }, { "epoch": 1.597006386169999, "grad_norm": 0.10994840413331985, "learning_rate": 0.0012755691889490067, "loss": 2.2724, "step": 413120 }, { "epoch": 1.5970450433733823, "grad_norm": 0.10354938358068466, "learning_rate": 0.0012752931627202627, "loss": 2.275, "step": 413130 }, { "epoch": 1.5970837005767655, "grad_norm": 0.09041547030210495, "learning_rate": 0.0012750172415843257, "loss": 2.2847, "step": 413140 }, { "epoch": 1.5971223577801488, "grad_norm": 0.10713107883930206, "learning_rate": 0.0012747414254212501, "loss": 2.3039, "step": 413150 }, { "epoch": 1.597161014983532, "grad_norm": 0.11910802125930786, "learning_rate": 0.0012744657141113178, "loss": 2.2999, "step": 413160 }, { "epoch": 1.5971996721869153, "grad_norm": 0.0957915261387825, "learning_rate": 0.0012741901075350376, "loss": 2.2702, "step": 413170 }, { "epoch": 1.5972383293902985, "grad_norm": 0.09311871975660324, "learning_rate": 0.001273914605573146, "loss": 2.2805, "step": 413180 }, { "epoch": 1.5972769865936818, "grad_norm": 0.0910448431968689, "learning_rate": 0.001273639208106605, "loss": 2.2822, "step": 413190 }, { "epoch": 1.597315643797065, "grad_norm": 0.0983869656920433, "learning_rate": 0.0012733639150166018, "loss": 2.2788, "step": 413200 }, { "epoch": 1.5973543010004483, "grad_norm": 0.08953586965799332, "learning_rate": 0.0012730887261845501, "loss": 2.2892, "step": 413210 }, { "epoch": 1.5973929582038315, "grad_norm": 0.0971858873963356, "learning_rate": 0.0012728136414920863, "loss": 2.2721, "step": 413220 }, { "epoch": 1.597431615407215, "grad_norm": 0.09747296571731567, "learning_rate": 0.0012725386608210716, "loss": 2.2784, "step": 413230 }, { "epoch": 1.5974702726105983, "grad_norm": 0.10249760001897812, "learning_rate": 0.00127226378405359, "loss": 2.2793, "step": 413240 }, { "epoch": 1.5975089298139815, "grad_norm": 0.09742303937673569, "learning_rate": 0.0012719890110719483, "loss": 2.2895, "step": 413250 }, { "epoch": 1.5975475870173648, "grad_norm": 0.09713740646839142, "learning_rate": 0.001271714341758675, "loss": 2.2726, "step": 413260 }, { "epoch": 1.5975862442207482, "grad_norm": 0.1044480949640274, "learning_rate": 0.0012714397759965206, "loss": 2.2858, "step": 413270 }, { "epoch": 1.5976249014241315, "grad_norm": 0.09574960172176361, "learning_rate": 0.001271165313668456, "loss": 2.285, "step": 413280 }, { "epoch": 1.5976635586275147, "grad_norm": 0.1157715916633606, "learning_rate": 0.0012708909546576726, "loss": 2.289, "step": 413290 }, { "epoch": 1.597702215830898, "grad_norm": 0.09488074481487274, "learning_rate": 0.0012706166988475812, "loss": 2.2866, "step": 413300 }, { "epoch": 1.5977408730342813, "grad_norm": 0.1098017692565918, "learning_rate": 0.001270342546121812, "loss": 2.276, "step": 413310 }, { "epoch": 1.5977795302376645, "grad_norm": 0.10246486961841583, "learning_rate": 0.0012700684963642135, "loss": 2.2728, "step": 413320 }, { "epoch": 1.5978181874410478, "grad_norm": 0.10948675870895386, "learning_rate": 0.0012697945494588529, "loss": 2.2733, "step": 413330 }, { "epoch": 1.597856844644431, "grad_norm": 0.10623447597026825, "learning_rate": 0.0012695207052900131, "loss": 2.2814, "step": 413340 }, { "epoch": 1.5978955018478143, "grad_norm": 0.09846518188714981, "learning_rate": 0.0012692469637421958, "loss": 2.2762, "step": 413350 }, { "epoch": 1.5979341590511975, "grad_norm": 0.11750591546297073, "learning_rate": 0.0012689733247001173, "loss": 2.27, "step": 413360 }, { "epoch": 1.5979728162545808, "grad_norm": 0.10157160460948944, "learning_rate": 0.001268699788048711, "loss": 2.2894, "step": 413370 }, { "epoch": 1.598011473457964, "grad_norm": 0.10318373888731003, "learning_rate": 0.0012684263536731249, "loss": 2.283, "step": 413380 }, { "epoch": 1.5980501306613475, "grad_norm": 0.09734541177749634, "learning_rate": 0.0012681530214587204, "loss": 2.2726, "step": 413390 }, { "epoch": 1.5980887878647307, "grad_norm": 0.09607987105846405, "learning_rate": 0.001267879791291075, "loss": 2.2841, "step": 413400 }, { "epoch": 1.598127445068114, "grad_norm": 0.10669342428445816, "learning_rate": 0.0012676066630559779, "loss": 2.2788, "step": 413410 }, { "epoch": 1.5981661022714972, "grad_norm": 0.0970144048333168, "learning_rate": 0.001267333636639432, "loss": 2.2853, "step": 413420 }, { "epoch": 1.5982047594748805, "grad_norm": 0.09008067846298218, "learning_rate": 0.0012670607119276522, "loss": 2.2842, "step": 413430 }, { "epoch": 1.598243416678264, "grad_norm": 0.11209733784198761, "learning_rate": 0.0012667878888070656, "loss": 2.2788, "step": 413440 }, { "epoch": 1.5982820738816472, "grad_norm": 0.09946850687265396, "learning_rate": 0.0012665151671643101, "loss": 2.2864, "step": 413450 }, { "epoch": 1.5983207310850305, "grad_norm": 0.09585539996623993, "learning_rate": 0.0012662425468862343, "loss": 2.2808, "step": 413460 }, { "epoch": 1.5983593882884137, "grad_norm": 0.11974449455738068, "learning_rate": 0.0012659700278598973, "loss": 2.2809, "step": 413470 }, { "epoch": 1.598398045491797, "grad_norm": 0.11818915605545044, "learning_rate": 0.0012656976099725671, "loss": 2.2805, "step": 413480 }, { "epoch": 1.5984367026951802, "grad_norm": 0.1089639961719513, "learning_rate": 0.0012654252931117217, "loss": 2.2755, "step": 413490 }, { "epoch": 1.5984753598985635, "grad_norm": 0.13089609146118164, "learning_rate": 0.0012651530771650465, "loss": 2.2763, "step": 413500 }, { "epoch": 1.5985140171019467, "grad_norm": 0.11262860149145126, "learning_rate": 0.0012648809620204359, "loss": 2.2805, "step": 413510 }, { "epoch": 1.59855267430533, "grad_norm": 0.11303657293319702, "learning_rate": 0.0012646089475659905, "loss": 2.2887, "step": 413520 }, { "epoch": 1.5985913315087132, "grad_norm": 0.09849189966917038, "learning_rate": 0.001264337033690019, "loss": 2.276, "step": 413530 }, { "epoch": 1.5986299887120965, "grad_norm": 0.10323601216077805, "learning_rate": 0.001264065220281036, "loss": 2.2874, "step": 413540 }, { "epoch": 1.5986686459154797, "grad_norm": 0.10031892359256744, "learning_rate": 0.0012637935072277616, "loss": 2.2652, "step": 413550 }, { "epoch": 1.5987073031188632, "grad_norm": 0.3931354284286499, "learning_rate": 0.0012635218944191213, "loss": 2.284, "step": 413560 }, { "epoch": 1.5987459603222465, "grad_norm": 0.09942755848169327, "learning_rate": 0.0012632503817442456, "loss": 2.2645, "step": 413570 }, { "epoch": 1.5987846175256297, "grad_norm": 0.1002105176448822, "learning_rate": 0.001262978969092469, "loss": 2.2841, "step": 413580 }, { "epoch": 1.598823274729013, "grad_norm": 0.10870186984539032, "learning_rate": 0.0012627076563533298, "loss": 2.267, "step": 413590 }, { "epoch": 1.5988619319323962, "grad_norm": 0.10717492550611496, "learning_rate": 0.001262436443416569, "loss": 2.262, "step": 413600 }, { "epoch": 1.5989005891357797, "grad_norm": 0.10230233520269394, "learning_rate": 0.0012621653301721314, "loss": 2.2821, "step": 413610 }, { "epoch": 1.598939246339163, "grad_norm": 0.1118675246834755, "learning_rate": 0.0012618943165101629, "loss": 2.287, "step": 413620 }, { "epoch": 1.5989779035425462, "grad_norm": 0.10283380001783371, "learning_rate": 0.0012616234023210106, "loss": 2.268, "step": 413630 }, { "epoch": 1.5990165607459295, "grad_norm": 0.10829443484544754, "learning_rate": 0.0012613525874952244, "loss": 2.2735, "step": 413640 }, { "epoch": 1.5990552179493127, "grad_norm": 0.09989216923713684, "learning_rate": 0.0012610818719235534, "loss": 2.2734, "step": 413650 }, { "epoch": 1.599093875152696, "grad_norm": 0.12643863260746002, "learning_rate": 0.0012608112554969469, "loss": 2.2787, "step": 413660 }, { "epoch": 1.5991325323560792, "grad_norm": 0.09596065431833267, "learning_rate": 0.0012605407381065538, "loss": 2.2773, "step": 413670 }, { "epoch": 1.5991711895594625, "grad_norm": 0.11484631896018982, "learning_rate": 0.001260270319643723, "loss": 2.2758, "step": 413680 }, { "epoch": 1.5992098467628457, "grad_norm": 0.10234335064888, "learning_rate": 0.00126, "loss": 2.2901, "step": 413690 }, { "epoch": 1.599248503966229, "grad_norm": 0.09941039234399796, "learning_rate": 0.00125972977906713, "loss": 2.2768, "step": 413700 }, { "epoch": 1.5992871611696122, "grad_norm": 0.09414675086736679, "learning_rate": 0.0012594596567370552, "loss": 2.2717, "step": 413710 }, { "epoch": 1.5993258183729955, "grad_norm": 0.10195624828338623, "learning_rate": 0.0012591896329019147, "loss": 2.2717, "step": 413720 }, { "epoch": 1.599364475576379, "grad_norm": 0.11093621701002121, "learning_rate": 0.001258919707454044, "loss": 2.2896, "step": 413730 }, { "epoch": 1.5994031327797622, "grad_norm": 0.11402977257966995, "learning_rate": 0.0012586498802859745, "loss": 2.2758, "step": 413740 }, { "epoch": 1.5994417899831455, "grad_norm": 0.09677516669034958, "learning_rate": 0.0012583801512904339, "loss": 2.2798, "step": 413750 }, { "epoch": 1.5994804471865287, "grad_norm": 0.1029655858874321, "learning_rate": 0.0012581105203603436, "loss": 2.2876, "step": 413760 }, { "epoch": 1.599519104389912, "grad_norm": 0.10037294775247574, "learning_rate": 0.0012578409873888212, "loss": 2.2785, "step": 413770 }, { "epoch": 1.5995577615932954, "grad_norm": 0.10133747011423111, "learning_rate": 0.0012575715522691767, "loss": 2.2804, "step": 413780 }, { "epoch": 1.5995964187966787, "grad_norm": 0.10408452898263931, "learning_rate": 0.0012573022148949144, "loss": 2.2828, "step": 413790 }, { "epoch": 1.599635076000062, "grad_norm": 0.10227343440055847, "learning_rate": 0.0012570329751597316, "loss": 2.2861, "step": 413800 }, { "epoch": 1.5996737332034452, "grad_norm": 0.1007285937666893, "learning_rate": 0.0012567638329575182, "loss": 2.2801, "step": 413810 }, { "epoch": 1.5997123904068284, "grad_norm": 0.1031252071261406, "learning_rate": 0.001256494788182356, "loss": 2.2789, "step": 413820 }, { "epoch": 1.5997510476102117, "grad_norm": 0.1067824587225914, "learning_rate": 0.0012562258407285182, "loss": 2.3046, "step": 413830 }, { "epoch": 1.599789704813595, "grad_norm": 0.10092274099588394, "learning_rate": 0.001255956990490469, "loss": 2.2862, "step": 413840 }, { "epoch": 1.5998283620169782, "grad_norm": 0.10653362423181534, "learning_rate": 0.0012556882373628642, "loss": 2.2808, "step": 413850 }, { "epoch": 1.5998670192203615, "grad_norm": 0.09871282428503036, "learning_rate": 0.0012554195812405488, "loss": 2.2773, "step": 413860 }, { "epoch": 1.5999056764237447, "grad_norm": 0.1083545833826065, "learning_rate": 0.001255151022018557, "loss": 2.2712, "step": 413870 }, { "epoch": 1.599944333627128, "grad_norm": 0.10615554451942444, "learning_rate": 0.0012548825595921139, "loss": 2.2687, "step": 413880 }, { "epoch": 1.5999829908305112, "grad_norm": 0.10352876782417297, "learning_rate": 0.001254614193856631, "loss": 2.2818, "step": 413890 }, { "epoch": 1.6000216480338947, "grad_norm": 0.11283598840236664, "learning_rate": 0.00125434592470771, "loss": 2.2948, "step": 413900 }, { "epoch": 1.600060305237278, "grad_norm": 0.11240514367818832, "learning_rate": 0.0012540777520411394, "loss": 2.2909, "step": 413910 }, { "epoch": 1.6000989624406612, "grad_norm": 0.10302004963159561, "learning_rate": 0.0012538096757528949, "loss": 2.275, "step": 413920 }, { "epoch": 1.6001376196440444, "grad_norm": 0.11001778393983841, "learning_rate": 0.001253541695739139, "loss": 2.2815, "step": 413930 }, { "epoch": 1.600176276847428, "grad_norm": 0.09422043710947037, "learning_rate": 0.0012532738118962213, "loss": 2.281, "step": 413940 }, { "epoch": 1.6002149340508112, "grad_norm": 0.1052865982055664, "learning_rate": 0.0012530060241206762, "loss": 2.2635, "step": 413950 }, { "epoch": 1.6002535912541944, "grad_norm": 0.09602361172437668, "learning_rate": 0.0012527383323092237, "loss": 2.2773, "step": 413960 }, { "epoch": 1.6002922484575777, "grad_norm": 0.0933568999171257, "learning_rate": 0.0012524707363587696, "loss": 2.2783, "step": 413970 }, { "epoch": 1.600330905660961, "grad_norm": 0.10403438657522202, "learning_rate": 0.0012522032361664034, "loss": 2.2874, "step": 413980 }, { "epoch": 1.6003695628643442, "grad_norm": 0.1254464089870453, "learning_rate": 0.0012519358316293982, "loss": 2.2858, "step": 413990 }, { "epoch": 1.6004082200677274, "grad_norm": 0.10663452744483948, "learning_rate": 0.0012516685226452117, "loss": 2.2685, "step": 414000 }, { "epoch": 1.6004468772711107, "grad_norm": 0.09303441643714905, "learning_rate": 0.0012514013091114839, "loss": 2.2671, "step": 414010 }, { "epoch": 1.600485534474494, "grad_norm": 0.12453543394804001, "learning_rate": 0.0012511341909260379, "loss": 2.2814, "step": 414020 }, { "epoch": 1.6005241916778772, "grad_norm": 0.1046123132109642, "learning_rate": 0.0012508671679868782, "loss": 2.2908, "step": 414030 }, { "epoch": 1.6005628488812604, "grad_norm": 0.10266414284706116, "learning_rate": 0.0012506002401921922, "loss": 2.2784, "step": 414040 }, { "epoch": 1.6006015060846437, "grad_norm": 0.09736956655979156, "learning_rate": 0.0012503334074403477, "loss": 2.2799, "step": 414050 }, { "epoch": 1.600640163288027, "grad_norm": 0.10303352773189545, "learning_rate": 0.001250066669629893, "loss": 2.2809, "step": 414060 }, { "epoch": 1.6006788204914104, "grad_norm": 0.10491194576025009, "learning_rate": 0.001249800026659558, "loss": 2.2847, "step": 414070 }, { "epoch": 1.6007174776947937, "grad_norm": 0.10172902047634125, "learning_rate": 0.0012495334784282512, "loss": 2.2779, "step": 414080 }, { "epoch": 1.600756134898177, "grad_norm": 0.2797784209251404, "learning_rate": 0.0012492670248350616, "loss": 2.2656, "step": 414090 }, { "epoch": 1.6007947921015602, "grad_norm": 0.1073826253414154, "learning_rate": 0.0012490006657792565, "loss": 2.2824, "step": 414100 }, { "epoch": 1.6008334493049436, "grad_norm": 0.10700371861457825, "learning_rate": 0.0012487344011602821, "loss": 2.2809, "step": 414110 }, { "epoch": 1.600872106508327, "grad_norm": 0.11427832394838333, "learning_rate": 0.0012484682308777626, "loss": 2.2699, "step": 414120 }, { "epoch": 1.6009107637117101, "grad_norm": 0.10496781021356583, "learning_rate": 0.0012482021548315, "loss": 2.2802, "step": 414130 }, { "epoch": 1.6009494209150934, "grad_norm": 0.11209113150835037, "learning_rate": 0.0012479361729214734, "loss": 2.2817, "step": 414140 }, { "epoch": 1.6009880781184767, "grad_norm": 0.09979293495416641, "learning_rate": 0.001247670285047839, "loss": 2.2669, "step": 414150 }, { "epoch": 1.60102673532186, "grad_norm": 0.10232799500226974, "learning_rate": 0.0012474044911109288, "loss": 2.2704, "step": 414160 }, { "epoch": 1.6010653925252432, "grad_norm": 0.1055445596575737, "learning_rate": 0.0012471387910112518, "loss": 2.2804, "step": 414170 }, { "epoch": 1.6011040497286264, "grad_norm": 0.11574835330247879, "learning_rate": 0.0012468731846494907, "loss": 2.2749, "step": 414180 }, { "epoch": 1.6011427069320097, "grad_norm": 0.0972663164138794, "learning_rate": 0.0012466076719265056, "loss": 2.2759, "step": 414190 }, { "epoch": 1.601181364135393, "grad_norm": 0.09286480396986008, "learning_rate": 0.0012463422527433292, "loss": 2.2798, "step": 414200 }, { "epoch": 1.6012200213387762, "grad_norm": 0.14326174557209015, "learning_rate": 0.0012460769270011697, "loss": 2.2817, "step": 414210 }, { "epoch": 1.6012586785421594, "grad_norm": 0.11282878369092941, "learning_rate": 0.001245811694601408, "loss": 2.2926, "step": 414220 }, { "epoch": 1.6012973357455427, "grad_norm": 0.10788289457559586, "learning_rate": 0.0012455465554455994, "loss": 2.2846, "step": 414230 }, { "epoch": 1.6013359929489261, "grad_norm": 0.09294237941503525, "learning_rate": 0.0012452815094354717, "loss": 2.2862, "step": 414240 }, { "epoch": 1.6013746501523094, "grad_norm": 0.10845671594142914, "learning_rate": 0.0012450165564729253, "loss": 2.2683, "step": 414250 }, { "epoch": 1.6014133073556927, "grad_norm": 0.10894111543893814, "learning_rate": 0.001244751696460032, "loss": 2.2686, "step": 414260 }, { "epoch": 1.601451964559076, "grad_norm": 0.09936366230249405, "learning_rate": 0.0012444869292990359, "loss": 2.2792, "step": 414270 }, { "epoch": 1.6014906217624594, "grad_norm": 0.09494331479072571, "learning_rate": 0.0012442222548923528, "loss": 2.2858, "step": 414280 }, { "epoch": 1.6015292789658426, "grad_norm": 0.09906581044197083, "learning_rate": 0.001243957673142568, "loss": 2.2755, "step": 414290 }, { "epoch": 1.6015679361692259, "grad_norm": 0.10921325534582138, "learning_rate": 0.0012436931839524385, "loss": 2.2702, "step": 414300 }, { "epoch": 1.6016065933726091, "grad_norm": 0.105479396879673, "learning_rate": 0.0012434287872248903, "loss": 2.2867, "step": 414310 }, { "epoch": 1.6016452505759924, "grad_norm": 0.10393494367599487, "learning_rate": 0.00124316448286302, "loss": 2.281, "step": 414320 }, { "epoch": 1.6016839077793756, "grad_norm": 0.10371506959199905, "learning_rate": 0.001242900270770092, "loss": 2.2936, "step": 414330 }, { "epoch": 1.601722564982759, "grad_norm": 0.11971278488636017, "learning_rate": 0.0012426361508495404, "loss": 2.2801, "step": 414340 }, { "epoch": 1.6017612221861421, "grad_norm": 0.10474719107151031, "learning_rate": 0.0012423721230049676, "loss": 2.291, "step": 414350 }, { "epoch": 1.6017998793895254, "grad_norm": 0.10546832531690598, "learning_rate": 0.0012421081871401435, "loss": 2.2873, "step": 414360 }, { "epoch": 1.6018385365929086, "grad_norm": 0.11091950535774231, "learning_rate": 0.0012418443431590053, "loss": 2.273, "step": 414370 }, { "epoch": 1.601877193796292, "grad_norm": 0.09752365201711655, "learning_rate": 0.0012415805909656583, "loss": 2.277, "step": 414380 }, { "epoch": 1.6019158509996752, "grad_norm": 0.09656338393688202, "learning_rate": 0.0012413169304643736, "loss": 2.2818, "step": 414390 }, { "epoch": 1.6019545082030584, "grad_norm": 0.12549050152301788, "learning_rate": 0.001241053361559589, "loss": 2.2766, "step": 414400 }, { "epoch": 1.6019931654064419, "grad_norm": 0.0944322869181633, "learning_rate": 0.0012407898841559077, "loss": 2.2892, "step": 414410 }, { "epoch": 1.6020318226098251, "grad_norm": 0.10391388088464737, "learning_rate": 0.001240526498158099, "loss": 2.2892, "step": 414420 }, { "epoch": 1.6020704798132084, "grad_norm": 0.09657981991767883, "learning_rate": 0.001240263203471097, "loss": 2.287, "step": 414430 }, { "epoch": 1.6021091370165916, "grad_norm": 0.09660341590642929, "learning_rate": 0.00124, "loss": 2.2946, "step": 414440 }, { "epoch": 1.602147794219975, "grad_norm": 0.39730313420295715, "learning_rate": 0.0012397368876500715, "loss": 2.2819, "step": 414450 }, { "epoch": 1.6021864514233584, "grad_norm": 0.10600872337818146, "learning_rate": 0.0012394738663267383, "loss": 2.2668, "step": 414460 }, { "epoch": 1.6022251086267416, "grad_norm": 0.09585840255022049, "learning_rate": 0.0012392109359355907, "loss": 2.2738, "step": 414470 }, { "epoch": 1.6022637658301249, "grad_norm": 0.09625507146120071, "learning_rate": 0.001238948096382382, "loss": 2.2827, "step": 414480 }, { "epoch": 1.6023024230335081, "grad_norm": 0.10400725156068802, "learning_rate": 0.001238685347573029, "loss": 2.2772, "step": 414490 }, { "epoch": 1.6023410802368914, "grad_norm": 0.1072283685207367, "learning_rate": 0.0012384226894136092, "loss": 2.2799, "step": 414500 }, { "epoch": 1.6023797374402746, "grad_norm": 0.11016058921813965, "learning_rate": 0.0012381601218103637, "loss": 2.2494, "step": 414510 }, { "epoch": 1.6024183946436579, "grad_norm": 0.11033624410629272, "learning_rate": 0.0012378976446696939, "loss": 2.267, "step": 414520 }, { "epoch": 1.6024570518470411, "grad_norm": 0.1050303652882576, "learning_rate": 0.0012376352578981633, "loss": 2.286, "step": 414530 }, { "epoch": 1.6024957090504244, "grad_norm": 0.11567538976669312, "learning_rate": 0.0012373729614024952, "loss": 2.2639, "step": 414540 }, { "epoch": 1.6025343662538076, "grad_norm": 0.10231651365756989, "learning_rate": 0.001237110755089574, "loss": 2.2814, "step": 414550 }, { "epoch": 1.6025730234571909, "grad_norm": 0.09145838022232056, "learning_rate": 0.0012368486388664435, "loss": 2.2815, "step": 414560 }, { "epoch": 1.6026116806605741, "grad_norm": 0.09755510091781616, "learning_rate": 0.0012365866126403074, "loss": 2.2707, "step": 414570 }, { "epoch": 1.6026503378639576, "grad_norm": 0.10884889215230942, "learning_rate": 0.0012363246763185285, "loss": 2.2893, "step": 414580 }, { "epoch": 1.6026889950673409, "grad_norm": 0.11061963438987732, "learning_rate": 0.001236062829808629, "loss": 2.2809, "step": 414590 }, { "epoch": 1.6027276522707241, "grad_norm": 0.1166672632098198, "learning_rate": 0.001235801073018288, "loss": 2.2647, "step": 414600 }, { "epoch": 1.6027663094741074, "grad_norm": 0.10163895040750504, "learning_rate": 0.0012355394058553442, "loss": 2.2558, "step": 414610 }, { "epoch": 1.6028049666774908, "grad_norm": 0.11530463397502899, "learning_rate": 0.0012352778282277935, "loss": 2.2682, "step": 414620 }, { "epoch": 1.602843623880874, "grad_norm": 0.10394533723592758, "learning_rate": 0.001235016340043789, "loss": 2.2795, "step": 414630 }, { "epoch": 1.6028822810842573, "grad_norm": 0.09342104196548462, "learning_rate": 0.0012347549412116403, "loss": 2.2847, "step": 414640 }, { "epoch": 1.6029209382876406, "grad_norm": 0.09775515645742416, "learning_rate": 0.0012344936316398146, "loss": 2.2587, "step": 414650 }, { "epoch": 1.6029595954910238, "grad_norm": 0.18582019209861755, "learning_rate": 0.001234232411236934, "loss": 2.2786, "step": 414660 }, { "epoch": 1.602998252694407, "grad_norm": 0.09853124618530273, "learning_rate": 0.0012339712799117777, "loss": 2.2731, "step": 414670 }, { "epoch": 1.6030369098977904, "grad_norm": 0.11088378727436066, "learning_rate": 0.0012337102375732792, "loss": 2.298, "step": 414680 }, { "epoch": 1.6030755671011736, "grad_norm": 0.11615308374166489, "learning_rate": 0.001233449284130528, "loss": 2.2655, "step": 414690 }, { "epoch": 1.6031142243045569, "grad_norm": 0.09753936529159546, "learning_rate": 0.0012331884194927674, "loss": 2.264, "step": 414700 }, { "epoch": 1.60315288150794, "grad_norm": 0.11125210672616959, "learning_rate": 0.0012329276435693957, "loss": 2.2854, "step": 414710 }, { "epoch": 1.6031915387113234, "grad_norm": 0.10580608248710632, "learning_rate": 0.001232666956269965, "loss": 2.2761, "step": 414720 }, { "epoch": 1.6032301959147066, "grad_norm": 0.1012212336063385, "learning_rate": 0.0012324063575041807, "loss": 2.2729, "step": 414730 }, { "epoch": 1.6032688531180899, "grad_norm": 0.09648676961660385, "learning_rate": 0.0012321458471819013, "loss": 2.2899, "step": 414740 }, { "epoch": 1.6033075103214733, "grad_norm": 0.09944088011980057, "learning_rate": 0.001231885425213139, "loss": 2.2746, "step": 414750 }, { "epoch": 1.6033461675248566, "grad_norm": 0.1056373193860054, "learning_rate": 0.0012316250915080582, "loss": 2.2798, "step": 414760 }, { "epoch": 1.6033848247282398, "grad_norm": 0.10777273774147034, "learning_rate": 0.0012313648459769747, "loss": 2.2701, "step": 414770 }, { "epoch": 1.603423481931623, "grad_norm": 0.10863542556762695, "learning_rate": 0.0012311046885303564, "loss": 2.2884, "step": 414780 }, { "epoch": 1.6034621391350066, "grad_norm": 0.10409296303987503, "learning_rate": 0.0012308446190788236, "loss": 2.2668, "step": 414790 }, { "epoch": 1.6035007963383898, "grad_norm": 0.10630719363689423, "learning_rate": 0.0012305846375331461, "loss": 2.2815, "step": 414800 }, { "epoch": 1.603539453541773, "grad_norm": 0.10058703273534775, "learning_rate": 0.0012303247438042457, "loss": 2.2879, "step": 414810 }, { "epoch": 1.6035781107451563, "grad_norm": 0.09760040044784546, "learning_rate": 0.001230064937803194, "loss": 2.2757, "step": 414820 }, { "epoch": 1.6036167679485396, "grad_norm": 0.09717744588851929, "learning_rate": 0.0012298052194412118, "loss": 2.2789, "step": 414830 }, { "epoch": 1.6036554251519228, "grad_norm": 0.10187872499227524, "learning_rate": 0.0012295455886296711, "loss": 2.2813, "step": 414840 }, { "epoch": 1.603694082355306, "grad_norm": 0.11174602806568146, "learning_rate": 0.0012292860452800922, "loss": 2.264, "step": 414850 }, { "epoch": 1.6037327395586893, "grad_norm": 0.11284361034631729, "learning_rate": 0.0012290265893041448, "loss": 2.2585, "step": 414860 }, { "epoch": 1.6037713967620726, "grad_norm": 0.09597291797399521, "learning_rate": 0.0012287672206136465, "loss": 2.2831, "step": 414870 }, { "epoch": 1.6038100539654558, "grad_norm": 0.0953512042760849, "learning_rate": 0.0012285079391205636, "loss": 2.2606, "step": 414880 }, { "epoch": 1.603848711168839, "grad_norm": 0.1113586574792862, "learning_rate": 0.0012282487447370105, "loss": 2.2814, "step": 414890 }, { "epoch": 1.6038873683722223, "grad_norm": 0.11221566796302795, "learning_rate": 0.0012279896373752486, "loss": 2.272, "step": 414900 }, { "epoch": 1.6039260255756056, "grad_norm": 0.10260015726089478, "learning_rate": 0.001227730616947687, "loss": 2.2744, "step": 414910 }, { "epoch": 1.603964682778989, "grad_norm": 0.1190873309969902, "learning_rate": 0.0012274716833668813, "loss": 2.2702, "step": 414920 }, { "epoch": 1.6040033399823723, "grad_norm": 0.10727227479219437, "learning_rate": 0.0012272128365455337, "loss": 2.2882, "step": 414930 }, { "epoch": 1.6040419971857556, "grad_norm": 0.12654146552085876, "learning_rate": 0.0012269540763964924, "loss": 2.2745, "step": 414940 }, { "epoch": 1.6040806543891388, "grad_norm": 0.09809217602014542, "learning_rate": 0.0012266954028327518, "loss": 2.2914, "step": 414950 }, { "epoch": 1.6041193115925223, "grad_norm": 0.10526150465011597, "learning_rate": 0.0012264368157674514, "loss": 2.2849, "step": 414960 }, { "epoch": 1.6041579687959056, "grad_norm": 0.0995633453130722, "learning_rate": 0.0012261783151138758, "loss": 2.2896, "step": 414970 }, { "epoch": 1.6041966259992888, "grad_norm": 0.09011335670948029, "learning_rate": 0.001225919900785455, "loss": 2.2663, "step": 414980 }, { "epoch": 1.604235283202672, "grad_norm": 0.10598836839199066, "learning_rate": 0.0012256615726957624, "loss": 2.2811, "step": 414990 }, { "epoch": 1.6042739404060553, "grad_norm": 0.1029854342341423, "learning_rate": 0.0012254033307585166, "loss": 2.2946, "step": 415000 }, { "epoch": 1.6043125976094386, "grad_norm": 0.09580104798078537, "learning_rate": 0.0012251451748875794, "loss": 2.2908, "step": 415010 }, { "epoch": 1.6043512548128218, "grad_norm": 0.10150299221277237, "learning_rate": 0.0012248871049969558, "loss": 2.2644, "step": 415020 }, { "epoch": 1.604389912016205, "grad_norm": 0.09846926480531693, "learning_rate": 0.001224629121000795, "loss": 2.2755, "step": 415030 }, { "epoch": 1.6044285692195883, "grad_norm": 0.11625342816114426, "learning_rate": 0.0012243712228133875, "loss": 2.2873, "step": 415040 }, { "epoch": 1.6044672264229716, "grad_norm": 0.11542271077632904, "learning_rate": 0.0012241134103491672, "loss": 2.2749, "step": 415050 }, { "epoch": 1.6045058836263548, "grad_norm": 0.09932534396648407, "learning_rate": 0.0012238556835227098, "loss": 2.2735, "step": 415060 }, { "epoch": 1.604544540829738, "grad_norm": 0.10799705982208252, "learning_rate": 0.0012235980422487332, "loss": 2.2614, "step": 415070 }, { "epoch": 1.6045831980331213, "grad_norm": 0.11131885647773743, "learning_rate": 0.001223340486442096, "loss": 2.278, "step": 415080 }, { "epoch": 1.6046218552365048, "grad_norm": 0.10251007974147797, "learning_rate": 0.0012230830160177987, "loss": 2.2725, "step": 415090 }, { "epoch": 1.604660512439888, "grad_norm": 0.09627630561590195, "learning_rate": 0.001222825630890982, "loss": 2.2852, "step": 415100 }, { "epoch": 1.6046991696432713, "grad_norm": 0.12653441727161407, "learning_rate": 0.0012225683309769276, "loss": 2.2731, "step": 415110 }, { "epoch": 1.6047378268466546, "grad_norm": 0.09953426569700241, "learning_rate": 0.0012223111161910568, "loss": 2.2678, "step": 415120 }, { "epoch": 1.604776484050038, "grad_norm": 0.10526780039072037, "learning_rate": 0.001222053986448931, "loss": 2.2708, "step": 415130 }, { "epoch": 1.6048151412534213, "grad_norm": 0.10229865461587906, "learning_rate": 0.001221796941666251, "loss": 2.2776, "step": 415140 }, { "epoch": 1.6048537984568045, "grad_norm": 0.0909830704331398, "learning_rate": 0.0012215399817588576, "loss": 2.2825, "step": 415150 }, { "epoch": 1.6048924556601878, "grad_norm": 0.12754322588443756, "learning_rate": 0.0012212831066427286, "loss": 2.2681, "step": 415160 }, { "epoch": 1.604931112863571, "grad_norm": 0.10755956172943115, "learning_rate": 0.0012210263162339822, "loss": 2.2771, "step": 415170 }, { "epoch": 1.6049697700669543, "grad_norm": 0.08844668418169022, "learning_rate": 0.001220769610448874, "loss": 2.2871, "step": 415180 }, { "epoch": 1.6050084272703375, "grad_norm": 0.11943338066339493, "learning_rate": 0.001220512989203797, "loss": 2.2607, "step": 415190 }, { "epoch": 1.6050470844737208, "grad_norm": 0.10932467132806778, "learning_rate": 0.001220256452415283, "loss": 2.2764, "step": 415200 }, { "epoch": 1.605085741677104, "grad_norm": 0.11282321810722351, "learning_rate": 0.00122, "loss": 2.2761, "step": 415210 }, { "epoch": 1.6051243988804873, "grad_norm": 0.09715761244297028, "learning_rate": 0.0012197436318747536, "loss": 2.2705, "step": 415220 }, { "epoch": 1.6051630560838706, "grad_norm": 0.11072029173374176, "learning_rate": 0.0012194873479564859, "loss": 2.2825, "step": 415230 }, { "epoch": 1.6052017132872538, "grad_norm": 0.12762323021888733, "learning_rate": 0.0012192311481622746, "loss": 2.2746, "step": 415240 }, { "epoch": 1.6052403704906373, "grad_norm": 0.10402945429086685, "learning_rate": 0.0012189750324093347, "loss": 2.2752, "step": 415250 }, { "epoch": 1.6052790276940205, "grad_norm": 0.11042294651269913, "learning_rate": 0.0012187190006150157, "loss": 2.2655, "step": 415260 }, { "epoch": 1.6053176848974038, "grad_norm": 0.11077842116355896, "learning_rate": 0.0012184630526968032, "loss": 2.2568, "step": 415270 }, { "epoch": 1.605356342100787, "grad_norm": 0.12798729538917542, "learning_rate": 0.0012182071885723173, "loss": 2.2744, "step": 415280 }, { "epoch": 1.6053949993041703, "grad_norm": 0.10232830047607422, "learning_rate": 0.001217951408159314, "loss": 2.2643, "step": 415290 }, { "epoch": 1.6054336565075538, "grad_norm": 0.1272798478603363, "learning_rate": 0.001217695711375682, "loss": 2.2706, "step": 415300 }, { "epoch": 1.605472313710937, "grad_norm": 0.10634199529886246, "learning_rate": 0.0012174400981394458, "loss": 2.2836, "step": 415310 }, { "epoch": 1.6055109709143203, "grad_norm": 0.09858424961566925, "learning_rate": 0.0012171845683687627, "loss": 2.2855, "step": 415320 }, { "epoch": 1.6055496281177035, "grad_norm": 0.10499022156000137, "learning_rate": 0.0012169291219819242, "loss": 2.267, "step": 415330 }, { "epoch": 1.6055882853210868, "grad_norm": 0.09204845130443573, "learning_rate": 0.0012166737588973544, "loss": 2.2823, "step": 415340 }, { "epoch": 1.60562694252447, "grad_norm": 0.1029248759150505, "learning_rate": 0.0012164184790336107, "loss": 2.2772, "step": 415350 }, { "epoch": 1.6056655997278533, "grad_norm": 0.10894620418548584, "learning_rate": 0.001216163282309383, "loss": 2.2736, "step": 415360 }, { "epoch": 1.6057042569312365, "grad_norm": 0.11481393128633499, "learning_rate": 0.0012159081686434936, "loss": 2.28, "step": 415370 }, { "epoch": 1.6057429141346198, "grad_norm": 0.09570683538913727, "learning_rate": 0.001215653137954897, "loss": 2.2621, "step": 415380 }, { "epoch": 1.605781571338003, "grad_norm": 0.11351441591978073, "learning_rate": 0.0012153981901626787, "loss": 2.256, "step": 415390 }, { "epoch": 1.6058202285413863, "grad_norm": 0.10348304361104965, "learning_rate": 0.0012151433251860566, "loss": 2.2847, "step": 415400 }, { "epoch": 1.6058588857447695, "grad_norm": 0.1195356547832489, "learning_rate": 0.0012148885429443792, "loss": 2.285, "step": 415410 }, { "epoch": 1.605897542948153, "grad_norm": 0.10650033503770828, "learning_rate": 0.0012146338433571256, "loss": 2.2921, "step": 415420 }, { "epoch": 1.6059362001515363, "grad_norm": 0.10261499881744385, "learning_rate": 0.0012143792263439059, "loss": 2.289, "step": 415430 }, { "epoch": 1.6059748573549195, "grad_norm": 0.1088041141629219, "learning_rate": 0.00121412469182446, "loss": 2.262, "step": 415440 }, { "epoch": 1.6060135145583028, "grad_norm": 0.09664658457040787, "learning_rate": 0.0012138702397186583, "loss": 2.2742, "step": 415450 }, { "epoch": 1.606052171761686, "grad_norm": 0.10386968404054642, "learning_rate": 0.0012136158699465002, "loss": 2.2706, "step": 415460 }, { "epoch": 1.6060908289650695, "grad_norm": 0.10762540996074677, "learning_rate": 0.0012133615824281145, "loss": 2.2706, "step": 415470 }, { "epoch": 1.6061294861684527, "grad_norm": 0.098637655377388, "learning_rate": 0.00121310737708376, "loss": 2.2957, "step": 415480 }, { "epoch": 1.606168143371836, "grad_norm": 0.12009122967720032, "learning_rate": 0.0012128532538338227, "loss": 2.273, "step": 415490 }, { "epoch": 1.6062068005752193, "grad_norm": 0.1068306490778923, "learning_rate": 0.001212599212598819, "loss": 2.2643, "step": 415500 }, { "epoch": 1.6062454577786025, "grad_norm": 0.10786595940589905, "learning_rate": 0.0012123452532993913, "loss": 2.28, "step": 415510 }, { "epoch": 1.6062841149819858, "grad_norm": 0.09172281622886658, "learning_rate": 0.0012120913758563118, "loss": 2.28, "step": 415520 }, { "epoch": 1.606322772185369, "grad_norm": 0.10430020838975906, "learning_rate": 0.0012118375801904789, "loss": 2.2769, "step": 415530 }, { "epoch": 1.6063614293887523, "grad_norm": 0.10138547420501709, "learning_rate": 0.0012115838662229191, "loss": 2.273, "step": 415540 }, { "epoch": 1.6064000865921355, "grad_norm": 0.1290520429611206, "learning_rate": 0.0012113302338747861, "loss": 2.2781, "step": 415550 }, { "epoch": 1.6064387437955188, "grad_norm": 0.11046361923217773, "learning_rate": 0.0012110766830673594, "loss": 2.2859, "step": 415560 }, { "epoch": 1.606477400998902, "grad_norm": 0.09649594128131866, "learning_rate": 0.001210823213722046, "loss": 2.2544, "step": 415570 }, { "epoch": 1.6065160582022853, "grad_norm": 0.1014927476644516, "learning_rate": 0.001210569825760378, "loss": 2.273, "step": 415580 }, { "epoch": 1.6065547154056687, "grad_norm": 0.11194448918104172, "learning_rate": 0.0012103165191040147, "loss": 2.2812, "step": 415590 }, { "epoch": 1.606593372609052, "grad_norm": 0.09651295840740204, "learning_rate": 0.00121006329367474, "loss": 2.2823, "step": 415600 }, { "epoch": 1.6066320298124352, "grad_norm": 0.09813214838504791, "learning_rate": 0.0012098101493944636, "loss": 2.2667, "step": 415610 }, { "epoch": 1.6066706870158185, "grad_norm": 0.14182907342910767, "learning_rate": 0.0012095570861852198, "loss": 2.277, "step": 415620 }, { "epoch": 1.6067093442192018, "grad_norm": 0.10931894183158875, "learning_rate": 0.0012093041039691684, "loss": 2.2732, "step": 415630 }, { "epoch": 1.6067480014225852, "grad_norm": 0.09297894686460495, "learning_rate": 0.0012090512026685925, "loss": 2.2752, "step": 415640 }, { "epoch": 1.6067866586259685, "grad_norm": 0.10945281386375427, "learning_rate": 0.0012087983822059007, "loss": 2.2743, "step": 415650 }, { "epoch": 1.6068253158293517, "grad_norm": 0.10067486017942429, "learning_rate": 0.001208545642503625, "loss": 2.2632, "step": 415660 }, { "epoch": 1.606863973032735, "grad_norm": 0.19785423576831818, "learning_rate": 0.0012082929834844206, "loss": 2.2771, "step": 415670 }, { "epoch": 1.6069026302361182, "grad_norm": 0.10551826655864716, "learning_rate": 0.001208040405071067, "loss": 2.2693, "step": 415680 }, { "epoch": 1.6069412874395015, "grad_norm": 0.10171528160572052, "learning_rate": 0.0012077879071864658, "loss": 2.2591, "step": 415690 }, { "epoch": 1.6069799446428847, "grad_norm": 0.1067056804895401, "learning_rate": 0.0012075354897536422, "loss": 2.2809, "step": 415700 }, { "epoch": 1.607018601846268, "grad_norm": 0.0996486097574234, "learning_rate": 0.0012072831526957436, "loss": 2.2799, "step": 415710 }, { "epoch": 1.6070572590496512, "grad_norm": 0.10620211809873581, "learning_rate": 0.0012070308959360395, "loss": 2.2734, "step": 415720 }, { "epoch": 1.6070959162530345, "grad_norm": 0.1143997311592102, "learning_rate": 0.0012067787193979225, "loss": 2.2835, "step": 415730 }, { "epoch": 1.6071345734564177, "grad_norm": 0.10100308805704117, "learning_rate": 0.0012065266230049051, "loss": 2.2762, "step": 415740 }, { "epoch": 1.607173230659801, "grad_norm": 0.09416519105434418, "learning_rate": 0.0012062746066806226, "loss": 2.2799, "step": 415750 }, { "epoch": 1.6072118878631845, "grad_norm": 0.1051805317401886, "learning_rate": 0.0012060226703488317, "loss": 2.2753, "step": 415760 }, { "epoch": 1.6072505450665677, "grad_norm": 0.09900598227977753, "learning_rate": 0.0012057708139334088, "loss": 2.2765, "step": 415770 }, { "epoch": 1.607289202269951, "grad_norm": 0.10039504617452621, "learning_rate": 0.0012055190373583518, "loss": 2.2885, "step": 415780 }, { "epoch": 1.6073278594733342, "grad_norm": 0.11165333539247513, "learning_rate": 0.0012052673405477789, "loss": 2.2762, "step": 415790 }, { "epoch": 1.6073665166767177, "grad_norm": 0.09489556401968002, "learning_rate": 0.0012050157234259286, "loss": 2.2751, "step": 415800 }, { "epoch": 1.607405173880101, "grad_norm": 0.10287255793809891, "learning_rate": 0.0012047641859171583, "loss": 2.2809, "step": 415810 }, { "epoch": 1.6074438310834842, "grad_norm": 0.1100325882434845, "learning_rate": 0.0012045127279459464, "loss": 2.2672, "step": 415820 }, { "epoch": 1.6074824882868675, "grad_norm": 0.10662521421909332, "learning_rate": 0.0012042613494368895, "loss": 2.2614, "step": 415830 }, { "epoch": 1.6075211454902507, "grad_norm": 0.12481796741485596, "learning_rate": 0.001204010050314704, "loss": 2.2778, "step": 415840 }, { "epoch": 1.607559802693634, "grad_norm": 0.1037929430603981, "learning_rate": 0.0012037588305042247, "loss": 2.2737, "step": 415850 }, { "epoch": 1.6075984598970172, "grad_norm": 0.25705114006996155, "learning_rate": 0.0012035076899304048, "loss": 2.2819, "step": 415860 }, { "epoch": 1.6076371171004005, "grad_norm": 0.09950485825538635, "learning_rate": 0.0012032566285183164, "loss": 2.2747, "step": 415870 }, { "epoch": 1.6076757743037837, "grad_norm": 0.10227572917938232, "learning_rate": 0.001203005646193149, "loss": 2.2741, "step": 415880 }, { "epoch": 1.607714431507167, "grad_norm": 0.10218802094459534, "learning_rate": 0.00120275474288021, "loss": 2.2791, "step": 415890 }, { "epoch": 1.6077530887105502, "grad_norm": 0.1049424335360527, "learning_rate": 0.0012025039185049246, "loss": 2.2683, "step": 415900 }, { "epoch": 1.6077917459139335, "grad_norm": 0.10968547314405441, "learning_rate": 0.001202253172992835, "loss": 2.2662, "step": 415910 }, { "epoch": 1.6078304031173167, "grad_norm": 0.11751758307218552, "learning_rate": 0.0012020025062695998, "loss": 2.2607, "step": 415920 }, { "epoch": 1.6078690603207002, "grad_norm": 0.09624996781349182, "learning_rate": 0.001201751918260996, "loss": 2.275, "step": 415930 }, { "epoch": 1.6079077175240835, "grad_norm": 0.11658187955617905, "learning_rate": 0.001201501408892915, "loss": 2.2897, "step": 415940 }, { "epoch": 1.6079463747274667, "grad_norm": 0.09554409235715866, "learning_rate": 0.0012012509780913656, "loss": 2.2782, "step": 415950 }, { "epoch": 1.60798503193085, "grad_norm": 0.12213790416717529, "learning_rate": 0.001201000625782473, "loss": 2.2756, "step": 415960 }, { "epoch": 1.6080236891342334, "grad_norm": 0.1274234801530838, "learning_rate": 0.0012007503518924767, "loss": 2.2652, "step": 415970 }, { "epoch": 1.6080623463376167, "grad_norm": 0.10022182762622833, "learning_rate": 0.0012005001563477327, "loss": 2.272, "step": 415980 }, { "epoch": 1.608101003541, "grad_norm": 0.10145244747400284, "learning_rate": 0.001200250039074712, "loss": 2.2771, "step": 415990 }, { "epoch": 1.6081396607443832, "grad_norm": 0.1067088395357132, "learning_rate": 0.0012, "loss": 2.2807, "step": 416000 }, { "epoch": 1.6081783179477664, "grad_norm": 0.10339207947254181, "learning_rate": 0.0011997500390502978, "loss": 2.2696, "step": 416010 }, { "epoch": 1.6082169751511497, "grad_norm": 0.1001124158501625, "learning_rate": 0.0011995001561524198, "loss": 2.2686, "step": 416020 }, { "epoch": 1.608255632354533, "grad_norm": 0.120066799223423, "learning_rate": 0.001199250351233296, "loss": 2.2708, "step": 416030 }, { "epoch": 1.6082942895579162, "grad_norm": 0.10201053321361542, "learning_rate": 0.0011990006242199684, "loss": 2.267, "step": 416040 }, { "epoch": 1.6083329467612995, "grad_norm": 0.09921201318502426, "learning_rate": 0.0011987509750395948, "loss": 2.2634, "step": 416050 }, { "epoch": 1.6083716039646827, "grad_norm": 0.10539157688617706, "learning_rate": 0.0011985014036194448, "loss": 2.2806, "step": 416060 }, { "epoch": 1.608410261168066, "grad_norm": 0.10070233047008514, "learning_rate": 0.0011982519098869022, "loss": 2.2711, "step": 416070 }, { "epoch": 1.6084489183714492, "grad_norm": 0.09526374936103821, "learning_rate": 0.0011980024937694631, "loss": 2.2893, "step": 416080 }, { "epoch": 1.6084875755748325, "grad_norm": 0.09702368080615997, "learning_rate": 0.001197753155194737, "loss": 2.258, "step": 416090 }, { "epoch": 1.608526232778216, "grad_norm": 0.09795309603214264, "learning_rate": 0.0011975038940904448, "loss": 2.2763, "step": 416100 }, { "epoch": 1.6085648899815992, "grad_norm": 0.10636353492736816, "learning_rate": 0.0011972547103844208, "loss": 2.2737, "step": 416110 }, { "epoch": 1.6086035471849824, "grad_norm": 0.1189536526799202, "learning_rate": 0.0011970056040046108, "loss": 2.268, "step": 416120 }, { "epoch": 1.6086422043883657, "grad_norm": 0.10652356594800949, "learning_rate": 0.001196756574879072, "loss": 2.2684, "step": 416130 }, { "epoch": 1.6086808615917492, "grad_norm": 0.12403170019388199, "learning_rate": 0.0011965076229359735, "loss": 2.265, "step": 416140 }, { "epoch": 1.6087195187951324, "grad_norm": 0.09871499240398407, "learning_rate": 0.0011962587481035953, "loss": 2.2798, "step": 416150 }, { "epoch": 1.6087581759985157, "grad_norm": 0.10580728948116302, "learning_rate": 0.0011960099503103287, "loss": 2.2648, "step": 416160 }, { "epoch": 1.608796833201899, "grad_norm": 0.10762586444616318, "learning_rate": 0.001195761229484676, "loss": 2.2817, "step": 416170 }, { "epoch": 1.6088354904052822, "grad_norm": 0.10945604741573334, "learning_rate": 0.0011955125855552494, "loss": 2.2607, "step": 416180 }, { "epoch": 1.6088741476086654, "grad_norm": 0.1232224777340889, "learning_rate": 0.0011952640184507716, "loss": 2.2782, "step": 416190 }, { "epoch": 1.6089128048120487, "grad_norm": 0.10195323079824448, "learning_rate": 0.0011950155281000758, "loss": 2.2713, "step": 416200 }, { "epoch": 1.608951462015432, "grad_norm": 0.10153280198574066, "learning_rate": 0.0011947671144321042, "loss": 2.2703, "step": 416210 }, { "epoch": 1.6089901192188152, "grad_norm": 0.09289544075727463, "learning_rate": 0.0011945187773759094, "loss": 2.248, "step": 416220 }, { "epoch": 1.6090287764221984, "grad_norm": 0.09814459830522537, "learning_rate": 0.0011942705168606525, "loss": 2.267, "step": 416230 }, { "epoch": 1.6090674336255817, "grad_norm": 0.11871287971735, "learning_rate": 0.0011940223328156048, "loss": 2.2877, "step": 416240 }, { "epoch": 1.609106090828965, "grad_norm": 0.09919058531522751, "learning_rate": 0.0011937742251701452, "loss": 2.2533, "step": 416250 }, { "epoch": 1.6091447480323482, "grad_norm": 0.10049314051866531, "learning_rate": 0.001193526193853762, "loss": 2.2548, "step": 416260 }, { "epoch": 1.6091834052357317, "grad_norm": 0.1021869033575058, "learning_rate": 0.0011932782387960516, "loss": 2.2675, "step": 416270 }, { "epoch": 1.609222062439115, "grad_norm": 0.09958551824092865, "learning_rate": 0.0011930303599267194, "loss": 2.2816, "step": 416280 }, { "epoch": 1.6092607196424982, "grad_norm": 0.09233255684375763, "learning_rate": 0.0011927825571755775, "loss": 2.274, "step": 416290 }, { "epoch": 1.6092993768458814, "grad_norm": 0.10324325412511826, "learning_rate": 0.001192534830472546, "loss": 2.2708, "step": 416300 }, { "epoch": 1.609338034049265, "grad_norm": 0.11270972341299057, "learning_rate": 0.0011922871797476532, "loss": 2.2767, "step": 416310 }, { "epoch": 1.6093766912526482, "grad_norm": 0.09859174489974976, "learning_rate": 0.0011920396049310339, "loss": 2.265, "step": 416320 }, { "epoch": 1.6094153484560314, "grad_norm": 0.11577589809894562, "learning_rate": 0.00119179210595293, "loss": 2.277, "step": 416330 }, { "epoch": 1.6094540056594147, "grad_norm": 0.10810605436563492, "learning_rate": 0.0011915446827436905, "loss": 2.2668, "step": 416340 }, { "epoch": 1.609492662862798, "grad_norm": 0.11018019914627075, "learning_rate": 0.0011912973352337709, "loss": 2.2652, "step": 416350 }, { "epoch": 1.6095313200661812, "grad_norm": 0.10128393024206161, "learning_rate": 0.0011910500633537328, "loss": 2.2706, "step": 416360 }, { "epoch": 1.6095699772695644, "grad_norm": 0.10257522016763687, "learning_rate": 0.0011908028670342436, "loss": 2.2729, "step": 416370 }, { "epoch": 1.6096086344729477, "grad_norm": 0.10266554355621338, "learning_rate": 0.0011905557462060777, "loss": 2.2685, "step": 416380 }, { "epoch": 1.609647291676331, "grad_norm": 0.10134238749742508, "learning_rate": 0.0011903087008001136, "loss": 2.2722, "step": 416390 }, { "epoch": 1.6096859488797142, "grad_norm": 0.09120500087738037, "learning_rate": 0.0011900617307473364, "loss": 2.2741, "step": 416400 }, { "epoch": 1.6097246060830974, "grad_norm": 0.10055642575025558, "learning_rate": 0.0011898148359788363, "loss": 2.2759, "step": 416410 }, { "epoch": 1.6097632632864807, "grad_norm": 0.11470640450716019, "learning_rate": 0.0011895680164258076, "loss": 2.2621, "step": 416420 }, { "epoch": 1.609801920489864, "grad_norm": 0.10170623660087585, "learning_rate": 0.0011893212720195502, "loss": 2.2847, "step": 416430 }, { "epoch": 1.6098405776932474, "grad_norm": 0.09756741672754288, "learning_rate": 0.0011890746026914686, "loss": 2.2654, "step": 416440 }, { "epoch": 1.6098792348966307, "grad_norm": 0.0965120941400528, "learning_rate": 0.0011888280083730703, "loss": 2.2714, "step": 416450 }, { "epoch": 1.609917892100014, "grad_norm": 0.11919204145669937, "learning_rate": 0.0011885814889959683, "loss": 2.2869, "step": 416460 }, { "epoch": 1.6099565493033972, "grad_norm": 0.1065000370144844, "learning_rate": 0.001188335044491879, "loss": 2.2792, "step": 416470 }, { "epoch": 1.6099952065067806, "grad_norm": 0.13623447716236115, "learning_rate": 0.0011880886747926225, "loss": 2.2593, "step": 416480 }, { "epoch": 1.6100338637101639, "grad_norm": 0.10131317377090454, "learning_rate": 0.001187842379830122, "loss": 2.2975, "step": 416490 }, { "epoch": 1.6100725209135471, "grad_norm": 0.10462559759616852, "learning_rate": 0.001187596159536404, "loss": 2.254, "step": 416500 }, { "epoch": 1.6101111781169304, "grad_norm": 0.0960330218076706, "learning_rate": 0.0011873500138435981, "loss": 2.2833, "step": 416510 }, { "epoch": 1.6101498353203136, "grad_norm": 0.10928565263748169, "learning_rate": 0.0011871039426839369, "loss": 2.2635, "step": 416520 }, { "epoch": 1.610188492523697, "grad_norm": 0.10919661819934845, "learning_rate": 0.0011868579459897552, "loss": 2.2692, "step": 416530 }, { "epoch": 1.6102271497270801, "grad_norm": 0.09764064848423004, "learning_rate": 0.00118661202369349, "loss": 2.2708, "step": 416540 }, { "epoch": 1.6102658069304634, "grad_norm": 0.10673554241657257, "learning_rate": 0.0011863661757276806, "loss": 2.2755, "step": 416550 }, { "epoch": 1.6103044641338466, "grad_norm": 0.09112170338630676, "learning_rate": 0.001186120402024968, "loss": 2.2722, "step": 416560 }, { "epoch": 1.61034312133723, "grad_norm": 0.09223476052284241, "learning_rate": 0.0011858747025180953, "loss": 2.2769, "step": 416570 }, { "epoch": 1.6103817785406132, "grad_norm": 0.10158587247133255, "learning_rate": 0.0011856290771399068, "loss": 2.2778, "step": 416580 }, { "epoch": 1.6104204357439964, "grad_norm": 0.11441430449485779, "learning_rate": 0.001185383525823348, "loss": 2.2623, "step": 416590 }, { "epoch": 1.6104590929473797, "grad_norm": 0.10831929743289948, "learning_rate": 0.0011851380485014653, "loss": 2.2732, "step": 416600 }, { "epoch": 1.6104977501507631, "grad_norm": 0.09476850181818008, "learning_rate": 0.0011848926451074066, "loss": 2.2768, "step": 416610 }, { "epoch": 1.6105364073541464, "grad_norm": 0.11065233498811722, "learning_rate": 0.0011846473155744197, "loss": 2.2563, "step": 416620 }, { "epoch": 1.6105750645575296, "grad_norm": 0.10710212588310242, "learning_rate": 0.0011844020598358526, "loss": 2.2628, "step": 416630 }, { "epoch": 1.6106137217609129, "grad_norm": 0.10404687374830246, "learning_rate": 0.0011841568778251546, "loss": 2.2683, "step": 416640 }, { "epoch": 1.6106523789642964, "grad_norm": 0.11419782042503357, "learning_rate": 0.0011839117694758734, "loss": 2.2712, "step": 416650 }, { "epoch": 1.6106910361676796, "grad_norm": 0.10590862482786179, "learning_rate": 0.001183666734721658, "loss": 2.2856, "step": 416660 }, { "epoch": 1.6107296933710629, "grad_norm": 0.1061839759349823, "learning_rate": 0.001183421773496256, "loss": 2.2688, "step": 416670 }, { "epoch": 1.6107683505744461, "grad_norm": 0.09947504848241806, "learning_rate": 0.0011831768857335145, "loss": 2.264, "step": 416680 }, { "epoch": 1.6108070077778294, "grad_norm": 0.0981912761926651, "learning_rate": 0.0011829320713673801, "loss": 2.2641, "step": 416690 }, { "epoch": 1.6108456649812126, "grad_norm": 0.09732093662023544, "learning_rate": 0.0011826873303318979, "loss": 2.2709, "step": 416700 }, { "epoch": 1.6108843221845959, "grad_norm": 0.10180546343326569, "learning_rate": 0.0011824426625612122, "loss": 2.2958, "step": 416710 }, { "epoch": 1.6109229793879791, "grad_norm": 0.08668989688158035, "learning_rate": 0.0011821980679895651, "loss": 2.2715, "step": 416720 }, { "epoch": 1.6109616365913624, "grad_norm": 0.1114187091588974, "learning_rate": 0.0011819535465512976, "loss": 2.2713, "step": 416730 }, { "epoch": 1.6110002937947456, "grad_norm": 0.09553922712802887, "learning_rate": 0.0011817090981808486, "loss": 2.2822, "step": 416740 }, { "epoch": 1.6110389509981289, "grad_norm": 0.10593093186616898, "learning_rate": 0.001181464722812755, "loss": 2.2658, "step": 416750 }, { "epoch": 1.6110776082015121, "grad_norm": 0.11995851248502731, "learning_rate": 0.0011812204203816513, "loss": 2.2758, "step": 416760 }, { "epoch": 1.6111162654048954, "grad_norm": 0.10872121900320053, "learning_rate": 0.0011809761908222695, "loss": 2.2619, "step": 416770 }, { "epoch": 1.6111549226082789, "grad_norm": 0.11107048392295837, "learning_rate": 0.0011807320340694383, "loss": 2.2679, "step": 416780 }, { "epoch": 1.6111935798116621, "grad_norm": 0.10314419865608215, "learning_rate": 0.001180487950058085, "loss": 2.2709, "step": 416790 }, { "epoch": 1.6112322370150454, "grad_norm": 0.10333782434463501, "learning_rate": 0.001180243938723232, "loss": 2.2552, "step": 416800 }, { "epoch": 1.6112708942184286, "grad_norm": 0.09844791144132614, "learning_rate": 0.0011800000000000003, "loss": 2.2751, "step": 416810 }, { "epoch": 1.611309551421812, "grad_norm": 0.10671578347682953, "learning_rate": 0.0011797561338236049, "loss": 2.2686, "step": 416820 }, { "epoch": 1.6113482086251953, "grad_norm": 0.1007014662027359, "learning_rate": 0.0011795123401293593, "loss": 2.2702, "step": 416830 }, { "epoch": 1.6113868658285786, "grad_norm": 0.10503365844488144, "learning_rate": 0.0011792686188526725, "loss": 2.2804, "step": 416840 }, { "epoch": 1.6114255230319618, "grad_norm": 0.09722265601158142, "learning_rate": 0.0011790249699290482, "loss": 2.2669, "step": 416850 }, { "epoch": 1.611464180235345, "grad_norm": 0.09146405011415482, "learning_rate": 0.0011787813932940877, "loss": 2.2751, "step": 416860 }, { "epoch": 1.6115028374387284, "grad_norm": 0.13335338234901428, "learning_rate": 0.0011785378888834858, "loss": 2.2839, "step": 416870 }, { "epoch": 1.6115414946421116, "grad_norm": 0.09907438606023788, "learning_rate": 0.0011782944566330344, "loss": 2.2623, "step": 416880 }, { "epoch": 1.6115801518454949, "grad_norm": 0.12023543566465378, "learning_rate": 0.0011780510964786192, "loss": 2.2806, "step": 416890 }, { "epoch": 1.611618809048878, "grad_norm": 0.10835972428321838, "learning_rate": 0.0011778078083562213, "loss": 2.2843, "step": 416900 }, { "epoch": 1.6116574662522614, "grad_norm": 0.09845547378063202, "learning_rate": 0.0011775645922019165, "loss": 2.2552, "step": 416910 }, { "epoch": 1.6116961234556446, "grad_norm": 0.10376187413930893, "learning_rate": 0.0011773214479518749, "loss": 2.2728, "step": 416920 }, { "epoch": 1.6117347806590279, "grad_norm": 0.10232806205749512, "learning_rate": 0.0011770783755423607, "loss": 2.2799, "step": 416930 }, { "epoch": 1.6117734378624111, "grad_norm": 0.09905198216438293, "learning_rate": 0.0011768353749097328, "loss": 2.2745, "step": 416940 }, { "epoch": 1.6118120950657946, "grad_norm": 0.1061449944972992, "learning_rate": 0.001176592445990444, "loss": 2.2773, "step": 416950 }, { "epoch": 1.6118507522691778, "grad_norm": 0.10233290493488312, "learning_rate": 0.00117634958872104, "loss": 2.2644, "step": 416960 }, { "epoch": 1.611889409472561, "grad_norm": 0.10700671374797821, "learning_rate": 0.0011761068030381608, "loss": 2.263, "step": 416970 }, { "epoch": 1.6119280666759443, "grad_norm": 0.11679814755916595, "learning_rate": 0.0011758640888785395, "loss": 2.2845, "step": 416980 }, { "epoch": 1.6119667238793278, "grad_norm": 0.09567833691835403, "learning_rate": 0.0011756214461790023, "loss": 2.2856, "step": 416990 }, { "epoch": 1.612005381082711, "grad_norm": 0.1070074737071991, "learning_rate": 0.001175378874876468, "loss": 2.2666, "step": 417000 }, { "epoch": 1.6120440382860943, "grad_norm": 0.10622667521238327, "learning_rate": 0.0011751363749079489, "loss": 2.2872, "step": 417010 }, { "epoch": 1.6120826954894776, "grad_norm": 0.10399883985519409, "learning_rate": 0.0011748939462105494, "loss": 2.2621, "step": 417020 }, { "epoch": 1.6121213526928608, "grad_norm": 0.11849434673786163, "learning_rate": 0.0011746515887214662, "loss": 2.2672, "step": 417030 }, { "epoch": 1.612160009896244, "grad_norm": 0.10450667142868042, "learning_rate": 0.0011744093023779883, "loss": 2.2891, "step": 417040 }, { "epoch": 1.6121986670996273, "grad_norm": 0.10449767112731934, "learning_rate": 0.001174167087117497, "loss": 2.2762, "step": 417050 }, { "epoch": 1.6122373243030106, "grad_norm": 0.1145240068435669, "learning_rate": 0.0011739249428774645, "loss": 2.2736, "step": 417060 }, { "epoch": 1.6122759815063938, "grad_norm": 0.10412728041410446, "learning_rate": 0.001173682869595456, "loss": 2.2601, "step": 417070 }, { "epoch": 1.612314638709777, "grad_norm": 0.11344483494758606, "learning_rate": 0.001173440867209127, "loss": 2.2651, "step": 417080 }, { "epoch": 1.6123532959131603, "grad_norm": 0.10842296481132507, "learning_rate": 0.0011731989356562245, "loss": 2.2543, "step": 417090 }, { "epoch": 1.6123919531165436, "grad_norm": 0.10091166198253632, "learning_rate": 0.0011729570748745869, "loss": 2.2493, "step": 417100 }, { "epoch": 1.6124306103199268, "grad_norm": 0.10709764808416367, "learning_rate": 0.0011727152848021425, "loss": 2.2795, "step": 417110 }, { "epoch": 1.6124692675233103, "grad_norm": 0.11410240828990936, "learning_rate": 0.001172473565376912, "loss": 2.2663, "step": 417120 }, { "epoch": 1.6125079247266936, "grad_norm": 0.1093989834189415, "learning_rate": 0.001172231916537005, "loss": 2.2734, "step": 417130 }, { "epoch": 1.6125465819300768, "grad_norm": 0.11429251730442047, "learning_rate": 0.0011719903382206218, "loss": 2.2613, "step": 417140 }, { "epoch": 1.61258523913346, "grad_norm": 0.102109394967556, "learning_rate": 0.0011717488303660537, "loss": 2.2867, "step": 417150 }, { "epoch": 1.6126238963368436, "grad_norm": 0.09235858172178268, "learning_rate": 0.0011715073929116809, "loss": 2.2673, "step": 417160 }, { "epoch": 1.6126625535402268, "grad_norm": 0.10976700484752655, "learning_rate": 0.0011712660257959734, "loss": 2.2827, "step": 417170 }, { "epoch": 1.61270121074361, "grad_norm": 0.10967014729976654, "learning_rate": 0.0011710247289574917, "loss": 2.2583, "step": 417180 }, { "epoch": 1.6127398679469933, "grad_norm": 0.10642175376415253, "learning_rate": 0.001170783502334885, "loss": 2.2621, "step": 417190 }, { "epoch": 1.6127785251503766, "grad_norm": 0.09669990092515945, "learning_rate": 0.0011705423458668914, "loss": 2.2699, "step": 417200 }, { "epoch": 1.6128171823537598, "grad_norm": 0.12274808436632156, "learning_rate": 0.0011703012594923385, "loss": 2.2674, "step": 417210 }, { "epoch": 1.612855839557143, "grad_norm": 0.11121085286140442, "learning_rate": 0.001170060243150143, "loss": 2.2629, "step": 417220 }, { "epoch": 1.6128944967605263, "grad_norm": 0.10337743163108826, "learning_rate": 0.0011698192967793096, "loss": 2.2863, "step": 417230 }, { "epoch": 1.6129331539639096, "grad_norm": 0.09829306602478027, "learning_rate": 0.0011695784203189321, "loss": 2.2731, "step": 417240 }, { "epoch": 1.6129718111672928, "grad_norm": 0.09615987539291382, "learning_rate": 0.0011693376137081927, "loss": 2.2701, "step": 417250 }, { "epoch": 1.613010468370676, "grad_norm": 0.09579098224639893, "learning_rate": 0.0011690968768863605, "loss": 2.2729, "step": 417260 }, { "epoch": 1.6130491255740593, "grad_norm": 0.10610536485910416, "learning_rate": 0.0011688562097927943, "loss": 2.2658, "step": 417270 }, { "epoch": 1.6130877827774428, "grad_norm": 0.11483661830425262, "learning_rate": 0.0011686156123669389, "loss": 2.2666, "step": 417280 }, { "epoch": 1.613126439980826, "grad_norm": 0.16898894309997559, "learning_rate": 0.0011683750845483286, "loss": 2.3003, "step": 417290 }, { "epoch": 1.6131650971842093, "grad_norm": 0.11532501876354218, "learning_rate": 0.0011681346262765833, "loss": 2.2769, "step": 417300 }, { "epoch": 1.6132037543875926, "grad_norm": 0.09754514694213867, "learning_rate": 0.0011678942374914111, "loss": 2.2691, "step": 417310 }, { "epoch": 1.6132424115909758, "grad_norm": 0.12471870332956314, "learning_rate": 0.0011676539181326075, "loss": 2.2793, "step": 417320 }, { "epoch": 1.6132810687943593, "grad_norm": 0.10189971327781677, "learning_rate": 0.0011674136681400543, "loss": 2.2641, "step": 417330 }, { "epoch": 1.6133197259977425, "grad_norm": 0.09957670420408249, "learning_rate": 0.0011671734874537194, "loss": 2.2789, "step": 417340 }, { "epoch": 1.6133583832011258, "grad_norm": 0.0903426930308342, "learning_rate": 0.001166933376013659, "loss": 2.2812, "step": 417350 }, { "epoch": 1.613397040404509, "grad_norm": 0.10298719257116318, "learning_rate": 0.0011666933337600137, "loss": 2.2713, "step": 417360 }, { "epoch": 1.6134356976078923, "grad_norm": 0.10459873825311661, "learning_rate": 0.0011664533606330118, "loss": 2.2795, "step": 417370 }, { "epoch": 1.6134743548112755, "grad_norm": 0.15128566324710846, "learning_rate": 0.0011662134565729666, "loss": 2.2543, "step": 417380 }, { "epoch": 1.6135130120146588, "grad_norm": 0.09230190515518188, "learning_rate": 0.0011659736215202782, "loss": 2.2753, "step": 417390 }, { "epoch": 1.613551669218042, "grad_norm": 0.10914508253335953, "learning_rate": 0.0011657338554154318, "loss": 2.2791, "step": 417400 }, { "epoch": 1.6135903264214253, "grad_norm": 0.11341839283704758, "learning_rate": 0.0011654941581989973, "loss": 2.2741, "step": 417410 }, { "epoch": 1.6136289836248086, "grad_norm": 0.09619162231683731, "learning_rate": 0.001165254529811632, "loss": 2.2733, "step": 417420 }, { "epoch": 1.6136676408281918, "grad_norm": 0.12042240798473358, "learning_rate": 0.001165014970194076, "loss": 2.2602, "step": 417430 }, { "epoch": 1.613706298031575, "grad_norm": 0.09932298958301544, "learning_rate": 0.0011647754792871558, "loss": 2.2867, "step": 417440 }, { "epoch": 1.6137449552349585, "grad_norm": 0.1054065153002739, "learning_rate": 0.001164536057031783, "loss": 2.2724, "step": 417450 }, { "epoch": 1.6137836124383418, "grad_norm": 0.12224634736776352, "learning_rate": 0.001164296703368953, "loss": 2.2763, "step": 417460 }, { "epoch": 1.613822269641725, "grad_norm": 0.10424677282571793, "learning_rate": 0.0011640574182397454, "loss": 2.2655, "step": 417470 }, { "epoch": 1.6138609268451083, "grad_norm": 0.10834372043609619, "learning_rate": 0.001163818201585325, "loss": 2.263, "step": 417480 }, { "epoch": 1.6138995840484915, "grad_norm": 0.10484948009252548, "learning_rate": 0.0011635790533469407, "loss": 2.278, "step": 417490 }, { "epoch": 1.613938241251875, "grad_norm": 0.12994012236595154, "learning_rate": 0.0011633399734659244, "loss": 2.298, "step": 417500 }, { "epoch": 1.6139768984552583, "grad_norm": 0.09662307798862457, "learning_rate": 0.001163100961883693, "loss": 2.2545, "step": 417510 }, { "epoch": 1.6140155556586415, "grad_norm": 0.10316623002290726, "learning_rate": 0.001162862018541746, "loss": 2.2839, "step": 417520 }, { "epoch": 1.6140542128620248, "grad_norm": 0.12849442660808563, "learning_rate": 0.001162623143381667, "loss": 2.2764, "step": 417530 }, { "epoch": 1.614092870065408, "grad_norm": 0.11165708303451538, "learning_rate": 0.0011623843363451232, "loss": 2.273, "step": 417540 }, { "epoch": 1.6141315272687913, "grad_norm": 0.11638086289167404, "learning_rate": 0.0011621455973738635, "loss": 2.2857, "step": 417550 }, { "epoch": 1.6141701844721745, "grad_norm": 0.10104557871818542, "learning_rate": 0.0011619069264097216, "loss": 2.2802, "step": 417560 }, { "epoch": 1.6142088416755578, "grad_norm": 0.09986381232738495, "learning_rate": 0.0011616683233946126, "loss": 2.2755, "step": 417570 }, { "epoch": 1.614247498878941, "grad_norm": 0.10763323307037354, "learning_rate": 0.0011614297882705346, "loss": 2.2704, "step": 417580 }, { "epoch": 1.6142861560823243, "grad_norm": 0.10754591226577759, "learning_rate": 0.0011611913209795693, "loss": 2.2666, "step": 417590 }, { "epoch": 1.6143248132857075, "grad_norm": 0.10428612679243088, "learning_rate": 0.0011609529214638788, "loss": 2.2616, "step": 417600 }, { "epoch": 1.6143634704890908, "grad_norm": 0.13924959301948547, "learning_rate": 0.0011607145896657086, "loss": 2.2711, "step": 417610 }, { "epoch": 1.6144021276924743, "grad_norm": 0.11408555507659912, "learning_rate": 0.0011604763255273858, "loss": 2.2925, "step": 417620 }, { "epoch": 1.6144407848958575, "grad_norm": 0.10074104368686676, "learning_rate": 0.0011602381289913192, "loss": 2.2502, "step": 417630 }, { "epoch": 1.6144794420992408, "grad_norm": 0.10464302450418472, "learning_rate": 0.0011600000000000002, "loss": 2.296, "step": 417640 }, { "epoch": 1.614518099302624, "grad_norm": 0.10674621909856796, "learning_rate": 0.001159761938496, "loss": 2.261, "step": 417650 }, { "epoch": 1.6145567565060075, "grad_norm": 0.10986209660768509, "learning_rate": 0.0011595239444219722, "loss": 2.2839, "step": 417660 }, { "epoch": 1.6145954137093907, "grad_norm": 0.11070261895656586, "learning_rate": 0.0011592860177206518, "loss": 2.2749, "step": 417670 }, { "epoch": 1.614634070912774, "grad_norm": 0.10384651273488998, "learning_rate": 0.0011590481583348545, "loss": 2.2741, "step": 417680 }, { "epoch": 1.6146727281161573, "grad_norm": 0.09847825020551682, "learning_rate": 0.0011588103662074764, "loss": 2.2594, "step": 417690 }, { "epoch": 1.6147113853195405, "grad_norm": 0.1030513197183609, "learning_rate": 0.001158572641281495, "loss": 2.2655, "step": 417700 }, { "epoch": 1.6147500425229238, "grad_norm": 0.11066064238548279, "learning_rate": 0.0011583349834999675, "loss": 2.2739, "step": 417710 }, { "epoch": 1.614788699726307, "grad_norm": 0.11787351965904236, "learning_rate": 0.0011580973928060326, "loss": 2.2642, "step": 417720 }, { "epoch": 1.6148273569296903, "grad_norm": 0.09980590641498566, "learning_rate": 0.001157859869142908, "loss": 2.2598, "step": 417730 }, { "epoch": 1.6148660141330735, "grad_norm": 0.10738690942525864, "learning_rate": 0.0011576224124538925, "loss": 2.2705, "step": 417740 }, { "epoch": 1.6149046713364568, "grad_norm": 0.10659084469079971, "learning_rate": 0.0011573850226823642, "loss": 2.266, "step": 417750 }, { "epoch": 1.61494332853984, "grad_norm": 0.11544018238782883, "learning_rate": 0.001157147699771781, "loss": 2.272, "step": 417760 }, { "epoch": 1.6149819857432233, "grad_norm": 0.0990079790353775, "learning_rate": 0.0011569104436656803, "loss": 2.2523, "step": 417770 }, { "epoch": 1.6150206429466065, "grad_norm": 0.10228414833545685, "learning_rate": 0.0011566732543076795, "loss": 2.274, "step": 417780 }, { "epoch": 1.61505930014999, "grad_norm": 0.11280784010887146, "learning_rate": 0.0011564361316414745, "loss": 2.2764, "step": 417790 }, { "epoch": 1.6150979573533732, "grad_norm": 0.10561858117580414, "learning_rate": 0.0011561990756108406, "loss": 2.2863, "step": 417800 }, { "epoch": 1.6151366145567565, "grad_norm": 0.1057622954249382, "learning_rate": 0.0011559620861596323, "loss": 2.267, "step": 417810 }, { "epoch": 1.6151752717601398, "grad_norm": 0.10904049128293991, "learning_rate": 0.0011557251632317827, "loss": 2.2701, "step": 417820 }, { "epoch": 1.6152139289635232, "grad_norm": 0.116305410861969, "learning_rate": 0.0011554883067713035, "loss": 2.2764, "step": 417830 }, { "epoch": 1.6152525861669065, "grad_norm": 0.11812672764062881, "learning_rate": 0.0011552515167222851, "loss": 2.2764, "step": 417840 }, { "epoch": 1.6152912433702897, "grad_norm": 0.10141696780920029, "learning_rate": 0.0011550147930288956, "loss": 2.2673, "step": 417850 }, { "epoch": 1.615329900573673, "grad_norm": 0.11404278874397278, "learning_rate": 0.0011547781356353825, "loss": 2.2878, "step": 417860 }, { "epoch": 1.6153685577770562, "grad_norm": 0.10533790290355682, "learning_rate": 0.00115454154448607, "loss": 2.2804, "step": 417870 }, { "epoch": 1.6154072149804395, "grad_norm": 0.09895123541355133, "learning_rate": 0.0011543050195253611, "loss": 2.2665, "step": 417880 }, { "epoch": 1.6154458721838227, "grad_norm": 0.10912610590457916, "learning_rate": 0.001154068560697736, "loss": 2.263, "step": 417890 }, { "epoch": 1.615484529387206, "grad_norm": 0.11409979313611984, "learning_rate": 0.0011538321679477527, "loss": 2.289, "step": 417900 }, { "epoch": 1.6155231865905892, "grad_norm": 0.09628956764936447, "learning_rate": 0.0011535958412200468, "loss": 2.2741, "step": 417910 }, { "epoch": 1.6155618437939725, "grad_norm": 0.10939304530620575, "learning_rate": 0.001153359580459331, "loss": 2.2604, "step": 417920 }, { "epoch": 1.6156005009973557, "grad_norm": 0.1213667020201683, "learning_rate": 0.0011531233856103948, "loss": 2.2608, "step": 417930 }, { "epoch": 1.615639158200739, "grad_norm": 0.25866174697875977, "learning_rate": 0.0011528872566181052, "loss": 2.2706, "step": 417940 }, { "epoch": 1.6156778154041223, "grad_norm": 0.10094214230775833, "learning_rate": 0.0011526511934274058, "loss": 2.2852, "step": 417950 }, { "epoch": 1.6157164726075057, "grad_norm": 0.1102001741528511, "learning_rate": 0.0011524151959833163, "loss": 2.2738, "step": 417960 }, { "epoch": 1.615755129810889, "grad_norm": 0.09849420934915543, "learning_rate": 0.0011521792642309344, "loss": 2.2626, "step": 417970 }, { "epoch": 1.6157937870142722, "grad_norm": 0.09583023935556412, "learning_rate": 0.0011519433981154324, "loss": 2.2702, "step": 417980 }, { "epoch": 1.6158324442176555, "grad_norm": 0.10503140091896057, "learning_rate": 0.00115170759758206, "loss": 2.2781, "step": 417990 }, { "epoch": 1.615871101421039, "grad_norm": 0.10275324434041977, "learning_rate": 0.001151471862576143, "loss": 2.2715, "step": 418000 }, { "epoch": 1.6159097586244222, "grad_norm": 0.10475151240825653, "learning_rate": 0.0011512361930430823, "loss": 2.2748, "step": 418010 }, { "epoch": 1.6159484158278055, "grad_norm": 0.10688836872577667, "learning_rate": 0.001151000588928355, "loss": 2.2759, "step": 418020 }, { "epoch": 1.6159870730311887, "grad_norm": 0.10008478909730911, "learning_rate": 0.0011507650501775143, "loss": 2.2548, "step": 418030 }, { "epoch": 1.616025730234572, "grad_norm": 0.11711792647838593, "learning_rate": 0.0011505295767361878, "loss": 2.2767, "step": 418040 }, { "epoch": 1.6160643874379552, "grad_norm": 0.1123252585530281, "learning_rate": 0.0011502941685500798, "loss": 2.2696, "step": 418050 }, { "epoch": 1.6161030446413385, "grad_norm": 0.10946837067604065, "learning_rate": 0.0011500588255649688, "loss": 2.2647, "step": 418060 }, { "epoch": 1.6161417018447217, "grad_norm": 0.11430559307336807, "learning_rate": 0.0011498235477267087, "loss": 2.266, "step": 418070 }, { "epoch": 1.616180359048105, "grad_norm": 0.11472238600254059, "learning_rate": 0.001149588334981228, "loss": 2.2631, "step": 418080 }, { "epoch": 1.6162190162514882, "grad_norm": 0.10393355786800385, "learning_rate": 0.0011493531872745306, "loss": 2.2642, "step": 418090 }, { "epoch": 1.6162576734548715, "grad_norm": 0.10616232454776764, "learning_rate": 0.0011491181045526942, "loss": 2.2711, "step": 418100 }, { "epoch": 1.6162963306582547, "grad_norm": 0.10849283635616302, "learning_rate": 0.0011488830867618716, "loss": 2.2722, "step": 418110 }, { "epoch": 1.616334987861638, "grad_norm": 0.11171620339155197, "learning_rate": 0.0011486481338482894, "loss": 2.2597, "step": 418120 }, { "epoch": 1.6163736450650215, "grad_norm": 0.10275192558765411, "learning_rate": 0.0011484132457582495, "loss": 2.2737, "step": 418130 }, { "epoch": 1.6164123022684047, "grad_norm": 0.1024751290678978, "learning_rate": 0.001148178422438126, "loss": 2.2794, "step": 418140 }, { "epoch": 1.616450959471788, "grad_norm": 0.1121668741106987, "learning_rate": 0.0011479436638343683, "loss": 2.2672, "step": 418150 }, { "epoch": 1.6164896166751712, "grad_norm": 0.4614737033843994, "learning_rate": 0.0011477089698934994, "loss": 2.2657, "step": 418160 }, { "epoch": 1.6165282738785547, "grad_norm": 0.11732994765043259, "learning_rate": 0.0011474743405621153, "loss": 2.2642, "step": 418170 }, { "epoch": 1.616566931081938, "grad_norm": 0.10660345107316971, "learning_rate": 0.001147239775786886, "loss": 2.2844, "step": 418180 }, { "epoch": 1.6166055882853212, "grad_norm": 0.10075422376394272, "learning_rate": 0.0011470052755145552, "loss": 2.271, "step": 418190 }, { "epoch": 1.6166442454887044, "grad_norm": 0.11237199604511261, "learning_rate": 0.0011467708396919382, "loss": 2.26, "step": 418200 }, { "epoch": 1.6166829026920877, "grad_norm": 0.11487163603305817, "learning_rate": 0.0011465364682659255, "loss": 2.2726, "step": 418210 }, { "epoch": 1.616721559895471, "grad_norm": 0.09262558817863464, "learning_rate": 0.001146302161183478, "loss": 2.2807, "step": 418220 }, { "epoch": 1.6167602170988542, "grad_norm": 0.09925800561904907, "learning_rate": 0.0011460679183916322, "loss": 2.2694, "step": 418230 }, { "epoch": 1.6167988743022375, "grad_norm": 0.11094321310520172, "learning_rate": 0.001145833739837495, "loss": 2.2695, "step": 418240 }, { "epoch": 1.6168375315056207, "grad_norm": 0.09434594959020615, "learning_rate": 0.0011455996254682468, "loss": 2.2744, "step": 418250 }, { "epoch": 1.616876188709004, "grad_norm": 0.10965374112129211, "learning_rate": 0.00114536557523114, "loss": 2.264, "step": 418260 }, { "epoch": 1.6169148459123872, "grad_norm": 0.10408959537744522, "learning_rate": 0.0011451315890734994, "loss": 2.2776, "step": 418270 }, { "epoch": 1.6169535031157705, "grad_norm": 0.10442472994327545, "learning_rate": 0.001144897666942722, "loss": 2.2777, "step": 418280 }, { "epoch": 1.6169921603191537, "grad_norm": 0.10826219618320465, "learning_rate": 0.0011446638087862762, "loss": 2.263, "step": 418290 }, { "epoch": 1.6170308175225372, "grad_norm": 0.11713138967752457, "learning_rate": 0.0011444300145517024, "loss": 2.2698, "step": 418300 }, { "epoch": 1.6170694747259204, "grad_norm": 0.1076996773481369, "learning_rate": 0.0011441962841866134, "loss": 2.2774, "step": 418310 }, { "epoch": 1.6171081319293037, "grad_norm": 0.1485646516084671, "learning_rate": 0.001143962617638692, "loss": 2.2644, "step": 418320 }, { "epoch": 1.617146789132687, "grad_norm": 0.10950548946857452, "learning_rate": 0.0011437290148556942, "loss": 2.2821, "step": 418330 }, { "epoch": 1.6171854463360704, "grad_norm": 0.10513289272785187, "learning_rate": 0.0011434954757854458, "loss": 2.2699, "step": 418340 }, { "epoch": 1.6172241035394537, "grad_norm": 0.1014241948723793, "learning_rate": 0.0011432620003758442, "loss": 2.2755, "step": 418350 }, { "epoch": 1.617262760742837, "grad_norm": 0.09708770364522934, "learning_rate": 0.0011430285885748578, "loss": 2.2565, "step": 418360 }, { "epoch": 1.6173014179462202, "grad_norm": 0.09347762167453766, "learning_rate": 0.0011427952403305264, "loss": 2.2654, "step": 418370 }, { "epoch": 1.6173400751496034, "grad_norm": 0.09401445835828781, "learning_rate": 0.0011425619555909595, "loss": 2.2756, "step": 418380 }, { "epoch": 1.6173787323529867, "grad_norm": 0.09593868255615234, "learning_rate": 0.0011423287343043374, "loss": 2.2673, "step": 418390 }, { "epoch": 1.61741738955637, "grad_norm": 0.09695480018854141, "learning_rate": 0.0011420955764189114, "loss": 2.2621, "step": 418400 }, { "epoch": 1.6174560467597532, "grad_norm": 0.10635862499475479, "learning_rate": 0.0011418624818830025, "loss": 2.2752, "step": 418410 }, { "epoch": 1.6174947039631364, "grad_norm": 0.0985569879412651, "learning_rate": 0.0011416294506450028, "loss": 2.2679, "step": 418420 }, { "epoch": 1.6175333611665197, "grad_norm": 0.10103638470172882, "learning_rate": 0.0011413964826533726, "loss": 2.2672, "step": 418430 }, { "epoch": 1.617572018369903, "grad_norm": 0.11381982266902924, "learning_rate": 0.0011411635778566444, "loss": 2.2776, "step": 418440 }, { "epoch": 1.6176106755732862, "grad_norm": 0.09681716561317444, "learning_rate": 0.0011409307362034186, "loss": 2.2727, "step": 418450 }, { "epoch": 1.6176493327766694, "grad_norm": 0.10214970260858536, "learning_rate": 0.0011406979576423666, "loss": 2.2743, "step": 418460 }, { "epoch": 1.617687989980053, "grad_norm": 0.10435772687196732, "learning_rate": 0.001140465242122228, "loss": 2.2689, "step": 418470 }, { "epoch": 1.6177266471834362, "grad_norm": 0.10388590395450592, "learning_rate": 0.0011402325895918128, "loss": 2.2792, "step": 418480 }, { "epoch": 1.6177653043868194, "grad_norm": 0.0986008271574974, "learning_rate": 0.0011400000000000002, "loss": 2.2697, "step": 418490 }, { "epoch": 1.6178039615902027, "grad_norm": 0.09806541353464127, "learning_rate": 0.0011397674732957373, "loss": 2.2812, "step": 418500 }, { "epoch": 1.6178426187935862, "grad_norm": 0.11309289932250977, "learning_rate": 0.0011395350094280418, "loss": 2.254, "step": 418510 }, { "epoch": 1.6178812759969694, "grad_norm": 0.09836818277835846, "learning_rate": 0.0011393026083459997, "loss": 2.2732, "step": 418520 }, { "epoch": 1.6179199332003527, "grad_norm": 0.09785986691713333, "learning_rate": 0.0011390702699987647, "loss": 2.2674, "step": 418530 }, { "epoch": 1.617958590403736, "grad_norm": 0.11060678213834763, "learning_rate": 0.0011388379943355607, "loss": 2.2843, "step": 418540 }, { "epoch": 1.6179972476071192, "grad_norm": 0.10308713465929031, "learning_rate": 0.001138605781305679, "loss": 2.2775, "step": 418550 }, { "epoch": 1.6180359048105024, "grad_norm": 0.10552119463682175, "learning_rate": 0.0011383736308584794, "loss": 2.2556, "step": 418560 }, { "epoch": 1.6180745620138857, "grad_norm": 0.10252942144870758, "learning_rate": 0.0011381415429433904, "loss": 2.2814, "step": 418570 }, { "epoch": 1.618113219217269, "grad_norm": 0.09974144399166107, "learning_rate": 0.0011379095175099079, "loss": 2.2785, "step": 418580 }, { "epoch": 1.6181518764206522, "grad_norm": 0.1105218306183815, "learning_rate": 0.001137677554507596, "loss": 2.2559, "step": 418590 }, { "epoch": 1.6181905336240354, "grad_norm": 0.09096836298704147, "learning_rate": 0.0011374456538860872, "loss": 2.2802, "step": 418600 }, { "epoch": 1.6182291908274187, "grad_norm": 0.10733989626169205, "learning_rate": 0.0011372138155950804, "loss": 2.2743, "step": 418610 }, { "epoch": 1.618267848030802, "grad_norm": 0.09750019013881683, "learning_rate": 0.0011369820395843433, "loss": 2.2576, "step": 418620 }, { "epoch": 1.6183065052341852, "grad_norm": 0.10365524888038635, "learning_rate": 0.0011367503258037104, "loss": 2.2797, "step": 418630 }, { "epoch": 1.6183451624375687, "grad_norm": 0.11139106005430222, "learning_rate": 0.0011365186742030838, "loss": 2.2517, "step": 418640 }, { "epoch": 1.618383819640952, "grad_norm": 0.12801063060760498, "learning_rate": 0.0011362870847324326, "loss": 2.2695, "step": 418650 }, { "epoch": 1.6184224768443352, "grad_norm": 0.11003414541482925, "learning_rate": 0.001136055557341793, "loss": 2.2759, "step": 418660 }, { "epoch": 1.6184611340477184, "grad_norm": 0.10692291706800461, "learning_rate": 0.001135824091981268, "loss": 2.267, "step": 418670 }, { "epoch": 1.6184997912511019, "grad_norm": 0.09166417270898819, "learning_rate": 0.0011355926886010277, "loss": 2.2655, "step": 418680 }, { "epoch": 1.6185384484544851, "grad_norm": 0.10425940155982971, "learning_rate": 0.0011353613471513087, "loss": 2.2675, "step": 418690 }, { "epoch": 1.6185771056578684, "grad_norm": 0.10555826127529144, "learning_rate": 0.0011351300675824139, "loss": 2.2808, "step": 418700 }, { "epoch": 1.6186157628612516, "grad_norm": 0.0973772332072258, "learning_rate": 0.0011348988498447133, "loss": 2.2829, "step": 418710 }, { "epoch": 1.618654420064635, "grad_norm": 0.11358707398176193, "learning_rate": 0.0011346676938886425, "loss": 2.2616, "step": 418720 }, { "epoch": 1.6186930772680181, "grad_norm": 0.10206224024295807, "learning_rate": 0.0011344365996647041, "loss": 2.2683, "step": 418730 }, { "epoch": 1.6187317344714014, "grad_norm": 0.10238737612962723, "learning_rate": 0.0011342055671234655, "loss": 2.2721, "step": 418740 }, { "epoch": 1.6187703916747846, "grad_norm": 0.10538827627897263, "learning_rate": 0.0011339745962155613, "loss": 2.2629, "step": 418750 }, { "epoch": 1.618809048878168, "grad_norm": 0.10559193789958954, "learning_rate": 0.0011337436868916915, "loss": 2.2549, "step": 418760 }, { "epoch": 1.6188477060815512, "grad_norm": 0.11526539921760559, "learning_rate": 0.0011335128391026213, "loss": 2.2805, "step": 418770 }, { "epoch": 1.6188863632849344, "grad_norm": 0.1010223776102066, "learning_rate": 0.001133282052799182, "loss": 2.2703, "step": 418780 }, { "epoch": 1.6189250204883177, "grad_norm": 0.11898636072874069, "learning_rate": 0.0011330513279322703, "loss": 2.2674, "step": 418790 }, { "epoch": 1.618963677691701, "grad_norm": 0.11917950958013535, "learning_rate": 0.001132820664452848, "loss": 2.268, "step": 418800 }, { "epoch": 1.6190023348950844, "grad_norm": 0.10815060138702393, "learning_rate": 0.0011325900623119422, "loss": 2.2786, "step": 418810 }, { "epoch": 1.6190409920984676, "grad_norm": 0.09814000129699707, "learning_rate": 0.0011323595214606454, "loss": 2.2581, "step": 418820 }, { "epoch": 1.6190796493018509, "grad_norm": 0.09670665860176086, "learning_rate": 0.0011321290418501147, "loss": 2.2665, "step": 418830 }, { "epoch": 1.6191183065052341, "grad_norm": 0.11535345762968063, "learning_rate": 0.0011318986234315719, "loss": 2.2738, "step": 418840 }, { "epoch": 1.6191569637086176, "grad_norm": 0.12406040728092194, "learning_rate": 0.0011316682661563035, "loss": 2.281, "step": 418850 }, { "epoch": 1.6191956209120009, "grad_norm": 0.10750404745340347, "learning_rate": 0.0011314379699756615, "loss": 2.2669, "step": 418860 }, { "epoch": 1.6192342781153841, "grad_norm": 0.10614997148513794, "learning_rate": 0.0011312077348410612, "loss": 2.2741, "step": 418870 }, { "epoch": 1.6192729353187674, "grad_norm": 0.11189042776823044, "learning_rate": 0.0011309775607039826, "loss": 2.2819, "step": 418880 }, { "epoch": 1.6193115925221506, "grad_norm": 0.10751363635063171, "learning_rate": 0.001130747447515971, "loss": 2.2717, "step": 418890 }, { "epoch": 1.6193502497255339, "grad_norm": 0.10153806954622269, "learning_rate": 0.0011305173952286336, "loss": 2.2667, "step": 418900 }, { "epoch": 1.6193889069289171, "grad_norm": 0.10106009989976883, "learning_rate": 0.001130287403793644, "loss": 2.2728, "step": 418910 }, { "epoch": 1.6194275641323004, "grad_norm": 0.10318268090486526, "learning_rate": 0.0011300574731627382, "loss": 2.2667, "step": 418920 }, { "epoch": 1.6194662213356836, "grad_norm": 0.11694622784852982, "learning_rate": 0.0011298276032877164, "loss": 2.263, "step": 418930 }, { "epoch": 1.6195048785390669, "grad_norm": 0.10246624052524567, "learning_rate": 0.0011295977941204423, "loss": 2.2536, "step": 418940 }, { "epoch": 1.6195435357424501, "grad_norm": 0.10643582791090012, "learning_rate": 0.0011293680456128433, "loss": 2.2751, "step": 418950 }, { "epoch": 1.6195821929458334, "grad_norm": 0.09867900609970093, "learning_rate": 0.0011291383577169106, "loss": 2.2763, "step": 418960 }, { "epoch": 1.6196208501492166, "grad_norm": 0.11613151431083679, "learning_rate": 0.0011289087303846972, "loss": 2.2676, "step": 418970 }, { "epoch": 1.6196595073526001, "grad_norm": 0.10688113421201706, "learning_rate": 0.0011286791635683215, "loss": 2.2639, "step": 418980 }, { "epoch": 1.6196981645559834, "grad_norm": 0.1168067455291748, "learning_rate": 0.001128449657219963, "loss": 2.2835, "step": 418990 }, { "epoch": 1.6197368217593666, "grad_norm": 0.11430569738149643, "learning_rate": 0.0011282202112918654, "loss": 2.2703, "step": 419000 }, { "epoch": 1.6197754789627499, "grad_norm": 0.09900334477424622, "learning_rate": 0.0011279908257363345, "loss": 2.2908, "step": 419010 }, { "epoch": 1.6198141361661333, "grad_norm": 0.09957217425107956, "learning_rate": 0.0011277615005057392, "loss": 2.2657, "step": 419020 }, { "epoch": 1.6198527933695166, "grad_norm": 0.1115746945142746, "learning_rate": 0.0011275322355525103, "loss": 2.2734, "step": 419030 }, { "epoch": 1.6198914505728998, "grad_norm": 0.1849929392337799, "learning_rate": 0.0011273030308291427, "loss": 2.2604, "step": 419040 }, { "epoch": 1.619930107776283, "grad_norm": 0.1009269431233406, "learning_rate": 0.0011270738862881923, "loss": 2.2667, "step": 419050 }, { "epoch": 1.6199687649796664, "grad_norm": 0.11893726885318756, "learning_rate": 0.0011268448018822773, "loss": 2.2531, "step": 419060 }, { "epoch": 1.6200074221830496, "grad_norm": 0.09925411641597748, "learning_rate": 0.001126615777564078, "loss": 2.2606, "step": 419070 }, { "epoch": 1.6200460793864329, "grad_norm": 0.09997145086526871, "learning_rate": 0.001126386813286338, "loss": 2.2594, "step": 419080 }, { "epoch": 1.620084736589816, "grad_norm": 0.10257407277822495, "learning_rate": 0.001126157909001861, "loss": 2.272, "step": 419090 }, { "epoch": 1.6201233937931994, "grad_norm": 0.11000483483076096, "learning_rate": 0.0011259290646635138, "loss": 2.2661, "step": 419100 }, { "epoch": 1.6201620509965826, "grad_norm": 0.10379195958375931, "learning_rate": 0.0011257002802242243, "loss": 2.2706, "step": 419110 }, { "epoch": 1.6202007081999659, "grad_norm": 0.10364500433206558, "learning_rate": 0.001125471555636982, "loss": 2.2732, "step": 419120 }, { "epoch": 1.6202393654033491, "grad_norm": 0.09665635973215103, "learning_rate": 0.0011252428908548385, "loss": 2.2565, "step": 419130 }, { "epoch": 1.6202780226067326, "grad_norm": 0.11705910414457321, "learning_rate": 0.0011250142858309057, "loss": 2.2614, "step": 419140 }, { "epoch": 1.6203166798101158, "grad_norm": 0.10699387639760971, "learning_rate": 0.0011247857405183575, "loss": 2.2674, "step": 419150 }, { "epoch": 1.620355337013499, "grad_norm": 0.11837508529424667, "learning_rate": 0.0011245572548704286, "loss": 2.261, "step": 419160 }, { "epoch": 1.6203939942168823, "grad_norm": 0.10276247560977936, "learning_rate": 0.0011243288288404144, "loss": 2.2798, "step": 419170 }, { "epoch": 1.6204326514202656, "grad_norm": 0.10083647817373276, "learning_rate": 0.0011241004623816725, "loss": 2.2655, "step": 419180 }, { "epoch": 1.620471308623649, "grad_norm": 0.10014968365430832, "learning_rate": 0.0011238721554476198, "loss": 2.267, "step": 419190 }, { "epoch": 1.6205099658270323, "grad_norm": 0.10161503404378891, "learning_rate": 0.0011236439079917342, "loss": 2.2813, "step": 419200 }, { "epoch": 1.6205486230304156, "grad_norm": 0.11093269288539886, "learning_rate": 0.001123415719967555, "loss": 2.2858, "step": 419210 }, { "epoch": 1.6205872802337988, "grad_norm": 0.10087086260318756, "learning_rate": 0.001123187591328681, "loss": 2.2699, "step": 419220 }, { "epoch": 1.620625937437182, "grad_norm": 0.11747071892023087, "learning_rate": 0.001122959522028772, "loss": 2.2757, "step": 419230 }, { "epoch": 1.6206645946405653, "grad_norm": 0.11029363423585892, "learning_rate": 0.0011227315120215478, "loss": 2.2642, "step": 419240 }, { "epoch": 1.6207032518439486, "grad_norm": 0.12040729075670242, "learning_rate": 0.0011225035612607876, "loss": 2.2605, "step": 419250 }, { "epoch": 1.6207419090473318, "grad_norm": 0.11009768396615982, "learning_rate": 0.0011222756697003324, "loss": 2.2727, "step": 419260 }, { "epoch": 1.620780566250715, "grad_norm": 0.11284641921520233, "learning_rate": 0.0011220478372940812, "loss": 2.2584, "step": 419270 }, { "epoch": 1.6208192234540983, "grad_norm": 0.10155908763408661, "learning_rate": 0.0011218200639959942, "loss": 2.2634, "step": 419280 }, { "epoch": 1.6208578806574816, "grad_norm": 0.10522062331438065, "learning_rate": 0.0011215923497600898, "loss": 2.2669, "step": 419290 }, { "epoch": 1.6208965378608648, "grad_norm": 0.09858293831348419, "learning_rate": 0.001121364694540448, "loss": 2.2692, "step": 419300 }, { "epoch": 1.6209351950642483, "grad_norm": 0.11517447233200073, "learning_rate": 0.001121137098291207, "loss": 2.2639, "step": 419310 }, { "epoch": 1.6209738522676316, "grad_norm": 0.10266264528036118, "learning_rate": 0.0011209095609665636, "loss": 2.2645, "step": 419320 }, { "epoch": 1.6210125094710148, "grad_norm": 0.1053464263677597, "learning_rate": 0.0011206820825207756, "loss": 2.2579, "step": 419330 }, { "epoch": 1.621051166674398, "grad_norm": 0.09718520939350128, "learning_rate": 0.001120454662908159, "loss": 2.2619, "step": 419340 }, { "epoch": 1.6210898238777813, "grad_norm": 0.13670788705348969, "learning_rate": 0.0011202273020830891, "loss": 2.2653, "step": 419350 }, { "epoch": 1.6211284810811648, "grad_norm": 0.09898941963911057, "learning_rate": 0.0011200000000000001, "loss": 2.2638, "step": 419360 }, { "epoch": 1.621167138284548, "grad_norm": 0.10254188627004623, "learning_rate": 0.0011197727566133846, "loss": 2.2611, "step": 419370 }, { "epoch": 1.6212057954879313, "grad_norm": 0.12132681906223297, "learning_rate": 0.0011195455718777945, "loss": 2.2588, "step": 419380 }, { "epoch": 1.6212444526913146, "grad_norm": 0.10235419124364853, "learning_rate": 0.0011193184457478403, "loss": 2.2632, "step": 419390 }, { "epoch": 1.6212831098946978, "grad_norm": 0.08859186619520187, "learning_rate": 0.0011190913781781904, "loss": 2.2639, "step": 419400 }, { "epoch": 1.621321767098081, "grad_norm": 0.10409567505121231, "learning_rate": 0.0011188643691235724, "loss": 2.2676, "step": 419410 }, { "epoch": 1.6213604243014643, "grad_norm": 0.10689257085323334, "learning_rate": 0.0011186374185387718, "loss": 2.2685, "step": 419420 }, { "epoch": 1.6213990815048476, "grad_norm": 0.09902604669332504, "learning_rate": 0.0011184105263786323, "loss": 2.2599, "step": 419430 }, { "epoch": 1.6214377387082308, "grad_norm": 0.10523606836795807, "learning_rate": 0.001118183692598056, "loss": 2.2822, "step": 419440 }, { "epoch": 1.621476395911614, "grad_norm": 0.11436944454908371, "learning_rate": 0.0011179569171520024, "loss": 2.2699, "step": 419450 }, { "epoch": 1.6215150531149973, "grad_norm": 0.10277509689331055, "learning_rate": 0.001117730199995489, "loss": 2.2901, "step": 419460 }, { "epoch": 1.6215537103183806, "grad_norm": 0.0945189818739891, "learning_rate": 0.0011175035410835917, "loss": 2.2642, "step": 419470 }, { "epoch": 1.621592367521764, "grad_norm": 0.10312645882368088, "learning_rate": 0.0011172769403714436, "loss": 2.255, "step": 419480 }, { "epoch": 1.6216310247251473, "grad_norm": 0.10012654960155487, "learning_rate": 0.0011170503978142354, "loss": 2.2658, "step": 419490 }, { "epoch": 1.6216696819285306, "grad_norm": 0.11879194527864456, "learning_rate": 0.0011168239133672153, "loss": 2.2703, "step": 419500 }, { "epoch": 1.6217083391319138, "grad_norm": 0.10942061245441437, "learning_rate": 0.001116597486985689, "loss": 2.278, "step": 419510 }, { "epoch": 1.621746996335297, "grad_norm": 0.10702095180749893, "learning_rate": 0.0011163711186250192, "loss": 2.2676, "step": 419520 }, { "epoch": 1.6217856535386805, "grad_norm": 0.09381652623414993, "learning_rate": 0.0011161448082406258, "loss": 2.2658, "step": 419530 }, { "epoch": 1.6218243107420638, "grad_norm": 0.11745423823595047, "learning_rate": 0.0011159185557879862, "loss": 2.2641, "step": 419540 }, { "epoch": 1.621862967945447, "grad_norm": 0.09949396550655365, "learning_rate": 0.0011156923612226342, "loss": 2.2657, "step": 419550 }, { "epoch": 1.6219016251488303, "grad_norm": 0.10507374256849289, "learning_rate": 0.0011154662245001607, "loss": 2.2696, "step": 419560 }, { "epoch": 1.6219402823522135, "grad_norm": 0.10793768614530563, "learning_rate": 0.0011152401455762134, "loss": 2.2578, "step": 419570 }, { "epoch": 1.6219789395555968, "grad_norm": 0.10195645689964294, "learning_rate": 0.0011150141244064967, "loss": 2.2606, "step": 419580 }, { "epoch": 1.62201759675898, "grad_norm": 0.09891880303621292, "learning_rate": 0.001114788160946771, "loss": 2.2521, "step": 419590 }, { "epoch": 1.6220562539623633, "grad_norm": 0.1110696867108345, "learning_rate": 0.0011145622551528539, "loss": 2.2876, "step": 419600 }, { "epoch": 1.6220949111657466, "grad_norm": 0.10356144607067108, "learning_rate": 0.0011143364069806188, "loss": 2.2686, "step": 419610 }, { "epoch": 1.6221335683691298, "grad_norm": 0.09793224185705185, "learning_rate": 0.001114110616385996, "loss": 2.2732, "step": 419620 }, { "epoch": 1.622172225572513, "grad_norm": 0.21252916753292084, "learning_rate": 0.001113884883324971, "loss": 2.2568, "step": 419630 }, { "epoch": 1.6222108827758963, "grad_norm": 0.5498322248458862, "learning_rate": 0.0011136592077535866, "loss": 2.2856, "step": 419640 }, { "epoch": 1.6222495399792798, "grad_norm": 0.10912423580884933, "learning_rate": 0.00111343358962794, "loss": 2.2727, "step": 419650 }, { "epoch": 1.622288197182663, "grad_norm": 0.1227998435497284, "learning_rate": 0.0011132080289041855, "loss": 2.2589, "step": 419660 }, { "epoch": 1.6223268543860463, "grad_norm": 0.10604656487703323, "learning_rate": 0.0011129825255385326, "loss": 2.2862, "step": 419670 }, { "epoch": 1.6223655115894295, "grad_norm": 0.1027936339378357, "learning_rate": 0.0011127570794872469, "loss": 2.2722, "step": 419680 }, { "epoch": 1.622404168792813, "grad_norm": 0.09441990405321121, "learning_rate": 0.0011125316907066485, "loss": 2.2821, "step": 419690 }, { "epoch": 1.6224428259961963, "grad_norm": 0.11164455115795135, "learning_rate": 0.0011123063591531141, "loss": 2.2644, "step": 419700 }, { "epoch": 1.6224814831995795, "grad_norm": 0.11239205300807953, "learning_rate": 0.0011120810847830755, "loss": 2.2709, "step": 419710 }, { "epoch": 1.6225201404029628, "grad_norm": 0.10172021389007568, "learning_rate": 0.0011118558675530196, "loss": 2.2622, "step": 419720 }, { "epoch": 1.622558797606346, "grad_norm": 0.11356845498085022, "learning_rate": 0.0011116307074194877, "loss": 2.2746, "step": 419730 }, { "epoch": 1.6225974548097293, "grad_norm": 0.11192918568849564, "learning_rate": 0.0011114056043390775, "loss": 2.2632, "step": 419740 }, { "epoch": 1.6226361120131125, "grad_norm": 0.10720881819725037, "learning_rate": 0.001111180558268441, "loss": 2.2551, "step": 419750 }, { "epoch": 1.6226747692164958, "grad_norm": 0.10844343900680542, "learning_rate": 0.0011109555691642852, "loss": 2.2425, "step": 419760 }, { "epoch": 1.622713426419879, "grad_norm": 0.09997853636741638, "learning_rate": 0.0011107306369833715, "loss": 2.2721, "step": 419770 }, { "epoch": 1.6227520836232623, "grad_norm": 0.11213473230600357, "learning_rate": 0.0011105057616825165, "loss": 2.2665, "step": 419780 }, { "epoch": 1.6227907408266455, "grad_norm": 0.11055673658847809, "learning_rate": 0.0011102809432185913, "loss": 2.2698, "step": 419790 }, { "epoch": 1.6228293980300288, "grad_norm": 0.13885144889354706, "learning_rate": 0.0011100561815485205, "loss": 2.2631, "step": 419800 }, { "epoch": 1.622868055233412, "grad_norm": 0.09793928265571594, "learning_rate": 0.0011098314766292846, "loss": 2.269, "step": 419810 }, { "epoch": 1.6229067124367955, "grad_norm": 0.09951359778642654, "learning_rate": 0.0011096068284179176, "loss": 2.2551, "step": 419820 }, { "epoch": 1.6229453696401788, "grad_norm": 0.11357228457927704, "learning_rate": 0.0011093822368715073, "loss": 2.2581, "step": 419830 }, { "epoch": 1.622984026843562, "grad_norm": 0.11530306935310364, "learning_rate": 0.0011091577019471965, "loss": 2.2807, "step": 419840 }, { "epoch": 1.6230226840469453, "grad_norm": 0.1709367036819458, "learning_rate": 0.0011089332236021814, "loss": 2.2832, "step": 419850 }, { "epoch": 1.6230613412503287, "grad_norm": 0.10396969318389893, "learning_rate": 0.0011087088017937123, "loss": 2.2638, "step": 419860 }, { "epoch": 1.623099998453712, "grad_norm": 0.0975300669670105, "learning_rate": 0.0011084844364790932, "loss": 2.2631, "step": 419870 }, { "epoch": 1.6231386556570953, "grad_norm": 0.09769925475120544, "learning_rate": 0.001108260127615682, "loss": 2.2639, "step": 419880 }, { "epoch": 1.6231773128604785, "grad_norm": 0.10420463979244232, "learning_rate": 0.0011080358751608896, "loss": 2.2704, "step": 419890 }, { "epoch": 1.6232159700638618, "grad_norm": 0.10101456940174103, "learning_rate": 0.0011078116790721815, "loss": 2.2717, "step": 419900 }, { "epoch": 1.623254627267245, "grad_norm": 0.09862508624792099, "learning_rate": 0.0011075875393070757, "loss": 2.268, "step": 419910 }, { "epoch": 1.6232932844706283, "grad_norm": 0.10348057001829147, "learning_rate": 0.001107363455823144, "loss": 2.2682, "step": 419920 }, { "epoch": 1.6233319416740115, "grad_norm": 0.09531467407941818, "learning_rate": 0.0011071394285780114, "loss": 2.2713, "step": 419930 }, { "epoch": 1.6233705988773948, "grad_norm": 0.10556498169898987, "learning_rate": 0.0011069154575293555, "loss": 2.287, "step": 419940 }, { "epoch": 1.623409256080778, "grad_norm": 0.10589485615491867, "learning_rate": 0.0011066915426349083, "loss": 2.2685, "step": 419950 }, { "epoch": 1.6234479132841613, "grad_norm": 0.10087093710899353, "learning_rate": 0.001106467683852453, "loss": 2.2755, "step": 419960 }, { "epoch": 1.6234865704875445, "grad_norm": 0.1181773990392685, "learning_rate": 0.001106243881139827, "loss": 2.2547, "step": 419970 }, { "epoch": 1.6235252276909278, "grad_norm": 0.11872897297143936, "learning_rate": 0.0011060201344549197, "loss": 2.2633, "step": 419980 }, { "epoch": 1.6235638848943112, "grad_norm": 0.10826791822910309, "learning_rate": 0.0011057964437556737, "loss": 2.2715, "step": 419990 }, { "epoch": 1.6236025420976945, "grad_norm": 0.11399926245212555, "learning_rate": 0.0011055728090000843, "loss": 2.2629, "step": 420000 }, { "epoch": 1.6236411993010778, "grad_norm": 0.10690005123615265, "learning_rate": 0.0011053492301461986, "loss": 2.2877, "step": 420010 }, { "epoch": 1.623679856504461, "grad_norm": 0.09525414556264877, "learning_rate": 0.0011051257071521164, "loss": 2.2659, "step": 420020 }, { "epoch": 1.6237185137078445, "grad_norm": 0.11313311755657196, "learning_rate": 0.0011049022399759903, "loss": 2.26, "step": 420030 }, { "epoch": 1.6237571709112277, "grad_norm": 0.09924615174531937, "learning_rate": 0.0011046788285760246, "loss": 2.2647, "step": 420040 }, { "epoch": 1.623795828114611, "grad_norm": 0.11179231852293015, "learning_rate": 0.0011044554729104754, "loss": 2.2733, "step": 420050 }, { "epoch": 1.6238344853179942, "grad_norm": 0.11675616353750229, "learning_rate": 0.0011042321729376524, "loss": 2.2761, "step": 420060 }, { "epoch": 1.6238731425213775, "grad_norm": 0.10813635587692261, "learning_rate": 0.0011040089286159151, "loss": 2.2721, "step": 420070 }, { "epoch": 1.6239117997247607, "grad_norm": 0.10933609306812286, "learning_rate": 0.0011037857399036769, "loss": 2.2706, "step": 420080 }, { "epoch": 1.623950456928144, "grad_norm": 0.10850953310728073, "learning_rate": 0.0011035626067594012, "loss": 2.2633, "step": 420090 }, { "epoch": 1.6239891141315272, "grad_norm": 0.10027583688497543, "learning_rate": 0.0011033395291416043, "loss": 2.2656, "step": 420100 }, { "epoch": 1.6240277713349105, "grad_norm": 0.12081553786993027, "learning_rate": 0.0011031165070088536, "loss": 2.2705, "step": 420110 }, { "epoch": 1.6240664285382937, "grad_norm": 0.10444925725460052, "learning_rate": 0.001102893540319768, "loss": 2.2702, "step": 420120 }, { "epoch": 1.624105085741677, "grad_norm": 0.09450006484985352, "learning_rate": 0.0011026706290330178, "loss": 2.2757, "step": 420130 }, { "epoch": 1.6241437429450603, "grad_norm": 0.10357379168272018, "learning_rate": 0.001102447773107325, "loss": 2.2538, "step": 420140 }, { "epoch": 1.6241824001484435, "grad_norm": 0.10619291663169861, "learning_rate": 0.0011022249725014623, "loss": 2.2647, "step": 420150 }, { "epoch": 1.624221057351827, "grad_norm": 0.10615566372871399, "learning_rate": 0.0011020022271742542, "loss": 2.2627, "step": 420160 }, { "epoch": 1.6242597145552102, "grad_norm": 0.10659230500459671, "learning_rate": 0.0011017795370845752, "loss": 2.2682, "step": 420170 }, { "epoch": 1.6242983717585935, "grad_norm": 0.113923080265522, "learning_rate": 0.0011015569021913519, "loss": 2.2535, "step": 420180 }, { "epoch": 1.6243370289619767, "grad_norm": 0.15182152390480042, "learning_rate": 0.0011013343224535613, "loss": 2.2496, "step": 420190 }, { "epoch": 1.6243756861653602, "grad_norm": 0.10359169542789459, "learning_rate": 0.0011011117978302308, "loss": 2.2733, "step": 420200 }, { "epoch": 1.6244143433687435, "grad_norm": 0.12292884290218353, "learning_rate": 0.0011008893282804392, "loss": 2.2629, "step": 420210 }, { "epoch": 1.6244530005721267, "grad_norm": 0.09783503413200378, "learning_rate": 0.0011006669137633154, "loss": 2.2841, "step": 420220 }, { "epoch": 1.62449165777551, "grad_norm": 0.10937552899122238, "learning_rate": 0.0011004445542380393, "loss": 2.2746, "step": 420230 }, { "epoch": 1.6245303149788932, "grad_norm": 0.10353318601846695, "learning_rate": 0.0011002222496638407, "loss": 2.2522, "step": 420240 }, { "epoch": 1.6245689721822765, "grad_norm": 0.10546938329935074, "learning_rate": 0.0011, "loss": 2.2612, "step": 420250 }, { "epoch": 1.6246076293856597, "grad_norm": 0.10323894023895264, "learning_rate": 0.0010997778052058483, "loss": 2.2683, "step": 420260 }, { "epoch": 1.624646286589043, "grad_norm": 0.10143966972827911, "learning_rate": 0.0010995556652407656, "loss": 2.2774, "step": 420270 }, { "epoch": 1.6246849437924262, "grad_norm": 0.10892364382743835, "learning_rate": 0.0010993335800641838, "loss": 2.2676, "step": 420280 }, { "epoch": 1.6247236009958095, "grad_norm": 0.10089428722858429, "learning_rate": 0.0010991115496355834, "loss": 2.2923, "step": 420290 }, { "epoch": 1.6247622581991927, "grad_norm": 0.10978174954652786, "learning_rate": 0.0010988895739144954, "loss": 2.2562, "step": 420300 }, { "epoch": 1.624800915402576, "grad_norm": 0.11797042936086655, "learning_rate": 0.0010986676528605003, "loss": 2.2604, "step": 420310 }, { "epoch": 1.6248395726059592, "grad_norm": 0.10992129147052765, "learning_rate": 0.0010984457864332284, "loss": 2.2747, "step": 420320 }, { "epoch": 1.6248782298093427, "grad_norm": 0.12120220065116882, "learning_rate": 0.0010982239745923603, "loss": 2.2727, "step": 420330 }, { "epoch": 1.624916887012726, "grad_norm": 0.10300073027610779, "learning_rate": 0.0010980022172976255, "loss": 2.2783, "step": 420340 }, { "epoch": 1.6249555442161092, "grad_norm": 0.11692921817302704, "learning_rate": 0.0010977805145088034, "loss": 2.274, "step": 420350 }, { "epoch": 1.6249942014194925, "grad_norm": 0.12124430388212204, "learning_rate": 0.0010975588661857217, "loss": 2.2814, "step": 420360 }, { "epoch": 1.625032858622876, "grad_norm": 0.10215267539024353, "learning_rate": 0.0010973372722882595, "loss": 2.271, "step": 420370 }, { "epoch": 1.6250715158262592, "grad_norm": 0.10612752288579941, "learning_rate": 0.001097115732776343, "loss": 2.2673, "step": 420380 }, { "epoch": 1.6251101730296424, "grad_norm": 0.09550405293703079, "learning_rate": 0.001096894247609949, "loss": 2.271, "step": 420390 }, { "epoch": 1.6251488302330257, "grad_norm": 0.1077994704246521, "learning_rate": 0.001096672816749103, "loss": 2.2609, "step": 420400 }, { "epoch": 1.625187487436409, "grad_norm": 0.12608639895915985, "learning_rate": 0.0010964514401538789, "loss": 2.243, "step": 420410 }, { "epoch": 1.6252261446397922, "grad_norm": 0.10839331150054932, "learning_rate": 0.0010962301177843998, "loss": 2.2636, "step": 420420 }, { "epoch": 1.6252648018431755, "grad_norm": 0.11711254715919495, "learning_rate": 0.0010960088496008381, "loss": 2.2559, "step": 420430 }, { "epoch": 1.6253034590465587, "grad_norm": 0.10556913167238235, "learning_rate": 0.0010957876355634148, "loss": 2.2625, "step": 420440 }, { "epoch": 1.625342116249942, "grad_norm": 0.11005790531635284, "learning_rate": 0.001095566475632399, "loss": 2.2638, "step": 420450 }, { "epoch": 1.6253807734533252, "grad_norm": 0.121341273188591, "learning_rate": 0.0010953453697681087, "loss": 2.2733, "step": 420460 }, { "epoch": 1.6254194306567085, "grad_norm": 0.12211687862873077, "learning_rate": 0.0010951243179309106, "loss": 2.2707, "step": 420470 }, { "epoch": 1.6254580878600917, "grad_norm": 0.10479617863893509, "learning_rate": 0.0010949033200812193, "loss": 2.2619, "step": 420480 }, { "epoch": 1.625496745063475, "grad_norm": 0.11355694383382797, "learning_rate": 0.001094682376179498, "loss": 2.268, "step": 420490 }, { "epoch": 1.6255354022668584, "grad_norm": 0.11647163331508636, "learning_rate": 0.0010944614861862583, "loss": 2.2681, "step": 420500 }, { "epoch": 1.6255740594702417, "grad_norm": 0.11764030158519745, "learning_rate": 0.00109424065006206, "loss": 2.2733, "step": 420510 }, { "epoch": 1.625612716673625, "grad_norm": 0.1114291176199913, "learning_rate": 0.00109401986776751, "loss": 2.2679, "step": 420520 }, { "epoch": 1.6256513738770082, "grad_norm": 0.1021851897239685, "learning_rate": 0.0010937991392632647, "loss": 2.2714, "step": 420530 }, { "epoch": 1.6256900310803917, "grad_norm": 0.10080353170633316, "learning_rate": 0.0010935784645100272, "loss": 2.2602, "step": 420540 }, { "epoch": 1.625728688283775, "grad_norm": 0.10580401122570038, "learning_rate": 0.001093357843468549, "loss": 2.2692, "step": 420550 }, { "epoch": 1.6257673454871582, "grad_norm": 0.1270064115524292, "learning_rate": 0.0010931372760996294, "loss": 2.2556, "step": 420560 }, { "epoch": 1.6258060026905414, "grad_norm": 0.10214035212993622, "learning_rate": 0.0010929167623641147, "loss": 2.2545, "step": 420570 }, { "epoch": 1.6258446598939247, "grad_norm": 0.10115115344524384, "learning_rate": 0.0010926963022228997, "loss": 2.274, "step": 420580 }, { "epoch": 1.625883317097308, "grad_norm": 0.09679996967315674, "learning_rate": 0.0010924758956369258, "loss": 2.2664, "step": 420590 }, { "epoch": 1.6259219743006912, "grad_norm": 0.10778463631868362, "learning_rate": 0.0010922555425671825, "loss": 2.2791, "step": 420600 }, { "epoch": 1.6259606315040744, "grad_norm": 0.1045646145939827, "learning_rate": 0.0010920352429747066, "loss": 2.2572, "step": 420610 }, { "epoch": 1.6259992887074577, "grad_norm": 0.1000080555677414, "learning_rate": 0.0010918149968205818, "loss": 2.2739, "step": 420620 }, { "epoch": 1.626037945910841, "grad_norm": 0.10625805705785751, "learning_rate": 0.0010915948040659388, "loss": 2.2732, "step": 420630 }, { "epoch": 1.6260766031142242, "grad_norm": 0.09651408344507217, "learning_rate": 0.0010913746646719561, "loss": 2.2644, "step": 420640 }, { "epoch": 1.6261152603176074, "grad_norm": 0.10682700574398041, "learning_rate": 0.0010911545785998592, "loss": 2.2696, "step": 420650 }, { "epoch": 1.6261539175209907, "grad_norm": 0.10344824939966202, "learning_rate": 0.001090934545810919, "loss": 2.2726, "step": 420660 }, { "epoch": 1.6261925747243742, "grad_norm": 0.09731447696685791, "learning_rate": 0.0010907145662664556, "loss": 2.2615, "step": 420670 }, { "epoch": 1.6262312319277574, "grad_norm": 0.09883551299571991, "learning_rate": 0.0010904946399278343, "loss": 2.267, "step": 420680 }, { "epoch": 1.6262698891311407, "grad_norm": 0.12334094941616058, "learning_rate": 0.0010902747667564671, "loss": 2.2599, "step": 420690 }, { "epoch": 1.626308546334524, "grad_norm": 0.12517641484737396, "learning_rate": 0.001090054946713814, "loss": 2.2673, "step": 420700 }, { "epoch": 1.6263472035379074, "grad_norm": 0.10702036321163177, "learning_rate": 0.0010898351797613798, "loss": 2.2733, "step": 420710 }, { "epoch": 1.6263858607412907, "grad_norm": 0.11024714261293411, "learning_rate": 0.0010896154658607167, "loss": 2.271, "step": 420720 }, { "epoch": 1.626424517944674, "grad_norm": 0.10311411321163177, "learning_rate": 0.0010893958049734232, "loss": 2.2775, "step": 420730 }, { "epoch": 1.6264631751480572, "grad_norm": 0.1197841688990593, "learning_rate": 0.001089176197061144, "loss": 2.2853, "step": 420740 }, { "epoch": 1.6265018323514404, "grad_norm": 0.10621661692857742, "learning_rate": 0.0010889566420855701, "loss": 2.2552, "step": 420750 }, { "epoch": 1.6265404895548237, "grad_norm": 0.10532528907060623, "learning_rate": 0.0010887371400084386, "loss": 2.2599, "step": 420760 }, { "epoch": 1.626579146758207, "grad_norm": 0.12722547352313995, "learning_rate": 0.001088517690791533, "loss": 2.2556, "step": 420770 }, { "epoch": 1.6266178039615902, "grad_norm": 0.1100541278719902, "learning_rate": 0.0010882982943966814, "loss": 2.2662, "step": 420780 }, { "epoch": 1.6266564611649734, "grad_norm": 0.10416977852582932, "learning_rate": 0.0010880789507857602, "loss": 2.2624, "step": 420790 }, { "epoch": 1.6266951183683567, "grad_norm": 0.11241607367992401, "learning_rate": 0.0010878596599206895, "loss": 2.2601, "step": 420800 }, { "epoch": 1.62673377557174, "grad_norm": 0.1130867525935173, "learning_rate": 0.0010876404217634366, "loss": 2.265, "step": 420810 }, { "epoch": 1.6267724327751232, "grad_norm": 0.11152119189500809, "learning_rate": 0.0010874212362760134, "loss": 2.2722, "step": 420820 }, { "epoch": 1.6268110899785064, "grad_norm": 0.12177984416484833, "learning_rate": 0.001087202103420478, "loss": 2.281, "step": 420830 }, { "epoch": 1.62684974718189, "grad_norm": 0.1110476478934288, "learning_rate": 0.0010869830231589339, "loss": 2.2636, "step": 420840 }, { "epoch": 1.6268884043852732, "grad_norm": 0.11386765539646149, "learning_rate": 0.0010867639954535303, "loss": 2.2469, "step": 420850 }, { "epoch": 1.6269270615886564, "grad_norm": 0.11763129383325577, "learning_rate": 0.001086545020266461, "loss": 2.2629, "step": 420860 }, { "epoch": 1.6269657187920397, "grad_norm": 0.10655208677053452, "learning_rate": 0.0010863260975599665, "loss": 2.2679, "step": 420870 }, { "epoch": 1.6270043759954231, "grad_norm": 0.09815648943185806, "learning_rate": 0.001086107227296331, "loss": 2.2798, "step": 420880 }, { "epoch": 1.6270430331988064, "grad_norm": 0.10880669951438904, "learning_rate": 0.001085888409437885, "loss": 2.2588, "step": 420890 }, { "epoch": 1.6270816904021896, "grad_norm": 0.09757796674966812, "learning_rate": 0.001085669643947003, "loss": 2.2763, "step": 420900 }, { "epoch": 1.627120347605573, "grad_norm": 0.11888976395130157, "learning_rate": 0.0010854509307861057, "loss": 2.2519, "step": 420910 }, { "epoch": 1.6271590048089561, "grad_norm": 0.1087271049618721, "learning_rate": 0.0010852322699176585, "loss": 2.2682, "step": 420920 }, { "epoch": 1.6271976620123394, "grad_norm": 0.10679204016923904, "learning_rate": 0.0010850136613041702, "loss": 2.2729, "step": 420930 }, { "epoch": 1.6272363192157226, "grad_norm": 0.11084762960672379, "learning_rate": 0.001084795104908196, "loss": 2.2588, "step": 420940 }, { "epoch": 1.627274976419106, "grad_norm": 0.09975182265043259, "learning_rate": 0.0010845766006923354, "loss": 2.2691, "step": 420950 }, { "epoch": 1.6273136336224892, "grad_norm": 0.10565955936908722, "learning_rate": 0.0010843581486192323, "loss": 2.2632, "step": 420960 }, { "epoch": 1.6273522908258724, "grad_norm": 0.10742072016000748, "learning_rate": 0.0010841397486515752, "loss": 2.2743, "step": 420970 }, { "epoch": 1.6273909480292557, "grad_norm": 0.10366009920835495, "learning_rate": 0.001083921400752097, "loss": 2.2742, "step": 420980 }, { "epoch": 1.627429605232639, "grad_norm": 0.11645475775003433, "learning_rate": 0.0010837031048835754, "loss": 2.263, "step": 420990 }, { "epoch": 1.6274682624360224, "grad_norm": 0.11208124458789825, "learning_rate": 0.0010834848610088322, "loss": 2.2624, "step": 421000 }, { "epoch": 1.6275069196394056, "grad_norm": 0.11434199661016464, "learning_rate": 0.0010832666690907327, "loss": 2.2651, "step": 421010 }, { "epoch": 1.6275455768427889, "grad_norm": 0.13794845342636108, "learning_rate": 0.0010830485290921881, "loss": 2.2684, "step": 421020 }, { "epoch": 1.6275842340461721, "grad_norm": 0.10673683136701584, "learning_rate": 0.001082830440976152, "loss": 2.2703, "step": 421030 }, { "epoch": 1.6276228912495554, "grad_norm": 0.11057291179895401, "learning_rate": 0.001082612404705623, "loss": 2.2524, "step": 421040 }, { "epoch": 1.6276615484529389, "grad_norm": 0.097368985414505, "learning_rate": 0.001082394420243643, "loss": 2.2665, "step": 421050 }, { "epoch": 1.6277002056563221, "grad_norm": 0.10936520248651505, "learning_rate": 0.0010821764875532987, "loss": 2.2589, "step": 421060 }, { "epoch": 1.6277388628597054, "grad_norm": 0.1091497614979744, "learning_rate": 0.0010819586065977198, "loss": 2.2682, "step": 421070 }, { "epoch": 1.6277775200630886, "grad_norm": 0.10452881455421448, "learning_rate": 0.0010817407773400802, "loss": 2.252, "step": 421080 }, { "epoch": 1.6278161772664719, "grad_norm": 0.1058262288570404, "learning_rate": 0.0010815229997435973, "loss": 2.2656, "step": 421090 }, { "epoch": 1.6278548344698551, "grad_norm": 0.10648338496685028, "learning_rate": 0.0010813052737715318, "loss": 2.2762, "step": 421100 }, { "epoch": 1.6278934916732384, "grad_norm": 0.1072186678647995, "learning_rate": 0.0010810875993871884, "loss": 2.2659, "step": 421110 }, { "epoch": 1.6279321488766216, "grad_norm": 0.10891768336296082, "learning_rate": 0.0010808699765539154, "loss": 2.2588, "step": 421120 }, { "epoch": 1.6279708060800049, "grad_norm": 0.09759233146905899, "learning_rate": 0.0010806524052351038, "loss": 2.2605, "step": 421130 }, { "epoch": 1.6280094632833881, "grad_norm": 0.10803782194852829, "learning_rate": 0.0010804348853941882, "loss": 2.255, "step": 421140 }, { "epoch": 1.6280481204867714, "grad_norm": 0.10355690866708755, "learning_rate": 0.0010802174169946466, "loss": 2.2568, "step": 421150 }, { "epoch": 1.6280867776901546, "grad_norm": 0.09561111778020859, "learning_rate": 0.00108, "loss": 2.2736, "step": 421160 }, { "epoch": 1.6281254348935381, "grad_norm": 0.1167539432644844, "learning_rate": 0.0010797826343738129, "loss": 2.2633, "step": 421170 }, { "epoch": 1.6281640920969214, "grad_norm": 0.11473812162876129, "learning_rate": 0.001079565320079692, "loss": 2.2681, "step": 421180 }, { "epoch": 1.6282027493003046, "grad_norm": 0.11049634218215942, "learning_rate": 0.0010793480570812878, "loss": 2.2605, "step": 421190 }, { "epoch": 1.6282414065036879, "grad_norm": 0.10097356140613556, "learning_rate": 0.001079130845342293, "loss": 2.2665, "step": 421200 }, { "epoch": 1.6282800637070711, "grad_norm": 0.1207822859287262, "learning_rate": 0.0010789136848264435, "loss": 2.2594, "step": 421210 }, { "epoch": 1.6283187209104546, "grad_norm": 0.11211457848548889, "learning_rate": 0.0010786965754975184, "loss": 2.2519, "step": 421220 }, { "epoch": 1.6283573781138379, "grad_norm": 0.10926574468612671, "learning_rate": 0.0010784795173193382, "loss": 2.2675, "step": 421230 }, { "epoch": 1.628396035317221, "grad_norm": 0.11232150346040726, "learning_rate": 0.001078262510255767, "loss": 2.2647, "step": 421240 }, { "epoch": 1.6284346925206044, "grad_norm": 0.1060982421040535, "learning_rate": 0.0010780455542707113, "loss": 2.2656, "step": 421250 }, { "epoch": 1.6284733497239876, "grad_norm": 0.11022268980741501, "learning_rate": 0.0010778286493281198, "loss": 2.2537, "step": 421260 }, { "epoch": 1.6285120069273709, "grad_norm": 0.10144762694835663, "learning_rate": 0.0010776117953919834, "loss": 2.2738, "step": 421270 }, { "epoch": 1.628550664130754, "grad_norm": 0.11215156316757202, "learning_rate": 0.0010773949924263364, "loss": 2.2656, "step": 421280 }, { "epoch": 1.6285893213341374, "grad_norm": 0.1022103950381279, "learning_rate": 0.0010771782403952539, "loss": 2.258, "step": 421290 }, { "epoch": 1.6286279785375206, "grad_norm": 0.10472244024276733, "learning_rate": 0.0010769615392628537, "loss": 2.2595, "step": 421300 }, { "epoch": 1.6286666357409039, "grad_norm": 0.09746657311916351, "learning_rate": 0.0010767448889932967, "loss": 2.2646, "step": 421310 }, { "epoch": 1.6287052929442871, "grad_norm": 0.12648841738700867, "learning_rate": 0.0010765282895507844, "loss": 2.2643, "step": 421320 }, { "epoch": 1.6287439501476704, "grad_norm": 0.10947658121585846, "learning_rate": 0.0010763117408995608, "loss": 2.2662, "step": 421330 }, { "epoch": 1.6287826073510538, "grad_norm": 0.10089129209518433, "learning_rate": 0.0010760952430039121, "loss": 2.2622, "step": 421340 }, { "epoch": 1.628821264554437, "grad_norm": 0.10407378524541855, "learning_rate": 0.0010758787958281663, "loss": 2.2571, "step": 421350 }, { "epoch": 1.6288599217578204, "grad_norm": 0.10484025627374649, "learning_rate": 0.0010756623993366924, "loss": 2.2617, "step": 421360 }, { "epoch": 1.6288985789612036, "grad_norm": 0.09679654240608215, "learning_rate": 0.0010754460534939025, "loss": 2.2711, "step": 421370 }, { "epoch": 1.6289372361645869, "grad_norm": 0.10423847287893295, "learning_rate": 0.0010752297582642487, "loss": 2.278, "step": 421380 }, { "epoch": 1.6289758933679703, "grad_norm": 0.1128976121544838, "learning_rate": 0.001075013513612226, "loss": 2.269, "step": 421390 }, { "epoch": 1.6290145505713536, "grad_norm": 0.10138994455337524, "learning_rate": 0.0010747973195023696, "loss": 2.2543, "step": 421400 }, { "epoch": 1.6290532077747368, "grad_norm": 0.1152886152267456, "learning_rate": 0.001074581175899258, "loss": 2.2649, "step": 421410 }, { "epoch": 1.62909186497812, "grad_norm": 0.11228503286838531, "learning_rate": 0.001074365082767509, "loss": 2.2435, "step": 421420 }, { "epoch": 1.6291305221815033, "grad_norm": 0.10597655922174454, "learning_rate": 0.0010741490400717835, "loss": 2.2545, "step": 421430 }, { "epoch": 1.6291691793848866, "grad_norm": 0.11497277766466141, "learning_rate": 0.0010739330477767819, "loss": 2.2544, "step": 421440 }, { "epoch": 1.6292078365882698, "grad_norm": 0.10805842280387878, "learning_rate": 0.0010737171058472473, "loss": 2.2745, "step": 421450 }, { "epoch": 1.629246493791653, "grad_norm": 0.09347794204950333, "learning_rate": 0.0010735012142479624, "loss": 2.2508, "step": 421460 }, { "epoch": 1.6292851509950363, "grad_norm": 0.10947436839342117, "learning_rate": 0.0010732853729437527, "loss": 2.2613, "step": 421470 }, { "epoch": 1.6293238081984196, "grad_norm": 0.10064587742090225, "learning_rate": 0.0010730695818994827, "loss": 2.27, "step": 421480 }, { "epoch": 1.6293624654018029, "grad_norm": 0.10791762918233871, "learning_rate": 0.0010728538410800592, "loss": 2.2668, "step": 421490 }, { "epoch": 1.629401122605186, "grad_norm": 0.11491987109184265, "learning_rate": 0.0010726381504504296, "loss": 2.2609, "step": 421500 }, { "epoch": 1.6294397798085696, "grad_norm": 0.11298567801713943, "learning_rate": 0.0010724225099755815, "loss": 2.2602, "step": 421510 }, { "epoch": 1.6294784370119528, "grad_norm": 0.10923970490694046, "learning_rate": 0.0010722069196205438, "loss": 2.2569, "step": 421520 }, { "epoch": 1.629517094215336, "grad_norm": 0.10620459914207458, "learning_rate": 0.0010719913793503855, "loss": 2.2606, "step": 421530 }, { "epoch": 1.6295557514187193, "grad_norm": 0.10068482905626297, "learning_rate": 0.0010717758891302168, "loss": 2.2579, "step": 421540 }, { "epoch": 1.6295944086221028, "grad_norm": 0.10469921678304672, "learning_rate": 0.0010715604489251872, "loss": 2.2707, "step": 421550 }, { "epoch": 1.629633065825486, "grad_norm": 0.10165794938802719, "learning_rate": 0.001071345058700488, "loss": 2.2648, "step": 421560 }, { "epoch": 1.6296717230288693, "grad_norm": 0.10368034243583679, "learning_rate": 0.0010711297184213505, "loss": 2.2566, "step": 421570 }, { "epoch": 1.6297103802322526, "grad_norm": 1.6353310346603394, "learning_rate": 0.0010709144280530451, "loss": 2.2658, "step": 421580 }, { "epoch": 1.6297490374356358, "grad_norm": 0.1396324336528778, "learning_rate": 0.0010706991875608844, "loss": 2.2754, "step": 421590 }, { "epoch": 1.629787694639019, "grad_norm": 0.26416754722595215, "learning_rate": 0.00107048399691022, "loss": 2.2707, "step": 421600 }, { "epoch": 1.6298263518424023, "grad_norm": 0.10929203778505325, "learning_rate": 0.0010702688560664433, "loss": 2.2718, "step": 421610 }, { "epoch": 1.6298650090457856, "grad_norm": 0.10347554087638855, "learning_rate": 0.0010700537649949865, "loss": 2.2622, "step": 421620 }, { "epoch": 1.6299036662491688, "grad_norm": 0.10963423550128937, "learning_rate": 0.0010698387236613212, "loss": 2.2673, "step": 421630 }, { "epoch": 1.629942323452552, "grad_norm": 0.10632467269897461, "learning_rate": 0.0010696237320309595, "loss": 2.2679, "step": 421640 }, { "epoch": 1.6299809806559353, "grad_norm": 0.1066928431391716, "learning_rate": 0.0010694087900694528, "loss": 2.2515, "step": 421650 }, { "epoch": 1.6300196378593186, "grad_norm": 0.10093319416046143, "learning_rate": 0.0010691938977423923, "loss": 2.2593, "step": 421660 }, { "epoch": 1.6300582950627018, "grad_norm": 0.11082525551319122, "learning_rate": 0.0010689790550154094, "loss": 2.2578, "step": 421670 }, { "epoch": 1.6300969522660853, "grad_norm": 0.10855924338102341, "learning_rate": 0.0010687642618541747, "loss": 2.2655, "step": 421680 }, { "epoch": 1.6301356094694686, "grad_norm": 0.1240989938378334, "learning_rate": 0.0010685495182243985, "loss": 2.2705, "step": 421690 }, { "epoch": 1.6301742666728518, "grad_norm": 0.1026359423995018, "learning_rate": 0.001068334824091831, "loss": 2.2647, "step": 421700 }, { "epoch": 1.630212923876235, "grad_norm": 0.11498430371284485, "learning_rate": 0.0010681201794222606, "loss": 2.2564, "step": 421710 }, { "epoch": 1.6302515810796185, "grad_norm": 0.11296871304512024, "learning_rate": 0.0010679055841815166, "loss": 2.2658, "step": 421720 }, { "epoch": 1.6302902382830018, "grad_norm": 0.10595811158418655, "learning_rate": 0.0010676910383354667, "loss": 2.2702, "step": 421730 }, { "epoch": 1.630328895486385, "grad_norm": 0.10797400027513504, "learning_rate": 0.0010674765418500188, "loss": 2.2681, "step": 421740 }, { "epoch": 1.6303675526897683, "grad_norm": 0.11341440677642822, "learning_rate": 0.0010672620946911185, "loss": 2.2671, "step": 421750 }, { "epoch": 1.6304062098931515, "grad_norm": 0.107243113219738, "learning_rate": 0.001067047696824752, "loss": 2.2596, "step": 421760 }, { "epoch": 1.6304448670965348, "grad_norm": 0.10347296297550201, "learning_rate": 0.0010668333482169437, "loss": 2.2473, "step": 421770 }, { "epoch": 1.630483524299918, "grad_norm": 0.12594357132911682, "learning_rate": 0.0010666190488337573, "loss": 2.2627, "step": 421780 }, { "epoch": 1.6305221815033013, "grad_norm": 0.1074180155992508, "learning_rate": 0.0010664047986412954, "loss": 2.2652, "step": 421790 }, { "epoch": 1.6305608387066846, "grad_norm": 0.11287180334329605, "learning_rate": 0.0010661905976056998, "loss": 2.2577, "step": 421800 }, { "epoch": 1.6305994959100678, "grad_norm": 0.11506509780883789, "learning_rate": 0.0010659764456931507, "loss": 2.2769, "step": 421810 }, { "epoch": 1.630638153113451, "grad_norm": 0.1361812800168991, "learning_rate": 0.001065762342869867, "loss": 2.2674, "step": 421820 }, { "epoch": 1.6306768103168343, "grad_norm": 0.11115347594022751, "learning_rate": 0.0010655482891021068, "loss": 2.2571, "step": 421830 }, { "epoch": 1.6307154675202176, "grad_norm": 0.11083522439002991, "learning_rate": 0.0010653342843561662, "loss": 2.2825, "step": 421840 }, { "epoch": 1.630754124723601, "grad_norm": 0.10724809020757675, "learning_rate": 0.0010651203285983806, "loss": 2.2671, "step": 421850 }, { "epoch": 1.6307927819269843, "grad_norm": 0.09956831485033035, "learning_rate": 0.0010649064217951234, "loss": 2.2543, "step": 421860 }, { "epoch": 1.6308314391303675, "grad_norm": 0.09386299550533295, "learning_rate": 0.0010646925639128063, "loss": 2.2573, "step": 421870 }, { "epoch": 1.6308700963337508, "grad_norm": 0.10659188777208328, "learning_rate": 0.00106447875491788, "loss": 2.2649, "step": 421880 }, { "epoch": 1.6309087535371343, "grad_norm": 0.10014776140451431, "learning_rate": 0.0010642649947768333, "loss": 2.2568, "step": 421890 }, { "epoch": 1.6309474107405175, "grad_norm": 0.1105884313583374, "learning_rate": 0.0010640512834561929, "loss": 2.2558, "step": 421900 }, { "epoch": 1.6309860679439008, "grad_norm": 0.1113891750574112, "learning_rate": 0.001063837620922524, "loss": 2.2705, "step": 421910 }, { "epoch": 1.631024725147284, "grad_norm": 0.1098250225186348, "learning_rate": 0.00106362400714243, "loss": 2.2734, "step": 421920 }, { "epoch": 1.6310633823506673, "grad_norm": 0.10185971111059189, "learning_rate": 0.0010634104420825525, "loss": 2.2689, "step": 421930 }, { "epoch": 1.6311020395540505, "grad_norm": 0.10226941853761673, "learning_rate": 0.0010631969257095704, "loss": 2.2564, "step": 421940 }, { "epoch": 1.6311406967574338, "grad_norm": 0.11283935606479645, "learning_rate": 0.0010629834579902017, "loss": 2.2797, "step": 421950 }, { "epoch": 1.631179353960817, "grad_norm": 0.10663514584302902, "learning_rate": 0.0010627700388912014, "loss": 2.27, "step": 421960 }, { "epoch": 1.6312180111642003, "grad_norm": 0.10804902017116547, "learning_rate": 0.0010625566683793628, "loss": 2.2518, "step": 421970 }, { "epoch": 1.6312566683675835, "grad_norm": 0.12151705473661423, "learning_rate": 0.0010623433464215166, "loss": 2.2595, "step": 421980 }, { "epoch": 1.6312953255709668, "grad_norm": 0.11235487461090088, "learning_rate": 0.0010621300729845315, "loss": 2.2606, "step": 421990 }, { "epoch": 1.63133398277435, "grad_norm": 0.10612896829843521, "learning_rate": 0.001061916848035314, "loss": 2.2611, "step": 422000 }, { "epoch": 1.6313726399777333, "grad_norm": 0.1074008122086525, "learning_rate": 0.001061703671540808, "loss": 2.2533, "step": 422010 }, { "epoch": 1.6314112971811168, "grad_norm": 0.10118181258440018, "learning_rate": 0.0010614905434679946, "loss": 2.2655, "step": 422020 }, { "epoch": 1.6314499543845, "grad_norm": 0.11081536114215851, "learning_rate": 0.001061277463783893, "loss": 2.2743, "step": 422030 }, { "epoch": 1.6314886115878833, "grad_norm": 0.098418690264225, "learning_rate": 0.0010610644324555599, "loss": 2.2712, "step": 422040 }, { "epoch": 1.6315272687912665, "grad_norm": 0.11335956305265427, "learning_rate": 0.0010608514494500883, "loss": 2.2658, "step": 422050 }, { "epoch": 1.63156592599465, "grad_norm": 0.11804255843162537, "learning_rate": 0.0010606385147346098, "loss": 2.266, "step": 422060 }, { "epoch": 1.6316045831980333, "grad_norm": 0.12196633964776993, "learning_rate": 0.0010604256282762924, "loss": 2.2583, "step": 422070 }, { "epoch": 1.6316432404014165, "grad_norm": 0.10328137874603271, "learning_rate": 0.0010602127900423417, "loss": 2.2675, "step": 422080 }, { "epoch": 1.6316818976047998, "grad_norm": 0.09585174173116684, "learning_rate": 0.0010600000000000002, "loss": 2.283, "step": 422090 }, { "epoch": 1.631720554808183, "grad_norm": 0.10802491754293442, "learning_rate": 0.0010597872581165472, "loss": 2.2461, "step": 422100 }, { "epoch": 1.6317592120115663, "grad_norm": 0.10576233267784119, "learning_rate": 0.0010595745643593002, "loss": 2.2696, "step": 422110 }, { "epoch": 1.6317978692149495, "grad_norm": 0.10108242928981781, "learning_rate": 0.0010593619186956122, "loss": 2.2586, "step": 422120 }, { "epoch": 1.6318365264183328, "grad_norm": 0.11135169863700867, "learning_rate": 0.001059149321092874, "loss": 2.2715, "step": 422130 }, { "epoch": 1.631875183621716, "grad_norm": 0.10790157318115234, "learning_rate": 0.0010589367715185128, "loss": 2.26, "step": 422140 }, { "epoch": 1.6319138408250993, "grad_norm": 0.11493754386901855, "learning_rate": 0.0010587242699399927, "loss": 2.2616, "step": 422150 }, { "epoch": 1.6319524980284825, "grad_norm": 0.1047576367855072, "learning_rate": 0.0010585118163248144, "loss": 2.2642, "step": 422160 }, { "epoch": 1.6319911552318658, "grad_norm": 0.123683400452137, "learning_rate": 0.0010582994106405157, "loss": 2.2612, "step": 422170 }, { "epoch": 1.632029812435249, "grad_norm": 0.10044895112514496, "learning_rate": 0.0010580870528546706, "loss": 2.2581, "step": 422180 }, { "epoch": 1.6320684696386325, "grad_norm": 0.10729312151670456, "learning_rate": 0.00105787474293489, "loss": 2.2666, "step": 422190 }, { "epoch": 1.6321071268420158, "grad_norm": 0.09733797609806061, "learning_rate": 0.0010576624808488204, "loss": 2.2641, "step": 422200 }, { "epoch": 1.632145784045399, "grad_norm": 0.10946942120790482, "learning_rate": 0.0010574502665641457, "loss": 2.2657, "step": 422210 }, { "epoch": 1.6321844412487823, "grad_norm": 0.14660519361495972, "learning_rate": 0.001057238100048586, "loss": 2.2738, "step": 422220 }, { "epoch": 1.6322230984521657, "grad_norm": 0.1073886975646019, "learning_rate": 0.0010570259812698972, "loss": 2.2757, "step": 422230 }, { "epoch": 1.632261755655549, "grad_norm": 0.10828237980604172, "learning_rate": 0.0010568139101958724, "loss": 2.2506, "step": 422240 }, { "epoch": 1.6323004128589322, "grad_norm": 0.11324787884950638, "learning_rate": 0.0010566018867943397, "loss": 2.2758, "step": 422250 }, { "epoch": 1.6323390700623155, "grad_norm": 0.1002441793680191, "learning_rate": 0.001056389911033164, "loss": 2.2523, "step": 422260 }, { "epoch": 1.6323777272656987, "grad_norm": 0.10068082809448242, "learning_rate": 0.0010561779828802468, "loss": 2.2522, "step": 422270 }, { "epoch": 1.632416384469082, "grad_norm": 0.10656104981899261, "learning_rate": 0.0010559661023035244, "loss": 2.261, "step": 422280 }, { "epoch": 1.6324550416724652, "grad_norm": 0.10906139016151428, "learning_rate": 0.0010557542692709698, "loss": 2.2323, "step": 422290 }, { "epoch": 1.6324936988758485, "grad_norm": 0.10339491069316864, "learning_rate": 0.0010555424837505926, "loss": 2.2685, "step": 422300 }, { "epoch": 1.6325323560792318, "grad_norm": 0.1109342947602272, "learning_rate": 0.0010553307457104367, "loss": 2.2719, "step": 422310 }, { "epoch": 1.632571013282615, "grad_norm": 0.11206500977277756, "learning_rate": 0.0010551190551185824, "loss": 2.2628, "step": 422320 }, { "epoch": 1.6326096704859983, "grad_norm": 0.13042597472667694, "learning_rate": 0.0010549074119431472, "loss": 2.2602, "step": 422330 }, { "epoch": 1.6326483276893815, "grad_norm": 0.10283195972442627, "learning_rate": 0.001054695816152282, "loss": 2.2567, "step": 422340 }, { "epoch": 1.6326869848927648, "grad_norm": 0.1011483371257782, "learning_rate": 0.001054484267714175, "loss": 2.2665, "step": 422350 }, { "epoch": 1.6327256420961482, "grad_norm": 0.10999699681997299, "learning_rate": 0.001054272766597049, "loss": 2.2669, "step": 422360 }, { "epoch": 1.6327642992995315, "grad_norm": 0.10112316906452179, "learning_rate": 0.0010540613127691626, "loss": 2.2715, "step": 422370 }, { "epoch": 1.6328029565029147, "grad_norm": 0.10901953279972076, "learning_rate": 0.0010538499061988104, "loss": 2.2596, "step": 422380 }, { "epoch": 1.632841613706298, "grad_norm": 0.11298448592424393, "learning_rate": 0.0010536385468543218, "loss": 2.2585, "step": 422390 }, { "epoch": 1.6328802709096815, "grad_norm": 0.10417155176401138, "learning_rate": 0.0010534272347040615, "loss": 2.2661, "step": 422400 }, { "epoch": 1.6329189281130647, "grad_norm": 0.10402239859104156, "learning_rate": 0.0010532159697164301, "loss": 2.2584, "step": 422410 }, { "epoch": 1.632957585316448, "grad_norm": 0.10203685611486435, "learning_rate": 0.001053004751859863, "loss": 2.2659, "step": 422420 }, { "epoch": 1.6329962425198312, "grad_norm": 0.10778295248746872, "learning_rate": 0.0010527935811028306, "loss": 2.2572, "step": 422430 }, { "epoch": 1.6330348997232145, "grad_norm": 0.10750208795070648, "learning_rate": 0.0010525824574138393, "loss": 2.2648, "step": 422440 }, { "epoch": 1.6330735569265977, "grad_norm": 0.12133093178272247, "learning_rate": 0.0010523713807614294, "loss": 2.2556, "step": 422450 }, { "epoch": 1.633112214129981, "grad_norm": 0.12352251261472702, "learning_rate": 0.0010521603511141771, "loss": 2.2641, "step": 422460 }, { "epoch": 1.6331508713333642, "grad_norm": 0.10133741050958633, "learning_rate": 0.0010519493684406935, "loss": 2.2601, "step": 422470 }, { "epoch": 1.6331895285367475, "grad_norm": 0.10464505851268768, "learning_rate": 0.0010517384327096242, "loss": 2.2511, "step": 422480 }, { "epoch": 1.6332281857401307, "grad_norm": 0.10249783098697662, "learning_rate": 0.00105152754388965, "loss": 2.2537, "step": 422490 }, { "epoch": 1.633266842943514, "grad_norm": 0.10035975277423859, "learning_rate": 0.0010513167019494862, "loss": 2.2676, "step": 422500 }, { "epoch": 1.6333055001468972, "grad_norm": 0.10722977668046951, "learning_rate": 0.0010511059068578832, "loss": 2.2558, "step": 422510 }, { "epoch": 1.6333441573502805, "grad_norm": 0.10174937546253204, "learning_rate": 0.0010508951585836263, "loss": 2.2707, "step": 422520 }, { "epoch": 1.633382814553664, "grad_norm": 0.10561282187700272, "learning_rate": 0.0010506844570955345, "loss": 2.251, "step": 422530 }, { "epoch": 1.6334214717570472, "grad_norm": 0.1309254914522171, "learning_rate": 0.0010504738023624624, "loss": 2.2728, "step": 422540 }, { "epoch": 1.6334601289604305, "grad_norm": 0.11312398314476013, "learning_rate": 0.0010502631943532987, "loss": 2.2879, "step": 422550 }, { "epoch": 1.6334987861638137, "grad_norm": 0.10548081248998642, "learning_rate": 0.0010500526330369667, "loss": 2.2589, "step": 422560 }, { "epoch": 1.6335374433671972, "grad_norm": 0.10877972841262817, "learning_rate": 0.001049842118382424, "loss": 2.2461, "step": 422570 }, { "epoch": 1.6335761005705804, "grad_norm": 0.14530453085899353, "learning_rate": 0.0010496316503586622, "loss": 2.2595, "step": 422580 }, { "epoch": 1.6336147577739637, "grad_norm": 0.10653182119131088, "learning_rate": 0.0010494212289347085, "loss": 2.265, "step": 422590 }, { "epoch": 1.633653414977347, "grad_norm": 0.10831808298826218, "learning_rate": 0.001049210854079623, "loss": 2.2501, "step": 422600 }, { "epoch": 1.6336920721807302, "grad_norm": 0.10424677282571793, "learning_rate": 0.0010490005257625008, "loss": 2.2619, "step": 422610 }, { "epoch": 1.6337307293841135, "grad_norm": 0.1058325320482254, "learning_rate": 0.0010487902439524708, "loss": 2.2685, "step": 422620 }, { "epoch": 1.6337693865874967, "grad_norm": 0.10791575163602829, "learning_rate": 0.0010485800086186964, "loss": 2.271, "step": 422630 }, { "epoch": 1.63380804379088, "grad_norm": 0.10271866619586945, "learning_rate": 0.0010483698197303745, "loss": 2.2712, "step": 422640 }, { "epoch": 1.6338467009942632, "grad_norm": 0.10433712601661682, "learning_rate": 0.0010481596772567365, "loss": 2.2559, "step": 422650 }, { "epoch": 1.6338853581976465, "grad_norm": 0.1071852296590805, "learning_rate": 0.0010479495811670477, "loss": 2.2714, "step": 422660 }, { "epoch": 1.6339240154010297, "grad_norm": 0.10642591118812561, "learning_rate": 0.001047739531430607, "loss": 2.2587, "step": 422670 }, { "epoch": 1.633962672604413, "grad_norm": 0.10073532909154892, "learning_rate": 0.0010475295280167475, "loss": 2.2643, "step": 422680 }, { "epoch": 1.6340013298077962, "grad_norm": 0.11454087495803833, "learning_rate": 0.0010473195708948359, "loss": 2.2568, "step": 422690 }, { "epoch": 1.6340399870111797, "grad_norm": 0.11369120329618454, "learning_rate": 0.0010471096600342725, "loss": 2.2581, "step": 422700 }, { "epoch": 1.634078644214563, "grad_norm": 0.11510149389505386, "learning_rate": 0.0010468997954044916, "loss": 2.2638, "step": 422710 }, { "epoch": 1.6341173014179462, "grad_norm": 0.10187017917633057, "learning_rate": 0.0010466899769749612, "loss": 2.283, "step": 422720 }, { "epoch": 1.6341559586213295, "grad_norm": 0.10251758247613907, "learning_rate": 0.0010464802047151828, "loss": 2.249, "step": 422730 }, { "epoch": 1.634194615824713, "grad_norm": 0.10828348249197006, "learning_rate": 0.0010462704785946908, "loss": 2.2592, "step": 422740 }, { "epoch": 1.6342332730280962, "grad_norm": 0.11212880164384842, "learning_rate": 0.0010460607985830544, "loss": 2.277, "step": 422750 }, { "epoch": 1.6342719302314794, "grad_norm": 0.12863798439502716, "learning_rate": 0.0010458511646498749, "loss": 2.252, "step": 422760 }, { "epoch": 1.6343105874348627, "grad_norm": 0.1167854368686676, "learning_rate": 0.001045641576764788, "loss": 2.2646, "step": 422770 }, { "epoch": 1.634349244638246, "grad_norm": 0.10574250668287277, "learning_rate": 0.001045432034897462, "loss": 2.2737, "step": 422780 }, { "epoch": 1.6343879018416292, "grad_norm": 0.09611816704273224, "learning_rate": 0.0010452225390175994, "loss": 2.2563, "step": 422790 }, { "epoch": 1.6344265590450124, "grad_norm": 0.18568646907806396, "learning_rate": 0.0010450130890949341, "loss": 2.2791, "step": 422800 }, { "epoch": 1.6344652162483957, "grad_norm": 0.09252162277698517, "learning_rate": 0.0010448036850992358, "loss": 2.2496, "step": 422810 }, { "epoch": 1.634503873451779, "grad_norm": 0.1045931875705719, "learning_rate": 0.0010445943270003051, "loss": 2.2759, "step": 422820 }, { "epoch": 1.6345425306551622, "grad_norm": 0.1014002189040184, "learning_rate": 0.0010443850147679767, "loss": 2.2751, "step": 422830 }, { "epoch": 1.6345811878585454, "grad_norm": 0.11607425659894943, "learning_rate": 0.0010441757483721184, "loss": 2.2571, "step": 422840 }, { "epoch": 1.6346198450619287, "grad_norm": 0.10826249420642853, "learning_rate": 0.0010439665277826304, "loss": 2.2559, "step": 422850 }, { "epoch": 1.634658502265312, "grad_norm": 0.10467202216386795, "learning_rate": 0.0010437573529694464, "loss": 2.2675, "step": 422860 }, { "epoch": 1.6346971594686954, "grad_norm": 0.11609058082103729, "learning_rate": 0.0010435482239025324, "loss": 2.2568, "step": 422870 }, { "epoch": 1.6347358166720787, "grad_norm": 0.10561811178922653, "learning_rate": 0.0010433391405518882, "loss": 2.2669, "step": 422880 }, { "epoch": 1.634774473875462, "grad_norm": 0.10006106644868851, "learning_rate": 0.0010431301028875452, "loss": 2.2414, "step": 422890 }, { "epoch": 1.6348131310788452, "grad_norm": 0.12403450161218643, "learning_rate": 0.001042921110879568, "loss": 2.2507, "step": 422900 }, { "epoch": 1.6348517882822287, "grad_norm": 0.09937925636768341, "learning_rate": 0.0010427121644980546, "loss": 2.2478, "step": 422910 }, { "epoch": 1.634890445485612, "grad_norm": 0.09592811018228531, "learning_rate": 0.0010425032637131342, "loss": 2.2595, "step": 422920 }, { "epoch": 1.6349291026889952, "grad_norm": 0.12300334125757217, "learning_rate": 0.0010422944084949697, "loss": 2.2618, "step": 422930 }, { "epoch": 1.6349677598923784, "grad_norm": 0.11170656234025955, "learning_rate": 0.0010420855988137563, "loss": 2.2604, "step": 422940 }, { "epoch": 1.6350064170957617, "grad_norm": 0.10168015211820602, "learning_rate": 0.0010418768346397212, "loss": 2.2627, "step": 422950 }, { "epoch": 1.635045074299145, "grad_norm": 0.11053097248077393, "learning_rate": 0.001041668115943125, "loss": 2.2479, "step": 422960 }, { "epoch": 1.6350837315025282, "grad_norm": 0.12322186678647995, "learning_rate": 0.001041459442694259, "loss": 2.2623, "step": 422970 }, { "epoch": 1.6351223887059114, "grad_norm": 0.0941690057516098, "learning_rate": 0.0010412508148634494, "loss": 2.2497, "step": 422980 }, { "epoch": 1.6351610459092947, "grad_norm": 0.10818459838628769, "learning_rate": 0.0010410422324210519, "loss": 2.2528, "step": 422990 }, { "epoch": 1.635199703112678, "grad_norm": 0.09662491828203201, "learning_rate": 0.0010408336953374561, "loss": 2.2543, "step": 423000 }, { "epoch": 1.6352383603160612, "grad_norm": 0.10936406254768372, "learning_rate": 0.0010406252035830837, "loss": 2.2542, "step": 423010 }, { "epoch": 1.6352770175194444, "grad_norm": 0.10065805166959763, "learning_rate": 0.0010404167571283875, "loss": 2.2614, "step": 423020 }, { "epoch": 1.635315674722828, "grad_norm": 0.10415294766426086, "learning_rate": 0.0010402083559438537, "loss": 2.257, "step": 423030 }, { "epoch": 1.6353543319262112, "grad_norm": 0.11044905334711075, "learning_rate": 0.0010400000000000001, "loss": 2.2656, "step": 423040 }, { "epoch": 1.6353929891295944, "grad_norm": 0.11244837939739227, "learning_rate": 0.0010397916892673758, "loss": 2.265, "step": 423050 }, { "epoch": 1.6354316463329777, "grad_norm": 0.11220791190862656, "learning_rate": 0.0010395834237165624, "loss": 2.271, "step": 423060 }, { "epoch": 1.635470303536361, "grad_norm": 0.1089065745472908, "learning_rate": 0.0010393752033181739, "loss": 2.2728, "step": 423070 }, { "epoch": 1.6355089607397444, "grad_norm": 0.09747709333896637, "learning_rate": 0.0010391670280428548, "loss": 2.2708, "step": 423080 }, { "epoch": 1.6355476179431276, "grad_norm": 0.1153540313243866, "learning_rate": 0.001038958897861283, "loss": 2.2625, "step": 423090 }, { "epoch": 1.635586275146511, "grad_norm": 0.11073118448257446, "learning_rate": 0.0010387508127441666, "loss": 2.268, "step": 423100 }, { "epoch": 1.6356249323498941, "grad_norm": 0.11345057934522629, "learning_rate": 0.0010385427726622466, "loss": 2.2607, "step": 423110 }, { "epoch": 1.6356635895532774, "grad_norm": 0.11611675471067429, "learning_rate": 0.0010383347775862954, "loss": 2.2599, "step": 423120 }, { "epoch": 1.6357022467566606, "grad_norm": 0.10179933160543442, "learning_rate": 0.0010381268274871163, "loss": 2.2498, "step": 423130 }, { "epoch": 1.635740903960044, "grad_norm": 0.1067461371421814, "learning_rate": 0.0010379189223355446, "loss": 2.2581, "step": 423140 }, { "epoch": 1.6357795611634272, "grad_norm": 0.10228580236434937, "learning_rate": 0.0010377110621024475, "loss": 2.2611, "step": 423150 }, { "epoch": 1.6358182183668104, "grad_norm": 0.11855067312717438, "learning_rate": 0.0010375032467587228, "loss": 2.2478, "step": 423160 }, { "epoch": 1.6358568755701937, "grad_norm": 0.10486794263124466, "learning_rate": 0.0010372954762753008, "loss": 2.2541, "step": 423170 }, { "epoch": 1.635895532773577, "grad_norm": 0.1444479078054428, "learning_rate": 0.001037087750623142, "loss": 2.2658, "step": 423180 }, { "epoch": 1.6359341899769602, "grad_norm": 0.10084392130374908, "learning_rate": 0.0010368800697732397, "loss": 2.2671, "step": 423190 }, { "epoch": 1.6359728471803436, "grad_norm": 0.11010993272066116, "learning_rate": 0.0010366724336966161, "loss": 2.2501, "step": 423200 }, { "epoch": 1.6360115043837269, "grad_norm": 0.10917601734399796, "learning_rate": 0.0010364648423643278, "loss": 2.2724, "step": 423210 }, { "epoch": 1.6360501615871101, "grad_norm": 0.11198030412197113, "learning_rate": 0.0010362572957474594, "loss": 2.2782, "step": 423220 }, { "epoch": 1.6360888187904934, "grad_norm": 0.11350420862436295, "learning_rate": 0.0010360497938171286, "loss": 2.2729, "step": 423230 }, { "epoch": 1.6361274759938766, "grad_norm": 0.10055453330278397, "learning_rate": 0.001035842336544484, "loss": 2.2706, "step": 423240 }, { "epoch": 1.6361661331972601, "grad_norm": 0.10071729868650436, "learning_rate": 0.0010356349239007043, "loss": 2.2718, "step": 423250 }, { "epoch": 1.6362047904006434, "grad_norm": 0.1166156753897667, "learning_rate": 0.0010354275558570004, "loss": 2.2711, "step": 423260 }, { "epoch": 1.6362434476040266, "grad_norm": 0.10396468639373779, "learning_rate": 0.0010352202323846132, "loss": 2.2673, "step": 423270 }, { "epoch": 1.6362821048074099, "grad_norm": 0.11050568521022797, "learning_rate": 0.0010350129534548145, "loss": 2.2595, "step": 423280 }, { "epoch": 1.6363207620107931, "grad_norm": 0.12434356659650803, "learning_rate": 0.0010348057190389078, "loss": 2.2524, "step": 423290 }, { "epoch": 1.6363594192141764, "grad_norm": 0.09937973320484161, "learning_rate": 0.0010345985291082265, "loss": 2.2495, "step": 423300 }, { "epoch": 1.6363980764175596, "grad_norm": 0.10541477799415588, "learning_rate": 0.001034391383634135, "loss": 2.2456, "step": 423310 }, { "epoch": 1.6364367336209429, "grad_norm": 0.09955312311649323, "learning_rate": 0.0010341842825880292, "loss": 2.2529, "step": 423320 }, { "epoch": 1.6364753908243261, "grad_norm": 0.11587560921907425, "learning_rate": 0.0010339772259413342, "loss": 2.2644, "step": 423330 }, { "epoch": 1.6365140480277094, "grad_norm": 0.11401667445898056, "learning_rate": 0.0010337702136655072, "loss": 2.2581, "step": 423340 }, { "epoch": 1.6365527052310926, "grad_norm": 0.10360361635684967, "learning_rate": 0.0010335632457320343, "loss": 2.2647, "step": 423350 }, { "epoch": 1.636591362434476, "grad_norm": 0.10124827921390533, "learning_rate": 0.0010333563221124342, "loss": 2.2672, "step": 423360 }, { "epoch": 1.6366300196378594, "grad_norm": 0.12644001841545105, "learning_rate": 0.0010331494427782543, "loss": 2.2465, "step": 423370 }, { "epoch": 1.6366686768412426, "grad_norm": 0.11874937266111374, "learning_rate": 0.0010329426077010733, "loss": 2.2641, "step": 423380 }, { "epoch": 1.6367073340446259, "grad_norm": 0.10666025429964066, "learning_rate": 0.0010327358168525003, "loss": 2.2589, "step": 423390 }, { "epoch": 1.6367459912480091, "grad_norm": 0.10593769699335098, "learning_rate": 0.001032529070204174, "loss": 2.2528, "step": 423400 }, { "epoch": 1.6367846484513926, "grad_norm": 0.13542486727237701, "learning_rate": 0.0010323223677277645, "loss": 2.2633, "step": 423410 }, { "epoch": 1.6368233056547759, "grad_norm": 0.11956963688135147, "learning_rate": 0.001032115709394971, "loss": 2.2643, "step": 423420 }, { "epoch": 1.636861962858159, "grad_norm": 0.1231904923915863, "learning_rate": 0.0010319090951775242, "loss": 2.2479, "step": 423430 }, { "epoch": 1.6369006200615424, "grad_norm": 0.10609181225299835, "learning_rate": 0.0010317025250471837, "loss": 2.2597, "step": 423440 }, { "epoch": 1.6369392772649256, "grad_norm": 0.1126595214009285, "learning_rate": 0.00103149599897574, "loss": 2.266, "step": 423450 }, { "epoch": 1.6369779344683089, "grad_norm": 0.12165053188800812, "learning_rate": 0.0010312895169350131, "loss": 2.2647, "step": 423460 }, { "epoch": 1.6370165916716921, "grad_norm": 0.10681026428937912, "learning_rate": 0.0010310830788968542, "loss": 2.254, "step": 423470 }, { "epoch": 1.6370552488750754, "grad_norm": 0.1236894503235817, "learning_rate": 0.0010308766848331425, "loss": 2.2657, "step": 423480 }, { "epoch": 1.6370939060784586, "grad_norm": 0.12302964925765991, "learning_rate": 0.0010306703347157894, "loss": 2.2761, "step": 423490 }, { "epoch": 1.6371325632818419, "grad_norm": 0.1260789930820465, "learning_rate": 0.0010304640285167341, "loss": 2.2721, "step": 423500 }, { "epoch": 1.6371712204852251, "grad_norm": 0.10569873452186584, "learning_rate": 0.0010302577662079475, "loss": 2.2352, "step": 423510 }, { "epoch": 1.6372098776886084, "grad_norm": 0.10717001557350159, "learning_rate": 0.0010300515477614288, "loss": 2.2717, "step": 423520 }, { "epoch": 1.6372485348919916, "grad_norm": 0.10829834640026093, "learning_rate": 0.0010298453731492076, "loss": 2.2524, "step": 423530 }, { "epoch": 1.637287192095375, "grad_norm": 0.10441777855157852, "learning_rate": 0.0010296392423433437, "loss": 2.2603, "step": 423540 }, { "epoch": 1.6373258492987584, "grad_norm": 0.11129403859376907, "learning_rate": 0.0010294331553159257, "loss": 2.2615, "step": 423550 }, { "epoch": 1.6373645065021416, "grad_norm": 0.09813987463712692, "learning_rate": 0.0010292271120390724, "loss": 2.2669, "step": 423560 }, { "epoch": 1.6374031637055249, "grad_norm": 0.10924495756626129, "learning_rate": 0.0010290211124849315, "loss": 2.2598, "step": 423570 }, { "epoch": 1.6374418209089083, "grad_norm": 0.1084313839673996, "learning_rate": 0.0010288151566256812, "loss": 2.2558, "step": 423580 }, { "epoch": 1.6374804781122916, "grad_norm": 0.1174166277050972, "learning_rate": 0.001028609244433529, "loss": 2.2696, "step": 423590 }, { "epoch": 1.6375191353156748, "grad_norm": 0.11019670218229294, "learning_rate": 0.0010284033758807105, "loss": 2.2579, "step": 423600 }, { "epoch": 1.637557792519058, "grad_norm": 0.10024767369031906, "learning_rate": 0.0010281975509394928, "loss": 2.2658, "step": 423610 }, { "epoch": 1.6375964497224413, "grad_norm": 0.10447598993778229, "learning_rate": 0.0010279917695821706, "loss": 2.2696, "step": 423620 }, { "epoch": 1.6376351069258246, "grad_norm": 0.10102497041225433, "learning_rate": 0.0010277860317810692, "loss": 2.2516, "step": 423630 }, { "epoch": 1.6376737641292078, "grad_norm": 0.11171722412109375, "learning_rate": 0.0010275803375085425, "loss": 2.2462, "step": 423640 }, { "epoch": 1.637712421332591, "grad_norm": 0.10647977143526077, "learning_rate": 0.0010273746867369737, "loss": 2.2583, "step": 423650 }, { "epoch": 1.6377510785359743, "grad_norm": 0.10822014510631561, "learning_rate": 0.0010271690794387753, "loss": 2.2726, "step": 423660 }, { "epoch": 1.6377897357393576, "grad_norm": 0.13143102824687958, "learning_rate": 0.0010269635155863888, "loss": 2.2565, "step": 423670 }, { "epoch": 1.6378283929427409, "grad_norm": 0.10811565816402435, "learning_rate": 0.001026757995152285, "loss": 2.2624, "step": 423680 }, { "epoch": 1.637867050146124, "grad_norm": 0.13415491580963135, "learning_rate": 0.0010265525181089635, "loss": 2.2618, "step": 423690 }, { "epoch": 1.6379057073495074, "grad_norm": 0.10379686206579208, "learning_rate": 0.0010263470844289531, "loss": 2.2542, "step": 423700 }, { "epoch": 1.6379443645528908, "grad_norm": 0.1132299154996872, "learning_rate": 0.001026141694084812, "loss": 2.2588, "step": 423710 }, { "epoch": 1.637983021756274, "grad_norm": 0.11209876835346222, "learning_rate": 0.0010259363470491265, "loss": 2.2782, "step": 423720 }, { "epoch": 1.6380216789596573, "grad_norm": 0.10772533714771271, "learning_rate": 0.0010257310432945119, "loss": 2.2668, "step": 423730 }, { "epoch": 1.6380603361630406, "grad_norm": 0.09981733560562134, "learning_rate": 0.0010255257827936136, "loss": 2.245, "step": 423740 }, { "epoch": 1.638098993366424, "grad_norm": 0.1126519963145256, "learning_rate": 0.001025320565519104, "loss": 2.2495, "step": 423750 }, { "epoch": 1.6381376505698073, "grad_norm": 0.09858756512403488, "learning_rate": 0.0010251153914436848, "loss": 2.254, "step": 423760 }, { "epoch": 1.6381763077731906, "grad_norm": 0.11347083002328873, "learning_rate": 0.0010249102605400875, "loss": 2.2546, "step": 423770 }, { "epoch": 1.6382149649765738, "grad_norm": 0.11383738368749619, "learning_rate": 0.0010247051727810712, "loss": 2.2639, "step": 423780 }, { "epoch": 1.638253622179957, "grad_norm": 0.10293114185333252, "learning_rate": 0.0010245001281394242, "loss": 2.2554, "step": 423790 }, { "epoch": 1.6382922793833403, "grad_norm": 0.10788684338331223, "learning_rate": 0.0010242951265879627, "loss": 2.27, "step": 423800 }, { "epoch": 1.6383309365867236, "grad_norm": 0.11021386086940765, "learning_rate": 0.0010240901680995318, "loss": 2.2539, "step": 423810 }, { "epoch": 1.6383695937901068, "grad_norm": 0.09868684411048889, "learning_rate": 0.0010238852526470057, "loss": 2.2646, "step": 423820 }, { "epoch": 1.63840825099349, "grad_norm": 0.09737741947174072, "learning_rate": 0.001023680380203286, "loss": 2.2497, "step": 423830 }, { "epoch": 1.6384469081968733, "grad_norm": 0.0985494926571846, "learning_rate": 0.0010234755507413037, "loss": 2.2438, "step": 423840 }, { "epoch": 1.6384855654002566, "grad_norm": 0.11383437365293503, "learning_rate": 0.0010232707642340176, "loss": 2.2494, "step": 423850 }, { "epoch": 1.6385242226036398, "grad_norm": 0.1099669560790062, "learning_rate": 0.001023066020654415, "loss": 2.2718, "step": 423860 }, { "epoch": 1.638562879807023, "grad_norm": 0.11040462553501129, "learning_rate": 0.0010228613199755113, "loss": 2.2524, "step": 423870 }, { "epoch": 1.6386015370104066, "grad_norm": 0.13061681389808655, "learning_rate": 0.0010226566621703505, "loss": 2.2567, "step": 423880 }, { "epoch": 1.6386401942137898, "grad_norm": 0.0982203334569931, "learning_rate": 0.001022452047212005, "loss": 2.2596, "step": 423890 }, { "epoch": 1.638678851417173, "grad_norm": 0.10891470313072205, "learning_rate": 0.0010222474750735749, "loss": 2.272, "step": 423900 }, { "epoch": 1.6387175086205563, "grad_norm": 0.11531899124383926, "learning_rate": 0.001022042945728188, "loss": 2.2588, "step": 423910 }, { "epoch": 1.6387561658239398, "grad_norm": 0.11567877233028412, "learning_rate": 0.0010218384591490013, "loss": 2.2682, "step": 423920 }, { "epoch": 1.638794823027323, "grad_norm": 0.1000378280878067, "learning_rate": 0.0010216340153091994, "loss": 2.2711, "step": 423930 }, { "epoch": 1.6388334802307063, "grad_norm": 0.10007666051387787, "learning_rate": 0.0010214296141819945, "loss": 2.2551, "step": 423940 }, { "epoch": 1.6388721374340895, "grad_norm": 0.11694041639566422, "learning_rate": 0.0010212252557406276, "loss": 2.2614, "step": 423950 }, { "epoch": 1.6389107946374728, "grad_norm": 0.10663101822137833, "learning_rate": 0.0010210209399583667, "loss": 2.2754, "step": 423960 }, { "epoch": 1.638949451840856, "grad_norm": 0.10280250757932663, "learning_rate": 0.0010208166668085083, "loss": 2.2705, "step": 423970 }, { "epoch": 1.6389881090442393, "grad_norm": 0.11807078868150711, "learning_rate": 0.0010206124362643766, "loss": 2.2682, "step": 423980 }, { "epoch": 1.6390267662476226, "grad_norm": 0.11073262244462967, "learning_rate": 0.0010204082482993236, "loss": 2.2518, "step": 423990 }, { "epoch": 1.6390654234510058, "grad_norm": 0.12303231656551361, "learning_rate": 0.001020204102886729, "loss": 2.2481, "step": 424000 }, { "epoch": 1.639104080654389, "grad_norm": 0.10889635235071182, "learning_rate": 0.00102, "loss": 2.2615, "step": 424010 }, { "epoch": 1.6391427378577723, "grad_norm": 0.09901122748851776, "learning_rate": 0.0010197959396125724, "loss": 2.2585, "step": 424020 }, { "epoch": 1.6391813950611556, "grad_norm": 0.10052678734064102, "learning_rate": 0.0010195919216979083, "loss": 2.2644, "step": 424030 }, { "epoch": 1.6392200522645388, "grad_norm": 0.11836764216423035, "learning_rate": 0.0010193879462294991, "loss": 2.2634, "step": 424040 }, { "epoch": 1.6392587094679223, "grad_norm": 0.10381746292114258, "learning_rate": 0.0010191840131808618, "loss": 2.2557, "step": 424050 }, { "epoch": 1.6392973666713055, "grad_norm": 0.11419299244880676, "learning_rate": 0.001018980122525542, "loss": 2.2556, "step": 424060 }, { "epoch": 1.6393360238746888, "grad_norm": 0.10854178667068481, "learning_rate": 0.0010187762742371138, "loss": 2.2699, "step": 424070 }, { "epoch": 1.639374681078072, "grad_norm": 0.1035003736615181, "learning_rate": 0.0010185724682891762, "loss": 2.2581, "step": 424080 }, { "epoch": 1.6394133382814555, "grad_norm": 0.1015508621931076, "learning_rate": 0.001018368704655358, "loss": 2.2584, "step": 424090 }, { "epoch": 1.6394519954848388, "grad_norm": 0.10948213934898376, "learning_rate": 0.0010181649833093138, "loss": 2.2237, "step": 424100 }, { "epoch": 1.639490652688222, "grad_norm": 0.1012764498591423, "learning_rate": 0.0010179613042247265, "loss": 2.2599, "step": 424110 }, { "epoch": 1.6395293098916053, "grad_norm": 0.1120927631855011, "learning_rate": 0.0010177576673753063, "loss": 2.2643, "step": 424120 }, { "epoch": 1.6395679670949885, "grad_norm": 0.1091916486620903, "learning_rate": 0.0010175540727347893, "loss": 2.2615, "step": 424130 }, { "epoch": 1.6396066242983718, "grad_norm": 0.09791362285614014, "learning_rate": 0.0010173505202769402, "loss": 2.2714, "step": 424140 }, { "epoch": 1.639645281501755, "grad_norm": 0.0960848480463028, "learning_rate": 0.001017147009975551, "loss": 2.25, "step": 424150 }, { "epoch": 1.6396839387051383, "grad_norm": 0.10298629850149155, "learning_rate": 0.0010169435418044394, "loss": 2.2562, "step": 424160 }, { "epoch": 1.6397225959085215, "grad_norm": 0.09647177904844284, "learning_rate": 0.0010167401157374517, "loss": 2.2334, "step": 424170 }, { "epoch": 1.6397612531119048, "grad_norm": 0.09860710054636002, "learning_rate": 0.0010165367317484604, "loss": 2.2593, "step": 424180 }, { "epoch": 1.639799910315288, "grad_norm": 0.10834896564483643, "learning_rate": 0.001016333389811365, "loss": 2.2657, "step": 424190 }, { "epoch": 1.6398385675186713, "grad_norm": 0.11073605716228485, "learning_rate": 0.0010161300899000926, "loss": 2.2648, "step": 424200 }, { "epoch": 1.6398772247220545, "grad_norm": 0.10061077773571014, "learning_rate": 0.0010159268319885966, "loss": 2.2712, "step": 424210 }, { "epoch": 1.639915881925438, "grad_norm": 0.09821108728647232, "learning_rate": 0.0010157236160508573, "loss": 2.2819, "step": 424220 }, { "epoch": 1.6399545391288213, "grad_norm": 0.1119464710354805, "learning_rate": 0.0010155204420608825, "loss": 2.2598, "step": 424230 }, { "epoch": 1.6399931963322045, "grad_norm": 0.11146452277898788, "learning_rate": 0.0010153173099927062, "loss": 2.2582, "step": 424240 }, { "epoch": 1.6400318535355878, "grad_norm": 0.12815554440021515, "learning_rate": 0.0010151142198203894, "loss": 2.2592, "step": 424250 }, { "epoch": 1.6400705107389713, "grad_norm": 0.10622846335172653, "learning_rate": 0.00101491117151802, "loss": 2.2555, "step": 424260 }, { "epoch": 1.6401091679423545, "grad_norm": 0.10454724729061127, "learning_rate": 0.0010147081650597118, "loss": 2.26, "step": 424270 }, { "epoch": 1.6401478251457378, "grad_norm": 0.10535355657339096, "learning_rate": 0.0010145052004196064, "loss": 2.2573, "step": 424280 }, { "epoch": 1.640186482349121, "grad_norm": 0.11934984475374222, "learning_rate": 0.0010143022775718716, "loss": 2.2567, "step": 424290 }, { "epoch": 1.6402251395525043, "grad_norm": 0.12079385668039322, "learning_rate": 0.001014099396490701, "loss": 2.2664, "step": 424300 }, { "epoch": 1.6402637967558875, "grad_norm": 0.13715972006320953, "learning_rate": 0.0010138965571503161, "loss": 2.2608, "step": 424310 }, { "epoch": 1.6403024539592708, "grad_norm": 0.11399926245212555, "learning_rate": 0.0010136937595249637, "loss": 2.2532, "step": 424320 }, { "epoch": 1.640341111162654, "grad_norm": 0.1082891896367073, "learning_rate": 0.0010134910035889183, "loss": 2.2544, "step": 424330 }, { "epoch": 1.6403797683660373, "grad_norm": 0.10113243758678436, "learning_rate": 0.0010132882893164792, "loss": 2.2608, "step": 424340 }, { "epoch": 1.6404184255694205, "grad_norm": 0.1076381579041481, "learning_rate": 0.0010130856166819737, "loss": 2.2438, "step": 424350 }, { "epoch": 1.6404570827728038, "grad_norm": 0.1019374430179596, "learning_rate": 0.0010128829856597547, "loss": 2.2544, "step": 424360 }, { "epoch": 1.640495739976187, "grad_norm": 0.10723643004894257, "learning_rate": 0.0010126803962242014, "loss": 2.2533, "step": 424370 }, { "epoch": 1.6405343971795703, "grad_norm": 0.10868024080991745, "learning_rate": 0.0010124778483497194, "loss": 2.2547, "step": 424380 }, { "epoch": 1.6405730543829538, "grad_norm": 0.10560424625873566, "learning_rate": 0.0010122753420107405, "loss": 2.2505, "step": 424390 }, { "epoch": 1.640611711586337, "grad_norm": 0.12155601382255554, "learning_rate": 0.0010120728771817224, "loss": 2.2348, "step": 424400 }, { "epoch": 1.6406503687897203, "grad_norm": 0.11004646122455597, "learning_rate": 0.00101187045383715, "loss": 2.2577, "step": 424410 }, { "epoch": 1.6406890259931035, "grad_norm": 0.11234424263238907, "learning_rate": 0.001011668071951533, "loss": 2.2611, "step": 424420 }, { "epoch": 1.640727683196487, "grad_norm": 0.10152650624513626, "learning_rate": 0.0010114657314994084, "loss": 2.2629, "step": 424430 }, { "epoch": 1.6407663403998702, "grad_norm": 0.11180943250656128, "learning_rate": 0.0010112634324553381, "loss": 2.2624, "step": 424440 }, { "epoch": 1.6408049976032535, "grad_norm": 0.10553466528654099, "learning_rate": 0.0010110611747939106, "loss": 2.2676, "step": 424450 }, { "epoch": 1.6408436548066367, "grad_norm": 0.10074066370725632, "learning_rate": 0.001010858958489741, "loss": 2.2606, "step": 424460 }, { "epoch": 1.64088231201002, "grad_norm": 0.11224688589572906, "learning_rate": 0.0010106567835174692, "loss": 2.2572, "step": 424470 }, { "epoch": 1.6409209692134032, "grad_norm": 0.10370156168937683, "learning_rate": 0.0010104546498517614, "loss": 2.2662, "step": 424480 }, { "epoch": 1.6409596264167865, "grad_norm": 0.12131761759519577, "learning_rate": 0.0010102525574673103, "loss": 2.2643, "step": 424490 }, { "epoch": 1.6409982836201698, "grad_norm": 0.09501279145479202, "learning_rate": 0.0010100505063388335, "loss": 2.2581, "step": 424500 }, { "epoch": 1.641036940823553, "grad_norm": 0.1053239107131958, "learning_rate": 0.0010098484964410747, "loss": 2.2399, "step": 424510 }, { "epoch": 1.6410755980269363, "grad_norm": 0.11659132689237595, "learning_rate": 0.0010096465277488042, "loss": 2.2612, "step": 424520 }, { "epoch": 1.6411142552303195, "grad_norm": 0.11104684323072433, "learning_rate": 0.0010094446002368168, "loss": 2.2475, "step": 424530 }, { "epoch": 1.6411529124337028, "grad_norm": 0.12283774465322495, "learning_rate": 0.001009242713879933, "loss": 2.2616, "step": 424540 }, { "epoch": 1.641191569637086, "grad_norm": 0.09906791895627975, "learning_rate": 0.0010090408686530003, "loss": 2.2639, "step": 424550 }, { "epoch": 1.6412302268404695, "grad_norm": 0.09812948852777481, "learning_rate": 0.0010088390645308905, "loss": 2.2635, "step": 424560 }, { "epoch": 1.6412688840438527, "grad_norm": 0.1091708242893219, "learning_rate": 0.0010086373014885014, "loss": 2.2476, "step": 424570 }, { "epoch": 1.641307541247236, "grad_norm": 0.10793473571538925, "learning_rate": 0.0010084355795007566, "loss": 2.2551, "step": 424580 }, { "epoch": 1.6413461984506192, "grad_norm": 0.11073902249336243, "learning_rate": 0.001008233898542605, "loss": 2.2469, "step": 424590 }, { "epoch": 1.6413848556540027, "grad_norm": 0.11795094609260559, "learning_rate": 0.0010080322585890205, "loss": 2.2548, "step": 424600 }, { "epoch": 1.641423512857386, "grad_norm": 0.11132269352674484, "learning_rate": 0.001007830659615003, "loss": 2.2419, "step": 424610 }, { "epoch": 1.6414621700607692, "grad_norm": 0.11662492156028748, "learning_rate": 0.0010076291015955778, "loss": 2.2697, "step": 424620 }, { "epoch": 1.6415008272641525, "grad_norm": 0.11011848598718643, "learning_rate": 0.0010074275845057955, "loss": 2.2672, "step": 424630 }, { "epoch": 1.6415394844675357, "grad_norm": 0.11036280542612076, "learning_rate": 0.0010072261083207315, "loss": 2.2444, "step": 424640 }, { "epoch": 1.641578141670919, "grad_norm": 0.10838011652231216, "learning_rate": 0.0010070246730154873, "loss": 2.2596, "step": 424650 }, { "epoch": 1.6416167988743022, "grad_norm": 0.11376731097698212, "learning_rate": 0.001006823278565189, "loss": 2.2606, "step": 424660 }, { "epoch": 1.6416554560776855, "grad_norm": 0.11246142536401749, "learning_rate": 0.0010066219249449885, "loss": 2.2576, "step": 424670 }, { "epoch": 1.6416941132810687, "grad_norm": 0.10369978845119476, "learning_rate": 0.0010064206121300626, "loss": 2.2474, "step": 424680 }, { "epoch": 1.641732770484452, "grad_norm": 0.10715661942958832, "learning_rate": 0.0010062193400956123, "loss": 2.245, "step": 424690 }, { "epoch": 1.6417714276878352, "grad_norm": 0.11022845655679703, "learning_rate": 0.001006018108816866, "loss": 2.271, "step": 424700 }, { "epoch": 1.6418100848912185, "grad_norm": 0.09892480075359344, "learning_rate": 0.0010058169182690746, "loss": 2.2606, "step": 424710 }, { "epoch": 1.6418487420946017, "grad_norm": 0.1053096204996109, "learning_rate": 0.0010056157684275157, "loss": 2.2538, "step": 424720 }, { "epoch": 1.6418873992979852, "grad_norm": 0.1074117124080658, "learning_rate": 0.0010054146592674916, "loss": 2.2492, "step": 424730 }, { "epoch": 1.6419260565013685, "grad_norm": 0.11926567554473877, "learning_rate": 0.001005213590764329, "loss": 2.2562, "step": 424740 }, { "epoch": 1.6419647137047517, "grad_norm": 0.11387331038713455, "learning_rate": 0.0010050125628933801, "loss": 2.2639, "step": 424750 }, { "epoch": 1.642003370908135, "grad_norm": 0.10490171611309052, "learning_rate": 0.0010048115756300217, "loss": 2.2594, "step": 424760 }, { "epoch": 1.6420420281115184, "grad_norm": 0.12902913987636566, "learning_rate": 0.0010046106289496558, "loss": 2.262, "step": 424770 }, { "epoch": 1.6420806853149017, "grad_norm": 0.10744645446538925, "learning_rate": 0.0010044097228277087, "loss": 2.258, "step": 424780 }, { "epoch": 1.642119342518285, "grad_norm": 0.11677946895360947, "learning_rate": 0.0010042088572396319, "loss": 2.2542, "step": 424790 }, { "epoch": 1.6421579997216682, "grad_norm": 0.11043225228786469, "learning_rate": 0.0010040080321609014, "loss": 2.2571, "step": 424800 }, { "epoch": 1.6421966569250515, "grad_norm": 0.10317932069301605, "learning_rate": 0.0010038072475670183, "loss": 2.266, "step": 424810 }, { "epoch": 1.6422353141284347, "grad_norm": 0.10283423215150833, "learning_rate": 0.001003606503433508, "loss": 2.2703, "step": 424820 }, { "epoch": 1.642273971331818, "grad_norm": 0.0987296849489212, "learning_rate": 0.0010034057997359205, "loss": 2.2538, "step": 424830 }, { "epoch": 1.6423126285352012, "grad_norm": 0.10434217751026154, "learning_rate": 0.0010032051364498309, "loss": 2.2522, "step": 424840 }, { "epoch": 1.6423512857385845, "grad_norm": 0.1010420098900795, "learning_rate": 0.0010030045135508386, "loss": 2.2399, "step": 424850 }, { "epoch": 1.6423899429419677, "grad_norm": 0.10522980988025665, "learning_rate": 0.0010028039310145673, "loss": 2.2664, "step": 424860 }, { "epoch": 1.642428600145351, "grad_norm": 0.10818637907505035, "learning_rate": 0.0010026033888166652, "loss": 2.255, "step": 424870 }, { "epoch": 1.6424672573487342, "grad_norm": 0.1131870374083519, "learning_rate": 0.001002402886932806, "loss": 2.2624, "step": 424880 }, { "epoch": 1.6425059145521177, "grad_norm": 0.11998511850833893, "learning_rate": 0.0010022024253386861, "loss": 2.2607, "step": 424890 }, { "epoch": 1.642544571755501, "grad_norm": 0.11467783898115158, "learning_rate": 0.0010020020040100279, "loss": 2.2609, "step": 424900 }, { "epoch": 1.6425832289588842, "grad_norm": 0.1055130586028099, "learning_rate": 0.0010018016229225775, "loss": 2.2366, "step": 424910 }, { "epoch": 1.6426218861622675, "grad_norm": 0.10552117228507996, "learning_rate": 0.0010016012820521052, "loss": 2.2577, "step": 424920 }, { "epoch": 1.6426605433656507, "grad_norm": 0.104463130235672, "learning_rate": 0.0010014009813744055, "loss": 2.2649, "step": 424930 }, { "epoch": 1.6426992005690342, "grad_norm": 0.11977139860391617, "learning_rate": 0.0010012007208652983, "loss": 2.2532, "step": 424940 }, { "epoch": 1.6427378577724174, "grad_norm": 0.0970083475112915, "learning_rate": 0.0010010005005006257, "loss": 2.2499, "step": 424950 }, { "epoch": 1.6427765149758007, "grad_norm": 0.11830824613571167, "learning_rate": 0.0010008003202562562, "loss": 2.2518, "step": 424960 }, { "epoch": 1.642815172179184, "grad_norm": 0.11555589735507965, "learning_rate": 0.0010006001801080812, "loss": 2.2423, "step": 424970 }, { "epoch": 1.6428538293825672, "grad_norm": 0.13867305219173431, "learning_rate": 0.0010004000800320162, "loss": 2.2492, "step": 424980 }, { "epoch": 1.6428924865859504, "grad_norm": 0.1125546395778656, "learning_rate": 0.0010002000200040012, "loss": 2.2543, "step": 424990 }, { "epoch": 1.6429311437893337, "grad_norm": 0.10740401595830917, "learning_rate": 0.001, "loss": 2.2475, "step": 425000 }, { "epoch": 1.642969800992717, "grad_norm": 0.10235648602247238, "learning_rate": 0.000999800019996001, "loss": 2.2456, "step": 425010 }, { "epoch": 1.6430084581961002, "grad_norm": 0.10700272023677826, "learning_rate": 0.000999600079968016, "loss": 2.255, "step": 425020 }, { "epoch": 1.6430471153994834, "grad_norm": 0.10306088626384735, "learning_rate": 0.000999400179892081, "loss": 2.2475, "step": 425030 }, { "epoch": 1.6430857726028667, "grad_norm": 0.10797042399644852, "learning_rate": 0.0009992003197442556, "loss": 2.2633, "step": 425040 }, { "epoch": 1.64312442980625, "grad_norm": 0.11185480654239655, "learning_rate": 0.0009990004995006241, "loss": 2.2482, "step": 425050 }, { "epoch": 1.6431630870096334, "grad_norm": 0.09936090558767319, "learning_rate": 0.0009988007191372938, "loss": 2.2524, "step": 425060 }, { "epoch": 1.6432017442130167, "grad_norm": 0.12256762385368347, "learning_rate": 0.0009986009786303964, "loss": 2.2479, "step": 425070 }, { "epoch": 1.6432404014164, "grad_norm": 0.13450071215629578, "learning_rate": 0.0009984012779560869, "loss": 2.2452, "step": 425080 }, { "epoch": 1.6432790586197832, "grad_norm": 0.0990997776389122, "learning_rate": 0.0009982016170905447, "loss": 2.2521, "step": 425090 }, { "epoch": 1.6433177158231664, "grad_norm": 0.11909011751413345, "learning_rate": 0.0009980019960099723, "loss": 2.2547, "step": 425100 }, { "epoch": 1.64335637302655, "grad_norm": 0.10571201890707016, "learning_rate": 0.0009978024146905962, "loss": 2.2402, "step": 425110 }, { "epoch": 1.6433950302299332, "grad_norm": 0.10910934209823608, "learning_rate": 0.0009976028731086665, "loss": 2.2554, "step": 425120 }, { "epoch": 1.6434336874333164, "grad_norm": 0.10239577293395996, "learning_rate": 0.0009974033712404572, "loss": 2.2534, "step": 425130 }, { "epoch": 1.6434723446366997, "grad_norm": 0.10150449723005295, "learning_rate": 0.000997203909062266, "loss": 2.2713, "step": 425140 }, { "epoch": 1.643511001840083, "grad_norm": 0.10944680124521255, "learning_rate": 0.0009970044865504134, "loss": 2.2461, "step": 425150 }, { "epoch": 1.6435496590434662, "grad_norm": 0.09132153540849686, "learning_rate": 0.0009968051036812438, "loss": 2.2605, "step": 425160 }, { "epoch": 1.6435883162468494, "grad_norm": 0.10329840332269669, "learning_rate": 0.0009966057604311256, "loss": 2.2648, "step": 425170 }, { "epoch": 1.6436269734502327, "grad_norm": 0.10862980782985687, "learning_rate": 0.0009964064567764496, "loss": 2.2633, "step": 425180 }, { "epoch": 1.643665630653616, "grad_norm": 0.11289060860872269, "learning_rate": 0.0009962071926936315, "loss": 2.252, "step": 425190 }, { "epoch": 1.6437042878569992, "grad_norm": 0.09915979951620102, "learning_rate": 0.0009960079681591094, "loss": 2.2745, "step": 425200 }, { "epoch": 1.6437429450603824, "grad_norm": 0.10634247958660126, "learning_rate": 0.0009958087831493448, "loss": 2.2621, "step": 425210 }, { "epoch": 1.6437816022637657, "grad_norm": 0.1050662100315094, "learning_rate": 0.0009956096376408225, "loss": 2.2557, "step": 425220 }, { "epoch": 1.6438202594671492, "grad_norm": 0.11785628646612167, "learning_rate": 0.0009954105316100513, "loss": 2.2499, "step": 425230 }, { "epoch": 1.6438589166705324, "grad_norm": 0.12110677361488342, "learning_rate": 0.0009952114650335624, "loss": 2.2651, "step": 425240 }, { "epoch": 1.6438975738739157, "grad_norm": 0.10299248993396759, "learning_rate": 0.000995012437887911, "loss": 2.2651, "step": 425250 }, { "epoch": 1.643936231077299, "grad_norm": 0.10756676644086838, "learning_rate": 0.0009948134501496751, "loss": 2.2693, "step": 425260 }, { "epoch": 1.6439748882806822, "grad_norm": 0.10314938426017761, "learning_rate": 0.0009946145017954554, "loss": 2.2577, "step": 425270 }, { "epoch": 1.6440135454840656, "grad_norm": 0.11622797697782516, "learning_rate": 0.0009944155928018773, "loss": 2.2472, "step": 425280 }, { "epoch": 1.644052202687449, "grad_norm": 0.11092694103717804, "learning_rate": 0.0009942167231455875, "loss": 2.2558, "step": 425290 }, { "epoch": 1.6440908598908321, "grad_norm": 0.10614550113677979, "learning_rate": 0.0009940178928032567, "loss": 2.2537, "step": 425300 }, { "epoch": 1.6441295170942154, "grad_norm": 0.11072509735822678, "learning_rate": 0.0009938191017515787, "loss": 2.2539, "step": 425310 }, { "epoch": 1.6441681742975987, "grad_norm": 0.11297177523374557, "learning_rate": 0.00099362034996727, "loss": 2.2611, "step": 425320 }, { "epoch": 1.644206831500982, "grad_norm": 0.11130084097385406, "learning_rate": 0.0009934216374270703, "loss": 2.2615, "step": 425330 }, { "epoch": 1.6442454887043652, "grad_norm": 0.11282171308994293, "learning_rate": 0.0009932229641077424, "loss": 2.2504, "step": 425340 }, { "epoch": 1.6442841459077484, "grad_norm": 0.10654444247484207, "learning_rate": 0.0009930243299860718, "loss": 2.2593, "step": 425350 }, { "epoch": 1.6443228031111317, "grad_norm": 0.21849265694618225, "learning_rate": 0.0009928257350388663, "loss": 2.2636, "step": 425360 }, { "epoch": 1.644361460314515, "grad_norm": 0.10909666121006012, "learning_rate": 0.0009926271792429578, "loss": 2.264, "step": 425370 }, { "epoch": 1.6444001175178982, "grad_norm": 0.10071917623281479, "learning_rate": 0.0009924286625752, "loss": 2.2555, "step": 425380 }, { "epoch": 1.6444387747212814, "grad_norm": 0.12604159116744995, "learning_rate": 0.0009922301850124702, "loss": 2.2665, "step": 425390 }, { "epoch": 1.644477431924665, "grad_norm": 0.1069197803735733, "learning_rate": 0.0009920317465316676, "loss": 2.2636, "step": 425400 }, { "epoch": 1.6445160891280481, "grad_norm": 0.12726236879825592, "learning_rate": 0.0009918333471097152, "loss": 2.2594, "step": 425410 }, { "epoch": 1.6445547463314314, "grad_norm": 0.10156135261058807, "learning_rate": 0.0009916349867235576, "loss": 2.262, "step": 425420 }, { "epoch": 1.6445934035348146, "grad_norm": 0.12336266785860062, "learning_rate": 0.0009914366653501626, "loss": 2.26, "step": 425430 }, { "epoch": 1.6446320607381981, "grad_norm": 0.0935782939195633, "learning_rate": 0.0009912383829665207, "loss": 2.258, "step": 425440 }, { "epoch": 1.6446707179415814, "grad_norm": 0.1038680374622345, "learning_rate": 0.000991040139549645, "loss": 2.2522, "step": 425450 }, { "epoch": 1.6447093751449646, "grad_norm": 0.09510288387537003, "learning_rate": 0.000990841935076571, "loss": 2.2711, "step": 425460 }, { "epoch": 1.6447480323483479, "grad_norm": 0.10547652095556259, "learning_rate": 0.0009906437695243566, "loss": 2.2335, "step": 425470 }, { "epoch": 1.6447866895517311, "grad_norm": 0.10449301451444626, "learning_rate": 0.000990445642870083, "loss": 2.2724, "step": 425480 }, { "epoch": 1.6448253467551144, "grad_norm": 0.12347263842821121, "learning_rate": 0.000990247555090853, "loss": 2.2578, "step": 425490 }, { "epoch": 1.6448640039584976, "grad_norm": 0.10921213030815125, "learning_rate": 0.000990049506163792, "loss": 2.2582, "step": 425500 }, { "epoch": 1.6449026611618809, "grad_norm": 0.09873101860284805, "learning_rate": 0.0009898514960660488, "loss": 2.2663, "step": 425510 }, { "epoch": 1.6449413183652641, "grad_norm": 0.11116547882556915, "learning_rate": 0.000989653524774793, "loss": 2.2505, "step": 425520 }, { "epoch": 1.6449799755686474, "grad_norm": 0.10780694335699081, "learning_rate": 0.0009894555922672175, "loss": 2.2628, "step": 425530 }, { "epoch": 1.6450186327720306, "grad_norm": 0.11441248655319214, "learning_rate": 0.0009892576985205377, "loss": 2.2729, "step": 425540 }, { "epoch": 1.645057289975414, "grad_norm": 0.11041711270809174, "learning_rate": 0.000989059843511991, "loss": 2.256, "step": 425550 }, { "epoch": 1.6450959471787971, "grad_norm": 0.11487311869859695, "learning_rate": 0.0009888620272188369, "loss": 2.2597, "step": 425560 }, { "epoch": 1.6451346043821806, "grad_norm": 0.10112126171588898, "learning_rate": 0.0009886642496183574, "loss": 2.2567, "step": 425570 }, { "epoch": 1.6451732615855639, "grad_norm": 0.10316457599401474, "learning_rate": 0.0009884665106878565, "loss": 2.2661, "step": 425580 }, { "epoch": 1.6452119187889471, "grad_norm": 0.10630446672439575, "learning_rate": 0.000988268810404661, "loss": 2.2652, "step": 425590 }, { "epoch": 1.6452505759923304, "grad_norm": 0.1076459065079689, "learning_rate": 0.0009880711487461186, "loss": 2.2796, "step": 425600 }, { "epoch": 1.6452892331957139, "grad_norm": 0.1187729686498642, "learning_rate": 0.0009878735256896004, "loss": 2.252, "step": 425610 }, { "epoch": 1.645327890399097, "grad_norm": 0.10718236863613129, "learning_rate": 0.000987675941212499, "loss": 2.2586, "step": 425620 }, { "epoch": 1.6453665476024804, "grad_norm": 0.10935017466545105, "learning_rate": 0.0009874783952922288, "loss": 2.2572, "step": 425630 }, { "epoch": 1.6454052048058636, "grad_norm": 0.10489913076162338, "learning_rate": 0.000987280887906227, "loss": 2.2476, "step": 425640 }, { "epoch": 1.6454438620092469, "grad_norm": 0.1212213784456253, "learning_rate": 0.000987083419031952, "loss": 2.2457, "step": 425650 }, { "epoch": 1.6454825192126301, "grad_norm": 0.1128239780664444, "learning_rate": 0.000986885988646885, "loss": 2.2689, "step": 425660 }, { "epoch": 1.6455211764160134, "grad_norm": 0.11003052443265915, "learning_rate": 0.000986688596728528, "loss": 2.2626, "step": 425670 }, { "epoch": 1.6455598336193966, "grad_norm": 0.11292414367198944, "learning_rate": 0.000986491243254406, "loss": 2.2537, "step": 425680 }, { "epoch": 1.6455984908227799, "grad_norm": 0.12339409440755844, "learning_rate": 0.0009862939282020652, "loss": 2.263, "step": 425690 }, { "epoch": 1.6456371480261631, "grad_norm": 0.12118395417928696, "learning_rate": 0.000986096651549074, "loss": 2.2554, "step": 425700 }, { "epoch": 1.6456758052295464, "grad_norm": 0.1183353140950203, "learning_rate": 0.0009858994132730225, "loss": 2.2703, "step": 425710 }, { "epoch": 1.6457144624329296, "grad_norm": 0.10364186763763428, "learning_rate": 0.0009857022133515228, "loss": 2.2594, "step": 425720 }, { "epoch": 1.6457531196363129, "grad_norm": 0.11221203953027725, "learning_rate": 0.0009855050517622083, "loss": 2.247, "step": 425730 }, { "epoch": 1.6457917768396964, "grad_norm": 0.10890361666679382, "learning_rate": 0.0009853079284827342, "loss": 2.2598, "step": 425740 }, { "epoch": 1.6458304340430796, "grad_norm": 0.10923411697149277, "learning_rate": 0.000985110843490778, "loss": 2.2519, "step": 425750 }, { "epoch": 1.6458690912464629, "grad_norm": 0.10897491872310638, "learning_rate": 0.0009849137967640384, "loss": 2.2534, "step": 425760 }, { "epoch": 1.645907748449846, "grad_norm": 0.10118908435106277, "learning_rate": 0.0009847167882802356, "loss": 2.2614, "step": 425770 }, { "epoch": 1.6459464056532296, "grad_norm": 0.09627126157283783, "learning_rate": 0.0009845198180171118, "loss": 2.2502, "step": 425780 }, { "epoch": 1.6459850628566128, "grad_norm": 0.11942839622497559, "learning_rate": 0.0009843228859524303, "loss": 2.258, "step": 425790 }, { "epoch": 1.646023720059996, "grad_norm": 0.10836474597454071, "learning_rate": 0.0009841259920639765, "loss": 2.2394, "step": 425800 }, { "epoch": 1.6460623772633793, "grad_norm": 0.13480186462402344, "learning_rate": 0.0009839291363295572, "loss": 2.2497, "step": 425810 }, { "epoch": 1.6461010344667626, "grad_norm": 0.11613373458385468, "learning_rate": 0.000983732318727, "loss": 2.2545, "step": 425820 }, { "epoch": 1.6461396916701458, "grad_norm": 0.111439548432827, "learning_rate": 0.0009835355392341552, "loss": 2.2602, "step": 425830 }, { "epoch": 1.646178348873529, "grad_norm": 0.11013883352279663, "learning_rate": 0.0009833387978288933, "loss": 2.2707, "step": 425840 }, { "epoch": 1.6462170060769123, "grad_norm": 0.11753743886947632, "learning_rate": 0.0009831420944891073, "loss": 2.2612, "step": 425850 }, { "epoch": 1.6462556632802956, "grad_norm": 0.10817738622426987, "learning_rate": 0.0009829454291927105, "loss": 2.2462, "step": 425860 }, { "epoch": 1.6462943204836789, "grad_norm": 0.10038787871599197, "learning_rate": 0.0009827488019176385, "loss": 2.2579, "step": 425870 }, { "epoch": 1.646332977687062, "grad_norm": 0.11860653758049011, "learning_rate": 0.0009825522126418477, "loss": 2.2591, "step": 425880 }, { "epoch": 1.6463716348904454, "grad_norm": 0.10090608894824982, "learning_rate": 0.0009823556613433157, "loss": 2.2707, "step": 425890 }, { "epoch": 1.6464102920938286, "grad_norm": 0.1041901633143425, "learning_rate": 0.0009821591480000422, "loss": 2.247, "step": 425900 }, { "epoch": 1.646448949297212, "grad_norm": 0.11358907073736191, "learning_rate": 0.000981962672590047, "loss": 2.2617, "step": 425910 }, { "epoch": 1.6464876065005953, "grad_norm": 0.10183798521757126, "learning_rate": 0.0009817662350913717, "loss": 2.2519, "step": 425920 }, { "epoch": 1.6465262637039786, "grad_norm": 0.11067637801170349, "learning_rate": 0.0009815698354820788, "loss": 2.2595, "step": 425930 }, { "epoch": 1.6465649209073618, "grad_norm": 0.11494000256061554, "learning_rate": 0.0009813734737402523, "loss": 2.2702, "step": 425940 }, { "epoch": 1.6466035781107453, "grad_norm": 0.10512025654315948, "learning_rate": 0.0009811771498439976, "loss": 2.2519, "step": 425950 }, { "epoch": 1.6466422353141286, "grad_norm": 0.1056099459528923, "learning_rate": 0.0009809808637714404, "loss": 2.2437, "step": 425960 }, { "epoch": 1.6466808925175118, "grad_norm": 0.6840528249740601, "learning_rate": 0.0009807846155007274, "loss": 2.2509, "step": 425970 }, { "epoch": 1.646719549720895, "grad_norm": 0.1259155571460724, "learning_rate": 0.0009805884050100274, "loss": 2.2623, "step": 425980 }, { "epoch": 1.6467582069242783, "grad_norm": 0.09720668196678162, "learning_rate": 0.000980392232277529, "loss": 2.2419, "step": 425990 }, { "epoch": 1.6467968641276616, "grad_norm": 0.1126490980386734, "learning_rate": 0.000980196097281443, "loss": 2.2718, "step": 426000 }, { "epoch": 1.6468355213310448, "grad_norm": 0.11232949048280716, "learning_rate": 0.00098, "loss": 2.2525, "step": 426010 }, { "epoch": 1.646874178534428, "grad_norm": 0.10978632420301437, "learning_rate": 0.000979803940411452, "loss": 2.2546, "step": 426020 }, { "epoch": 1.6469128357378113, "grad_norm": 0.12147060036659241, "learning_rate": 0.0009796079184940722, "loss": 2.2502, "step": 426030 }, { "epoch": 1.6469514929411946, "grad_norm": 0.1027112528681755, "learning_rate": 0.000979411934226154, "loss": 2.2509, "step": 426040 }, { "epoch": 1.6469901501445778, "grad_norm": 0.1303754299879074, "learning_rate": 0.0009792159875860124, "loss": 2.2437, "step": 426050 }, { "epoch": 1.647028807347961, "grad_norm": 0.1287584751844406, "learning_rate": 0.0009790200785519826, "loss": 2.255, "step": 426060 }, { "epoch": 1.6470674645513443, "grad_norm": 0.11077667027711868, "learning_rate": 0.0009788242071024206, "loss": 2.2641, "step": 426070 }, { "epoch": 1.6471061217547278, "grad_norm": 0.11055146902799606, "learning_rate": 0.0009786283732157038, "loss": 2.2852, "step": 426080 }, { "epoch": 1.647144778958111, "grad_norm": 0.11356287449598312, "learning_rate": 0.0009784325768702293, "loss": 2.2459, "step": 426090 }, { "epoch": 1.6471834361614943, "grad_norm": 0.11561722308397293, "learning_rate": 0.0009782368180444158, "loss": 2.2573, "step": 426100 }, { "epoch": 1.6472220933648776, "grad_norm": 0.11019952595233917, "learning_rate": 0.0009780410967167026, "loss": 2.26, "step": 426110 }, { "epoch": 1.647260750568261, "grad_norm": 0.11386414617300034, "learning_rate": 0.000977845412865549, "loss": 2.2544, "step": 426120 }, { "epoch": 1.6472994077716443, "grad_norm": 0.11020597070455551, "learning_rate": 0.0009776497664694356, "loss": 2.2622, "step": 426130 }, { "epoch": 1.6473380649750275, "grad_norm": 0.10992365330457687, "learning_rate": 0.0009774541575068628, "loss": 2.2434, "step": 426140 }, { "epoch": 1.6473767221784108, "grad_norm": 0.1001507043838501, "learning_rate": 0.0009772585859563524, "loss": 2.2524, "step": 426150 }, { "epoch": 1.647415379381794, "grad_norm": 0.11525522172451019, "learning_rate": 0.0009770630517964463, "loss": 2.2604, "step": 426160 }, { "epoch": 1.6474540365851773, "grad_norm": 0.12931174039840698, "learning_rate": 0.0009768675550057072, "loss": 2.2657, "step": 426170 }, { "epoch": 1.6474926937885606, "grad_norm": 0.1104811578989029, "learning_rate": 0.0009766720955627175, "loss": 2.2442, "step": 426180 }, { "epoch": 1.6475313509919438, "grad_norm": 0.11013077944517136, "learning_rate": 0.000976476673446081, "loss": 2.2659, "step": 426190 }, { "epoch": 1.647570008195327, "grad_norm": 0.10370016098022461, "learning_rate": 0.0009762812886344219, "loss": 2.2669, "step": 426200 }, { "epoch": 1.6476086653987103, "grad_norm": 0.1222548633813858, "learning_rate": 0.0009760859411063836, "loss": 2.2639, "step": 426210 }, { "epoch": 1.6476473226020936, "grad_norm": 0.10938958823680878, "learning_rate": 0.0009758906308406315, "loss": 2.2544, "step": 426220 }, { "epoch": 1.6476859798054768, "grad_norm": 0.11079689115285873, "learning_rate": 0.0009756953578158498, "loss": 2.2695, "step": 426230 }, { "epoch": 1.64772463700886, "grad_norm": 0.1136854887008667, "learning_rate": 0.0009755001220107442, "loss": 2.275, "step": 426240 }, { "epoch": 1.6477632942122435, "grad_norm": 0.10319668054580688, "learning_rate": 0.00097530492340404, "loss": 2.2559, "step": 426250 }, { "epoch": 1.6478019514156268, "grad_norm": 0.10906700044870377, "learning_rate": 0.0009751097619744836, "loss": 2.2499, "step": 426260 }, { "epoch": 1.64784060861901, "grad_norm": 0.10607217997312546, "learning_rate": 0.00097491463770084, "loss": 2.2514, "step": 426270 }, { "epoch": 1.6478792658223933, "grad_norm": 0.100669264793396, "learning_rate": 0.0009747195505618964, "loss": 2.2576, "step": 426280 }, { "epoch": 1.6479179230257768, "grad_norm": 0.10994735360145569, "learning_rate": 0.0009745245005364585, "loss": 2.2471, "step": 426290 }, { "epoch": 1.64795658022916, "grad_norm": 0.10938809812068939, "learning_rate": 0.0009743294876033532, "loss": 2.2393, "step": 426300 }, { "epoch": 1.6479952374325433, "grad_norm": 0.12271041423082352, "learning_rate": 0.0009741345117414273, "loss": 2.2595, "step": 426310 }, { "epoch": 1.6480338946359265, "grad_norm": 0.1128772720694542, "learning_rate": 0.0009739395729295473, "loss": 2.2539, "step": 426320 }, { "epoch": 1.6480725518393098, "grad_norm": 0.10963631421327591, "learning_rate": 0.0009737446711466002, "loss": 2.2432, "step": 426330 }, { "epoch": 1.648111209042693, "grad_norm": 0.10848647356033325, "learning_rate": 0.0009735498063714927, "loss": 2.2454, "step": 426340 }, { "epoch": 1.6481498662460763, "grad_norm": 0.10321880131959915, "learning_rate": 0.0009733549785831522, "loss": 2.2597, "step": 426350 }, { "epoch": 1.6481885234494595, "grad_norm": 0.10751194506883621, "learning_rate": 0.0009731601877605252, "loss": 2.2644, "step": 426360 }, { "epoch": 1.6482271806528428, "grad_norm": 0.10674343258142471, "learning_rate": 0.0009729654338825786, "loss": 2.2647, "step": 426370 }, { "epoch": 1.648265837856226, "grad_norm": 0.10442473739385605, "learning_rate": 0.0009727707169282996, "loss": 2.274, "step": 426380 }, { "epoch": 1.6483044950596093, "grad_norm": 0.10820572823286057, "learning_rate": 0.0009725760368766943, "loss": 2.2479, "step": 426390 }, { "epoch": 1.6483431522629926, "grad_norm": 0.11544471234083176, "learning_rate": 0.0009723813937067895, "loss": 2.2568, "step": 426400 }, { "epoch": 1.6483818094663758, "grad_norm": 0.1048877164721489, "learning_rate": 0.0009721867873976322, "loss": 2.2424, "step": 426410 }, { "epoch": 1.6484204666697593, "grad_norm": 0.10477080196142197, "learning_rate": 0.0009719922179282883, "loss": 2.2582, "step": 426420 }, { "epoch": 1.6484591238731425, "grad_norm": 0.12967807054519653, "learning_rate": 0.0009717976852778438, "loss": 2.2631, "step": 426430 }, { "epoch": 1.6484977810765258, "grad_norm": 0.10460349172353745, "learning_rate": 0.0009716031894254048, "loss": 2.2536, "step": 426440 }, { "epoch": 1.648536438279909, "grad_norm": 0.10953984409570694, "learning_rate": 0.0009714087303500967, "loss": 2.2624, "step": 426450 }, { "epoch": 1.6485750954832925, "grad_norm": 0.10295036435127258, "learning_rate": 0.0009712143080310651, "loss": 2.2548, "step": 426460 }, { "epoch": 1.6486137526866758, "grad_norm": 0.10450485348701477, "learning_rate": 0.0009710199224474751, "loss": 2.2464, "step": 426470 }, { "epoch": 1.648652409890059, "grad_norm": 0.11599656194448471, "learning_rate": 0.0009708255735785114, "loss": 2.258, "step": 426480 }, { "epoch": 1.6486910670934423, "grad_norm": 0.10033763945102692, "learning_rate": 0.0009706312614033783, "loss": 2.2594, "step": 426490 }, { "epoch": 1.6487297242968255, "grad_norm": 0.10577484220266342, "learning_rate": 0.0009704369859013, "loss": 2.2442, "step": 426500 }, { "epoch": 1.6487683815002088, "grad_norm": 0.11341305822134018, "learning_rate": 0.0009702427470515198, "loss": 2.2558, "step": 426510 }, { "epoch": 1.648807038703592, "grad_norm": 0.1167931854724884, "learning_rate": 0.0009700485448333014, "loss": 2.2407, "step": 426520 }, { "epoch": 1.6488456959069753, "grad_norm": 0.1225115954875946, "learning_rate": 0.0009698543792259272, "loss": 2.2495, "step": 426530 }, { "epoch": 1.6488843531103585, "grad_norm": 0.10878846794366837, "learning_rate": 0.0009696602502086993, "loss": 2.2589, "step": 426540 }, { "epoch": 1.6489230103137418, "grad_norm": 0.11994361132383347, "learning_rate": 0.0009694661577609398, "loss": 2.247, "step": 426550 }, { "epoch": 1.648961667517125, "grad_norm": 0.11334957182407379, "learning_rate": 0.0009692721018619898, "loss": 2.243, "step": 426560 }, { "epoch": 1.6490003247205083, "grad_norm": 0.13322052359580994, "learning_rate": 0.0009690780824912102, "loss": 2.266, "step": 426570 }, { "epoch": 1.6490389819238915, "grad_norm": 0.10879038274288177, "learning_rate": 0.0009688840996279808, "loss": 2.242, "step": 426580 }, { "epoch": 1.649077639127275, "grad_norm": 0.11077425628900528, "learning_rate": 0.000968690153251701, "loss": 2.2625, "step": 426590 }, { "epoch": 1.6491162963306583, "grad_norm": 0.10870052129030228, "learning_rate": 0.0009684962433417899, "loss": 2.2531, "step": 426600 }, { "epoch": 1.6491549535340415, "grad_norm": 0.11626043170690536, "learning_rate": 0.0009683023698776856, "loss": 2.2521, "step": 426610 }, { "epoch": 1.6491936107374248, "grad_norm": 0.12165232002735138, "learning_rate": 0.0009681085328388455, "loss": 2.2427, "step": 426620 }, { "epoch": 1.6492322679408082, "grad_norm": 0.11001374572515488, "learning_rate": 0.0009679147322047465, "loss": 2.2666, "step": 426630 }, { "epoch": 1.6492709251441915, "grad_norm": 0.10634226351976395, "learning_rate": 0.0009677209679548848, "loss": 2.2462, "step": 426640 }, { "epoch": 1.6493095823475747, "grad_norm": 0.11076271533966064, "learning_rate": 0.0009675272400687755, "loss": 2.263, "step": 426650 }, { "epoch": 1.649348239550958, "grad_norm": 0.11283444613218307, "learning_rate": 0.0009673335485259531, "loss": 2.2408, "step": 426660 }, { "epoch": 1.6493868967543412, "grad_norm": 0.1021152213215828, "learning_rate": 0.0009671398933059714, "loss": 2.2574, "step": 426670 }, { "epoch": 1.6494255539577245, "grad_norm": 0.10668480396270752, "learning_rate": 0.0009669462743884035, "loss": 2.2569, "step": 426680 }, { "epoch": 1.6494642111611078, "grad_norm": 0.12497959285974503, "learning_rate": 0.0009667526917528409, "loss": 2.2608, "step": 426690 }, { "epoch": 1.649502868364491, "grad_norm": 0.11128103733062744, "learning_rate": 0.0009665591453788949, "loss": 2.2698, "step": 426700 }, { "epoch": 1.6495415255678743, "grad_norm": 0.11678318679332733, "learning_rate": 0.000966365635246196, "loss": 2.2519, "step": 426710 }, { "epoch": 1.6495801827712575, "grad_norm": 0.10596548020839691, "learning_rate": 0.0009661721613343933, "loss": 2.2611, "step": 426720 }, { "epoch": 1.6496188399746408, "grad_norm": 0.10452347993850708, "learning_rate": 0.0009659787236231549, "loss": 2.2548, "step": 426730 }, { "epoch": 1.649657497178024, "grad_norm": 0.10891494154930115, "learning_rate": 0.0009657853220921684, "loss": 2.2609, "step": 426740 }, { "epoch": 1.6496961543814075, "grad_norm": 0.11339747905731201, "learning_rate": 0.0009655919567211399, "loss": 2.2433, "step": 426750 }, { "epoch": 1.6497348115847907, "grad_norm": 0.10721655189990997, "learning_rate": 0.0009653986274897951, "loss": 2.2454, "step": 426760 }, { "epoch": 1.649773468788174, "grad_norm": 0.10534560680389404, "learning_rate": 0.0009652053343778777, "loss": 2.2601, "step": 426770 }, { "epoch": 1.6498121259915572, "grad_norm": 0.10680735856294632, "learning_rate": 0.0009650120773651512, "loss": 2.2511, "step": 426780 }, { "epoch": 1.6498507831949405, "grad_norm": 0.11978033930063248, "learning_rate": 0.0009648188564313974, "loss": 2.2635, "step": 426790 }, { "epoch": 1.649889440398324, "grad_norm": 0.12228116393089294, "learning_rate": 0.0009646256715564172, "loss": 2.2419, "step": 426800 }, { "epoch": 1.6499280976017072, "grad_norm": 0.10044317692518234, "learning_rate": 0.0009644325227200305, "loss": 2.2565, "step": 426810 }, { "epoch": 1.6499667548050905, "grad_norm": 0.10718821734189987, "learning_rate": 0.0009642394099020759, "loss": 2.2409, "step": 426820 }, { "epoch": 1.6500054120084737, "grad_norm": 0.11532682180404663, "learning_rate": 0.0009640463330824105, "loss": 2.2531, "step": 426830 }, { "epoch": 1.650044069211857, "grad_norm": 0.11697693169116974, "learning_rate": 0.0009638532922409105, "loss": 2.2531, "step": 426840 }, { "epoch": 1.6500827264152402, "grad_norm": 0.10346714407205582, "learning_rate": 0.0009636602873574707, "loss": 2.2426, "step": 426850 }, { "epoch": 1.6501213836186235, "grad_norm": 0.09724732488393784, "learning_rate": 0.0009634673184120048, "loss": 2.2553, "step": 426860 }, { "epoch": 1.6501600408220067, "grad_norm": 0.10752703994512558, "learning_rate": 0.0009632743853844452, "loss": 2.2566, "step": 426870 }, { "epoch": 1.65019869802539, "grad_norm": 0.10641635209321976, "learning_rate": 0.0009630814882547425, "loss": 2.2422, "step": 426880 }, { "epoch": 1.6502373552287732, "grad_norm": 0.11156083643436432, "learning_rate": 0.0009628886270028663, "loss": 2.2586, "step": 426890 }, { "epoch": 1.6502760124321565, "grad_norm": 0.12447494268417358, "learning_rate": 0.0009626958016088048, "loss": 2.2554, "step": 426900 }, { "epoch": 1.6503146696355397, "grad_norm": 0.10488735139369965, "learning_rate": 0.0009625030120525651, "loss": 2.2754, "step": 426910 }, { "epoch": 1.6503533268389232, "grad_norm": 0.10971377044916153, "learning_rate": 0.0009623102583141723, "loss": 2.2477, "step": 426920 }, { "epoch": 1.6503919840423065, "grad_norm": 0.1068492978811264, "learning_rate": 0.0009621175403736704, "loss": 2.2591, "step": 426930 }, { "epoch": 1.6504306412456897, "grad_norm": 0.1091977134346962, "learning_rate": 0.0009619248582111217, "loss": 2.2522, "step": 426940 }, { "epoch": 1.650469298449073, "grad_norm": 0.10923954099416733, "learning_rate": 0.0009617322118066072, "loss": 2.2399, "step": 426950 }, { "epoch": 1.6505079556524562, "grad_norm": 0.10942035168409348, "learning_rate": 0.0009615396011402264, "loss": 2.2533, "step": 426960 }, { "epoch": 1.6505466128558397, "grad_norm": 0.10097722709178925, "learning_rate": 0.0009613470261920971, "loss": 2.253, "step": 426970 }, { "epoch": 1.650585270059223, "grad_norm": 0.10516929626464844, "learning_rate": 0.0009611544869423559, "loss": 2.2447, "step": 426980 }, { "epoch": 1.6506239272626062, "grad_norm": 0.11061711609363556, "learning_rate": 0.0009609619833711569, "loss": 2.2492, "step": 426990 }, { "epoch": 1.6506625844659895, "grad_norm": 0.11997164040803909, "learning_rate": 0.0009607695154586736, "loss": 2.2495, "step": 427000 }, { "epoch": 1.6507012416693727, "grad_norm": 0.11050672829151154, "learning_rate": 0.0009605770831850973, "loss": 2.2576, "step": 427010 }, { "epoch": 1.650739898872756, "grad_norm": 0.10786289721727371, "learning_rate": 0.000960384686530638, "loss": 2.2417, "step": 427020 }, { "epoch": 1.6507785560761392, "grad_norm": 0.11886247992515564, "learning_rate": 0.0009601923254755234, "loss": 2.2575, "step": 427030 }, { "epoch": 1.6508172132795225, "grad_norm": 1.0953397750854492, "learning_rate": 0.00096, "loss": 2.2613, "step": 427040 }, { "epoch": 1.6508558704829057, "grad_norm": 0.12538954615592957, "learning_rate": 0.0009598077100843325, "loss": 2.2515, "step": 427050 }, { "epoch": 1.650894527686289, "grad_norm": 0.10170107334852219, "learning_rate": 0.0009596154557088037, "loss": 2.254, "step": 427060 }, { "epoch": 1.6509331848896722, "grad_norm": 0.10495311766862869, "learning_rate": 0.0009594232368537147, "loss": 2.258, "step": 427070 }, { "epoch": 1.6509718420930555, "grad_norm": 0.10858013480901718, "learning_rate": 0.0009592310534993851, "loss": 2.2545, "step": 427080 }, { "epoch": 1.651010499296439, "grad_norm": 0.09755268692970276, "learning_rate": 0.0009590389056261517, "loss": 2.2653, "step": 427090 }, { "epoch": 1.6510491564998222, "grad_norm": 0.1101205050945282, "learning_rate": 0.0009588467932143703, "loss": 2.2551, "step": 427100 }, { "epoch": 1.6510878137032055, "grad_norm": 0.11821103096008301, "learning_rate": 0.000958654716244415, "loss": 2.2626, "step": 427110 }, { "epoch": 1.6511264709065887, "grad_norm": 0.10163707286119461, "learning_rate": 0.0009584626746966771, "loss": 2.2663, "step": 427120 }, { "epoch": 1.651165128109972, "grad_norm": 0.10656595975160599, "learning_rate": 0.0009582706685515668, "loss": 2.2503, "step": 427130 }, { "epoch": 1.6512037853133554, "grad_norm": 0.10545650869607925, "learning_rate": 0.0009580786977895115, "loss": 2.2588, "step": 427140 }, { "epoch": 1.6512424425167387, "grad_norm": 0.11352559924125671, "learning_rate": 0.0009578867623909577, "loss": 2.2588, "step": 427150 }, { "epoch": 1.651281099720122, "grad_norm": 0.10905435681343079, "learning_rate": 0.0009576948623363692, "loss": 2.2396, "step": 427160 }, { "epoch": 1.6513197569235052, "grad_norm": 0.11660381406545639, "learning_rate": 0.0009575029976062281, "loss": 2.261, "step": 427170 }, { "epoch": 1.6513584141268884, "grad_norm": 0.10470879822969437, "learning_rate": 0.0009573111681810341, "loss": 2.2581, "step": 427180 }, { "epoch": 1.6513970713302717, "grad_norm": 0.10995166003704071, "learning_rate": 0.0009571193740413049, "loss": 2.275, "step": 427190 }, { "epoch": 1.651435728533655, "grad_norm": 0.10868128389120102, "learning_rate": 0.000956927615167576, "loss": 2.2598, "step": 427200 }, { "epoch": 1.6514743857370382, "grad_norm": 0.12169184535741806, "learning_rate": 0.0009567358915404018, "loss": 2.2511, "step": 427210 }, { "epoch": 1.6515130429404214, "grad_norm": 0.10812258720397949, "learning_rate": 0.0009565442031403535, "loss": 2.251, "step": 427220 }, { "epoch": 1.6515517001438047, "grad_norm": 0.13253743946552277, "learning_rate": 0.0009563525499480201, "loss": 2.2602, "step": 427230 }, { "epoch": 1.651590357347188, "grad_norm": 0.10694847255945206, "learning_rate": 0.000956160931944009, "loss": 2.2517, "step": 427240 }, { "epoch": 1.6516290145505712, "grad_norm": 0.1060081422328949, "learning_rate": 0.000955969349108945, "loss": 2.2521, "step": 427250 }, { "epoch": 1.6516676717539547, "grad_norm": 0.12398270517587662, "learning_rate": 0.0009557778014234711, "loss": 2.2524, "step": 427260 }, { "epoch": 1.651706328957338, "grad_norm": 0.11210572719573975, "learning_rate": 0.0009555862888682474, "loss": 2.2627, "step": 427270 }, { "epoch": 1.6517449861607212, "grad_norm": 0.10051599144935608, "learning_rate": 0.0009553948114239524, "loss": 2.2607, "step": 427280 }, { "epoch": 1.6517836433641044, "grad_norm": 0.09925200045108795, "learning_rate": 0.0009552033690712821, "loss": 2.2416, "step": 427290 }, { "epoch": 1.651822300567488, "grad_norm": 0.10382227599620819, "learning_rate": 0.0009550119617909494, "loss": 2.2518, "step": 427300 }, { "epoch": 1.6518609577708712, "grad_norm": 0.11811905354261398, "learning_rate": 0.0009548205895636864, "loss": 2.2539, "step": 427310 }, { "epoch": 1.6518996149742544, "grad_norm": 0.10599834471940994, "learning_rate": 0.0009546292523702416, "loss": 2.2412, "step": 427320 }, { "epoch": 1.6519382721776377, "grad_norm": 0.09794063121080399, "learning_rate": 0.0009544379501913816, "loss": 2.2539, "step": 427330 }, { "epoch": 1.651976929381021, "grad_norm": 0.10884908586740494, "learning_rate": 0.0009542466830078904, "loss": 2.2581, "step": 427340 }, { "epoch": 1.6520155865844042, "grad_norm": 0.10881958901882172, "learning_rate": 0.0009540554508005694, "loss": 2.2551, "step": 427350 }, { "epoch": 1.6520542437877874, "grad_norm": 0.10935207456350327, "learning_rate": 0.0009538642535502384, "loss": 2.2541, "step": 427360 }, { "epoch": 1.6520929009911707, "grad_norm": 0.12142116576433182, "learning_rate": 0.0009536730912377336, "loss": 2.2406, "step": 427370 }, { "epoch": 1.652131558194554, "grad_norm": 0.10374008864164352, "learning_rate": 0.0009534819638439096, "loss": 2.2327, "step": 427380 }, { "epoch": 1.6521702153979372, "grad_norm": 0.1134655550122261, "learning_rate": 0.000953290871349638, "loss": 2.2278, "step": 427390 }, { "epoch": 1.6522088726013204, "grad_norm": 0.11115321516990662, "learning_rate": 0.000953099813735808, "loss": 2.2584, "step": 427400 }, { "epoch": 1.6522475298047037, "grad_norm": 0.10089276731014252, "learning_rate": 0.0009529087909833261, "loss": 2.2527, "step": 427410 }, { "epoch": 1.652286187008087, "grad_norm": 0.10366006195545197, "learning_rate": 0.0009527178030731163, "loss": 2.2522, "step": 427420 }, { "epoch": 1.6523248442114704, "grad_norm": 0.10919071733951569, "learning_rate": 0.0009525268499861202, "loss": 2.2589, "step": 427430 }, { "epoch": 1.6523635014148537, "grad_norm": 0.10370808839797974, "learning_rate": 0.0009523359317032964, "loss": 2.246, "step": 427440 }, { "epoch": 1.652402158618237, "grad_norm": 0.09934686124324799, "learning_rate": 0.0009521450482056211, "loss": 2.2522, "step": 427450 }, { "epoch": 1.6524408158216202, "grad_norm": 0.11634613573551178, "learning_rate": 0.0009519541994740879, "loss": 2.2676, "step": 427460 }, { "epoch": 1.6524794730250036, "grad_norm": 0.0998673290014267, "learning_rate": 0.0009517633854897072, "loss": 2.2456, "step": 427470 }, { "epoch": 1.652518130228387, "grad_norm": 0.11801506578922272, "learning_rate": 0.0009515726062335075, "loss": 2.2584, "step": 427480 }, { "epoch": 1.6525567874317701, "grad_norm": 0.12109418958425522, "learning_rate": 0.0009513818616865337, "loss": 2.2496, "step": 427490 }, { "epoch": 1.6525954446351534, "grad_norm": 0.10361652821302414, "learning_rate": 0.0009511911518298484, "loss": 2.2555, "step": 427500 }, { "epoch": 1.6526341018385367, "grad_norm": 0.0983557403087616, "learning_rate": 0.0009510004766445315, "loss": 2.2535, "step": 427510 }, { "epoch": 1.65267275904192, "grad_norm": 0.12221435457468033, "learning_rate": 0.0009508098361116799, "loss": 2.2423, "step": 427520 }, { "epoch": 1.6527114162453032, "grad_norm": 0.10871323198080063, "learning_rate": 0.0009506192302124077, "loss": 2.252, "step": 427530 }, { "epoch": 1.6527500734486864, "grad_norm": 0.09926702082157135, "learning_rate": 0.0009504286589278459, "loss": 2.2522, "step": 427540 }, { "epoch": 1.6527887306520697, "grad_norm": 0.1006474420428276, "learning_rate": 0.0009502381222391432, "loss": 2.2596, "step": 427550 }, { "epoch": 1.652827387855453, "grad_norm": 0.1122966781258583, "learning_rate": 0.0009500476201274651, "loss": 2.2436, "step": 427560 }, { "epoch": 1.6528660450588362, "grad_norm": 0.10408204793930054, "learning_rate": 0.0009498571525739938, "loss": 2.2635, "step": 427570 }, { "epoch": 1.6529047022622194, "grad_norm": 0.09867561608552933, "learning_rate": 0.0009496667195599294, "loss": 2.2467, "step": 427580 }, { "epoch": 1.6529433594656027, "grad_norm": 0.10931237787008286, "learning_rate": 0.0009494763210664883, "loss": 2.2519, "step": 427590 }, { "epoch": 1.6529820166689861, "grad_norm": 0.11653488129377365, "learning_rate": 0.0009492859570749042, "loss": 2.2308, "step": 427600 }, { "epoch": 1.6530206738723694, "grad_norm": 0.10837239027023315, "learning_rate": 0.000949095627566428, "loss": 2.2455, "step": 427610 }, { "epoch": 1.6530593310757526, "grad_norm": 0.10967964679002762, "learning_rate": 0.0009489053325223269, "loss": 2.2432, "step": 427620 }, { "epoch": 1.653097988279136, "grad_norm": 0.10297200828790665, "learning_rate": 0.0009487150719238862, "loss": 2.2671, "step": 427630 }, { "epoch": 1.6531366454825194, "grad_norm": 0.10992071777582169, "learning_rate": 0.0009485248457524068, "loss": 2.2518, "step": 427640 }, { "epoch": 1.6531753026859026, "grad_norm": 0.11099092662334442, "learning_rate": 0.0009483346539892075, "loss": 2.2612, "step": 427650 }, { "epoch": 1.6532139598892859, "grad_norm": 0.11154554784297943, "learning_rate": 0.0009481444966156237, "loss": 2.2533, "step": 427660 }, { "epoch": 1.6532526170926691, "grad_norm": 0.12147395312786102, "learning_rate": 0.0009479543736130072, "loss": 2.2529, "step": 427670 }, { "epoch": 1.6532912742960524, "grad_norm": 0.11459475755691528, "learning_rate": 0.0009477642849627277, "loss": 2.248, "step": 427680 }, { "epoch": 1.6533299314994356, "grad_norm": 0.1090463325381279, "learning_rate": 0.0009475742306461705, "loss": 2.2538, "step": 427690 }, { "epoch": 1.6533685887028189, "grad_norm": 0.1133263036608696, "learning_rate": 0.0009473842106447385, "loss": 2.2485, "step": 427700 }, { "epoch": 1.6534072459062021, "grad_norm": 0.10831756889820099, "learning_rate": 0.0009471942249398515, "loss": 2.2402, "step": 427710 }, { "epoch": 1.6534459031095854, "grad_norm": 0.11087630689144135, "learning_rate": 0.0009470042735129454, "loss": 2.2558, "step": 427720 }, { "epoch": 1.6534845603129686, "grad_norm": 0.12291962653398514, "learning_rate": 0.0009468143563454732, "loss": 2.2593, "step": 427730 }, { "epoch": 1.653523217516352, "grad_norm": 0.11833938956260681, "learning_rate": 0.0009466244734189046, "loss": 2.2444, "step": 427740 }, { "epoch": 1.6535618747197351, "grad_norm": 0.1083853617310524, "learning_rate": 0.0009464346247147261, "loss": 2.2469, "step": 427750 }, { "epoch": 1.6536005319231184, "grad_norm": 0.10361374914646149, "learning_rate": 0.0009462448102144407, "loss": 2.249, "step": 427760 }, { "epoch": 1.6536391891265019, "grad_norm": 0.10491149872541428, "learning_rate": 0.0009460550298995683, "loss": 2.2537, "step": 427770 }, { "epoch": 1.6536778463298851, "grad_norm": 0.11759241670370102, "learning_rate": 0.0009458652837516451, "loss": 2.2442, "step": 427780 }, { "epoch": 1.6537165035332684, "grad_norm": 0.13166950643062592, "learning_rate": 0.0009456755717522239, "loss": 2.2472, "step": 427790 }, { "epoch": 1.6537551607366516, "grad_norm": 0.1135098785161972, "learning_rate": 0.0009454858938828745, "loss": 2.2581, "step": 427800 }, { "epoch": 1.653793817940035, "grad_norm": 0.10833874344825745, "learning_rate": 0.0009452962501251831, "loss": 2.2566, "step": 427810 }, { "epoch": 1.6538324751434184, "grad_norm": 0.10796923190355301, "learning_rate": 0.0009451066404607525, "loss": 2.2464, "step": 427820 }, { "epoch": 1.6538711323468016, "grad_norm": 0.11246345937252045, "learning_rate": 0.0009449170648712018, "loss": 2.2557, "step": 427830 }, { "epoch": 1.6539097895501849, "grad_norm": 0.11669411510229111, "learning_rate": 0.0009447275233381664, "loss": 2.2483, "step": 427840 }, { "epoch": 1.6539484467535681, "grad_norm": 0.10845163464546204, "learning_rate": 0.000944538015843299, "loss": 2.2599, "step": 427850 }, { "epoch": 1.6539871039569514, "grad_norm": 0.11250095069408417, "learning_rate": 0.0009443485423682683, "loss": 2.2588, "step": 427860 }, { "epoch": 1.6540257611603346, "grad_norm": 0.10398449748754501, "learning_rate": 0.0009441591028947592, "loss": 2.2561, "step": 427870 }, { "epoch": 1.6540644183637179, "grad_norm": 0.10691075772047043, "learning_rate": 0.0009439696974044732, "loss": 2.2514, "step": 427880 }, { "epoch": 1.6541030755671011, "grad_norm": 0.1094600111246109, "learning_rate": 0.0009437803258791287, "loss": 2.2392, "step": 427890 }, { "epoch": 1.6541417327704844, "grad_norm": 0.1066112220287323, "learning_rate": 0.0009435909883004594, "loss": 2.2663, "step": 427900 }, { "epoch": 1.6541803899738676, "grad_norm": 0.11026117205619812, "learning_rate": 0.0009434016846502167, "loss": 2.2482, "step": 427910 }, { "epoch": 1.6542190471772509, "grad_norm": 0.12330233305692673, "learning_rate": 0.0009432124149101676, "loss": 2.2624, "step": 427920 }, { "epoch": 1.6542577043806341, "grad_norm": 0.10916101187467575, "learning_rate": 0.0009430231790620951, "loss": 2.2532, "step": 427930 }, { "epoch": 1.6542963615840176, "grad_norm": 0.11618591845035553, "learning_rate": 0.0009428339770877991, "loss": 2.2408, "step": 427940 }, { "epoch": 1.6543350187874009, "grad_norm": 0.10629193484783173, "learning_rate": 0.0009426448089690958, "loss": 2.256, "step": 427950 }, { "epoch": 1.654373675990784, "grad_norm": 0.11059808731079102, "learning_rate": 0.0009424556746878173, "loss": 2.264, "step": 427960 }, { "epoch": 1.6544123331941674, "grad_norm": 0.11043719202280045, "learning_rate": 0.0009422665742258118, "loss": 2.2513, "step": 427970 }, { "epoch": 1.6544509903975508, "grad_norm": 0.1055753082036972, "learning_rate": 0.0009420775075649445, "loss": 2.2552, "step": 427980 }, { "epoch": 1.654489647600934, "grad_norm": 0.1074535995721817, "learning_rate": 0.0009418884746870962, "loss": 2.2654, "step": 427990 }, { "epoch": 1.6545283048043173, "grad_norm": 0.09793099015951157, "learning_rate": 0.0009416994755741637, "loss": 2.2577, "step": 428000 }, { "epoch": 1.6545669620077006, "grad_norm": 0.09761539101600647, "learning_rate": 0.0009415105102080608, "loss": 2.2593, "step": 428010 }, { "epoch": 1.6546056192110838, "grad_norm": 0.1087460145354271, "learning_rate": 0.0009413215785707164, "loss": 2.2625, "step": 428020 }, { "epoch": 1.654644276414467, "grad_norm": 0.11112965643405914, "learning_rate": 0.0009411326806440761, "loss": 2.2731, "step": 428030 }, { "epoch": 1.6546829336178503, "grad_norm": 0.10286249965429306, "learning_rate": 0.0009409438164101018, "loss": 2.2389, "step": 428040 }, { "epoch": 1.6547215908212336, "grad_norm": 0.1073416993021965, "learning_rate": 0.0009407549858507712, "loss": 2.2416, "step": 428050 }, { "epoch": 1.6547602480246169, "grad_norm": 0.09992457181215286, "learning_rate": 0.0009405661889480779, "loss": 2.2534, "step": 428060 }, { "epoch": 1.654798905228, "grad_norm": 0.113508440554142, "learning_rate": 0.0009403774256840316, "loss": 2.2542, "step": 428070 }, { "epoch": 1.6548375624313834, "grad_norm": 0.1302674412727356, "learning_rate": 0.0009401886960406585, "loss": 2.2504, "step": 428080 }, { "epoch": 1.6548762196347666, "grad_norm": 0.11876347661018372, "learning_rate": 0.00094, "loss": 2.2503, "step": 428090 }, { "epoch": 1.6549148768381499, "grad_norm": 0.12014816701412201, "learning_rate": 0.0009398113375441142, "loss": 2.2484, "step": 428100 }, { "epoch": 1.6549535340415333, "grad_norm": 0.11447982490062714, "learning_rate": 0.000939622708655075, "loss": 2.2527, "step": 428110 }, { "epoch": 1.6549921912449166, "grad_norm": 0.11171301454305649, "learning_rate": 0.0009394341133149717, "loss": 2.2517, "step": 428120 }, { "epoch": 1.6550308484482998, "grad_norm": 0.11847539991140366, "learning_rate": 0.0009392455515059106, "loss": 2.2573, "step": 428130 }, { "epoch": 1.655069505651683, "grad_norm": 0.12345657497644424, "learning_rate": 0.0009390570232100127, "loss": 2.2517, "step": 428140 }, { "epoch": 1.6551081628550666, "grad_norm": 0.23822791874408722, "learning_rate": 0.0009388685284094154, "loss": 2.264, "step": 428150 }, { "epoch": 1.6551468200584498, "grad_norm": 0.12832707166671753, "learning_rate": 0.0009386800670862721, "loss": 2.2367, "step": 428160 }, { "epoch": 1.655185477261833, "grad_norm": 0.12387876212596893, "learning_rate": 0.000938491639222752, "loss": 2.2495, "step": 428170 }, { "epoch": 1.6552241344652163, "grad_norm": 0.11859184503555298, "learning_rate": 0.0009383032448010403, "loss": 2.253, "step": 428180 }, { "epoch": 1.6552627916685996, "grad_norm": 0.1142348125576973, "learning_rate": 0.0009381148838033374, "loss": 2.2502, "step": 428190 }, { "epoch": 1.6553014488719828, "grad_norm": 0.10157260298728943, "learning_rate": 0.0009379265562118597, "loss": 2.2477, "step": 428200 }, { "epoch": 1.655340106075366, "grad_norm": 0.10549125075340271, "learning_rate": 0.0009377382620088399, "loss": 2.254, "step": 428210 }, { "epoch": 1.6553787632787493, "grad_norm": 0.1005273088812828, "learning_rate": 0.0009375500011765259, "loss": 2.2603, "step": 428220 }, { "epoch": 1.6554174204821326, "grad_norm": 0.11275097727775574, "learning_rate": 0.0009373617736971816, "loss": 2.2494, "step": 428230 }, { "epoch": 1.6554560776855158, "grad_norm": 0.11196176707744598, "learning_rate": 0.0009371735795530862, "loss": 2.2549, "step": 428240 }, { "epoch": 1.655494734888899, "grad_norm": 0.11383403837680817, "learning_rate": 0.0009369854187265352, "loss": 2.2607, "step": 428250 }, { "epoch": 1.6555333920922823, "grad_norm": 0.1119704395532608, "learning_rate": 0.000936797291199839, "loss": 2.2554, "step": 428260 }, { "epoch": 1.6555720492956656, "grad_norm": 0.1140163317322731, "learning_rate": 0.0009366091969553246, "loss": 2.2582, "step": 428270 }, { "epoch": 1.655610706499049, "grad_norm": 0.1339382827281952, "learning_rate": 0.0009364211359753336, "loss": 2.249, "step": 428280 }, { "epoch": 1.6556493637024323, "grad_norm": 0.11727656424045563, "learning_rate": 0.000936233108242224, "loss": 2.2539, "step": 428290 }, { "epoch": 1.6556880209058156, "grad_norm": 0.1095530167222023, "learning_rate": 0.0009360451137383692, "loss": 2.2535, "step": 428300 }, { "epoch": 1.6557266781091988, "grad_norm": 0.11644624918699265, "learning_rate": 0.0009358571524461577, "loss": 2.2363, "step": 428310 }, { "epoch": 1.6557653353125823, "grad_norm": 0.11836208403110504, "learning_rate": 0.0009356692243479943, "loss": 2.2377, "step": 428320 }, { "epoch": 1.6558039925159656, "grad_norm": 0.11358748376369476, "learning_rate": 0.0009354813294262989, "loss": 2.2457, "step": 428330 }, { "epoch": 1.6558426497193488, "grad_norm": 0.10452094674110413, "learning_rate": 0.0009352934676635068, "loss": 2.2444, "step": 428340 }, { "epoch": 1.655881306922732, "grad_norm": 0.12100542336702347, "learning_rate": 0.0009351056390420691, "loss": 2.2365, "step": 428350 }, { "epoch": 1.6559199641261153, "grad_norm": 0.12400267273187637, "learning_rate": 0.0009349178435444521, "loss": 2.265, "step": 428360 }, { "epoch": 1.6559586213294986, "grad_norm": 0.10073690861463547, "learning_rate": 0.0009347300811531381, "loss": 2.2545, "step": 428370 }, { "epoch": 1.6559972785328818, "grad_norm": 0.11887051910161972, "learning_rate": 0.0009345423518506237, "loss": 2.2383, "step": 428380 }, { "epoch": 1.656035935736265, "grad_norm": 0.10939810425043106, "learning_rate": 0.0009343546556194224, "loss": 2.2501, "step": 428390 }, { "epoch": 1.6560745929396483, "grad_norm": 0.11234070360660553, "learning_rate": 0.000934166992442062, "loss": 2.267, "step": 428400 }, { "epoch": 1.6561132501430316, "grad_norm": 0.10380993783473969, "learning_rate": 0.0009339793623010858, "loss": 2.2573, "step": 428410 }, { "epoch": 1.6561519073464148, "grad_norm": 0.11642767488956451, "learning_rate": 0.0009337917651790528, "loss": 2.2488, "step": 428420 }, { "epoch": 1.656190564549798, "grad_norm": 0.1705406755208969, "learning_rate": 0.0009336042010585375, "loss": 2.262, "step": 428430 }, { "epoch": 1.6562292217531813, "grad_norm": 0.10769027471542358, "learning_rate": 0.0009334166699221293, "loss": 2.2575, "step": 428440 }, { "epoch": 1.6562678789565648, "grad_norm": 0.12309370189905167, "learning_rate": 0.0009332291717524331, "loss": 2.2636, "step": 428450 }, { "epoch": 1.656306536159948, "grad_norm": 0.1017761379480362, "learning_rate": 0.0009330417065320689, "loss": 2.2526, "step": 428460 }, { "epoch": 1.6563451933633313, "grad_norm": 0.10486218333244324, "learning_rate": 0.000932854274243672, "loss": 2.2651, "step": 428470 }, { "epoch": 1.6563838505667146, "grad_norm": 0.10576413571834564, "learning_rate": 0.0009326668748698934, "loss": 2.2361, "step": 428480 }, { "epoch": 1.656422507770098, "grad_norm": 0.11108098179101944, "learning_rate": 0.0009324795083933986, "loss": 2.2508, "step": 428490 }, { "epoch": 1.6564611649734813, "grad_norm": 0.11978644132614136, "learning_rate": 0.0009322921747968689, "loss": 2.2496, "step": 428500 }, { "epoch": 1.6564998221768645, "grad_norm": 0.1151486188173294, "learning_rate": 0.0009321048740630005, "loss": 2.2483, "step": 428510 }, { "epoch": 1.6565384793802478, "grad_norm": 0.11211903393268585, "learning_rate": 0.0009319176061745049, "loss": 2.2325, "step": 428520 }, { "epoch": 1.656577136583631, "grad_norm": 0.10804494470357895, "learning_rate": 0.0009317303711141087, "loss": 2.2513, "step": 428530 }, { "epoch": 1.6566157937870143, "grad_norm": 0.10815191268920898, "learning_rate": 0.0009315431688645535, "loss": 2.2588, "step": 428540 }, { "epoch": 1.6566544509903975, "grad_norm": 0.10301661491394043, "learning_rate": 0.0009313559994085963, "loss": 2.2488, "step": 428550 }, { "epoch": 1.6566931081937808, "grad_norm": 0.1148388609290123, "learning_rate": 0.0009311688627290089, "loss": 2.2533, "step": 428560 }, { "epoch": 1.656731765397164, "grad_norm": 0.11594461649656296, "learning_rate": 0.0009309817588085787, "loss": 2.244, "step": 428570 }, { "epoch": 1.6567704226005473, "grad_norm": 0.11438317596912384, "learning_rate": 0.0009307946876301072, "loss": 2.2396, "step": 428580 }, { "epoch": 1.6568090798039306, "grad_norm": 0.10656829923391342, "learning_rate": 0.0009306076491764119, "loss": 2.2418, "step": 428590 }, { "epoch": 1.6568477370073138, "grad_norm": 0.10780458152294159, "learning_rate": 0.000930420643430325, "loss": 2.249, "step": 428600 }, { "epoch": 1.656886394210697, "grad_norm": 0.10777982324361801, "learning_rate": 0.0009302336703746934, "loss": 2.2533, "step": 428610 }, { "epoch": 1.6569250514140805, "grad_norm": 0.1133468970656395, "learning_rate": 0.0009300467299923796, "loss": 2.2359, "step": 428620 }, { "epoch": 1.6569637086174638, "grad_norm": 0.1100376546382904, "learning_rate": 0.0009298598222662604, "loss": 2.2605, "step": 428630 }, { "epoch": 1.657002365820847, "grad_norm": 0.10588014870882034, "learning_rate": 0.0009296729471792278, "loss": 2.2534, "step": 428640 }, { "epoch": 1.6570410230242303, "grad_norm": 0.12535402178764343, "learning_rate": 0.0009294861047141891, "loss": 2.2533, "step": 428650 }, { "epoch": 1.6570796802276138, "grad_norm": 0.1060071587562561, "learning_rate": 0.0009292992948540662, "loss": 2.2553, "step": 428660 }, { "epoch": 1.657118337430997, "grad_norm": 0.11963797360658646, "learning_rate": 0.0009291125175817956, "loss": 2.248, "step": 428670 }, { "epoch": 1.6571569946343803, "grad_norm": 0.11207103729248047, "learning_rate": 0.0009289257728803293, "loss": 2.2429, "step": 428680 }, { "epoch": 1.6571956518377635, "grad_norm": 0.11880432069301605, "learning_rate": 0.0009287390607326336, "loss": 2.2482, "step": 428690 }, { "epoch": 1.6572343090411468, "grad_norm": 0.11663631349802017, "learning_rate": 0.0009285523811216902, "loss": 2.251, "step": 428700 }, { "epoch": 1.65727296624453, "grad_norm": 0.10760597884654999, "learning_rate": 0.0009283657340304949, "loss": 2.245, "step": 428710 }, { "epoch": 1.6573116234479133, "grad_norm": 0.10502638667821884, "learning_rate": 0.000928179119442059, "loss": 2.2628, "step": 428720 }, { "epoch": 1.6573502806512965, "grad_norm": 0.12468273192644119, "learning_rate": 0.0009279925373394082, "loss": 2.2573, "step": 428730 }, { "epoch": 1.6573889378546798, "grad_norm": 0.1035030260682106, "learning_rate": 0.0009278059877055833, "loss": 2.2489, "step": 428740 }, { "epoch": 1.657427595058063, "grad_norm": 0.12911434471607208, "learning_rate": 0.0009276194705236391, "loss": 2.2567, "step": 428750 }, { "epoch": 1.6574662522614463, "grad_norm": 0.10658340156078339, "learning_rate": 0.0009274329857766461, "loss": 2.254, "step": 428760 }, { "epoch": 1.6575049094648295, "grad_norm": 0.12113990634679794, "learning_rate": 0.0009272465334476892, "loss": 2.2473, "step": 428770 }, { "epoch": 1.657543566668213, "grad_norm": 0.11048426479101181, "learning_rate": 0.0009270601135198673, "loss": 2.2467, "step": 428780 }, { "epoch": 1.6575822238715963, "grad_norm": 0.11958407610654831, "learning_rate": 0.000926873725976295, "loss": 2.245, "step": 428790 }, { "epoch": 1.6576208810749795, "grad_norm": 0.11110121756792068, "learning_rate": 0.000926687370800101, "loss": 2.2709, "step": 428800 }, { "epoch": 1.6576595382783628, "grad_norm": 0.1004750207066536, "learning_rate": 0.0009265010479744287, "loss": 2.2589, "step": 428810 }, { "epoch": 1.657698195481746, "grad_norm": 0.10071204602718353, "learning_rate": 0.0009263147574824362, "loss": 2.2381, "step": 428820 }, { "epoch": 1.6577368526851295, "grad_norm": 0.10761163383722305, "learning_rate": 0.000926128499307296, "loss": 2.2352, "step": 428830 }, { "epoch": 1.6577755098885127, "grad_norm": 0.1146104633808136, "learning_rate": 0.0009259422734321959, "loss": 2.2684, "step": 428840 }, { "epoch": 1.657814167091896, "grad_norm": 0.11707509309053421, "learning_rate": 0.0009257560798403373, "loss": 2.2539, "step": 428850 }, { "epoch": 1.6578528242952792, "grad_norm": 0.11978209018707275, "learning_rate": 0.0009255699185149366, "loss": 2.2507, "step": 428860 }, { "epoch": 1.6578914814986625, "grad_norm": 0.11238358169794083, "learning_rate": 0.0009253837894392249, "loss": 2.2411, "step": 428870 }, { "epoch": 1.6579301387020458, "grad_norm": 0.12410169094800949, "learning_rate": 0.0009251976925964479, "loss": 2.2599, "step": 428880 }, { "epoch": 1.657968795905429, "grad_norm": 0.1125243753194809, "learning_rate": 0.000925011627969865, "loss": 2.2505, "step": 428890 }, { "epoch": 1.6580074531088123, "grad_norm": 0.11119139194488525, "learning_rate": 0.0009248255955427512, "loss": 2.2575, "step": 428900 }, { "epoch": 1.6580461103121955, "grad_norm": 0.11171048879623413, "learning_rate": 0.0009246395952983948, "loss": 2.2559, "step": 428910 }, { "epoch": 1.6580847675155788, "grad_norm": 0.10550161451101303, "learning_rate": 0.0009244536272200999, "loss": 2.253, "step": 428920 }, { "epoch": 1.658123424718962, "grad_norm": 0.12499670684337616, "learning_rate": 0.0009242676912911838, "loss": 2.2507, "step": 428930 }, { "epoch": 1.6581620819223453, "grad_norm": 0.1260816603899002, "learning_rate": 0.0009240817874949787, "loss": 2.2499, "step": 428940 }, { "epoch": 1.6582007391257287, "grad_norm": 0.11161303520202637, "learning_rate": 0.0009238959158148317, "loss": 2.2466, "step": 428950 }, { "epoch": 1.658239396329112, "grad_norm": 0.10817040503025055, "learning_rate": 0.0009237100762341031, "loss": 2.2437, "step": 428960 }, { "epoch": 1.6582780535324952, "grad_norm": 0.10055804997682571, "learning_rate": 0.000923524268736169, "loss": 2.2603, "step": 428970 }, { "epoch": 1.6583167107358785, "grad_norm": 0.10730566084384918, "learning_rate": 0.0009233384933044185, "loss": 2.2334, "step": 428980 }, { "epoch": 1.6583553679392617, "grad_norm": 0.11016399413347244, "learning_rate": 0.0009231527499222556, "loss": 2.2524, "step": 428990 }, { "epoch": 1.6583940251426452, "grad_norm": 0.1141536608338356, "learning_rate": 0.0009229670385730993, "loss": 2.2545, "step": 429000 }, { "epoch": 1.6584326823460285, "grad_norm": 0.10983778536319733, "learning_rate": 0.0009227813592403815, "loss": 2.2329, "step": 429010 }, { "epoch": 1.6584713395494117, "grad_norm": 0.10804615169763565, "learning_rate": 0.0009225957119075494, "loss": 2.2365, "step": 429020 }, { "epoch": 1.658509996752795, "grad_norm": 0.11451501399278641, "learning_rate": 0.0009224100965580644, "loss": 2.2429, "step": 429030 }, { "epoch": 1.6585486539561782, "grad_norm": 0.11562760919332504, "learning_rate": 0.0009222245131754017, "loss": 2.2622, "step": 429040 }, { "epoch": 1.6585873111595615, "grad_norm": 0.11589565873146057, "learning_rate": 0.0009220389617430509, "loss": 2.2514, "step": 429050 }, { "epoch": 1.6586259683629447, "grad_norm": 0.11859513819217682, "learning_rate": 0.0009218534422445155, "loss": 2.2567, "step": 429060 }, { "epoch": 1.658664625566328, "grad_norm": 0.11401848495006561, "learning_rate": 0.0009216679546633143, "loss": 2.254, "step": 429070 }, { "epoch": 1.6587032827697112, "grad_norm": 0.10089422017335892, "learning_rate": 0.0009214824989829789, "loss": 2.2438, "step": 429080 }, { "epoch": 1.6587419399730945, "grad_norm": 0.10089923441410065, "learning_rate": 0.000921297075187056, "loss": 2.255, "step": 429090 }, { "epoch": 1.6587805971764777, "grad_norm": 0.11789452284574509, "learning_rate": 0.0009211116832591057, "loss": 2.2382, "step": 429100 }, { "epoch": 1.658819254379861, "grad_norm": 0.10543384402990341, "learning_rate": 0.0009209263231827032, "loss": 2.2512, "step": 429110 }, { "epoch": 1.6588579115832445, "grad_norm": 0.11147383600473404, "learning_rate": 0.0009207409949414367, "loss": 2.2502, "step": 429120 }, { "epoch": 1.6588965687866277, "grad_norm": 0.12240874022245407, "learning_rate": 0.0009205556985189091, "loss": 2.2401, "step": 429130 }, { "epoch": 1.658935225990011, "grad_norm": 0.10780297964811325, "learning_rate": 0.0009203704338987378, "loss": 2.2508, "step": 429140 }, { "epoch": 1.6589738831933942, "grad_norm": 0.10800083726644516, "learning_rate": 0.000920185201064553, "loss": 2.2502, "step": 429150 }, { "epoch": 1.6590125403967777, "grad_norm": 0.11921687424182892, "learning_rate": 0.0009199999999999999, "loss": 2.2589, "step": 429160 }, { "epoch": 1.659051197600161, "grad_norm": 0.11528116464614868, "learning_rate": 0.000919814830688738, "loss": 2.261, "step": 429170 }, { "epoch": 1.6590898548035442, "grad_norm": 0.10715014487504959, "learning_rate": 0.0009196296931144397, "loss": 2.2543, "step": 429180 }, { "epoch": 1.6591285120069275, "grad_norm": 0.15646180510520935, "learning_rate": 0.0009194445872607921, "loss": 2.2577, "step": 429190 }, { "epoch": 1.6591671692103107, "grad_norm": 0.12420929223299026, "learning_rate": 0.0009192595131114965, "loss": 2.2586, "step": 429200 }, { "epoch": 1.659205826413694, "grad_norm": 0.11077667772769928, "learning_rate": 0.0009190744706502672, "loss": 2.2467, "step": 429210 }, { "epoch": 1.6592444836170772, "grad_norm": 0.11203381419181824, "learning_rate": 0.0009188894598608338, "loss": 2.2385, "step": 429220 }, { "epoch": 1.6592831408204605, "grad_norm": 0.12216757982969284, "learning_rate": 0.0009187044807269385, "loss": 2.2625, "step": 429230 }, { "epoch": 1.6593217980238437, "grad_norm": 0.5819114446640015, "learning_rate": 0.0009185195332323382, "loss": 2.2358, "step": 429240 }, { "epoch": 1.659360455227227, "grad_norm": 0.12224812805652618, "learning_rate": 0.0009183346173608034, "loss": 2.2408, "step": 429250 }, { "epoch": 1.6593991124306102, "grad_norm": 0.112460657954216, "learning_rate": 0.0009181497330961184, "loss": 2.2388, "step": 429260 }, { "epoch": 1.6594377696339935, "grad_norm": 0.10450280457735062, "learning_rate": 0.0009179648804220817, "loss": 2.2453, "step": 429270 }, { "epoch": 1.6594764268373767, "grad_norm": 0.10354512929916382, "learning_rate": 0.0009177800593225054, "loss": 2.2328, "step": 429280 }, { "epoch": 1.6595150840407602, "grad_norm": 0.10230404138565063, "learning_rate": 0.0009175952697812154, "loss": 2.2497, "step": 429290 }, { "epoch": 1.6595537412441435, "grad_norm": 0.11147142946720123, "learning_rate": 0.0009174105117820514, "loss": 2.2462, "step": 429300 }, { "epoch": 1.6595923984475267, "grad_norm": 0.10749982297420502, "learning_rate": 0.0009172257853088668, "loss": 2.2448, "step": 429310 }, { "epoch": 1.65963105565091, "grad_norm": 0.11876123398542404, "learning_rate": 0.0009170410903455293, "loss": 2.2617, "step": 429320 }, { "epoch": 1.6596697128542934, "grad_norm": 0.09572720527648926, "learning_rate": 0.0009168564268759197, "loss": 2.2545, "step": 429330 }, { "epoch": 1.6597083700576767, "grad_norm": 0.11779268831014633, "learning_rate": 0.0009166717948839327, "loss": 2.2728, "step": 429340 }, { "epoch": 1.65974702726106, "grad_norm": 0.10509568452835083, "learning_rate": 0.0009164871943534769, "loss": 2.2402, "step": 429350 }, { "epoch": 1.6597856844644432, "grad_norm": 0.11478833109140396, "learning_rate": 0.0009163026252684747, "loss": 2.2635, "step": 429360 }, { "epoch": 1.6598243416678264, "grad_norm": 0.11095292121171951, "learning_rate": 0.0009161180876128619, "loss": 2.2436, "step": 429370 }, { "epoch": 1.6598629988712097, "grad_norm": 0.10812583565711975, "learning_rate": 0.0009159335813705878, "loss": 2.2479, "step": 429380 }, { "epoch": 1.659901656074593, "grad_norm": 0.10396292060613632, "learning_rate": 0.000915749106525616, "loss": 2.2489, "step": 429390 }, { "epoch": 1.6599403132779762, "grad_norm": 0.10780052095651627, "learning_rate": 0.0009155646630619234, "loss": 2.2573, "step": 429400 }, { "epoch": 1.6599789704813595, "grad_norm": 0.10211651772260666, "learning_rate": 0.0009153802509635001, "loss": 2.249, "step": 429410 }, { "epoch": 1.6600176276847427, "grad_norm": 0.11910141259431839, "learning_rate": 0.0009151958702143506, "loss": 2.2429, "step": 429420 }, { "epoch": 1.660056284888126, "grad_norm": 0.12283129245042801, "learning_rate": 0.0009150115207984926, "loss": 2.2516, "step": 429430 }, { "epoch": 1.6600949420915092, "grad_norm": 0.11943577229976654, "learning_rate": 0.0009148272026999571, "loss": 2.2532, "step": 429440 }, { "epoch": 1.6601335992948925, "grad_norm": 0.10331227630376816, "learning_rate": 0.0009146429159027892, "loss": 2.2506, "step": 429450 }, { "epoch": 1.660172256498276, "grad_norm": 0.10485690832138062, "learning_rate": 0.0009144586603910472, "loss": 2.2521, "step": 429460 }, { "epoch": 1.6602109137016592, "grad_norm": 0.10611864179372787, "learning_rate": 0.0009142744361488029, "loss": 2.2539, "step": 429470 }, { "epoch": 1.6602495709050424, "grad_norm": 0.10822539776563644, "learning_rate": 0.0009140902431601419, "loss": 2.2417, "step": 429480 }, { "epoch": 1.6602882281084257, "grad_norm": 0.12675762176513672, "learning_rate": 0.0009139060814091629, "loss": 2.2652, "step": 429490 }, { "epoch": 1.6603268853118092, "grad_norm": 0.1188879907131195, "learning_rate": 0.0009137219508799785, "loss": 2.247, "step": 429500 }, { "epoch": 1.6603655425151924, "grad_norm": 0.10695726424455643, "learning_rate": 0.0009135378515567145, "loss": 2.2492, "step": 429510 }, { "epoch": 1.6604041997185757, "grad_norm": 0.1097661629319191, "learning_rate": 0.00091335378342351, "loss": 2.2508, "step": 429520 }, { "epoch": 1.660442856921959, "grad_norm": 0.11859133839607239, "learning_rate": 0.0009131697464645182, "loss": 2.2535, "step": 429530 }, { "epoch": 1.6604815141253422, "grad_norm": 0.11683262139558792, "learning_rate": 0.0009129857406639046, "loss": 2.2557, "step": 429540 }, { "epoch": 1.6605201713287254, "grad_norm": 0.10748874396085739, "learning_rate": 0.0009128017660058494, "loss": 2.2361, "step": 429550 }, { "epoch": 1.6605588285321087, "grad_norm": 0.11847436428070068, "learning_rate": 0.0009126178224745452, "loss": 2.2459, "step": 429560 }, { "epoch": 1.660597485735492, "grad_norm": 0.10861270874738693, "learning_rate": 0.000912433910054198, "loss": 2.2419, "step": 429570 }, { "epoch": 1.6606361429388752, "grad_norm": 0.10589105635881424, "learning_rate": 0.0009122500287290282, "loss": 2.243, "step": 429580 }, { "epoch": 1.6606748001422584, "grad_norm": 0.10588544607162476, "learning_rate": 0.0009120661784832682, "loss": 2.2527, "step": 429590 }, { "epoch": 1.6607134573456417, "grad_norm": 0.10675523430109024, "learning_rate": 0.0009118823593011645, "loss": 2.2518, "step": 429600 }, { "epoch": 1.660752114549025, "grad_norm": 0.12099213898181915, "learning_rate": 0.0009116985711669767, "loss": 2.2574, "step": 429610 }, { "epoch": 1.6607907717524082, "grad_norm": 0.10642857104539871, "learning_rate": 0.0009115148140649778, "loss": 2.2414, "step": 429620 }, { "epoch": 1.6608294289557917, "grad_norm": 0.11251429468393326, "learning_rate": 0.0009113310879794536, "loss": 2.2461, "step": 429630 }, { "epoch": 1.660868086159175, "grad_norm": 0.12505125999450684, "learning_rate": 0.000911147392894704, "loss": 2.2353, "step": 429640 }, { "epoch": 1.6609067433625582, "grad_norm": 0.11787581443786621, "learning_rate": 0.0009109637287950414, "loss": 2.2494, "step": 429650 }, { "epoch": 1.6609454005659414, "grad_norm": 0.10979120433330536, "learning_rate": 0.0009107800956647918, "loss": 2.2496, "step": 429660 }, { "epoch": 1.660984057769325, "grad_norm": 0.11420413851737976, "learning_rate": 0.0009105964934882942, "loss": 2.2467, "step": 429670 }, { "epoch": 1.6610227149727081, "grad_norm": 0.12395305186510086, "learning_rate": 0.0009104129222499011, "loss": 2.2687, "step": 429680 }, { "epoch": 1.6610613721760914, "grad_norm": 0.10555867850780487, "learning_rate": 0.0009102293819339777, "loss": 2.251, "step": 429690 }, { "epoch": 1.6611000293794747, "grad_norm": 0.10683988779783249, "learning_rate": 0.000910045872524903, "loss": 2.2562, "step": 429700 }, { "epoch": 1.661138686582858, "grad_norm": 0.10842365026473999, "learning_rate": 0.0009098623940070685, "loss": 2.2396, "step": 429710 }, { "epoch": 1.6611773437862412, "grad_norm": 0.10488323122262955, "learning_rate": 0.0009096789463648792, "loss": 2.2285, "step": 429720 }, { "epoch": 1.6612160009896244, "grad_norm": 0.11940070241689682, "learning_rate": 0.0009094955295827532, "loss": 2.242, "step": 429730 }, { "epoch": 1.6612546581930077, "grad_norm": 0.10247818380594254, "learning_rate": 0.0009093121436451215, "loss": 2.2618, "step": 429740 }, { "epoch": 1.661293315396391, "grad_norm": 0.11934773623943329, "learning_rate": 0.0009091287885364286, "loss": 2.2557, "step": 429750 }, { "epoch": 1.6613319725997742, "grad_norm": 0.10337629169225693, "learning_rate": 0.0009089454642411316, "loss": 2.2742, "step": 429760 }, { "epoch": 1.6613706298031574, "grad_norm": 0.10209142416715622, "learning_rate": 0.0009087621707437008, "loss": 2.2409, "step": 429770 }, { "epoch": 1.6614092870065407, "grad_norm": 0.1119256392121315, "learning_rate": 0.0009085789080286198, "loss": 2.2386, "step": 429780 }, { "epoch": 1.661447944209924, "grad_norm": 0.11805194616317749, "learning_rate": 0.0009083956760803849, "loss": 2.2608, "step": 429790 }, { "epoch": 1.6614866014133074, "grad_norm": 0.10996301472187042, "learning_rate": 0.0009082124748835057, "loss": 2.2548, "step": 429800 }, { "epoch": 1.6615252586166906, "grad_norm": 0.10933899879455566, "learning_rate": 0.0009080293044225043, "loss": 2.2535, "step": 429810 }, { "epoch": 1.661563915820074, "grad_norm": 0.11633667349815369, "learning_rate": 0.0009078461646819162, "loss": 2.2526, "step": 429820 }, { "epoch": 1.6616025730234572, "grad_norm": 0.11690562963485718, "learning_rate": 0.0009076630556462902, "loss": 2.2501, "step": 429830 }, { "epoch": 1.6616412302268406, "grad_norm": 0.10380557924509048, "learning_rate": 0.000907479977300187, "loss": 2.2453, "step": 429840 }, { "epoch": 1.6616798874302239, "grad_norm": 0.11537251621484756, "learning_rate": 0.0009072969296281812, "loss": 2.2411, "step": 429850 }, { "epoch": 1.6617185446336071, "grad_norm": 0.10554520785808563, "learning_rate": 0.00090711391261486, "loss": 2.2456, "step": 429860 }, { "epoch": 1.6617572018369904, "grad_norm": 0.11479167640209198, "learning_rate": 0.0009069309262448233, "loss": 2.249, "step": 429870 }, { "epoch": 1.6617958590403736, "grad_norm": 0.12557867169380188, "learning_rate": 0.0009067479705026842, "loss": 2.2555, "step": 429880 }, { "epoch": 1.6618345162437569, "grad_norm": 0.10398616641759872, "learning_rate": 0.0009065650453730685, "loss": 2.2422, "step": 429890 }, { "epoch": 1.6618731734471401, "grad_norm": 0.10483341664075851, "learning_rate": 0.0009063821508406147, "loss": 2.2538, "step": 429900 }, { "epoch": 1.6619118306505234, "grad_norm": 0.11726875603199005, "learning_rate": 0.0009061992868899747, "loss": 2.2467, "step": 429910 }, { "epoch": 1.6619504878539066, "grad_norm": 0.11767543852329254, "learning_rate": 0.0009060164535058126, "loss": 2.2571, "step": 429920 }, { "epoch": 1.66198914505729, "grad_norm": 0.14433105289936066, "learning_rate": 0.0009058336506728056, "loss": 2.246, "step": 429930 }, { "epoch": 1.6620278022606731, "grad_norm": 0.11061185598373413, "learning_rate": 0.0009056508783756438, "loss": 2.2666, "step": 429940 }, { "epoch": 1.6620664594640564, "grad_norm": 0.11711091548204422, "learning_rate": 0.0009054681365990298, "loss": 2.2388, "step": 429950 }, { "epoch": 1.6621051166674397, "grad_norm": 0.11604579538106918, "learning_rate": 0.0009052854253276795, "loss": 2.2553, "step": 429960 }, { "epoch": 1.6621437738708231, "grad_norm": 0.11772461980581284, "learning_rate": 0.0009051027445463205, "loss": 2.2394, "step": 429970 }, { "epoch": 1.6621824310742064, "grad_norm": 0.12372942268848419, "learning_rate": 0.0009049200942396942, "loss": 2.2579, "step": 429980 }, { "epoch": 1.6622210882775896, "grad_norm": 0.10607621818780899, "learning_rate": 0.0009047374743925547, "loss": 2.2435, "step": 429990 }, { "epoch": 1.6622597454809729, "grad_norm": 0.09799272567033768, "learning_rate": 0.0009045548849896678, "loss": 2.2502, "step": 430000 }, { "epoch": 1.6622984026843564, "grad_norm": 0.11451854556798935, "learning_rate": 0.0009043723260158129, "loss": 2.2488, "step": 430010 }, { "epoch": 1.6623370598877396, "grad_norm": 0.10894210636615753, "learning_rate": 0.000904189797455782, "loss": 2.2416, "step": 430020 }, { "epoch": 1.6623757170911229, "grad_norm": 0.11218996345996857, "learning_rate": 0.0009040072992943795, "loss": 2.2556, "step": 430030 }, { "epoch": 1.6624143742945061, "grad_norm": 0.11067520827054977, "learning_rate": 0.0009038248315164224, "loss": 2.2386, "step": 430040 }, { "epoch": 1.6624530314978894, "grad_norm": 0.11744607985019684, "learning_rate": 0.0009036423941067405, "loss": 2.2545, "step": 430050 }, { "epoch": 1.6624916887012726, "grad_norm": 0.10939358174800873, "learning_rate": 0.0009034599870501761, "loss": 2.2637, "step": 430060 }, { "epoch": 1.6625303459046559, "grad_norm": 0.11345665901899338, "learning_rate": 0.0009032776103315843, "loss": 2.246, "step": 430070 }, { "epoch": 1.6625690031080391, "grad_norm": 0.13914474844932556, "learning_rate": 0.000903095263935833, "loss": 2.2506, "step": 430080 }, { "epoch": 1.6626076603114224, "grad_norm": 0.11508025974035263, "learning_rate": 0.0009029129478478017, "loss": 2.2635, "step": 430090 }, { "epoch": 1.6626463175148056, "grad_norm": 0.11344173550605774, "learning_rate": 0.0009027306620523839, "loss": 2.25, "step": 430100 }, { "epoch": 1.6626849747181889, "grad_norm": 0.19048510491847992, "learning_rate": 0.0009025484065344842, "loss": 2.247, "step": 430110 }, { "epoch": 1.6627236319215721, "grad_norm": 0.11867443472146988, "learning_rate": 0.0009023661812790205, "loss": 2.2456, "step": 430120 }, { "epoch": 1.6627622891249554, "grad_norm": 0.12190329283475876, "learning_rate": 0.0009021839862709233, "loss": 2.2494, "step": 430130 }, { "epoch": 1.6628009463283389, "grad_norm": 0.10324706137180328, "learning_rate": 0.0009020018214951355, "loss": 2.2587, "step": 430140 }, { "epoch": 1.662839603531722, "grad_norm": 0.10841317474842072, "learning_rate": 0.0009018196869366124, "loss": 2.231, "step": 430150 }, { "epoch": 1.6628782607351054, "grad_norm": 0.11271698772907257, "learning_rate": 0.0009016375825803216, "loss": 2.2357, "step": 430160 }, { "epoch": 1.6629169179384886, "grad_norm": 0.09953344613313675, "learning_rate": 0.0009014555084112432, "loss": 2.2477, "step": 430170 }, { "epoch": 1.662955575141872, "grad_norm": 0.11217759549617767, "learning_rate": 0.0009012734644143703, "loss": 2.2484, "step": 430180 }, { "epoch": 1.6629942323452553, "grad_norm": 0.12850646674633026, "learning_rate": 0.0009010914505747077, "loss": 2.2343, "step": 430190 }, { "epoch": 1.6630328895486386, "grad_norm": 0.1115790531039238, "learning_rate": 0.000900909466877273, "loss": 2.2501, "step": 430200 }, { "epoch": 1.6630715467520218, "grad_norm": 0.1096728965640068, "learning_rate": 0.0009007275133070964, "loss": 2.2545, "step": 430210 }, { "epoch": 1.663110203955405, "grad_norm": 0.11958139389753342, "learning_rate": 0.0009005455898492198, "loss": 2.2532, "step": 430220 }, { "epoch": 1.6631488611587883, "grad_norm": 0.11160731315612793, "learning_rate": 0.000900363696488698, "loss": 2.2455, "step": 430230 }, { "epoch": 1.6631875183621716, "grad_norm": 0.10113270580768585, "learning_rate": 0.0009001818332105984, "loss": 2.247, "step": 430240 }, { "epoch": 1.6632261755655549, "grad_norm": 0.12471241503953934, "learning_rate": 0.0009, "loss": 2.2522, "step": 430250 }, { "epoch": 1.663264832768938, "grad_norm": 0.11001195013523102, "learning_rate": 0.0008998181968419948, "loss": 2.2513, "step": 430260 }, { "epoch": 1.6633034899723214, "grad_norm": 0.10488203167915344, "learning_rate": 0.0008996364237216863, "loss": 2.2361, "step": 430270 }, { "epoch": 1.6633421471757046, "grad_norm": 0.10875547677278519, "learning_rate": 0.0008994546806241917, "loss": 2.2525, "step": 430280 }, { "epoch": 1.6633808043790879, "grad_norm": 0.10580891370773315, "learning_rate": 0.0008992729675346389, "loss": 2.242, "step": 430290 }, { "epoch": 1.6634194615824711, "grad_norm": 0.11021474748849869, "learning_rate": 0.0008990912844381694, "loss": 2.249, "step": 430300 }, { "epoch": 1.6634581187858546, "grad_norm": 0.10796932131052017, "learning_rate": 0.0008989096313199356, "loss": 2.2521, "step": 430310 }, { "epoch": 1.6634967759892378, "grad_norm": 0.10939501971006393, "learning_rate": 0.0008987280081651037, "loss": 2.2517, "step": 430320 }, { "epoch": 1.663535433192621, "grad_norm": 0.11583207547664642, "learning_rate": 0.0008985464149588509, "loss": 2.2461, "step": 430330 }, { "epoch": 1.6635740903960043, "grad_norm": 0.11901375651359558, "learning_rate": 0.0008983648516863672, "loss": 2.2551, "step": 430340 }, { "epoch": 1.6636127475993878, "grad_norm": 0.10402972996234894, "learning_rate": 0.0008981833183328545, "loss": 2.2568, "step": 430350 }, { "epoch": 1.663651404802771, "grad_norm": 0.11303649842739105, "learning_rate": 0.0008980018148835271, "loss": 2.253, "step": 430360 }, { "epoch": 1.6636900620061543, "grad_norm": 0.13464075326919556, "learning_rate": 0.0008978203413236116, "loss": 2.265, "step": 430370 }, { "epoch": 1.6637287192095376, "grad_norm": 0.10891130566596985, "learning_rate": 0.0008976388976383465, "loss": 2.2428, "step": 430380 }, { "epoch": 1.6637673764129208, "grad_norm": 0.10768184065818787, "learning_rate": 0.0008974574838129823, "loss": 2.2448, "step": 430390 }, { "epoch": 1.663806033616304, "grad_norm": 0.1049170047044754, "learning_rate": 0.0008972760998327823, "loss": 2.2468, "step": 430400 }, { "epoch": 1.6638446908196873, "grad_norm": 0.12340379506349564, "learning_rate": 0.0008970947456830212, "loss": 2.2287, "step": 430410 }, { "epoch": 1.6638833480230706, "grad_norm": 0.11856955289840698, "learning_rate": 0.0008969134213489858, "loss": 2.2431, "step": 430420 }, { "epoch": 1.6639220052264538, "grad_norm": 0.11601000279188156, "learning_rate": 0.0008967321268159758, "loss": 2.2439, "step": 430430 }, { "epoch": 1.663960662429837, "grad_norm": 0.10934384167194366, "learning_rate": 0.0008965508620693024, "loss": 2.2599, "step": 430440 }, { "epoch": 1.6639993196332203, "grad_norm": 0.1057877317070961, "learning_rate": 0.0008963696270942884, "loss": 2.2452, "step": 430450 }, { "epoch": 1.6640379768366036, "grad_norm": 0.10983796417713165, "learning_rate": 0.0008961884218762697, "loss": 2.2452, "step": 430460 }, { "epoch": 1.6640766340399868, "grad_norm": 0.11676836758852005, "learning_rate": 0.0008960072464005935, "loss": 2.2562, "step": 430470 }, { "epoch": 1.6641152912433703, "grad_norm": 0.11772457510232925, "learning_rate": 0.0008958261006526191, "loss": 2.2428, "step": 430480 }, { "epoch": 1.6641539484467536, "grad_norm": 0.11872987449169159, "learning_rate": 0.0008956449846177181, "loss": 2.2394, "step": 430490 }, { "epoch": 1.6641926056501368, "grad_norm": 0.10234441608190536, "learning_rate": 0.0008954638982812739, "loss": 2.2341, "step": 430500 }, { "epoch": 1.66423126285352, "grad_norm": 0.10162276774644852, "learning_rate": 0.000895282841628682, "loss": 2.2467, "step": 430510 }, { "epoch": 1.6642699200569036, "grad_norm": 0.11268150061368942, "learning_rate": 0.0008951018146453494, "loss": 2.2491, "step": 430520 }, { "epoch": 1.6643085772602868, "grad_norm": 0.11581974476575851, "learning_rate": 0.0008949208173166957, "loss": 2.2468, "step": 430530 }, { "epoch": 1.66434723446367, "grad_norm": 0.10957831144332886, "learning_rate": 0.000894739849628152, "loss": 2.2501, "step": 430540 }, { "epoch": 1.6643858916670533, "grad_norm": 0.10688143968582153, "learning_rate": 0.0008945589115651617, "loss": 2.2277, "step": 430550 }, { "epoch": 1.6644245488704366, "grad_norm": 0.09974631667137146, "learning_rate": 0.0008943780031131799, "loss": 2.2454, "step": 430560 }, { "epoch": 1.6644632060738198, "grad_norm": 0.10290662199258804, "learning_rate": 0.0008941971242576732, "loss": 2.2437, "step": 430570 }, { "epoch": 1.664501863277203, "grad_norm": 0.10654833167791367, "learning_rate": 0.0008940162749841207, "loss": 2.2453, "step": 430580 }, { "epoch": 1.6645405204805863, "grad_norm": 0.10874156653881073, "learning_rate": 0.0008938354552780135, "loss": 2.2484, "step": 430590 }, { "epoch": 1.6645791776839696, "grad_norm": 0.10365212708711624, "learning_rate": 0.0008936546651248537, "loss": 2.2587, "step": 430600 }, { "epoch": 1.6646178348873528, "grad_norm": 0.10374290496110916, "learning_rate": 0.0008934739045101559, "loss": 2.2406, "step": 430610 }, { "epoch": 1.664656492090736, "grad_norm": 0.11166556924581528, "learning_rate": 0.0008932931734194463, "loss": 2.2551, "step": 430620 }, { "epoch": 1.6646951492941193, "grad_norm": 0.11560613662004471, "learning_rate": 0.0008931124718382631, "loss": 2.2479, "step": 430630 }, { "epoch": 1.6647338064975028, "grad_norm": 0.12513025104999542, "learning_rate": 0.0008929317997521562, "loss": 2.2509, "step": 430640 }, { "epoch": 1.664772463700886, "grad_norm": 0.10663405060768127, "learning_rate": 0.0008927511571466873, "loss": 2.2483, "step": 430650 }, { "epoch": 1.6648111209042693, "grad_norm": 0.11393394321203232, "learning_rate": 0.0008925705440074299, "loss": 2.2414, "step": 430660 }, { "epoch": 1.6648497781076526, "grad_norm": 0.1111840158700943, "learning_rate": 0.000892389960319969, "loss": 2.2541, "step": 430670 }, { "epoch": 1.6648884353110358, "grad_norm": 0.11242654174566269, "learning_rate": 0.0008922094060699017, "loss": 2.2532, "step": 430680 }, { "epoch": 1.6649270925144193, "grad_norm": 0.1310926228761673, "learning_rate": 0.0008920288812428367, "loss": 2.2469, "step": 430690 }, { "epoch": 1.6649657497178025, "grad_norm": 0.12348271161317825, "learning_rate": 0.0008918483858243947, "loss": 2.2479, "step": 430700 }, { "epoch": 1.6650044069211858, "grad_norm": 0.1277979016304016, "learning_rate": 0.0008916679198002071, "loss": 2.2559, "step": 430710 }, { "epoch": 1.665043064124569, "grad_norm": 0.12305956333875656, "learning_rate": 0.0008914874831559185, "loss": 2.2578, "step": 430720 }, { "epoch": 1.6650817213279523, "grad_norm": 0.1079287901520729, "learning_rate": 0.0008913070758771841, "loss": 2.2552, "step": 430730 }, { "epoch": 1.6651203785313355, "grad_norm": 0.10787540674209595, "learning_rate": 0.000891126697949671, "loss": 2.2576, "step": 430740 }, { "epoch": 1.6651590357347188, "grad_norm": 0.12472599744796753, "learning_rate": 0.0008909463493590584, "loss": 2.2463, "step": 430750 }, { "epoch": 1.665197692938102, "grad_norm": 0.1202596053481102, "learning_rate": 0.0008907660300910363, "loss": 2.2422, "step": 430760 }, { "epoch": 1.6652363501414853, "grad_norm": 0.109699547290802, "learning_rate": 0.0008905857401313071, "loss": 2.2489, "step": 430770 }, { "epoch": 1.6652750073448686, "grad_norm": 0.11688680946826935, "learning_rate": 0.0008904054794655842, "loss": 2.2414, "step": 430780 }, { "epoch": 1.6653136645482518, "grad_norm": 0.1130024716258049, "learning_rate": 0.0008902252480795933, "loss": 2.2508, "step": 430790 }, { "epoch": 1.665352321751635, "grad_norm": 0.10916703939437866, "learning_rate": 0.0008900450459590714, "loss": 2.2454, "step": 430800 }, { "epoch": 1.6653909789550185, "grad_norm": 0.1368051916360855, "learning_rate": 0.0008898648730897667, "loss": 2.2517, "step": 430810 }, { "epoch": 1.6654296361584018, "grad_norm": 0.11163930594921112, "learning_rate": 0.0008896847294574393, "loss": 2.2516, "step": 430820 }, { "epoch": 1.665468293361785, "grad_norm": 0.11241559684276581, "learning_rate": 0.0008895046150478608, "loss": 2.2304, "step": 430830 }, { "epoch": 1.6655069505651683, "grad_norm": 0.0991520881652832, "learning_rate": 0.0008893245298468145, "loss": 2.2313, "step": 430840 }, { "epoch": 1.6655456077685515, "grad_norm": 0.10892762243747711, "learning_rate": 0.0008891444738400947, "loss": 2.2454, "step": 430850 }, { "epoch": 1.665584264971935, "grad_norm": 0.11580385267734528, "learning_rate": 0.0008889644470135081, "loss": 2.2417, "step": 430860 }, { "epoch": 1.6656229221753183, "grad_norm": 0.11292415112257004, "learning_rate": 0.0008887844493528721, "loss": 2.2342, "step": 430870 }, { "epoch": 1.6656615793787015, "grad_norm": 0.12061168998479843, "learning_rate": 0.0008886044808440157, "loss": 2.2514, "step": 430880 }, { "epoch": 1.6657002365820848, "grad_norm": 0.12646254897117615, "learning_rate": 0.0008884245414727797, "loss": 2.2457, "step": 430890 }, { "epoch": 1.665738893785468, "grad_norm": 0.10348045825958252, "learning_rate": 0.0008882446312250163, "loss": 2.2545, "step": 430900 }, { "epoch": 1.6657775509888513, "grad_norm": 0.11797824501991272, "learning_rate": 0.000888064750086589, "loss": 2.2461, "step": 430910 }, { "epoch": 1.6658162081922345, "grad_norm": 0.09973704814910889, "learning_rate": 0.0008878848980433724, "loss": 2.237, "step": 430920 }, { "epoch": 1.6658548653956178, "grad_norm": 0.12584131956100464, "learning_rate": 0.0008877050750812534, "loss": 2.2363, "step": 430930 }, { "epoch": 1.665893522599001, "grad_norm": 0.19706349074840546, "learning_rate": 0.0008875252811861296, "loss": 2.2357, "step": 430940 }, { "epoch": 1.6659321798023843, "grad_norm": 0.11496458202600479, "learning_rate": 0.0008873455163439101, "loss": 2.2444, "step": 430950 }, { "epoch": 1.6659708370057675, "grad_norm": 0.12585723400115967, "learning_rate": 0.0008871657805405156, "loss": 2.2509, "step": 430960 }, { "epoch": 1.6660094942091508, "grad_norm": 0.11942458897829056, "learning_rate": 0.000886986073761878, "loss": 2.2366, "step": 430970 }, { "epoch": 1.6660481514125343, "grad_norm": 0.10830435156822205, "learning_rate": 0.0008868063959939403, "loss": 2.2243, "step": 430980 }, { "epoch": 1.6660868086159175, "grad_norm": 0.10963542014360428, "learning_rate": 0.0008866267472226577, "loss": 2.2427, "step": 430990 }, { "epoch": 1.6661254658193008, "grad_norm": 0.11515636742115021, "learning_rate": 0.0008864471274339956, "loss": 2.254, "step": 431000 }, { "epoch": 1.666164123022684, "grad_norm": 0.1160406842827797, "learning_rate": 0.0008862675366139317, "loss": 2.2439, "step": 431010 }, { "epoch": 1.6662027802260673, "grad_norm": 0.11950808763504028, "learning_rate": 0.0008860879747484545, "loss": 2.253, "step": 431020 }, { "epoch": 1.6662414374294507, "grad_norm": 0.11876123398542404, "learning_rate": 0.0008859084418235637, "loss": 2.2502, "step": 431030 }, { "epoch": 1.666280094632834, "grad_norm": 0.11513090878725052, "learning_rate": 0.0008857289378252705, "loss": 2.2285, "step": 431040 }, { "epoch": 1.6663187518362172, "grad_norm": 0.12304891645908356, "learning_rate": 0.0008855494627395974, "loss": 2.2394, "step": 431050 }, { "epoch": 1.6663574090396005, "grad_norm": 0.10489034652709961, "learning_rate": 0.0008853700165525782, "loss": 2.2555, "step": 431060 }, { "epoch": 1.6663960662429838, "grad_norm": 0.11117493361234665, "learning_rate": 0.0008851905992502576, "loss": 2.2338, "step": 431070 }, { "epoch": 1.666434723446367, "grad_norm": 0.10987547039985657, "learning_rate": 0.0008850112108186916, "loss": 2.2481, "step": 431080 }, { "epoch": 1.6664733806497503, "grad_norm": 0.118398517370224, "learning_rate": 0.0008848318512439481, "loss": 2.2264, "step": 431090 }, { "epoch": 1.6665120378531335, "grad_norm": 0.1149090901017189, "learning_rate": 0.0008846525205121052, "loss": 2.2488, "step": 431100 }, { "epoch": 1.6665506950565168, "grad_norm": 0.13283200562000275, "learning_rate": 0.0008844732186092528, "loss": 2.2594, "step": 431110 }, { "epoch": 1.6665893522599, "grad_norm": 0.10119766741991043, "learning_rate": 0.0008842939455214918, "loss": 2.2459, "step": 431120 }, { "epoch": 1.6666280094632833, "grad_norm": 0.10677947849035263, "learning_rate": 0.0008841147012349342, "loss": 2.2287, "step": 431130 }, { "epoch": 1.6666666666666665, "grad_norm": 0.11360535025596619, "learning_rate": 0.0008839354857357036, "loss": 2.2408, "step": 431140 }, { "epoch": 1.66670532387005, "grad_norm": 0.11793069541454315, "learning_rate": 0.000883756299009934, "loss": 2.2585, "step": 431150 }, { "epoch": 1.6667439810734332, "grad_norm": 0.11654367297887802, "learning_rate": 0.0008835771410437709, "loss": 2.2534, "step": 431160 }, { "epoch": 1.6667826382768165, "grad_norm": 0.11465385556221008, "learning_rate": 0.0008833980118233714, "loss": 2.2575, "step": 431170 }, { "epoch": 1.6668212954801997, "grad_norm": 0.1096300333738327, "learning_rate": 0.0008832189113349027, "loss": 2.2482, "step": 431180 }, { "epoch": 1.6668599526835832, "grad_norm": 0.11422011256217957, "learning_rate": 0.0008830398395645438, "loss": 2.2579, "step": 431190 }, { "epoch": 1.6668986098869665, "grad_norm": 0.11084701120853424, "learning_rate": 0.0008828607964984847, "loss": 2.2696, "step": 431200 }, { "epoch": 1.6669372670903497, "grad_norm": 0.10878776013851166, "learning_rate": 0.0008826817821229263, "loss": 2.2408, "step": 431210 }, { "epoch": 1.666975924293733, "grad_norm": 0.1218942254781723, "learning_rate": 0.0008825027964240806, "loss": 2.2495, "step": 431220 }, { "epoch": 1.6670145814971162, "grad_norm": 0.12331361323595047, "learning_rate": 0.0008823238393881705, "loss": 2.2684, "step": 431230 }, { "epoch": 1.6670532387004995, "grad_norm": 0.10126879066228867, "learning_rate": 0.0008821449110014304, "loss": 2.2499, "step": 431240 }, { "epoch": 1.6670918959038827, "grad_norm": 0.1190921738743782, "learning_rate": 0.0008819660112501051, "loss": 2.2495, "step": 431250 }, { "epoch": 1.667130553107266, "grad_norm": 0.11584290117025375, "learning_rate": 0.0008817871401204509, "loss": 2.2431, "step": 431260 }, { "epoch": 1.6671692103106492, "grad_norm": 0.11810228228569031, "learning_rate": 0.0008816082975987349, "loss": 2.2485, "step": 431270 }, { "epoch": 1.6672078675140325, "grad_norm": 0.10765083879232407, "learning_rate": 0.0008814294836712349, "loss": 2.2312, "step": 431280 }, { "epoch": 1.6672465247174157, "grad_norm": 0.10427460074424744, "learning_rate": 0.0008812506983242403, "loss": 2.2435, "step": 431290 }, { "epoch": 1.667285181920799, "grad_norm": 0.11015084385871887, "learning_rate": 0.0008810719415440509, "loss": 2.2476, "step": 431300 }, { "epoch": 1.6673238391241822, "grad_norm": 0.10388490557670593, "learning_rate": 0.0008808932133169776, "loss": 2.2322, "step": 431310 }, { "epoch": 1.6673624963275657, "grad_norm": 0.12453647702932358, "learning_rate": 0.0008807145136293422, "loss": 2.2401, "step": 431320 }, { "epoch": 1.667401153530949, "grad_norm": 0.10797908157110214, "learning_rate": 0.0008805358424674776, "loss": 2.2416, "step": 431330 }, { "epoch": 1.6674398107343322, "grad_norm": 0.11077652126550674, "learning_rate": 0.0008803571998177276, "loss": 2.2297, "step": 431340 }, { "epoch": 1.6674784679377155, "grad_norm": 0.10809510201215744, "learning_rate": 0.0008801785856664466, "loss": 2.2528, "step": 431350 }, { "epoch": 1.667517125141099, "grad_norm": 0.12976865470409393, "learning_rate": 0.0008800000000000001, "loss": 2.2607, "step": 431360 }, { "epoch": 1.6675557823444822, "grad_norm": 0.10788272321224213, "learning_rate": 0.0008798214428047644, "loss": 2.2536, "step": 431370 }, { "epoch": 1.6675944395478655, "grad_norm": 0.1121840849518776, "learning_rate": 0.0008796429140671265, "loss": 2.2359, "step": 431380 }, { "epoch": 1.6676330967512487, "grad_norm": 0.11245424300432205, "learning_rate": 0.0008794644137734848, "loss": 2.2442, "step": 431390 }, { "epoch": 1.667671753954632, "grad_norm": 0.11103110015392303, "learning_rate": 0.0008792859419102481, "loss": 2.2438, "step": 431400 }, { "epoch": 1.6677104111580152, "grad_norm": 0.10202562063932419, "learning_rate": 0.0008791074984638358, "loss": 2.2362, "step": 431410 }, { "epoch": 1.6677490683613985, "grad_norm": 0.1100311353802681, "learning_rate": 0.0008789290834206786, "loss": 2.2495, "step": 431420 }, { "epoch": 1.6677877255647817, "grad_norm": 0.11951546370983124, "learning_rate": 0.0008787506967672177, "loss": 2.2381, "step": 431430 }, { "epoch": 1.667826382768165, "grad_norm": 0.1079200878739357, "learning_rate": 0.0008785723384899051, "loss": 2.2268, "step": 431440 }, { "epoch": 1.6678650399715482, "grad_norm": 0.1171964779496193, "learning_rate": 0.0008783940085752038, "loss": 2.2546, "step": 431450 }, { "epoch": 1.6679036971749315, "grad_norm": 0.10935444384813309, "learning_rate": 0.0008782157070095873, "loss": 2.235, "step": 431460 }, { "epoch": 1.6679423543783147, "grad_norm": 0.11907418817281723, "learning_rate": 0.0008780374337795401, "loss": 2.2489, "step": 431470 }, { "epoch": 1.667981011581698, "grad_norm": 0.1128414049744606, "learning_rate": 0.000877859188871557, "loss": 2.2393, "step": 431480 }, { "epoch": 1.6680196687850815, "grad_norm": 0.11093578487634659, "learning_rate": 0.0008776809722721439, "loss": 2.2426, "step": 431490 }, { "epoch": 1.6680583259884647, "grad_norm": 0.10630373656749725, "learning_rate": 0.0008775027839678176, "loss": 2.2528, "step": 431500 }, { "epoch": 1.668096983191848, "grad_norm": 0.12331317365169525, "learning_rate": 0.000877324623945105, "loss": 2.2234, "step": 431510 }, { "epoch": 1.6681356403952312, "grad_norm": 0.12409404665231705, "learning_rate": 0.000877146492190544, "loss": 2.2452, "step": 431520 }, { "epoch": 1.6681742975986147, "grad_norm": 0.096986323595047, "learning_rate": 0.0008769683886906834, "loss": 2.2409, "step": 431530 }, { "epoch": 1.668212954801998, "grad_norm": 0.11354723572731018, "learning_rate": 0.0008767903134320823, "loss": 2.2366, "step": 431540 }, { "epoch": 1.6682516120053812, "grad_norm": 0.11607275903224945, "learning_rate": 0.0008766122664013105, "loss": 2.2302, "step": 431550 }, { "epoch": 1.6682902692087644, "grad_norm": 0.12799812853336334, "learning_rate": 0.0008764342475849489, "loss": 2.2438, "step": 431560 }, { "epoch": 1.6683289264121477, "grad_norm": 0.1058444157242775, "learning_rate": 0.0008762562569695885, "loss": 2.2385, "step": 431570 }, { "epoch": 1.668367583615531, "grad_norm": 0.12132367491722107, "learning_rate": 0.0008760782945418307, "loss": 2.2529, "step": 431580 }, { "epoch": 1.6684062408189142, "grad_norm": 0.11287418007850647, "learning_rate": 0.0008759003602882882, "loss": 2.2405, "step": 431590 }, { "epoch": 1.6684448980222975, "grad_norm": 0.12102866917848587, "learning_rate": 0.0008757224541955844, "loss": 2.2565, "step": 431600 }, { "epoch": 1.6684835552256807, "grad_norm": 0.12273454666137695, "learning_rate": 0.0008755445762503522, "loss": 2.2464, "step": 431610 }, { "epoch": 1.668522212429064, "grad_norm": 0.11583053320646286, "learning_rate": 0.0008753667264392362, "loss": 2.2305, "step": 431620 }, { "epoch": 1.6685608696324472, "grad_norm": 0.11362405866384506, "learning_rate": 0.0008751889047488908, "loss": 2.2496, "step": 431630 }, { "epoch": 1.6685995268358305, "grad_norm": 0.13401475548744202, "learning_rate": 0.0008750111111659813, "loss": 2.2463, "step": 431640 }, { "epoch": 1.6686381840392137, "grad_norm": 0.10942229628562927, "learning_rate": 0.0008748333456771838, "loss": 2.2409, "step": 431650 }, { "epoch": 1.6686768412425972, "grad_norm": 0.11597096174955368, "learning_rate": 0.0008746556082691842, "loss": 2.2476, "step": 431660 }, { "epoch": 1.6687154984459804, "grad_norm": 0.11072780191898346, "learning_rate": 0.0008744778989286795, "loss": 2.2465, "step": 431670 }, { "epoch": 1.6687541556493637, "grad_norm": 0.12409374117851257, "learning_rate": 0.0008743002176423768, "loss": 2.2536, "step": 431680 }, { "epoch": 1.668792812852747, "grad_norm": 0.11742675304412842, "learning_rate": 0.0008741225643969944, "loss": 2.2482, "step": 431690 }, { "epoch": 1.6688314700561304, "grad_norm": 0.10112820565700531, "learning_rate": 0.0008739449391792602, "loss": 2.2443, "step": 431700 }, { "epoch": 1.6688701272595137, "grad_norm": 0.11522382497787476, "learning_rate": 0.0008737673419759131, "loss": 2.2438, "step": 431710 }, { "epoch": 1.668908784462897, "grad_norm": 0.11149576306343079, "learning_rate": 0.0008735897727737022, "loss": 2.2417, "step": 431720 }, { "epoch": 1.6689474416662802, "grad_norm": 0.11229491233825684, "learning_rate": 0.0008734122315593871, "loss": 2.2558, "step": 431730 }, { "epoch": 1.6689860988696634, "grad_norm": 0.12534652650356293, "learning_rate": 0.000873234718319738, "loss": 2.2479, "step": 431740 }, { "epoch": 1.6690247560730467, "grad_norm": 0.11140571534633636, "learning_rate": 0.0008730572330415356, "loss": 2.2596, "step": 431750 }, { "epoch": 1.66906341327643, "grad_norm": 0.12196607887744904, "learning_rate": 0.0008728797757115703, "loss": 2.2337, "step": 431760 }, { "epoch": 1.6691020704798132, "grad_norm": 0.11696066707372665, "learning_rate": 0.0008727023463166439, "loss": 2.259, "step": 431770 }, { "epoch": 1.6691407276831964, "grad_norm": 0.12137053906917572, "learning_rate": 0.0008725249448435677, "loss": 2.2402, "step": 431780 }, { "epoch": 1.6691793848865797, "grad_norm": 0.1035657450556755, "learning_rate": 0.0008723475712791639, "loss": 2.2573, "step": 431790 }, { "epoch": 1.669218042089963, "grad_norm": 0.11626846343278885, "learning_rate": 0.0008721702256102653, "loss": 2.2402, "step": 431800 }, { "epoch": 1.6692566992933462, "grad_norm": 0.12115205824375153, "learning_rate": 0.0008719929078237141, "loss": 2.2604, "step": 431810 }, { "epoch": 1.6692953564967294, "grad_norm": 0.11947688460350037, "learning_rate": 0.0008718156179063636, "loss": 2.2519, "step": 431820 }, { "epoch": 1.669334013700113, "grad_norm": 0.11764021217823029, "learning_rate": 0.0008716383558450776, "loss": 2.2667, "step": 431830 }, { "epoch": 1.6693726709034962, "grad_norm": 0.10446431487798691, "learning_rate": 0.0008714611216267292, "loss": 2.2413, "step": 431840 }, { "epoch": 1.6694113281068794, "grad_norm": 0.10729694366455078, "learning_rate": 0.0008712839152382031, "loss": 2.2525, "step": 431850 }, { "epoch": 1.6694499853102627, "grad_norm": 0.11575144529342651, "learning_rate": 0.0008711067366663932, "loss": 2.2576, "step": 431860 }, { "epoch": 1.6694886425136461, "grad_norm": 0.11094532907009125, "learning_rate": 0.0008709295858982045, "loss": 2.2446, "step": 431870 }, { "epoch": 1.6695272997170294, "grad_norm": 0.12855194509029388, "learning_rate": 0.0008707524629205516, "loss": 2.2415, "step": 431880 }, { "epoch": 1.6695659569204127, "grad_norm": 0.12823374569416046, "learning_rate": 0.0008705753677203601, "loss": 2.2407, "step": 431890 }, { "epoch": 1.669604614123796, "grad_norm": 0.11963459104299545, "learning_rate": 0.000870398300284565, "loss": 2.2368, "step": 431900 }, { "epoch": 1.6696432713271792, "grad_norm": 0.11403647810220718, "learning_rate": 0.000870221260600112, "loss": 2.2371, "step": 431910 }, { "epoch": 1.6696819285305624, "grad_norm": 0.10871980339288712, "learning_rate": 0.0008700442486539573, "loss": 2.2482, "step": 431920 }, { "epoch": 1.6697205857339457, "grad_norm": 0.10171128064393997, "learning_rate": 0.0008698672644330668, "loss": 2.2281, "step": 431930 }, { "epoch": 1.669759242937329, "grad_norm": 0.11919943988323212, "learning_rate": 0.0008696903079244168, "loss": 2.2469, "step": 431940 }, { "epoch": 1.6697979001407122, "grad_norm": 0.11724691838026047, "learning_rate": 0.0008695133791149937, "loss": 2.2392, "step": 431950 }, { "epoch": 1.6698365573440954, "grad_norm": 0.11415465176105499, "learning_rate": 0.0008693364779917945, "loss": 2.2448, "step": 431960 }, { "epoch": 1.6698752145474787, "grad_norm": 0.11876343190670013, "learning_rate": 0.0008691596045418258, "loss": 2.2439, "step": 431970 }, { "epoch": 1.669913871750862, "grad_norm": 0.10392026603221893, "learning_rate": 0.0008689827587521047, "loss": 2.2271, "step": 431980 }, { "epoch": 1.6699525289542452, "grad_norm": 0.10942797362804413, "learning_rate": 0.0008688059406096583, "loss": 2.2455, "step": 431990 }, { "epoch": 1.6699911861576286, "grad_norm": 0.10648475587368011, "learning_rate": 0.000868629150101524, "loss": 2.2466, "step": 432000 }, { "epoch": 1.670029843361012, "grad_norm": 0.10488341748714447, "learning_rate": 0.0008684523872147492, "loss": 2.2435, "step": 432010 }, { "epoch": 1.6700685005643952, "grad_norm": 0.10440246015787125, "learning_rate": 0.0008682756519363914, "loss": 2.2564, "step": 432020 }, { "epoch": 1.6701071577677784, "grad_norm": 0.11767731606960297, "learning_rate": 0.0008680989442535183, "loss": 2.2363, "step": 432030 }, { "epoch": 1.6701458149711619, "grad_norm": 0.13144199550151825, "learning_rate": 0.0008679222641532076, "loss": 2.2453, "step": 432040 }, { "epoch": 1.6701844721745451, "grad_norm": 0.12658940255641937, "learning_rate": 0.000867745611622547, "loss": 2.2459, "step": 432050 }, { "epoch": 1.6702231293779284, "grad_norm": 0.11060245335102081, "learning_rate": 0.0008675689866486347, "loss": 2.2388, "step": 432060 }, { "epoch": 1.6702617865813116, "grad_norm": 0.09972178190946579, "learning_rate": 0.0008673923892185784, "loss": 2.2461, "step": 432070 }, { "epoch": 1.6703004437846949, "grad_norm": 0.10841628164052963, "learning_rate": 0.0008672158193194963, "loss": 2.2379, "step": 432080 }, { "epoch": 1.6703391009880781, "grad_norm": 0.12501399219036102, "learning_rate": 0.0008670392769385163, "loss": 2.237, "step": 432090 }, { "epoch": 1.6703777581914614, "grad_norm": 0.1202363669872284, "learning_rate": 0.0008668627620627765, "loss": 2.2352, "step": 432100 }, { "epoch": 1.6704164153948446, "grad_norm": 0.11828488856554031, "learning_rate": 0.0008666862746794248, "loss": 2.2343, "step": 432110 }, { "epoch": 1.670455072598228, "grad_norm": 0.11299415677785873, "learning_rate": 0.0008665098147756196, "loss": 2.2432, "step": 432120 }, { "epoch": 1.6704937298016111, "grad_norm": 0.11967718601226807, "learning_rate": 0.0008663333823385291, "loss": 2.2426, "step": 432130 }, { "epoch": 1.6705323870049944, "grad_norm": 0.11156123131513596, "learning_rate": 0.0008661569773553307, "loss": 2.2385, "step": 432140 }, { "epoch": 1.6705710442083777, "grad_norm": 0.11214423179626465, "learning_rate": 0.0008659805998132131, "loss": 2.2429, "step": 432150 }, { "epoch": 1.670609701411761, "grad_norm": 0.11777114868164062, "learning_rate": 0.0008658042496993741, "loss": 2.2674, "step": 432160 }, { "epoch": 1.6706483586151444, "grad_norm": 0.1242789775133133, "learning_rate": 0.0008656279270010214, "loss": 2.2411, "step": 432170 }, { "epoch": 1.6706870158185276, "grad_norm": 0.11542826890945435, "learning_rate": 0.0008654516317053734, "loss": 2.266, "step": 432180 }, { "epoch": 1.6707256730219109, "grad_norm": 0.12458527833223343, "learning_rate": 0.0008652753637996574, "loss": 2.2377, "step": 432190 }, { "epoch": 1.6707643302252941, "grad_norm": 0.2273350954055786, "learning_rate": 0.0008650991232711115, "loss": 2.2562, "step": 432200 }, { "epoch": 1.6708029874286776, "grad_norm": 0.12100516259670258, "learning_rate": 0.0008649229101069831, "loss": 2.2496, "step": 432210 }, { "epoch": 1.6708416446320609, "grad_norm": 0.10887449234724045, "learning_rate": 0.00086474672429453, "loss": 2.2499, "step": 432220 }, { "epoch": 1.6708803018354441, "grad_norm": 0.11747176945209503, "learning_rate": 0.0008645705658210194, "loss": 2.255, "step": 432230 }, { "epoch": 1.6709189590388274, "grad_norm": 0.0999768003821373, "learning_rate": 0.0008643944346737286, "loss": 2.2439, "step": 432240 }, { "epoch": 1.6709576162422106, "grad_norm": 0.11573263257741928, "learning_rate": 0.0008642183308399454, "loss": 2.2453, "step": 432250 }, { "epoch": 1.6709962734455939, "grad_norm": 0.1162898987531662, "learning_rate": 0.0008640422543069659, "loss": 2.2726, "step": 432260 }, { "epoch": 1.6710349306489771, "grad_norm": 0.09759674966335297, "learning_rate": 0.0008638662050620975, "loss": 2.249, "step": 432270 }, { "epoch": 1.6710735878523604, "grad_norm": 0.11636500805616379, "learning_rate": 0.0008636901830926567, "loss": 2.2548, "step": 432280 }, { "epoch": 1.6711122450557436, "grad_norm": 0.11740848422050476, "learning_rate": 0.0008635141883859702, "loss": 2.2332, "step": 432290 }, { "epoch": 1.6711509022591269, "grad_norm": 0.1161784678697586, "learning_rate": 0.0008633382209293744, "loss": 2.2506, "step": 432300 }, { "epoch": 1.6711895594625101, "grad_norm": 0.11073528230190277, "learning_rate": 0.0008631622807102151, "loss": 2.256, "step": 432310 }, { "epoch": 1.6712282166658934, "grad_norm": 0.12388008832931519, "learning_rate": 0.0008629863677158485, "loss": 2.2599, "step": 432320 }, { "epoch": 1.6712668738692766, "grad_norm": 0.11557994037866592, "learning_rate": 0.0008628104819336402, "loss": 2.2329, "step": 432330 }, { "epoch": 1.6713055310726601, "grad_norm": 0.1085330918431282, "learning_rate": 0.0008626346233509654, "loss": 2.235, "step": 432340 }, { "epoch": 1.6713441882760434, "grad_norm": 0.11548107117414474, "learning_rate": 0.00086245879195521, "loss": 2.2461, "step": 432350 }, { "epoch": 1.6713828454794266, "grad_norm": 0.10183994472026825, "learning_rate": 0.0008622829877337688, "loss": 2.2411, "step": 432360 }, { "epoch": 1.6714215026828099, "grad_norm": 0.12165366113185883, "learning_rate": 0.000862107210674046, "loss": 2.2321, "step": 432370 }, { "epoch": 1.6714601598861933, "grad_norm": 0.13182103633880615, "learning_rate": 0.0008619314607634565, "loss": 2.2393, "step": 432380 }, { "epoch": 1.6714988170895766, "grad_norm": 0.1082003116607666, "learning_rate": 0.0008617557379894243, "loss": 2.2366, "step": 432390 }, { "epoch": 1.6715374742929598, "grad_norm": 0.10970823466777802, "learning_rate": 0.0008615800423393833, "loss": 2.2465, "step": 432400 }, { "epoch": 1.671576131496343, "grad_norm": 0.11980883032083511, "learning_rate": 0.0008614043738007773, "loss": 2.236, "step": 432410 }, { "epoch": 1.6716147886997264, "grad_norm": 0.10353901982307434, "learning_rate": 0.0008612287323610593, "loss": 2.2578, "step": 432420 }, { "epoch": 1.6716534459031096, "grad_norm": 0.10307720303535461, "learning_rate": 0.0008610531180076921, "loss": 2.2465, "step": 432430 }, { "epoch": 1.6716921031064929, "grad_norm": 0.2446184903383255, "learning_rate": 0.0008608775307281486, "loss": 2.2489, "step": 432440 }, { "epoch": 1.671730760309876, "grad_norm": 0.1153741404414177, "learning_rate": 0.0008607019705099108, "loss": 2.2633, "step": 432450 }, { "epoch": 1.6717694175132594, "grad_norm": 0.12404379993677139, "learning_rate": 0.0008605264373404708, "loss": 2.2432, "step": 432460 }, { "epoch": 1.6718080747166426, "grad_norm": 0.11334104835987091, "learning_rate": 0.0008603509312073298, "loss": 2.239, "step": 432470 }, { "epoch": 1.6718467319200259, "grad_norm": 0.11389446258544922, "learning_rate": 0.0008601754520979994, "loss": 2.2402, "step": 432480 }, { "epoch": 1.6718853891234091, "grad_norm": 0.1087617427110672, "learning_rate": 0.0008599999999999999, "loss": 2.254, "step": 432490 }, { "epoch": 1.6719240463267926, "grad_norm": 0.10388283431529999, "learning_rate": 0.000859824574900862, "loss": 2.2297, "step": 432500 }, { "epoch": 1.6719627035301758, "grad_norm": 0.10273746401071548, "learning_rate": 0.0008596491767881254, "loss": 2.2353, "step": 432510 }, { "epoch": 1.672001360733559, "grad_norm": 0.10638001561164856, "learning_rate": 0.0008594738056493398, "loss": 2.2482, "step": 432520 }, { "epoch": 1.6720400179369423, "grad_norm": 0.11977694183588028, "learning_rate": 0.0008592984614720642, "loss": 2.2543, "step": 432530 }, { "epoch": 1.6720786751403256, "grad_norm": 0.11505132168531418, "learning_rate": 0.0008591231442438672, "loss": 2.2266, "step": 432540 }, { "epoch": 1.672117332343709, "grad_norm": 0.10981100797653198, "learning_rate": 0.000858947853952327, "loss": 2.2501, "step": 432550 }, { "epoch": 1.6721559895470923, "grad_norm": 0.11681405454874039, "learning_rate": 0.0008587725905850317, "loss": 2.2366, "step": 432560 }, { "epoch": 1.6721946467504756, "grad_norm": 0.10692845284938812, "learning_rate": 0.000858597354129578, "loss": 2.2335, "step": 432570 }, { "epoch": 1.6722333039538588, "grad_norm": 0.11772792786359787, "learning_rate": 0.0008584221445735731, "loss": 2.2127, "step": 432580 }, { "epoch": 1.672271961157242, "grad_norm": 0.1118854507803917, "learning_rate": 0.0008582469619046331, "loss": 2.2323, "step": 432590 }, { "epoch": 1.6723106183606253, "grad_norm": 0.13144323229789734, "learning_rate": 0.0008580718061103842, "loss": 2.2465, "step": 432600 }, { "epoch": 1.6723492755640086, "grad_norm": 0.12407439202070236, "learning_rate": 0.0008578966771784613, "loss": 2.2601, "step": 432610 }, { "epoch": 1.6723879327673918, "grad_norm": 0.10566231608390808, "learning_rate": 0.0008577215750965092, "loss": 2.2387, "step": 432620 }, { "epoch": 1.672426589970775, "grad_norm": 0.10437638312578201, "learning_rate": 0.0008575464998521823, "loss": 2.2387, "step": 432630 }, { "epoch": 1.6724652471741583, "grad_norm": 0.10871285945177078, "learning_rate": 0.0008573714514331439, "loss": 2.2408, "step": 432640 }, { "epoch": 1.6725039043775416, "grad_norm": 0.11669822037220001, "learning_rate": 0.0008571964298270678, "loss": 2.2493, "step": 432650 }, { "epoch": 1.6725425615809248, "grad_norm": 0.10993584990501404, "learning_rate": 0.0008570214350216361, "loss": 2.242, "step": 432660 }, { "epoch": 1.6725812187843083, "grad_norm": 0.11364573985338211, "learning_rate": 0.000856846467004541, "loss": 2.2404, "step": 432670 }, { "epoch": 1.6726198759876916, "grad_norm": 0.2189556360244751, "learning_rate": 0.0008566715257634838, "loss": 2.2508, "step": 432680 }, { "epoch": 1.6726585331910748, "grad_norm": 0.11758209019899368, "learning_rate": 0.0008564966112861754, "loss": 2.2624, "step": 432690 }, { "epoch": 1.672697190394458, "grad_norm": 0.12056602537631989, "learning_rate": 0.0008563217235603363, "loss": 2.252, "step": 432700 }, { "epoch": 1.6727358475978413, "grad_norm": 0.11321151256561279, "learning_rate": 0.0008561468625736956, "loss": 2.2469, "step": 432710 }, { "epoch": 1.6727745048012248, "grad_norm": 0.10741432011127472, "learning_rate": 0.0008559720283139926, "loss": 2.2467, "step": 432720 }, { "epoch": 1.672813162004608, "grad_norm": 0.10732309520244598, "learning_rate": 0.0008557972207689757, "loss": 2.2452, "step": 432730 }, { "epoch": 1.6728518192079913, "grad_norm": 0.11348134279251099, "learning_rate": 0.0008556224399264025, "loss": 2.2465, "step": 432740 }, { "epoch": 1.6728904764113746, "grad_norm": 0.10850790143013, "learning_rate": 0.0008554476857740403, "loss": 2.2373, "step": 432750 }, { "epoch": 1.6729291336147578, "grad_norm": 0.106198750436306, "learning_rate": 0.0008552729582996652, "loss": 2.2509, "step": 432760 }, { "epoch": 1.672967790818141, "grad_norm": 0.12779173254966736, "learning_rate": 0.0008550982574910631, "loss": 2.2461, "step": 432770 }, { "epoch": 1.6730064480215243, "grad_norm": 0.11383172869682312, "learning_rate": 0.0008549235833360292, "loss": 2.247, "step": 432780 }, { "epoch": 1.6730451052249076, "grad_norm": 0.10072283446788788, "learning_rate": 0.0008547489358223674, "loss": 2.2412, "step": 432790 }, { "epoch": 1.6730837624282908, "grad_norm": 0.10738955438137054, "learning_rate": 0.0008545743149378917, "loss": 2.2481, "step": 432800 }, { "epoch": 1.673122419631674, "grad_norm": 0.11644309014081955, "learning_rate": 0.0008543997206704251, "loss": 2.2521, "step": 432810 }, { "epoch": 1.6731610768350573, "grad_norm": 0.10279635339975357, "learning_rate": 0.0008542251530077998, "loss": 2.2456, "step": 432820 }, { "epoch": 1.6731997340384406, "grad_norm": 0.1066320389509201, "learning_rate": 0.000854050611937857, "loss": 2.2543, "step": 432830 }, { "epoch": 1.673238391241824, "grad_norm": 0.11159369349479675, "learning_rate": 0.0008538760974484476, "loss": 2.241, "step": 432840 }, { "epoch": 1.6732770484452073, "grad_norm": 0.13942484557628632, "learning_rate": 0.0008537016095274319, "loss": 2.2506, "step": 432850 }, { "epoch": 1.6733157056485906, "grad_norm": 0.11398211121559143, "learning_rate": 0.0008535271481626789, "loss": 2.244, "step": 432860 }, { "epoch": 1.6733543628519738, "grad_norm": 0.11198543012142181, "learning_rate": 0.0008533527133420671, "loss": 2.2465, "step": 432870 }, { "epoch": 1.673393020055357, "grad_norm": 0.10565642267465591, "learning_rate": 0.000853178305053484, "loss": 2.2346, "step": 432880 }, { "epoch": 1.6734316772587405, "grad_norm": 0.11750909686088562, "learning_rate": 0.0008530039232848266, "loss": 2.2336, "step": 432890 }, { "epoch": 1.6734703344621238, "grad_norm": 0.13077422976493835, "learning_rate": 0.000852829568024001, "loss": 2.2583, "step": 432900 }, { "epoch": 1.673508991665507, "grad_norm": 0.11255418509244919, "learning_rate": 0.0008526552392589226, "loss": 2.2267, "step": 432910 }, { "epoch": 1.6735476488688903, "grad_norm": 0.11538899689912796, "learning_rate": 0.0008524809369775159, "loss": 2.2303, "step": 432920 }, { "epoch": 1.6735863060722735, "grad_norm": 0.12092319875955582, "learning_rate": 0.0008523066611677142, "loss": 2.2376, "step": 432930 }, { "epoch": 1.6736249632756568, "grad_norm": 0.1103396788239479, "learning_rate": 0.0008521324118174605, "loss": 2.2324, "step": 432940 }, { "epoch": 1.67366362047904, "grad_norm": 0.11736710369586945, "learning_rate": 0.0008519581889147068, "loss": 2.2286, "step": 432950 }, { "epoch": 1.6737022776824233, "grad_norm": 0.10646773129701614, "learning_rate": 0.000851783992447414, "loss": 2.2438, "step": 432960 }, { "epoch": 1.6737409348858066, "grad_norm": 0.15531231462955475, "learning_rate": 0.0008516098224035526, "loss": 2.2338, "step": 432970 }, { "epoch": 1.6737795920891898, "grad_norm": 0.11417527496814728, "learning_rate": 0.0008514356787711017, "loss": 2.2416, "step": 432980 }, { "epoch": 1.673818249292573, "grad_norm": 0.11312133818864822, "learning_rate": 0.0008512615615380496, "loss": 2.2368, "step": 432990 }, { "epoch": 1.6738569064959563, "grad_norm": 0.10843250900506973, "learning_rate": 0.0008510874706923943, "loss": 2.2359, "step": 433000 }, { "epoch": 1.6738955636993398, "grad_norm": 0.12344299256801605, "learning_rate": 0.000850913406222142, "loss": 2.2437, "step": 433010 }, { "epoch": 1.673934220902723, "grad_norm": 0.11518251150846481, "learning_rate": 0.0008507393681153086, "loss": 2.2436, "step": 433020 }, { "epoch": 1.6739728781061063, "grad_norm": 0.10837063193321228, "learning_rate": 0.000850565356359919, "loss": 2.2399, "step": 433030 }, { "epoch": 1.6740115353094895, "grad_norm": 0.10487712919712067, "learning_rate": 0.0008503913709440068, "loss": 2.2361, "step": 433040 }, { "epoch": 1.674050192512873, "grad_norm": 0.11273378878831863, "learning_rate": 0.0008502174118556152, "loss": 2.2508, "step": 433050 }, { "epoch": 1.6740888497162563, "grad_norm": 0.1132865771651268, "learning_rate": 0.000850043479082796, "loss": 2.2494, "step": 433060 }, { "epoch": 1.6741275069196395, "grad_norm": 0.11141993850469589, "learning_rate": 0.0008498695726136101, "loss": 2.2422, "step": 433070 }, { "epoch": 1.6741661641230228, "grad_norm": 0.11210117489099503, "learning_rate": 0.0008496956924361277, "loss": 2.2295, "step": 433080 }, { "epoch": 1.674204821326406, "grad_norm": 0.11931067705154419, "learning_rate": 0.0008495218385384276, "loss": 2.2409, "step": 433090 }, { "epoch": 1.6742434785297893, "grad_norm": 0.10923109203577042, "learning_rate": 0.000849348010908598, "loss": 2.2518, "step": 433100 }, { "epoch": 1.6742821357331725, "grad_norm": 0.11988096684217453, "learning_rate": 0.0008491742095347359, "loss": 2.2395, "step": 433110 }, { "epoch": 1.6743207929365558, "grad_norm": 0.10944250971078873, "learning_rate": 0.0008490004344049474, "loss": 2.251, "step": 433120 }, { "epoch": 1.674359450139939, "grad_norm": 0.11326149851083755, "learning_rate": 0.0008488266855073472, "loss": 2.2478, "step": 433130 }, { "epoch": 1.6743981073433223, "grad_norm": 0.10903026908636093, "learning_rate": 0.0008486529628300597, "loss": 2.2318, "step": 433140 }, { "epoch": 1.6744367645467055, "grad_norm": 0.11459324508905411, "learning_rate": 0.0008484792663612176, "loss": 2.2409, "step": 433150 }, { "epoch": 1.6744754217500888, "grad_norm": 0.1096375361084938, "learning_rate": 0.0008483055960889625, "loss": 2.2473, "step": 433160 }, { "epoch": 1.674514078953472, "grad_norm": 0.6692434549331665, "learning_rate": 0.0008481319520014457, "loss": 2.2463, "step": 433170 }, { "epoch": 1.6745527361568555, "grad_norm": 0.10968945920467377, "learning_rate": 0.0008479583340868267, "loss": 2.2401, "step": 433180 }, { "epoch": 1.6745913933602388, "grad_norm": 0.1067478135228157, "learning_rate": 0.000847784742333274, "loss": 2.244, "step": 433190 }, { "epoch": 1.674630050563622, "grad_norm": 0.11387550830841064, "learning_rate": 0.0008476111767289654, "loss": 2.2493, "step": 433200 }, { "epoch": 1.6746687077670053, "grad_norm": 0.1319890320301056, "learning_rate": 0.0008474376372620873, "loss": 2.2588, "step": 433210 }, { "epoch": 1.6747073649703887, "grad_norm": 0.1147097498178482, "learning_rate": 0.000847264123920835, "loss": 2.2387, "step": 433220 }, { "epoch": 1.674746022173772, "grad_norm": 0.09701906889677048, "learning_rate": 0.000847090636693413, "loss": 2.2458, "step": 433230 }, { "epoch": 1.6747846793771552, "grad_norm": 0.11148305237293243, "learning_rate": 0.000846917175568034, "loss": 2.2552, "step": 433240 }, { "epoch": 1.6748233365805385, "grad_norm": 0.11302665621042252, "learning_rate": 0.0008467437405329203, "loss": 2.253, "step": 433250 }, { "epoch": 1.6748619937839218, "grad_norm": 0.1179070770740509, "learning_rate": 0.0008465703315763029, "loss": 2.2427, "step": 433260 }, { "epoch": 1.674900650987305, "grad_norm": 0.10579831898212433, "learning_rate": 0.0008463969486864209, "loss": 2.2545, "step": 433270 }, { "epoch": 1.6749393081906883, "grad_norm": 0.11859673261642456, "learning_rate": 0.0008462235918515235, "loss": 2.2473, "step": 433280 }, { "epoch": 1.6749779653940715, "grad_norm": 0.11856315284967422, "learning_rate": 0.0008460502610598675, "loss": 2.2396, "step": 433290 }, { "epoch": 1.6750166225974548, "grad_norm": 0.10576929897069931, "learning_rate": 0.0008458769562997193, "loss": 2.2248, "step": 433300 }, { "epoch": 1.675055279800838, "grad_norm": 0.1199721097946167, "learning_rate": 0.0008457036775593538, "loss": 2.2395, "step": 433310 }, { "epoch": 1.6750939370042213, "grad_norm": 0.1224532350897789, "learning_rate": 0.000845530424827055, "loss": 2.2324, "step": 433320 }, { "epoch": 1.6751325942076045, "grad_norm": 0.11451449990272522, "learning_rate": 0.0008453571980911153, "loss": 2.2386, "step": 433330 }, { "epoch": 1.6751712514109878, "grad_norm": 0.11305505037307739, "learning_rate": 0.000845183997339836, "loss": 2.2481, "step": 433340 }, { "epoch": 1.6752099086143712, "grad_norm": 0.11151070147752762, "learning_rate": 0.0008450108225615271, "loss": 2.2443, "step": 433350 }, { "epoch": 1.6752485658177545, "grad_norm": 0.11432652175426483, "learning_rate": 0.0008448376737445079, "loss": 2.249, "step": 433360 }, { "epoch": 1.6752872230211378, "grad_norm": 0.11757257580757141, "learning_rate": 0.0008446645508771056, "loss": 2.2494, "step": 433370 }, { "epoch": 1.675325880224521, "grad_norm": 0.11913491040468216, "learning_rate": 0.0008444914539476569, "loss": 2.2326, "step": 433380 }, { "epoch": 1.6753645374279045, "grad_norm": 0.11336777359247208, "learning_rate": 0.0008443183829445067, "loss": 2.2449, "step": 433390 }, { "epoch": 1.6754031946312877, "grad_norm": 0.11028832942247391, "learning_rate": 0.0008441453378560089, "loss": 2.2437, "step": 433400 }, { "epoch": 1.675441851834671, "grad_norm": 0.11487679183483124, "learning_rate": 0.000843972318670526, "loss": 2.2354, "step": 433410 }, { "epoch": 1.6754805090380542, "grad_norm": 0.11123806983232498, "learning_rate": 0.0008437993253764294, "loss": 2.2586, "step": 433420 }, { "epoch": 1.6755191662414375, "grad_norm": 0.11151211708784103, "learning_rate": 0.0008436263579620989, "loss": 2.2376, "step": 433430 }, { "epoch": 1.6755578234448207, "grad_norm": 0.11394939571619034, "learning_rate": 0.0008434534164159233, "loss": 2.2531, "step": 433440 }, { "epoch": 1.675596480648204, "grad_norm": 0.11680968105792999, "learning_rate": 0.0008432805007262996, "loss": 2.2414, "step": 433450 }, { "epoch": 1.6756351378515872, "grad_norm": 0.11009000986814499, "learning_rate": 0.0008431076108816342, "loss": 2.2546, "step": 433460 }, { "epoch": 1.6756737950549705, "grad_norm": 0.12866313755512238, "learning_rate": 0.0008429347468703419, "loss": 2.2518, "step": 433470 }, { "epoch": 1.6757124522583537, "grad_norm": 0.10869014263153076, "learning_rate": 0.0008427619086808453, "loss": 2.2603, "step": 433480 }, { "epoch": 1.675751109461737, "grad_norm": 0.11136332899332047, "learning_rate": 0.0008425890963015772, "loss": 2.2243, "step": 433490 }, { "epoch": 1.6757897666651203, "grad_norm": 0.10977095365524292, "learning_rate": 0.0008424163097209774, "loss": 2.2328, "step": 433500 }, { "epoch": 1.6758284238685035, "grad_norm": 0.11136292666196823, "learning_rate": 0.0008422435489274958, "loss": 2.2468, "step": 433510 }, { "epoch": 1.675867081071887, "grad_norm": 0.10493572056293488, "learning_rate": 0.0008420708139095899, "loss": 2.2378, "step": 433520 }, { "epoch": 1.6759057382752702, "grad_norm": 0.1025480106472969, "learning_rate": 0.0008418981046557259, "loss": 2.243, "step": 433530 }, { "epoch": 1.6759443954786535, "grad_norm": 0.10371068120002747, "learning_rate": 0.0008417254211543795, "loss": 2.2481, "step": 433540 }, { "epoch": 1.6759830526820367, "grad_norm": 0.1214233785867691, "learning_rate": 0.0008415527633940336, "loss": 2.2477, "step": 433550 }, { "epoch": 1.6760217098854202, "grad_norm": 0.11474094539880753, "learning_rate": 0.0008413801313631808, "loss": 2.2529, "step": 433560 }, { "epoch": 1.6760603670888035, "grad_norm": 0.11168044805526733, "learning_rate": 0.0008412075250503222, "loss": 2.2294, "step": 433570 }, { "epoch": 1.6760990242921867, "grad_norm": 0.11817635595798492, "learning_rate": 0.0008410349444439664, "loss": 2.2516, "step": 433580 }, { "epoch": 1.67613768149557, "grad_norm": 0.12926152348518372, "learning_rate": 0.0008408623895326319, "loss": 2.2297, "step": 433590 }, { "epoch": 1.6761763386989532, "grad_norm": 0.1127639040350914, "learning_rate": 0.000840689860304845, "loss": 2.2331, "step": 433600 }, { "epoch": 1.6762149959023365, "grad_norm": 0.10601234436035156, "learning_rate": 0.0008405173567491405, "loss": 2.2555, "step": 433610 }, { "epoch": 1.6762536531057197, "grad_norm": 0.10784386098384857, "learning_rate": 0.0008403448788540621, "loss": 2.2521, "step": 433620 }, { "epoch": 1.676292310309103, "grad_norm": 0.1180407926440239, "learning_rate": 0.0008401724266081618, "loss": 2.2318, "step": 433630 }, { "epoch": 1.6763309675124862, "grad_norm": 0.12449698150157928, "learning_rate": 0.0008400000000000001, "loss": 2.2353, "step": 433640 }, { "epoch": 1.6763696247158695, "grad_norm": 0.11709899455308914, "learning_rate": 0.0008398275990181459, "loss": 2.2449, "step": 433650 }, { "epoch": 1.6764082819192527, "grad_norm": 0.1138455793261528, "learning_rate": 0.0008396552236511769, "loss": 2.2635, "step": 433660 }, { "epoch": 1.676446939122636, "grad_norm": 0.10678897798061371, "learning_rate": 0.0008394828738876795, "loss": 2.223, "step": 433670 }, { "epoch": 1.6764855963260192, "grad_norm": 0.10805068910121918, "learning_rate": 0.0008393105497162475, "loss": 2.2387, "step": 433680 }, { "epoch": 1.6765242535294027, "grad_norm": 0.10988160222768784, "learning_rate": 0.0008391382511254839, "loss": 2.2353, "step": 433690 }, { "epoch": 1.676562910732786, "grad_norm": 0.13759155571460724, "learning_rate": 0.0008389659781040007, "loss": 2.2423, "step": 433700 }, { "epoch": 1.6766015679361692, "grad_norm": 0.10952576994895935, "learning_rate": 0.0008387937306404172, "loss": 2.2527, "step": 433710 }, { "epoch": 1.6766402251395525, "grad_norm": 0.11295206844806671, "learning_rate": 0.0008386215087233619, "loss": 2.2396, "step": 433720 }, { "epoch": 1.676678882342936, "grad_norm": 0.10747696459293365, "learning_rate": 0.0008384493123414718, "loss": 2.2501, "step": 433730 }, { "epoch": 1.6767175395463192, "grad_norm": 0.5068920254707336, "learning_rate": 0.0008382771414833916, "loss": 2.2422, "step": 433740 }, { "epoch": 1.6767561967497024, "grad_norm": 0.11631965637207031, "learning_rate": 0.0008381049961377749, "loss": 2.2445, "step": 433750 }, { "epoch": 1.6767948539530857, "grad_norm": 0.11662419885396957, "learning_rate": 0.000837932876293284, "loss": 2.2426, "step": 433760 }, { "epoch": 1.676833511156469, "grad_norm": 0.11134974658489227, "learning_rate": 0.0008377607819385891, "loss": 2.2489, "step": 433770 }, { "epoch": 1.6768721683598522, "grad_norm": 0.10038750618696213, "learning_rate": 0.0008375887130623687, "loss": 2.243, "step": 433780 }, { "epoch": 1.6769108255632355, "grad_norm": 0.1259811818599701, "learning_rate": 0.0008374166696533106, "loss": 2.2413, "step": 433790 }, { "epoch": 1.6769494827666187, "grad_norm": 0.11071491986513138, "learning_rate": 0.0008372446517001094, "loss": 2.2426, "step": 433800 }, { "epoch": 1.676988139970002, "grad_norm": 0.11462465673685074, "learning_rate": 0.0008370726591914694, "loss": 2.2504, "step": 433810 }, { "epoch": 1.6770267971733852, "grad_norm": 0.11320709437131882, "learning_rate": 0.000836900692116103, "loss": 2.2483, "step": 433820 }, { "epoch": 1.6770654543767685, "grad_norm": 0.11926060914993286, "learning_rate": 0.0008367287504627308, "loss": 2.2477, "step": 433830 }, { "epoch": 1.6771041115801517, "grad_norm": 0.11070722341537476, "learning_rate": 0.0008365568342200812, "loss": 2.2517, "step": 433840 }, { "epoch": 1.677142768783535, "grad_norm": 0.1156579926609993, "learning_rate": 0.0008363849433768915, "loss": 2.2366, "step": 433850 }, { "epoch": 1.6771814259869184, "grad_norm": 0.7100057005882263, "learning_rate": 0.0008362130779219075, "loss": 2.2295, "step": 433860 }, { "epoch": 1.6772200831903017, "grad_norm": 0.115716353058815, "learning_rate": 0.0008360412378438831, "loss": 2.2474, "step": 433870 }, { "epoch": 1.677258740393685, "grad_norm": 0.11384305357933044, "learning_rate": 0.0008358694231315802, "loss": 2.2434, "step": 433880 }, { "epoch": 1.6772973975970682, "grad_norm": 0.12071774899959564, "learning_rate": 0.0008356976337737692, "loss": 2.2407, "step": 433890 }, { "epoch": 1.6773360548004517, "grad_norm": 0.12149536609649658, "learning_rate": 0.000835525869759229, "loss": 2.2504, "step": 433900 }, { "epoch": 1.677374712003835, "grad_norm": 0.12283730506896973, "learning_rate": 0.0008353541310767465, "loss": 2.2382, "step": 433910 }, { "epoch": 1.6774133692072182, "grad_norm": 0.11751092225313187, "learning_rate": 0.000835182417715117, "loss": 2.2325, "step": 433920 }, { "epoch": 1.6774520264106014, "grad_norm": 0.11363212764263153, "learning_rate": 0.0008350107296631442, "loss": 2.2431, "step": 433930 }, { "epoch": 1.6774906836139847, "grad_norm": 0.10969270765781403, "learning_rate": 0.0008348390669096393, "loss": 2.2233, "step": 433940 }, { "epoch": 1.677529340817368, "grad_norm": 0.10469599813222885, "learning_rate": 0.0008346674294434226, "loss": 2.2186, "step": 433950 }, { "epoch": 1.6775679980207512, "grad_norm": 0.11149562895298004, "learning_rate": 0.0008344958172533228, "loss": 2.2441, "step": 433960 }, { "epoch": 1.6776066552241344, "grad_norm": 0.11854884773492813, "learning_rate": 0.0008343242303281757, "loss": 2.2403, "step": 433970 }, { "epoch": 1.6776453124275177, "grad_norm": 0.11190243810415268, "learning_rate": 0.0008341526686568264, "loss": 2.2354, "step": 433980 }, { "epoch": 1.677683969630901, "grad_norm": 0.10802937299013138, "learning_rate": 0.0008339811322281275, "loss": 2.2483, "step": 433990 }, { "epoch": 1.6777226268342842, "grad_norm": 0.10827367752790451, "learning_rate": 0.0008338096210309398, "loss": 2.2282, "step": 434000 }, { "epoch": 1.6777612840376674, "grad_norm": 0.12005242705345154, "learning_rate": 0.0008336381350541333, "loss": 2.2401, "step": 434010 }, { "epoch": 1.6777999412410507, "grad_norm": 0.11597011983394623, "learning_rate": 0.0008334666742865851, "loss": 2.2423, "step": 434020 }, { "epoch": 1.6778385984444342, "grad_norm": 0.10232312232255936, "learning_rate": 0.0008332952387171809, "loss": 2.2447, "step": 434030 }, { "epoch": 1.6778772556478174, "grad_norm": 0.12289629131555557, "learning_rate": 0.0008331238283348143, "loss": 2.2457, "step": 434040 }, { "epoch": 1.6779159128512007, "grad_norm": 0.11531048268079758, "learning_rate": 0.0008329524431283873, "loss": 2.2458, "step": 434050 }, { "epoch": 1.677954570054584, "grad_norm": 0.10182934254407883, "learning_rate": 0.0008327810830868101, "loss": 2.2387, "step": 434060 }, { "epoch": 1.6779932272579674, "grad_norm": 0.11879070103168488, "learning_rate": 0.0008326097481990009, "loss": 2.2588, "step": 434070 }, { "epoch": 1.6780318844613507, "grad_norm": 0.1200638934969902, "learning_rate": 0.0008324384384538862, "loss": 2.2382, "step": 434080 }, { "epoch": 1.678070541664734, "grad_norm": 0.12407013773918152, "learning_rate": 0.0008322671538404001, "loss": 2.2371, "step": 434090 }, { "epoch": 1.6781091988681172, "grad_norm": 0.1167188510298729, "learning_rate": 0.0008320958943474854, "loss": 2.2397, "step": 434100 }, { "epoch": 1.6781478560715004, "grad_norm": 0.11031246930360794, "learning_rate": 0.0008319246599640929, "loss": 2.2392, "step": 434110 }, { "epoch": 1.6781865132748837, "grad_norm": 0.11474674940109253, "learning_rate": 0.0008317534506791813, "loss": 2.2337, "step": 434120 }, { "epoch": 1.678225170478267, "grad_norm": 0.11170880496501923, "learning_rate": 0.0008315822664817176, "loss": 2.2453, "step": 434130 }, { "epoch": 1.6782638276816502, "grad_norm": 0.10359736531972885, "learning_rate": 0.0008314111073606767, "loss": 2.24, "step": 434140 }, { "epoch": 1.6783024848850334, "grad_norm": 0.1339753270149231, "learning_rate": 0.0008312399733050415, "loss": 2.2454, "step": 434150 }, { "epoch": 1.6783411420884167, "grad_norm": 0.11264664679765701, "learning_rate": 0.0008310688643038034, "loss": 2.2393, "step": 434160 }, { "epoch": 1.6783797992918, "grad_norm": 0.11087250709533691, "learning_rate": 0.0008308977803459614, "loss": 2.2491, "step": 434170 }, { "epoch": 1.6784184564951832, "grad_norm": 0.1208563819527626, "learning_rate": 0.0008307267214205228, "loss": 2.2409, "step": 434180 }, { "epoch": 1.6784571136985664, "grad_norm": 0.10672164708375931, "learning_rate": 0.0008305556875165026, "loss": 2.2378, "step": 434190 }, { "epoch": 1.67849577090195, "grad_norm": 0.1270405799150467, "learning_rate": 0.0008303846786229243, "loss": 2.2528, "step": 434200 }, { "epoch": 1.6785344281053332, "grad_norm": 0.12053734809160233, "learning_rate": 0.0008302136947288193, "loss": 2.2447, "step": 434210 }, { "epoch": 1.6785730853087164, "grad_norm": 0.11692651361227036, "learning_rate": 0.0008300427358232268, "loss": 2.248, "step": 434220 }, { "epoch": 1.6786117425120997, "grad_norm": 0.11162137240171432, "learning_rate": 0.0008298718018951941, "loss": 2.2481, "step": 434230 }, { "epoch": 1.6786503997154831, "grad_norm": 0.11267007142305374, "learning_rate": 0.0008297008929337766, "loss": 2.2286, "step": 434240 }, { "epoch": 1.6786890569188664, "grad_norm": 0.10077975690364838, "learning_rate": 0.0008295300089280375, "loss": 2.2344, "step": 434250 }, { "epoch": 1.6787277141222496, "grad_norm": 0.10727986693382263, "learning_rate": 0.0008293591498670483, "loss": 2.2429, "step": 434260 }, { "epoch": 1.678766371325633, "grad_norm": 0.1061238944530487, "learning_rate": 0.000829188315739888, "loss": 2.2419, "step": 434270 }, { "epoch": 1.6788050285290161, "grad_norm": 0.10600124299526215, "learning_rate": 0.0008290175065356442, "loss": 2.2373, "step": 434280 }, { "epoch": 1.6788436857323994, "grad_norm": 0.10449835658073425, "learning_rate": 0.0008288467222434119, "loss": 2.232, "step": 434290 }, { "epoch": 1.6788823429357826, "grad_norm": 0.11127332597970963, "learning_rate": 0.0008286759628522942, "loss": 2.2503, "step": 434300 }, { "epoch": 1.678921000139166, "grad_norm": 0.11499613523483276, "learning_rate": 0.0008285052283514023, "loss": 2.2404, "step": 434310 }, { "epoch": 1.6789596573425491, "grad_norm": 0.12473177164793015, "learning_rate": 0.0008283345187298552, "loss": 2.2386, "step": 434320 }, { "epoch": 1.6789983145459324, "grad_norm": 0.11327109485864639, "learning_rate": 0.0008281638339767798, "loss": 2.2629, "step": 434330 }, { "epoch": 1.6790369717493157, "grad_norm": 0.11517111957073212, "learning_rate": 0.0008279931740813112, "loss": 2.2466, "step": 434340 }, { "epoch": 1.679075628952699, "grad_norm": 0.11408182233572006, "learning_rate": 0.0008278225390325918, "loss": 2.2413, "step": 434350 }, { "epoch": 1.6791142861560822, "grad_norm": 0.12840931117534637, "learning_rate": 0.0008276519288197724, "loss": 2.2428, "step": 434360 }, { "epoch": 1.6791529433594656, "grad_norm": 0.11731917411088943, "learning_rate": 0.0008274813434320117, "loss": 2.2335, "step": 434370 }, { "epoch": 1.6791916005628489, "grad_norm": 0.12040312588214874, "learning_rate": 0.0008273107828584762, "loss": 2.2504, "step": 434380 }, { "epoch": 1.6792302577662321, "grad_norm": 0.11635112762451172, "learning_rate": 0.00082714024708834, "loss": 2.2268, "step": 434390 }, { "epoch": 1.6792689149696154, "grad_norm": 0.11051749438047409, "learning_rate": 0.0008269697361107857, "loss": 2.2221, "step": 434400 }, { "epoch": 1.6793075721729989, "grad_norm": 0.10762675106525421, "learning_rate": 0.0008267992499150028, "loss": 2.2511, "step": 434410 }, { "epoch": 1.6793462293763821, "grad_norm": 0.10734720528125763, "learning_rate": 0.0008266287884901897, "loss": 2.2316, "step": 434420 }, { "epoch": 1.6793848865797654, "grad_norm": 0.1141144186258316, "learning_rate": 0.000826458351825552, "loss": 2.2486, "step": 434430 }, { "epoch": 1.6794235437831486, "grad_norm": 0.12552301585674286, "learning_rate": 0.0008262879399103031, "loss": 2.2438, "step": 434440 }, { "epoch": 1.6794622009865319, "grad_norm": 0.11154180765151978, "learning_rate": 0.0008261175527336651, "loss": 2.2512, "step": 434450 }, { "epoch": 1.6795008581899151, "grad_norm": 0.115838922560215, "learning_rate": 0.0008259471902848663, "loss": 2.2562, "step": 434460 }, { "epoch": 1.6795395153932984, "grad_norm": 0.11728062480688095, "learning_rate": 0.0008257768525531444, "loss": 2.2508, "step": 434470 }, { "epoch": 1.6795781725966816, "grad_norm": 0.11453123390674591, "learning_rate": 0.0008256065395277441, "loss": 2.2384, "step": 434480 }, { "epoch": 1.6796168298000649, "grad_norm": 0.11977676302194595, "learning_rate": 0.0008254362511979181, "loss": 2.2453, "step": 434490 }, { "epoch": 1.6796554870034481, "grad_norm": 0.10919841378927231, "learning_rate": 0.000825265987552927, "loss": 2.241, "step": 434500 }, { "epoch": 1.6796941442068314, "grad_norm": 0.1256256252527237, "learning_rate": 0.0008250957485820388, "loss": 2.2508, "step": 434510 }, { "epoch": 1.6797328014102146, "grad_norm": 0.11276651918888092, "learning_rate": 0.0008249255342745295, "loss": 2.227, "step": 434520 }, { "epoch": 1.6797714586135981, "grad_norm": 0.14407971501350403, "learning_rate": 0.0008247553446196832, "loss": 2.2511, "step": 434530 }, { "epoch": 1.6798101158169814, "grad_norm": 0.11039101332426071, "learning_rate": 0.0008245851796067909, "loss": 2.2354, "step": 434540 }, { "epoch": 1.6798487730203646, "grad_norm": 0.12132669985294342, "learning_rate": 0.0008244150392251524, "loss": 2.2445, "step": 434550 }, { "epoch": 1.6798874302237479, "grad_norm": 0.10520847141742706, "learning_rate": 0.0008242449234640745, "loss": 2.2486, "step": 434560 }, { "epoch": 1.6799260874271311, "grad_norm": 0.10257355123758316, "learning_rate": 0.000824074832312872, "loss": 2.2381, "step": 434570 }, { "epoch": 1.6799647446305146, "grad_norm": 0.11039949953556061, "learning_rate": 0.0008239047657608676, "loss": 2.2327, "step": 434580 }, { "epoch": 1.6800034018338978, "grad_norm": 0.11216014623641968, "learning_rate": 0.0008237347237973911, "loss": 2.2398, "step": 434590 }, { "epoch": 1.680042059037281, "grad_norm": 0.11249549686908722, "learning_rate": 0.0008235647064117806, "loss": 2.2475, "step": 434600 }, { "epoch": 1.6800807162406644, "grad_norm": 0.12227798253297806, "learning_rate": 0.0008233947135933817, "loss": 2.2414, "step": 434610 }, { "epoch": 1.6801193734440476, "grad_norm": 0.1259644478559494, "learning_rate": 0.000823224745331548, "loss": 2.2347, "step": 434620 }, { "epoch": 1.6801580306474309, "grad_norm": 0.10499641299247742, "learning_rate": 0.0008230548016156403, "loss": 2.238, "step": 434630 }, { "epoch": 1.680196687850814, "grad_norm": 0.10853856056928635, "learning_rate": 0.0008228848824350272, "loss": 2.2311, "step": 434640 }, { "epoch": 1.6802353450541974, "grad_norm": 0.11641088128089905, "learning_rate": 0.0008227149877790852, "loss": 2.2381, "step": 434650 }, { "epoch": 1.6802740022575806, "grad_norm": 0.1016199067234993, "learning_rate": 0.0008225451176371979, "loss": 2.2369, "step": 434660 }, { "epoch": 1.6803126594609639, "grad_norm": 0.10874945670366287, "learning_rate": 0.0008223752719987576, "loss": 2.2379, "step": 434670 }, { "epoch": 1.6803513166643471, "grad_norm": 0.10710814595222473, "learning_rate": 0.0008222054508531635, "loss": 2.2216, "step": 434680 }, { "epoch": 1.6803899738677304, "grad_norm": 0.11364852637052536, "learning_rate": 0.0008220356541898224, "loss": 2.225, "step": 434690 }, { "epoch": 1.6804286310711138, "grad_norm": 0.1084376871585846, "learning_rate": 0.0008218658819981489, "loss": 2.2278, "step": 434700 }, { "epoch": 1.680467288274497, "grad_norm": 0.10847639292478561, "learning_rate": 0.0008216961342675651, "loss": 2.2327, "step": 434710 }, { "epoch": 1.6805059454778803, "grad_norm": 0.11690942943096161, "learning_rate": 0.0008215264109875011, "loss": 2.2303, "step": 434720 }, { "epoch": 1.6805446026812636, "grad_norm": 0.13263235986232758, "learning_rate": 0.0008213567121473944, "loss": 2.2391, "step": 434730 }, { "epoch": 1.6805832598846469, "grad_norm": 0.11565765738487244, "learning_rate": 0.0008211870377366899, "loss": 2.2387, "step": 434740 }, { "epoch": 1.6806219170880303, "grad_norm": 0.10726010799407959, "learning_rate": 0.0008210173877448403, "loss": 2.2389, "step": 434750 }, { "epoch": 1.6806605742914136, "grad_norm": 0.12307916581630707, "learning_rate": 0.000820847762161306, "loss": 2.2329, "step": 434760 }, { "epoch": 1.6806992314947968, "grad_norm": 0.12959054112434387, "learning_rate": 0.0008206781609755544, "loss": 2.2452, "step": 434770 }, { "epoch": 1.68073788869818, "grad_norm": 0.10963719338178635, "learning_rate": 0.0008205085841770614, "loss": 2.2464, "step": 434780 }, { "epoch": 1.6807765459015633, "grad_norm": 0.11189696192741394, "learning_rate": 0.0008203390317553099, "loss": 2.2463, "step": 434790 }, { "epoch": 1.6808152031049466, "grad_norm": 0.12894082069396973, "learning_rate": 0.00082016950369979, "loss": 2.2488, "step": 434800 }, { "epoch": 1.6808538603083298, "grad_norm": 0.11304847151041031, "learning_rate": 0.0008200000000000001, "loss": 2.2495, "step": 434810 }, { "epoch": 1.680892517511713, "grad_norm": 0.09899675101041794, "learning_rate": 0.0008198305206454455, "loss": 2.2271, "step": 434820 }, { "epoch": 1.6809311747150963, "grad_norm": 0.11843240261077881, "learning_rate": 0.0008196610656256398, "loss": 2.2536, "step": 434830 }, { "epoch": 1.6809698319184796, "grad_norm": 0.11657701432704926, "learning_rate": 0.0008194916349301036, "loss": 2.2409, "step": 434840 }, { "epoch": 1.6810084891218628, "grad_norm": 0.11496693640947342, "learning_rate": 0.0008193222285483648, "loss": 2.2426, "step": 434850 }, { "epoch": 1.681047146325246, "grad_norm": 0.10746250301599503, "learning_rate": 0.0008191528464699591, "loss": 2.2362, "step": 434860 }, { "epoch": 1.6810858035286296, "grad_norm": 0.10326814651489258, "learning_rate": 0.0008189834886844299, "loss": 2.2427, "step": 434870 }, { "epoch": 1.6811244607320128, "grad_norm": 0.11777054518461227, "learning_rate": 0.0008188141551813279, "loss": 2.2526, "step": 434880 }, { "epoch": 1.681163117935396, "grad_norm": 0.11334793269634247, "learning_rate": 0.0008186448459502113, "loss": 2.2404, "step": 434890 }, { "epoch": 1.6812017751387793, "grad_norm": 0.11864189058542252, "learning_rate": 0.0008184755609806458, "loss": 2.2406, "step": 434900 }, { "epoch": 1.6812404323421628, "grad_norm": 0.10265098512172699, "learning_rate": 0.0008183063002622042, "loss": 2.2526, "step": 434910 }, { "epoch": 1.681279089545546, "grad_norm": 0.1151779443025589, "learning_rate": 0.0008181370637844674, "loss": 2.2438, "step": 434920 }, { "epoch": 1.6813177467489293, "grad_norm": 0.11113797873258591, "learning_rate": 0.0008179678515370235, "loss": 2.2429, "step": 434930 }, { "epoch": 1.6813564039523126, "grad_norm": 0.1169753447175026, "learning_rate": 0.0008177986635094681, "loss": 2.2402, "step": 434940 }, { "epoch": 1.6813950611556958, "grad_norm": 0.11297095566987991, "learning_rate": 0.000817629499691404, "loss": 2.2376, "step": 434950 }, { "epoch": 1.681433718359079, "grad_norm": 0.11857719719409943, "learning_rate": 0.0008174603600724413, "loss": 2.2458, "step": 434960 }, { "epoch": 1.6814723755624623, "grad_norm": 0.11382398009300232, "learning_rate": 0.0008172912446421985, "loss": 2.2401, "step": 434970 }, { "epoch": 1.6815110327658456, "grad_norm": 0.11434874683618546, "learning_rate": 0.0008171221533903003, "loss": 2.2599, "step": 434980 }, { "epoch": 1.6815496899692288, "grad_norm": 0.111310675740242, "learning_rate": 0.0008169530863063798, "loss": 2.2327, "step": 434990 }, { "epoch": 1.681588347172612, "grad_norm": 0.1281977891921997, "learning_rate": 0.0008167840433800769, "loss": 2.2315, "step": 435000 }, { "epoch": 1.6816270043759953, "grad_norm": 0.11103682219982147, "learning_rate": 0.0008166150246010389, "loss": 2.2362, "step": 435010 }, { "epoch": 1.6816656615793786, "grad_norm": 0.10804154723882675, "learning_rate": 0.0008164460299589207, "loss": 2.2377, "step": 435020 }, { "epoch": 1.6817043187827618, "grad_norm": 0.11247711628675461, "learning_rate": 0.0008162770594433848, "loss": 2.2278, "step": 435030 }, { "epoch": 1.6817429759861453, "grad_norm": 0.104110486805439, "learning_rate": 0.0008161081130441006, "loss": 2.2496, "step": 435040 }, { "epoch": 1.6817816331895286, "grad_norm": 0.12114261090755463, "learning_rate": 0.0008159391907507454, "loss": 2.2437, "step": 435050 }, { "epoch": 1.6818202903929118, "grad_norm": 0.10757270455360413, "learning_rate": 0.000815770292553003, "loss": 2.2504, "step": 435060 }, { "epoch": 1.681858947596295, "grad_norm": 0.10915961861610413, "learning_rate": 0.0008156014184405656, "loss": 2.2338, "step": 435070 }, { "epoch": 1.6818976047996785, "grad_norm": 0.11000694334506989, "learning_rate": 0.0008154325684031323, "loss": 2.2493, "step": 435080 }, { "epoch": 1.6819362620030618, "grad_norm": 0.11791567504405975, "learning_rate": 0.0008152637424304092, "loss": 2.238, "step": 435090 }, { "epoch": 1.681974919206445, "grad_norm": 0.1171281635761261, "learning_rate": 0.0008150949405121102, "loss": 2.2355, "step": 435100 }, { "epoch": 1.6820135764098283, "grad_norm": 0.1159917414188385, "learning_rate": 0.000814926162637956, "loss": 2.2312, "step": 435110 }, { "epoch": 1.6820522336132115, "grad_norm": 0.11876057088375092, "learning_rate": 0.0008147574087976757, "loss": 2.2319, "step": 435120 }, { "epoch": 1.6820908908165948, "grad_norm": 0.24806475639343262, "learning_rate": 0.0008145886789810044, "loss": 2.2469, "step": 435130 }, { "epoch": 1.682129548019978, "grad_norm": 0.11031191796064377, "learning_rate": 0.0008144199731776856, "loss": 2.2502, "step": 435140 }, { "epoch": 1.6821682052233613, "grad_norm": 0.13095290958881378, "learning_rate": 0.0008142512913774691, "loss": 2.2459, "step": 435150 }, { "epoch": 1.6822068624267446, "grad_norm": 0.11507758498191833, "learning_rate": 0.0008140826335701125, "loss": 2.2297, "step": 435160 }, { "epoch": 1.6822455196301278, "grad_norm": 0.11004200577735901, "learning_rate": 0.0008139139997453811, "loss": 2.2435, "step": 435170 }, { "epoch": 1.682284176833511, "grad_norm": 0.10870189964771271, "learning_rate": 0.0008137453898930467, "loss": 2.2386, "step": 435180 }, { "epoch": 1.6823228340368943, "grad_norm": 0.1069776639342308, "learning_rate": 0.0008135768040028888, "loss": 2.2409, "step": 435190 }, { "epoch": 1.6823614912402776, "grad_norm": 0.11982312798500061, "learning_rate": 0.000813408242064694, "loss": 2.2342, "step": 435200 }, { "epoch": 1.682400148443661, "grad_norm": 0.1095123365521431, "learning_rate": 0.0008132397040682562, "loss": 2.2532, "step": 435210 }, { "epoch": 1.6824388056470443, "grad_norm": 0.12205439805984497, "learning_rate": 0.0008130711900033769, "loss": 2.2439, "step": 435220 }, { "epoch": 1.6824774628504275, "grad_norm": 0.11363526433706284, "learning_rate": 0.0008129026998598641, "loss": 2.2415, "step": 435230 }, { "epoch": 1.6825161200538108, "grad_norm": 0.1271890550851822, "learning_rate": 0.0008127342336275336, "loss": 2.2278, "step": 435240 }, { "epoch": 1.6825547772571943, "grad_norm": 0.11817679554224014, "learning_rate": 0.0008125657912962084, "loss": 2.2376, "step": 435250 }, { "epoch": 1.6825934344605775, "grad_norm": 0.15573549270629883, "learning_rate": 0.0008123973728557182, "loss": 2.2288, "step": 435260 }, { "epoch": 1.6826320916639608, "grad_norm": 0.11569619923830032, "learning_rate": 0.0008122289782959007, "loss": 2.2432, "step": 435270 }, { "epoch": 1.682670748867344, "grad_norm": 0.12004324793815613, "learning_rate": 0.0008120606076066002, "loss": 2.2458, "step": 435280 }, { "epoch": 1.6827094060707273, "grad_norm": 0.12503516674041748, "learning_rate": 0.0008118922607776684, "loss": 2.2414, "step": 435290 }, { "epoch": 1.6827480632741105, "grad_norm": 0.12091667205095291, "learning_rate": 0.0008117239377989642, "loss": 2.2288, "step": 435300 }, { "epoch": 1.6827867204774938, "grad_norm": 0.10575675219297409, "learning_rate": 0.0008115556386603535, "loss": 2.2406, "step": 435310 }, { "epoch": 1.682825377680877, "grad_norm": 0.10925006121397018, "learning_rate": 0.0008113873633517099, "loss": 2.2628, "step": 435320 }, { "epoch": 1.6828640348842603, "grad_norm": 0.10843341797590256, "learning_rate": 0.0008112191118629135, "loss": 2.2336, "step": 435330 }, { "epoch": 1.6829026920876435, "grad_norm": 0.12656168639659882, "learning_rate": 0.000811050884183852, "loss": 2.2303, "step": 435340 }, { "epoch": 1.6829413492910268, "grad_norm": 0.12437848746776581, "learning_rate": 0.0008108826803044201, "loss": 2.2249, "step": 435350 }, { "epoch": 1.68298000649441, "grad_norm": 0.12143638730049133, "learning_rate": 0.0008107145002145196, "loss": 2.2448, "step": 435360 }, { "epoch": 1.6830186636977933, "grad_norm": 0.10871739685535431, "learning_rate": 0.0008105463439040594, "loss": 2.2366, "step": 435370 }, { "epoch": 1.6830573209011768, "grad_norm": 0.10439775884151459, "learning_rate": 0.0008103782113629558, "loss": 2.2327, "step": 435380 }, { "epoch": 1.68309597810456, "grad_norm": 0.12392468750476837, "learning_rate": 0.0008102101025811322, "loss": 2.2342, "step": 435390 }, { "epoch": 1.6831346353079433, "grad_norm": 0.11335171014070511, "learning_rate": 0.0008100420175485188, "loss": 2.2235, "step": 435400 }, { "epoch": 1.6831732925113265, "grad_norm": 0.11640524119138718, "learning_rate": 0.0008098739562550528, "loss": 2.2414, "step": 435410 }, { "epoch": 1.68321194971471, "grad_norm": 0.11219155043363571, "learning_rate": 0.0008097059186906792, "loss": 2.2484, "step": 435420 }, { "epoch": 1.6832506069180933, "grad_norm": 0.11859866231679916, "learning_rate": 0.0008095379048453495, "loss": 2.2264, "step": 435430 }, { "epoch": 1.6832892641214765, "grad_norm": 0.10446832329034805, "learning_rate": 0.0008093699147090228, "loss": 2.2355, "step": 435440 }, { "epoch": 1.6833279213248598, "grad_norm": 0.1112954244017601, "learning_rate": 0.0008092019482716644, "loss": 2.2348, "step": 435450 }, { "epoch": 1.683366578528243, "grad_norm": 0.11098206788301468, "learning_rate": 0.0008090340055232475, "loss": 2.2291, "step": 435460 }, { "epoch": 1.6834052357316263, "grad_norm": 0.11901814490556717, "learning_rate": 0.0008088660864537522, "loss": 2.2318, "step": 435470 }, { "epoch": 1.6834438929350095, "grad_norm": 0.11065753549337387, "learning_rate": 0.0008086981910531656, "loss": 2.2411, "step": 435480 }, { "epoch": 1.6834825501383928, "grad_norm": 0.11770408600568771, "learning_rate": 0.0008085303193114817, "loss": 2.2352, "step": 435490 }, { "epoch": 1.683521207341776, "grad_norm": 0.11291767656803131, "learning_rate": 0.0008083624712187016, "loss": 2.2282, "step": 435500 }, { "epoch": 1.6835598645451593, "grad_norm": 0.10530194640159607, "learning_rate": 0.0008081946467648336, "loss": 2.2432, "step": 435510 }, { "epoch": 1.6835985217485425, "grad_norm": 0.11069527268409729, "learning_rate": 0.0008080268459398928, "loss": 2.246, "step": 435520 }, { "epoch": 1.6836371789519258, "grad_norm": 0.10665157437324524, "learning_rate": 0.0008078590687339018, "loss": 2.2393, "step": 435530 }, { "epoch": 1.683675836155309, "grad_norm": 0.10939627885818481, "learning_rate": 0.0008076913151368896, "loss": 2.2327, "step": 435540 }, { "epoch": 1.6837144933586925, "grad_norm": 0.12476851791143417, "learning_rate": 0.0008075235851388927, "loss": 2.2323, "step": 435550 }, { "epoch": 1.6837531505620758, "grad_norm": 0.1157846450805664, "learning_rate": 0.000807355878729954, "loss": 2.2307, "step": 435560 }, { "epoch": 1.683791807765459, "grad_norm": 0.1271163523197174, "learning_rate": 0.0008071881959001244, "loss": 2.2507, "step": 435570 }, { "epoch": 1.6838304649688423, "grad_norm": 0.11389724910259247, "learning_rate": 0.0008070205366394609, "loss": 2.2252, "step": 435580 }, { "epoch": 1.6838691221722257, "grad_norm": 0.11247779428958893, "learning_rate": 0.0008068529009380277, "loss": 2.2317, "step": 435590 }, { "epoch": 1.683907779375609, "grad_norm": 0.1254976987838745, "learning_rate": 0.0008066852887858961, "loss": 2.2501, "step": 435600 }, { "epoch": 1.6839464365789922, "grad_norm": 0.12125872820615768, "learning_rate": 0.0008065177001731445, "loss": 2.2396, "step": 435610 }, { "epoch": 1.6839850937823755, "grad_norm": 0.11988002061843872, "learning_rate": 0.0008063501350898581, "loss": 2.235, "step": 435620 }, { "epoch": 1.6840237509857587, "grad_norm": 0.1185842901468277, "learning_rate": 0.0008061825935261289, "loss": 2.253, "step": 435630 }, { "epoch": 1.684062408189142, "grad_norm": 0.09899508953094482, "learning_rate": 0.0008060150754720561, "loss": 2.2286, "step": 435640 }, { "epoch": 1.6841010653925252, "grad_norm": 0.11395104229450226, "learning_rate": 0.0008058475809177457, "loss": 2.2287, "step": 435650 }, { "epoch": 1.6841397225959085, "grad_norm": 0.11625102162361145, "learning_rate": 0.0008056801098533108, "loss": 2.2282, "step": 435660 }, { "epoch": 1.6841783797992917, "grad_norm": 0.10915978997945786, "learning_rate": 0.0008055126622688711, "loss": 2.2437, "step": 435670 }, { "epoch": 1.684217037002675, "grad_norm": 0.11652518808841705, "learning_rate": 0.0008053452381545536, "loss": 2.2294, "step": 435680 }, { "epoch": 1.6842556942060583, "grad_norm": 0.12197697907686234, "learning_rate": 0.0008051778375004924, "loss": 2.2312, "step": 435690 }, { "epoch": 1.6842943514094415, "grad_norm": 0.11725616455078125, "learning_rate": 0.0008050104602968276, "loss": 2.236, "step": 435700 }, { "epoch": 1.6843330086128248, "grad_norm": 0.10901392996311188, "learning_rate": 0.0008048431065337072, "loss": 2.238, "step": 435710 }, { "epoch": 1.6843716658162082, "grad_norm": 0.13330137729644775, "learning_rate": 0.0008046757762012852, "loss": 2.2334, "step": 435720 }, { "epoch": 1.6844103230195915, "grad_norm": 0.11101020872592926, "learning_rate": 0.0008045084692897235, "loss": 2.2301, "step": 435730 }, { "epoch": 1.6844489802229747, "grad_norm": 0.1197330430150032, "learning_rate": 0.0008043411857891902, "loss": 2.2456, "step": 435740 }, { "epoch": 1.684487637426358, "grad_norm": 0.1282912790775299, "learning_rate": 0.0008041739256898601, "loss": 2.2396, "step": 435750 }, { "epoch": 1.6845262946297415, "grad_norm": 0.14463169872760773, "learning_rate": 0.0008040066889819158, "loss": 2.2448, "step": 435760 }, { "epoch": 1.6845649518331247, "grad_norm": 0.2296954095363617, "learning_rate": 0.0008038394756555456, "loss": 2.2475, "step": 435770 }, { "epoch": 1.684603609036508, "grad_norm": 0.12037109583616257, "learning_rate": 0.0008036722857009455, "loss": 2.2443, "step": 435780 }, { "epoch": 1.6846422662398912, "grad_norm": 0.11113885790109634, "learning_rate": 0.0008035051191083183, "loss": 2.2346, "step": 435790 }, { "epoch": 1.6846809234432745, "grad_norm": 0.11480911821126938, "learning_rate": 0.0008033379758678727, "loss": 2.2301, "step": 435800 }, { "epoch": 1.6847195806466577, "grad_norm": 0.11218812316656113, "learning_rate": 0.0008031708559698257, "loss": 2.2337, "step": 435810 }, { "epoch": 1.684758237850041, "grad_norm": 0.10466307401657104, "learning_rate": 0.0008030037594043998, "loss": 2.253, "step": 435820 }, { "epoch": 1.6847968950534242, "grad_norm": 0.1097770482301712, "learning_rate": 0.0008028366861618253, "loss": 2.2468, "step": 435830 }, { "epoch": 1.6848355522568075, "grad_norm": 0.1140095442533493, "learning_rate": 0.0008026696362323387, "loss": 2.2301, "step": 435840 }, { "epoch": 1.6848742094601907, "grad_norm": 0.11084846407175064, "learning_rate": 0.0008025026096061839, "loss": 2.2466, "step": 435850 }, { "epoch": 1.684912866663574, "grad_norm": 0.11485522985458374, "learning_rate": 0.0008023356062736107, "loss": 2.2199, "step": 435860 }, { "epoch": 1.6849515238669572, "grad_norm": 0.12026914954185486, "learning_rate": 0.0008021686262248764, "loss": 2.2324, "step": 435870 }, { "epoch": 1.6849901810703405, "grad_norm": 0.1091637834906578, "learning_rate": 0.000802001669450245, "loss": 2.2447, "step": 435880 }, { "epoch": 1.685028838273724, "grad_norm": 0.10844144970178604, "learning_rate": 0.0008018347359399873, "loss": 2.224, "step": 435890 }, { "epoch": 1.6850674954771072, "grad_norm": 0.1304885745048523, "learning_rate": 0.0008016678256843806, "loss": 2.2458, "step": 435900 }, { "epoch": 1.6851061526804905, "grad_norm": 0.11517968028783798, "learning_rate": 0.0008015009386737093, "loss": 2.242, "step": 435910 }, { "epoch": 1.6851448098838737, "grad_norm": 0.11388219147920609, "learning_rate": 0.0008013340748982642, "loss": 2.2413, "step": 435920 }, { "epoch": 1.6851834670872572, "grad_norm": 0.11688657850027084, "learning_rate": 0.0008011672343483433, "loss": 2.2418, "step": 435930 }, { "epoch": 1.6852221242906404, "grad_norm": 0.11711782217025757, "learning_rate": 0.0008010004170142511, "loss": 2.2142, "step": 435940 }, { "epoch": 1.6852607814940237, "grad_norm": 0.11181101948022842, "learning_rate": 0.0008008336228862987, "loss": 2.2418, "step": 435950 }, { "epoch": 1.685299438697407, "grad_norm": 0.11422090977430344, "learning_rate": 0.0008006668519548041, "loss": 2.2268, "step": 435960 }, { "epoch": 1.6853380959007902, "grad_norm": 0.1257568597793579, "learning_rate": 0.0008005001042100922, "loss": 2.2397, "step": 435970 }, { "epoch": 1.6853767531041735, "grad_norm": 0.13817913830280304, "learning_rate": 0.0008003333796424941, "loss": 2.2585, "step": 435980 }, { "epoch": 1.6854154103075567, "grad_norm": 0.12498199194669724, "learning_rate": 0.0008001666782423484, "loss": 2.2443, "step": 435990 }, { "epoch": 1.68545406751094, "grad_norm": 0.1228543221950531, "learning_rate": 0.0008, "loss": 2.2499, "step": 436000 }, { "epoch": 1.6854927247143232, "grad_norm": 0.10874808579683304, "learning_rate": 0.0007998333449058002, "loss": 2.2391, "step": 436010 }, { "epoch": 1.6855313819177065, "grad_norm": 0.17795898020267487, "learning_rate": 0.0007996667129501074, "loss": 2.2406, "step": 436020 }, { "epoch": 1.6855700391210897, "grad_norm": 0.11963041871786118, "learning_rate": 0.0007995001041232866, "loss": 2.244, "step": 436030 }, { "epoch": 1.685608696324473, "grad_norm": 0.11893066018819809, "learning_rate": 0.0007993335184157093, "loss": 2.2372, "step": 436040 }, { "epoch": 1.6856473535278562, "grad_norm": 0.11747374385595322, "learning_rate": 0.0007991669558177541, "loss": 2.2453, "step": 436050 }, { "epoch": 1.6856860107312397, "grad_norm": 0.1093200072646141, "learning_rate": 0.0007990004163198057, "loss": 2.2456, "step": 436060 }, { "epoch": 1.685724667934623, "grad_norm": 0.11985526978969574, "learning_rate": 0.0007988338999122561, "loss": 2.2312, "step": 436070 }, { "epoch": 1.6857633251380062, "grad_norm": 0.11669023334980011, "learning_rate": 0.0007986674065855035, "loss": 2.2357, "step": 436080 }, { "epoch": 1.6858019823413894, "grad_norm": 0.10922613739967346, "learning_rate": 0.0007985009363299529, "loss": 2.2336, "step": 436090 }, { "epoch": 1.685840639544773, "grad_norm": 0.11648507416248322, "learning_rate": 0.0007983344891360158, "loss": 2.2257, "step": 436100 }, { "epoch": 1.6858792967481562, "grad_norm": 0.12221698462963104, "learning_rate": 0.0007981680649941107, "loss": 2.2485, "step": 436110 }, { "epoch": 1.6859179539515394, "grad_norm": 0.13133299350738525, "learning_rate": 0.0007980016638946623, "loss": 2.2318, "step": 436120 }, { "epoch": 1.6859566111549227, "grad_norm": 0.11439281702041626, "learning_rate": 0.0007978352858281026, "loss": 2.2357, "step": 436130 }, { "epoch": 1.685995268358306, "grad_norm": 0.11321944743394852, "learning_rate": 0.0007976689307848691, "loss": 2.2493, "step": 436140 }, { "epoch": 1.6860339255616892, "grad_norm": 0.11675392836332321, "learning_rate": 0.000797502598755407, "loss": 2.2432, "step": 436150 }, { "epoch": 1.6860725827650724, "grad_norm": 0.10947708785533905, "learning_rate": 0.0007973362897301674, "loss": 2.2416, "step": 436160 }, { "epoch": 1.6861112399684557, "grad_norm": 0.10901670902967453, "learning_rate": 0.0007971700036996084, "loss": 2.2424, "step": 436170 }, { "epoch": 1.686149897171839, "grad_norm": 0.11359596997499466, "learning_rate": 0.0007970037406541948, "loss": 2.2417, "step": 436180 }, { "epoch": 1.6861885543752222, "grad_norm": 0.12114213407039642, "learning_rate": 0.0007968375005843973, "loss": 2.2356, "step": 436190 }, { "epoch": 1.6862272115786054, "grad_norm": 0.10651782155036926, "learning_rate": 0.0007966712834806942, "loss": 2.2537, "step": 436200 }, { "epoch": 1.6862658687819887, "grad_norm": 0.11422091722488403, "learning_rate": 0.0007965050893335695, "loss": 2.2345, "step": 436210 }, { "epoch": 1.686304525985372, "grad_norm": 0.11653254926204681, "learning_rate": 0.0007963389181335137, "loss": 2.241, "step": 436220 }, { "epoch": 1.6863431831887554, "grad_norm": 0.1081620454788208, "learning_rate": 0.000796172769871025, "loss": 2.2503, "step": 436230 }, { "epoch": 1.6863818403921387, "grad_norm": 0.10417952388525009, "learning_rate": 0.0007960066445366072, "loss": 2.2409, "step": 436240 }, { "epoch": 1.686420497595522, "grad_norm": 0.12107262015342712, "learning_rate": 0.0007958405421207704, "loss": 2.2275, "step": 436250 }, { "epoch": 1.6864591547989052, "grad_norm": 0.1189051941037178, "learning_rate": 0.0007956744626140322, "loss": 2.2281, "step": 436260 }, { "epoch": 1.6864978120022887, "grad_norm": 0.15268446505069733, "learning_rate": 0.0007955084060069162, "loss": 2.2323, "step": 436270 }, { "epoch": 1.686536469205672, "grad_norm": 0.10644841194152832, "learning_rate": 0.0007953423722899522, "loss": 2.2357, "step": 436280 }, { "epoch": 1.6865751264090552, "grad_norm": 0.10994943231344223, "learning_rate": 0.0007951763614536773, "loss": 2.2354, "step": 436290 }, { "epoch": 1.6866137836124384, "grad_norm": 0.12903369963169098, "learning_rate": 0.0007950103734886345, "loss": 2.2514, "step": 436300 }, { "epoch": 1.6866524408158217, "grad_norm": 0.1179652139544487, "learning_rate": 0.0007948444083853736, "loss": 2.2267, "step": 436310 }, { "epoch": 1.686691098019205, "grad_norm": 0.12354667484760284, "learning_rate": 0.0007946784661344509, "loss": 2.2425, "step": 436320 }, { "epoch": 1.6867297552225882, "grad_norm": 0.10569813847541809, "learning_rate": 0.0007945125467264289, "loss": 2.2445, "step": 436330 }, { "epoch": 1.6867684124259714, "grad_norm": 0.11753513664007187, "learning_rate": 0.0007943466501518772, "loss": 2.2344, "step": 436340 }, { "epoch": 1.6868070696293547, "grad_norm": 0.11324722319841385, "learning_rate": 0.0007941807764013713, "loss": 2.2285, "step": 436350 }, { "epoch": 1.686845726832738, "grad_norm": 0.11834903061389923, "learning_rate": 0.0007940149254654931, "loss": 2.2293, "step": 436360 }, { "epoch": 1.6868843840361212, "grad_norm": 0.12163624912500381, "learning_rate": 0.0007938490973348318, "loss": 2.241, "step": 436370 }, { "epoch": 1.6869230412395044, "grad_norm": 0.1194707453250885, "learning_rate": 0.0007936832919999824, "loss": 2.2524, "step": 436380 }, { "epoch": 1.686961698442888, "grad_norm": 0.1264716535806656, "learning_rate": 0.0007935175094515461, "loss": 2.241, "step": 436390 }, { "epoch": 1.6870003556462712, "grad_norm": 0.11369436234235764, "learning_rate": 0.0007933517496801314, "loss": 2.2272, "step": 436400 }, { "epoch": 1.6870390128496544, "grad_norm": 0.12565520405769348, "learning_rate": 0.0007931860126763528, "loss": 2.2445, "step": 436410 }, { "epoch": 1.6870776700530377, "grad_norm": 0.11966858804225922, "learning_rate": 0.000793020298430831, "loss": 2.238, "step": 436420 }, { "epoch": 1.687116327256421, "grad_norm": 0.11708260327577591, "learning_rate": 0.0007928546069341937, "loss": 2.2359, "step": 436430 }, { "epoch": 1.6871549844598044, "grad_norm": 0.13556437194347382, "learning_rate": 0.0007926889381770746, "loss": 2.2413, "step": 436440 }, { "epoch": 1.6871936416631876, "grad_norm": 0.11219292134046555, "learning_rate": 0.0007925232921501135, "loss": 2.2345, "step": 436450 }, { "epoch": 1.687232298866571, "grad_norm": 0.13132259249687195, "learning_rate": 0.0007923576688439578, "loss": 2.2199, "step": 436460 }, { "epoch": 1.6872709560699541, "grad_norm": 0.20593777298927307, "learning_rate": 0.00079219206824926, "loss": 2.241, "step": 436470 }, { "epoch": 1.6873096132733374, "grad_norm": 0.12723521888256073, "learning_rate": 0.0007920264903566801, "loss": 2.2332, "step": 436480 }, { "epoch": 1.6873482704767206, "grad_norm": 0.10929694771766663, "learning_rate": 0.0007918609351568835, "loss": 2.2362, "step": 436490 }, { "epoch": 1.687386927680104, "grad_norm": 0.128523588180542, "learning_rate": 0.0007916954026405428, "loss": 2.2349, "step": 436500 }, { "epoch": 1.6874255848834872, "grad_norm": 0.11908918619155884, "learning_rate": 0.0007915298927983366, "loss": 2.223, "step": 436510 }, { "epoch": 1.6874642420868704, "grad_norm": 0.11230950802564621, "learning_rate": 0.0007913644056209497, "loss": 2.2304, "step": 436520 }, { "epoch": 1.6875028992902537, "grad_norm": 0.10958006232976913, "learning_rate": 0.0007911989410990741, "loss": 2.2408, "step": 436530 }, { "epoch": 1.687541556493637, "grad_norm": 0.1269649863243103, "learning_rate": 0.000791033499223407, "loss": 2.2347, "step": 436540 }, { "epoch": 1.6875802136970202, "grad_norm": 0.12071368098258972, "learning_rate": 0.0007908680799846528, "loss": 2.2387, "step": 436550 }, { "epoch": 1.6876188709004036, "grad_norm": 0.11131490767002106, "learning_rate": 0.0007907026833735221, "loss": 2.2226, "step": 436560 }, { "epoch": 1.6876575281037869, "grad_norm": 0.13059940934181213, "learning_rate": 0.0007905373093807315, "loss": 2.2337, "step": 436570 }, { "epoch": 1.6876961853071701, "grad_norm": 0.11803121864795685, "learning_rate": 0.0007903719579970047, "loss": 2.2357, "step": 436580 }, { "epoch": 1.6877348425105534, "grad_norm": 0.11218202114105225, "learning_rate": 0.000790206629213071, "loss": 2.2211, "step": 436590 }, { "epoch": 1.6877734997139366, "grad_norm": 0.1089283898472786, "learning_rate": 0.0007900413230196661, "loss": 2.2399, "step": 436600 }, { "epoch": 1.6878121569173201, "grad_norm": 0.11095636337995529, "learning_rate": 0.0007898760394075324, "loss": 2.2284, "step": 436610 }, { "epoch": 1.6878508141207034, "grad_norm": 0.11514124274253845, "learning_rate": 0.0007897107783674185, "loss": 2.2505, "step": 436620 }, { "epoch": 1.6878894713240866, "grad_norm": 0.11509499698877335, "learning_rate": 0.000789545539890079, "loss": 2.2474, "step": 436630 }, { "epoch": 1.6879281285274699, "grad_norm": 0.10745836794376373, "learning_rate": 0.0007893803239662756, "loss": 2.2213, "step": 436640 }, { "epoch": 1.6879667857308531, "grad_norm": 0.12291122227907181, "learning_rate": 0.0007892151305867752, "loss": 2.2538, "step": 436650 }, { "epoch": 1.6880054429342364, "grad_norm": 0.10713893920183182, "learning_rate": 0.0007890499597423517, "loss": 2.2462, "step": 436660 }, { "epoch": 1.6880441001376196, "grad_norm": 0.11409778892993927, "learning_rate": 0.0007888848114237852, "loss": 2.2395, "step": 436670 }, { "epoch": 1.6880827573410029, "grad_norm": 0.11547887325286865, "learning_rate": 0.0007887196856218624, "loss": 2.2307, "step": 436680 }, { "epoch": 1.6881214145443861, "grad_norm": 0.11228621006011963, "learning_rate": 0.0007885545823273755, "loss": 2.2463, "step": 436690 }, { "epoch": 1.6881600717477694, "grad_norm": 0.11682931333780289, "learning_rate": 0.0007883895015311233, "loss": 2.2332, "step": 436700 }, { "epoch": 1.6881987289511526, "grad_norm": 0.11516440659761429, "learning_rate": 0.0007882244432239112, "loss": 2.2388, "step": 436710 }, { "epoch": 1.688237386154536, "grad_norm": 0.10949617624282837, "learning_rate": 0.0007880594073965505, "loss": 2.2354, "step": 436720 }, { "epoch": 1.6882760433579194, "grad_norm": 0.1049916222691536, "learning_rate": 0.0007878943940398593, "loss": 2.2306, "step": 436730 }, { "epoch": 1.6883147005613026, "grad_norm": 0.11417264491319656, "learning_rate": 0.0007877294031446609, "loss": 2.2326, "step": 436740 }, { "epoch": 1.6883533577646859, "grad_norm": 0.12302906811237335, "learning_rate": 0.0007875644347017859, "loss": 2.2302, "step": 436750 }, { "epoch": 1.6883920149680691, "grad_norm": 0.11686044186353683, "learning_rate": 0.0007873994887020706, "loss": 2.233, "step": 436760 }, { "epoch": 1.6884306721714524, "grad_norm": 0.11717838048934937, "learning_rate": 0.0007872345651363575, "loss": 2.2468, "step": 436770 }, { "epoch": 1.6884693293748358, "grad_norm": 0.11655833572149277, "learning_rate": 0.0007870696639954955, "loss": 2.2305, "step": 436780 }, { "epoch": 1.688507986578219, "grad_norm": 0.12989924848079681, "learning_rate": 0.0007869047852703399, "loss": 2.2287, "step": 436790 }, { "epoch": 1.6885466437816024, "grad_norm": 0.10573769360780716, "learning_rate": 0.0007867399289517518, "loss": 2.2215, "step": 436800 }, { "epoch": 1.6885853009849856, "grad_norm": 0.11621452867984772, "learning_rate": 0.000786575095030599, "loss": 2.2406, "step": 436810 }, { "epoch": 1.6886239581883689, "grad_norm": 0.11387400329113007, "learning_rate": 0.0007864102834977547, "loss": 2.2355, "step": 436820 }, { "epoch": 1.688662615391752, "grad_norm": 0.11899558454751968, "learning_rate": 0.0007862454943440993, "loss": 2.2448, "step": 436830 }, { "epoch": 1.6887012725951354, "grad_norm": 0.10371288657188416, "learning_rate": 0.0007860807275605186, "loss": 2.2206, "step": 436840 }, { "epoch": 1.6887399297985186, "grad_norm": 0.10565133392810822, "learning_rate": 0.0007859159831379051, "loss": 2.2243, "step": 436850 }, { "epoch": 1.6887785870019019, "grad_norm": 0.11194362491369247, "learning_rate": 0.0007857512610671569, "loss": 2.2368, "step": 436860 }, { "epoch": 1.6888172442052851, "grad_norm": 0.11670593172311783, "learning_rate": 0.0007855865613391788, "loss": 2.2415, "step": 436870 }, { "epoch": 1.6888559014086684, "grad_norm": 0.12032661586999893, "learning_rate": 0.0007854218839448819, "loss": 2.252, "step": 436880 }, { "epoch": 1.6888945586120516, "grad_norm": 0.48441967368125916, "learning_rate": 0.000785257228875183, "loss": 2.2309, "step": 436890 }, { "epoch": 1.688933215815435, "grad_norm": 0.12135181576013565, "learning_rate": 0.0007850925961210047, "loss": 2.2247, "step": 436900 }, { "epoch": 1.6889718730188183, "grad_norm": 0.11387261003255844, "learning_rate": 0.0007849279856732772, "loss": 2.2446, "step": 436910 }, { "epoch": 1.6890105302222016, "grad_norm": 0.10993428528308868, "learning_rate": 0.0007847633975229351, "loss": 2.2272, "step": 436920 }, { "epoch": 1.6890491874255849, "grad_norm": 0.11586098372936249, "learning_rate": 0.0007845988316609203, "loss": 2.2399, "step": 436930 }, { "epoch": 1.6890878446289683, "grad_norm": 0.11747764050960541, "learning_rate": 0.0007844342880781805, "loss": 2.2379, "step": 436940 }, { "epoch": 1.6891265018323516, "grad_norm": 0.1304158717393875, "learning_rate": 0.0007842697667656693, "loss": 2.2475, "step": 436950 }, { "epoch": 1.6891651590357348, "grad_norm": 1.0930097103118896, "learning_rate": 0.000784105267714347, "loss": 2.2354, "step": 436960 }, { "epoch": 1.689203816239118, "grad_norm": 0.11207807809114456, "learning_rate": 0.0007839407909151793, "loss": 2.2389, "step": 436970 }, { "epoch": 1.6892424734425013, "grad_norm": 0.12363407760858536, "learning_rate": 0.0007837763363591382, "loss": 2.2415, "step": 436980 }, { "epoch": 1.6892811306458846, "grad_norm": 0.12563955783843994, "learning_rate": 0.0007836119040372025, "loss": 2.2347, "step": 436990 }, { "epoch": 1.6893197878492678, "grad_norm": 0.11330549418926239, "learning_rate": 0.0007834474939403562, "loss": 2.2352, "step": 437000 }, { "epoch": 1.689358445052651, "grad_norm": 0.11254936456680298, "learning_rate": 0.0007832831060595896, "loss": 2.24, "step": 437010 }, { "epoch": 1.6893971022560343, "grad_norm": 0.10804528743028641, "learning_rate": 0.0007831187403858995, "loss": 2.2298, "step": 437020 }, { "epoch": 1.6894357594594176, "grad_norm": 0.10461485385894775, "learning_rate": 0.0007829543969102884, "loss": 2.2459, "step": 437030 }, { "epoch": 1.6894744166628008, "grad_norm": 0.12711456418037415, "learning_rate": 0.000782790075623765, "loss": 2.2228, "step": 437040 }, { "epoch": 1.689513073866184, "grad_norm": 0.1143861711025238, "learning_rate": 0.0007826257765173439, "loss": 2.2317, "step": 437050 }, { "epoch": 1.6895517310695674, "grad_norm": 0.13040152192115784, "learning_rate": 0.000782461499582046, "loss": 2.2323, "step": 437060 }, { "epoch": 1.6895903882729508, "grad_norm": 0.11680342257022858, "learning_rate": 0.0007822972448088986, "loss": 2.2316, "step": 437070 }, { "epoch": 1.689629045476334, "grad_norm": 0.12657737731933594, "learning_rate": 0.0007821330121889336, "loss": 2.2422, "step": 437080 }, { "epoch": 1.6896677026797173, "grad_norm": 0.1121034175157547, "learning_rate": 0.0007819688017131909, "loss": 2.2449, "step": 437090 }, { "epoch": 1.6897063598831006, "grad_norm": 0.1272219717502594, "learning_rate": 0.0007818046133727152, "loss": 2.2453, "step": 437100 }, { "epoch": 1.689745017086484, "grad_norm": 0.11899504065513611, "learning_rate": 0.0007816404471585575, "loss": 2.2234, "step": 437110 }, { "epoch": 1.6897836742898673, "grad_norm": 0.11514095216989517, "learning_rate": 0.0007814763030617746, "loss": 2.2334, "step": 437120 }, { "epoch": 1.6898223314932506, "grad_norm": 0.12913601100444794, "learning_rate": 0.0007813121810734301, "loss": 2.2434, "step": 437130 }, { "epoch": 1.6898609886966338, "grad_norm": 0.10719872266054153, "learning_rate": 0.0007811480811845927, "loss": 2.2474, "step": 437140 }, { "epoch": 1.689899645900017, "grad_norm": 0.12911660969257355, "learning_rate": 0.0007809840033863378, "loss": 2.2343, "step": 437150 }, { "epoch": 1.6899383031034003, "grad_norm": 0.12792739272117615, "learning_rate": 0.0007808199476697464, "loss": 2.2358, "step": 437160 }, { "epoch": 1.6899769603067836, "grad_norm": 0.148227721452713, "learning_rate": 0.0007806559140259055, "loss": 2.2331, "step": 437170 }, { "epoch": 1.6900156175101668, "grad_norm": 0.14317235350608826, "learning_rate": 0.0007804919024459083, "loss": 2.2307, "step": 437180 }, { "epoch": 1.69005427471355, "grad_norm": 0.11549597978591919, "learning_rate": 0.0007803279129208541, "loss": 2.2356, "step": 437190 }, { "epoch": 1.6900929319169333, "grad_norm": 0.13664087653160095, "learning_rate": 0.0007801639454418475, "loss": 2.2359, "step": 437200 }, { "epoch": 1.6901315891203166, "grad_norm": 0.11562107503414154, "learning_rate": 0.0007800000000000001, "loss": 2.2495, "step": 437210 }, { "epoch": 1.6901702463236998, "grad_norm": 0.1262626051902771, "learning_rate": 0.0007798360765864285, "loss": 2.2357, "step": 437220 }, { "epoch": 1.690208903527083, "grad_norm": 0.10728706419467926, "learning_rate": 0.000779672175192256, "loss": 2.2461, "step": 437230 }, { "epoch": 1.6902475607304666, "grad_norm": 0.12150957435369492, "learning_rate": 0.0007795082958086115, "loss": 2.2385, "step": 437240 }, { "epoch": 1.6902862179338498, "grad_norm": 0.11588594317436218, "learning_rate": 0.0007793444384266297, "loss": 2.2286, "step": 437250 }, { "epoch": 1.690324875137233, "grad_norm": 0.12389259040355682, "learning_rate": 0.0007791806030374518, "loss": 2.2318, "step": 437260 }, { "epoch": 1.6903635323406163, "grad_norm": 0.12836752831935883, "learning_rate": 0.0007790167896322244, "loss": 2.2452, "step": 437270 }, { "epoch": 1.6904021895439998, "grad_norm": 0.13800232112407684, "learning_rate": 0.0007788529982021002, "loss": 2.2299, "step": 437280 }, { "epoch": 1.690440846747383, "grad_norm": 0.13133391737937927, "learning_rate": 0.0007786892287382379, "loss": 2.23, "step": 437290 }, { "epoch": 1.6904795039507663, "grad_norm": 0.11495410650968552, "learning_rate": 0.0007785254812318023, "loss": 2.2349, "step": 437300 }, { "epoch": 1.6905181611541495, "grad_norm": 0.11895423382520676, "learning_rate": 0.0007783617556739639, "loss": 2.2475, "step": 437310 }, { "epoch": 1.6905568183575328, "grad_norm": 0.10464183241128922, "learning_rate": 0.0007781980520558989, "loss": 2.225, "step": 437320 }, { "epoch": 1.690595475560916, "grad_norm": 0.11086255311965942, "learning_rate": 0.0007780343703687898, "loss": 2.2374, "step": 437330 }, { "epoch": 1.6906341327642993, "grad_norm": 0.13207247853279114, "learning_rate": 0.000777870710603825, "loss": 2.2261, "step": 437340 }, { "epoch": 1.6906727899676826, "grad_norm": 0.10976863652467728, "learning_rate": 0.0007777070727521982, "loss": 2.2425, "step": 437350 }, { "epoch": 1.6907114471710658, "grad_norm": 0.1116211786866188, "learning_rate": 0.00077754345680511, "loss": 2.2315, "step": 437360 }, { "epoch": 1.690750104374449, "grad_norm": 0.1117439717054367, "learning_rate": 0.000777379862753766, "loss": 2.2355, "step": 437370 }, { "epoch": 1.6907887615778323, "grad_norm": 0.12214884161949158, "learning_rate": 0.0007772162905893783, "loss": 2.2325, "step": 437380 }, { "epoch": 1.6908274187812156, "grad_norm": 0.1459735631942749, "learning_rate": 0.0007770527403031642, "loss": 2.2192, "step": 437390 }, { "epoch": 1.6908660759845988, "grad_norm": 0.1475234031677246, "learning_rate": 0.0007768892118863476, "loss": 2.2378, "step": 437400 }, { "epoch": 1.6909047331879823, "grad_norm": 0.1206105649471283, "learning_rate": 0.0007767257053301577, "loss": 2.2379, "step": 437410 }, { "epoch": 1.6909433903913655, "grad_norm": 0.11356187611818314, "learning_rate": 0.0007765622206258303, "loss": 2.2263, "step": 437420 }, { "epoch": 1.6909820475947488, "grad_norm": 0.11754736304283142, "learning_rate": 0.0007763987577646057, "loss": 2.2509, "step": 437430 }, { "epoch": 1.691020704798132, "grad_norm": 0.10678063333034515, "learning_rate": 0.0007762353167377316, "loss": 2.2204, "step": 437440 }, { "epoch": 1.6910593620015155, "grad_norm": 0.11095842719078064, "learning_rate": 0.0007760718975364607, "loss": 2.231, "step": 437450 }, { "epoch": 1.6910980192048988, "grad_norm": 0.11462483555078506, "learning_rate": 0.0007759085001520516, "loss": 2.2279, "step": 437460 }, { "epoch": 1.691136676408282, "grad_norm": 0.11364367604255676, "learning_rate": 0.0007757451245757687, "loss": 2.2319, "step": 437470 }, { "epoch": 1.6911753336116653, "grad_norm": 0.10768604278564453, "learning_rate": 0.0007755817707988826, "loss": 2.2347, "step": 437480 }, { "epoch": 1.6912139908150485, "grad_norm": 0.1304168403148651, "learning_rate": 0.0007754184388126693, "loss": 2.2462, "step": 437490 }, { "epoch": 1.6912526480184318, "grad_norm": 0.11165142059326172, "learning_rate": 0.0007752551286084111, "loss": 2.2348, "step": 437500 }, { "epoch": 1.691291305221815, "grad_norm": 0.1312442570924759, "learning_rate": 0.0007750918401773952, "loss": 2.2279, "step": 437510 }, { "epoch": 1.6913299624251983, "grad_norm": 0.1164218857884407, "learning_rate": 0.0007749285735109158, "loss": 2.2144, "step": 437520 }, { "epoch": 1.6913686196285815, "grad_norm": 0.10903254896402359, "learning_rate": 0.0007747653286002718, "loss": 2.2336, "step": 437530 }, { "epoch": 1.6914072768319648, "grad_norm": 0.11387763917446136, "learning_rate": 0.0007746021054367687, "loss": 2.2441, "step": 437540 }, { "epoch": 1.691445934035348, "grad_norm": 0.1062643975019455, "learning_rate": 0.0007744389040117177, "loss": 2.2491, "step": 437550 }, { "epoch": 1.6914845912387313, "grad_norm": 0.11953529715538025, "learning_rate": 0.0007742757243164351, "loss": 2.241, "step": 437560 }, { "epoch": 1.6915232484421145, "grad_norm": 0.1143541932106018, "learning_rate": 0.0007741125663422437, "loss": 2.2266, "step": 437570 }, { "epoch": 1.691561905645498, "grad_norm": 0.11018265038728714, "learning_rate": 0.0007739494300804717, "loss": 2.2375, "step": 437580 }, { "epoch": 1.6916005628488813, "grad_norm": 0.10305047035217285, "learning_rate": 0.0007737863155224534, "loss": 2.2377, "step": 437590 }, { "epoch": 1.6916392200522645, "grad_norm": 0.11621309071779251, "learning_rate": 0.0007736232226595288, "loss": 2.2271, "step": 437600 }, { "epoch": 1.6916778772556478, "grad_norm": 0.11574336886405945, "learning_rate": 0.000773460151483043, "loss": 2.2305, "step": 437610 }, { "epoch": 1.6917165344590313, "grad_norm": 0.1166810542345047, "learning_rate": 0.0007732971019843478, "loss": 2.2445, "step": 437620 }, { "epoch": 1.6917551916624145, "grad_norm": 0.12316501885652542, "learning_rate": 0.0007731340741547999, "loss": 2.2501, "step": 437630 }, { "epoch": 1.6917938488657978, "grad_norm": 0.11434192955493927, "learning_rate": 0.0007729710679857626, "loss": 2.2479, "step": 437640 }, { "epoch": 1.691832506069181, "grad_norm": 0.11277862638235092, "learning_rate": 0.0007728080834686045, "loss": 2.235, "step": 437650 }, { "epoch": 1.6918711632725643, "grad_norm": 0.12093667685985565, "learning_rate": 0.0007726451205946993, "loss": 2.2313, "step": 437660 }, { "epoch": 1.6919098204759475, "grad_norm": 0.11708587408065796, "learning_rate": 0.000772482179355428, "loss": 2.2413, "step": 437670 }, { "epoch": 1.6919484776793308, "grad_norm": 0.11986162513494492, "learning_rate": 0.0007723192597421755, "loss": 2.2377, "step": 437680 }, { "epoch": 1.691987134882714, "grad_norm": 0.12617745995521545, "learning_rate": 0.0007721563617463338, "loss": 2.247, "step": 437690 }, { "epoch": 1.6920257920860973, "grad_norm": 0.11964748054742813, "learning_rate": 0.0007719934853592998, "loss": 2.2231, "step": 437700 }, { "epoch": 1.6920644492894805, "grad_norm": 0.11791455000638962, "learning_rate": 0.0007718306305724768, "loss": 2.2276, "step": 437710 }, { "epoch": 1.6921031064928638, "grad_norm": 0.12315932661294937, "learning_rate": 0.000771667797377273, "loss": 2.2186, "step": 437720 }, { "epoch": 1.692141763696247, "grad_norm": 0.11123772710561752, "learning_rate": 0.0007715049857651029, "loss": 2.2471, "step": 437730 }, { "epoch": 1.6921804208996303, "grad_norm": 0.11152539402246475, "learning_rate": 0.0007713421957273865, "loss": 2.2474, "step": 437740 }, { "epoch": 1.6922190781030138, "grad_norm": 0.1093481257557869, "learning_rate": 0.0007711794272555493, "loss": 2.2447, "step": 437750 }, { "epoch": 1.692257735306397, "grad_norm": 0.11701294034719467, "learning_rate": 0.0007710166803410228, "loss": 2.2304, "step": 437760 }, { "epoch": 1.6922963925097803, "grad_norm": 0.1318010836839676, "learning_rate": 0.0007708539549752438, "loss": 2.2436, "step": 437770 }, { "epoch": 1.6923350497131635, "grad_norm": 0.1183793693780899, "learning_rate": 0.0007706912511496553, "loss": 2.2321, "step": 437780 }, { "epoch": 1.692373706916547, "grad_norm": 0.1210879236459732, "learning_rate": 0.0007705285688557053, "loss": 2.2202, "step": 437790 }, { "epoch": 1.6924123641199302, "grad_norm": 0.10407286137342453, "learning_rate": 0.0007703659080848482, "loss": 2.2356, "step": 437800 }, { "epoch": 1.6924510213233135, "grad_norm": 0.13408103585243225, "learning_rate": 0.0007702032688285434, "loss": 2.2296, "step": 437810 }, { "epoch": 1.6924896785266967, "grad_norm": 0.11854076385498047, "learning_rate": 0.0007700406510782562, "loss": 2.2367, "step": 437820 }, { "epoch": 1.69252833573008, "grad_norm": 0.12286543101072311, "learning_rate": 0.0007698780548254575, "loss": 2.2357, "step": 437830 }, { "epoch": 1.6925669929334632, "grad_norm": 0.11990126222372055, "learning_rate": 0.0007697154800616242, "loss": 2.2409, "step": 437840 }, { "epoch": 1.6926056501368465, "grad_norm": 0.11182884126901627, "learning_rate": 0.000769552926778238, "loss": 2.247, "step": 437850 }, { "epoch": 1.6926443073402297, "grad_norm": 0.10320017486810684, "learning_rate": 0.0007693903949667874, "loss": 2.2336, "step": 437860 }, { "epoch": 1.692682964543613, "grad_norm": 0.1216001883149147, "learning_rate": 0.000769227884618765, "loss": 2.2217, "step": 437870 }, { "epoch": 1.6927216217469963, "grad_norm": 0.11251985281705856, "learning_rate": 0.0007690653957256705, "loss": 2.2181, "step": 437880 }, { "epoch": 1.6927602789503795, "grad_norm": 0.11518998444080353, "learning_rate": 0.0007689029282790086, "loss": 2.2333, "step": 437890 }, { "epoch": 1.6927989361537628, "grad_norm": 0.10871678590774536, "learning_rate": 0.0007687404822702892, "loss": 2.2404, "step": 437900 }, { "epoch": 1.692837593357146, "grad_norm": 0.11232757568359375, "learning_rate": 0.0007685780576910284, "loss": 2.2283, "step": 437910 }, { "epoch": 1.6928762505605295, "grad_norm": 0.11824655532836914, "learning_rate": 0.0007684156545327478, "loss": 2.2392, "step": 437920 }, { "epoch": 1.6929149077639127, "grad_norm": 0.12078500539064407, "learning_rate": 0.0007682532727869742, "loss": 2.2307, "step": 437930 }, { "epoch": 1.692953564967296, "grad_norm": 0.11573991179466248, "learning_rate": 0.0007680909124452404, "loss": 2.2287, "step": 437940 }, { "epoch": 1.6929922221706792, "grad_norm": 0.1250699907541275, "learning_rate": 0.0007679285734990849, "loss": 2.2252, "step": 437950 }, { "epoch": 1.6930308793740627, "grad_norm": 0.12099843472242355, "learning_rate": 0.0007677662559400509, "loss": 2.2479, "step": 437960 }, { "epoch": 1.693069536577446, "grad_norm": 0.11248032003641129, "learning_rate": 0.0007676039597596884, "loss": 2.2282, "step": 437970 }, { "epoch": 1.6931081937808292, "grad_norm": 0.12657272815704346, "learning_rate": 0.0007674416849495518, "loss": 2.2384, "step": 437980 }, { "epoch": 1.6931468509842125, "grad_norm": 0.15892231464385986, "learning_rate": 0.0007672794315012019, "loss": 2.2418, "step": 437990 }, { "epoch": 1.6931855081875957, "grad_norm": 0.11984408646821976, "learning_rate": 0.0007671171994062048, "loss": 2.2419, "step": 438000 }, { "epoch": 1.693224165390979, "grad_norm": 0.11554060876369476, "learning_rate": 0.0007669549886561318, "loss": 2.246, "step": 438010 }, { "epoch": 1.6932628225943622, "grad_norm": 0.11163786053657532, "learning_rate": 0.0007667927992425606, "loss": 2.2258, "step": 438020 }, { "epoch": 1.6933014797977455, "grad_norm": 0.1620757281780243, "learning_rate": 0.000766630631157073, "loss": 2.237, "step": 438030 }, { "epoch": 1.6933401370011287, "grad_norm": 0.11534059047698975, "learning_rate": 0.000766468484391258, "loss": 2.2374, "step": 438040 }, { "epoch": 1.693378794204512, "grad_norm": 0.10129819810390472, "learning_rate": 0.0007663063589367091, "loss": 2.2325, "step": 438050 }, { "epoch": 1.6934174514078952, "grad_norm": 0.10799936205148697, "learning_rate": 0.0007661442547850255, "loss": 2.2235, "step": 438060 }, { "epoch": 1.6934561086112785, "grad_norm": 0.12132129818201065, "learning_rate": 0.0007659821719278122, "loss": 2.2319, "step": 438070 }, { "epoch": 1.6934947658146617, "grad_norm": 0.11375699192285538, "learning_rate": 0.0007658201103566788, "loss": 2.2386, "step": 438080 }, { "epoch": 1.6935334230180452, "grad_norm": 0.1231660544872284, "learning_rate": 0.0007656580700632421, "loss": 2.2262, "step": 438090 }, { "epoch": 1.6935720802214285, "grad_norm": 0.11016866564750671, "learning_rate": 0.0007654960510391229, "loss": 2.2417, "step": 438100 }, { "epoch": 1.6936107374248117, "grad_norm": 0.11380646377801895, "learning_rate": 0.0007653340532759479, "loss": 2.2404, "step": 438110 }, { "epoch": 1.693649394628195, "grad_norm": 0.11561800539493561, "learning_rate": 0.0007651720767653497, "loss": 2.2405, "step": 438120 }, { "epoch": 1.6936880518315784, "grad_norm": 0.11656072735786438, "learning_rate": 0.0007650101214989655, "loss": 2.2353, "step": 438130 }, { "epoch": 1.6937267090349617, "grad_norm": 0.12446340918540955, "learning_rate": 0.0007648481874684393, "loss": 2.2307, "step": 438140 }, { "epoch": 1.693765366238345, "grad_norm": 0.11343683302402496, "learning_rate": 0.0007646862746654193, "loss": 2.2288, "step": 438150 }, { "epoch": 1.6938040234417282, "grad_norm": 0.12251932173967361, "learning_rate": 0.00076452438308156, "loss": 2.2151, "step": 438160 }, { "epoch": 1.6938426806451115, "grad_norm": 0.11948093771934509, "learning_rate": 0.000764362512708521, "loss": 2.2239, "step": 438170 }, { "epoch": 1.6938813378484947, "grad_norm": 0.11201823502779007, "learning_rate": 0.0007642006635379674, "loss": 2.2578, "step": 438180 }, { "epoch": 1.693919995051878, "grad_norm": 0.12762024998664856, "learning_rate": 0.0007640388355615699, "loss": 2.2318, "step": 438190 }, { "epoch": 1.6939586522552612, "grad_norm": 0.10953720659017563, "learning_rate": 0.0007638770287710046, "loss": 2.225, "step": 438200 }, { "epoch": 1.6939973094586445, "grad_norm": 0.10989776998758316, "learning_rate": 0.0007637152431579529, "loss": 2.2204, "step": 438210 }, { "epoch": 1.6940359666620277, "grad_norm": 0.11345095187425613, "learning_rate": 0.0007635534787141014, "loss": 2.2301, "step": 438220 }, { "epoch": 1.694074623865411, "grad_norm": 0.1317012459039688, "learning_rate": 0.000763391735431143, "loss": 2.2261, "step": 438230 }, { "epoch": 1.6941132810687942, "grad_norm": 0.11273641139268875, "learning_rate": 0.0007632300133007755, "loss": 2.2358, "step": 438240 }, { "epoch": 1.6941519382721775, "grad_norm": 0.11396468430757523, "learning_rate": 0.0007630683123147018, "loss": 2.2261, "step": 438250 }, { "epoch": 1.694190595475561, "grad_norm": 0.12092260271310806, "learning_rate": 0.0007629066324646307, "loss": 2.2145, "step": 438260 }, { "epoch": 1.6942292526789442, "grad_norm": 0.13460946083068848, "learning_rate": 0.0007627449737422765, "loss": 2.2435, "step": 438270 }, { "epoch": 1.6942679098823274, "grad_norm": 0.11652474105358124, "learning_rate": 0.0007625833361393586, "loss": 2.2193, "step": 438280 }, { "epoch": 1.6943065670857107, "grad_norm": 0.13675649464130402, "learning_rate": 0.0007624217196476013, "loss": 2.2361, "step": 438290 }, { "epoch": 1.6943452242890942, "grad_norm": 0.11373266577720642, "learning_rate": 0.000762260124258736, "loss": 2.2246, "step": 438300 }, { "epoch": 1.6943838814924774, "grad_norm": 0.1127764880657196, "learning_rate": 0.0007620985499644975, "loss": 2.232, "step": 438310 }, { "epoch": 1.6944225386958607, "grad_norm": 0.12021084874868393, "learning_rate": 0.0007619369967566272, "loss": 2.2371, "step": 438320 }, { "epoch": 1.694461195899244, "grad_norm": 0.11365853250026703, "learning_rate": 0.0007617754646268715, "loss": 2.2264, "step": 438330 }, { "epoch": 1.6944998531026272, "grad_norm": 0.11727002263069153, "learning_rate": 0.0007616139535669824, "loss": 2.2274, "step": 438340 }, { "epoch": 1.6945385103060104, "grad_norm": 0.11850646883249283, "learning_rate": 0.0007614524635687172, "loss": 2.2445, "step": 438350 }, { "epoch": 1.6945771675093937, "grad_norm": 0.11157086491584778, "learning_rate": 0.0007612909946238382, "loss": 2.235, "step": 438360 }, { "epoch": 1.694615824712777, "grad_norm": 0.11376459151506424, "learning_rate": 0.0007611295467241137, "loss": 2.2364, "step": 438370 }, { "epoch": 1.6946544819161602, "grad_norm": 0.12699368596076965, "learning_rate": 0.0007609681198613169, "loss": 2.2349, "step": 438380 }, { "epoch": 1.6946931391195434, "grad_norm": 0.1290704905986786, "learning_rate": 0.0007608067140272266, "loss": 2.2258, "step": 438390 }, { "epoch": 1.6947317963229267, "grad_norm": 0.11632368713617325, "learning_rate": 0.0007606453292136266, "loss": 2.2269, "step": 438400 }, { "epoch": 1.69477045352631, "grad_norm": 0.12675462663173676, "learning_rate": 0.0007604839654123066, "loss": 2.2516, "step": 438410 }, { "epoch": 1.6948091107296934, "grad_norm": 0.13376300036907196, "learning_rate": 0.0007603226226150612, "loss": 2.2244, "step": 438420 }, { "epoch": 1.6948477679330767, "grad_norm": 0.10991213470697403, "learning_rate": 0.0007601613008136906, "loss": 2.2433, "step": 438430 }, { "epoch": 1.69488642513646, "grad_norm": 0.11957073211669922, "learning_rate": 0.00076, "loss": 2.2275, "step": 438440 }, { "epoch": 1.6949250823398432, "grad_norm": 0.12245447188615799, "learning_rate": 0.0007598387201658004, "loss": 2.2361, "step": 438450 }, { "epoch": 1.6949637395432264, "grad_norm": 0.12726320326328278, "learning_rate": 0.0007596774613029078, "loss": 2.2432, "step": 438460 }, { "epoch": 1.69500239674661, "grad_norm": 0.11078235507011414, "learning_rate": 0.0007595162234031434, "loss": 2.2305, "step": 438470 }, { "epoch": 1.6950410539499932, "grad_norm": 0.1352330446243286, "learning_rate": 0.0007593550064583344, "loss": 2.2238, "step": 438480 }, { "epoch": 1.6950797111533764, "grad_norm": 0.1313025951385498, "learning_rate": 0.000759193810460312, "loss": 2.2367, "step": 438490 }, { "epoch": 1.6951183683567597, "grad_norm": 0.13372932374477386, "learning_rate": 0.0007590326354009143, "loss": 2.2524, "step": 438500 }, { "epoch": 1.695157025560143, "grad_norm": 0.11021479964256287, "learning_rate": 0.0007588714812719837, "loss": 2.2203, "step": 438510 }, { "epoch": 1.6951956827635262, "grad_norm": 0.12476097792387009, "learning_rate": 0.0007587103480653679, "loss": 2.2314, "step": 438520 }, { "epoch": 1.6952343399669094, "grad_norm": 0.12518496811389923, "learning_rate": 0.0007585492357729205, "loss": 2.2402, "step": 438530 }, { "epoch": 1.6952729971702927, "grad_norm": 0.11186251789331436, "learning_rate": 0.0007583881443864995, "loss": 2.2293, "step": 438540 }, { "epoch": 1.695311654373676, "grad_norm": 0.13721270859241486, "learning_rate": 0.0007582270738979691, "loss": 2.2125, "step": 438550 }, { "epoch": 1.6953503115770592, "grad_norm": 0.11518587917089462, "learning_rate": 0.0007580660242991981, "loss": 2.2245, "step": 438560 }, { "epoch": 1.6953889687804424, "grad_norm": 0.12563805282115936, "learning_rate": 0.0007579049955820609, "loss": 2.2328, "step": 438570 }, { "epoch": 1.6954276259838257, "grad_norm": 0.10423476248979568, "learning_rate": 0.0007577439877384373, "loss": 2.2422, "step": 438580 }, { "epoch": 1.6954662831872092, "grad_norm": 0.11267977207899094, "learning_rate": 0.0007575830007602118, "loss": 2.238, "step": 438590 }, { "epoch": 1.6955049403905924, "grad_norm": 0.11336838454008102, "learning_rate": 0.0007574220346392746, "loss": 2.2182, "step": 438600 }, { "epoch": 1.6955435975939757, "grad_norm": 0.11938924342393875, "learning_rate": 0.0007572610893675214, "loss": 2.2387, "step": 438610 }, { "epoch": 1.695582254797359, "grad_norm": 0.10956771671772003, "learning_rate": 0.0007571001649368523, "loss": 2.2408, "step": 438620 }, { "epoch": 1.6956209120007422, "grad_norm": 0.1219608262181282, "learning_rate": 0.0007569392613391735, "loss": 2.2466, "step": 438630 }, { "epoch": 1.6956595692041256, "grad_norm": 0.10723282396793365, "learning_rate": 0.0007567783785663957, "loss": 2.2179, "step": 438640 }, { "epoch": 1.695698226407509, "grad_norm": 0.11632703244686127, "learning_rate": 0.0007566175166104356, "loss": 2.22, "step": 438650 }, { "epoch": 1.6957368836108921, "grad_norm": 0.10991436243057251, "learning_rate": 0.000756456675463215, "loss": 2.2231, "step": 438660 }, { "epoch": 1.6957755408142754, "grad_norm": 0.10563716292381287, "learning_rate": 0.0007562958551166601, "loss": 2.2178, "step": 438670 }, { "epoch": 1.6958141980176586, "grad_norm": 0.11046472191810608, "learning_rate": 0.0007561350555627031, "loss": 2.2455, "step": 438680 }, { "epoch": 1.695852855221042, "grad_norm": 0.11357564479112625, "learning_rate": 0.0007559742767932811, "loss": 2.2423, "step": 438690 }, { "epoch": 1.6958915124244252, "grad_norm": 0.10833998769521713, "learning_rate": 0.0007558135188003368, "loss": 2.2279, "step": 438700 }, { "epoch": 1.6959301696278084, "grad_norm": 0.11807499825954437, "learning_rate": 0.0007556527815758176, "loss": 2.2377, "step": 438710 }, { "epoch": 1.6959688268311917, "grad_norm": 0.1214083805680275, "learning_rate": 0.0007554920651116763, "loss": 2.2414, "step": 438720 }, { "epoch": 1.696007484034575, "grad_norm": 0.11282476037740707, "learning_rate": 0.0007553313693998711, "loss": 2.2357, "step": 438730 }, { "epoch": 1.6960461412379582, "grad_norm": 0.1160525232553482, "learning_rate": 0.0007551706944323651, "loss": 2.2273, "step": 438740 }, { "epoch": 1.6960847984413414, "grad_norm": 0.10337352007627487, "learning_rate": 0.0007550100402011268, "loss": 2.2308, "step": 438750 }, { "epoch": 1.6961234556447249, "grad_norm": 0.11672735959291458, "learning_rate": 0.0007548494066981295, "loss": 2.2276, "step": 438760 }, { "epoch": 1.6961621128481081, "grad_norm": 0.10167520493268967, "learning_rate": 0.0007546887939153524, "loss": 2.2383, "step": 438770 }, { "epoch": 1.6962007700514914, "grad_norm": 0.1253589689731598, "learning_rate": 0.0007545282018447788, "loss": 2.2296, "step": 438780 }, { "epoch": 1.6962394272548746, "grad_norm": 0.1127503514289856, "learning_rate": 0.0007543676304783984, "loss": 2.2529, "step": 438790 }, { "epoch": 1.6962780844582581, "grad_norm": 0.11058793216943741, "learning_rate": 0.000754207079808205, "loss": 2.2298, "step": 438800 }, { "epoch": 1.6963167416616414, "grad_norm": 0.13061091303825378, "learning_rate": 0.0007540465498261983, "loss": 2.2353, "step": 438810 }, { "epoch": 1.6963553988650246, "grad_norm": 0.15342095494270325, "learning_rate": 0.0007538860405243828, "loss": 2.2369, "step": 438820 }, { "epoch": 1.6963940560684079, "grad_norm": 0.11850441992282867, "learning_rate": 0.0007537255518947683, "loss": 2.2259, "step": 438830 }, { "epoch": 1.6964327132717911, "grad_norm": 0.12837547063827515, "learning_rate": 0.0007535650839293693, "loss": 2.2321, "step": 438840 }, { "epoch": 1.6964713704751744, "grad_norm": 0.11992836743593216, "learning_rate": 0.0007534046366202063, "loss": 2.2297, "step": 438850 }, { "epoch": 1.6965100276785576, "grad_norm": 0.11032480746507645, "learning_rate": 0.000753244209959304, "loss": 2.2297, "step": 438860 }, { "epoch": 1.6965486848819409, "grad_norm": 0.10852088034152985, "learning_rate": 0.000753083803938693, "loss": 2.2426, "step": 438870 }, { "epoch": 1.6965873420853241, "grad_norm": 0.12265793234109879, "learning_rate": 0.0007529234185504084, "loss": 2.2353, "step": 438880 }, { "epoch": 1.6966259992887074, "grad_norm": 0.12141349166631699, "learning_rate": 0.0007527630537864909, "loss": 2.2293, "step": 438890 }, { "epoch": 1.6966646564920906, "grad_norm": 0.12046696990728378, "learning_rate": 0.000752602709638986, "loss": 2.2295, "step": 438900 }, { "epoch": 1.696703313695474, "grad_norm": 0.11540514975786209, "learning_rate": 0.0007524423860999445, "loss": 2.2345, "step": 438910 }, { "epoch": 1.6967419708988571, "grad_norm": 0.11729709804058075, "learning_rate": 0.0007522820831614224, "loss": 2.2502, "step": 438920 }, { "epoch": 1.6967806281022406, "grad_norm": 0.12267860025167465, "learning_rate": 0.0007521218008154802, "loss": 2.2374, "step": 438930 }, { "epoch": 1.6968192853056239, "grad_norm": 0.12478138506412506, "learning_rate": 0.0007519615390541845, "loss": 2.2198, "step": 438940 }, { "epoch": 1.6968579425090071, "grad_norm": 0.11276324093341827, "learning_rate": 0.000751801297869606, "loss": 2.2365, "step": 438950 }, { "epoch": 1.6968965997123904, "grad_norm": 0.11225543171167374, "learning_rate": 0.0007516410772538211, "loss": 2.2429, "step": 438960 }, { "epoch": 1.6969352569157738, "grad_norm": 0.1132887527346611, "learning_rate": 0.0007514808771989113, "loss": 2.2426, "step": 438970 }, { "epoch": 1.696973914119157, "grad_norm": 0.12351055443286896, "learning_rate": 0.0007513206976969627, "loss": 2.242, "step": 438980 }, { "epoch": 1.6970125713225404, "grad_norm": 0.27838459610939026, "learning_rate": 0.0007511605387400668, "loss": 2.2265, "step": 438990 }, { "epoch": 1.6970512285259236, "grad_norm": 0.12767164409160614, "learning_rate": 0.0007510004003203203, "loss": 2.2389, "step": 439000 }, { "epoch": 1.6970898857293069, "grad_norm": 0.125470831990242, "learning_rate": 0.0007508402824298249, "loss": 2.245, "step": 439010 }, { "epoch": 1.69712854293269, "grad_norm": 0.10499616712331772, "learning_rate": 0.0007506801850606868, "loss": 2.2235, "step": 439020 }, { "epoch": 1.6971672001360734, "grad_norm": 0.12618540227413177, "learning_rate": 0.0007505201082050179, "loss": 2.2373, "step": 439030 }, { "epoch": 1.6972058573394566, "grad_norm": 0.1158444806933403, "learning_rate": 0.0007503600518549352, "loss": 2.2312, "step": 439040 }, { "epoch": 1.6972445145428399, "grad_norm": 0.11925296485424042, "learning_rate": 0.0007502000160025606, "loss": 2.229, "step": 439050 }, { "epoch": 1.6972831717462231, "grad_norm": 0.10973548889160156, "learning_rate": 0.0007500400006400206, "loss": 2.2363, "step": 439060 }, { "epoch": 1.6973218289496064, "grad_norm": 0.12064634263515472, "learning_rate": 0.000749880005759447, "loss": 2.2325, "step": 439070 }, { "epoch": 1.6973604861529896, "grad_norm": 0.11418332904577255, "learning_rate": 0.0007497200313529772, "loss": 2.2384, "step": 439080 }, { "epoch": 1.6973991433563729, "grad_norm": 0.1222028136253357, "learning_rate": 0.0007495600774127531, "loss": 2.242, "step": 439090 }, { "epoch": 1.6974378005597563, "grad_norm": 0.13102611899375916, "learning_rate": 0.0007494001439309214, "loss": 2.2403, "step": 439100 }, { "epoch": 1.6974764577631396, "grad_norm": 0.12919282913208008, "learning_rate": 0.0007492402308996345, "loss": 2.2329, "step": 439110 }, { "epoch": 1.6975151149665229, "grad_norm": 0.11866680532693863, "learning_rate": 0.0007490803383110489, "loss": 2.2272, "step": 439120 }, { "epoch": 1.697553772169906, "grad_norm": 0.11926010996103287, "learning_rate": 0.000748920466157327, "loss": 2.2397, "step": 439130 }, { "epoch": 1.6975924293732896, "grad_norm": 0.12326746433973312, "learning_rate": 0.0007487606144306358, "loss": 2.2397, "step": 439140 }, { "epoch": 1.6976310865766728, "grad_norm": 0.11257810145616531, "learning_rate": 0.0007486007831231474, "loss": 2.2453, "step": 439150 }, { "epoch": 1.697669743780056, "grad_norm": 0.11413078755140305, "learning_rate": 0.0007484409722270386, "loss": 2.2264, "step": 439160 }, { "epoch": 1.6977084009834393, "grad_norm": 0.11529600620269775, "learning_rate": 0.0007482811817344919, "loss": 2.2171, "step": 439170 }, { "epoch": 1.6977470581868226, "grad_norm": 0.11455454677343369, "learning_rate": 0.000748121411637694, "loss": 2.2401, "step": 439180 }, { "epoch": 1.6977857153902058, "grad_norm": 0.10799404233694077, "learning_rate": 0.0007479616619288369, "loss": 2.2538, "step": 439190 }, { "epoch": 1.697824372593589, "grad_norm": 0.12358862161636353, "learning_rate": 0.0007478019326001178, "loss": 2.2365, "step": 439200 }, { "epoch": 1.6978630297969723, "grad_norm": 0.11603404581546783, "learning_rate": 0.0007476422236437386, "loss": 2.2275, "step": 439210 }, { "epoch": 1.6979016870003556, "grad_norm": 0.12009704113006592, "learning_rate": 0.0007474825350519059, "loss": 2.2362, "step": 439220 }, { "epoch": 1.6979403442037388, "grad_norm": 0.40134766697883606, "learning_rate": 0.0007473228668168321, "loss": 2.2192, "step": 439230 }, { "epoch": 1.697979001407122, "grad_norm": 0.12011298537254333, "learning_rate": 0.000747163218930734, "loss": 2.2333, "step": 439240 }, { "epoch": 1.6980176586105054, "grad_norm": 0.11388115584850311, "learning_rate": 0.0007470035913858333, "loss": 2.2308, "step": 439250 }, { "epoch": 1.6980563158138886, "grad_norm": 0.11122046411037445, "learning_rate": 0.0007468439841743567, "loss": 2.2393, "step": 439260 }, { "epoch": 1.698094973017272, "grad_norm": 0.12878040969371796, "learning_rate": 0.0007466843972885362, "loss": 2.225, "step": 439270 }, { "epoch": 1.6981336302206553, "grad_norm": 0.12966978549957275, "learning_rate": 0.0007465248307206083, "loss": 2.2251, "step": 439280 }, { "epoch": 1.6981722874240386, "grad_norm": 0.12152548879384995, "learning_rate": 0.0007463652844628143, "loss": 2.2385, "step": 439290 }, { "epoch": 1.6982109446274218, "grad_norm": 0.11005580425262451, "learning_rate": 0.0007462057585074015, "loss": 2.2247, "step": 439300 }, { "epoch": 1.6982496018308053, "grad_norm": 0.11342678219079971, "learning_rate": 0.0007460462528466212, "loss": 2.2347, "step": 439310 }, { "epoch": 1.6982882590341886, "grad_norm": 0.1343384087085724, "learning_rate": 0.0007458867674727294, "loss": 2.2522, "step": 439320 }, { "epoch": 1.6983269162375718, "grad_norm": 0.12136346101760864, "learning_rate": 0.0007457273023779878, "loss": 2.2129, "step": 439330 }, { "epoch": 1.698365573440955, "grad_norm": 0.1206677258014679, "learning_rate": 0.0007455678575546623, "loss": 2.2405, "step": 439340 }, { "epoch": 1.6984042306443383, "grad_norm": 0.12405674904584885, "learning_rate": 0.0007454084329950244, "loss": 2.226, "step": 439350 }, { "epoch": 1.6984428878477216, "grad_norm": 0.11063183844089508, "learning_rate": 0.0007452490286913502, "loss": 2.2321, "step": 439360 }, { "epoch": 1.6984815450511048, "grad_norm": 0.11682227998971939, "learning_rate": 0.0007450896446359206, "loss": 2.2382, "step": 439370 }, { "epoch": 1.698520202254488, "grad_norm": 0.23449300229549408, "learning_rate": 0.0007449302808210215, "loss": 2.2405, "step": 439380 }, { "epoch": 1.6985588594578713, "grad_norm": 0.10931873321533203, "learning_rate": 0.0007447709372389437, "loss": 2.2459, "step": 439390 }, { "epoch": 1.6985975166612546, "grad_norm": 0.11637139320373535, "learning_rate": 0.0007446116138819827, "loss": 2.2302, "step": 439400 }, { "epoch": 1.6986361738646378, "grad_norm": 0.12564273178577423, "learning_rate": 0.0007444523107424394, "loss": 2.2344, "step": 439410 }, { "epoch": 1.698674831068021, "grad_norm": 0.12010937184095383, "learning_rate": 0.000744293027812619, "loss": 2.235, "step": 439420 }, { "epoch": 1.6987134882714043, "grad_norm": 0.11572326719760895, "learning_rate": 0.0007441337650848321, "loss": 2.2332, "step": 439430 }, { "epoch": 1.6987521454747878, "grad_norm": 0.10662706196308136, "learning_rate": 0.0007439745225513934, "loss": 2.2284, "step": 439440 }, { "epoch": 1.698790802678171, "grad_norm": 0.11132440716028214, "learning_rate": 0.0007438153002046235, "loss": 2.2324, "step": 439450 }, { "epoch": 1.6988294598815543, "grad_norm": 0.13395658135414124, "learning_rate": 0.0007436560980368472, "loss": 2.2202, "step": 439460 }, { "epoch": 1.6988681170849376, "grad_norm": 0.12403550744056702, "learning_rate": 0.0007434969160403944, "loss": 2.221, "step": 439470 }, { "epoch": 1.698906774288321, "grad_norm": 0.12234672158956528, "learning_rate": 0.0007433377542075994, "loss": 2.2176, "step": 439480 }, { "epoch": 1.6989454314917043, "grad_norm": 0.11924166977405548, "learning_rate": 0.000743178612530802, "loss": 2.2254, "step": 439490 }, { "epoch": 1.6989840886950875, "grad_norm": 0.11710778623819351, "learning_rate": 0.0007430194910023465, "loss": 2.2276, "step": 439500 }, { "epoch": 1.6990227458984708, "grad_norm": 0.1245732381939888, "learning_rate": 0.0007428603896145823, "loss": 2.2331, "step": 439510 }, { "epoch": 1.699061403101854, "grad_norm": 0.11419229954481125, "learning_rate": 0.000742701308359863, "loss": 2.2187, "step": 439520 }, { "epoch": 1.6991000603052373, "grad_norm": 0.11181352287530899, "learning_rate": 0.0007425422472305481, "loss": 2.2307, "step": 439530 }, { "epoch": 1.6991387175086206, "grad_norm": 0.10994283109903336, "learning_rate": 0.000742383206219001, "loss": 2.2313, "step": 439540 }, { "epoch": 1.6991773747120038, "grad_norm": 0.1081324964761734, "learning_rate": 0.0007422241853175899, "loss": 2.2315, "step": 439550 }, { "epoch": 1.699216031915387, "grad_norm": 0.1146218553185463, "learning_rate": 0.0007420651845186889, "loss": 2.2244, "step": 439560 }, { "epoch": 1.6992546891187703, "grad_norm": 0.10938213020563126, "learning_rate": 0.0007419062038146758, "loss": 2.2324, "step": 439570 }, { "epoch": 1.6992933463221536, "grad_norm": 0.12179584056138992, "learning_rate": 0.0007417472431979338, "loss": 2.2156, "step": 439580 }, { "epoch": 1.6993320035255368, "grad_norm": 0.14004698395729065, "learning_rate": 0.0007415883026608503, "loss": 2.2273, "step": 439590 }, { "epoch": 1.69937066072892, "grad_norm": 0.10983067005872726, "learning_rate": 0.0007414293821958182, "loss": 2.2346, "step": 439600 }, { "epoch": 1.6994093179323035, "grad_norm": 0.11252199113368988, "learning_rate": 0.0007412704817952349, "loss": 2.2324, "step": 439610 }, { "epoch": 1.6994479751356868, "grad_norm": 0.11938013881444931, "learning_rate": 0.0007411116014515027, "loss": 2.2355, "step": 439620 }, { "epoch": 1.69948663233907, "grad_norm": 0.100449338555336, "learning_rate": 0.0007409527411570287, "loss": 2.2268, "step": 439630 }, { "epoch": 1.6995252895424533, "grad_norm": 0.11269817501306534, "learning_rate": 0.0007407939009042246, "loss": 2.2361, "step": 439640 }, { "epoch": 1.6995639467458368, "grad_norm": 0.12172277271747589, "learning_rate": 0.0007406350806855067, "loss": 2.2153, "step": 439650 }, { "epoch": 1.69960260394922, "grad_norm": 0.1266396939754486, "learning_rate": 0.000740476280493297, "loss": 2.2371, "step": 439660 }, { "epoch": 1.6996412611526033, "grad_norm": 0.1194564625620842, "learning_rate": 0.0007403175003200211, "loss": 2.2284, "step": 439670 }, { "epoch": 1.6996799183559865, "grad_norm": 0.11998004466295242, "learning_rate": 0.0007401587401581102, "loss": 2.2247, "step": 439680 }, { "epoch": 1.6997185755593698, "grad_norm": 0.11383472383022308, "learning_rate": 0.00074, "loss": 2.2396, "step": 439690 }, { "epoch": 1.699757232762753, "grad_norm": 0.11866569519042969, "learning_rate": 0.0007398412798381308, "loss": 2.2296, "step": 439700 }, { "epoch": 1.6997958899661363, "grad_norm": 0.1243966594338417, "learning_rate": 0.000739682579664948, "loss": 2.2135, "step": 439710 }, { "epoch": 1.6998345471695195, "grad_norm": 0.11566292494535446, "learning_rate": 0.0007395238994729016, "loss": 2.2231, "step": 439720 }, { "epoch": 1.6998732043729028, "grad_norm": 0.10893825441598892, "learning_rate": 0.0007393652392544461, "loss": 2.2209, "step": 439730 }, { "epoch": 1.699911861576286, "grad_norm": 0.1245025172829628, "learning_rate": 0.0007392065990020412, "loss": 2.2291, "step": 439740 }, { "epoch": 1.6999505187796693, "grad_norm": 0.11219801008701324, "learning_rate": 0.0007390479787081508, "loss": 2.2277, "step": 439750 }, { "epoch": 1.6999891759830525, "grad_norm": 0.11668980866670609, "learning_rate": 0.0007388893783652444, "loss": 2.2231, "step": 439760 }, { "epoch": 1.7000278331864358, "grad_norm": 0.1189895048737526, "learning_rate": 0.0007387307979657952, "loss": 2.2323, "step": 439770 }, { "epoch": 1.7000664903898193, "grad_norm": 0.11900747567415237, "learning_rate": 0.0007385722375022817, "loss": 2.2281, "step": 439780 }, { "epoch": 1.7001051475932025, "grad_norm": 0.11199655383825302, "learning_rate": 0.0007384136969671874, "loss": 2.2401, "step": 439790 }, { "epoch": 1.7001438047965858, "grad_norm": 0.12800821661949158, "learning_rate": 0.0007382551763529996, "loss": 2.2334, "step": 439800 }, { "epoch": 1.700182461999969, "grad_norm": 0.12204767763614655, "learning_rate": 0.0007380966756522114, "loss": 2.2353, "step": 439810 }, { "epoch": 1.7002211192033525, "grad_norm": 0.11441326886415482, "learning_rate": 0.0007379381948573201, "loss": 2.2297, "step": 439820 }, { "epoch": 1.7002597764067358, "grad_norm": 0.11331064999103546, "learning_rate": 0.0007377797339608272, "loss": 2.2188, "step": 439830 }, { "epoch": 1.700298433610119, "grad_norm": 0.11376959830522537, "learning_rate": 0.00073762129295524, "loss": 2.2265, "step": 439840 }, { "epoch": 1.7003370908135023, "grad_norm": 0.5333940386772156, "learning_rate": 0.0007374628718330696, "loss": 2.2425, "step": 439850 }, { "epoch": 1.7003757480168855, "grad_norm": 0.11315008997917175, "learning_rate": 0.0007373044705868321, "loss": 2.2499, "step": 439860 }, { "epoch": 1.7004144052202688, "grad_norm": 0.11870913207530975, "learning_rate": 0.0007371460892090487, "loss": 2.2377, "step": 439870 }, { "epoch": 1.700453062423652, "grad_norm": 0.11550546437501907, "learning_rate": 0.0007369877276922445, "loss": 2.2279, "step": 439880 }, { "epoch": 1.7004917196270353, "grad_norm": 0.11039859801530838, "learning_rate": 0.0007368293860289498, "loss": 2.2327, "step": 439890 }, { "epoch": 1.7005303768304185, "grad_norm": 0.12192100286483765, "learning_rate": 0.0007366710642116994, "loss": 2.2262, "step": 439900 }, { "epoch": 1.7005690340338018, "grad_norm": 0.11512763053178787, "learning_rate": 0.000736512762233033, "loss": 2.2384, "step": 439910 }, { "epoch": 1.700607691237185, "grad_norm": 0.1126255914568901, "learning_rate": 0.0007363544800854948, "loss": 2.2286, "step": 439920 }, { "epoch": 1.7006463484405683, "grad_norm": 0.12926539778709412, "learning_rate": 0.0007361962177616338, "loss": 2.2213, "step": 439930 }, { "epoch": 1.7006850056439515, "grad_norm": 0.13191105425357819, "learning_rate": 0.0007360379752540031, "loss": 2.2367, "step": 439940 }, { "epoch": 1.700723662847335, "grad_norm": 0.12585218250751495, "learning_rate": 0.0007358797525551612, "loss": 2.2326, "step": 439950 }, { "epoch": 1.7007623200507183, "grad_norm": 0.12355031073093414, "learning_rate": 0.0007357215496576712, "loss": 2.2416, "step": 439960 }, { "epoch": 1.7008009772541015, "grad_norm": 0.11259719729423523, "learning_rate": 0.0007355633665541006, "loss": 2.2416, "step": 439970 }, { "epoch": 1.7008396344574848, "grad_norm": 0.11513018608093262, "learning_rate": 0.0007354052032370211, "loss": 2.2359, "step": 439980 }, { "epoch": 1.7008782916608682, "grad_norm": 0.12916196882724762, "learning_rate": 0.0007352470596990099, "loss": 2.2247, "step": 439990 }, { "epoch": 1.7009169488642515, "grad_norm": 0.13871927559375763, "learning_rate": 0.0007350889359326483, "loss": 2.2284, "step": 440000 }, { "epoch": 1.7009556060676347, "grad_norm": 0.11416509002447128, "learning_rate": 0.0007349308319305226, "loss": 2.2375, "step": 440010 }, { "epoch": 1.700994263271018, "grad_norm": 0.13041462004184723, "learning_rate": 0.000734772747685223, "loss": 2.2287, "step": 440020 }, { "epoch": 1.7010329204744012, "grad_norm": 0.12060035020112991, "learning_rate": 0.0007346146831893457, "loss": 2.2406, "step": 440030 }, { "epoch": 1.7010715776777845, "grad_norm": 0.11347172409296036, "learning_rate": 0.00073445663843549, "loss": 2.2122, "step": 440040 }, { "epoch": 1.7011102348811677, "grad_norm": 0.1186690703034401, "learning_rate": 0.0007342986134162608, "loss": 2.2252, "step": 440050 }, { "epoch": 1.701148892084551, "grad_norm": 0.12383898347616196, "learning_rate": 0.0007341406081242672, "loss": 2.2199, "step": 440060 }, { "epoch": 1.7011875492879343, "grad_norm": 0.10738757997751236, "learning_rate": 0.0007339826225521231, "loss": 2.2372, "step": 440070 }, { "epoch": 1.7012262064913175, "grad_norm": 0.11477794498205185, "learning_rate": 0.000733824656692447, "loss": 2.2221, "step": 440080 }, { "epoch": 1.7012648636947008, "grad_norm": 0.1131504625082016, "learning_rate": 0.0007336667105378616, "loss": 2.2349, "step": 440090 }, { "epoch": 1.701303520898084, "grad_norm": 0.11446657031774521, "learning_rate": 0.0007335087840809949, "loss": 2.2232, "step": 440100 }, { "epoch": 1.7013421781014673, "grad_norm": 0.11273016035556793, "learning_rate": 0.000733350877314479, "loss": 2.2224, "step": 440110 }, { "epoch": 1.7013808353048507, "grad_norm": 0.12036816030740738, "learning_rate": 0.0007331929902309509, "loss": 2.2417, "step": 440120 }, { "epoch": 1.701419492508234, "grad_norm": 0.10991771519184113, "learning_rate": 0.0007330351228230516, "loss": 2.2201, "step": 440130 }, { "epoch": 1.7014581497116172, "grad_norm": 0.12521474063396454, "learning_rate": 0.0007328772750834273, "loss": 2.2289, "step": 440140 }, { "epoch": 1.7014968069150005, "grad_norm": 0.11640115827322006, "learning_rate": 0.000732719447004729, "loss": 2.231, "step": 440150 }, { "epoch": 1.701535464118384, "grad_norm": 0.10815521329641342, "learning_rate": 0.0007325616385796112, "loss": 2.2207, "step": 440160 }, { "epoch": 1.7015741213217672, "grad_norm": 0.11253977566957474, "learning_rate": 0.0007324038498007342, "loss": 2.2248, "step": 440170 }, { "epoch": 1.7016127785251505, "grad_norm": 0.14925464987754822, "learning_rate": 0.0007322460806607616, "loss": 2.241, "step": 440180 }, { "epoch": 1.7016514357285337, "grad_norm": 0.11597122997045517, "learning_rate": 0.000732088331152363, "loss": 2.2306, "step": 440190 }, { "epoch": 1.701690092931917, "grad_norm": 0.10843616724014282, "learning_rate": 0.0007319306012682114, "loss": 2.2004, "step": 440200 }, { "epoch": 1.7017287501353002, "grad_norm": 0.12545384466648102, "learning_rate": 0.0007317728910009849, "loss": 2.2162, "step": 440210 }, { "epoch": 1.7017674073386835, "grad_norm": 0.12916113436222076, "learning_rate": 0.0007316152003433658, "loss": 2.2277, "step": 440220 }, { "epoch": 1.7018060645420667, "grad_norm": 0.11176025122404099, "learning_rate": 0.0007314575292880417, "loss": 2.2222, "step": 440230 }, { "epoch": 1.70184472174545, "grad_norm": 0.10779713094234467, "learning_rate": 0.0007312998778277036, "loss": 2.2345, "step": 440240 }, { "epoch": 1.7018833789488332, "grad_norm": 0.1066531166434288, "learning_rate": 0.0007311422459550478, "loss": 2.2192, "step": 440250 }, { "epoch": 1.7019220361522165, "grad_norm": 0.10790924727916718, "learning_rate": 0.0007309846336627754, "loss": 2.2154, "step": 440260 }, { "epoch": 1.7019606933555997, "grad_norm": 0.11977184563875198, "learning_rate": 0.0007308270409435916, "loss": 2.2333, "step": 440270 }, { "epoch": 1.7019993505589832, "grad_norm": 0.11540991812944412, "learning_rate": 0.0007306694677902055, "loss": 2.2302, "step": 440280 }, { "epoch": 1.7020380077623665, "grad_norm": 0.11778786778450012, "learning_rate": 0.000730511914195332, "loss": 2.2422, "step": 440290 }, { "epoch": 1.7020766649657497, "grad_norm": 0.11950423568487167, "learning_rate": 0.0007303543801516896, "loss": 2.2264, "step": 440300 }, { "epoch": 1.702115322169133, "grad_norm": 0.11625012010335922, "learning_rate": 0.0007301968656520019, "loss": 2.2313, "step": 440310 }, { "epoch": 1.7021539793725162, "grad_norm": 0.11900748312473297, "learning_rate": 0.0007300393706889965, "loss": 2.2341, "step": 440320 }, { "epoch": 1.7021926365758997, "grad_norm": 0.1217237040400505, "learning_rate": 0.0007298818952554059, "loss": 2.2381, "step": 440330 }, { "epoch": 1.702231293779283, "grad_norm": 0.1119767427444458, "learning_rate": 0.0007297244393439665, "loss": 2.2379, "step": 440340 }, { "epoch": 1.7022699509826662, "grad_norm": 0.11901693046092987, "learning_rate": 0.0007295670029474202, "loss": 2.2327, "step": 440350 }, { "epoch": 1.7023086081860495, "grad_norm": 0.12310083210468292, "learning_rate": 0.0007294095860585128, "loss": 2.2318, "step": 440360 }, { "epoch": 1.7023472653894327, "grad_norm": 0.11015886813402176, "learning_rate": 0.0007292521886699943, "loss": 2.2377, "step": 440370 }, { "epoch": 1.702385922592816, "grad_norm": 0.11281030625104904, "learning_rate": 0.0007290948107746195, "loss": 2.2326, "step": 440380 }, { "epoch": 1.7024245797961992, "grad_norm": 0.11704161763191223, "learning_rate": 0.0007289374523651482, "loss": 2.2232, "step": 440390 }, { "epoch": 1.7024632369995825, "grad_norm": 0.13201874494552612, "learning_rate": 0.0007287801134343437, "loss": 2.2255, "step": 440400 }, { "epoch": 1.7025018942029657, "grad_norm": 0.13003620505332947, "learning_rate": 0.0007286227939749746, "loss": 2.2309, "step": 440410 }, { "epoch": 1.702540551406349, "grad_norm": 0.11561232060194016, "learning_rate": 0.0007284654939798134, "loss": 2.2376, "step": 440420 }, { "epoch": 1.7025792086097322, "grad_norm": 0.13177193701267242, "learning_rate": 0.0007283082134416374, "loss": 2.2421, "step": 440430 }, { "epoch": 1.7026178658131155, "grad_norm": 0.11514715850353241, "learning_rate": 0.0007281509523532284, "loss": 2.2364, "step": 440440 }, { "epoch": 1.702656523016499, "grad_norm": 0.1128527820110321, "learning_rate": 0.0007279937107073722, "loss": 2.2416, "step": 440450 }, { "epoch": 1.7026951802198822, "grad_norm": 0.10707538574934006, "learning_rate": 0.00072783648849686, "loss": 2.235, "step": 440460 }, { "epoch": 1.7027338374232655, "grad_norm": 0.12871518731117249, "learning_rate": 0.0007276792857144863, "loss": 2.2221, "step": 440470 }, { "epoch": 1.7027724946266487, "grad_norm": 0.1377457082271576, "learning_rate": 0.0007275221023530507, "loss": 2.216, "step": 440480 }, { "epoch": 1.702811151830032, "grad_norm": 0.11359865963459015, "learning_rate": 0.0007273649384053573, "loss": 2.2297, "step": 440490 }, { "epoch": 1.7028498090334154, "grad_norm": 0.12144174426794052, "learning_rate": 0.0007272077938642145, "loss": 2.2326, "step": 440500 }, { "epoch": 1.7028884662367987, "grad_norm": 0.10958414524793625, "learning_rate": 0.000727050668722435, "loss": 2.2301, "step": 440510 }, { "epoch": 1.702927123440182, "grad_norm": 0.12128295749425888, "learning_rate": 0.0007268935629728361, "loss": 2.2342, "step": 440520 }, { "epoch": 1.7029657806435652, "grad_norm": 0.12747161090373993, "learning_rate": 0.0007267364766082396, "loss": 2.2195, "step": 440530 }, { "epoch": 1.7030044378469484, "grad_norm": 0.10787870734930038, "learning_rate": 0.0007265794096214715, "loss": 2.234, "step": 440540 }, { "epoch": 1.7030430950503317, "grad_norm": 0.10992727428674698, "learning_rate": 0.0007264223620053625, "loss": 2.2427, "step": 440550 }, { "epoch": 1.703081752253715, "grad_norm": 0.2564323842525482, "learning_rate": 0.0007262653337527474, "loss": 2.2179, "step": 440560 }, { "epoch": 1.7031204094570982, "grad_norm": 0.13141652941703796, "learning_rate": 0.0007261083248564657, "loss": 2.2201, "step": 440570 }, { "epoch": 1.7031590666604814, "grad_norm": 0.11201170086860657, "learning_rate": 0.0007259513353093612, "loss": 2.2353, "step": 440580 }, { "epoch": 1.7031977238638647, "grad_norm": 0.11595015227794647, "learning_rate": 0.000725794365104282, "loss": 2.2409, "step": 440590 }, { "epoch": 1.703236381067248, "grad_norm": 0.127515509724617, "learning_rate": 0.0007256374142340807, "loss": 2.2457, "step": 440600 }, { "epoch": 1.7032750382706312, "grad_norm": 0.11486254632472992, "learning_rate": 0.0007254804826916144, "loss": 2.2223, "step": 440610 }, { "epoch": 1.7033136954740147, "grad_norm": 0.10647368431091309, "learning_rate": 0.0007253235704697447, "loss": 2.2236, "step": 440620 }, { "epoch": 1.703352352677398, "grad_norm": 0.12386307865381241, "learning_rate": 0.0007251666775613371, "loss": 2.2227, "step": 440630 }, { "epoch": 1.7033910098807812, "grad_norm": 0.11835476756095886, "learning_rate": 0.0007250098039592619, "loss": 2.2424, "step": 440640 }, { "epoch": 1.7034296670841644, "grad_norm": 0.12118931114673615, "learning_rate": 0.0007248529496563936, "loss": 2.2326, "step": 440650 }, { "epoch": 1.703468324287548, "grad_norm": 0.11157720535993576, "learning_rate": 0.0007246961146456113, "loss": 2.2188, "step": 440660 }, { "epoch": 1.7035069814909312, "grad_norm": 0.11851327121257782, "learning_rate": 0.000724539298919798, "loss": 2.2229, "step": 440670 }, { "epoch": 1.7035456386943144, "grad_norm": 0.11927254498004913, "learning_rate": 0.000724382502471842, "loss": 2.2241, "step": 440680 }, { "epoch": 1.7035842958976977, "grad_norm": 0.11081274598836899, "learning_rate": 0.0007242257252946351, "loss": 2.2228, "step": 440690 }, { "epoch": 1.703622953101081, "grad_norm": 0.11584481596946716, "learning_rate": 0.0007240689673810736, "loss": 2.2289, "step": 440700 }, { "epoch": 1.7036616103044642, "grad_norm": 0.11202621459960938, "learning_rate": 0.0007239122287240583, "loss": 2.2206, "step": 440710 }, { "epoch": 1.7037002675078474, "grad_norm": 0.14524437487125397, "learning_rate": 0.0007237555093164947, "loss": 2.2355, "step": 440720 }, { "epoch": 1.7037389247112307, "grad_norm": 0.12101338058710098, "learning_rate": 0.000723598809151292, "loss": 2.2313, "step": 440730 }, { "epoch": 1.703777581914614, "grad_norm": 0.11293715983629227, "learning_rate": 0.0007234421282213641, "loss": 2.2358, "step": 440740 }, { "epoch": 1.7038162391179972, "grad_norm": 0.11404402554035187, "learning_rate": 0.0007232854665196295, "loss": 2.223, "step": 440750 }, { "epoch": 1.7038548963213804, "grad_norm": 0.12247447669506073, "learning_rate": 0.0007231288240390106, "loss": 2.227, "step": 440760 }, { "epoch": 1.7038935535247637, "grad_norm": 0.11357226222753525, "learning_rate": 0.0007229722007724342, "loss": 2.2241, "step": 440770 }, { "epoch": 1.703932210728147, "grad_norm": 0.125160813331604, "learning_rate": 0.0007228155967128318, "loss": 2.2468, "step": 440780 }, { "epoch": 1.7039708679315304, "grad_norm": 0.14034807682037354, "learning_rate": 0.0007226590118531389, "loss": 2.2248, "step": 440790 }, { "epoch": 1.7040095251349137, "grad_norm": 0.12015635520219803, "learning_rate": 0.000722502446186295, "loss": 2.2198, "step": 440800 }, { "epoch": 1.704048182338297, "grad_norm": 0.11873217672109604, "learning_rate": 0.000722345899705245, "loss": 2.2231, "step": 440810 }, { "epoch": 1.7040868395416802, "grad_norm": 0.12021807581186295, "learning_rate": 0.0007221893724029369, "loss": 2.2297, "step": 440820 }, { "epoch": 1.7041254967450636, "grad_norm": 0.12242366373538971, "learning_rate": 0.0007220328642723242, "loss": 2.2212, "step": 440830 }, { "epoch": 1.704164153948447, "grad_norm": 0.11865365505218506, "learning_rate": 0.0007218763753063636, "loss": 2.2154, "step": 440840 }, { "epoch": 1.7042028111518301, "grad_norm": 0.11381933093070984, "learning_rate": 0.0007217199054980166, "loss": 2.2479, "step": 440850 }, { "epoch": 1.7042414683552134, "grad_norm": 0.1155766099691391, "learning_rate": 0.000721563454840249, "loss": 2.2197, "step": 440860 }, { "epoch": 1.7042801255585966, "grad_norm": 0.11473380029201508, "learning_rate": 0.0007214070233260313, "loss": 2.2314, "step": 440870 }, { "epoch": 1.70431878276198, "grad_norm": 0.11671894788742065, "learning_rate": 0.0007212506109483377, "loss": 2.2294, "step": 440880 }, { "epoch": 1.7043574399653632, "grad_norm": 0.1232934519648552, "learning_rate": 0.0007210942177001467, "loss": 2.2311, "step": 440890 }, { "epoch": 1.7043960971687464, "grad_norm": 0.125223308801651, "learning_rate": 0.0007209378435744416, "loss": 2.2277, "step": 440900 }, { "epoch": 1.7044347543721297, "grad_norm": 0.11535106599330902, "learning_rate": 0.0007207814885642095, "loss": 2.2408, "step": 440910 }, { "epoch": 1.704473411575513, "grad_norm": 0.1292526125907898, "learning_rate": 0.0007206251526624418, "loss": 2.2395, "step": 440920 }, { "epoch": 1.7045120687788962, "grad_norm": 0.1217828020453453, "learning_rate": 0.000720468835862135, "loss": 2.2428, "step": 440930 }, { "epoch": 1.7045507259822794, "grad_norm": 0.13024525344371796, "learning_rate": 0.0007203125381562887, "loss": 2.2379, "step": 440940 }, { "epoch": 1.7045893831856627, "grad_norm": 0.11056360602378845, "learning_rate": 0.0007201562595379077, "loss": 2.2234, "step": 440950 }, { "epoch": 1.7046280403890461, "grad_norm": 0.11324920505285263, "learning_rate": 0.0007199999999999999, "loss": 2.241, "step": 440960 }, { "epoch": 1.7046666975924294, "grad_norm": 0.1242329403758049, "learning_rate": 0.0007198437595355791, "loss": 2.2321, "step": 440970 }, { "epoch": 1.7047053547958126, "grad_norm": 0.1274898648262024, "learning_rate": 0.0007196875381376621, "loss": 2.2192, "step": 440980 }, { "epoch": 1.704744011999196, "grad_norm": 0.12029425799846649, "learning_rate": 0.0007195313357992708, "loss": 2.2226, "step": 440990 }, { "epoch": 1.7047826692025794, "grad_norm": 0.12936897575855255, "learning_rate": 0.0007193751525134302, "loss": 2.2388, "step": 441000 }, { "epoch": 1.7048213264059626, "grad_norm": 0.10859957337379456, "learning_rate": 0.0007192189882731707, "loss": 2.2357, "step": 441010 }, { "epoch": 1.7048599836093459, "grad_norm": 0.11123181134462357, "learning_rate": 0.0007190628430715267, "loss": 2.2324, "step": 441020 }, { "epoch": 1.7048986408127291, "grad_norm": 0.13620242476463318, "learning_rate": 0.0007189067169015365, "loss": 2.2336, "step": 441030 }, { "epoch": 1.7049372980161124, "grad_norm": 0.1191759929060936, "learning_rate": 0.0007187506097562427, "loss": 2.2249, "step": 441040 }, { "epoch": 1.7049759552194956, "grad_norm": 0.11479087173938751, "learning_rate": 0.0007185945216286924, "loss": 2.2355, "step": 441050 }, { "epoch": 1.7050146124228789, "grad_norm": 0.11262121051549911, "learning_rate": 0.0007184384525119365, "loss": 2.2327, "step": 441060 }, { "epoch": 1.7050532696262621, "grad_norm": 0.11017955839633942, "learning_rate": 0.0007182824023990309, "loss": 2.2182, "step": 441070 }, { "epoch": 1.7050919268296454, "grad_norm": 0.12410023808479309, "learning_rate": 0.0007181263712830348, "loss": 2.217, "step": 441080 }, { "epoch": 1.7051305840330286, "grad_norm": 0.12589412927627563, "learning_rate": 0.0007179703591570124, "loss": 2.2384, "step": 441090 }, { "epoch": 1.705169241236412, "grad_norm": 0.12104815989732742, "learning_rate": 0.0007178143660140315, "loss": 2.2323, "step": 441100 }, { "epoch": 1.7052078984397951, "grad_norm": 0.11863759160041809, "learning_rate": 0.0007176583918471647, "loss": 2.2349, "step": 441110 }, { "epoch": 1.7052465556431784, "grad_norm": 0.12504743039608002, "learning_rate": 0.0007175024366494882, "loss": 2.2385, "step": 441120 }, { "epoch": 1.7052852128465619, "grad_norm": 0.1270468831062317, "learning_rate": 0.0007173465004140831, "loss": 2.2271, "step": 441130 }, { "epoch": 1.7053238700499451, "grad_norm": 0.10368954390287399, "learning_rate": 0.0007171905831340339, "loss": 2.2132, "step": 441140 }, { "epoch": 1.7053625272533284, "grad_norm": 0.11982696503400803, "learning_rate": 0.00071703468480243, "loss": 2.2359, "step": 441150 }, { "epoch": 1.7054011844567116, "grad_norm": 0.11196932196617126, "learning_rate": 0.0007168788054123647, "loss": 2.2196, "step": 441160 }, { "epoch": 1.705439841660095, "grad_norm": 0.10839185863733292, "learning_rate": 0.0007167229449569357, "loss": 2.2373, "step": 441170 }, { "epoch": 1.7054784988634784, "grad_norm": 0.11195364594459534, "learning_rate": 0.0007165671034292442, "loss": 2.2293, "step": 441180 }, { "epoch": 1.7055171560668616, "grad_norm": 0.13077500462532043, "learning_rate": 0.0007164112808223968, "loss": 2.2397, "step": 441190 }, { "epoch": 1.7055558132702449, "grad_norm": 0.12081755697727203, "learning_rate": 0.0007162554771295031, "loss": 2.2298, "step": 441200 }, { "epoch": 1.705594470473628, "grad_norm": 0.11815162748098373, "learning_rate": 0.0007160996923436774, "loss": 2.2314, "step": 441210 }, { "epoch": 1.7056331276770114, "grad_norm": 0.10875197499990463, "learning_rate": 0.0007159439264580384, "loss": 2.2288, "step": 441220 }, { "epoch": 1.7056717848803946, "grad_norm": 0.11378846317529678, "learning_rate": 0.0007157881794657082, "loss": 2.239, "step": 441230 }, { "epoch": 1.7057104420837779, "grad_norm": 0.11096439510583878, "learning_rate": 0.0007156324513598142, "loss": 2.2092, "step": 441240 }, { "epoch": 1.7057490992871611, "grad_norm": 0.12965725362300873, "learning_rate": 0.0007154767421334871, "loss": 2.2304, "step": 441250 }, { "epoch": 1.7057877564905444, "grad_norm": 0.10906452685594559, "learning_rate": 0.0007153210517798621, "loss": 2.2307, "step": 441260 }, { "epoch": 1.7058264136939276, "grad_norm": 0.12163577973842621, "learning_rate": 0.0007151653802920781, "loss": 2.228, "step": 441270 }, { "epoch": 1.7058650708973109, "grad_norm": 0.13851909339427948, "learning_rate": 0.0007150097276632792, "loss": 2.233, "step": 441280 }, { "epoch": 1.7059037281006941, "grad_norm": 0.10406989604234695, "learning_rate": 0.0007148540938866124, "loss": 2.2227, "step": 441290 }, { "epoch": 1.7059423853040776, "grad_norm": 0.10548216104507446, "learning_rate": 0.0007146984789552298, "loss": 2.2178, "step": 441300 }, { "epoch": 1.7059810425074609, "grad_norm": 0.12286142259836197, "learning_rate": 0.000714542882862287, "loss": 2.2449, "step": 441310 }, { "epoch": 1.706019699710844, "grad_norm": 0.12756648659706116, "learning_rate": 0.0007143873056009442, "loss": 2.2351, "step": 441320 }, { "epoch": 1.7060583569142274, "grad_norm": 0.12511231005191803, "learning_rate": 0.0007142317471643656, "loss": 2.2333, "step": 441330 }, { "epoch": 1.7060970141176108, "grad_norm": 0.11963135004043579, "learning_rate": 0.0007140762075457192, "loss": 2.2267, "step": 441340 }, { "epoch": 1.706135671320994, "grad_norm": 0.11961111426353455, "learning_rate": 0.000713920686738178, "loss": 2.2079, "step": 441350 }, { "epoch": 1.7061743285243773, "grad_norm": 0.12484431266784668, "learning_rate": 0.0007137651847349178, "loss": 2.2246, "step": 441360 }, { "epoch": 1.7062129857277606, "grad_norm": 0.12263071537017822, "learning_rate": 0.0007136097015291199, "loss": 2.2212, "step": 441370 }, { "epoch": 1.7062516429311438, "grad_norm": 0.1359153389930725, "learning_rate": 0.0007134542371139687, "loss": 2.2241, "step": 441380 }, { "epoch": 1.706290300134527, "grad_norm": 0.1229487881064415, "learning_rate": 0.0007132987914826534, "loss": 2.2287, "step": 441390 }, { "epoch": 1.7063289573379103, "grad_norm": 0.13365302979946136, "learning_rate": 0.0007131433646283672, "loss": 2.2353, "step": 441400 }, { "epoch": 1.7063676145412936, "grad_norm": 0.10661803930997849, "learning_rate": 0.0007129879565443064, "loss": 2.2285, "step": 441410 }, { "epoch": 1.7064062717446769, "grad_norm": 0.11683390289545059, "learning_rate": 0.000712832567223673, "loss": 2.2084, "step": 441420 }, { "epoch": 1.70644492894806, "grad_norm": 0.11777091771364212, "learning_rate": 0.0007126771966596724, "loss": 2.2091, "step": 441430 }, { "epoch": 1.7064835861514434, "grad_norm": 0.12691377103328705, "learning_rate": 0.0007125218448455136, "loss": 2.2209, "step": 441440 }, { "epoch": 1.7065222433548266, "grad_norm": 0.11685075610876083, "learning_rate": 0.0007123665117744103, "loss": 2.2329, "step": 441450 }, { "epoch": 1.7065609005582099, "grad_norm": 0.11418899893760681, "learning_rate": 0.0007122111974395802, "loss": 2.2261, "step": 441460 }, { "epoch": 1.7065995577615933, "grad_norm": 0.11462923884391785, "learning_rate": 0.0007120559018342449, "loss": 2.2272, "step": 441470 }, { "epoch": 1.7066382149649766, "grad_norm": 0.1235089898109436, "learning_rate": 0.0007119006249516304, "loss": 2.2162, "step": 441480 }, { "epoch": 1.7066768721683598, "grad_norm": 0.12144121527671814, "learning_rate": 0.0007117453667849667, "loss": 2.2373, "step": 441490 }, { "epoch": 1.706715529371743, "grad_norm": 0.12170761823654175, "learning_rate": 0.0007115901273274874, "loss": 2.2289, "step": 441500 }, { "epoch": 1.7067541865751266, "grad_norm": 0.1287725567817688, "learning_rate": 0.0007114349065724308, "loss": 2.2374, "step": 441510 }, { "epoch": 1.7067928437785098, "grad_norm": 0.1142718568444252, "learning_rate": 0.000711279704513039, "loss": 2.225, "step": 441520 }, { "epoch": 1.706831500981893, "grad_norm": 0.1222819834947586, "learning_rate": 0.0007111245211425581, "loss": 2.2287, "step": 441530 }, { "epoch": 1.7068701581852763, "grad_norm": 0.12381406128406525, "learning_rate": 0.0007109693564542385, "loss": 2.2443, "step": 441540 }, { "epoch": 1.7069088153886596, "grad_norm": 0.11265474557876587, "learning_rate": 0.0007108142104413344, "loss": 2.232, "step": 441550 }, { "epoch": 1.7069474725920428, "grad_norm": 0.2676023840904236, "learning_rate": 0.0007106590830971043, "loss": 2.2231, "step": 441560 }, { "epoch": 1.706986129795426, "grad_norm": 0.10900603234767914, "learning_rate": 0.0007105039744148103, "loss": 2.2242, "step": 441570 }, { "epoch": 1.7070247869988093, "grad_norm": 0.13303890824317932, "learning_rate": 0.0007103488843877195, "loss": 2.228, "step": 441580 }, { "epoch": 1.7070634442021926, "grad_norm": 0.11852958798408508, "learning_rate": 0.0007101938130091017, "loss": 2.2438, "step": 441590 }, { "epoch": 1.7071021014055758, "grad_norm": 0.13023656606674194, "learning_rate": 0.0007100387602722321, "loss": 2.2012, "step": 441600 }, { "epoch": 1.707140758608959, "grad_norm": 0.12023354321718216, "learning_rate": 0.000709883726170389, "loss": 2.2342, "step": 441610 }, { "epoch": 1.7071794158123423, "grad_norm": 0.10630054026842117, "learning_rate": 0.0007097287106968551, "loss": 2.215, "step": 441620 }, { "epoch": 1.7072180730157256, "grad_norm": 0.11944133043289185, "learning_rate": 0.000709573713844917, "loss": 2.2422, "step": 441630 }, { "epoch": 1.707256730219109, "grad_norm": 0.1279240846633911, "learning_rate": 0.0007094187356078658, "loss": 2.222, "step": 441640 }, { "epoch": 1.7072953874224923, "grad_norm": 0.10906361043453217, "learning_rate": 0.0007092637759789957, "loss": 2.2284, "step": 441650 }, { "epoch": 1.7073340446258756, "grad_norm": 0.12351000308990479, "learning_rate": 0.000709108834951606, "loss": 2.2395, "step": 441660 }, { "epoch": 1.7073727018292588, "grad_norm": 0.13086514174938202, "learning_rate": 0.0007089539125189992, "loss": 2.2114, "step": 441670 }, { "epoch": 1.7074113590326423, "grad_norm": 0.11564716696739197, "learning_rate": 0.0007087990086744822, "loss": 2.2449, "step": 441680 }, { "epoch": 1.7074500162360255, "grad_norm": 0.1139092743396759, "learning_rate": 0.0007086441234113659, "loss": 2.2346, "step": 441690 }, { "epoch": 1.7074886734394088, "grad_norm": 0.10772178322076797, "learning_rate": 0.0007084892567229648, "loss": 2.2295, "step": 441700 }, { "epoch": 1.707527330642792, "grad_norm": 0.11219760775566101, "learning_rate": 0.000708334408602598, "loss": 2.2395, "step": 441710 }, { "epoch": 1.7075659878461753, "grad_norm": 0.12415587902069092, "learning_rate": 0.0007081795790435885, "loss": 2.2292, "step": 441720 }, { "epoch": 1.7076046450495586, "grad_norm": 0.3202018439769745, "learning_rate": 0.0007080247680392631, "loss": 2.2219, "step": 441730 }, { "epoch": 1.7076433022529418, "grad_norm": 0.1251060515642166, "learning_rate": 0.0007078699755829525, "loss": 2.2395, "step": 441740 }, { "epoch": 1.707681959456325, "grad_norm": 0.11131077259778976, "learning_rate": 0.0007077152016679915, "loss": 2.2115, "step": 441750 }, { "epoch": 1.7077206166597083, "grad_norm": 0.11476995795965195, "learning_rate": 0.000707560446287719, "loss": 2.2454, "step": 441760 }, { "epoch": 1.7077592738630916, "grad_norm": 0.12419164180755615, "learning_rate": 0.000707405709435478, "loss": 2.2283, "step": 441770 }, { "epoch": 1.7077979310664748, "grad_norm": 0.11394430696964264, "learning_rate": 0.000707250991104615, "loss": 2.2277, "step": 441780 }, { "epoch": 1.707836588269858, "grad_norm": 0.11585311591625214, "learning_rate": 0.0007070962912884811, "loss": 2.2278, "step": 441790 }, { "epoch": 1.7078752454732413, "grad_norm": 0.11358223110437393, "learning_rate": 0.0007069416099804309, "loss": 2.2072, "step": 441800 }, { "epoch": 1.7079139026766248, "grad_norm": 0.12552201747894287, "learning_rate": 0.000706786947173823, "loss": 2.23, "step": 441810 }, { "epoch": 1.707952559880008, "grad_norm": 0.12313517183065414, "learning_rate": 0.0007066323028620205, "loss": 2.2224, "step": 441820 }, { "epoch": 1.7079912170833913, "grad_norm": 0.11810357868671417, "learning_rate": 0.0007064776770383898, "loss": 2.2391, "step": 441830 }, { "epoch": 1.7080298742867746, "grad_norm": 0.12771636247634888, "learning_rate": 0.0007063230696963017, "loss": 2.2318, "step": 441840 }, { "epoch": 1.708068531490158, "grad_norm": 0.12263831496238708, "learning_rate": 0.0007061684808291307, "loss": 2.2251, "step": 441850 }, { "epoch": 1.7081071886935413, "grad_norm": 0.11750944703817368, "learning_rate": 0.000706013910430255, "loss": 2.2202, "step": 441860 }, { "epoch": 1.7081458458969245, "grad_norm": 0.11194665729999542, "learning_rate": 0.0007058593584930577, "loss": 2.2213, "step": 441870 }, { "epoch": 1.7081845031003078, "grad_norm": 0.11499247699975967, "learning_rate": 0.0007057048250109251, "loss": 2.2294, "step": 441880 }, { "epoch": 1.708223160303691, "grad_norm": 0.12411858141422272, "learning_rate": 0.0007055503099772475, "loss": 2.2097, "step": 441890 }, { "epoch": 1.7082618175070743, "grad_norm": 0.11611645668745041, "learning_rate": 0.0007053958133854195, "loss": 2.236, "step": 441900 }, { "epoch": 1.7083004747104575, "grad_norm": 0.11352533102035522, "learning_rate": 0.0007052413352288389, "loss": 2.221, "step": 441910 }, { "epoch": 1.7083391319138408, "grad_norm": 0.12064797431230545, "learning_rate": 0.0007050868755009084, "loss": 2.2303, "step": 441920 }, { "epoch": 1.708377789117224, "grad_norm": 0.1243981420993805, "learning_rate": 0.000704932434195034, "loss": 2.2332, "step": 441930 }, { "epoch": 1.7084164463206073, "grad_norm": 0.12540730834007263, "learning_rate": 0.0007047780113046258, "loss": 2.224, "step": 441940 }, { "epoch": 1.7084551035239905, "grad_norm": 0.1284138411283493, "learning_rate": 0.000704623606823098, "loss": 2.2289, "step": 441950 }, { "epoch": 1.7084937607273738, "grad_norm": 0.12039217352867126, "learning_rate": 0.0007044692207438683, "loss": 2.2247, "step": 441960 }, { "epoch": 1.708532417930757, "grad_norm": 0.11266115307807922, "learning_rate": 0.0007043148530603587, "loss": 2.2306, "step": 441970 }, { "epoch": 1.7085710751341405, "grad_norm": 0.12143003940582275, "learning_rate": 0.0007041605037659951, "loss": 2.2297, "step": 441980 }, { "epoch": 1.7086097323375238, "grad_norm": 0.12860099971294403, "learning_rate": 0.0007040061728542068, "loss": 2.2166, "step": 441990 }, { "epoch": 1.708648389540907, "grad_norm": 0.13188594579696655, "learning_rate": 0.0007038518603184281, "loss": 2.2359, "step": 442000 }, { "epoch": 1.7086870467442903, "grad_norm": 0.11238020658493042, "learning_rate": 0.0007036975661520959, "loss": 2.2043, "step": 442010 }, { "epoch": 1.7087257039476738, "grad_norm": 0.11865348368883133, "learning_rate": 0.0007035432903486519, "loss": 2.2362, "step": 442020 }, { "epoch": 1.708764361151057, "grad_norm": 0.11815221607685089, "learning_rate": 0.0007033890329015415, "loss": 2.2237, "step": 442030 }, { "epoch": 1.7088030183544403, "grad_norm": 0.11716797202825546, "learning_rate": 0.0007032347938042138, "loss": 2.2311, "step": 442040 }, { "epoch": 1.7088416755578235, "grad_norm": 0.1247939020395279, "learning_rate": 0.0007030805730501219, "loss": 2.2316, "step": 442050 }, { "epoch": 1.7088803327612068, "grad_norm": 0.1377456784248352, "learning_rate": 0.0007029263706327232, "loss": 2.2231, "step": 442060 }, { "epoch": 1.70891898996459, "grad_norm": 0.11857406049966812, "learning_rate": 0.0007027721865454781, "loss": 2.2285, "step": 442070 }, { "epoch": 1.7089576471679733, "grad_norm": 0.1162823960185051, "learning_rate": 0.0007026180207818515, "loss": 2.2282, "step": 442080 }, { "epoch": 1.7089963043713565, "grad_norm": 0.12219282239675522, "learning_rate": 0.0007024638733353126, "loss": 2.2222, "step": 442090 }, { "epoch": 1.7090349615747398, "grad_norm": 0.12192238122224808, "learning_rate": 0.0007023097441993334, "loss": 2.22, "step": 442100 }, { "epoch": 1.709073618778123, "grad_norm": 0.12040101736783981, "learning_rate": 0.0007021556333673902, "loss": 2.2399, "step": 442110 }, { "epoch": 1.7091122759815063, "grad_norm": 0.11780326068401337, "learning_rate": 0.0007020015408329639, "loss": 2.2216, "step": 442120 }, { "epoch": 1.7091509331848895, "grad_norm": 0.11659206449985504, "learning_rate": 0.0007018474665895383, "loss": 2.245, "step": 442130 }, { "epoch": 1.709189590388273, "grad_norm": 0.121012844145298, "learning_rate": 0.0007016934106306014, "loss": 2.2198, "step": 442140 }, { "epoch": 1.7092282475916563, "grad_norm": 0.1228698194026947, "learning_rate": 0.0007015393729496455, "loss": 2.2225, "step": 442150 }, { "epoch": 1.7092669047950395, "grad_norm": 0.11647068709135056, "learning_rate": 0.000701385353540166, "loss": 2.2216, "step": 442160 }, { "epoch": 1.7093055619984228, "grad_norm": 0.11902830004692078, "learning_rate": 0.0007012313523956624, "loss": 2.2339, "step": 442170 }, { "epoch": 1.709344219201806, "grad_norm": 0.11790589243173599, "learning_rate": 0.0007010773695096386, "loss": 2.2266, "step": 442180 }, { "epoch": 1.7093828764051895, "grad_norm": 0.11872167140245438, "learning_rate": 0.0007009234048756017, "loss": 2.2206, "step": 442190 }, { "epoch": 1.7094215336085727, "grad_norm": 0.11382316797971725, "learning_rate": 0.0007007694584870627, "loss": 2.2259, "step": 442200 }, { "epoch": 1.709460190811956, "grad_norm": 0.12118104845285416, "learning_rate": 0.0007006155303375373, "loss": 2.2368, "step": 442210 }, { "epoch": 1.7094988480153392, "grad_norm": 0.12145468592643738, "learning_rate": 0.0007004616204205434, "loss": 2.2353, "step": 442220 }, { "epoch": 1.7095375052187225, "grad_norm": 0.11479642242193222, "learning_rate": 0.0007003077287296043, "loss": 2.2225, "step": 442230 }, { "epoch": 1.7095761624221057, "grad_norm": 0.12208642810583115, "learning_rate": 0.0007001538552582463, "loss": 2.2327, "step": 442240 }, { "epoch": 1.709614819625489, "grad_norm": 0.11871983855962753, "learning_rate": 0.0007, "loss": 2.2391, "step": 442250 }, { "epoch": 1.7096534768288723, "grad_norm": 0.12440042197704315, "learning_rate": 0.0006998461629483994, "loss": 2.2219, "step": 442260 }, { "epoch": 1.7096921340322555, "grad_norm": 0.11856591701507568, "learning_rate": 0.0006996923440969825, "loss": 2.219, "step": 442270 }, { "epoch": 1.7097307912356388, "grad_norm": 0.12889623641967773, "learning_rate": 0.0006995385434392914, "loss": 2.2218, "step": 442280 }, { "epoch": 1.709769448439022, "grad_norm": 0.11815499514341354, "learning_rate": 0.0006993847609688713, "loss": 2.2189, "step": 442290 }, { "epoch": 1.7098081056424053, "grad_norm": 0.11574645340442657, "learning_rate": 0.000699230996679272, "loss": 2.2393, "step": 442300 }, { "epoch": 1.7098467628457887, "grad_norm": 0.11625123023986816, "learning_rate": 0.0006990772505640468, "loss": 2.2252, "step": 442310 }, { "epoch": 1.709885420049172, "grad_norm": 0.11689998209476471, "learning_rate": 0.0006989235226167526, "loss": 2.2395, "step": 442320 }, { "epoch": 1.7099240772525552, "grad_norm": 0.1170109286904335, "learning_rate": 0.0006987698128309504, "loss": 2.2008, "step": 442330 }, { "epoch": 1.7099627344559385, "grad_norm": 0.12176986038684845, "learning_rate": 0.0006986161212002049, "loss": 2.2235, "step": 442340 }, { "epoch": 1.7100013916593217, "grad_norm": 0.12022269517183304, "learning_rate": 0.0006984624477180846, "loss": 2.2106, "step": 442350 }, { "epoch": 1.7100400488627052, "grad_norm": 0.11816758662462234, "learning_rate": 0.0006983087923781617, "loss": 2.2358, "step": 442360 }, { "epoch": 1.7100787060660885, "grad_norm": 0.11895138025283813, "learning_rate": 0.0006981551551740122, "loss": 2.2133, "step": 442370 }, { "epoch": 1.7101173632694717, "grad_norm": 0.12274456769227982, "learning_rate": 0.0006980015360992164, "loss": 2.2237, "step": 442380 }, { "epoch": 1.710156020472855, "grad_norm": 0.11600464582443237, "learning_rate": 0.0006978479351473577, "loss": 2.2238, "step": 442390 }, { "epoch": 1.7101946776762382, "grad_norm": 0.12388424575328827, "learning_rate": 0.0006976943523120236, "loss": 2.2129, "step": 442400 }, { "epoch": 1.7102333348796215, "grad_norm": 0.12280719727277756, "learning_rate": 0.0006975407875868052, "loss": 2.2335, "step": 442410 }, { "epoch": 1.7102719920830047, "grad_norm": 0.11842313408851624, "learning_rate": 0.0006973872409652975, "loss": 2.2475, "step": 442420 }, { "epoch": 1.710310649286388, "grad_norm": 0.12219894677400589, "learning_rate": 0.0006972337124410995, "loss": 2.2275, "step": 442430 }, { "epoch": 1.7103493064897712, "grad_norm": 0.12560917437076569, "learning_rate": 0.0006970802020078136, "loss": 2.2431, "step": 442440 }, { "epoch": 1.7103879636931545, "grad_norm": 0.11803045868873596, "learning_rate": 0.0006969267096590461, "loss": 2.2286, "step": 442450 }, { "epoch": 1.7104266208965377, "grad_norm": 0.12114856392145157, "learning_rate": 0.0006967732353884074, "loss": 2.2382, "step": 442460 }, { "epoch": 1.710465278099921, "grad_norm": 0.10938682407140732, "learning_rate": 0.0006966197791895105, "loss": 2.2211, "step": 442470 }, { "epoch": 1.7105039353033045, "grad_norm": 0.12028611451387405, "learning_rate": 0.0006964663410559741, "loss": 2.2159, "step": 442480 }, { "epoch": 1.7105425925066877, "grad_norm": 0.1148734763264656, "learning_rate": 0.000696312920981419, "loss": 2.2237, "step": 442490 }, { "epoch": 1.710581249710071, "grad_norm": 0.10638713091611862, "learning_rate": 0.0006961595189594702, "loss": 2.2443, "step": 442500 }, { "epoch": 1.7106199069134542, "grad_norm": 0.12149275839328766, "learning_rate": 0.000696006134983757, "loss": 2.2041, "step": 442510 }, { "epoch": 1.7106585641168375, "grad_norm": 0.1199207454919815, "learning_rate": 0.0006958527690479115, "loss": 2.2283, "step": 442520 }, { "epoch": 1.710697221320221, "grad_norm": 0.12160733342170715, "learning_rate": 0.0006956994211455705, "loss": 2.2289, "step": 442530 }, { "epoch": 1.7107358785236042, "grad_norm": 0.11109825223684311, "learning_rate": 0.0006955460912703738, "loss": 2.2329, "step": 442540 }, { "epoch": 1.7107745357269875, "grad_norm": 0.1456461101770401, "learning_rate": 0.0006953927794159653, "loss": 2.237, "step": 442550 }, { "epoch": 1.7108131929303707, "grad_norm": 0.1287081092596054, "learning_rate": 0.0006952394855759927, "loss": 2.2036, "step": 442560 }, { "epoch": 1.710851850133754, "grad_norm": 0.11479346454143524, "learning_rate": 0.0006950862097441073, "loss": 2.234, "step": 442570 }, { "epoch": 1.7108905073371372, "grad_norm": 0.11606258153915405, "learning_rate": 0.000694932951913964, "loss": 2.2269, "step": 442580 }, { "epoch": 1.7109291645405205, "grad_norm": 0.1183663159608841, "learning_rate": 0.0006947797120792214, "loss": 2.2307, "step": 442590 }, { "epoch": 1.7109678217439037, "grad_norm": 0.11088277399539948, "learning_rate": 0.0006946264902335424, "loss": 2.2407, "step": 442600 }, { "epoch": 1.711006478947287, "grad_norm": 0.12547467648983002, "learning_rate": 0.000694473286370593, "loss": 2.2271, "step": 442610 }, { "epoch": 1.7110451361506702, "grad_norm": 0.11433514952659607, "learning_rate": 0.0006943201004840428, "loss": 2.2148, "step": 442620 }, { "epoch": 1.7110837933540535, "grad_norm": 0.10790496319532394, "learning_rate": 0.0006941669325675659, "loss": 2.2212, "step": 442630 }, { "epoch": 1.7111224505574367, "grad_norm": 0.12909001111984253, "learning_rate": 0.0006940137826148394, "loss": 2.2184, "step": 442640 }, { "epoch": 1.7111611077608202, "grad_norm": 0.12274988740682602, "learning_rate": 0.0006938606506195444, "loss": 2.2007, "step": 442650 }, { "epoch": 1.7111997649642035, "grad_norm": 0.11600089818239212, "learning_rate": 0.0006937075365753655, "loss": 2.2283, "step": 442660 }, { "epoch": 1.7112384221675867, "grad_norm": 0.13487693667411804, "learning_rate": 0.0006935544404759913, "loss": 2.2205, "step": 442670 }, { "epoch": 1.71127707937097, "grad_norm": 0.11452995240688324, "learning_rate": 0.0006934013623151141, "loss": 2.2286, "step": 442680 }, { "epoch": 1.7113157365743534, "grad_norm": 0.11441690474748611, "learning_rate": 0.0006932483020864293, "loss": 2.221, "step": 442690 }, { "epoch": 1.7113543937777367, "grad_norm": 0.23739032447338104, "learning_rate": 0.0006930952597836369, "loss": 2.2291, "step": 442700 }, { "epoch": 1.71139305098112, "grad_norm": 0.12318938225507736, "learning_rate": 0.0006929422354004396, "loss": 2.2293, "step": 442710 }, { "epoch": 1.7114317081845032, "grad_norm": 0.11755301803350449, "learning_rate": 0.0006927892289305446, "loss": 2.2111, "step": 442720 }, { "epoch": 1.7114703653878864, "grad_norm": 0.11043576151132584, "learning_rate": 0.0006926362403676627, "loss": 2.2184, "step": 442730 }, { "epoch": 1.7115090225912697, "grad_norm": 0.12790408730506897, "learning_rate": 0.0006924832697055076, "loss": 2.2348, "step": 442740 }, { "epoch": 1.711547679794653, "grad_norm": 0.12552985548973083, "learning_rate": 0.000692330316937798, "loss": 2.233, "step": 442750 }, { "epoch": 1.7115863369980362, "grad_norm": 0.11527630686759949, "learning_rate": 0.0006921773820582549, "loss": 2.2267, "step": 442760 }, { "epoch": 1.7116249942014194, "grad_norm": 0.14568762481212616, "learning_rate": 0.0006920244650606036, "loss": 2.2405, "step": 442770 }, { "epoch": 1.7116636514048027, "grad_norm": 0.11509274691343307, "learning_rate": 0.0006918715659385735, "loss": 2.2165, "step": 442780 }, { "epoch": 1.711702308608186, "grad_norm": 0.12123371660709381, "learning_rate": 0.0006917186846858968, "loss": 2.2079, "step": 442790 }, { "epoch": 1.7117409658115692, "grad_norm": 0.12158721685409546, "learning_rate": 0.0006915658212963098, "loss": 2.2149, "step": 442800 }, { "epoch": 1.7117796230149525, "grad_norm": 0.14907032251358032, "learning_rate": 0.0006914129757635528, "loss": 2.2102, "step": 442810 }, { "epoch": 1.711818280218336, "grad_norm": 0.11518526077270508, "learning_rate": 0.000691260148081369, "loss": 2.2294, "step": 442820 }, { "epoch": 1.7118569374217192, "grad_norm": 0.10713840276002884, "learning_rate": 0.0006911073382435061, "loss": 2.2254, "step": 442830 }, { "epoch": 1.7118955946251024, "grad_norm": 0.1185065507888794, "learning_rate": 0.0006909545462437143, "loss": 2.224, "step": 442840 }, { "epoch": 1.7119342518284857, "grad_norm": 0.11165358871221542, "learning_rate": 0.0006908017720757487, "loss": 2.2305, "step": 442850 }, { "epoch": 1.7119729090318692, "grad_norm": 0.12423279881477356, "learning_rate": 0.0006906490157333674, "loss": 2.2175, "step": 442860 }, { "epoch": 1.7120115662352524, "grad_norm": 0.11754172295331955, "learning_rate": 0.0006904962772103318, "loss": 2.2327, "step": 442870 }, { "epoch": 1.7120502234386357, "grad_norm": 0.10995957255363464, "learning_rate": 0.0006903435565004079, "loss": 2.2268, "step": 442880 }, { "epoch": 1.712088880642019, "grad_norm": 0.11185383051633835, "learning_rate": 0.0006901908535973647, "loss": 2.2169, "step": 442890 }, { "epoch": 1.7121275378454022, "grad_norm": 0.12045712769031525, "learning_rate": 0.0006900381684949748, "loss": 2.2206, "step": 442900 }, { "epoch": 1.7121661950487854, "grad_norm": 0.1269853264093399, "learning_rate": 0.0006898855011870146, "loss": 2.227, "step": 442910 }, { "epoch": 1.7122048522521687, "grad_norm": 0.12694627046585083, "learning_rate": 0.000689732851667264, "loss": 2.2273, "step": 442920 }, { "epoch": 1.712243509455552, "grad_norm": 0.13340239226818085, "learning_rate": 0.0006895802199295068, "loss": 2.2214, "step": 442930 }, { "epoch": 1.7122821666589352, "grad_norm": 0.1058509573340416, "learning_rate": 0.00068942760596753, "loss": 2.2176, "step": 442940 }, { "epoch": 1.7123208238623184, "grad_norm": 0.11555317789316177, "learning_rate": 0.0006892750097751245, "loss": 2.2183, "step": 442950 }, { "epoch": 1.7123594810657017, "grad_norm": 0.12041497975587845, "learning_rate": 0.0006891224313460848, "loss": 2.2211, "step": 442960 }, { "epoch": 1.712398138269085, "grad_norm": 0.13038748502731323, "learning_rate": 0.0006889698706742092, "loss": 2.2241, "step": 442970 }, { "epoch": 1.7124367954724682, "grad_norm": 0.109809011220932, "learning_rate": 0.000688817327753299, "loss": 2.2233, "step": 442980 }, { "epoch": 1.7124754526758517, "grad_norm": 0.12648747861385345, "learning_rate": 0.0006886648025771595, "loss": 2.2257, "step": 442990 }, { "epoch": 1.712514109879235, "grad_norm": 0.12341243773698807, "learning_rate": 0.0006885122951395999, "loss": 2.2346, "step": 443000 }, { "epoch": 1.7125527670826182, "grad_norm": 0.10717613250017166, "learning_rate": 0.0006883598054344324, "loss": 2.2232, "step": 443010 }, { "epoch": 1.7125914242860014, "grad_norm": 0.12053383141756058, "learning_rate": 0.0006882073334554732, "loss": 2.2433, "step": 443020 }, { "epoch": 1.712630081489385, "grad_norm": 0.11753083020448685, "learning_rate": 0.000688054879196542, "loss": 2.2348, "step": 443030 }, { "epoch": 1.7126687386927681, "grad_norm": 0.13164658844470978, "learning_rate": 0.0006879024426514619, "loss": 2.2298, "step": 443040 }, { "epoch": 1.7127073958961514, "grad_norm": 0.12515607476234436, "learning_rate": 0.0006877500238140602, "loss": 2.2234, "step": 443050 }, { "epoch": 1.7127460530995346, "grad_norm": 0.12741313874721527, "learning_rate": 0.0006875976226781666, "loss": 2.2117, "step": 443060 }, { "epoch": 1.712784710302918, "grad_norm": 0.12356637418270111, "learning_rate": 0.0006874452392376156, "loss": 2.2141, "step": 443070 }, { "epoch": 1.7128233675063012, "grad_norm": 0.11521610617637634, "learning_rate": 0.000687292873486245, "loss": 2.2277, "step": 443080 }, { "epoch": 1.7128620247096844, "grad_norm": 0.11198043078184128, "learning_rate": 0.0006871405254178953, "loss": 2.2267, "step": 443090 }, { "epoch": 1.7129006819130677, "grad_norm": 0.12801900506019592, "learning_rate": 0.0006869881950264118, "loss": 2.2302, "step": 443100 }, { "epoch": 1.712939339116451, "grad_norm": 0.11122292280197144, "learning_rate": 0.0006868358823056426, "loss": 2.2143, "step": 443110 }, { "epoch": 1.7129779963198342, "grad_norm": 0.13782921433448792, "learning_rate": 0.0006866835872494397, "loss": 2.2395, "step": 443120 }, { "epoch": 1.7130166535232174, "grad_norm": 0.11965320259332657, "learning_rate": 0.0006865313098516585, "loss": 2.2362, "step": 443130 }, { "epoch": 1.7130553107266007, "grad_norm": 0.11909079551696777, "learning_rate": 0.000686379050106158, "loss": 2.229, "step": 443140 }, { "epoch": 1.713093967929984, "grad_norm": 0.11601749807596207, "learning_rate": 0.0006862268080068006, "loss": 2.2231, "step": 443150 }, { "epoch": 1.7131326251333674, "grad_norm": 0.12354492396116257, "learning_rate": 0.0006860745835474527, "loss": 2.2202, "step": 443160 }, { "epoch": 1.7131712823367506, "grad_norm": 0.12105528265237808, "learning_rate": 0.0006859223767219837, "loss": 2.2322, "step": 443170 }, { "epoch": 1.713209939540134, "grad_norm": 0.1187669187784195, "learning_rate": 0.000685770187524267, "loss": 2.2257, "step": 443180 }, { "epoch": 1.7132485967435171, "grad_norm": 0.11206571757793427, "learning_rate": 0.0006856180159481796, "loss": 2.2207, "step": 443190 }, { "epoch": 1.7132872539469006, "grad_norm": 0.10545578598976135, "learning_rate": 0.0006854658619876013, "loss": 2.2282, "step": 443200 }, { "epoch": 1.7133259111502839, "grad_norm": 0.12910322844982147, "learning_rate": 0.0006853137256364164, "loss": 2.2446, "step": 443210 }, { "epoch": 1.7133645683536671, "grad_norm": 0.1320909857749939, "learning_rate": 0.000685161606888512, "loss": 2.2198, "step": 443220 }, { "epoch": 1.7134032255570504, "grad_norm": 0.120067298412323, "learning_rate": 0.0006850095057377792, "loss": 2.2227, "step": 443230 }, { "epoch": 1.7134418827604336, "grad_norm": 0.11289399117231369, "learning_rate": 0.0006848574221781123, "loss": 2.2408, "step": 443240 }, { "epoch": 1.7134805399638169, "grad_norm": 0.12850508093833923, "learning_rate": 0.0006847053562034096, "loss": 2.2264, "step": 443250 }, { "epoch": 1.7135191971672001, "grad_norm": 0.11475897580385208, "learning_rate": 0.0006845533078075723, "loss": 2.2214, "step": 443260 }, { "epoch": 1.7135578543705834, "grad_norm": 0.12015476077795029, "learning_rate": 0.0006844012769845054, "loss": 2.2107, "step": 443270 }, { "epoch": 1.7135965115739666, "grad_norm": 0.13094781339168549, "learning_rate": 0.000684249263728118, "loss": 2.2236, "step": 443280 }, { "epoch": 1.71363516877735, "grad_norm": 0.11818785220384598, "learning_rate": 0.0006840972680323214, "loss": 2.2141, "step": 443290 }, { "epoch": 1.7136738259807331, "grad_norm": 0.11630553752183914, "learning_rate": 0.0006839452898910319, "loss": 2.2402, "step": 443300 }, { "epoch": 1.7137124831841164, "grad_norm": 0.11747671663761139, "learning_rate": 0.0006837933292981684, "loss": 2.2243, "step": 443310 }, { "epoch": 1.7137511403874996, "grad_norm": 0.11672230064868927, "learning_rate": 0.0006836413862476534, "loss": 2.243, "step": 443320 }, { "epoch": 1.7137897975908831, "grad_norm": 0.11931329220533371, "learning_rate": 0.0006834894607334128, "loss": 2.2089, "step": 443330 }, { "epoch": 1.7138284547942664, "grad_norm": 0.11820429563522339, "learning_rate": 0.0006833375527493768, "loss": 2.2415, "step": 443340 }, { "epoch": 1.7138671119976496, "grad_norm": 0.11610442399978638, "learning_rate": 0.0006831856622894784, "loss": 2.218, "step": 443350 }, { "epoch": 1.7139057692010329, "grad_norm": 0.1124664694070816, "learning_rate": 0.0006830337893476537, "loss": 2.2184, "step": 443360 }, { "epoch": 1.7139444264044164, "grad_norm": 0.11392097175121307, "learning_rate": 0.0006828819339178435, "loss": 2.2293, "step": 443370 }, { "epoch": 1.7139830836077996, "grad_norm": 0.11781250685453415, "learning_rate": 0.0006827300959939911, "loss": 2.2433, "step": 443380 }, { "epoch": 1.7140217408111829, "grad_norm": 0.11644378304481506, "learning_rate": 0.0006825782755700435, "loss": 2.2392, "step": 443390 }, { "epoch": 1.7140603980145661, "grad_norm": 0.12304277718067169, "learning_rate": 0.0006824264726399516, "loss": 2.2292, "step": 443400 }, { "epoch": 1.7140990552179494, "grad_norm": 0.12490229308605194, "learning_rate": 0.0006822746871976695, "loss": 2.2272, "step": 443410 }, { "epoch": 1.7141377124213326, "grad_norm": 0.1096290871500969, "learning_rate": 0.0006821229192371543, "loss": 2.2061, "step": 443420 }, { "epoch": 1.7141763696247159, "grad_norm": 0.1179693341255188, "learning_rate": 0.0006819711687523675, "loss": 2.2079, "step": 443430 }, { "epoch": 1.7142150268280991, "grad_norm": 0.1262316107749939, "learning_rate": 0.0006818194357372736, "loss": 2.2354, "step": 443440 }, { "epoch": 1.7142536840314824, "grad_norm": 0.12826380133628845, "learning_rate": 0.0006816677201858404, "loss": 2.2022, "step": 443450 }, { "epoch": 1.7142923412348656, "grad_norm": 0.10918699949979782, "learning_rate": 0.0006815160220920393, "loss": 2.2211, "step": 443460 }, { "epoch": 1.7143309984382489, "grad_norm": 0.11608961224555969, "learning_rate": 0.0006813643414498453, "loss": 2.2409, "step": 443470 }, { "epoch": 1.7143696556416321, "grad_norm": 0.12766702473163605, "learning_rate": 0.000681212678253237, "loss": 2.2255, "step": 443480 }, { "epoch": 1.7144083128450154, "grad_norm": 0.11442360281944275, "learning_rate": 0.0006810610324961961, "loss": 2.2186, "step": 443490 }, { "epoch": 1.7144469700483989, "grad_norm": 0.11635372787714005, "learning_rate": 0.0006809094041727082, "loss": 2.2253, "step": 443500 }, { "epoch": 1.714485627251782, "grad_norm": 0.12290455400943756, "learning_rate": 0.0006807577932767615, "loss": 2.2331, "step": 443510 }, { "epoch": 1.7145242844551654, "grad_norm": 0.12026640772819519, "learning_rate": 0.0006806061998023488, "loss": 2.2263, "step": 443520 }, { "epoch": 1.7145629416585486, "grad_norm": 0.11638140678405762, "learning_rate": 0.0006804546237434652, "loss": 2.2382, "step": 443530 }, { "epoch": 1.714601598861932, "grad_norm": 0.11954324692487717, "learning_rate": 0.0006803030650941102, "loss": 2.2281, "step": 443540 }, { "epoch": 1.7146402560653153, "grad_norm": 0.12044857442378998, "learning_rate": 0.0006801515238482867, "loss": 2.2327, "step": 443550 }, { "epoch": 1.7146789132686986, "grad_norm": 0.11535559594631195, "learning_rate": 0.0006799999999999999, "loss": 2.224, "step": 443560 }, { "epoch": 1.7147175704720818, "grad_norm": 0.1135011836886406, "learning_rate": 0.00067984849354326, "loss": 2.2207, "step": 443570 }, { "epoch": 1.714756227675465, "grad_norm": 0.11144589632749557, "learning_rate": 0.0006796970044720797, "loss": 2.2177, "step": 443580 }, { "epoch": 1.7147948848788483, "grad_norm": 0.12376515567302704, "learning_rate": 0.0006795455327804749, "loss": 2.2236, "step": 443590 }, { "epoch": 1.7148335420822316, "grad_norm": 0.11902966350317001, "learning_rate": 0.0006793940784624657, "loss": 2.2315, "step": 443600 }, { "epoch": 1.7148721992856149, "grad_norm": 0.1146560087800026, "learning_rate": 0.0006792426415120756, "loss": 2.2475, "step": 443610 }, { "epoch": 1.714910856488998, "grad_norm": 0.1380588263273239, "learning_rate": 0.0006790912219233305, "loss": 2.2383, "step": 443620 }, { "epoch": 1.7149495136923814, "grad_norm": 0.11876972764730453, "learning_rate": 0.000678939819690261, "loss": 2.2289, "step": 443630 }, { "epoch": 1.7149881708957646, "grad_norm": 0.11715719103813171, "learning_rate": 0.0006787884348069005, "loss": 2.2285, "step": 443640 }, { "epoch": 1.7150268280991479, "grad_norm": 0.1241312250494957, "learning_rate": 0.0006786370672672857, "loss": 2.2041, "step": 443650 }, { "epoch": 1.7150654853025311, "grad_norm": 0.13172374665737152, "learning_rate": 0.0006784857170654568, "loss": 2.2239, "step": 443660 }, { "epoch": 1.7151041425059146, "grad_norm": 0.13301695883274078, "learning_rate": 0.0006783343841954577, "loss": 2.2153, "step": 443670 }, { "epoch": 1.7151427997092978, "grad_norm": 0.12190935015678406, "learning_rate": 0.0006781830686513355, "loss": 2.2298, "step": 443680 }, { "epoch": 1.715181456912681, "grad_norm": 0.11668330430984497, "learning_rate": 0.0006780317704271409, "loss": 2.22, "step": 443690 }, { "epoch": 1.7152201141160643, "grad_norm": 0.11997809261083603, "learning_rate": 0.0006778804895169273, "loss": 2.2273, "step": 443700 }, { "epoch": 1.7152587713194478, "grad_norm": 0.11524324119091034, "learning_rate": 0.0006777292259147525, "loss": 2.2307, "step": 443710 }, { "epoch": 1.715297428522831, "grad_norm": 0.12497083842754364, "learning_rate": 0.0006775779796146769, "loss": 2.2292, "step": 443720 }, { "epoch": 1.7153360857262143, "grad_norm": 0.1098659485578537, "learning_rate": 0.0006774267506107648, "loss": 2.2332, "step": 443730 }, { "epoch": 1.7153747429295976, "grad_norm": 1.0404858589172363, "learning_rate": 0.0006772755388970839, "loss": 2.2316, "step": 443740 }, { "epoch": 1.7154134001329808, "grad_norm": 0.11820700019598007, "learning_rate": 0.0006771243444677047, "loss": 2.2289, "step": 443750 }, { "epoch": 1.715452057336364, "grad_norm": 0.11342157423496246, "learning_rate": 0.0006769731673167019, "loss": 2.2338, "step": 443760 }, { "epoch": 1.7154907145397473, "grad_norm": 0.1244228407740593, "learning_rate": 0.0006768220074381528, "loss": 2.2127, "step": 443770 }, { "epoch": 1.7155293717431306, "grad_norm": 0.13897007703781128, "learning_rate": 0.0006766708648261386, "loss": 2.2198, "step": 443780 }, { "epoch": 1.7155680289465138, "grad_norm": 0.11900126934051514, "learning_rate": 0.0006765197394747438, "loss": 2.2161, "step": 443790 }, { "epoch": 1.715606686149897, "grad_norm": 0.12850025296211243, "learning_rate": 0.0006763686313780562, "loss": 2.2388, "step": 443800 }, { "epoch": 1.7156453433532803, "grad_norm": 0.11627134680747986, "learning_rate": 0.0006762175405301672, "loss": 2.217, "step": 443810 }, { "epoch": 1.7156840005566636, "grad_norm": 0.11488353461027145, "learning_rate": 0.0006760664669251709, "loss": 2.2271, "step": 443820 }, { "epoch": 1.7157226577600468, "grad_norm": 0.12036576122045517, "learning_rate": 0.0006759154105571654, "loss": 2.2228, "step": 443830 }, { "epoch": 1.7157613149634303, "grad_norm": 0.1195918470621109, "learning_rate": 0.0006757643714202521, "loss": 2.2124, "step": 443840 }, { "epoch": 1.7157999721668136, "grad_norm": 0.12076837569475174, "learning_rate": 0.0006756133495085356, "loss": 2.224, "step": 443850 }, { "epoch": 1.7158386293701968, "grad_norm": 0.1215423047542572, "learning_rate": 0.0006754623448161242, "loss": 2.2251, "step": 443860 }, { "epoch": 1.71587728657358, "grad_norm": 0.14442507922649384, "learning_rate": 0.0006753113573371288, "loss": 2.2188, "step": 443870 }, { "epoch": 1.7159159437769635, "grad_norm": 0.1276344209909439, "learning_rate": 0.0006751603870656644, "loss": 2.2229, "step": 443880 }, { "epoch": 1.7159546009803468, "grad_norm": 0.1226276233792305, "learning_rate": 0.0006750094339958492, "loss": 2.2298, "step": 443890 }, { "epoch": 1.71599325818373, "grad_norm": 0.12590661644935608, "learning_rate": 0.0006748584981218044, "loss": 2.2336, "step": 443900 }, { "epoch": 1.7160319153871133, "grad_norm": 0.10643420368432999, "learning_rate": 0.000674707579437655, "loss": 2.2199, "step": 443910 }, { "epoch": 1.7160705725904966, "grad_norm": 0.12374252825975418, "learning_rate": 0.0006745566779375287, "loss": 2.2253, "step": 443920 }, { "epoch": 1.7161092297938798, "grad_norm": 0.13089606165885925, "learning_rate": 0.0006744057936155574, "loss": 2.229, "step": 443930 }, { "epoch": 1.716147886997263, "grad_norm": 0.12076699733734131, "learning_rate": 0.0006742549264658759, "loss": 2.2205, "step": 443940 }, { "epoch": 1.7161865442006463, "grad_norm": 0.11502376198768616, "learning_rate": 0.0006741040764826222, "loss": 2.2202, "step": 443950 }, { "epoch": 1.7162252014040296, "grad_norm": 0.11473656445741653, "learning_rate": 0.0006739532436599381, "loss": 2.2108, "step": 443960 }, { "epoch": 1.7162638586074128, "grad_norm": 0.1203451007604599, "learning_rate": 0.0006738024279919676, "loss": 2.2314, "step": 443970 }, { "epoch": 1.716302515810796, "grad_norm": 0.12273450940847397, "learning_rate": 0.0006736516294728598, "loss": 2.2244, "step": 443980 }, { "epoch": 1.7163411730141793, "grad_norm": 0.12457942962646484, "learning_rate": 0.0006735008480967657, "loss": 2.2277, "step": 443990 }, { "epoch": 1.7163798302175626, "grad_norm": 0.15763746201992035, "learning_rate": 0.0006733500838578401, "loss": 2.2214, "step": 444000 }, { "epoch": 1.716418487420946, "grad_norm": 0.16036611795425415, "learning_rate": 0.0006731993367502412, "loss": 2.2192, "step": 444010 }, { "epoch": 1.7164571446243293, "grad_norm": 0.12345317006111145, "learning_rate": 0.0006730486067681303, "loss": 2.231, "step": 444020 }, { "epoch": 1.7164958018277126, "grad_norm": 0.11703797429800034, "learning_rate": 0.0006728978939056725, "loss": 2.2239, "step": 444030 }, { "epoch": 1.7165344590310958, "grad_norm": 0.12373912334442139, "learning_rate": 0.0006727471981570354, "loss": 2.2086, "step": 444040 }, { "epoch": 1.7165731162344793, "grad_norm": 0.11341448873281479, "learning_rate": 0.0006725965195163905, "loss": 2.2288, "step": 444050 }, { "epoch": 1.7166117734378625, "grad_norm": 0.12175527960062027, "learning_rate": 0.0006724458579779129, "loss": 2.2337, "step": 444060 }, { "epoch": 1.7166504306412458, "grad_norm": 0.11818042397499084, "learning_rate": 0.00067229521353578, "loss": 2.2296, "step": 444070 }, { "epoch": 1.716689087844629, "grad_norm": 0.10906556248664856, "learning_rate": 0.0006721445861841735, "loss": 2.2157, "step": 444080 }, { "epoch": 1.7167277450480123, "grad_norm": 0.11369844526052475, "learning_rate": 0.0006719939759172777, "loss": 2.2194, "step": 444090 }, { "epoch": 1.7167664022513955, "grad_norm": 0.10923272371292114, "learning_rate": 0.0006718433827292807, "loss": 2.2263, "step": 444100 }, { "epoch": 1.7168050594547788, "grad_norm": 0.1153886690735817, "learning_rate": 0.0006716928066143735, "loss": 2.2018, "step": 444110 }, { "epoch": 1.716843716658162, "grad_norm": 0.11441920697689056, "learning_rate": 0.0006715422475667508, "loss": 2.211, "step": 444120 }, { "epoch": 1.7168823738615453, "grad_norm": 0.14035643637180328, "learning_rate": 0.0006713917055806102, "loss": 2.2223, "step": 444130 }, { "epoch": 1.7169210310649285, "grad_norm": 0.11534959822893143, "learning_rate": 0.0006712411806501529, "loss": 2.2296, "step": 444140 }, { "epoch": 1.7169596882683118, "grad_norm": 0.13395413756370544, "learning_rate": 0.0006710906727695829, "loss": 2.2234, "step": 444150 }, { "epoch": 1.716998345471695, "grad_norm": 0.12049634009599686, "learning_rate": 0.000670940181933108, "loss": 2.2113, "step": 444160 }, { "epoch": 1.7170370026750785, "grad_norm": 0.11423641443252563, "learning_rate": 0.0006707897081349392, "loss": 2.2294, "step": 444170 }, { "epoch": 1.7170756598784618, "grad_norm": 0.11800158768892288, "learning_rate": 0.0006706392513692907, "loss": 2.2239, "step": 444180 }, { "epoch": 1.717114317081845, "grad_norm": 0.11680900305509567, "learning_rate": 0.0006704888116303798, "loss": 2.2212, "step": 444190 }, { "epoch": 1.7171529742852283, "grad_norm": 0.1276027113199234, "learning_rate": 0.000670338388912427, "loss": 2.2155, "step": 444200 }, { "epoch": 1.7171916314886115, "grad_norm": 0.12111736088991165, "learning_rate": 0.0006701879832096569, "loss": 2.2268, "step": 444210 }, { "epoch": 1.717230288691995, "grad_norm": 0.12418892979621887, "learning_rate": 0.0006700375945162962, "loss": 2.227, "step": 444220 }, { "epoch": 1.7172689458953783, "grad_norm": 0.13062898814678192, "learning_rate": 0.0006698872228265755, "loss": 2.2251, "step": 444230 }, { "epoch": 1.7173076030987615, "grad_norm": 0.14824402332305908, "learning_rate": 0.0006697368681347287, "loss": 2.2186, "step": 444240 }, { "epoch": 1.7173462603021448, "grad_norm": 0.1204281598329544, "learning_rate": 0.0006695865304349928, "loss": 2.2185, "step": 444250 }, { "epoch": 1.717384917505528, "grad_norm": 0.12832044064998627, "learning_rate": 0.0006694362097216083, "loss": 2.2203, "step": 444260 }, { "epoch": 1.7174235747089113, "grad_norm": 0.1236930713057518, "learning_rate": 0.0006692859059888183, "loss": 2.2224, "step": 444270 }, { "epoch": 1.7174622319122945, "grad_norm": 0.12458539754152298, "learning_rate": 0.00066913561923087, "loss": 2.2163, "step": 444280 }, { "epoch": 1.7175008891156778, "grad_norm": 0.11675764620304108, "learning_rate": 0.0006689853494420131, "loss": 2.2251, "step": 444290 }, { "epoch": 1.717539546319061, "grad_norm": 0.12367656826972961, "learning_rate": 0.0006688350966165012, "loss": 2.241, "step": 444300 }, { "epoch": 1.7175782035224443, "grad_norm": 0.11717633903026581, "learning_rate": 0.0006686848607485905, "loss": 2.2313, "step": 444310 }, { "epoch": 1.7176168607258275, "grad_norm": 0.12098561972379684, "learning_rate": 0.0006685346418325411, "loss": 2.2126, "step": 444320 }, { "epoch": 1.7176555179292108, "grad_norm": 0.11992432177066803, "learning_rate": 0.0006683844398626156, "loss": 2.2176, "step": 444330 }, { "epoch": 1.7176941751325943, "grad_norm": 0.11694101244211197, "learning_rate": 0.0006682342548330806, "loss": 2.213, "step": 444340 }, { "epoch": 1.7177328323359775, "grad_norm": 0.12024518102407455, "learning_rate": 0.0006680840867382056, "loss": 2.2342, "step": 444350 }, { "epoch": 1.7177714895393608, "grad_norm": 0.11152735352516174, "learning_rate": 0.0006679339355722631, "loss": 2.2141, "step": 444360 }, { "epoch": 1.717810146742744, "grad_norm": 0.12339860945940018, "learning_rate": 0.000667783801329529, "loss": 2.2292, "step": 444370 }, { "epoch": 1.7178488039461273, "grad_norm": 0.12653346359729767, "learning_rate": 0.0006676336840042827, "loss": 2.2173, "step": 444380 }, { "epoch": 1.7178874611495107, "grad_norm": 0.11451555788516998, "learning_rate": 0.0006674835835908062, "loss": 2.2211, "step": 444390 }, { "epoch": 1.717926118352894, "grad_norm": 0.12210563570261002, "learning_rate": 0.0006673335000833856, "loss": 2.2222, "step": 444400 }, { "epoch": 1.7179647755562772, "grad_norm": 0.13091878592967987, "learning_rate": 0.0006671834334763091, "loss": 2.2273, "step": 444410 }, { "epoch": 1.7180034327596605, "grad_norm": 0.13674227893352509, "learning_rate": 0.0006670333837638694, "loss": 2.2236, "step": 444420 }, { "epoch": 1.7180420899630438, "grad_norm": 0.13061882555484772, "learning_rate": 0.0006668833509403614, "loss": 2.2297, "step": 444430 }, { "epoch": 1.718080747166427, "grad_norm": 0.11517611145973206, "learning_rate": 0.0006667333350000833, "loss": 2.2195, "step": 444440 }, { "epoch": 1.7181194043698103, "grad_norm": 0.12318595498800278, "learning_rate": 0.0006665833359373372, "loss": 2.2204, "step": 444450 }, { "epoch": 1.7181580615731935, "grad_norm": 0.11372929066419601, "learning_rate": 0.000666433353746428, "loss": 2.2293, "step": 444460 }, { "epoch": 1.7181967187765768, "grad_norm": 0.13681162893772125, "learning_rate": 0.0006662833884216633, "loss": 2.2225, "step": 444470 }, { "epoch": 1.71823537597996, "grad_norm": 0.12110299617052078, "learning_rate": 0.0006661334399573547, "loss": 2.2333, "step": 444480 }, { "epoch": 1.7182740331833433, "grad_norm": 0.11480377614498138, "learning_rate": 0.0006659835083478165, "loss": 2.2093, "step": 444490 }, { "epoch": 1.7183126903867265, "grad_norm": 0.11662760376930237, "learning_rate": 0.0006658335935873668, "loss": 2.2338, "step": 444500 }, { "epoch": 1.71835134759011, "grad_norm": 0.1263725608587265, "learning_rate": 0.0006656836956703258, "loss": 2.2405, "step": 444510 }, { "epoch": 1.7183900047934932, "grad_norm": 0.1221756860613823, "learning_rate": 0.000665533814591018, "loss": 2.1957, "step": 444520 }, { "epoch": 1.7184286619968765, "grad_norm": 0.12357661873102188, "learning_rate": 0.0006653839503437702, "loss": 2.2182, "step": 444530 }, { "epoch": 1.7184673192002597, "grad_norm": 0.11788289994001389, "learning_rate": 0.0006652341029229131, "loss": 2.2142, "step": 444540 }, { "epoch": 1.7185059764036432, "grad_norm": 0.11715588718652725, "learning_rate": 0.0006650842723227806, "loss": 2.2243, "step": 444550 }, { "epoch": 1.7185446336070265, "grad_norm": 0.12393707782030106, "learning_rate": 0.000664934458537709, "loss": 2.2029, "step": 444560 }, { "epoch": 1.7185832908104097, "grad_norm": 0.11894424259662628, "learning_rate": 0.0006647846615620385, "loss": 2.2181, "step": 444570 }, { "epoch": 1.718621948013793, "grad_norm": 0.124919593334198, "learning_rate": 0.0006646348813901121, "loss": 2.2404, "step": 444580 }, { "epoch": 1.7186606052171762, "grad_norm": 0.1154218316078186, "learning_rate": 0.000664485118016276, "loss": 2.223, "step": 444590 }, { "epoch": 1.7186992624205595, "grad_norm": 0.11173471808433533, "learning_rate": 0.0006643353714348801, "loss": 2.2221, "step": 444600 }, { "epoch": 1.7187379196239427, "grad_norm": 0.11307603120803833, "learning_rate": 0.0006641856416402765, "loss": 2.2244, "step": 444610 }, { "epoch": 1.718776576827326, "grad_norm": 0.12163484841585159, "learning_rate": 0.0006640359286268212, "loss": 2.2129, "step": 444620 }, { "epoch": 1.7188152340307092, "grad_norm": 0.11416550725698471, "learning_rate": 0.0006638862323888733, "loss": 2.2057, "step": 444630 }, { "epoch": 1.7188538912340925, "grad_norm": 0.13622942566871643, "learning_rate": 0.0006637365529207948, "loss": 2.2229, "step": 444640 }, { "epoch": 1.7188925484374757, "grad_norm": 0.12028181552886963, "learning_rate": 0.0006635868902169508, "loss": 2.1998, "step": 444650 }, { "epoch": 1.718931205640859, "grad_norm": 0.12066903710365295, "learning_rate": 0.0006634372442717103, "loss": 2.231, "step": 444660 }, { "epoch": 1.7189698628442422, "grad_norm": 0.13217706978321075, "learning_rate": 0.0006632876150794442, "loss": 2.2073, "step": 444670 }, { "epoch": 1.7190085200476257, "grad_norm": 0.12672095000743866, "learning_rate": 0.0006631380026345278, "loss": 2.2151, "step": 444680 }, { "epoch": 1.719047177251009, "grad_norm": 0.12009608000516891, "learning_rate": 0.0006629884069313385, "loss": 2.2226, "step": 444690 }, { "epoch": 1.7190858344543922, "grad_norm": 0.1271923929452896, "learning_rate": 0.0006628388279642576, "loss": 2.2376, "step": 444700 }, { "epoch": 1.7191244916577755, "grad_norm": 0.11179065704345703, "learning_rate": 0.0006626892657276695, "loss": 2.2257, "step": 444710 }, { "epoch": 1.719163148861159, "grad_norm": 0.11001638323068619, "learning_rate": 0.0006625397202159611, "loss": 2.2217, "step": 444720 }, { "epoch": 1.7192018060645422, "grad_norm": 0.12382563203573227, "learning_rate": 0.0006623901914235229, "loss": 2.2224, "step": 444730 }, { "epoch": 1.7192404632679255, "grad_norm": 0.13235601782798767, "learning_rate": 0.0006622406793447485, "loss": 2.2283, "step": 444740 }, { "epoch": 1.7192791204713087, "grad_norm": 0.12467984110116959, "learning_rate": 0.0006620911839740349, "loss": 2.2097, "step": 444750 }, { "epoch": 1.719317777674692, "grad_norm": 0.11879763007164001, "learning_rate": 0.0006619417053057814, "loss": 2.215, "step": 444760 }, { "epoch": 1.7193564348780752, "grad_norm": 0.11860118806362152, "learning_rate": 0.0006617922433343917, "loss": 2.2388, "step": 444770 }, { "epoch": 1.7193950920814585, "grad_norm": 0.12211289256811142, "learning_rate": 0.0006616427980542714, "loss": 2.2081, "step": 444780 }, { "epoch": 1.7194337492848417, "grad_norm": 0.1151881217956543, "learning_rate": 0.0006614933694598298, "loss": 2.232, "step": 444790 }, { "epoch": 1.719472406488225, "grad_norm": 0.11083666235208511, "learning_rate": 0.0006613439575454791, "loss": 2.2128, "step": 444800 }, { "epoch": 1.7195110636916082, "grad_norm": 0.12252917885780334, "learning_rate": 0.000661194562305635, "loss": 2.2166, "step": 444810 }, { "epoch": 1.7195497208949915, "grad_norm": 0.1250115931034088, "learning_rate": 0.0006610451837347162, "loss": 2.2224, "step": 444820 }, { "epoch": 1.7195883780983747, "grad_norm": 0.11606471240520477, "learning_rate": 0.0006608958218271441, "loss": 2.2206, "step": 444830 }, { "epoch": 1.719627035301758, "grad_norm": 0.11997007578611374, "learning_rate": 0.0006607464765773435, "loss": 2.2247, "step": 444840 }, { "epoch": 1.7196656925051415, "grad_norm": 0.15658508241176605, "learning_rate": 0.0006605971479797424, "loss": 2.2172, "step": 444850 }, { "epoch": 1.7197043497085247, "grad_norm": 0.12012956291437149, "learning_rate": 0.000660447836028772, "loss": 2.2259, "step": 444860 }, { "epoch": 1.719743006911908, "grad_norm": 0.12634234130382538, "learning_rate": 0.000660298540718866, "loss": 2.2114, "step": 444870 }, { "epoch": 1.7197816641152912, "grad_norm": 0.11848779022693634, "learning_rate": 0.000660149262044462, "loss": 2.2296, "step": 444880 }, { "epoch": 1.7198203213186747, "grad_norm": 0.11735141277313232, "learning_rate": 0.0006599999999999999, "loss": 2.2307, "step": 444890 }, { "epoch": 1.719858978522058, "grad_norm": 0.12146174907684326, "learning_rate": 0.0006598507545799237, "loss": 2.237, "step": 444900 }, { "epoch": 1.7198976357254412, "grad_norm": 0.11710981279611588, "learning_rate": 0.0006597015257786793, "loss": 2.2072, "step": 444910 }, { "epoch": 1.7199362929288244, "grad_norm": 0.10774870216846466, "learning_rate": 0.0006595523135907169, "loss": 2.2138, "step": 444920 }, { "epoch": 1.7199749501322077, "grad_norm": 0.10720662027597427, "learning_rate": 0.0006594031180104887, "loss": 2.2166, "step": 444930 }, { "epoch": 1.720013607335591, "grad_norm": 0.12658320367336273, "learning_rate": 0.0006592539390324506, "loss": 2.2122, "step": 444940 }, { "epoch": 1.7200522645389742, "grad_norm": 0.12777094542980194, "learning_rate": 0.0006591047766510614, "loss": 2.22, "step": 444950 }, { "epoch": 1.7200909217423574, "grad_norm": 0.12313709408044815, "learning_rate": 0.0006589556308607831, "loss": 2.2184, "step": 444960 }, { "epoch": 1.7201295789457407, "grad_norm": 0.12547916173934937, "learning_rate": 0.0006588065016560809, "loss": 2.2357, "step": 444970 }, { "epoch": 1.720168236149124, "grad_norm": 0.12291128933429718, "learning_rate": 0.0006586573890314227, "loss": 2.2213, "step": 444980 }, { "epoch": 1.7202068933525072, "grad_norm": 0.12009264528751373, "learning_rate": 0.0006585082929812798, "loss": 2.2199, "step": 444990 }, { "epoch": 1.7202455505558905, "grad_norm": 0.12587212026119232, "learning_rate": 0.0006583592135001262, "loss": 2.2265, "step": 445000 }, { "epoch": 1.7202842077592737, "grad_norm": 0.11582623422145844, "learning_rate": 0.0006582101505824394, "loss": 2.226, "step": 445010 }, { "epoch": 1.7203228649626572, "grad_norm": 0.11064667254686356, "learning_rate": 0.0006580611042226998, "loss": 2.2131, "step": 445020 }, { "epoch": 1.7203615221660404, "grad_norm": 0.129017174243927, "learning_rate": 0.0006579120744153906, "loss": 2.2159, "step": 445030 }, { "epoch": 1.7204001793694237, "grad_norm": 0.12192103266716003, "learning_rate": 0.0006577630611549985, "loss": 2.217, "step": 445040 }, { "epoch": 1.720438836572807, "grad_norm": 0.12519840896129608, "learning_rate": 0.0006576140644360133, "loss": 2.2237, "step": 445050 }, { "epoch": 1.7204774937761904, "grad_norm": 0.12600666284561157, "learning_rate": 0.0006574650842529271, "loss": 2.2137, "step": 445060 }, { "epoch": 1.7205161509795737, "grad_norm": 0.12854309380054474, "learning_rate": 0.0006573161206002361, "loss": 2.2217, "step": 445070 }, { "epoch": 1.720554808182957, "grad_norm": 0.13512104749679565, "learning_rate": 0.0006571671734724385, "loss": 2.2124, "step": 445080 }, { "epoch": 1.7205934653863402, "grad_norm": 0.12210392951965332, "learning_rate": 0.0006570182428640366, "loss": 2.2163, "step": 445090 }, { "epoch": 1.7206321225897234, "grad_norm": 0.12861166894435883, "learning_rate": 0.0006568693287695348, "loss": 2.2293, "step": 445100 }, { "epoch": 1.7206707797931067, "grad_norm": 0.13264372944831848, "learning_rate": 0.0006567204311834411, "loss": 2.2396, "step": 445110 }, { "epoch": 1.72070943699649, "grad_norm": 0.11817973107099533, "learning_rate": 0.0006565715501002669, "loss": 2.2088, "step": 445120 }, { "epoch": 1.7207480941998732, "grad_norm": 0.11759193986654282, "learning_rate": 0.0006564226855145254, "loss": 2.2167, "step": 445130 }, { "epoch": 1.7207867514032564, "grad_norm": 0.12298763543367386, "learning_rate": 0.000656273837420734, "loss": 2.2151, "step": 445140 }, { "epoch": 1.7208254086066397, "grad_norm": 0.11796259880065918, "learning_rate": 0.0006561250058134127, "loss": 2.2232, "step": 445150 }, { "epoch": 1.720864065810023, "grad_norm": 0.12086260318756104, "learning_rate": 0.0006559761906870847, "loss": 2.2131, "step": 445160 }, { "epoch": 1.7209027230134062, "grad_norm": 0.1197434514760971, "learning_rate": 0.0006558273920362758, "loss": 2.2348, "step": 445170 }, { "epoch": 1.7209413802167894, "grad_norm": 0.1237284317612648, "learning_rate": 0.0006556786098555152, "loss": 2.2158, "step": 445180 }, { "epoch": 1.720980037420173, "grad_norm": 0.1284116953611374, "learning_rate": 0.0006555298441393353, "loss": 2.2081, "step": 445190 }, { "epoch": 1.7210186946235562, "grad_norm": 0.11554042249917984, "learning_rate": 0.0006553810948822711, "loss": 2.2232, "step": 445200 }, { "epoch": 1.7210573518269394, "grad_norm": 0.11683430522680283, "learning_rate": 0.000655232362078861, "loss": 2.2306, "step": 445210 }, { "epoch": 1.7210960090303227, "grad_norm": 0.12061230093240738, "learning_rate": 0.0006550836457236458, "loss": 2.2214, "step": 445220 }, { "epoch": 1.7211346662337061, "grad_norm": 0.11342422664165497, "learning_rate": 0.0006549349458111702, "loss": 2.2395, "step": 445230 }, { "epoch": 1.7211733234370894, "grad_norm": 0.11453254520893097, "learning_rate": 0.000654786262335981, "loss": 2.2273, "step": 445240 }, { "epoch": 1.7212119806404726, "grad_norm": 0.11891185492277145, "learning_rate": 0.0006546375952926289, "loss": 2.2225, "step": 445250 }, { "epoch": 1.721250637843856, "grad_norm": 0.12148097902536392, "learning_rate": 0.0006544889446756672, "loss": 2.2243, "step": 445260 }, { "epoch": 1.7212892950472392, "grad_norm": 0.11537045985460281, "learning_rate": 0.0006543403104796518, "loss": 2.2045, "step": 445270 }, { "epoch": 1.7213279522506224, "grad_norm": 0.11426082253456116, "learning_rate": 0.0006541916926991422, "loss": 2.2174, "step": 445280 }, { "epoch": 1.7213666094540057, "grad_norm": 0.14135867357254028, "learning_rate": 0.0006540430913287008, "loss": 2.221, "step": 445290 }, { "epoch": 1.721405266657389, "grad_norm": 0.12051600217819214, "learning_rate": 0.0006538945063628927, "loss": 2.2112, "step": 445300 }, { "epoch": 1.7214439238607722, "grad_norm": 0.12268465012311935, "learning_rate": 0.0006537459377962864, "loss": 2.2138, "step": 445310 }, { "epoch": 1.7214825810641554, "grad_norm": 0.1147061213850975, "learning_rate": 0.0006535973856234534, "loss": 2.2405, "step": 445320 }, { "epoch": 1.7215212382675387, "grad_norm": 0.12111300975084305, "learning_rate": 0.0006534488498389675, "loss": 2.2157, "step": 445330 }, { "epoch": 1.721559895470922, "grad_norm": 0.1415233463048935, "learning_rate": 0.0006533003304374058, "loss": 2.2129, "step": 445340 }, { "epoch": 1.7215985526743052, "grad_norm": 0.1263972520828247, "learning_rate": 0.0006531518274133495, "loss": 2.2091, "step": 445350 }, { "epoch": 1.7216372098776886, "grad_norm": 0.12139548361301422, "learning_rate": 0.0006530033407613811, "loss": 2.2188, "step": 445360 }, { "epoch": 1.721675867081072, "grad_norm": 0.11739082634449005, "learning_rate": 0.0006528548704760871, "loss": 2.2273, "step": 445370 }, { "epoch": 1.7217145242844552, "grad_norm": 0.11622051149606705, "learning_rate": 0.0006527064165520569, "loss": 2.2207, "step": 445380 }, { "epoch": 1.7217531814878384, "grad_norm": 0.12785564363002777, "learning_rate": 0.0006525579789838822, "loss": 2.2097, "step": 445390 }, { "epoch": 1.7217918386912219, "grad_norm": 0.10811824351549149, "learning_rate": 0.0006524095577661586, "loss": 2.2177, "step": 445400 }, { "epoch": 1.7218304958946051, "grad_norm": 0.12352680414915085, "learning_rate": 0.0006522611528934843, "loss": 2.2056, "step": 445410 }, { "epoch": 1.7218691530979884, "grad_norm": 0.11806277185678482, "learning_rate": 0.0006521127643604603, "loss": 2.2267, "step": 445420 }, { "epoch": 1.7219078103013716, "grad_norm": 0.1163213849067688, "learning_rate": 0.0006519643921616907, "loss": 2.2342, "step": 445430 }, { "epoch": 1.7219464675047549, "grad_norm": 0.12437734007835388, "learning_rate": 0.0006518160362917827, "loss": 2.2137, "step": 445440 }, { "epoch": 1.7219851247081381, "grad_norm": 0.1166505515575409, "learning_rate": 0.0006516676967453461, "loss": 2.2165, "step": 445450 }, { "epoch": 1.7220237819115214, "grad_norm": 0.12200496345758438, "learning_rate": 0.0006515193735169942, "loss": 2.2214, "step": 445460 }, { "epoch": 1.7220624391149046, "grad_norm": 0.1296072155237198, "learning_rate": 0.0006513710666013428, "loss": 2.2285, "step": 445470 }, { "epoch": 1.722101096318288, "grad_norm": 0.11958147585391998, "learning_rate": 0.0006512227759930108, "loss": 2.2189, "step": 445480 }, { "epoch": 1.7221397535216711, "grad_norm": 0.11607889831066132, "learning_rate": 0.0006510745016866202, "loss": 2.2212, "step": 445490 }, { "epoch": 1.7221784107250544, "grad_norm": 0.12207704037427902, "learning_rate": 0.0006509262436767958, "loss": 2.2218, "step": 445500 }, { "epoch": 1.7222170679284377, "grad_norm": 0.11960524320602417, "learning_rate": 0.0006507780019581655, "loss": 2.2181, "step": 445510 }, { "epoch": 1.722255725131821, "grad_norm": 0.13021332025527954, "learning_rate": 0.00065062977652536, "loss": 2.2144, "step": 445520 }, { "epoch": 1.7222943823352044, "grad_norm": 0.12092035263776779, "learning_rate": 0.0006504815673730129, "loss": 2.2227, "step": 445530 }, { "epoch": 1.7223330395385876, "grad_norm": 0.12823942303657532, "learning_rate": 0.0006503333744957609, "loss": 2.2265, "step": 445540 }, { "epoch": 1.7223716967419709, "grad_norm": 0.11256150901317596, "learning_rate": 0.0006501851978882436, "loss": 2.2281, "step": 445550 }, { "epoch": 1.7224103539453541, "grad_norm": 0.12659218907356262, "learning_rate": 0.0006500370375451036, "loss": 2.2213, "step": 445560 }, { "epoch": 1.7224490111487376, "grad_norm": 0.12542758882045746, "learning_rate": 0.0006498888934609863, "loss": 2.211, "step": 445570 }, { "epoch": 1.7224876683521209, "grad_norm": 0.11898112297058105, "learning_rate": 0.0006497407656305401, "loss": 2.2196, "step": 445580 }, { "epoch": 1.7225263255555041, "grad_norm": 0.12938259541988373, "learning_rate": 0.0006495926540484163, "loss": 2.2249, "step": 445590 }, { "epoch": 1.7225649827588874, "grad_norm": 0.1213701143860817, "learning_rate": 0.0006494445587092695, "loss": 2.2273, "step": 445600 }, { "epoch": 1.7226036399622706, "grad_norm": 0.11537019908428192, "learning_rate": 0.0006492964796077566, "loss": 2.2111, "step": 445610 }, { "epoch": 1.7226422971656539, "grad_norm": 0.11240558326244354, "learning_rate": 0.0006491484167385376, "loss": 2.2243, "step": 445620 }, { "epoch": 1.7226809543690371, "grad_norm": 0.11864791810512543, "learning_rate": 0.0006490003700962756, "loss": 2.2224, "step": 445630 }, { "epoch": 1.7227196115724204, "grad_norm": 0.11303960531949997, "learning_rate": 0.0006488523396756371, "loss": 2.2231, "step": 445640 }, { "epoch": 1.7227582687758036, "grad_norm": 0.11428240686655045, "learning_rate": 0.0006487043254712906, "loss": 2.2177, "step": 445650 }, { "epoch": 1.7227969259791869, "grad_norm": 0.12397156655788422, "learning_rate": 0.0006485563274779079, "loss": 2.2114, "step": 445660 }, { "epoch": 1.7228355831825701, "grad_norm": 0.13758109509944916, "learning_rate": 0.0006484083456901637, "loss": 2.2248, "step": 445670 }, { "epoch": 1.7228742403859534, "grad_norm": 0.12868931889533997, "learning_rate": 0.0006482603801027361, "loss": 2.2144, "step": 445680 }, { "epoch": 1.7229128975893366, "grad_norm": 0.11709859222173691, "learning_rate": 0.000648112430710305, "loss": 2.2239, "step": 445690 }, { "epoch": 1.72295155479272, "grad_norm": 0.11064182966947556, "learning_rate": 0.0006479644975075543, "loss": 2.2251, "step": 445700 }, { "epoch": 1.7229902119961034, "grad_norm": 0.12446606904268265, "learning_rate": 0.0006478165804891705, "loss": 2.213, "step": 445710 }, { "epoch": 1.7230288691994866, "grad_norm": 0.12083175778388977, "learning_rate": 0.0006476686796498426, "loss": 2.2172, "step": 445720 }, { "epoch": 1.7230675264028699, "grad_norm": 0.11902615427970886, "learning_rate": 0.0006475207949842629, "loss": 2.2191, "step": 445730 }, { "epoch": 1.7231061836062533, "grad_norm": 0.11912401020526886, "learning_rate": 0.0006473729264871266, "loss": 2.2099, "step": 445740 }, { "epoch": 1.7231448408096366, "grad_norm": 0.13067235052585602, "learning_rate": 0.0006472250741531316, "loss": 2.2282, "step": 445750 }, { "epoch": 1.7231834980130198, "grad_norm": 0.12016969174146652, "learning_rate": 0.000647077237976979, "loss": 2.2329, "step": 445760 }, { "epoch": 1.723222155216403, "grad_norm": 0.12072800099849701, "learning_rate": 0.0006469294179533722, "loss": 2.2306, "step": 445770 }, { "epoch": 1.7232608124197863, "grad_norm": 0.12538011372089386, "learning_rate": 0.0006467816140770182, "loss": 2.2163, "step": 445780 }, { "epoch": 1.7232994696231696, "grad_norm": 0.12371477484703064, "learning_rate": 0.0006466338263426266, "loss": 2.2263, "step": 445790 }, { "epoch": 1.7233381268265529, "grad_norm": 0.11195444315671921, "learning_rate": 0.0006464860547449096, "loss": 2.2203, "step": 445800 }, { "epoch": 1.723376784029936, "grad_norm": 0.11557309329509735, "learning_rate": 0.0006463382992785827, "loss": 2.2286, "step": 445810 }, { "epoch": 1.7234154412333194, "grad_norm": 0.130571186542511, "learning_rate": 0.0006461905599383642, "loss": 2.2137, "step": 445820 }, { "epoch": 1.7234540984367026, "grad_norm": 0.12194689363241196, "learning_rate": 0.000646042836718975, "loss": 2.2364, "step": 445830 }, { "epoch": 1.7234927556400859, "grad_norm": 0.1201247125864029, "learning_rate": 0.0006458951296151394, "loss": 2.2271, "step": 445840 }, { "epoch": 1.7235314128434691, "grad_norm": 0.1234743744134903, "learning_rate": 0.000645747438621584, "loss": 2.2252, "step": 445850 }, { "epoch": 1.7235700700468524, "grad_norm": 0.1283080130815506, "learning_rate": 0.0006455997637330389, "loss": 2.2164, "step": 445860 }, { "epoch": 1.7236087272502358, "grad_norm": 0.1170172318816185, "learning_rate": 0.0006454521049442364, "loss": 2.2252, "step": 445870 }, { "epoch": 1.723647384453619, "grad_norm": 0.12163842469453812, "learning_rate": 0.000645304462249912, "loss": 2.2164, "step": 445880 }, { "epoch": 1.7236860416570023, "grad_norm": 0.11199560016393661, "learning_rate": 0.0006451568356448044, "loss": 2.2202, "step": 445890 }, { "epoch": 1.7237246988603856, "grad_norm": 0.12853404879570007, "learning_rate": 0.0006450092251236541, "loss": 2.22, "step": 445900 }, { "epoch": 1.723763356063769, "grad_norm": 0.14157810807228088, "learning_rate": 0.0006448616306812061, "loss": 2.2222, "step": 445910 }, { "epoch": 1.7238020132671523, "grad_norm": 0.11695588380098343, "learning_rate": 0.0006447140523122068, "loss": 2.2137, "step": 445920 }, { "epoch": 1.7238406704705356, "grad_norm": 0.1321401298046112, "learning_rate": 0.0006445664900114061, "loss": 2.2065, "step": 445930 }, { "epoch": 1.7238793276739188, "grad_norm": 0.1265600621700287, "learning_rate": 0.0006444189437735566, "loss": 2.2215, "step": 445940 }, { "epoch": 1.723917984877302, "grad_norm": 0.12171867489814758, "learning_rate": 0.000644271413593414, "loss": 2.2215, "step": 445950 }, { "epoch": 1.7239566420806853, "grad_norm": 0.12688489258289337, "learning_rate": 0.0006441238994657366, "loss": 2.2183, "step": 445960 }, { "epoch": 1.7239952992840686, "grad_norm": 0.12067555636167526, "learning_rate": 0.0006439764013852856, "loss": 2.2144, "step": 445970 }, { "epoch": 1.7240339564874518, "grad_norm": 0.12304933369159698, "learning_rate": 0.0006438289193468252, "loss": 2.2178, "step": 445980 }, { "epoch": 1.724072613690835, "grad_norm": 0.12591667473316193, "learning_rate": 0.0006436814533451223, "loss": 2.2231, "step": 445990 }, { "epoch": 1.7241112708942183, "grad_norm": 0.12514787912368774, "learning_rate": 0.0006435340033749463, "loss": 2.2262, "step": 446000 }, { "epoch": 1.7241499280976016, "grad_norm": 0.11285724490880966, "learning_rate": 0.0006433865694310704, "loss": 2.2122, "step": 446010 }, { "epoch": 1.7241885853009848, "grad_norm": 0.12354803830385208, "learning_rate": 0.0006432391515082696, "loss": 2.2238, "step": 446020 }, { "epoch": 1.7242272425043683, "grad_norm": 0.1407475471496582, "learning_rate": 0.0006430917496013227, "loss": 2.2238, "step": 446030 }, { "epoch": 1.7242658997077516, "grad_norm": 0.11316511034965515, "learning_rate": 0.0006429443637050101, "loss": 2.2267, "step": 446040 }, { "epoch": 1.7243045569111348, "grad_norm": 0.13038979470729828, "learning_rate": 0.0006427969938141162, "loss": 2.2036, "step": 446050 }, { "epoch": 1.724343214114518, "grad_norm": 0.12853805720806122, "learning_rate": 0.0006426496399234279, "loss": 2.2179, "step": 446060 }, { "epoch": 1.7243818713179013, "grad_norm": 0.12553642690181732, "learning_rate": 0.0006425023020277345, "loss": 2.2264, "step": 446070 }, { "epoch": 1.7244205285212848, "grad_norm": 0.12129683792591095, "learning_rate": 0.0006423549801218287, "loss": 2.2099, "step": 446080 }, { "epoch": 1.724459185724668, "grad_norm": 0.12941096723079681, "learning_rate": 0.0006422076742005059, "loss": 2.2182, "step": 446090 }, { "epoch": 1.7244978429280513, "grad_norm": 0.12963566184043884, "learning_rate": 0.0006420603842585635, "loss": 2.2186, "step": 446100 }, { "epoch": 1.7245365001314346, "grad_norm": 0.12317825853824615, "learning_rate": 0.0006419131102908031, "loss": 2.2073, "step": 446110 }, { "epoch": 1.7245751573348178, "grad_norm": 0.11838335543870926, "learning_rate": 0.0006417658522920285, "loss": 2.2142, "step": 446120 }, { "epoch": 1.724613814538201, "grad_norm": 0.14520931243896484, "learning_rate": 0.0006416186102570456, "loss": 2.2245, "step": 446130 }, { "epoch": 1.7246524717415843, "grad_norm": 0.11642967164516449, "learning_rate": 0.0006414713841806645, "loss": 2.2206, "step": 446140 }, { "epoch": 1.7246911289449676, "grad_norm": 0.11772605031728745, "learning_rate": 0.0006413241740576967, "loss": 2.2062, "step": 446150 }, { "epoch": 1.7247297861483508, "grad_norm": 0.12353634089231491, "learning_rate": 0.0006411769798829577, "loss": 2.2219, "step": 446160 }, { "epoch": 1.724768443351734, "grad_norm": 0.12665092945098877, "learning_rate": 0.000641029801651265, "loss": 2.2258, "step": 446170 }, { "epoch": 1.7248071005551173, "grad_norm": 0.11800059676170349, "learning_rate": 0.0006408826393574394, "loss": 2.2209, "step": 446180 }, { "epoch": 1.7248457577585006, "grad_norm": 0.10817782580852509, "learning_rate": 0.0006407354929963043, "loss": 2.2159, "step": 446190 }, { "epoch": 1.724884414961884, "grad_norm": 0.11082755029201508, "learning_rate": 0.0006405883625626856, "loss": 2.2224, "step": 446200 }, { "epoch": 1.7249230721652673, "grad_norm": 0.11737308651208878, "learning_rate": 0.0006404412480514128, "loss": 2.2266, "step": 446210 }, { "epoch": 1.7249617293686506, "grad_norm": 0.121700718998909, "learning_rate": 0.0006402941494573173, "loss": 2.2221, "step": 446220 }, { "epoch": 1.7250003865720338, "grad_norm": 0.11579973995685577, "learning_rate": 0.0006401470667752341, "loss": 2.2178, "step": 446230 }, { "epoch": 1.725039043775417, "grad_norm": 0.11861163377761841, "learning_rate": 0.0006400000000000002, "loss": 2.2232, "step": 446240 }, { "epoch": 1.7250777009788005, "grad_norm": 0.11449922621250153, "learning_rate": 0.0006398529491264557, "loss": 2.2155, "step": 446250 }, { "epoch": 1.7251163581821838, "grad_norm": 0.12742897868156433, "learning_rate": 0.0006397059141494439, "loss": 2.2319, "step": 446260 }, { "epoch": 1.725155015385567, "grad_norm": 0.11458654701709747, "learning_rate": 0.0006395588950638105, "loss": 2.2206, "step": 446270 }, { "epoch": 1.7251936725889503, "grad_norm": 0.13417372107505798, "learning_rate": 0.000639411891864404, "loss": 2.2128, "step": 446280 }, { "epoch": 1.7252323297923335, "grad_norm": 0.1364544928073883, "learning_rate": 0.0006392649045460759, "loss": 2.219, "step": 446290 }, { "epoch": 1.7252709869957168, "grad_norm": 0.10574916005134583, "learning_rate": 0.0006391179331036801, "loss": 2.2163, "step": 446300 }, { "epoch": 1.7253096441991, "grad_norm": 0.12731365859508514, "learning_rate": 0.0006389709775320733, "loss": 2.2058, "step": 446310 }, { "epoch": 1.7253483014024833, "grad_norm": 0.12686818838119507, "learning_rate": 0.0006388240378261157, "loss": 2.2266, "step": 446320 }, { "epoch": 1.7253869586058665, "grad_norm": 0.11585552990436554, "learning_rate": 0.0006386771139806693, "loss": 2.2223, "step": 446330 }, { "epoch": 1.7254256158092498, "grad_norm": 0.12526173889636993, "learning_rate": 0.0006385302059905993, "loss": 2.2305, "step": 446340 }, { "epoch": 1.725464273012633, "grad_norm": 0.12107162177562714, "learning_rate": 0.000638383313850774, "loss": 2.2051, "step": 446350 }, { "epoch": 1.7255029302160163, "grad_norm": 0.10977458953857422, "learning_rate": 0.0006382364375560639, "loss": 2.2154, "step": 446360 }, { "epoch": 1.7255415874193998, "grad_norm": 0.12415580451488495, "learning_rate": 0.0006380895771013426, "loss": 2.211, "step": 446370 }, { "epoch": 1.725580244622783, "grad_norm": 0.12037225067615509, "learning_rate": 0.0006379427324814863, "loss": 2.2301, "step": 446380 }, { "epoch": 1.7256189018261663, "grad_norm": 0.1225074976682663, "learning_rate": 0.0006377959036913743, "loss": 2.2222, "step": 446390 }, { "epoch": 1.7256575590295495, "grad_norm": 0.11446145921945572, "learning_rate": 0.0006376490907258879, "loss": 2.2202, "step": 446400 }, { "epoch": 1.7256962162329328, "grad_norm": 0.11765024811029434, "learning_rate": 0.0006375022935799122, "loss": 2.1948, "step": 446410 }, { "epoch": 1.7257348734363163, "grad_norm": 0.1386559158563614, "learning_rate": 0.0006373555122483341, "loss": 2.2254, "step": 446420 }, { "epoch": 1.7257735306396995, "grad_norm": 0.1227501630783081, "learning_rate": 0.0006372087467260438, "loss": 2.2166, "step": 446430 }, { "epoch": 1.7258121878430828, "grad_norm": 0.1231738030910492, "learning_rate": 0.0006370619970079343, "loss": 2.2255, "step": 446440 }, { "epoch": 1.725850845046466, "grad_norm": 0.11780079454183578, "learning_rate": 0.000636915263088901, "loss": 2.2198, "step": 446450 }, { "epoch": 1.7258895022498493, "grad_norm": 0.12161796540021896, "learning_rate": 0.0006367685449638421, "loss": 2.2301, "step": 446460 }, { "epoch": 1.7259281594532325, "grad_norm": 0.11437132954597473, "learning_rate": 0.0006366218426276589, "loss": 2.2177, "step": 446470 }, { "epoch": 1.7259668166566158, "grad_norm": 0.12394043058156967, "learning_rate": 0.0006364751560752551, "loss": 2.2125, "step": 446480 }, { "epoch": 1.726005473859999, "grad_norm": 0.1160041093826294, "learning_rate": 0.0006363284853015372, "loss": 2.2283, "step": 446490 }, { "epoch": 1.7260441310633823, "grad_norm": 0.1226138100028038, "learning_rate": 0.0006361818303014144, "loss": 2.2327, "step": 446500 }, { "epoch": 1.7260827882667655, "grad_norm": 0.1209820955991745, "learning_rate": 0.0006360351910697988, "loss": 2.2324, "step": 446510 }, { "epoch": 1.7261214454701488, "grad_norm": 0.16416043043136597, "learning_rate": 0.0006358885676016053, "loss": 2.2182, "step": 446520 }, { "epoch": 1.726160102673532, "grad_norm": 0.11868501454591751, "learning_rate": 0.0006357419598917514, "loss": 2.2337, "step": 446530 }, { "epoch": 1.7261987598769155, "grad_norm": 0.11926791816949844, "learning_rate": 0.0006355953679351569, "loss": 2.2073, "step": 446540 }, { "epoch": 1.7262374170802988, "grad_norm": 0.12692750990390778, "learning_rate": 0.0006354487917267451, "loss": 2.2077, "step": 446550 }, { "epoch": 1.726276074283682, "grad_norm": 0.11207547783851624, "learning_rate": 0.0006353022312614416, "loss": 2.2183, "step": 446560 }, { "epoch": 1.7263147314870653, "grad_norm": 0.11618570238351822, "learning_rate": 0.0006351556865341748, "loss": 2.2015, "step": 446570 }, { "epoch": 1.7263533886904487, "grad_norm": 0.13771525025367737, "learning_rate": 0.0006350091575398757, "loss": 2.2115, "step": 446580 }, { "epoch": 1.726392045893832, "grad_norm": 0.11748812347650528, "learning_rate": 0.0006348626442734784, "loss": 2.226, "step": 446590 }, { "epoch": 1.7264307030972152, "grad_norm": 0.13634926080703735, "learning_rate": 0.0006347161467299189, "loss": 2.2148, "step": 446600 }, { "epoch": 1.7264693603005985, "grad_norm": 0.11584623903036118, "learning_rate": 0.0006345696649041373, "loss": 2.2066, "step": 446610 }, { "epoch": 1.7265080175039818, "grad_norm": 0.11857582628726959, "learning_rate": 0.0006344231987910749, "loss": 2.2201, "step": 446620 }, { "epoch": 1.726546674707365, "grad_norm": 0.1365407258272171, "learning_rate": 0.0006342767483856768, "loss": 2.2263, "step": 446630 }, { "epoch": 1.7265853319107483, "grad_norm": 0.12051232159137726, "learning_rate": 0.0006341303136828902, "loss": 2.1958, "step": 446640 }, { "epoch": 1.7266239891141315, "grad_norm": 0.11777618527412415, "learning_rate": 0.0006339838946776653, "loss": 2.2354, "step": 446650 }, { "epoch": 1.7266626463175148, "grad_norm": 0.13300010561943054, "learning_rate": 0.0006338374913649547, "loss": 2.2211, "step": 446660 }, { "epoch": 1.726701303520898, "grad_norm": 0.11809588968753815, "learning_rate": 0.0006336911037397144, "loss": 2.2186, "step": 446670 }, { "epoch": 1.7267399607242813, "grad_norm": 0.11888469755649567, "learning_rate": 0.0006335447317969023, "loss": 2.2236, "step": 446680 }, { "epoch": 1.7267786179276645, "grad_norm": 0.12028637528419495, "learning_rate": 0.0006333983755314791, "loss": 2.2162, "step": 446690 }, { "epoch": 1.7268172751310478, "grad_norm": 0.12548750638961792, "learning_rate": 0.000633252034938409, "loss": 2.2181, "step": 446700 }, { "epoch": 1.7268559323344312, "grad_norm": 0.12346018850803375, "learning_rate": 0.0006331057100126578, "loss": 2.2081, "step": 446710 }, { "epoch": 1.7268945895378145, "grad_norm": 0.12081938236951828, "learning_rate": 0.000632959400749195, "loss": 2.2296, "step": 446720 }, { "epoch": 1.7269332467411977, "grad_norm": 0.11280404776334763, "learning_rate": 0.0006328131071429919, "loss": 2.2113, "step": 446730 }, { "epoch": 1.726971903944581, "grad_norm": 0.13663756847381592, "learning_rate": 0.0006326668291890232, "loss": 2.2204, "step": 446740 }, { "epoch": 1.7270105611479645, "grad_norm": 0.118993379175663, "learning_rate": 0.0006325205668822655, "loss": 2.1998, "step": 446750 }, { "epoch": 1.7270492183513477, "grad_norm": 0.11974366009235382, "learning_rate": 0.0006323743202176993, "loss": 2.2148, "step": 446760 }, { "epoch": 1.727087875554731, "grad_norm": 0.11485361307859421, "learning_rate": 0.0006322280891903065, "loss": 2.2232, "step": 446770 }, { "epoch": 1.7271265327581142, "grad_norm": 0.12438970804214478, "learning_rate": 0.0006320818737950725, "loss": 2.2116, "step": 446780 }, { "epoch": 1.7271651899614975, "grad_norm": 0.1330035924911499, "learning_rate": 0.0006319356740269851, "loss": 2.2095, "step": 446790 }, { "epoch": 1.7272038471648807, "grad_norm": 0.12693437933921814, "learning_rate": 0.0006317894898810344, "loss": 2.2123, "step": 446800 }, { "epoch": 1.727242504368264, "grad_norm": 0.5244444608688354, "learning_rate": 0.0006316433213522141, "loss": 2.2197, "step": 446810 }, { "epoch": 1.7272811615716472, "grad_norm": 0.11980904638767242, "learning_rate": 0.0006314971684355197, "loss": 2.2133, "step": 446820 }, { "epoch": 1.7273198187750305, "grad_norm": 0.13215787708759308, "learning_rate": 0.0006313510311259501, "loss": 2.219, "step": 446830 }, { "epoch": 1.7273584759784137, "grad_norm": 0.1250036060810089, "learning_rate": 0.0006312049094185061, "loss": 2.2113, "step": 446840 }, { "epoch": 1.727397133181797, "grad_norm": 0.10954178869724274, "learning_rate": 0.0006310588033081917, "loss": 2.2151, "step": 446850 }, { "epoch": 1.7274357903851802, "grad_norm": 0.11306620389223099, "learning_rate": 0.0006309127127900136, "loss": 2.2184, "step": 446860 }, { "epoch": 1.7274744475885635, "grad_norm": 0.12376853823661804, "learning_rate": 0.0006307666378589807, "loss": 2.2204, "step": 446870 }, { "epoch": 1.727513104791947, "grad_norm": 0.13085810840129852, "learning_rate": 0.0006306205785101049, "loss": 2.2203, "step": 446880 }, { "epoch": 1.7275517619953302, "grad_norm": 0.11990802735090256, "learning_rate": 0.0006304745347384007, "loss": 2.2085, "step": 446890 }, { "epoch": 1.7275904191987135, "grad_norm": 0.11620058864355087, "learning_rate": 0.0006303285065388855, "loss": 2.2054, "step": 446900 }, { "epoch": 1.7276290764020967, "grad_norm": 0.12165633589029312, "learning_rate": 0.0006301824939065788, "loss": 2.2177, "step": 446910 }, { "epoch": 1.7276677336054802, "grad_norm": 0.12509748339653015, "learning_rate": 0.0006300364968365033, "loss": 2.2255, "step": 446920 }, { "epoch": 1.7277063908088635, "grad_norm": 0.13173136115074158, "learning_rate": 0.000629890515323684, "loss": 2.1986, "step": 446930 }, { "epoch": 1.7277450480122467, "grad_norm": 0.14559979736804962, "learning_rate": 0.0006297445493631488, "loss": 2.2201, "step": 446940 }, { "epoch": 1.72778370521563, "grad_norm": 0.13024260103702545, "learning_rate": 0.0006295985989499283, "loss": 2.2112, "step": 446950 }, { "epoch": 1.7278223624190132, "grad_norm": 0.11923927813768387, "learning_rate": 0.0006294526640790547, "loss": 2.2082, "step": 446960 }, { "epoch": 1.7278610196223965, "grad_norm": 0.12412068992853165, "learning_rate": 0.000629306744745565, "loss": 2.2242, "step": 446970 }, { "epoch": 1.7278996768257797, "grad_norm": 0.14609181880950928, "learning_rate": 0.0006291608409444965, "loss": 2.2167, "step": 446980 }, { "epoch": 1.727938334029163, "grad_norm": 0.12911032140254974, "learning_rate": 0.0006290149526708909, "loss": 2.2289, "step": 446990 }, { "epoch": 1.7279769912325462, "grad_norm": 0.12092292308807373, "learning_rate": 0.0006288690799197912, "loss": 2.1986, "step": 447000 }, { "epoch": 1.7280156484359295, "grad_norm": 0.1374613344669342, "learning_rate": 0.0006287232226862441, "loss": 2.2165, "step": 447010 }, { "epoch": 1.7280543056393127, "grad_norm": 0.13300615549087524, "learning_rate": 0.0006285773809652985, "loss": 2.209, "step": 447020 }, { "epoch": 1.728092962842696, "grad_norm": 0.12323442101478577, "learning_rate": 0.0006284315547520059, "loss": 2.2132, "step": 447030 }, { "epoch": 1.7281316200460792, "grad_norm": 0.11762463301420212, "learning_rate": 0.0006282857440414203, "loss": 2.2134, "step": 447040 }, { "epoch": 1.7281702772494627, "grad_norm": 0.12495895475149155, "learning_rate": 0.0006281399488285986, "loss": 2.2217, "step": 447050 }, { "epoch": 1.728208934452846, "grad_norm": 0.11891249567270279, "learning_rate": 0.0006279941691086004, "loss": 2.2145, "step": 447060 }, { "epoch": 1.7282475916562292, "grad_norm": 0.13742129504680634, "learning_rate": 0.0006278484048764874, "loss": 2.2091, "step": 447070 }, { "epoch": 1.7282862488596125, "grad_norm": 0.13152971863746643, "learning_rate": 0.0006277026561273247, "loss": 2.21, "step": 447080 }, { "epoch": 1.728324906062996, "grad_norm": 0.12500789761543274, "learning_rate": 0.0006275569228561791, "loss": 2.2193, "step": 447090 }, { "epoch": 1.7283635632663792, "grad_norm": 0.11646206676959991, "learning_rate": 0.0006274112050581209, "loss": 2.2038, "step": 447100 }, { "epoch": 1.7284022204697624, "grad_norm": 0.11558002233505249, "learning_rate": 0.0006272655027282223, "loss": 2.2186, "step": 447110 }, { "epoch": 1.7284408776731457, "grad_norm": 0.1294437050819397, "learning_rate": 0.000627119815861559, "loss": 2.2197, "step": 447120 }, { "epoch": 1.728479534876529, "grad_norm": 0.11961176246404648, "learning_rate": 0.0006269741444532079, "loss": 2.2251, "step": 447130 }, { "epoch": 1.7285181920799122, "grad_norm": 0.12001509964466095, "learning_rate": 0.0006268284884982503, "loss": 2.2222, "step": 447140 }, { "epoch": 1.7285568492832954, "grad_norm": 0.11960349977016449, "learning_rate": 0.0006266828479917685, "loss": 2.2247, "step": 447150 }, { "epoch": 1.7285955064866787, "grad_norm": 0.12857332825660706, "learning_rate": 0.0006265372229288484, "loss": 2.2133, "step": 447160 }, { "epoch": 1.728634163690062, "grad_norm": 0.11253602057695389, "learning_rate": 0.0006263916133045779, "loss": 2.2307, "step": 447170 }, { "epoch": 1.7286728208934452, "grad_norm": 0.12653528153896332, "learning_rate": 0.0006262460191140482, "loss": 2.2116, "step": 447180 }, { "epoch": 1.7287114780968285, "grad_norm": 0.12356472760438919, "learning_rate": 0.0006261004403523524, "loss": 2.2344, "step": 447190 }, { "epoch": 1.7287501353002117, "grad_norm": 0.11877021193504333, "learning_rate": 0.0006259548770145867, "loss": 2.2024, "step": 447200 }, { "epoch": 1.728788792503595, "grad_norm": 0.11799889802932739, "learning_rate": 0.0006258093290958493, "loss": 2.2131, "step": 447210 }, { "epoch": 1.7288274497069784, "grad_norm": 0.12331540137529373, "learning_rate": 0.0006256637965912417, "loss": 2.2295, "step": 447220 }, { "epoch": 1.7288661069103617, "grad_norm": 0.12345191091299057, "learning_rate": 0.0006255182794958676, "loss": 2.2403, "step": 447230 }, { "epoch": 1.728904764113745, "grad_norm": 0.12320110946893692, "learning_rate": 0.0006253727778048335, "loss": 2.2158, "step": 447240 }, { "epoch": 1.7289434213171282, "grad_norm": 0.1262044906616211, "learning_rate": 0.000625227291513248, "loss": 2.227, "step": 447250 }, { "epoch": 1.7289820785205117, "grad_norm": 0.12599673867225647, "learning_rate": 0.000625081820616223, "loss": 2.2218, "step": 447260 }, { "epoch": 1.729020735723895, "grad_norm": 0.11664579063653946, "learning_rate": 0.0006249363651088724, "loss": 2.2173, "step": 447270 }, { "epoch": 1.7290593929272782, "grad_norm": 0.11499513685703278, "learning_rate": 0.0006247909249863133, "loss": 2.2268, "step": 447280 }, { "epoch": 1.7290980501306614, "grad_norm": 0.1192687600851059, "learning_rate": 0.0006246455002436644, "loss": 2.2244, "step": 447290 }, { "epoch": 1.7291367073340447, "grad_norm": 0.12507928907871246, "learning_rate": 0.0006245000908760481, "loss": 2.2109, "step": 447300 }, { "epoch": 1.729175364537428, "grad_norm": 0.1301724910736084, "learning_rate": 0.0006243546968785885, "loss": 2.2207, "step": 447310 }, { "epoch": 1.7292140217408112, "grad_norm": 0.11521703004837036, "learning_rate": 0.0006242093182464128, "loss": 2.2104, "step": 447320 }, { "epoch": 1.7292526789441944, "grad_norm": 0.1285400092601776, "learning_rate": 0.0006240639549746507, "loss": 2.2045, "step": 447330 }, { "epoch": 1.7292913361475777, "grad_norm": 0.13523685932159424, "learning_rate": 0.0006239186070584342, "loss": 2.2077, "step": 447340 }, { "epoch": 1.729329993350961, "grad_norm": 0.11786042153835297, "learning_rate": 0.0006237732744928981, "loss": 2.2301, "step": 447350 }, { "epoch": 1.7293686505543442, "grad_norm": 0.1203950047492981, "learning_rate": 0.0006236279572731797, "loss": 2.2259, "step": 447360 }, { "epoch": 1.7294073077577274, "grad_norm": 0.12070076167583466, "learning_rate": 0.0006234826553944188, "loss": 2.2325, "step": 447370 }, { "epoch": 1.7294459649611107, "grad_norm": 0.12071788311004639, "learning_rate": 0.0006233373688517583, "loss": 2.2153, "step": 447380 }, { "epoch": 1.7294846221644942, "grad_norm": 0.12840916216373444, "learning_rate": 0.0006231920976403427, "loss": 2.2167, "step": 447390 }, { "epoch": 1.7295232793678774, "grad_norm": 0.12395832687616348, "learning_rate": 0.0006230468417553197, "loss": 2.2113, "step": 447400 }, { "epoch": 1.7295619365712607, "grad_norm": 0.1331869661808014, "learning_rate": 0.0006229016011918394, "loss": 2.2126, "step": 447410 }, { "epoch": 1.729600593774644, "grad_norm": 0.11826955527067184, "learning_rate": 0.0006227563759450545, "loss": 2.2116, "step": 447420 }, { "epoch": 1.7296392509780274, "grad_norm": 0.131355881690979, "learning_rate": 0.0006226111660101204, "loss": 2.2146, "step": 447430 }, { "epoch": 1.7296779081814107, "grad_norm": 0.12471216917037964, "learning_rate": 0.000622465971382195, "loss": 2.2117, "step": 447440 }, { "epoch": 1.729716565384794, "grad_norm": 0.12048184871673584, "learning_rate": 0.0006223207920564382, "loss": 2.2216, "step": 447450 }, { "epoch": 1.7297552225881772, "grad_norm": 0.11597990989685059, "learning_rate": 0.000622175628028013, "loss": 2.2262, "step": 447460 }, { "epoch": 1.7297938797915604, "grad_norm": 0.12440779060125351, "learning_rate": 0.0006220304792920855, "loss": 2.2227, "step": 447470 }, { "epoch": 1.7298325369949437, "grad_norm": 0.13827833533287048, "learning_rate": 0.0006218853458438227, "loss": 2.2121, "step": 447480 }, { "epoch": 1.729871194198327, "grad_norm": 0.12489471584558487, "learning_rate": 0.0006217402276783959, "loss": 2.2203, "step": 447490 }, { "epoch": 1.7299098514017102, "grad_norm": 0.12917496263980865, "learning_rate": 0.0006215951247909779, "loss": 2.2124, "step": 447500 }, { "epoch": 1.7299485086050934, "grad_norm": 0.1222403421998024, "learning_rate": 0.0006214500371767442, "loss": 2.2361, "step": 447510 }, { "epoch": 1.7299871658084767, "grad_norm": 0.11857359856367111, "learning_rate": 0.0006213049648308731, "loss": 2.2152, "step": 447520 }, { "epoch": 1.73002582301186, "grad_norm": 0.12194454669952393, "learning_rate": 0.0006211599077485452, "loss": 2.2267, "step": 447530 }, { "epoch": 1.7300644802152432, "grad_norm": 0.12156818807125092, "learning_rate": 0.0006210148659249442, "loss": 2.2068, "step": 447540 }, { "epoch": 1.7301031374186264, "grad_norm": 0.1164856106042862, "learning_rate": 0.0006208698393552552, "loss": 2.1847, "step": 447550 }, { "epoch": 1.73014179462201, "grad_norm": 0.13245020806789398, "learning_rate": 0.0006207248280346667, "loss": 2.2223, "step": 447560 }, { "epoch": 1.7301804518253932, "grad_norm": 0.11174594610929489, "learning_rate": 0.0006205798319583695, "loss": 2.2077, "step": 447570 }, { "epoch": 1.7302191090287764, "grad_norm": 0.12638911604881287, "learning_rate": 0.0006204348511215571, "loss": 2.2113, "step": 447580 }, { "epoch": 1.7302577662321597, "grad_norm": 0.13453972339630127, "learning_rate": 0.0006202898855194255, "loss": 2.2295, "step": 447590 }, { "epoch": 1.7302964234355431, "grad_norm": 0.12614595890045166, "learning_rate": 0.0006201449351471729, "loss": 2.217, "step": 447600 }, { "epoch": 1.7303350806389264, "grad_norm": 0.12270817905664444, "learning_rate": 0.0006199999999999999, "loss": 2.2219, "step": 447610 }, { "epoch": 1.7303737378423096, "grad_norm": 0.12108409404754639, "learning_rate": 0.0006198550800731107, "loss": 2.2281, "step": 447620 }, { "epoch": 1.7304123950456929, "grad_norm": 0.13020919263362885, "learning_rate": 0.0006197101753617105, "loss": 2.2101, "step": 447630 }, { "epoch": 1.7304510522490761, "grad_norm": 0.12765903770923615, "learning_rate": 0.0006195652858610081, "loss": 2.2209, "step": 447640 }, { "epoch": 1.7304897094524594, "grad_norm": 0.13279370963573456, "learning_rate": 0.0006194204115662148, "loss": 2.2142, "step": 447650 }, { "epoch": 1.7305283666558426, "grad_norm": 0.11896040290594101, "learning_rate": 0.0006192755524725435, "loss": 2.2109, "step": 447660 }, { "epoch": 1.730567023859226, "grad_norm": 0.11061503738164902, "learning_rate": 0.0006191307085752105, "loss": 2.2193, "step": 447670 }, { "epoch": 1.7306056810626091, "grad_norm": 0.11819742619991302, "learning_rate": 0.0006189858798694345, "loss": 2.2226, "step": 447680 }, { "epoch": 1.7306443382659924, "grad_norm": 0.1292245090007782, "learning_rate": 0.0006188410663504363, "loss": 2.2278, "step": 447690 }, { "epoch": 1.7306829954693757, "grad_norm": 0.11678581684827805, "learning_rate": 0.0006186962680134394, "loss": 2.2191, "step": 447700 }, { "epoch": 1.730721652672759, "grad_norm": 0.13057443499565125, "learning_rate": 0.00061855148485367, "loss": 2.2129, "step": 447710 }, { "epoch": 1.7307603098761422, "grad_norm": 0.13378068804740906, "learning_rate": 0.0006184067168663565, "loss": 2.2068, "step": 447720 }, { "epoch": 1.7307989670795256, "grad_norm": 0.12511661648750305, "learning_rate": 0.00061826196404673, "loss": 2.2216, "step": 447730 }, { "epoch": 1.7308376242829089, "grad_norm": 0.1140625923871994, "learning_rate": 0.0006181172263900241, "loss": 2.2254, "step": 447740 }, { "epoch": 1.7308762814862921, "grad_norm": 0.12888774275779724, "learning_rate": 0.0006179725038914747, "loss": 2.2218, "step": 447750 }, { "epoch": 1.7309149386896754, "grad_norm": 0.1261301040649414, "learning_rate": 0.0006178277965463204, "loss": 2.2167, "step": 447760 }, { "epoch": 1.7309535958930589, "grad_norm": 0.12283816933631897, "learning_rate": 0.0006176831043498021, "loss": 2.202, "step": 447770 }, { "epoch": 1.7309922530964421, "grad_norm": 0.12190502136945724, "learning_rate": 0.0006175384272971636, "loss": 2.2091, "step": 447780 }, { "epoch": 1.7310309102998254, "grad_norm": 0.13062943518161774, "learning_rate": 0.0006173937653836506, "loss": 2.2211, "step": 447790 }, { "epoch": 1.7310695675032086, "grad_norm": 0.12369003146886826, "learning_rate": 0.0006172491186045117, "loss": 2.2243, "step": 447800 }, { "epoch": 1.7311082247065919, "grad_norm": 0.1110776886343956, "learning_rate": 0.0006171044869549977, "loss": 2.2149, "step": 447810 }, { "epoch": 1.7311468819099751, "grad_norm": 0.11940490454435349, "learning_rate": 0.0006169598704303623, "loss": 2.2093, "step": 447820 }, { "epoch": 1.7311855391133584, "grad_norm": 0.11867483705282211, "learning_rate": 0.0006168152690258615, "loss": 2.2007, "step": 447830 }, { "epoch": 1.7312241963167416, "grad_norm": 0.1168961450457573, "learning_rate": 0.0006166706827367535, "loss": 2.2347, "step": 447840 }, { "epoch": 1.7312628535201249, "grad_norm": 0.11691093444824219, "learning_rate": 0.0006165261115582992, "loss": 2.2129, "step": 447850 }, { "epoch": 1.7313015107235081, "grad_norm": 0.1283358633518219, "learning_rate": 0.0006163815554857617, "loss": 2.236, "step": 447860 }, { "epoch": 1.7313401679268914, "grad_norm": 0.12356679141521454, "learning_rate": 0.0006162370145144076, "loss": 2.2175, "step": 447870 }, { "epoch": 1.7313788251302746, "grad_norm": 0.11469010263681412, "learning_rate": 0.0006160924886395046, "loss": 2.2193, "step": 447880 }, { "epoch": 1.731417482333658, "grad_norm": 0.11554548144340515, "learning_rate": 0.0006159479778563235, "loss": 2.2131, "step": 447890 }, { "epoch": 1.7314561395370414, "grad_norm": 0.12214815616607666, "learning_rate": 0.0006158034821601379, "loss": 2.2112, "step": 447900 }, { "epoch": 1.7314947967404246, "grad_norm": 0.10934476554393768, "learning_rate": 0.0006156590015462231, "loss": 2.2148, "step": 447910 }, { "epoch": 1.7315334539438079, "grad_norm": 0.1100868284702301, "learning_rate": 0.0006155145360098575, "loss": 2.2088, "step": 447920 }, { "epoch": 1.7315721111471911, "grad_norm": 0.1273280680179596, "learning_rate": 0.0006153700855463218, "loss": 2.2182, "step": 447930 }, { "epoch": 1.7316107683505746, "grad_norm": 0.13480187952518463, "learning_rate": 0.0006152256501508992, "loss": 2.2206, "step": 447940 }, { "epoch": 1.7316494255539578, "grad_norm": 0.11295372247695923, "learning_rate": 0.000615081229818875, "loss": 2.2338, "step": 447950 }, { "epoch": 1.731688082757341, "grad_norm": 0.11729270219802856, "learning_rate": 0.0006149368245455372, "loss": 2.2163, "step": 447960 }, { "epoch": 1.7317267399607243, "grad_norm": 0.11652059853076935, "learning_rate": 0.0006147924343261766, "loss": 2.2358, "step": 447970 }, { "epoch": 1.7317653971641076, "grad_norm": 0.12786738574504852, "learning_rate": 0.0006146480591560857, "loss": 2.2123, "step": 447980 }, { "epoch": 1.7318040543674909, "grad_norm": 0.1158597394824028, "learning_rate": 0.0006145036990305604, "loss": 2.213, "step": 447990 }, { "epoch": 1.731842711570874, "grad_norm": 0.11124974489212036, "learning_rate": 0.0006143593539448982, "loss": 2.2188, "step": 448000 }, { "epoch": 1.7318813687742574, "grad_norm": 0.1128598302602768, "learning_rate": 0.0006142150238943994, "loss": 2.2263, "step": 448010 }, { "epoch": 1.7319200259776406, "grad_norm": 0.11894000321626663, "learning_rate": 0.0006140707088743668, "loss": 2.2184, "step": 448020 }, { "epoch": 1.7319586831810239, "grad_norm": 0.12482871115207672, "learning_rate": 0.0006139264088801058, "loss": 2.2137, "step": 448030 }, { "epoch": 1.7319973403844071, "grad_norm": 0.12437481433153152, "learning_rate": 0.0006137821239069235, "loss": 2.2008, "step": 448040 }, { "epoch": 1.7320359975877904, "grad_norm": 0.11717011779546738, "learning_rate": 0.0006136378539501304, "loss": 2.2206, "step": 448050 }, { "epoch": 1.7320746547911738, "grad_norm": 0.11646430939435959, "learning_rate": 0.0006134935990050387, "loss": 2.2076, "step": 448060 }, { "epoch": 1.732113311994557, "grad_norm": 0.13522832095623016, "learning_rate": 0.0006133493590669639, "loss": 2.2138, "step": 448070 }, { "epoch": 1.7321519691979403, "grad_norm": 0.1385970264673233, "learning_rate": 0.0006132051341312229, "loss": 2.2113, "step": 448080 }, { "epoch": 1.7321906264013236, "grad_norm": 0.1351778656244278, "learning_rate": 0.0006130609241931353, "loss": 2.2277, "step": 448090 }, { "epoch": 1.7322292836047068, "grad_norm": 0.12390823662281036, "learning_rate": 0.0006129167292480238, "loss": 2.2221, "step": 448100 }, { "epoch": 1.7322679408080903, "grad_norm": 0.12284976989030838, "learning_rate": 0.000612772549291213, "loss": 2.2234, "step": 448110 }, { "epoch": 1.7323065980114736, "grad_norm": 0.12573711574077606, "learning_rate": 0.00061262838431803, "loss": 2.2208, "step": 448120 }, { "epoch": 1.7323452552148568, "grad_norm": 0.11867741495370865, "learning_rate": 0.0006124842343238042, "loss": 2.2038, "step": 448130 }, { "epoch": 1.73238391241824, "grad_norm": 0.13029922544956207, "learning_rate": 0.0006123400993038677, "loss": 2.2247, "step": 448140 }, { "epoch": 1.7324225696216233, "grad_norm": 0.13197588920593262, "learning_rate": 0.0006121959792535547, "loss": 2.2099, "step": 448150 }, { "epoch": 1.7324612268250066, "grad_norm": 0.11653072386980057, "learning_rate": 0.0006120518741682022, "loss": 2.2156, "step": 448160 }, { "epoch": 1.7324998840283898, "grad_norm": 0.115585096180439, "learning_rate": 0.0006119077840431493, "loss": 2.1938, "step": 448170 }, { "epoch": 1.732538541231773, "grad_norm": 0.12081106007099152, "learning_rate": 0.0006117637088737378, "loss": 2.2054, "step": 448180 }, { "epoch": 1.7325771984351563, "grad_norm": 0.12495361268520355, "learning_rate": 0.0006116196486553119, "loss": 2.2058, "step": 448190 }, { "epoch": 1.7326158556385396, "grad_norm": 0.12298168241977692, "learning_rate": 0.0006114756033832174, "loss": 2.218, "step": 448200 }, { "epoch": 1.7326545128419228, "grad_norm": 0.11500807851552963, "learning_rate": 0.000611331573052804, "loss": 2.2318, "step": 448210 }, { "epoch": 1.732693170045306, "grad_norm": 0.12313594669103622, "learning_rate": 0.0006111875576594223, "loss": 2.2102, "step": 448220 }, { "epoch": 1.7327318272486896, "grad_norm": 0.11742036789655685, "learning_rate": 0.0006110435571984267, "loss": 2.1994, "step": 448230 }, { "epoch": 1.7327704844520728, "grad_norm": 0.11603717505931854, "learning_rate": 0.000610899571665173, "loss": 2.2122, "step": 448240 }, { "epoch": 1.732809141655456, "grad_norm": 0.11510173976421356, "learning_rate": 0.0006107556010550195, "loss": 2.2255, "step": 448250 }, { "epoch": 1.7328477988588393, "grad_norm": 0.14430029690265656, "learning_rate": 0.0006106116453633275, "loss": 2.2189, "step": 448260 }, { "epoch": 1.7328864560622226, "grad_norm": 0.11953294277191162, "learning_rate": 0.0006104677045854601, "loss": 2.2153, "step": 448270 }, { "epoch": 1.732925113265606, "grad_norm": 0.12118013948202133, "learning_rate": 0.0006103237787167833, "loss": 2.2345, "step": 448280 }, { "epoch": 1.7329637704689893, "grad_norm": 0.11401266604661942, "learning_rate": 0.0006101798677526648, "loss": 2.2085, "step": 448290 }, { "epoch": 1.7330024276723726, "grad_norm": 0.11563297361135483, "learning_rate": 0.0006100359716884758, "loss": 2.2193, "step": 448300 }, { "epoch": 1.7330410848757558, "grad_norm": 0.12193930894136429, "learning_rate": 0.0006098920905195886, "loss": 2.207, "step": 448310 }, { "epoch": 1.733079742079139, "grad_norm": 0.12167327105998993, "learning_rate": 0.0006097482242413785, "loss": 2.2036, "step": 448320 }, { "epoch": 1.7331183992825223, "grad_norm": 0.11306396871805191, "learning_rate": 0.000609604372849224, "loss": 2.2245, "step": 448330 }, { "epoch": 1.7331570564859056, "grad_norm": 0.13491253554821014, "learning_rate": 0.0006094605363385044, "loss": 2.2217, "step": 448340 }, { "epoch": 1.7331957136892888, "grad_norm": 0.12088317424058914, "learning_rate": 0.0006093167147046026, "loss": 2.2149, "step": 448350 }, { "epoch": 1.733234370892672, "grad_norm": 0.12405755370855331, "learning_rate": 0.0006091729079429033, "loss": 2.2224, "step": 448360 }, { "epoch": 1.7332730280960553, "grad_norm": 0.13035620748996735, "learning_rate": 0.0006090291160487937, "loss": 2.2282, "step": 448370 }, { "epoch": 1.7333116852994386, "grad_norm": 0.1224067360162735, "learning_rate": 0.0006088853390176639, "loss": 2.2041, "step": 448380 }, { "epoch": 1.7333503425028218, "grad_norm": 0.11730349063873291, "learning_rate": 0.0006087415768449056, "loss": 2.2087, "step": 448390 }, { "epoch": 1.7333889997062053, "grad_norm": 0.12100856751203537, "learning_rate": 0.0006085978295259131, "loss": 2.2237, "step": 448400 }, { "epoch": 1.7334276569095886, "grad_norm": 0.11594785749912262, "learning_rate": 0.0006084540970560835, "loss": 2.2147, "step": 448410 }, { "epoch": 1.7334663141129718, "grad_norm": 0.12283364683389664, "learning_rate": 0.0006083103794308158, "loss": 2.221, "step": 448420 }, { "epoch": 1.733504971316355, "grad_norm": 0.436133474111557, "learning_rate": 0.0006081666766455116, "loss": 2.2137, "step": 448430 }, { "epoch": 1.7335436285197385, "grad_norm": 0.1321917474269867, "learning_rate": 0.0006080229886955748, "loss": 2.2198, "step": 448440 }, { "epoch": 1.7335822857231218, "grad_norm": 0.13761860132217407, "learning_rate": 0.0006078793155764118, "loss": 2.2164, "step": 448450 }, { "epoch": 1.733620942926505, "grad_norm": 0.1234583705663681, "learning_rate": 0.0006077356572834309, "loss": 2.2194, "step": 448460 }, { "epoch": 1.7336596001298883, "grad_norm": 0.12498248368501663, "learning_rate": 0.0006075920138120436, "loss": 2.2087, "step": 448470 }, { "epoch": 1.7336982573332715, "grad_norm": 0.11420132219791412, "learning_rate": 0.000607448385157663, "loss": 2.2153, "step": 448480 }, { "epoch": 1.7337369145366548, "grad_norm": 0.1190991923213005, "learning_rate": 0.0006073047713157053, "loss": 2.2134, "step": 448490 }, { "epoch": 1.733775571740038, "grad_norm": 0.11640120297670364, "learning_rate": 0.0006071611722815881, "loss": 2.2098, "step": 448500 }, { "epoch": 1.7338142289434213, "grad_norm": 0.11825591325759888, "learning_rate": 0.0006070175880507321, "loss": 2.2108, "step": 448510 }, { "epoch": 1.7338528861468046, "grad_norm": 0.12558484077453613, "learning_rate": 0.0006068740186185602, "loss": 2.2028, "step": 448520 }, { "epoch": 1.7338915433501878, "grad_norm": 0.13272039592266083, "learning_rate": 0.0006067304639804973, "loss": 2.2234, "step": 448530 }, { "epoch": 1.733930200553571, "grad_norm": 0.11638154089450836, "learning_rate": 0.0006065869241319715, "loss": 2.2005, "step": 448540 }, { "epoch": 1.7339688577569543, "grad_norm": 0.1183534637093544, "learning_rate": 0.0006064433990684123, "loss": 2.211, "step": 448550 }, { "epoch": 1.7340075149603376, "grad_norm": 0.12037092447280884, "learning_rate": 0.0006062998887852525, "loss": 2.211, "step": 448560 }, { "epoch": 1.734046172163721, "grad_norm": 0.12053132802248001, "learning_rate": 0.0006061563932779259, "loss": 2.2033, "step": 448570 }, { "epoch": 1.7340848293671043, "grad_norm": 0.14399179816246033, "learning_rate": 0.00060601291254187, "loss": 2.227, "step": 448580 }, { "epoch": 1.7341234865704875, "grad_norm": 0.1422278881072998, "learning_rate": 0.0006058694465725243, "loss": 2.2382, "step": 448590 }, { "epoch": 1.7341621437738708, "grad_norm": 0.1431090086698532, "learning_rate": 0.00060572599536533, "loss": 2.2108, "step": 448600 }, { "epoch": 1.7342008009772543, "grad_norm": 0.13537760078907013, "learning_rate": 0.0006055825589157313, "loss": 2.2011, "step": 448610 }, { "epoch": 1.7342394581806375, "grad_norm": 0.11662229150533676, "learning_rate": 0.0006054391372191745, "loss": 2.2125, "step": 448620 }, { "epoch": 1.7342781153840208, "grad_norm": 0.11179200559854507, "learning_rate": 0.0006052957302711086, "loss": 2.2193, "step": 448630 }, { "epoch": 1.734316772587404, "grad_norm": 0.11571834236383438, "learning_rate": 0.0006051523380669845, "loss": 2.2138, "step": 448640 }, { "epoch": 1.7343554297907873, "grad_norm": 0.13290511071681976, "learning_rate": 0.0006050089606022553, "loss": 2.2322, "step": 448650 }, { "epoch": 1.7343940869941705, "grad_norm": 0.12603338062763214, "learning_rate": 0.0006048655978723772, "loss": 2.2116, "step": 448660 }, { "epoch": 1.7344327441975538, "grad_norm": 0.12034429609775543, "learning_rate": 0.0006047222498728076, "loss": 2.2063, "step": 448670 }, { "epoch": 1.734471401400937, "grad_norm": 0.12012781202793121, "learning_rate": 0.0006045789165990075, "loss": 2.2177, "step": 448680 }, { "epoch": 1.7345100586043203, "grad_norm": 0.16808289289474487, "learning_rate": 0.0006044355980464391, "loss": 2.2166, "step": 448690 }, { "epoch": 1.7345487158077035, "grad_norm": 0.12908385694026947, "learning_rate": 0.000604292294210568, "loss": 2.2184, "step": 448700 }, { "epoch": 1.7345873730110868, "grad_norm": 0.11955581605434418, "learning_rate": 0.000604149005086861, "loss": 2.2119, "step": 448710 }, { "epoch": 1.73462603021447, "grad_norm": 0.12239424139261246, "learning_rate": 0.0006040057306707882, "loss": 2.2173, "step": 448720 }, { "epoch": 1.7346646874178533, "grad_norm": 0.12347353249788284, "learning_rate": 0.0006038624709578215, "loss": 2.2297, "step": 448730 }, { "epoch": 1.7347033446212368, "grad_norm": 0.12630808353424072, "learning_rate": 0.0006037192259434353, "loss": 2.2182, "step": 448740 }, { "epoch": 1.73474200182462, "grad_norm": 0.11457516252994537, "learning_rate": 0.000603575995623106, "loss": 2.2195, "step": 448750 }, { "epoch": 1.7347806590280033, "grad_norm": 0.12003987282514572, "learning_rate": 0.0006034327799923127, "loss": 2.2304, "step": 448760 }, { "epoch": 1.7348193162313865, "grad_norm": 0.12798136472702026, "learning_rate": 0.0006032895790465369, "loss": 2.1916, "step": 448770 }, { "epoch": 1.73485797343477, "grad_norm": 0.1289728581905365, "learning_rate": 0.0006031463927812622, "loss": 2.2131, "step": 448780 }, { "epoch": 1.7348966306381532, "grad_norm": 0.1295253038406372, "learning_rate": 0.0006030032211919743, "loss": 2.2124, "step": 448790 }, { "epoch": 1.7349352878415365, "grad_norm": 0.12527629733085632, "learning_rate": 0.0006028600642741615, "loss": 2.2248, "step": 448800 }, { "epoch": 1.7349739450449198, "grad_norm": 0.12497811764478683, "learning_rate": 0.0006027169220233147, "loss": 2.2218, "step": 448810 }, { "epoch": 1.735012602248303, "grad_norm": 0.1233152449131012, "learning_rate": 0.0006025737944349261, "loss": 2.2148, "step": 448820 }, { "epoch": 1.7350512594516863, "grad_norm": 0.12288574874401093, "learning_rate": 0.0006024306815044916, "loss": 2.2177, "step": 448830 }, { "epoch": 1.7350899166550695, "grad_norm": 0.11841818690299988, "learning_rate": 0.0006022875832275081, "loss": 2.204, "step": 448840 }, { "epoch": 1.7351285738584528, "grad_norm": 0.11461430042982101, "learning_rate": 0.0006021444995994758, "loss": 2.2094, "step": 448850 }, { "epoch": 1.735167231061836, "grad_norm": 0.13102898001670837, "learning_rate": 0.0006020014306158965, "loss": 2.2057, "step": 448860 }, { "epoch": 1.7352058882652193, "grad_norm": 0.12146873772144318, "learning_rate": 0.0006018583762722747, "loss": 2.1905, "step": 448870 }, { "epoch": 1.7352445454686025, "grad_norm": 0.1266041398048401, "learning_rate": 0.0006017153365641172, "loss": 2.213, "step": 448880 }, { "epoch": 1.7352832026719858, "grad_norm": 0.13279256224632263, "learning_rate": 0.0006015723114869329, "loss": 2.2169, "step": 448890 }, { "epoch": 1.735321859875369, "grad_norm": 0.13102956116199493, "learning_rate": 0.0006014293010362331, "loss": 2.2103, "step": 448900 }, { "epoch": 1.7353605170787525, "grad_norm": 0.12755140662193298, "learning_rate": 0.000601286305207531, "loss": 2.2107, "step": 448910 }, { "epoch": 1.7353991742821357, "grad_norm": 0.12142160534858704, "learning_rate": 0.000601143323996343, "loss": 2.2136, "step": 448920 }, { "epoch": 1.735437831485519, "grad_norm": 0.11610239744186401, "learning_rate": 0.0006010003573981873, "loss": 2.2124, "step": 448930 }, { "epoch": 1.7354764886889023, "grad_norm": 0.11581006646156311, "learning_rate": 0.0006008574054085838, "loss": 2.2107, "step": 448940 }, { "epoch": 1.7355151458922857, "grad_norm": 0.1240667849779129, "learning_rate": 0.0006007144680230557, "loss": 2.2187, "step": 448950 }, { "epoch": 1.735553803095669, "grad_norm": 0.12831291556358337, "learning_rate": 0.0006005715452371278, "loss": 2.2144, "step": 448960 }, { "epoch": 1.7355924602990522, "grad_norm": 0.12649348378181458, "learning_rate": 0.0006004286370463277, "loss": 2.2068, "step": 448970 }, { "epoch": 1.7356311175024355, "grad_norm": 0.11840377002954483, "learning_rate": 0.0006002857434461846, "loss": 2.2232, "step": 448980 }, { "epoch": 1.7356697747058187, "grad_norm": 0.12125206738710403, "learning_rate": 0.0006001428644322307, "loss": 2.2108, "step": 448990 }, { "epoch": 1.735708431909202, "grad_norm": 0.12650954723358154, "learning_rate": 0.0006000000000000001, "loss": 2.2199, "step": 449000 }, { "epoch": 1.7357470891125852, "grad_norm": 0.1308407336473465, "learning_rate": 0.000599857150145029, "loss": 2.2146, "step": 449010 }, { "epoch": 1.7357857463159685, "grad_norm": 0.11478245258331299, "learning_rate": 0.0005997143148628563, "loss": 2.2307, "step": 449020 }, { "epoch": 1.7358244035193517, "grad_norm": 0.11808864772319794, "learning_rate": 0.0005995714941490229, "loss": 2.2122, "step": 449030 }, { "epoch": 1.735863060722735, "grad_norm": 0.122349813580513, "learning_rate": 0.0005994286879990723, "loss": 2.2138, "step": 449040 }, { "epoch": 1.7359017179261182, "grad_norm": 0.12328719347715378, "learning_rate": 0.0005992858964085497, "loss": 2.206, "step": 449050 }, { "epoch": 1.7359403751295015, "grad_norm": 0.11808309704065323, "learning_rate": 0.0005991431193730032, "loss": 2.2194, "step": 449060 }, { "epoch": 1.7359790323328848, "grad_norm": 0.12056610733270645, "learning_rate": 0.0005990003568879827, "loss": 2.2169, "step": 449070 }, { "epoch": 1.7360176895362682, "grad_norm": 0.11538299173116684, "learning_rate": 0.0005988576089490406, "loss": 2.2127, "step": 449080 }, { "epoch": 1.7360563467396515, "grad_norm": 0.1362743377685547, "learning_rate": 0.0005987148755517314, "loss": 2.2164, "step": 449090 }, { "epoch": 1.7360950039430347, "grad_norm": 0.13896846771240234, "learning_rate": 0.0005985721566916121, "loss": 2.2146, "step": 449100 }, { "epoch": 1.736133661146418, "grad_norm": 0.12329170852899551, "learning_rate": 0.0005984294523642415, "loss": 2.211, "step": 449110 }, { "epoch": 1.7361723183498015, "grad_norm": 0.14520715177059174, "learning_rate": 0.0005982867625651813, "loss": 2.221, "step": 449120 }, { "epoch": 1.7362109755531847, "grad_norm": 0.12350068241357803, "learning_rate": 0.0005981440872899954, "loss": 2.2143, "step": 449130 }, { "epoch": 1.736249632756568, "grad_norm": 0.12957386672496796, "learning_rate": 0.0005980014265342492, "loss": 2.2216, "step": 449140 }, { "epoch": 1.7362882899599512, "grad_norm": 0.11654391139745712, "learning_rate": 0.0005978587802935112, "loss": 2.2209, "step": 449150 }, { "epoch": 1.7363269471633345, "grad_norm": 0.12524645030498505, "learning_rate": 0.0005977161485633518, "loss": 2.2174, "step": 449160 }, { "epoch": 1.7363656043667177, "grad_norm": 0.12301818281412125, "learning_rate": 0.0005975735313393433, "loss": 2.2034, "step": 449170 }, { "epoch": 1.736404261570101, "grad_norm": 0.11993245035409927, "learning_rate": 0.000597430928617061, "loss": 2.2224, "step": 449180 }, { "epoch": 1.7364429187734842, "grad_norm": 0.12051360309123993, "learning_rate": 0.0005972883403920819, "loss": 2.2104, "step": 449190 }, { "epoch": 1.7364815759768675, "grad_norm": 0.128607839345932, "learning_rate": 0.0005971457666599856, "loss": 2.1977, "step": 449200 }, { "epoch": 1.7365202331802507, "grad_norm": 0.2069489061832428, "learning_rate": 0.0005970032074163534, "loss": 2.2098, "step": 449210 }, { "epoch": 1.736558890383634, "grad_norm": 0.11907302588224411, "learning_rate": 0.0005968606626567695, "loss": 2.1977, "step": 449220 }, { "epoch": 1.7365975475870172, "grad_norm": 0.13132734596729279, "learning_rate": 0.00059671813237682, "loss": 2.2094, "step": 449230 }, { "epoch": 1.7366362047904005, "grad_norm": 0.2081662267446518, "learning_rate": 0.0005965756165720932, "loss": 2.2004, "step": 449240 }, { "epoch": 1.736674861993784, "grad_norm": 0.1204073578119278, "learning_rate": 0.0005964331152381802, "loss": 2.2146, "step": 449250 }, { "epoch": 1.7367135191971672, "grad_norm": 0.12416879087686539, "learning_rate": 0.0005962906283706731, "loss": 2.1914, "step": 449260 }, { "epoch": 1.7367521764005505, "grad_norm": 0.11044029891490936, "learning_rate": 0.0005961481559651673, "loss": 2.2234, "step": 449270 }, { "epoch": 1.7367908336039337, "grad_norm": 0.11880560219287872, "learning_rate": 0.0005960056980172606, "loss": 2.2078, "step": 449280 }, { "epoch": 1.7368294908073172, "grad_norm": 0.13040591776371002, "learning_rate": 0.000595863254522552, "loss": 2.2239, "step": 449290 }, { "epoch": 1.7368681480107004, "grad_norm": 0.12390241771936417, "learning_rate": 0.0005957208254766433, "loss": 2.1982, "step": 449300 }, { "epoch": 1.7369068052140837, "grad_norm": 0.11649197340011597, "learning_rate": 0.000595578410875139, "loss": 2.2188, "step": 449310 }, { "epoch": 1.736945462417467, "grad_norm": 0.13983146846294403, "learning_rate": 0.000595436010713645, "loss": 2.2209, "step": 449320 }, { "epoch": 1.7369841196208502, "grad_norm": 0.12697160243988037, "learning_rate": 0.0005952936249877699, "loss": 2.2083, "step": 449330 }, { "epoch": 1.7370227768242334, "grad_norm": 0.13057808578014374, "learning_rate": 0.0005951512536931245, "loss": 2.2, "step": 449340 }, { "epoch": 1.7370614340276167, "grad_norm": 0.1150062158703804, "learning_rate": 0.0005950088968253215, "loss": 2.2219, "step": 449350 }, { "epoch": 1.737100091231, "grad_norm": 0.12175679206848145, "learning_rate": 0.0005948665543799763, "loss": 2.2185, "step": 449360 }, { "epoch": 1.7371387484343832, "grad_norm": 0.12388991564512253, "learning_rate": 0.0005947242263527061, "loss": 2.2012, "step": 449370 }, { "epoch": 1.7371774056377665, "grad_norm": 0.12117626518011093, "learning_rate": 0.0005945819127391308, "loss": 2.2063, "step": 449380 }, { "epoch": 1.7372160628411497, "grad_norm": 0.14415040612220764, "learning_rate": 0.000594439613534872, "loss": 2.2086, "step": 449390 }, { "epoch": 1.737254720044533, "grad_norm": 0.1306789666414261, "learning_rate": 0.0005942973287355538, "loss": 2.2124, "step": 449400 }, { "epoch": 1.7372933772479162, "grad_norm": 0.11676350980997086, "learning_rate": 0.0005941550583368023, "loss": 2.224, "step": 449410 }, { "epoch": 1.7373320344512997, "grad_norm": 0.14833512902259827, "learning_rate": 0.0005940128023342461, "loss": 2.2216, "step": 449420 }, { "epoch": 1.737370691654683, "grad_norm": 0.11934653669595718, "learning_rate": 0.0005938705607235157, "loss": 2.2032, "step": 449430 }, { "epoch": 1.7374093488580662, "grad_norm": 0.11536265164613724, "learning_rate": 0.0005937283335002443, "loss": 2.226, "step": 449440 }, { "epoch": 1.7374480060614494, "grad_norm": 0.11779036372900009, "learning_rate": 0.0005935861206600668, "loss": 2.2144, "step": 449450 }, { "epoch": 1.737486663264833, "grad_norm": 0.13895969092845917, "learning_rate": 0.0005934439221986208, "loss": 2.2101, "step": 449460 }, { "epoch": 1.7375253204682162, "grad_norm": 0.1290643811225891, "learning_rate": 0.000593301738111545, "loss": 2.21, "step": 449470 }, { "epoch": 1.7375639776715994, "grad_norm": 0.1520080864429474, "learning_rate": 0.0005931595683944822, "loss": 2.2248, "step": 449480 }, { "epoch": 1.7376026348749827, "grad_norm": 0.12581993639469147, "learning_rate": 0.0005930174130430755, "loss": 2.1992, "step": 449490 }, { "epoch": 1.737641292078366, "grad_norm": 0.12312814593315125, "learning_rate": 0.0005928752720529711, "loss": 2.2185, "step": 449500 }, { "epoch": 1.7376799492817492, "grad_norm": 0.12611688673496246, "learning_rate": 0.0005927331454198177, "loss": 2.2151, "step": 449510 }, { "epoch": 1.7377186064851324, "grad_norm": 0.13197670876979828, "learning_rate": 0.0005925910331392657, "loss": 2.2227, "step": 449520 }, { "epoch": 1.7377572636885157, "grad_norm": 0.13880470395088196, "learning_rate": 0.0005924489352069673, "loss": 2.2194, "step": 449530 }, { "epoch": 1.737795920891899, "grad_norm": 0.11517778784036636, "learning_rate": 0.0005923068516185781, "loss": 2.217, "step": 449540 }, { "epoch": 1.7378345780952822, "grad_norm": 0.12511885166168213, "learning_rate": 0.0005921647823697547, "loss": 2.2034, "step": 449550 }, { "epoch": 1.7378732352986654, "grad_norm": 0.11904377490282059, "learning_rate": 0.0005920227274561567, "loss": 2.207, "step": 449560 }, { "epoch": 1.7379118925020487, "grad_norm": 0.12568025290966034, "learning_rate": 0.0005918806868734454, "loss": 2.2043, "step": 449570 }, { "epoch": 1.737950549705432, "grad_norm": 0.13138815760612488, "learning_rate": 0.0005917386606172844, "loss": 2.2226, "step": 449580 }, { "epoch": 1.7379892069088154, "grad_norm": 0.1285579949617386, "learning_rate": 0.0005915966486833397, "loss": 2.2, "step": 449590 }, { "epoch": 1.7380278641121987, "grad_norm": 0.12767644226551056, "learning_rate": 0.0005914546510672793, "loss": 2.2297, "step": 449600 }, { "epoch": 1.738066521315582, "grad_norm": 0.11568326503038406, "learning_rate": 0.0005913126677647734, "loss": 2.2072, "step": 449610 }, { "epoch": 1.7381051785189652, "grad_norm": 0.13093574345111847, "learning_rate": 0.0005911706987714942, "loss": 2.2219, "step": 449620 }, { "epoch": 1.7381438357223487, "grad_norm": 0.11886344105005264, "learning_rate": 0.0005910287440831166, "loss": 2.2273, "step": 449630 }, { "epoch": 1.738182492925732, "grad_norm": 0.1266063153743744, "learning_rate": 0.000590886803695317, "loss": 2.213, "step": 449640 }, { "epoch": 1.7382211501291152, "grad_norm": 0.130299374461174, "learning_rate": 0.0005907448776037747, "loss": 2.2096, "step": 449650 }, { "epoch": 1.7382598073324984, "grad_norm": 0.1166926845908165, "learning_rate": 0.0005906029658041705, "loss": 2.2258, "step": 449660 }, { "epoch": 1.7382984645358817, "grad_norm": 0.1253158152103424, "learning_rate": 0.000590461068292188, "loss": 2.2087, "step": 449670 }, { "epoch": 1.738337121739265, "grad_norm": 0.11593757569789886, "learning_rate": 0.0005903191850635123, "loss": 2.2053, "step": 449680 }, { "epoch": 1.7383757789426482, "grad_norm": 0.12687519192695618, "learning_rate": 0.0005901773161138313, "loss": 2.2177, "step": 449690 }, { "epoch": 1.7384144361460314, "grad_norm": 0.12391290068626404, "learning_rate": 0.0005900354614388346, "loss": 2.2005, "step": 449700 }, { "epoch": 1.7384530933494147, "grad_norm": 0.12147536873817444, "learning_rate": 0.0005898936210342143, "loss": 2.1999, "step": 449710 }, { "epoch": 1.738491750552798, "grad_norm": 0.1351267397403717, "learning_rate": 0.0005897517948956646, "loss": 2.2156, "step": 449720 }, { "epoch": 1.7385304077561812, "grad_norm": 0.14201191067695618, "learning_rate": 0.0005896099830188815, "loss": 2.2097, "step": 449730 }, { "epoch": 1.7385690649595644, "grad_norm": 0.12384200096130371, "learning_rate": 0.0005894681853995635, "loss": 2.2043, "step": 449740 }, { "epoch": 1.7386077221629477, "grad_norm": 0.11880270391702652, "learning_rate": 0.0005893264020334115, "loss": 2.2133, "step": 449750 }, { "epoch": 1.7386463793663312, "grad_norm": 0.12355902045965195, "learning_rate": 0.0005891846329161281, "loss": 2.2097, "step": 449760 }, { "epoch": 1.7386850365697144, "grad_norm": 0.12222316116094589, "learning_rate": 0.0005890428780434185, "loss": 2.2234, "step": 449770 }, { "epoch": 1.7387236937730977, "grad_norm": 0.11164572834968567, "learning_rate": 0.0005889011374109891, "loss": 2.2085, "step": 449780 }, { "epoch": 1.738762350976481, "grad_norm": 0.1121785044670105, "learning_rate": 0.00058875941101455, "loss": 2.2242, "step": 449790 }, { "epoch": 1.7388010081798644, "grad_norm": 0.1273416131734848, "learning_rate": 0.0005886176988498119, "loss": 2.1992, "step": 449800 }, { "epoch": 1.7388396653832476, "grad_norm": 0.14521710574626923, "learning_rate": 0.0005884760009124889, "loss": 2.2091, "step": 449810 }, { "epoch": 1.7388783225866309, "grad_norm": 0.12968093156814575, "learning_rate": 0.0005883343171982964, "loss": 2.2175, "step": 449820 }, { "epoch": 1.7389169797900141, "grad_norm": 0.1272241622209549, "learning_rate": 0.0005881926477029524, "loss": 2.2064, "step": 449830 }, { "epoch": 1.7389556369933974, "grad_norm": 0.12387470155954361, "learning_rate": 0.0005880509924221767, "loss": 2.2229, "step": 449840 }, { "epoch": 1.7389942941967806, "grad_norm": 0.13222984969615936, "learning_rate": 0.0005879093513516917, "loss": 2.2201, "step": 449850 }, { "epoch": 1.739032951400164, "grad_norm": 0.12421976029872894, "learning_rate": 0.0005877677244872217, "loss": 2.2115, "step": 449860 }, { "epoch": 1.7390716086035471, "grad_norm": 0.12273195385932922, "learning_rate": 0.000587626111824493, "loss": 2.2061, "step": 449870 }, { "epoch": 1.7391102658069304, "grad_norm": 0.1302752047777176, "learning_rate": 0.0005874845133592339, "loss": 2.2158, "step": 449880 }, { "epoch": 1.7391489230103137, "grad_norm": 0.1115131750702858, "learning_rate": 0.0005873429290871759, "loss": 2.201, "step": 449890 }, { "epoch": 1.739187580213697, "grad_norm": 0.1338716298341751, "learning_rate": 0.0005872013590040513, "loss": 2.2057, "step": 449900 }, { "epoch": 1.7392262374170802, "grad_norm": 0.12222873419523239, "learning_rate": 0.000587059803105595, "loss": 2.2239, "step": 449910 }, { "epoch": 1.7392648946204636, "grad_norm": 0.12289751321077347, "learning_rate": 0.0005869182613875446, "loss": 2.1973, "step": 449920 }, { "epoch": 1.7393035518238469, "grad_norm": 0.12132169306278229, "learning_rate": 0.0005867767338456389, "loss": 2.1977, "step": 449930 }, { "epoch": 1.7393422090272301, "grad_norm": 0.12245948612689972, "learning_rate": 0.0005866352204756198, "loss": 2.2134, "step": 449940 }, { "epoch": 1.7393808662306134, "grad_norm": 0.1194300726056099, "learning_rate": 0.0005864937212732304, "loss": 2.2151, "step": 449950 }, { "epoch": 1.7394195234339966, "grad_norm": 0.129800483584404, "learning_rate": 0.0005863522362342166, "loss": 2.2056, "step": 449960 }, { "epoch": 1.7394581806373801, "grad_norm": 0.11177778989076614, "learning_rate": 0.0005862107653543262, "loss": 2.2063, "step": 449970 }, { "epoch": 1.7394968378407634, "grad_norm": 0.11829909682273865, "learning_rate": 0.000586069308629309, "loss": 2.2196, "step": 449980 }, { "epoch": 1.7395354950441466, "grad_norm": 0.12138839066028595, "learning_rate": 0.0005859278660549172, "loss": 2.2104, "step": 449990 }, { "epoch": 1.7395741522475299, "grad_norm": 0.12623794376850128, "learning_rate": 0.0005857864376269049, "loss": 2.2161, "step": 450000 }, { "epoch": 1.7396128094509131, "grad_norm": 0.1256103515625, "learning_rate": 0.0005856450233410284, "loss": 2.2011, "step": 450010 }, { "epoch": 1.7396514666542964, "grad_norm": 0.11964461952447891, "learning_rate": 0.0005855036231930461, "loss": 2.2061, "step": 450020 }, { "epoch": 1.7396901238576796, "grad_norm": 0.11546836048364639, "learning_rate": 0.0005853622371787186, "loss": 2.2067, "step": 450030 }, { "epoch": 1.7397287810610629, "grad_norm": 0.1272667646408081, "learning_rate": 0.0005852208652938085, "loss": 2.1995, "step": 450040 }, { "epoch": 1.7397674382644461, "grad_norm": 0.13499830663204193, "learning_rate": 0.0005850795075340807, "loss": 2.2311, "step": 450050 }, { "epoch": 1.7398060954678294, "grad_norm": 0.1112581118941307, "learning_rate": 0.0005849381638953018, "loss": 2.1999, "step": 450060 }, { "epoch": 1.7398447526712126, "grad_norm": 0.12504318356513977, "learning_rate": 0.0005847968343732408, "loss": 2.2002, "step": 450070 }, { "epoch": 1.7398834098745959, "grad_norm": 0.12915337085723877, "learning_rate": 0.0005846555189636694, "loss": 2.2103, "step": 450080 }, { "epoch": 1.7399220670779794, "grad_norm": 0.12873782217502594, "learning_rate": 0.0005845142176623603, "loss": 2.2116, "step": 450090 }, { "epoch": 1.7399607242813626, "grad_norm": 0.12179477512836456, "learning_rate": 0.0005843729304650889, "loss": 2.2131, "step": 450100 }, { "epoch": 1.7399993814847459, "grad_norm": 0.11743723601102829, "learning_rate": 0.0005842316573676327, "loss": 2.208, "step": 450110 }, { "epoch": 1.7400380386881291, "grad_norm": 0.12051267176866531, "learning_rate": 0.0005840903983657715, "loss": 2.216, "step": 450120 }, { "epoch": 1.7400766958915124, "grad_norm": 0.13697391748428345, "learning_rate": 0.0005839491534552865, "loss": 2.2053, "step": 450130 }, { "epoch": 1.7401153530948958, "grad_norm": 0.12576167285442352, "learning_rate": 0.0005838079226319617, "loss": 2.2214, "step": 450140 }, { "epoch": 1.740154010298279, "grad_norm": 0.12356912344694138, "learning_rate": 0.000583666705891583, "loss": 2.2187, "step": 450150 }, { "epoch": 1.7401926675016623, "grad_norm": 0.1240406483411789, "learning_rate": 0.0005835255032299381, "loss": 2.2062, "step": 450160 }, { "epoch": 1.7402313247050456, "grad_norm": 0.12559963762760162, "learning_rate": 0.0005833843146428175, "loss": 2.2052, "step": 450170 }, { "epoch": 1.7402699819084289, "grad_norm": 0.1147528812289238, "learning_rate": 0.000583243140126013, "loss": 2.2193, "step": 450180 }, { "epoch": 1.740308639111812, "grad_norm": 0.12529990077018738, "learning_rate": 0.000583101979675319, "loss": 2.2089, "step": 450190 }, { "epoch": 1.7403472963151954, "grad_norm": 0.1308630257844925, "learning_rate": 0.0005829608332865319, "loss": 2.2181, "step": 450200 }, { "epoch": 1.7403859535185786, "grad_norm": 0.12569352984428406, "learning_rate": 0.0005828197009554501, "loss": 2.2027, "step": 450210 }, { "epoch": 1.7404246107219619, "grad_norm": 0.1284223347902298, "learning_rate": 0.000582678582677874, "loss": 2.2122, "step": 450220 }, { "epoch": 1.7404632679253451, "grad_norm": 0.12393103539943695, "learning_rate": 0.0005825374784496065, "loss": 2.2113, "step": 450230 }, { "epoch": 1.7405019251287284, "grad_norm": 0.11839126795530319, "learning_rate": 0.000582396388266452, "loss": 2.2164, "step": 450240 }, { "epoch": 1.7405405823321116, "grad_norm": 0.12693467736244202, "learning_rate": 0.0005822553121242176, "loss": 2.2165, "step": 450250 }, { "epoch": 1.740579239535495, "grad_norm": 0.11685807257890701, "learning_rate": 0.0005821142500187118, "loss": 2.2087, "step": 450260 }, { "epoch": 1.7406178967388783, "grad_norm": 0.12128729373216629, "learning_rate": 0.000581973201945746, "loss": 2.2145, "step": 450270 }, { "epoch": 1.7406565539422616, "grad_norm": 0.12367042899131775, "learning_rate": 0.0005818321679011331, "loss": 2.2069, "step": 450280 }, { "epoch": 1.7406952111456448, "grad_norm": 0.12804968655109406, "learning_rate": 0.0005816911478806881, "loss": 2.2099, "step": 450290 }, { "epoch": 1.7407338683490283, "grad_norm": 0.12371846288442612, "learning_rate": 0.0005815501418802285, "loss": 2.2068, "step": 450300 }, { "epoch": 1.7407725255524116, "grad_norm": 0.13237400352954865, "learning_rate": 0.0005814091498955733, "loss": 2.2264, "step": 450310 }, { "epoch": 1.7408111827557948, "grad_norm": 0.12525179982185364, "learning_rate": 0.0005812681719225441, "loss": 2.2056, "step": 450320 }, { "epoch": 1.740849839959178, "grad_norm": 0.1195155680179596, "learning_rate": 0.0005811272079569642, "loss": 2.2177, "step": 450330 }, { "epoch": 1.7408884971625613, "grad_norm": 0.1262855976819992, "learning_rate": 0.0005809862579946592, "loss": 2.1999, "step": 450340 }, { "epoch": 1.7409271543659446, "grad_norm": 0.13950762152671814, "learning_rate": 0.0005808453220314567, "loss": 2.2182, "step": 450350 }, { "epoch": 1.7409658115693278, "grad_norm": 0.12949655950069427, "learning_rate": 0.0005807044000631863, "loss": 2.2177, "step": 450360 }, { "epoch": 1.741004468772711, "grad_norm": 0.11451691389083862, "learning_rate": 0.0005805634920856797, "loss": 2.21, "step": 450370 }, { "epoch": 1.7410431259760943, "grad_norm": 0.11944366991519928, "learning_rate": 0.000580422598094771, "loss": 2.2095, "step": 450380 }, { "epoch": 1.7410817831794776, "grad_norm": 0.13329607248306274, "learning_rate": 0.0005802817180862958, "loss": 2.213, "step": 450390 }, { "epoch": 1.7411204403828608, "grad_norm": 0.1350400447845459, "learning_rate": 0.0005801408520560923, "loss": 2.2152, "step": 450400 }, { "epoch": 1.741159097586244, "grad_norm": 0.12450750172138214, "learning_rate": 0.0005800000000000001, "loss": 2.2142, "step": 450410 }, { "epoch": 1.7411977547896273, "grad_norm": 0.11537111550569534, "learning_rate": 0.0005798591619138616, "loss": 2.209, "step": 450420 }, { "epoch": 1.7412364119930108, "grad_norm": 0.11340397596359253, "learning_rate": 0.0005797183377935207, "loss": 2.2062, "step": 450430 }, { "epoch": 1.741275069196394, "grad_norm": 0.12494982779026031, "learning_rate": 0.0005795775276348238, "loss": 2.2113, "step": 450440 }, { "epoch": 1.7413137263997773, "grad_norm": 0.12394999712705612, "learning_rate": 0.0005794367314336191, "loss": 2.2185, "step": 450450 }, { "epoch": 1.7413523836031606, "grad_norm": 0.13133755326271057, "learning_rate": 0.0005792959491857565, "loss": 2.2064, "step": 450460 }, { "epoch": 1.741391040806544, "grad_norm": 0.13078813254833221, "learning_rate": 0.0005791551808870892, "loss": 2.2135, "step": 450470 }, { "epoch": 1.7414296980099273, "grad_norm": 0.12275271862745285, "learning_rate": 0.0005790144265334712, "loss": 2.2116, "step": 450480 }, { "epoch": 1.7414683552133106, "grad_norm": 0.12438642978668213, "learning_rate": 0.0005788736861207587, "loss": 2.2078, "step": 450490 }, { "epoch": 1.7415070124166938, "grad_norm": 0.13157707452774048, "learning_rate": 0.0005787329596448105, "loss": 2.1979, "step": 450500 }, { "epoch": 1.741545669620077, "grad_norm": 0.12612107396125793, "learning_rate": 0.000578592247101487, "loss": 2.2122, "step": 450510 }, { "epoch": 1.7415843268234603, "grad_norm": 0.12490899860858917, "learning_rate": 0.0005784515484866513, "loss": 2.2124, "step": 450520 }, { "epoch": 1.7416229840268436, "grad_norm": 0.1354866623878479, "learning_rate": 0.0005783108637961674, "loss": 2.2024, "step": 450530 }, { "epoch": 1.7416616412302268, "grad_norm": 0.11852476745843887, "learning_rate": 0.0005781701930259023, "loss": 2.2092, "step": 450540 }, { "epoch": 1.74170029843361, "grad_norm": 0.11969569325447083, "learning_rate": 0.0005780295361717249, "loss": 2.2063, "step": 450550 }, { "epoch": 1.7417389556369933, "grad_norm": 0.12728244066238403, "learning_rate": 0.0005778888932295057, "loss": 2.2148, "step": 450560 }, { "epoch": 1.7417776128403766, "grad_norm": 0.1304050236940384, "learning_rate": 0.0005777482641951179, "loss": 2.2052, "step": 450570 }, { "epoch": 1.7418162700437598, "grad_norm": 0.12834100425243378, "learning_rate": 0.0005776076490644362, "loss": 2.206, "step": 450580 }, { "epoch": 1.741854927247143, "grad_norm": 0.12126423418521881, "learning_rate": 0.0005774670478333375, "loss": 2.2091, "step": 450590 }, { "epoch": 1.7418935844505266, "grad_norm": 0.1424793004989624, "learning_rate": 0.0005773264604977009, "loss": 2.2037, "step": 450600 }, { "epoch": 1.7419322416539098, "grad_norm": 0.14608636498451233, "learning_rate": 0.000577185887053407, "loss": 2.2184, "step": 450610 }, { "epoch": 1.741970898857293, "grad_norm": 0.120540089905262, "learning_rate": 0.0005770453274963394, "loss": 2.2063, "step": 450620 }, { "epoch": 1.7420095560606763, "grad_norm": 0.12794199585914612, "learning_rate": 0.0005769047818223827, "loss": 2.221, "step": 450630 }, { "epoch": 1.7420482132640598, "grad_norm": 0.11833221465349197, "learning_rate": 0.0005767642500274243, "loss": 2.2074, "step": 450640 }, { "epoch": 1.742086870467443, "grad_norm": 0.12499483674764633, "learning_rate": 0.0005766237321073533, "loss": 2.2001, "step": 450650 }, { "epoch": 1.7421255276708263, "grad_norm": 0.17976893484592438, "learning_rate": 0.0005764832280580603, "loss": 2.2064, "step": 450660 }, { "epoch": 1.7421641848742095, "grad_norm": 0.13779857754707336, "learning_rate": 0.0005763427378754393, "loss": 2.1962, "step": 450670 }, { "epoch": 1.7422028420775928, "grad_norm": 0.12299429625272751, "learning_rate": 0.000576202261555385, "loss": 2.2107, "step": 450680 }, { "epoch": 1.742241499280976, "grad_norm": 0.12656106054782867, "learning_rate": 0.0005760617990937948, "loss": 2.2029, "step": 450690 }, { "epoch": 1.7422801564843593, "grad_norm": 0.13690079748630524, "learning_rate": 0.0005759213504865681, "loss": 2.2119, "step": 450700 }, { "epoch": 1.7423188136877426, "grad_norm": 0.12545296549797058, "learning_rate": 0.0005757809157296059, "loss": 2.2108, "step": 450710 }, { "epoch": 1.7423574708911258, "grad_norm": 0.1948815882205963, "learning_rate": 0.0005756404948188117, "loss": 2.2058, "step": 450720 }, { "epoch": 1.742396128094509, "grad_norm": 0.1338101625442505, "learning_rate": 0.0005755000877500905, "loss": 2.2384, "step": 450730 }, { "epoch": 1.7424347852978923, "grad_norm": 0.11879181861877441, "learning_rate": 0.00057535969451935, "loss": 2.2143, "step": 450740 }, { "epoch": 1.7424734425012756, "grad_norm": 0.11900530010461807, "learning_rate": 0.0005752193151224994, "loss": 2.2085, "step": 450750 }, { "epoch": 1.7425120997046588, "grad_norm": 0.12955817580223083, "learning_rate": 0.0005750789495554498, "loss": 2.2031, "step": 450760 }, { "epoch": 1.7425507569080423, "grad_norm": 0.11215945333242416, "learning_rate": 0.0005749385978141153, "loss": 2.21, "step": 450770 }, { "epoch": 1.7425894141114255, "grad_norm": 0.11732306331396103, "learning_rate": 0.0005747982598944106, "loss": 2.226, "step": 450780 }, { "epoch": 1.7426280713148088, "grad_norm": 0.12146113067865372, "learning_rate": 0.0005746579357922535, "loss": 2.2001, "step": 450790 }, { "epoch": 1.742666728518192, "grad_norm": 0.13044078648090363, "learning_rate": 0.0005745176255035631, "loss": 2.2073, "step": 450800 }, { "epoch": 1.7427053857215755, "grad_norm": 0.12934516370296478, "learning_rate": 0.0005743773290242611, "loss": 2.2063, "step": 450810 }, { "epoch": 1.7427440429249588, "grad_norm": 0.1249857172369957, "learning_rate": 0.0005742370463502709, "loss": 2.2067, "step": 450820 }, { "epoch": 1.742782700128342, "grad_norm": 0.1371041238307953, "learning_rate": 0.0005740967774775177, "loss": 2.2142, "step": 450830 }, { "epoch": 1.7428213573317253, "grad_norm": 0.12729176878929138, "learning_rate": 0.0005739565224019289, "loss": 2.2039, "step": 450840 }, { "epoch": 1.7428600145351085, "grad_norm": 0.12598155438899994, "learning_rate": 0.0005738162811194345, "loss": 2.2005, "step": 450850 }, { "epoch": 1.7428986717384918, "grad_norm": 0.1471673548221588, "learning_rate": 0.0005736760536259653, "loss": 2.2131, "step": 450860 }, { "epoch": 1.742937328941875, "grad_norm": 0.11984395235776901, "learning_rate": 0.000573535839917455, "loss": 2.2269, "step": 450870 }, { "epoch": 1.7429759861452583, "grad_norm": 0.12927258014678955, "learning_rate": 0.0005733956399898392, "loss": 2.1974, "step": 450880 }, { "epoch": 1.7430146433486415, "grad_norm": 0.13765715062618256, "learning_rate": 0.0005732554538390553, "loss": 2.2144, "step": 450890 }, { "epoch": 1.7430533005520248, "grad_norm": 0.13073231279850006, "learning_rate": 0.0005731152814610425, "loss": 2.2167, "step": 450900 }, { "epoch": 1.743091957755408, "grad_norm": 0.11769286543130875, "learning_rate": 0.0005729751228517423, "loss": 2.2048, "step": 450910 }, { "epoch": 1.7431306149587913, "grad_norm": 0.13146555423736572, "learning_rate": 0.0005728349780070983, "loss": 2.211, "step": 450920 }, { "epoch": 1.7431692721621745, "grad_norm": 0.120726577937603, "learning_rate": 0.0005726948469230556, "loss": 2.2024, "step": 450930 }, { "epoch": 1.743207929365558, "grad_norm": 0.1122736856341362, "learning_rate": 0.0005725547295955618, "loss": 2.2047, "step": 450940 }, { "epoch": 1.7432465865689413, "grad_norm": 0.13733603060245514, "learning_rate": 0.0005724146260205663, "loss": 2.2107, "step": 450950 }, { "epoch": 1.7432852437723245, "grad_norm": 0.1244693174958229, "learning_rate": 0.0005722745361940203, "loss": 2.2065, "step": 450960 }, { "epoch": 1.7433239009757078, "grad_norm": 0.12130724638700485, "learning_rate": 0.0005721344601118772, "loss": 2.2024, "step": 450970 }, { "epoch": 1.7433625581790912, "grad_norm": 0.1321234107017517, "learning_rate": 0.0005719943977700927, "loss": 2.2016, "step": 450980 }, { "epoch": 1.7434012153824745, "grad_norm": 0.12718746066093445, "learning_rate": 0.000571854349164624, "loss": 2.2084, "step": 450990 }, { "epoch": 1.7434398725858578, "grad_norm": 0.3911479413509369, "learning_rate": 0.0005717143142914301, "loss": 2.2242, "step": 451000 }, { "epoch": 1.743478529789241, "grad_norm": 0.13979850709438324, "learning_rate": 0.0005715742931464724, "loss": 2.2095, "step": 451010 }, { "epoch": 1.7435171869926243, "grad_norm": 0.1296544075012207, "learning_rate": 0.0005714342857257145, "loss": 2.2119, "step": 451020 }, { "epoch": 1.7435558441960075, "grad_norm": 0.12337645888328552, "learning_rate": 0.0005712942920251212, "loss": 2.212, "step": 451030 }, { "epoch": 1.7435945013993908, "grad_norm": 0.12149360775947571, "learning_rate": 0.00057115431204066, "loss": 2.1986, "step": 451040 }, { "epoch": 1.743633158602774, "grad_norm": 0.1235906183719635, "learning_rate": 0.0005710143457683, "loss": 2.2207, "step": 451050 }, { "epoch": 1.7436718158061573, "grad_norm": 0.12550729513168335, "learning_rate": 0.0005708743932040124, "loss": 2.198, "step": 451060 }, { "epoch": 1.7437104730095405, "grad_norm": 0.12192264944314957, "learning_rate": 0.0005707344543437702, "loss": 2.2223, "step": 451070 }, { "epoch": 1.7437491302129238, "grad_norm": 0.11551124602556229, "learning_rate": 0.0005705945291835491, "loss": 2.2179, "step": 451080 }, { "epoch": 1.743787787416307, "grad_norm": 0.12643487751483917, "learning_rate": 0.0005704546177193255, "loss": 2.199, "step": 451090 }, { "epoch": 1.7438264446196903, "grad_norm": 0.1214127466082573, "learning_rate": 0.0005703147199470787, "loss": 2.2017, "step": 451100 }, { "epoch": 1.7438651018230737, "grad_norm": 0.125314861536026, "learning_rate": 0.0005701748358627898, "loss": 2.2147, "step": 451110 }, { "epoch": 1.743903759026457, "grad_norm": 0.1230613961815834, "learning_rate": 0.000570034965462442, "loss": 2.2113, "step": 451120 }, { "epoch": 1.7439424162298403, "grad_norm": 0.12328998744487762, "learning_rate": 0.0005698951087420197, "loss": 2.2204, "step": 451130 }, { "epoch": 1.7439810734332235, "grad_norm": 0.3015275001525879, "learning_rate": 0.0005697552656975102, "loss": 2.2021, "step": 451140 }, { "epoch": 1.744019730636607, "grad_norm": 0.13240598142147064, "learning_rate": 0.0005696154363249022, "loss": 2.2112, "step": 451150 }, { "epoch": 1.7440583878399902, "grad_norm": 0.12521402537822723, "learning_rate": 0.0005694756206201866, "loss": 2.2096, "step": 451160 }, { "epoch": 1.7440970450433735, "grad_norm": 0.1339920163154602, "learning_rate": 0.0005693358185793564, "loss": 2.1992, "step": 451170 }, { "epoch": 1.7441357022467567, "grad_norm": 0.1233472228050232, "learning_rate": 0.0005691960301984063, "loss": 2.2051, "step": 451180 }, { "epoch": 1.74417435945014, "grad_norm": 0.12355933338403702, "learning_rate": 0.0005690562554733328, "loss": 2.1977, "step": 451190 }, { "epoch": 1.7442130166535232, "grad_norm": 0.12268158048391342, "learning_rate": 0.0005689164944001346, "loss": 2.1982, "step": 451200 }, { "epoch": 1.7442516738569065, "grad_norm": 0.1282660961151123, "learning_rate": 0.0005687767469748124, "loss": 2.2058, "step": 451210 }, { "epoch": 1.7442903310602897, "grad_norm": 0.11994381994009018, "learning_rate": 0.0005686370131933689, "loss": 2.1929, "step": 451220 }, { "epoch": 1.744328988263673, "grad_norm": 0.12090937048196793, "learning_rate": 0.0005684972930518085, "loss": 2.2087, "step": 451230 }, { "epoch": 1.7443676454670562, "grad_norm": 0.1255774199962616, "learning_rate": 0.0005683575865461376, "loss": 2.2111, "step": 451240 }, { "epoch": 1.7444063026704395, "grad_norm": 0.1352381408214569, "learning_rate": 0.0005682178936723647, "loss": 2.2018, "step": 451250 }, { "epoch": 1.7444449598738228, "grad_norm": 0.12163727730512619, "learning_rate": 0.0005680782144265002, "loss": 2.2189, "step": 451260 }, { "epoch": 1.744483617077206, "grad_norm": 0.1344509720802307, "learning_rate": 0.0005679385488045563, "loss": 2.2011, "step": 451270 }, { "epoch": 1.7445222742805895, "grad_norm": 0.1113949567079544, "learning_rate": 0.0005677988968025474, "loss": 2.216, "step": 451280 }, { "epoch": 1.7445609314839727, "grad_norm": 0.12429040670394897, "learning_rate": 0.0005676592584164897, "loss": 2.2099, "step": 451290 }, { "epoch": 1.744599588687356, "grad_norm": 0.12263821810483932, "learning_rate": 0.0005675196336424013, "loss": 2.198, "step": 451300 }, { "epoch": 1.7446382458907392, "grad_norm": 0.120047427713871, "learning_rate": 0.0005673800224763024, "loss": 2.2053, "step": 451310 }, { "epoch": 1.7446769030941227, "grad_norm": 0.12776467204093933, "learning_rate": 0.0005672404249142147, "loss": 2.2238, "step": 451320 }, { "epoch": 1.744715560297506, "grad_norm": 0.12136024981737137, "learning_rate": 0.0005671008409521626, "loss": 2.1972, "step": 451330 }, { "epoch": 1.7447542175008892, "grad_norm": 0.12847566604614258, "learning_rate": 0.0005669612705861716, "loss": 2.2046, "step": 451340 }, { "epoch": 1.7447928747042725, "grad_norm": 0.11636916548013687, "learning_rate": 0.0005668217138122697, "loss": 2.2228, "step": 451350 }, { "epoch": 1.7448315319076557, "grad_norm": 0.13120609521865845, "learning_rate": 0.0005666821706264867, "loss": 2.2101, "step": 451360 }, { "epoch": 1.744870189111039, "grad_norm": 0.12237390875816345, "learning_rate": 0.0005665426410248542, "loss": 2.2067, "step": 451370 }, { "epoch": 1.7449088463144222, "grad_norm": 0.13978280127048492, "learning_rate": 0.0005664031250034059, "loss": 2.2129, "step": 451380 }, { "epoch": 1.7449475035178055, "grad_norm": 0.13153356313705444, "learning_rate": 0.0005662636225581774, "loss": 2.2129, "step": 451390 }, { "epoch": 1.7449861607211887, "grad_norm": 0.12097728997468948, "learning_rate": 0.0005661241336852063, "loss": 2.2106, "step": 451400 }, { "epoch": 1.745024817924572, "grad_norm": 0.1258837878704071, "learning_rate": 0.0005659846583805319, "loss": 2.2322, "step": 451410 }, { "epoch": 1.7450634751279552, "grad_norm": 0.13201965391635895, "learning_rate": 0.0005658451966401954, "loss": 2.2006, "step": 451420 }, { "epoch": 1.7451021323313385, "grad_norm": 0.13621696829795837, "learning_rate": 0.0005657057484602402, "loss": 2.217, "step": 451430 }, { "epoch": 1.7451407895347217, "grad_norm": 0.12259390950202942, "learning_rate": 0.0005655663138367115, "loss": 2.201, "step": 451440 }, { "epoch": 1.7451794467381052, "grad_norm": 0.11789116263389587, "learning_rate": 0.0005654268927656563, "loss": 2.2053, "step": 451450 }, { "epoch": 1.7452181039414885, "grad_norm": 0.13373173773288727, "learning_rate": 0.0005652874852431238, "loss": 2.2009, "step": 451460 }, { "epoch": 1.7452567611448717, "grad_norm": 0.1411893516778946, "learning_rate": 0.0005651480912651647, "loss": 2.209, "step": 451470 }, { "epoch": 1.745295418348255, "grad_norm": 0.12485019117593765, "learning_rate": 0.0005650087108278321, "loss": 2.2159, "step": 451480 }, { "epoch": 1.7453340755516384, "grad_norm": 0.13727016746997833, "learning_rate": 0.0005648693439271811, "loss": 2.2193, "step": 451490 }, { "epoch": 1.7453727327550217, "grad_norm": 0.11968348175287247, "learning_rate": 0.0005647299905592676, "loss": 2.2009, "step": 451500 }, { "epoch": 1.745411389958405, "grad_norm": 0.11810345947742462, "learning_rate": 0.0005645906507201508, "loss": 2.2111, "step": 451510 }, { "epoch": 1.7454500471617882, "grad_norm": 0.12262141704559326, "learning_rate": 0.0005644513244058911, "loss": 2.2229, "step": 451520 }, { "epoch": 1.7454887043651715, "grad_norm": 0.12217133492231369, "learning_rate": 0.0005643120116125511, "loss": 2.2015, "step": 451530 }, { "epoch": 1.7455273615685547, "grad_norm": 0.1324571669101715, "learning_rate": 0.0005641727123361949, "loss": 2.1986, "step": 451540 }, { "epoch": 1.745566018771938, "grad_norm": 0.12952816486358643, "learning_rate": 0.0005640334265728886, "loss": 2.2094, "step": 451550 }, { "epoch": 1.7456046759753212, "grad_norm": 0.1282138228416443, "learning_rate": 0.0005638941543187008, "loss": 2.2228, "step": 451560 }, { "epoch": 1.7456433331787045, "grad_norm": 0.12335231155157089, "learning_rate": 0.0005637548955697012, "loss": 2.2075, "step": 451570 }, { "epoch": 1.7456819903820877, "grad_norm": 0.13678689301013947, "learning_rate": 0.000563615650321962, "loss": 2.2099, "step": 451580 }, { "epoch": 1.745720647585471, "grad_norm": 0.1274346113204956, "learning_rate": 0.0005634764185715571, "loss": 2.2113, "step": 451590 }, { "epoch": 1.7457593047888542, "grad_norm": 0.1281769573688507, "learning_rate": 0.0005633372003145623, "loss": 2.2036, "step": 451600 }, { "epoch": 1.7457979619922375, "grad_norm": 0.12230178713798523, "learning_rate": 0.0005631979955470551, "loss": 2.1982, "step": 451610 }, { "epoch": 1.745836619195621, "grad_norm": 0.12558716535568237, "learning_rate": 0.0005630588042651155, "loss": 2.2187, "step": 451620 }, { "epoch": 1.7458752763990042, "grad_norm": 0.14368413388729095, "learning_rate": 0.0005629196264648244, "loss": 2.22, "step": 451630 }, { "epoch": 1.7459139336023874, "grad_norm": 0.13558371365070343, "learning_rate": 0.0005627804621422657, "loss": 2.198, "step": 451640 }, { "epoch": 1.7459525908057707, "grad_norm": 0.11974722892045975, "learning_rate": 0.0005626413112935241, "loss": 2.2166, "step": 451650 }, { "epoch": 1.7459912480091542, "grad_norm": 0.12421214580535889, "learning_rate": 0.0005625021739146874, "loss": 2.2027, "step": 451660 }, { "epoch": 1.7460299052125374, "grad_norm": 0.13483013212680817, "learning_rate": 0.0005623630500018442, "loss": 2.207, "step": 451670 }, { "epoch": 1.7460685624159207, "grad_norm": 0.12035472691059113, "learning_rate": 0.0005622239395510857, "loss": 2.1978, "step": 451680 }, { "epoch": 1.746107219619304, "grad_norm": 0.12705770134925842, "learning_rate": 0.0005620848425585048, "loss": 2.2167, "step": 451690 }, { "epoch": 1.7461458768226872, "grad_norm": 0.1203598827123642, "learning_rate": 0.000561945759020196, "loss": 2.2198, "step": 451700 }, { "epoch": 1.7461845340260704, "grad_norm": 0.1211920827627182, "learning_rate": 0.0005618066889322562, "loss": 2.2095, "step": 451710 }, { "epoch": 1.7462231912294537, "grad_norm": 0.1302730292081833, "learning_rate": 0.0005616676322907839, "loss": 2.1989, "step": 451720 }, { "epoch": 1.746261848432837, "grad_norm": 0.1250298172235489, "learning_rate": 0.0005615285890918791, "loss": 2.2241, "step": 451730 }, { "epoch": 1.7463005056362202, "grad_norm": 0.1284303367137909, "learning_rate": 0.0005613895593316445, "loss": 2.2192, "step": 451740 }, { "epoch": 1.7463391628396034, "grad_norm": 0.12830610573291779, "learning_rate": 0.0005612505430061843, "loss": 2.2104, "step": 451750 }, { "epoch": 1.7463778200429867, "grad_norm": 0.12058718502521515, "learning_rate": 0.0005611115401116043, "loss": 2.2115, "step": 451760 }, { "epoch": 1.74641647724637, "grad_norm": 0.12984438240528107, "learning_rate": 0.0005609725506440122, "loss": 2.2087, "step": 451770 }, { "epoch": 1.7464551344497534, "grad_norm": 0.12947475910186768, "learning_rate": 0.0005608335745995183, "loss": 2.2224, "step": 451780 }, { "epoch": 1.7464937916531367, "grad_norm": 0.11166150122880936, "learning_rate": 0.0005606946119742342, "loss": 2.194, "step": 451790 }, { "epoch": 1.74653244885652, "grad_norm": 0.13033993542194366, "learning_rate": 0.0005605556627642734, "loss": 2.2127, "step": 451800 }, { "epoch": 1.7465711060599032, "grad_norm": 0.12836289405822754, "learning_rate": 0.0005604167269657514, "loss": 2.1921, "step": 451810 }, { "epoch": 1.7466097632632864, "grad_norm": 0.13571980595588684, "learning_rate": 0.0005602778045747854, "loss": 2.2235, "step": 451820 }, { "epoch": 1.74664842046667, "grad_norm": 0.13120993971824646, "learning_rate": 0.0005601388955874946, "loss": 2.2069, "step": 451830 }, { "epoch": 1.7466870776700532, "grad_norm": 0.13109606504440308, "learning_rate": 0.0005600000000000001, "loss": 2.2094, "step": 451840 }, { "epoch": 1.7467257348734364, "grad_norm": 0.12630265951156616, "learning_rate": 0.0005598611178084247, "loss": 2.2149, "step": 451850 }, { "epoch": 1.7467643920768197, "grad_norm": 0.12479433417320251, "learning_rate": 0.0005597222490088936, "loss": 2.2069, "step": 451860 }, { "epoch": 1.746803049280203, "grad_norm": 0.12622688710689545, "learning_rate": 0.0005595833935975327, "loss": 2.2114, "step": 451870 }, { "epoch": 1.7468417064835862, "grad_norm": 0.1303582340478897, "learning_rate": 0.0005594445515704713, "loss": 2.2146, "step": 451880 }, { "epoch": 1.7468803636869694, "grad_norm": 0.12129681557416916, "learning_rate": 0.0005593057229238397, "loss": 2.2068, "step": 451890 }, { "epoch": 1.7469190208903527, "grad_norm": 0.12745247781276703, "learning_rate": 0.00055916690765377, "loss": 2.2152, "step": 451900 }, { "epoch": 1.746957678093736, "grad_norm": 0.14266474545001984, "learning_rate": 0.0005590281057563962, "loss": 2.225, "step": 451910 }, { "epoch": 1.7469963352971192, "grad_norm": 0.14710094034671783, "learning_rate": 0.0005588893172278544, "loss": 2.2181, "step": 451920 }, { "epoch": 1.7470349925005024, "grad_norm": 0.12489454448223114, "learning_rate": 0.0005587505420642825, "loss": 2.2114, "step": 451930 }, { "epoch": 1.7470736497038857, "grad_norm": 0.11751807481050491, "learning_rate": 0.00055861178026182, "loss": 2.2161, "step": 451940 }, { "epoch": 1.7471123069072692, "grad_norm": 0.12177549302577972, "learning_rate": 0.0005584730318166087, "loss": 2.2063, "step": 451950 }, { "epoch": 1.7471509641106524, "grad_norm": 0.12565843760967255, "learning_rate": 0.0005583342967247922, "loss": 2.2211, "step": 451960 }, { "epoch": 1.7471896213140357, "grad_norm": 0.1387435495853424, "learning_rate": 0.0005581955749825152, "loss": 2.2068, "step": 451970 }, { "epoch": 1.747228278517419, "grad_norm": 0.12651048600673676, "learning_rate": 0.0005580568665859253, "loss": 2.1967, "step": 451980 }, { "epoch": 1.7472669357208022, "grad_norm": 0.12083800137042999, "learning_rate": 0.0005579181715311714, "loss": 2.2148, "step": 451990 }, { "epoch": 1.7473055929241856, "grad_norm": 0.11948376148939133, "learning_rate": 0.0005577794898144042, "loss": 2.2078, "step": 452000 }, { "epoch": 1.7473442501275689, "grad_norm": 0.13278955221176147, "learning_rate": 0.0005576408214317767, "loss": 2.1954, "step": 452010 }, { "epoch": 1.7473829073309521, "grad_norm": 0.127226322889328, "learning_rate": 0.0005575021663794431, "loss": 2.2025, "step": 452020 }, { "epoch": 1.7474215645343354, "grad_norm": 0.12776382267475128, "learning_rate": 0.00055736352465356, "loss": 2.2093, "step": 452030 }, { "epoch": 1.7474602217377186, "grad_norm": 0.1194329634308815, "learning_rate": 0.0005572248962502853, "loss": 2.1928, "step": 452040 }, { "epoch": 1.747498878941102, "grad_norm": 0.12772822380065918, "learning_rate": 0.0005570862811657795, "loss": 2.1951, "step": 452050 }, { "epoch": 1.7475375361444851, "grad_norm": 0.1270766258239746, "learning_rate": 0.0005569476793962042, "loss": 2.2269, "step": 452060 }, { "epoch": 1.7475761933478684, "grad_norm": 0.1367577165365219, "learning_rate": 0.0005568090909377235, "loss": 2.2082, "step": 452070 }, { "epoch": 1.7476148505512517, "grad_norm": 0.12789234519004822, "learning_rate": 0.0005566705157865026, "loss": 2.2093, "step": 452080 }, { "epoch": 1.747653507754635, "grad_norm": 0.12344441562891006, "learning_rate": 0.0005565319539387095, "loss": 2.2084, "step": 452090 }, { "epoch": 1.7476921649580182, "grad_norm": 0.1296205371618271, "learning_rate": 0.0005563934053905129, "loss": 2.2148, "step": 452100 }, { "epoch": 1.7477308221614014, "grad_norm": 0.1363183856010437, "learning_rate": 0.0005562548701380843, "loss": 2.2012, "step": 452110 }, { "epoch": 1.7477694793647849, "grad_norm": 0.13961145281791687, "learning_rate": 0.0005561163481775964, "loss": 2.207, "step": 452120 }, { "epoch": 1.7478081365681681, "grad_norm": 0.14444896578788757, "learning_rate": 0.0005559778395052242, "loss": 2.2128, "step": 452130 }, { "epoch": 1.7478467937715514, "grad_norm": 0.12200967967510223, "learning_rate": 0.0005558393441171443, "loss": 2.2039, "step": 452140 }, { "epoch": 1.7478854509749346, "grad_norm": 0.12712444365024567, "learning_rate": 0.0005557008620095352, "loss": 2.2076, "step": 452150 }, { "epoch": 1.747924108178318, "grad_norm": 0.12497568875551224, "learning_rate": 0.0005555623931785769, "loss": 2.2101, "step": 452160 }, { "epoch": 1.7479627653817014, "grad_norm": 0.1273370087146759, "learning_rate": 0.000555423937620452, "loss": 2.2146, "step": 452170 }, { "epoch": 1.7480014225850846, "grad_norm": 0.1270856112241745, "learning_rate": 0.000555285495331344, "loss": 2.213, "step": 452180 }, { "epoch": 1.7480400797884679, "grad_norm": 0.12017151713371277, "learning_rate": 0.0005551470663074389, "loss": 2.2232, "step": 452190 }, { "epoch": 1.7480787369918511, "grad_norm": 0.12553970515727997, "learning_rate": 0.0005550086505449245, "loss": 2.2053, "step": 452200 }, { "epoch": 1.7481173941952344, "grad_norm": 0.1256796270608902, "learning_rate": 0.0005548702480399901, "loss": 2.2238, "step": 452210 }, { "epoch": 1.7481560513986176, "grad_norm": 0.13260310888290405, "learning_rate": 0.0005547318587888265, "loss": 2.1899, "step": 452220 }, { "epoch": 1.7481947086020009, "grad_norm": 0.13431571424007416, "learning_rate": 0.0005545934827876277, "loss": 2.199, "step": 452230 }, { "epoch": 1.7482333658053841, "grad_norm": 0.13198308646678925, "learning_rate": 0.0005544551200325878, "loss": 2.2265, "step": 452240 }, { "epoch": 1.7482720230087674, "grad_norm": 0.11962399631738663, "learning_rate": 0.000554316770519904, "loss": 2.2126, "step": 452250 }, { "epoch": 1.7483106802121506, "grad_norm": 0.1246422603726387, "learning_rate": 0.0005541784342457748, "loss": 2.2041, "step": 452260 }, { "epoch": 1.7483493374155339, "grad_norm": 0.1360652893781662, "learning_rate": 0.0005540401112064, "loss": 2.2057, "step": 452270 }, { "epoch": 1.7483879946189171, "grad_norm": 0.11979234218597412, "learning_rate": 0.0005539018013979825, "loss": 2.2141, "step": 452280 }, { "epoch": 1.7484266518223006, "grad_norm": 0.1446179449558258, "learning_rate": 0.0005537635048167259, "loss": 2.2118, "step": 452290 }, { "epoch": 1.7484653090256839, "grad_norm": 0.13093620538711548, "learning_rate": 0.0005536252214588364, "loss": 2.2136, "step": 452300 }, { "epoch": 1.7485039662290671, "grad_norm": 0.11662276089191437, "learning_rate": 0.0005534869513205212, "loss": 2.1986, "step": 452310 }, { "epoch": 1.7485426234324504, "grad_norm": 0.1224411353468895, "learning_rate": 0.00055334869439799, "loss": 2.2184, "step": 452320 }, { "epoch": 1.7485812806358338, "grad_norm": 0.1330765187740326, "learning_rate": 0.000553210450687454, "loss": 2.2062, "step": 452330 }, { "epoch": 1.748619937839217, "grad_norm": 0.12264736741781235, "learning_rate": 0.0005530722201851261, "loss": 2.2153, "step": 452340 }, { "epoch": 1.7486585950426003, "grad_norm": 0.13567610085010529, "learning_rate": 0.0005529340028872216, "loss": 2.1991, "step": 452350 }, { "epoch": 1.7486972522459836, "grad_norm": 0.13990464806556702, "learning_rate": 0.0005527957987899565, "loss": 2.2066, "step": 452360 }, { "epoch": 1.7487359094493669, "grad_norm": 0.12277691811323166, "learning_rate": 0.0005526576078895498, "loss": 2.2136, "step": 452370 }, { "epoch": 1.74877456665275, "grad_norm": 0.1335594654083252, "learning_rate": 0.0005525194301822216, "loss": 2.2146, "step": 452380 }, { "epoch": 1.7488132238561334, "grad_norm": 0.11977743357419968, "learning_rate": 0.0005523812656641942, "loss": 2.2034, "step": 452390 }, { "epoch": 1.7488518810595166, "grad_norm": 0.14657334983348846, "learning_rate": 0.0005522431143316913, "loss": 2.203, "step": 452400 }, { "epoch": 1.7488905382628999, "grad_norm": 0.12392140924930573, "learning_rate": 0.0005521049761809387, "loss": 2.2205, "step": 452410 }, { "epoch": 1.7489291954662831, "grad_norm": 0.12456782907247543, "learning_rate": 0.0005519668512081639, "loss": 2.2103, "step": 452420 }, { "epoch": 1.7489678526696664, "grad_norm": 0.12019629776477814, "learning_rate": 0.000551828739409596, "loss": 2.2114, "step": 452430 }, { "epoch": 1.7490065098730496, "grad_norm": 0.1146310493350029, "learning_rate": 0.0005516906407814662, "loss": 2.2189, "step": 452440 }, { "epoch": 1.7490451670764329, "grad_norm": 0.12347836792469025, "learning_rate": 0.0005515525553200076, "loss": 2.2096, "step": 452450 }, { "epoch": 1.7490838242798163, "grad_norm": 0.1218772679567337, "learning_rate": 0.0005514144830214545, "loss": 2.2048, "step": 452460 }, { "epoch": 1.7491224814831996, "grad_norm": 0.1261073648929596, "learning_rate": 0.0005512764238820436, "loss": 2.195, "step": 452470 }, { "epoch": 1.7491611386865829, "grad_norm": 0.1243288516998291, "learning_rate": 0.0005511383778980133, "loss": 2.2038, "step": 452480 }, { "epoch": 1.749199795889966, "grad_norm": 0.1278071403503418, "learning_rate": 0.0005510003450656036, "loss": 2.2095, "step": 452490 }, { "epoch": 1.7492384530933496, "grad_norm": 0.12030568718910217, "learning_rate": 0.000550862325381056, "loss": 2.2037, "step": 452500 }, { "epoch": 1.7492771102967328, "grad_norm": 0.13277964293956757, "learning_rate": 0.0005507243188406148, "loss": 2.2189, "step": 452510 }, { "epoch": 1.749315767500116, "grad_norm": 0.11886333674192429, "learning_rate": 0.000550586325440525, "loss": 2.1993, "step": 452520 }, { "epoch": 1.7493544247034993, "grad_norm": 0.11295314133167267, "learning_rate": 0.0005504483451770337, "loss": 2.2119, "step": 452530 }, { "epoch": 1.7493930819068826, "grad_norm": 0.12786784768104553, "learning_rate": 0.0005503103780463902, "loss": 2.2215, "step": 452540 }, { "epoch": 1.7494317391102658, "grad_norm": 0.12659190595149994, "learning_rate": 0.0005501724240448454, "loss": 2.2226, "step": 452550 }, { "epoch": 1.749470396313649, "grad_norm": 0.1250794380903244, "learning_rate": 0.0005500344831686515, "loss": 2.2043, "step": 452560 }, { "epoch": 1.7495090535170323, "grad_norm": 0.12980180978775024, "learning_rate": 0.000549896555414063, "loss": 2.207, "step": 452570 }, { "epoch": 1.7495477107204156, "grad_norm": 0.11602694541215897, "learning_rate": 0.0005497586407773358, "loss": 2.2181, "step": 452580 }, { "epoch": 1.7495863679237988, "grad_norm": 0.12851059436798096, "learning_rate": 0.0005496207392547285, "loss": 2.2058, "step": 452590 }, { "epoch": 1.749625025127182, "grad_norm": 0.1294124275445938, "learning_rate": 0.0005494828508425003, "loss": 2.2064, "step": 452600 }, { "epoch": 1.7496636823305654, "grad_norm": 0.122859887778759, "learning_rate": 0.0005493449755369129, "loss": 2.2065, "step": 452610 }, { "epoch": 1.7497023395339486, "grad_norm": 0.13860875368118286, "learning_rate": 0.0005492071133342294, "loss": 2.2032, "step": 452620 }, { "epoch": 1.749740996737332, "grad_norm": 0.14647898077964783, "learning_rate": 0.0005490692642307147, "loss": 2.2083, "step": 452630 }, { "epoch": 1.7497796539407153, "grad_norm": 0.13019074499607086, "learning_rate": 0.0005489314282226356, "loss": 2.1914, "step": 452640 }, { "epoch": 1.7498183111440986, "grad_norm": 0.1232682466506958, "learning_rate": 0.0005487936053062612, "loss": 2.2194, "step": 452650 }, { "epoch": 1.7498569683474818, "grad_norm": 0.1230897456407547, "learning_rate": 0.0005486557954778613, "loss": 2.1953, "step": 452660 }, { "epoch": 1.7498956255508653, "grad_norm": 0.11922044306993484, "learning_rate": 0.0005485179987337081, "loss": 2.2055, "step": 452670 }, { "epoch": 1.7499342827542486, "grad_norm": 0.12729021906852722, "learning_rate": 0.0005483802150700756, "loss": 2.2174, "step": 452680 }, { "epoch": 1.7499729399576318, "grad_norm": 0.11684662103652954, "learning_rate": 0.0005482424444832395, "loss": 2.2029, "step": 452690 }, { "epoch": 1.750011597161015, "grad_norm": 0.1304619461297989, "learning_rate": 0.0005481046869694771, "loss": 2.2144, "step": 452700 }, { "epoch": 1.7500502543643983, "grad_norm": 0.12067972123622894, "learning_rate": 0.0005479669425250677, "loss": 2.1993, "step": 452710 }, { "epoch": 1.7500889115677816, "grad_norm": 0.12739378213882446, "learning_rate": 0.0005478292111462922, "loss": 2.2076, "step": 452720 }, { "epoch": 1.7501275687711648, "grad_norm": 0.1453160047531128, "learning_rate": 0.0005476914928294334, "loss": 2.1889, "step": 452730 }, { "epoch": 1.750166225974548, "grad_norm": 0.1349991261959076, "learning_rate": 0.0005475537875707754, "loss": 2.1994, "step": 452740 }, { "epoch": 1.7502048831779313, "grad_norm": 0.12166766822338104, "learning_rate": 0.0005474160953666049, "loss": 2.1917, "step": 452750 }, { "epoch": 1.7502435403813146, "grad_norm": 0.12614476680755615, "learning_rate": 0.0005472784162132098, "loss": 2.2207, "step": 452760 }, { "epoch": 1.7502821975846978, "grad_norm": 0.11877798289060593, "learning_rate": 0.0005471407501068799, "loss": 2.2097, "step": 452770 }, { "epoch": 1.750320854788081, "grad_norm": 0.11452053487300873, "learning_rate": 0.0005470030970439061, "loss": 2.194, "step": 452780 }, { "epoch": 1.7503595119914643, "grad_norm": 0.14666223526000977, "learning_rate": 0.0005468654570205826, "loss": 2.2067, "step": 452790 }, { "epoch": 1.7503981691948478, "grad_norm": 0.12348919361829758, "learning_rate": 0.000546727830033204, "loss": 2.2057, "step": 452800 }, { "epoch": 1.750436826398231, "grad_norm": 0.12077920883893967, "learning_rate": 0.0005465902160780669, "loss": 2.2069, "step": 452810 }, { "epoch": 1.7504754836016143, "grad_norm": 0.1277742087841034, "learning_rate": 0.0005464526151514701, "loss": 2.1998, "step": 452820 }, { "epoch": 1.7505141408049976, "grad_norm": 0.12332631647586823, "learning_rate": 0.0005463150272497139, "loss": 2.2114, "step": 452830 }, { "epoch": 1.750552798008381, "grad_norm": 0.1287272572517395, "learning_rate": 0.0005461774523691001, "loss": 2.2085, "step": 452840 }, { "epoch": 1.7505914552117643, "grad_norm": 0.131668359041214, "learning_rate": 0.0005460398905059327, "loss": 2.2095, "step": 452850 }, { "epoch": 1.7506301124151475, "grad_norm": 0.13040921092033386, "learning_rate": 0.0005459023416565172, "loss": 2.199, "step": 452860 }, { "epoch": 1.7506687696185308, "grad_norm": 0.12430719286203384, "learning_rate": 0.000545764805817161, "loss": 2.1903, "step": 452870 }, { "epoch": 1.750707426821914, "grad_norm": 0.1209225133061409, "learning_rate": 0.0005456272829841727, "loss": 2.1949, "step": 452880 }, { "epoch": 1.7507460840252973, "grad_norm": 0.12247639149427414, "learning_rate": 0.0005454897731538633, "loss": 2.2034, "step": 452890 }, { "epoch": 1.7507847412286806, "grad_norm": 0.1321645826101303, "learning_rate": 0.0005453522763225456, "loss": 2.2039, "step": 452900 }, { "epoch": 1.7508233984320638, "grad_norm": 0.12608574330806732, "learning_rate": 0.0005452147924865333, "loss": 2.208, "step": 452910 }, { "epoch": 1.750862055635447, "grad_norm": 0.12137756496667862, "learning_rate": 0.0005450773216421432, "loss": 2.2106, "step": 452920 }, { "epoch": 1.7509007128388303, "grad_norm": 0.12233705073595047, "learning_rate": 0.0005449398637856922, "loss": 2.2127, "step": 452930 }, { "epoch": 1.7509393700422136, "grad_norm": 0.14403283596038818, "learning_rate": 0.0005448024189135004, "loss": 2.1966, "step": 452940 }, { "epoch": 1.7509780272455968, "grad_norm": 0.1319127082824707, "learning_rate": 0.0005446649870218885, "loss": 2.1946, "step": 452950 }, { "epoch": 1.75101668444898, "grad_norm": 0.13181868195533752, "learning_rate": 0.00054452756810718, "loss": 2.2135, "step": 452960 }, { "epoch": 1.7510553416523635, "grad_norm": 0.118177130818367, "learning_rate": 0.000544390162165699, "loss": 2.2074, "step": 452970 }, { "epoch": 1.7510939988557468, "grad_norm": 0.13542814552783966, "learning_rate": 0.0005442527691937724, "loss": 2.2117, "step": 452980 }, { "epoch": 1.75113265605913, "grad_norm": 0.12567058205604553, "learning_rate": 0.0005441153891877282, "loss": 2.1741, "step": 452990 }, { "epoch": 1.7511713132625133, "grad_norm": 0.12935258448123932, "learning_rate": 0.0005439780221438964, "loss": 2.1909, "step": 453000 }, { "epoch": 1.7512099704658968, "grad_norm": 0.12639570236206055, "learning_rate": 0.0005438406680586083, "loss": 2.2082, "step": 453010 }, { "epoch": 1.75124862766928, "grad_norm": 0.13248620927333832, "learning_rate": 0.0005437033269281977, "loss": 2.1987, "step": 453020 }, { "epoch": 1.7512872848726633, "grad_norm": 0.16943858563899994, "learning_rate": 0.0005435659987489994, "loss": 2.2054, "step": 453030 }, { "epoch": 1.7513259420760465, "grad_norm": 0.12565425038337708, "learning_rate": 0.0005434286835173501, "loss": 2.2227, "step": 453040 }, { "epoch": 1.7513645992794298, "grad_norm": 0.12456222623586655, "learning_rate": 0.0005432913812295886, "loss": 2.2046, "step": 453050 }, { "epoch": 1.751403256482813, "grad_norm": 0.1141451820731163, "learning_rate": 0.000543154091882055, "loss": 2.2103, "step": 453060 }, { "epoch": 1.7514419136861963, "grad_norm": 0.11694903671741486, "learning_rate": 0.0005430168154710915, "loss": 2.1978, "step": 453070 }, { "epoch": 1.7514805708895795, "grad_norm": 0.122064508497715, "learning_rate": 0.0005428795519930411, "loss": 2.2294, "step": 453080 }, { "epoch": 1.7515192280929628, "grad_norm": 0.1250613033771515, "learning_rate": 0.0005427423014442503, "loss": 2.1978, "step": 453090 }, { "epoch": 1.751557885296346, "grad_norm": 0.1284988522529602, "learning_rate": 0.0005426050638210656, "loss": 2.2149, "step": 453100 }, { "epoch": 1.7515965424997293, "grad_norm": 0.11649057269096375, "learning_rate": 0.0005424678391198362, "loss": 2.2019, "step": 453110 }, { "epoch": 1.7516351997031125, "grad_norm": 0.12649868428707123, "learning_rate": 0.0005423306273369122, "loss": 2.2269, "step": 453120 }, { "epoch": 1.7516738569064958, "grad_norm": 0.13215883076190948, "learning_rate": 0.0005421934284686463, "loss": 2.1992, "step": 453130 }, { "epoch": 1.7517125141098793, "grad_norm": 0.11886328458786011, "learning_rate": 0.0005420562425113924, "loss": 2.1983, "step": 453140 }, { "epoch": 1.7517511713132625, "grad_norm": 0.1284274458885193, "learning_rate": 0.0005419190694615062, "loss": 2.1949, "step": 453150 }, { "epoch": 1.7517898285166458, "grad_norm": 0.13134321570396423, "learning_rate": 0.0005417819093153453, "loss": 2.2101, "step": 453160 }, { "epoch": 1.751828485720029, "grad_norm": 0.12084176391363144, "learning_rate": 0.0005416447620692688, "loss": 2.1954, "step": 453170 }, { "epoch": 1.7518671429234125, "grad_norm": 0.12745612859725952, "learning_rate": 0.0005415076277196373, "loss": 2.2209, "step": 453180 }, { "epoch": 1.7519058001267958, "grad_norm": 0.1288878470659256, "learning_rate": 0.0005413705062628138, "loss": 2.197, "step": 453190 }, { "epoch": 1.751944457330179, "grad_norm": 0.12644262611865997, "learning_rate": 0.0005412333976951624, "loss": 2.1995, "step": 453200 }, { "epoch": 1.7519831145335623, "grad_norm": 0.12212696671485901, "learning_rate": 0.0005410963020130493, "loss": 2.211, "step": 453210 }, { "epoch": 1.7520217717369455, "grad_norm": 0.12275119870901108, "learning_rate": 0.0005409592192128418, "loss": 2.2088, "step": 453220 }, { "epoch": 1.7520604289403288, "grad_norm": 0.12905056774616241, "learning_rate": 0.0005408221492909098, "loss": 2.2058, "step": 453230 }, { "epoch": 1.752099086143712, "grad_norm": 0.1202598512172699, "learning_rate": 0.0005406850922436241, "loss": 2.2178, "step": 453240 }, { "epoch": 1.7521377433470953, "grad_norm": 0.11684327572584152, "learning_rate": 0.0005405480480673577, "loss": 2.2047, "step": 453250 }, { "epoch": 1.7521764005504785, "grad_norm": 0.1299309879541397, "learning_rate": 0.0005404110167584848, "loss": 2.2119, "step": 453260 }, { "epoch": 1.7522150577538618, "grad_norm": 0.133416086435318, "learning_rate": 0.0005402739983133822, "loss": 2.2059, "step": 453270 }, { "epoch": 1.752253714957245, "grad_norm": 0.13269919157028198, "learning_rate": 0.0005401369927284272, "loss": 2.2089, "step": 453280 }, { "epoch": 1.7522923721606283, "grad_norm": 0.1308826357126236, "learning_rate": 0.00054, "loss": 2.1902, "step": 453290 }, { "epoch": 1.7523310293640115, "grad_norm": 0.2655637264251709, "learning_rate": 0.0005398630201244816, "loss": 2.2136, "step": 453300 }, { "epoch": 1.752369686567395, "grad_norm": 0.12664833664894104, "learning_rate": 0.0005397260530982551, "loss": 2.2168, "step": 453310 }, { "epoch": 1.7524083437707783, "grad_norm": 0.11643911898136139, "learning_rate": 0.0005395890989177052, "loss": 2.2128, "step": 453320 }, { "epoch": 1.7524470009741615, "grad_norm": 0.13293546438217163, "learning_rate": 0.0005394521575792186, "loss": 2.1997, "step": 453330 }, { "epoch": 1.7524856581775448, "grad_norm": 0.1252000778913498, "learning_rate": 0.000539315229079183, "loss": 2.2026, "step": 453340 }, { "epoch": 1.7525243153809282, "grad_norm": 0.11817177385091782, "learning_rate": 0.0005391783134139883, "loss": 2.208, "step": 453350 }, { "epoch": 1.7525629725843115, "grad_norm": 0.12528595328330994, "learning_rate": 0.0005390414105800261, "loss": 2.2127, "step": 453360 }, { "epoch": 1.7526016297876947, "grad_norm": 0.13388140499591827, "learning_rate": 0.0005389045205736895, "loss": 2.2181, "step": 453370 }, { "epoch": 1.752640286991078, "grad_norm": 0.14031468331813812, "learning_rate": 0.0005387676433913735, "loss": 2.1913, "step": 453380 }, { "epoch": 1.7526789441944612, "grad_norm": 0.12155263125896454, "learning_rate": 0.0005386307790294746, "loss": 2.2062, "step": 453390 }, { "epoch": 1.7527176013978445, "grad_norm": 0.1317920684814453, "learning_rate": 0.0005384939274843912, "loss": 2.2134, "step": 453400 }, { "epoch": 1.7527562586012277, "grad_norm": 0.1260206401348114, "learning_rate": 0.0005383570887525229, "loss": 2.2299, "step": 453410 }, { "epoch": 1.752794915804611, "grad_norm": 0.12761461734771729, "learning_rate": 0.0005382202628302717, "loss": 2.2078, "step": 453420 }, { "epoch": 1.7528335730079943, "grad_norm": 0.12996332347393036, "learning_rate": 0.0005380834497140406, "loss": 2.2013, "step": 453430 }, { "epoch": 1.7528722302113775, "grad_norm": 0.15279492735862732, "learning_rate": 0.0005379466494002347, "loss": 2.2176, "step": 453440 }, { "epoch": 1.7529108874147608, "grad_norm": 0.1288621574640274, "learning_rate": 0.000537809861885261, "loss": 2.1883, "step": 453450 }, { "epoch": 1.752949544618144, "grad_norm": 0.13647779822349548, "learning_rate": 0.0005376730871655271, "loss": 2.2179, "step": 453460 }, { "epoch": 1.7529882018215273, "grad_norm": 0.12741248309612274, "learning_rate": 0.0005375363252374439, "loss": 2.2043, "step": 453470 }, { "epoch": 1.7530268590249107, "grad_norm": 0.12702925503253937, "learning_rate": 0.0005373995760974222, "loss": 2.1951, "step": 453480 }, { "epoch": 1.753065516228294, "grad_norm": 0.13216041028499603, "learning_rate": 0.0005372628397418761, "loss": 2.2046, "step": 453490 }, { "epoch": 1.7531041734316772, "grad_norm": 0.12956835329532623, "learning_rate": 0.0005371261161672206, "loss": 2.1989, "step": 453500 }, { "epoch": 1.7531428306350605, "grad_norm": 0.11891122162342072, "learning_rate": 0.0005369894053698723, "loss": 2.204, "step": 453510 }, { "epoch": 1.753181487838444, "grad_norm": 0.13319365680217743, "learning_rate": 0.0005368527073462495, "loss": 2.2232, "step": 453520 }, { "epoch": 1.7532201450418272, "grad_norm": 0.12692497670650482, "learning_rate": 0.0005367160220927723, "loss": 2.2106, "step": 453530 }, { "epoch": 1.7532588022452105, "grad_norm": 0.11499441415071487, "learning_rate": 0.0005365793496058626, "loss": 2.202, "step": 453540 }, { "epoch": 1.7532974594485937, "grad_norm": 0.12278398871421814, "learning_rate": 0.0005364426898819439, "loss": 2.2035, "step": 453550 }, { "epoch": 1.753336116651977, "grad_norm": 0.12266360968351364, "learning_rate": 0.0005363060429174412, "loss": 2.2051, "step": 453560 }, { "epoch": 1.7533747738553602, "grad_norm": 0.13310161232948303, "learning_rate": 0.000536169408708781, "loss": 2.2049, "step": 453570 }, { "epoch": 1.7534134310587435, "grad_norm": 0.12809118628501892, "learning_rate": 0.000536032787252392, "loss": 2.1987, "step": 453580 }, { "epoch": 1.7534520882621267, "grad_norm": 0.12067703902721405, "learning_rate": 0.0005358961785447043, "loss": 2.2204, "step": 453590 }, { "epoch": 1.75349074546551, "grad_norm": 0.12874580919742584, "learning_rate": 0.0005357595825821497, "loss": 2.2005, "step": 453600 }, { "epoch": 1.7535294026688932, "grad_norm": 0.12450389564037323, "learning_rate": 0.0005356229993611617, "loss": 2.1904, "step": 453610 }, { "epoch": 1.7535680598722765, "grad_norm": 0.12393300235271454, "learning_rate": 0.0005354864288781753, "loss": 2.2067, "step": 453620 }, { "epoch": 1.7536067170756597, "grad_norm": 0.12177273631095886, "learning_rate": 0.0005353498711296272, "loss": 2.213, "step": 453630 }, { "epoch": 1.7536453742790432, "grad_norm": 0.12027515470981598, "learning_rate": 0.0005352133261119556, "loss": 2.19, "step": 453640 }, { "epoch": 1.7536840314824265, "grad_norm": 0.12833796441555023, "learning_rate": 0.000535076793821601, "loss": 2.1944, "step": 453650 }, { "epoch": 1.7537226886858097, "grad_norm": 0.1365223377943039, "learning_rate": 0.0005349402742550051, "loss": 2.1946, "step": 453660 }, { "epoch": 1.753761345889193, "grad_norm": 0.1281602382659912, "learning_rate": 0.000534803767408611, "loss": 2.2129, "step": 453670 }, { "epoch": 1.7538000030925762, "grad_norm": 0.13165245950222015, "learning_rate": 0.0005346672732788638, "loss": 2.1915, "step": 453680 }, { "epoch": 1.7538386602959597, "grad_norm": 0.12608040869235992, "learning_rate": 0.0005345307918622104, "loss": 2.1995, "step": 453690 }, { "epoch": 1.753877317499343, "grad_norm": 0.13146719336509705, "learning_rate": 0.0005343943231550991, "loss": 2.2035, "step": 453700 }, { "epoch": 1.7539159747027262, "grad_norm": 0.11518460512161255, "learning_rate": 0.0005342578671539799, "loss": 2.2079, "step": 453710 }, { "epoch": 1.7539546319061095, "grad_norm": 0.14398086071014404, "learning_rate": 0.0005341214238553045, "loss": 2.2013, "step": 453720 }, { "epoch": 1.7539932891094927, "grad_norm": 0.12497788667678833, "learning_rate": 0.000533984993255526, "loss": 2.2126, "step": 453730 }, { "epoch": 1.754031946312876, "grad_norm": 0.13252729177474976, "learning_rate": 0.0005338485753510997, "loss": 2.2209, "step": 453740 }, { "epoch": 1.7540706035162592, "grad_norm": 0.12597353756427765, "learning_rate": 0.000533712170138482, "loss": 2.1939, "step": 453750 }, { "epoch": 1.7541092607196425, "grad_norm": 0.12324507534503937, "learning_rate": 0.0005335757776141312, "loss": 2.2004, "step": 453760 }, { "epoch": 1.7541479179230257, "grad_norm": 0.13148833811283112, "learning_rate": 0.0005334393977745074, "loss": 2.1953, "step": 453770 }, { "epoch": 1.754186575126409, "grad_norm": 0.12210629880428314, "learning_rate": 0.0005333030306160718, "loss": 2.1935, "step": 453780 }, { "epoch": 1.7542252323297922, "grad_norm": 0.13100388646125793, "learning_rate": 0.0005331666761352876, "loss": 2.2096, "step": 453790 }, { "epoch": 1.7542638895331755, "grad_norm": 0.12396988272666931, "learning_rate": 0.00053303033432862, "loss": 2.2063, "step": 453800 }, { "epoch": 1.754302546736559, "grad_norm": 0.12156442552804947, "learning_rate": 0.0005328940051925355, "loss": 2.2074, "step": 453810 }, { "epoch": 1.7543412039399422, "grad_norm": 0.12706667184829712, "learning_rate": 0.000532757688723502, "loss": 2.1943, "step": 453820 }, { "epoch": 1.7543798611433254, "grad_norm": 0.14350438117980957, "learning_rate": 0.0005326213849179892, "loss": 2.2274, "step": 453830 }, { "epoch": 1.7544185183467087, "grad_norm": 0.1357300728559494, "learning_rate": 0.0005324850937724688, "loss": 2.211, "step": 453840 }, { "epoch": 1.754457175550092, "grad_norm": 0.12557262182235718, "learning_rate": 0.0005323488152834135, "loss": 2.2091, "step": 453850 }, { "epoch": 1.7544958327534754, "grad_norm": 0.13544535636901855, "learning_rate": 0.0005322125494472982, "loss": 2.2037, "step": 453860 }, { "epoch": 1.7545344899568587, "grad_norm": 0.13692021369934082, "learning_rate": 0.0005320762962605994, "loss": 2.2041, "step": 453870 }, { "epoch": 1.754573147160242, "grad_norm": 0.13866128027439117, "learning_rate": 0.0005319400557197946, "loss": 2.2113, "step": 453880 }, { "epoch": 1.7546118043636252, "grad_norm": 0.12811279296875, "learning_rate": 0.0005318038278213635, "loss": 2.2096, "step": 453890 }, { "epoch": 1.7546504615670084, "grad_norm": 0.13107416033744812, "learning_rate": 0.0005316676125617879, "loss": 2.2015, "step": 453900 }, { "epoch": 1.7546891187703917, "grad_norm": 0.13009685277938843, "learning_rate": 0.0005315314099375499, "loss": 2.1995, "step": 453910 }, { "epoch": 1.754727775973775, "grad_norm": 0.14282618463039398, "learning_rate": 0.0005313952199451344, "loss": 2.201, "step": 453920 }, { "epoch": 1.7547664331771582, "grad_norm": 0.11497281491756439, "learning_rate": 0.0005312590425810275, "loss": 2.2115, "step": 453930 }, { "epoch": 1.7548050903805414, "grad_norm": 0.12178318947553635, "learning_rate": 0.0005311228778417167, "loss": 2.1942, "step": 453940 }, { "epoch": 1.7548437475839247, "grad_norm": 0.13076919317245483, "learning_rate": 0.0005309867257236918, "loss": 2.2137, "step": 453950 }, { "epoch": 1.754882404787308, "grad_norm": 0.12824806571006775, "learning_rate": 0.0005308505862234434, "loss": 2.2139, "step": 453960 }, { "epoch": 1.7549210619906912, "grad_norm": 0.12367754429578781, "learning_rate": 0.0005307144593374644, "loss": 2.2248, "step": 453970 }, { "epoch": 1.7549597191940747, "grad_norm": 0.13095413148403168, "learning_rate": 0.0005305783450622488, "loss": 2.2029, "step": 453980 }, { "epoch": 1.754998376397458, "grad_norm": 0.12684781849384308, "learning_rate": 0.0005304422433942924, "loss": 2.1942, "step": 453990 }, { "epoch": 1.7550370336008412, "grad_norm": 0.14733237028121948, "learning_rate": 0.0005303061543300931, "loss": 2.1993, "step": 454000 }, { "epoch": 1.7550756908042244, "grad_norm": 0.1254054307937622, "learning_rate": 0.0005301700778661498, "loss": 2.2158, "step": 454010 }, { "epoch": 1.7551143480076077, "grad_norm": 0.15444502234458923, "learning_rate": 0.0005300340139989632, "loss": 2.1988, "step": 454020 }, { "epoch": 1.7551530052109912, "grad_norm": 0.13979749381542206, "learning_rate": 0.000529897962725036, "loss": 2.1916, "step": 454030 }, { "epoch": 1.7551916624143744, "grad_norm": 0.11630989611148834, "learning_rate": 0.0005297619240408715, "loss": 2.207, "step": 454040 }, { "epoch": 1.7552303196177577, "grad_norm": 0.13520605862140656, "learning_rate": 0.000529625897942976, "loss": 2.217, "step": 454050 }, { "epoch": 1.755268976821141, "grad_norm": 0.126438707113266, "learning_rate": 0.0005294898844278562, "loss": 2.2166, "step": 454060 }, { "epoch": 1.7553076340245242, "grad_norm": 0.12552592158317566, "learning_rate": 0.0005293538834920211, "loss": 2.2159, "step": 454070 }, { "epoch": 1.7553462912279074, "grad_norm": 0.14067593216896057, "learning_rate": 0.0005292178951319812, "loss": 2.2056, "step": 454080 }, { "epoch": 1.7553849484312907, "grad_norm": 0.13103902339935303, "learning_rate": 0.0005290819193442484, "loss": 2.1993, "step": 454090 }, { "epoch": 1.755423605634674, "grad_norm": 0.11966156214475632, "learning_rate": 0.0005289459561253367, "loss": 2.1988, "step": 454100 }, { "epoch": 1.7554622628380572, "grad_norm": 0.14210577309131622, "learning_rate": 0.0005288100054717608, "loss": 2.2031, "step": 454110 }, { "epoch": 1.7555009200414404, "grad_norm": 0.134195476770401, "learning_rate": 0.0005286740673800383, "loss": 2.2114, "step": 454120 }, { "epoch": 1.7555395772448237, "grad_norm": 0.13276775181293488, "learning_rate": 0.0005285381418466872, "loss": 2.191, "step": 454130 }, { "epoch": 1.755578234448207, "grad_norm": 0.13156795501708984, "learning_rate": 0.0005284022288682277, "loss": 2.1898, "step": 454140 }, { "epoch": 1.7556168916515904, "grad_norm": 0.1209663450717926, "learning_rate": 0.0005282663284411817, "loss": 2.2067, "step": 454150 }, { "epoch": 1.7556555488549737, "grad_norm": 0.1239723488688469, "learning_rate": 0.0005281304405620722, "loss": 2.1923, "step": 454160 }, { "epoch": 1.755694206058357, "grad_norm": 0.13002730906009674, "learning_rate": 0.0005279945652274243, "loss": 2.1867, "step": 454170 }, { "epoch": 1.7557328632617402, "grad_norm": 0.13225910067558289, "learning_rate": 0.0005278587024337644, "loss": 2.21, "step": 454180 }, { "epoch": 1.7557715204651236, "grad_norm": 0.13071177899837494, "learning_rate": 0.0005277228521776207, "loss": 2.2045, "step": 454190 }, { "epoch": 1.7558101776685069, "grad_norm": 0.13750162720680237, "learning_rate": 0.0005275870144555231, "loss": 2.192, "step": 454200 }, { "epoch": 1.7558488348718901, "grad_norm": 0.12981943786144257, "learning_rate": 0.0005274511892640025, "loss": 2.211, "step": 454210 }, { "epoch": 1.7558874920752734, "grad_norm": 0.11927240341901779, "learning_rate": 0.0005273153765995926, "loss": 2.1964, "step": 454220 }, { "epoch": 1.7559261492786566, "grad_norm": 0.1346549540758133, "learning_rate": 0.000527179576458827, "loss": 2.2015, "step": 454230 }, { "epoch": 1.75596480648204, "grad_norm": 0.11863110214471817, "learning_rate": 0.0005270437888382425, "loss": 2.1989, "step": 454240 }, { "epoch": 1.7560034636854231, "grad_norm": 0.11934219300746918, "learning_rate": 0.0005269080137343765, "loss": 2.2103, "step": 454250 }, { "epoch": 1.7560421208888064, "grad_norm": 0.1372785121202469, "learning_rate": 0.0005267722511437684, "loss": 2.2004, "step": 454260 }, { "epoch": 1.7560807780921897, "grad_norm": 0.12632456421852112, "learning_rate": 0.0005266365010629591, "loss": 2.1843, "step": 454270 }, { "epoch": 1.756119435295573, "grad_norm": 0.12038110196590424, "learning_rate": 0.0005265007634884911, "loss": 2.2167, "step": 454280 }, { "epoch": 1.7561580924989562, "grad_norm": 0.11933927237987518, "learning_rate": 0.0005263650384169083, "loss": 2.2137, "step": 454290 }, { "epoch": 1.7561967497023394, "grad_norm": 0.11876744776964188, "learning_rate": 0.0005262293258447568, "loss": 2.1949, "step": 454300 }, { "epoch": 1.7562354069057227, "grad_norm": 0.12975801527500153, "learning_rate": 0.0005260936257685836, "loss": 2.2105, "step": 454310 }, { "epoch": 1.7562740641091061, "grad_norm": 0.12426824867725372, "learning_rate": 0.0005259579381849378, "loss": 2.1841, "step": 454320 }, { "epoch": 1.7563127213124894, "grad_norm": 0.12224451452493668, "learning_rate": 0.0005258222630903695, "loss": 2.2126, "step": 454330 }, { "epoch": 1.7563513785158726, "grad_norm": 0.12690091133117676, "learning_rate": 0.0005256866004814309, "loss": 2.199, "step": 454340 }, { "epoch": 1.756390035719256, "grad_norm": 0.12292948365211487, "learning_rate": 0.0005255509503546758, "loss": 2.1995, "step": 454350 }, { "epoch": 1.7564286929226394, "grad_norm": 0.12324800342321396, "learning_rate": 0.0005254153127066592, "loss": 2.2003, "step": 454360 }, { "epoch": 1.7564673501260226, "grad_norm": 0.11988487094640732, "learning_rate": 0.0005252796875339378, "loss": 2.2043, "step": 454370 }, { "epoch": 1.7565060073294059, "grad_norm": 0.12712682783603668, "learning_rate": 0.0005251440748330705, "loss": 2.1982, "step": 454380 }, { "epoch": 1.7565446645327891, "grad_norm": 0.13616231083869934, "learning_rate": 0.0005250084746006165, "loss": 2.2174, "step": 454390 }, { "epoch": 1.7565833217361724, "grad_norm": 0.1232861876487732, "learning_rate": 0.0005248728868331381, "loss": 2.1907, "step": 454400 }, { "epoch": 1.7566219789395556, "grad_norm": 0.13185040652751923, "learning_rate": 0.0005247373115271979, "loss": 2.179, "step": 454410 }, { "epoch": 1.7566606361429389, "grad_norm": 0.122291199862957, "learning_rate": 0.0005246017486793609, "loss": 2.2056, "step": 454420 }, { "epoch": 1.7566992933463221, "grad_norm": 0.12516425549983978, "learning_rate": 0.0005244661982861931, "loss": 2.2214, "step": 454430 }, { "epoch": 1.7567379505497054, "grad_norm": 0.12709055840969086, "learning_rate": 0.0005243306603442626, "loss": 2.2089, "step": 454440 }, { "epoch": 1.7567766077530886, "grad_norm": 0.1386878341436386, "learning_rate": 0.0005241951348501388, "loss": 2.2162, "step": 454450 }, { "epoch": 1.756815264956472, "grad_norm": 0.12939682602882385, "learning_rate": 0.0005240596218003928, "loss": 2.2067, "step": 454460 }, { "epoch": 1.7568539221598551, "grad_norm": 0.1294785439968109, "learning_rate": 0.0005239241211915968, "loss": 2.2099, "step": 454470 }, { "epoch": 1.7568925793632384, "grad_norm": 0.12862272560596466, "learning_rate": 0.0005237886330203255, "loss": 2.2114, "step": 454480 }, { "epoch": 1.7569312365666219, "grad_norm": 0.11318708956241608, "learning_rate": 0.0005236531572831537, "loss": 2.1954, "step": 454490 }, { "epoch": 1.7569698937700051, "grad_norm": 0.12838613986968994, "learning_rate": 0.0005235176939766599, "loss": 2.2058, "step": 454500 }, { "epoch": 1.7570085509733884, "grad_norm": 0.12900970876216888, "learning_rate": 0.0005233822430974223, "loss": 2.209, "step": 454510 }, { "epoch": 1.7570472081767716, "grad_norm": 0.13854677975177765, "learning_rate": 0.0005232468046420213, "loss": 2.1932, "step": 454520 }, { "epoch": 1.757085865380155, "grad_norm": 0.13848355412483215, "learning_rate": 0.0005231113786070394, "loss": 2.2199, "step": 454530 }, { "epoch": 1.7571245225835384, "grad_norm": 0.12348717451095581, "learning_rate": 0.0005229759649890595, "loss": 2.2052, "step": 454540 }, { "epoch": 1.7571631797869216, "grad_norm": 0.1331428736448288, "learning_rate": 0.0005228405637846672, "loss": 2.1957, "step": 454550 }, { "epoch": 1.7572018369903049, "grad_norm": 0.11971791088581085, "learning_rate": 0.000522705174990449, "loss": 2.1854, "step": 454560 }, { "epoch": 1.757240494193688, "grad_norm": 0.1279265582561493, "learning_rate": 0.0005225697986029934, "loss": 2.2107, "step": 454570 }, { "epoch": 1.7572791513970714, "grad_norm": 0.12536872923374176, "learning_rate": 0.0005224344346188898, "loss": 2.2119, "step": 454580 }, { "epoch": 1.7573178086004546, "grad_norm": 0.12962239980697632, "learning_rate": 0.0005222990830347299, "loss": 2.2129, "step": 454590 }, { "epoch": 1.7573564658038379, "grad_norm": 0.12700778245925903, "learning_rate": 0.0005221637438471067, "loss": 2.2072, "step": 454600 }, { "epoch": 1.7573951230072211, "grad_norm": 0.13613589107990265, "learning_rate": 0.0005220284170526146, "loss": 2.2107, "step": 454610 }, { "epoch": 1.7574337802106044, "grad_norm": 0.11709623038768768, "learning_rate": 0.0005218931026478497, "loss": 2.2034, "step": 454620 }, { "epoch": 1.7574724374139876, "grad_norm": 0.24251140654087067, "learning_rate": 0.0005217578006294097, "loss": 2.2029, "step": 454630 }, { "epoch": 1.7575110946173709, "grad_norm": 0.13349416851997375, "learning_rate": 0.0005216225109938937, "loss": 2.2222, "step": 454640 }, { "epoch": 1.7575497518207541, "grad_norm": 0.1423693746328354, "learning_rate": 0.0005214872337379025, "loss": 2.2006, "step": 454650 }, { "epoch": 1.7575884090241376, "grad_norm": 0.12413844466209412, "learning_rate": 0.0005213519688580382, "loss": 2.2021, "step": 454660 }, { "epoch": 1.7576270662275209, "grad_norm": 0.1254565715789795, "learning_rate": 0.0005212167163509049, "loss": 2.2068, "step": 454670 }, { "epoch": 1.757665723430904, "grad_norm": 0.12610286474227905, "learning_rate": 0.000521081476213108, "loss": 2.1984, "step": 454680 }, { "epoch": 1.7577043806342874, "grad_norm": 0.12193763256072998, "learning_rate": 0.0005209462484412541, "loss": 2.2037, "step": 454690 }, { "epoch": 1.7577430378376708, "grad_norm": 0.14640814065933228, "learning_rate": 0.0005208110330319522, "loss": 2.2038, "step": 454700 }, { "epoch": 1.757781695041054, "grad_norm": 0.12274169921875, "learning_rate": 0.0005206758299818122, "loss": 2.1971, "step": 454710 }, { "epoch": 1.7578203522444373, "grad_norm": 0.12056424468755722, "learning_rate": 0.0005205406392874457, "loss": 2.2161, "step": 454720 }, { "epoch": 1.7578590094478206, "grad_norm": 0.13092705607414246, "learning_rate": 0.0005204054609454655, "loss": 2.2075, "step": 454730 }, { "epoch": 1.7578976666512038, "grad_norm": 0.11801334470510483, "learning_rate": 0.0005202702949524869, "loss": 2.2114, "step": 454740 }, { "epoch": 1.757936323854587, "grad_norm": 0.1536809206008911, "learning_rate": 0.0005201351413051258, "loss": 2.2239, "step": 454750 }, { "epoch": 1.7579749810579703, "grad_norm": 0.12560512125492096, "learning_rate": 0.0005200000000000001, "loss": 2.2159, "step": 454760 }, { "epoch": 1.7580136382613536, "grad_norm": 0.13057692348957062, "learning_rate": 0.0005198648710337291, "loss": 2.2035, "step": 454770 }, { "epoch": 1.7580522954647368, "grad_norm": 0.15517479181289673, "learning_rate": 0.0005197297544029334, "loss": 2.2156, "step": 454780 }, { "epoch": 1.75809095266812, "grad_norm": 0.12616227567195892, "learning_rate": 0.0005195946501042357, "loss": 2.1918, "step": 454790 }, { "epoch": 1.7581296098715034, "grad_norm": 0.13498133420944214, "learning_rate": 0.0005194595581342603, "loss": 2.2042, "step": 454800 }, { "epoch": 1.7581682670748866, "grad_norm": 0.12483587116003036, "learning_rate": 0.0005193244784896321, "loss": 2.1974, "step": 454810 }, { "epoch": 1.7582069242782699, "grad_norm": 0.1263275444507599, "learning_rate": 0.0005191894111669784, "loss": 2.215, "step": 454820 }, { "epoch": 1.7582455814816533, "grad_norm": 0.12117032706737518, "learning_rate": 0.000519054356162928, "loss": 2.2084, "step": 454830 }, { "epoch": 1.7582842386850366, "grad_norm": 0.1304064393043518, "learning_rate": 0.0005189193134741106, "loss": 2.2004, "step": 454840 }, { "epoch": 1.7583228958884198, "grad_norm": 0.12708914279937744, "learning_rate": 0.0005187842830971581, "loss": 2.1867, "step": 454850 }, { "epoch": 1.758361553091803, "grad_norm": 0.1348961889743805, "learning_rate": 0.0005186492650287036, "loss": 2.2099, "step": 454860 }, { "epoch": 1.7584002102951866, "grad_norm": 0.13336822390556335, "learning_rate": 0.000518514259265382, "loss": 2.2037, "step": 454870 }, { "epoch": 1.7584388674985698, "grad_norm": 0.14478354156017303, "learning_rate": 0.0005183792658038294, "loss": 2.1886, "step": 454880 }, { "epoch": 1.758477524701953, "grad_norm": 0.12498698383569717, "learning_rate": 0.0005182442846406834, "loss": 2.1837, "step": 454890 }, { "epoch": 1.7585161819053363, "grad_norm": 0.12127989530563354, "learning_rate": 0.0005181093157725835, "loss": 2.1984, "step": 454900 }, { "epoch": 1.7585548391087196, "grad_norm": 0.12991923093795776, "learning_rate": 0.000517974359196171, "loss": 2.1798, "step": 454910 }, { "epoch": 1.7585934963121028, "grad_norm": 0.12539005279541016, "learning_rate": 0.0005178394149080876, "loss": 2.2072, "step": 454920 }, { "epoch": 1.758632153515486, "grad_norm": 0.12650595605373383, "learning_rate": 0.0005177044829049775, "loss": 2.1956, "step": 454930 }, { "epoch": 1.7586708107188693, "grad_norm": 0.12115669995546341, "learning_rate": 0.0005175695631834862, "loss": 2.1798, "step": 454940 }, { "epoch": 1.7587094679222526, "grad_norm": 0.1294897347688675, "learning_rate": 0.0005174346557402605, "loss": 2.2077, "step": 454950 }, { "epoch": 1.7587481251256358, "grad_norm": 0.1396237313747406, "learning_rate": 0.000517299760571949, "loss": 2.2059, "step": 454960 }, { "epoch": 1.758786782329019, "grad_norm": 0.12254752218723297, "learning_rate": 0.0005171648776752016, "loss": 2.1985, "step": 454970 }, { "epoch": 1.7588254395324023, "grad_norm": 0.1284838616847992, "learning_rate": 0.00051703000704667, "loss": 2.1999, "step": 454980 }, { "epoch": 1.7588640967357856, "grad_norm": 0.12287527322769165, "learning_rate": 0.000516895148683007, "loss": 2.1966, "step": 454990 }, { "epoch": 1.758902753939169, "grad_norm": 0.1231754943728447, "learning_rate": 0.0005167603025808674, "loss": 2.2017, "step": 455000 }, { "epoch": 1.7589414111425523, "grad_norm": 0.14277738332748413, "learning_rate": 0.0005166254687369072, "loss": 2.1753, "step": 455010 }, { "epoch": 1.7589800683459356, "grad_norm": 0.12003131955862045, "learning_rate": 0.0005164906471477841, "loss": 2.1935, "step": 455020 }, { "epoch": 1.7590187255493188, "grad_norm": 0.12798646092414856, "learning_rate": 0.0005163558378101574, "loss": 2.2036, "step": 455030 }, { "epoch": 1.7590573827527023, "grad_norm": 0.12923835217952728, "learning_rate": 0.0005162210407206873, "loss": 2.2028, "step": 455040 }, { "epoch": 1.7590960399560855, "grad_norm": 0.1290251612663269, "learning_rate": 0.0005160862558760364, "loss": 2.1966, "step": 455050 }, { "epoch": 1.7591346971594688, "grad_norm": 0.12686462700366974, "learning_rate": 0.0005159514832728682, "loss": 2.1959, "step": 455060 }, { "epoch": 1.759173354362852, "grad_norm": 0.12811657786369324, "learning_rate": 0.0005158167229078478, "loss": 2.1898, "step": 455070 }, { "epoch": 1.7592120115662353, "grad_norm": 0.11983395367860794, "learning_rate": 0.0005156819747776422, "loss": 2.2102, "step": 455080 }, { "epoch": 1.7592506687696186, "grad_norm": 0.15635327994823456, "learning_rate": 0.0005155472388789196, "loss": 2.2058, "step": 455090 }, { "epoch": 1.7592893259730018, "grad_norm": 0.12226098775863647, "learning_rate": 0.0005154125152083491, "loss": 2.1989, "step": 455100 }, { "epoch": 1.759327983176385, "grad_norm": 0.11969849467277527, "learning_rate": 0.0005152778037626029, "loss": 2.2051, "step": 455110 }, { "epoch": 1.7593666403797683, "grad_norm": 0.11924776434898376, "learning_rate": 0.0005151431045383533, "loss": 2.2042, "step": 455120 }, { "epoch": 1.7594052975831516, "grad_norm": 0.11855210363864899, "learning_rate": 0.0005150084175322744, "loss": 2.1958, "step": 455130 }, { "epoch": 1.7594439547865348, "grad_norm": 0.122954361140728, "learning_rate": 0.0005148737427410422, "loss": 2.2136, "step": 455140 }, { "epoch": 1.759482611989918, "grad_norm": 0.11402136832475662, "learning_rate": 0.0005147390801613341, "loss": 2.1967, "step": 455150 }, { "epoch": 1.7595212691933013, "grad_norm": 0.12220233678817749, "learning_rate": 0.0005146044297898287, "loss": 2.2009, "step": 455160 }, { "epoch": 1.7595599263966848, "grad_norm": 0.1369936317205429, "learning_rate": 0.0005144697916232064, "loss": 2.2066, "step": 455170 }, { "epoch": 1.759598583600068, "grad_norm": 0.13836544752120972, "learning_rate": 0.000514335165658149, "loss": 2.1852, "step": 455180 }, { "epoch": 1.7596372408034513, "grad_norm": 0.13634607195854187, "learning_rate": 0.0005142005518913395, "loss": 2.2191, "step": 455190 }, { "epoch": 1.7596758980068345, "grad_norm": 0.12321195751428604, "learning_rate": 0.0005140659503194632, "loss": 2.1931, "step": 455200 }, { "epoch": 1.759714555210218, "grad_norm": 0.13414765894412994, "learning_rate": 0.000513931360939206, "loss": 2.2071, "step": 455210 }, { "epoch": 1.7597532124136013, "grad_norm": 0.1208004504442215, "learning_rate": 0.0005137967837472562, "loss": 2.2096, "step": 455220 }, { "epoch": 1.7597918696169845, "grad_norm": 0.13674625754356384, "learning_rate": 0.0005136622187403026, "loss": 2.1978, "step": 455230 }, { "epoch": 1.7598305268203678, "grad_norm": 0.13066132366657257, "learning_rate": 0.0005135276659150363, "loss": 2.2067, "step": 455240 }, { "epoch": 1.759869184023751, "grad_norm": 0.12996459007263184, "learning_rate": 0.0005133931252681494, "loss": 2.2065, "step": 455250 }, { "epoch": 1.7599078412271343, "grad_norm": 0.14105352759361267, "learning_rate": 0.0005132585967963359, "loss": 2.194, "step": 455260 }, { "epoch": 1.7599464984305175, "grad_norm": 0.12938223779201508, "learning_rate": 0.0005131240804962911, "loss": 2.1989, "step": 455270 }, { "epoch": 1.7599851556339008, "grad_norm": 0.12563085556030273, "learning_rate": 0.0005129895763647116, "loss": 2.2171, "step": 455280 }, { "epoch": 1.760023812837284, "grad_norm": 0.13781751692295074, "learning_rate": 0.0005128550843982957, "loss": 2.193, "step": 455290 }, { "epoch": 1.7600624700406673, "grad_norm": 0.1279023140668869, "learning_rate": 0.0005127206045937434, "loss": 2.2161, "step": 455300 }, { "epoch": 1.7601011272440505, "grad_norm": 0.1227601021528244, "learning_rate": 0.0005125861369477557, "loss": 2.203, "step": 455310 }, { "epoch": 1.7601397844474338, "grad_norm": 0.14425310492515564, "learning_rate": 0.0005124516814570359, "loss": 2.1953, "step": 455320 }, { "epoch": 1.760178441650817, "grad_norm": 0.1337219476699829, "learning_rate": 0.0005123172381182874, "loss": 2.1978, "step": 455330 }, { "epoch": 1.7602170988542005, "grad_norm": 0.11427665501832962, "learning_rate": 0.0005121828069282169, "loss": 2.1928, "step": 455340 }, { "epoch": 1.7602557560575838, "grad_norm": 0.12122835963964462, "learning_rate": 0.0005120483878835307, "loss": 2.2033, "step": 455350 }, { "epoch": 1.760294413260967, "grad_norm": 0.12823830544948578, "learning_rate": 0.0005119139809809381, "loss": 2.205, "step": 455360 }, { "epoch": 1.7603330704643503, "grad_norm": 0.12689967453479767, "learning_rate": 0.000511779586217149, "loss": 2.2061, "step": 455370 }, { "epoch": 1.7603717276677338, "grad_norm": 0.1314159780740738, "learning_rate": 0.0005116452035888755, "loss": 2.2063, "step": 455380 }, { "epoch": 1.760410384871117, "grad_norm": 0.12121304869651794, "learning_rate": 0.0005115108330928305, "loss": 2.2017, "step": 455390 }, { "epoch": 1.7604490420745003, "grad_norm": 0.1249544620513916, "learning_rate": 0.0005113764747257284, "loss": 2.1912, "step": 455400 }, { "epoch": 1.7604876992778835, "grad_norm": 0.13628491759300232, "learning_rate": 0.0005112421284842857, "loss": 2.2247, "step": 455410 }, { "epoch": 1.7605263564812668, "grad_norm": 0.1296730488538742, "learning_rate": 0.00051110779436522, "loss": 2.2115, "step": 455420 }, { "epoch": 1.76056501368465, "grad_norm": 0.12934458255767822, "learning_rate": 0.0005109734723652504, "loss": 2.1927, "step": 455430 }, { "epoch": 1.7606036708880333, "grad_norm": 0.13591471314430237, "learning_rate": 0.0005108391624810973, "loss": 2.2021, "step": 455440 }, { "epoch": 1.7606423280914165, "grad_norm": 0.132755845785141, "learning_rate": 0.0005107048647094828, "loss": 2.2093, "step": 455450 }, { "epoch": 1.7606809852947998, "grad_norm": 0.13273707032203674, "learning_rate": 0.0005105705790471307, "loss": 2.2104, "step": 455460 }, { "epoch": 1.760719642498183, "grad_norm": 0.12009256333112717, "learning_rate": 0.0005104363054907655, "loss": 2.2, "step": 455470 }, { "epoch": 1.7607582997015663, "grad_norm": 0.12511485815048218, "learning_rate": 0.0005103020440371143, "loss": 2.1958, "step": 455480 }, { "epoch": 1.7607969569049495, "grad_norm": 0.14407357573509216, "learning_rate": 0.0005101677946829046, "loss": 2.2029, "step": 455490 }, { "epoch": 1.7608356141083328, "grad_norm": 0.12807932496070862, "learning_rate": 0.000510033557424866, "loss": 2.1967, "step": 455500 }, { "epoch": 1.7608742713117163, "grad_norm": 0.1200571283698082, "learning_rate": 0.0005098993322597294, "loss": 2.2116, "step": 455510 }, { "epoch": 1.7609129285150995, "grad_norm": 0.1264566034078598, "learning_rate": 0.0005097651191842274, "loss": 2.1993, "step": 455520 }, { "epoch": 1.7609515857184828, "grad_norm": 0.17501239478588104, "learning_rate": 0.0005096309181950936, "loss": 2.2031, "step": 455530 }, { "epoch": 1.760990242921866, "grad_norm": 0.12370411306619644, "learning_rate": 0.0005094967292890633, "loss": 2.2032, "step": 455540 }, { "epoch": 1.7610289001252495, "grad_norm": 0.12493716180324554, "learning_rate": 0.0005093625524628734, "loss": 2.196, "step": 455550 }, { "epoch": 1.7610675573286327, "grad_norm": 0.12528210878372192, "learning_rate": 0.0005092283877132622, "loss": 2.202, "step": 455560 }, { "epoch": 1.761106214532016, "grad_norm": 0.12978526949882507, "learning_rate": 0.0005090942350369691, "loss": 2.1923, "step": 455570 }, { "epoch": 1.7611448717353992, "grad_norm": 0.12521392107009888, "learning_rate": 0.0005089600944307359, "loss": 2.2155, "step": 455580 }, { "epoch": 1.7611835289387825, "grad_norm": 0.12959663569927216, "learning_rate": 0.0005088259658913048, "loss": 2.1988, "step": 455590 }, { "epoch": 1.7612221861421657, "grad_norm": 0.1173701286315918, "learning_rate": 0.00050869184941542, "loss": 2.2132, "step": 455600 }, { "epoch": 1.761260843345549, "grad_norm": 0.13058489561080933, "learning_rate": 0.0005085577449998273, "loss": 2.2075, "step": 455610 }, { "epoch": 1.7612995005489323, "grad_norm": 0.12429569661617279, "learning_rate": 0.0005084236526412735, "loss": 2.1938, "step": 455620 }, { "epoch": 1.7613381577523155, "grad_norm": 0.1280798465013504, "learning_rate": 0.0005082895723365073, "loss": 2.1824, "step": 455630 }, { "epoch": 1.7613768149556988, "grad_norm": 0.11434909701347351, "learning_rate": 0.0005081555040822788, "loss": 2.2017, "step": 455640 }, { "epoch": 1.761415472159082, "grad_norm": 0.13100238144397736, "learning_rate": 0.0005080214478753389, "loss": 2.1949, "step": 455650 }, { "epoch": 1.7614541293624653, "grad_norm": 0.12328257411718369, "learning_rate": 0.000507887403712441, "loss": 2.1991, "step": 455660 }, { "epoch": 1.7614927865658487, "grad_norm": 0.1999645233154297, "learning_rate": 0.0005077533715903393, "loss": 2.205, "step": 455670 }, { "epoch": 1.761531443769232, "grad_norm": 0.12685048580169678, "learning_rate": 0.0005076193515057896, "loss": 2.1929, "step": 455680 }, { "epoch": 1.7615701009726152, "grad_norm": 0.11934152245521545, "learning_rate": 0.0005074853434555493, "loss": 2.22, "step": 455690 }, { "epoch": 1.7616087581759985, "grad_norm": 0.11731681227684021, "learning_rate": 0.0005073513474363767, "loss": 2.2034, "step": 455700 }, { "epoch": 1.7616474153793817, "grad_norm": 0.12497756630182266, "learning_rate": 0.0005072173634450326, "loss": 2.2034, "step": 455710 }, { "epoch": 1.7616860725827652, "grad_norm": 0.12380430847406387, "learning_rate": 0.0005070833914782781, "loss": 2.1856, "step": 455720 }, { "epoch": 1.7617247297861485, "grad_norm": 0.12082111090421677, "learning_rate": 0.0005069494315328766, "loss": 2.2045, "step": 455730 }, { "epoch": 1.7617633869895317, "grad_norm": 0.12536385655403137, "learning_rate": 0.0005068154836055927, "loss": 2.1895, "step": 455740 }, { "epoch": 1.761802044192915, "grad_norm": 0.12240228801965714, "learning_rate": 0.000506681547693192, "loss": 2.1965, "step": 455750 }, { "epoch": 1.7618407013962982, "grad_norm": 0.1273859590291977, "learning_rate": 0.0005065476237924425, "loss": 2.2206, "step": 455760 }, { "epoch": 1.7618793585996815, "grad_norm": 0.131776824593544, "learning_rate": 0.0005064137119001127, "loss": 2.2186, "step": 455770 }, { "epoch": 1.7619180158030647, "grad_norm": 0.12719875574111938, "learning_rate": 0.0005062798120129728, "loss": 2.2156, "step": 455780 }, { "epoch": 1.761956673006448, "grad_norm": 0.13013708591461182, "learning_rate": 0.0005061459241277951, "loss": 2.2029, "step": 455790 }, { "epoch": 1.7619953302098312, "grad_norm": 0.13835778832435608, "learning_rate": 0.0005060120482413521, "loss": 2.1965, "step": 455800 }, { "epoch": 1.7620339874132145, "grad_norm": 0.12558209896087646, "learning_rate": 0.0005058781843504192, "loss": 2.198, "step": 455810 }, { "epoch": 1.7620726446165977, "grad_norm": 0.1262013167142868, "learning_rate": 0.0005057443324517721, "loss": 2.2012, "step": 455820 }, { "epoch": 1.762111301819981, "grad_norm": 0.12975828349590302, "learning_rate": 0.0005056104925421888, "loss": 2.2104, "step": 455830 }, { "epoch": 1.7621499590233645, "grad_norm": 0.13392338156700134, "learning_rate": 0.0005054766646184477, "loss": 2.1919, "step": 455840 }, { "epoch": 1.7621886162267477, "grad_norm": 0.1283750832080841, "learning_rate": 0.0005053428486773297, "loss": 2.2028, "step": 455850 }, { "epoch": 1.762227273430131, "grad_norm": 0.12651102244853973, "learning_rate": 0.0005052090447156164, "loss": 2.2032, "step": 455860 }, { "epoch": 1.7622659306335142, "grad_norm": 0.12298119813203812, "learning_rate": 0.0005050752527300914, "loss": 2.2079, "step": 455870 }, { "epoch": 1.7623045878368975, "grad_norm": 0.13039034605026245, "learning_rate": 0.0005049414727175394, "loss": 2.2161, "step": 455880 }, { "epoch": 1.762343245040281, "grad_norm": 0.1351780742406845, "learning_rate": 0.0005048077046747465, "loss": 2.1956, "step": 455890 }, { "epoch": 1.7623819022436642, "grad_norm": 0.13028337061405182, "learning_rate": 0.0005046739485985004, "loss": 2.1939, "step": 455900 }, { "epoch": 1.7624205594470475, "grad_norm": 0.12082945555448532, "learning_rate": 0.0005045402044855903, "loss": 2.1899, "step": 455910 }, { "epoch": 1.7624592166504307, "grad_norm": 0.1275576949119568, "learning_rate": 0.0005044064723328065, "loss": 2.2041, "step": 455920 }, { "epoch": 1.762497873853814, "grad_norm": 0.12402354925870895, "learning_rate": 0.0005042727521369412, "loss": 2.1986, "step": 455930 }, { "epoch": 1.7625365310571972, "grad_norm": 0.12115433067083359, "learning_rate": 0.0005041390438947877, "loss": 2.2154, "step": 455940 }, { "epoch": 1.7625751882605805, "grad_norm": 0.11511493474245071, "learning_rate": 0.0005040053476031408, "loss": 2.1881, "step": 455950 }, { "epoch": 1.7626138454639637, "grad_norm": 0.12377490848302841, "learning_rate": 0.0005038716632587965, "loss": 2.211, "step": 455960 }, { "epoch": 1.762652502667347, "grad_norm": 0.1281753033399582, "learning_rate": 0.0005037379908585528, "loss": 2.2096, "step": 455970 }, { "epoch": 1.7626911598707302, "grad_norm": 0.13222567737102509, "learning_rate": 0.000503604330399209, "loss": 2.2073, "step": 455980 }, { "epoch": 1.7627298170741135, "grad_norm": 0.12830236554145813, "learning_rate": 0.0005034706818775652, "loss": 2.1993, "step": 455990 }, { "epoch": 1.7627684742774967, "grad_norm": 0.12811852991580963, "learning_rate": 0.0005033370452904234, "loss": 2.2049, "step": 456000 }, { "epoch": 1.7628071314808802, "grad_norm": 0.1232878714799881, "learning_rate": 0.0005032034206345872, "loss": 2.2021, "step": 456010 }, { "epoch": 1.7628457886842634, "grad_norm": 0.14702750742435455, "learning_rate": 0.0005030698079068616, "loss": 2.1981, "step": 456020 }, { "epoch": 1.7628844458876467, "grad_norm": 0.12906567752361298, "learning_rate": 0.0005029362071040526, "loss": 2.1987, "step": 456030 }, { "epoch": 1.76292310309103, "grad_norm": 0.5849501490592957, "learning_rate": 0.0005028026182229679, "loss": 2.1847, "step": 456040 }, { "epoch": 1.7629617602944134, "grad_norm": 0.13289207220077515, "learning_rate": 0.0005026690412604167, "loss": 2.2058, "step": 456050 }, { "epoch": 1.7630004174977967, "grad_norm": 0.1222372055053711, "learning_rate": 0.0005025354762132093, "loss": 2.1919, "step": 456060 }, { "epoch": 1.76303907470118, "grad_norm": 0.12752196192741394, "learning_rate": 0.0005024019230781578, "loss": 2.217, "step": 456070 }, { "epoch": 1.7630777319045632, "grad_norm": 0.12730394303798676, "learning_rate": 0.0005022683818520757, "loss": 2.2028, "step": 456080 }, { "epoch": 1.7631163891079464, "grad_norm": 0.12045823037624359, "learning_rate": 0.0005021348525317775, "loss": 2.2016, "step": 456090 }, { "epoch": 1.7631550463113297, "grad_norm": 0.12794707715511322, "learning_rate": 0.0005020013351140796, "loss": 2.1824, "step": 456100 }, { "epoch": 1.763193703514713, "grad_norm": 0.11931639164686203, "learning_rate": 0.0005018678295957996, "loss": 2.1882, "step": 456110 }, { "epoch": 1.7632323607180962, "grad_norm": 0.12232300639152527, "learning_rate": 0.0005017343359737565, "loss": 2.216, "step": 456120 }, { "epoch": 1.7632710179214794, "grad_norm": 0.12490812689065933, "learning_rate": 0.000501600854244771, "loss": 2.1968, "step": 456130 }, { "epoch": 1.7633096751248627, "grad_norm": 0.12569020688533783, "learning_rate": 0.0005014673844056648, "loss": 2.1944, "step": 456140 }, { "epoch": 1.763348332328246, "grad_norm": 0.13292300701141357, "learning_rate": 0.0005013339264532609, "loss": 2.2009, "step": 456150 }, { "epoch": 1.7633869895316292, "grad_norm": 0.12760011851787567, "learning_rate": 0.0005012004803843843, "loss": 2.2126, "step": 456160 }, { "epoch": 1.7634256467350125, "grad_norm": 0.12860462069511414, "learning_rate": 0.0005010670461958613, "loss": 2.2051, "step": 456170 }, { "epoch": 1.763464303938396, "grad_norm": 0.11949852854013443, "learning_rate": 0.0005009336238845192, "loss": 2.1977, "step": 456180 }, { "epoch": 1.7635029611417792, "grad_norm": 0.13162904977798462, "learning_rate": 0.0005008002134471871, "loss": 2.198, "step": 456190 }, { "epoch": 1.7635416183451624, "grad_norm": 0.1294323205947876, "learning_rate": 0.000500666814880695, "loss": 2.203, "step": 456200 }, { "epoch": 1.7635802755485457, "grad_norm": 0.13341623544692993, "learning_rate": 0.000500533428181875, "loss": 2.1982, "step": 456210 }, { "epoch": 1.7636189327519292, "grad_norm": 0.1469571441411972, "learning_rate": 0.0005004000533475603, "loss": 2.2157, "step": 456220 }, { "epoch": 1.7636575899553124, "grad_norm": 0.14060497283935547, "learning_rate": 0.0005002666903745854, "loss": 2.1957, "step": 456230 }, { "epoch": 1.7636962471586957, "grad_norm": 0.13437320291996002, "learning_rate": 0.000500133339259786, "loss": 2.1918, "step": 456240 }, { "epoch": 1.763734904362079, "grad_norm": 0.11874886602163315, "learning_rate": 0.0005, "loss": 2.201, "step": 456250 }, { "epoch": 1.7637735615654622, "grad_norm": 0.13021759688854218, "learning_rate": 0.000499866672592066, "loss": 2.2052, "step": 456260 }, { "epoch": 1.7638122187688454, "grad_norm": 0.13554084300994873, "learning_rate": 0.0004997333570328239, "loss": 2.1993, "step": 456270 }, { "epoch": 1.7638508759722287, "grad_norm": 0.12686790525913239, "learning_rate": 0.0004996000533191158, "loss": 2.2008, "step": 456280 }, { "epoch": 1.763889533175612, "grad_norm": 0.14258117973804474, "learning_rate": 0.0004994667614477846, "loss": 2.1982, "step": 456290 }, { "epoch": 1.7639281903789952, "grad_norm": 0.1296672224998474, "learning_rate": 0.0004993334814156745, "loss": 2.1993, "step": 456300 }, { "epoch": 1.7639668475823784, "grad_norm": 0.14639268815517426, "learning_rate": 0.0004992002132196314, "loss": 2.1921, "step": 456310 }, { "epoch": 1.7640055047857617, "grad_norm": 0.15819869935512543, "learning_rate": 0.0004990669568565025, "loss": 2.1994, "step": 456320 }, { "epoch": 1.764044161989145, "grad_norm": 0.1342620998620987, "learning_rate": 0.0004989337123231366, "loss": 2.188, "step": 456330 }, { "epoch": 1.7640828191925282, "grad_norm": 0.11846864968538284, "learning_rate": 0.0004988004796163836, "loss": 2.184, "step": 456340 }, { "epoch": 1.7641214763959117, "grad_norm": 0.12967292964458466, "learning_rate": 0.0004986672587330949, "loss": 2.2067, "step": 456350 }, { "epoch": 1.764160133599295, "grad_norm": 0.12366357445716858, "learning_rate": 0.0004985340496701232, "loss": 2.1873, "step": 456360 }, { "epoch": 1.7641987908026782, "grad_norm": 0.1255825161933899, "learning_rate": 0.0004984008524243228, "loss": 2.2074, "step": 456370 }, { "epoch": 1.7642374480060614, "grad_norm": 0.1342882215976715, "learning_rate": 0.0004982676669925497, "loss": 2.1949, "step": 456380 }, { "epoch": 1.764276105209445, "grad_norm": 0.14513757824897766, "learning_rate": 0.0004981344933716603, "loss": 2.1977, "step": 456390 }, { "epoch": 1.7643147624128281, "grad_norm": 0.1229807436466217, "learning_rate": 0.000498001331558513, "loss": 2.1863, "step": 456400 }, { "epoch": 1.7643534196162114, "grad_norm": 0.1304650902748108, "learning_rate": 0.0004978681815499679, "loss": 2.1815, "step": 456410 }, { "epoch": 1.7643920768195946, "grad_norm": 0.12147001177072525, "learning_rate": 0.0004977350433428863, "loss": 2.2013, "step": 456420 }, { "epoch": 1.764430734022978, "grad_norm": 0.12276685982942581, "learning_rate": 0.0004976019169341302, "loss": 2.2108, "step": 456430 }, { "epoch": 1.7644693912263612, "grad_norm": 0.13635388016700745, "learning_rate": 0.000497468802320564, "loss": 2.1976, "step": 456440 }, { "epoch": 1.7645080484297444, "grad_norm": 0.13049404323101044, "learning_rate": 0.0004973356994990532, "loss": 2.195, "step": 456450 }, { "epoch": 1.7645467056331277, "grad_norm": 0.12726396322250366, "learning_rate": 0.000497202608466464, "loss": 2.2044, "step": 456460 }, { "epoch": 1.764585362836511, "grad_norm": 0.1332763433456421, "learning_rate": 0.0004970695292196648, "loss": 2.1888, "step": 456470 }, { "epoch": 1.7646240200398942, "grad_norm": 0.28297457098960876, "learning_rate": 0.0004969364617555252, "loss": 2.1922, "step": 456480 }, { "epoch": 1.7646626772432774, "grad_norm": 0.1367010772228241, "learning_rate": 0.0004968034060709159, "loss": 2.1988, "step": 456490 }, { "epoch": 1.7647013344466607, "grad_norm": 0.12271040678024292, "learning_rate": 0.0004966703621627094, "loss": 2.1955, "step": 456500 }, { "epoch": 1.764739991650044, "grad_norm": 0.14147435128688812, "learning_rate": 0.0004965373300277789, "loss": 2.1992, "step": 456510 }, { "epoch": 1.7647786488534274, "grad_norm": 0.1327211707830429, "learning_rate": 0.0004964043096629998, "loss": 2.2041, "step": 456520 }, { "epoch": 1.7648173060568106, "grad_norm": 0.12535947561264038, "learning_rate": 0.0004962713010652486, "loss": 2.1888, "step": 456530 }, { "epoch": 1.764855963260194, "grad_norm": 0.13628923892974854, "learning_rate": 0.0004961383042314031, "loss": 2.2029, "step": 456540 }, { "epoch": 1.7648946204635771, "grad_norm": 0.12428577244281769, "learning_rate": 0.0004960053191583422, "loss": 2.1966, "step": 456550 }, { "epoch": 1.7649332776669606, "grad_norm": 0.1212974563241005, "learning_rate": 0.0004958723458429468, "loss": 2.1948, "step": 456560 }, { "epoch": 1.7649719348703439, "grad_norm": 0.12550272047519684, "learning_rate": 0.0004957393842820986, "loss": 2.2202, "step": 456570 }, { "epoch": 1.7650105920737271, "grad_norm": 0.12495537847280502, "learning_rate": 0.0004956064344726809, "loss": 2.2067, "step": 456580 }, { "epoch": 1.7650492492771104, "grad_norm": 0.12538939714431763, "learning_rate": 0.0004954734964115788, "loss": 2.2029, "step": 456590 }, { "epoch": 1.7650879064804936, "grad_norm": 0.11974082887172699, "learning_rate": 0.000495340570095678, "loss": 2.1994, "step": 456600 }, { "epoch": 1.7651265636838769, "grad_norm": 0.12425250560045242, "learning_rate": 0.0004952076555218656, "loss": 2.205, "step": 456610 }, { "epoch": 1.7651652208872601, "grad_norm": 0.13043095171451569, "learning_rate": 0.0004950747526870312, "loss": 2.188, "step": 456620 }, { "epoch": 1.7652038780906434, "grad_norm": 0.127410888671875, "learning_rate": 0.0004949418615880647, "loss": 2.1993, "step": 456630 }, { "epoch": 1.7652425352940266, "grad_norm": 0.1242605522274971, "learning_rate": 0.0004948089822218576, "loss": 2.1888, "step": 456640 }, { "epoch": 1.76528119249741, "grad_norm": 0.125991091132164, "learning_rate": 0.0004946761145853029, "loss": 2.2001, "step": 456650 }, { "epoch": 1.7653198497007931, "grad_norm": 0.14134454727172852, "learning_rate": 0.0004945432586752949, "loss": 2.1943, "step": 456660 }, { "epoch": 1.7653585069041764, "grad_norm": 0.13556590676307678, "learning_rate": 0.0004944104144887294, "loss": 2.1929, "step": 456670 }, { "epoch": 1.7653971641075596, "grad_norm": 0.12267480045557022, "learning_rate": 0.0004942775820225032, "loss": 2.2012, "step": 456680 }, { "epoch": 1.7654358213109431, "grad_norm": 0.12042094767093658, "learning_rate": 0.0004941447612735148, "loss": 2.1937, "step": 456690 }, { "epoch": 1.7654744785143264, "grad_norm": 0.13384388387203217, "learning_rate": 0.0004940119522386642, "loss": 2.1987, "step": 456700 }, { "epoch": 1.7655131357177096, "grad_norm": 0.12269002944231033, "learning_rate": 0.000493879154914852, "loss": 2.2075, "step": 456710 }, { "epoch": 1.7655517929210929, "grad_norm": 0.12442340701818466, "learning_rate": 0.0004937463692989815, "loss": 2.2074, "step": 456720 }, { "epoch": 1.7655904501244764, "grad_norm": 0.1305408775806427, "learning_rate": 0.0004936135953879561, "loss": 2.1942, "step": 456730 }, { "epoch": 1.7656291073278596, "grad_norm": 0.12869402766227722, "learning_rate": 0.0004934808331786813, "loss": 2.1924, "step": 456740 }, { "epoch": 1.7656677645312429, "grad_norm": 0.13615325093269348, "learning_rate": 0.0004933480826680636, "loss": 2.2168, "step": 456750 }, { "epoch": 1.765706421734626, "grad_norm": 0.13996773958206177, "learning_rate": 0.0004932153438530109, "loss": 2.1946, "step": 456760 }, { "epoch": 1.7657450789380094, "grad_norm": 0.124326691031456, "learning_rate": 0.0004930826167304327, "loss": 2.195, "step": 456770 }, { "epoch": 1.7657837361413926, "grad_norm": 0.12879958748817444, "learning_rate": 0.0004929499012972397, "loss": 2.2016, "step": 456780 }, { "epoch": 1.7658223933447759, "grad_norm": 0.1394444704055786, "learning_rate": 0.0004928171975503437, "loss": 2.2002, "step": 456790 }, { "epoch": 1.7658610505481591, "grad_norm": 0.1241043284535408, "learning_rate": 0.0004926845054866582, "loss": 2.2017, "step": 456800 }, { "epoch": 1.7658997077515424, "grad_norm": 0.12370163947343826, "learning_rate": 0.0004925518251030983, "loss": 2.1875, "step": 456810 }, { "epoch": 1.7659383649549256, "grad_norm": 0.13047485053539276, "learning_rate": 0.0004924191563965798, "loss": 2.2002, "step": 456820 }, { "epoch": 1.7659770221583089, "grad_norm": 0.13031162321567535, "learning_rate": 0.0004922864993640204, "loss": 2.19, "step": 456830 }, { "epoch": 1.7660156793616921, "grad_norm": 0.14417153596878052, "learning_rate": 0.000492153854002339, "loss": 2.2101, "step": 456840 }, { "epoch": 1.7660543365650754, "grad_norm": 0.12199871242046356, "learning_rate": 0.0004920212203084553, "loss": 2.2007, "step": 456850 }, { "epoch": 1.7660929937684589, "grad_norm": 0.12932102382183075, "learning_rate": 0.0004918885982792916, "loss": 2.1939, "step": 456860 }, { "epoch": 1.766131650971842, "grad_norm": 0.12995269894599915, "learning_rate": 0.0004917559879117704, "loss": 2.1926, "step": 456870 }, { "epoch": 1.7661703081752254, "grad_norm": 0.2126019448041916, "learning_rate": 0.0004916233892028159, "loss": 2.1925, "step": 456880 }, { "epoch": 1.7662089653786086, "grad_norm": 0.12991484999656677, "learning_rate": 0.0004914908021493538, "loss": 2.1984, "step": 456890 }, { "epoch": 1.766247622581992, "grad_norm": 0.13209208846092224, "learning_rate": 0.0004913582267483112, "loss": 2.1932, "step": 456900 }, { "epoch": 1.7662862797853753, "grad_norm": 0.12427953630685806, "learning_rate": 0.0004912256629966163, "loss": 2.1839, "step": 456910 }, { "epoch": 1.7663249369887586, "grad_norm": 0.1290198415517807, "learning_rate": 0.0004910931108911986, "loss": 2.2005, "step": 456920 }, { "epoch": 1.7663635941921418, "grad_norm": 0.12674565613269806, "learning_rate": 0.0004909605704289897, "loss": 2.1965, "step": 456930 }, { "epoch": 1.766402251395525, "grad_norm": 0.13432876765727997, "learning_rate": 0.0004908280416069215, "loss": 2.2038, "step": 456940 }, { "epoch": 1.7664409085989083, "grad_norm": 0.12446749210357666, "learning_rate": 0.0004906955244219275, "loss": 2.2015, "step": 456950 }, { "epoch": 1.7664795658022916, "grad_norm": 0.13796383142471313, "learning_rate": 0.0004905630188709434, "loss": 2.1983, "step": 456960 }, { "epoch": 1.7665182230056748, "grad_norm": 0.14042441546916962, "learning_rate": 0.0004904305249509052, "loss": 2.2075, "step": 456970 }, { "epoch": 1.766556880209058, "grad_norm": 0.12553489208221436, "learning_rate": 0.0004902980426587505, "loss": 2.1971, "step": 456980 }, { "epoch": 1.7665955374124414, "grad_norm": 0.15119601786136627, "learning_rate": 0.0004901655719914186, "loss": 2.1954, "step": 456990 }, { "epoch": 1.7666341946158246, "grad_norm": 0.14849050343036652, "learning_rate": 0.0004900331129458502, "loss": 2.2048, "step": 457000 }, { "epoch": 1.7666728518192079, "grad_norm": 0.13672266900539398, "learning_rate": 0.0004899006655189864, "loss": 2.2095, "step": 457010 }, { "epoch": 1.766711509022591, "grad_norm": 0.12951894104480743, "learning_rate": 0.000489768229707771, "loss": 2.1923, "step": 457020 }, { "epoch": 1.7667501662259746, "grad_norm": 0.13723427057266235, "learning_rate": 0.0004896358055091481, "loss": 2.1984, "step": 457030 }, { "epoch": 1.7667888234293578, "grad_norm": 0.12522564828395844, "learning_rate": 0.0004895033929200634, "loss": 2.1975, "step": 457040 }, { "epoch": 1.766827480632741, "grad_norm": 0.133930966258049, "learning_rate": 0.0004893709919374645, "loss": 2.1949, "step": 457050 }, { "epoch": 1.7668661378361243, "grad_norm": 0.1456451416015625, "learning_rate": 0.0004892386025582993, "loss": 2.1979, "step": 457060 }, { "epoch": 1.7669047950395078, "grad_norm": 0.13085532188415527, "learning_rate": 0.0004891062247795181, "loss": 2.2061, "step": 457070 }, { "epoch": 1.766943452242891, "grad_norm": 0.13043569028377533, "learning_rate": 0.0004889738585980718, "loss": 2.1919, "step": 457080 }, { "epoch": 1.7669821094462743, "grad_norm": 0.1371423453092575, "learning_rate": 0.0004888415040109129, "loss": 2.21, "step": 457090 }, { "epoch": 1.7670207666496576, "grad_norm": 0.12515562772750854, "learning_rate": 0.0004887091610149952, "loss": 2.1925, "step": 457100 }, { "epoch": 1.7670594238530408, "grad_norm": 0.16014552116394043, "learning_rate": 0.0004885768296072737, "loss": 2.2099, "step": 457110 }, { "epoch": 1.767098081056424, "grad_norm": 0.13860388100147247, "learning_rate": 0.0004884445097847052, "loss": 2.1975, "step": 457120 }, { "epoch": 1.7671367382598073, "grad_norm": 0.12750791013240814, "learning_rate": 0.0004883122015442474, "loss": 2.1925, "step": 457130 }, { "epoch": 1.7671753954631906, "grad_norm": 0.12645824253559113, "learning_rate": 0.00048817990488285944, "loss": 2.1883, "step": 457140 }, { "epoch": 1.7672140526665738, "grad_norm": 0.12171449512243271, "learning_rate": 0.00048804761979750165, "loss": 2.2046, "step": 457150 }, { "epoch": 1.767252709869957, "grad_norm": 0.13183607161045074, "learning_rate": 0.000487915346285136, "loss": 2.2009, "step": 457160 }, { "epoch": 1.7672913670733403, "grad_norm": 0.1332433670759201, "learning_rate": 0.00048778308434272577, "loss": 2.1939, "step": 457170 }, { "epoch": 1.7673300242767236, "grad_norm": 0.12962956726551056, "learning_rate": 0.00048765083396723496, "loss": 2.2094, "step": 457180 }, { "epoch": 1.7673686814801068, "grad_norm": 0.1359495371580124, "learning_rate": 0.0004875185951556298, "loss": 2.1924, "step": 457190 }, { "epoch": 1.7674073386834903, "grad_norm": 0.13195596635341644, "learning_rate": 0.0004873863679048771, "loss": 2.1995, "step": 457200 }, { "epoch": 1.7674459958868736, "grad_norm": 0.13214111328125, "learning_rate": 0.00048725415221194537, "loss": 2.1957, "step": 457210 }, { "epoch": 1.7674846530902568, "grad_norm": 0.13750267028808594, "learning_rate": 0.0004871219480738045, "loss": 2.1915, "step": 457220 }, { "epoch": 1.76752331029364, "grad_norm": 0.13220281898975372, "learning_rate": 0.0004869897554874256, "loss": 2.1841, "step": 457230 }, { "epoch": 1.7675619674970235, "grad_norm": 0.12308496981859207, "learning_rate": 0.000486857574449781, "loss": 2.1852, "step": 457240 }, { "epoch": 1.7676006247004068, "grad_norm": 0.11604126542806625, "learning_rate": 0.00048672540495784447, "loss": 2.1906, "step": 457250 }, { "epoch": 1.76763928190379, "grad_norm": 0.12625819444656372, "learning_rate": 0.000486593247008591, "loss": 2.1857, "step": 457260 }, { "epoch": 1.7676779391071733, "grad_norm": 0.13055488467216492, "learning_rate": 0.00048646110059899695, "loss": 2.1866, "step": 457270 }, { "epoch": 1.7677165963105566, "grad_norm": 0.12539438903331757, "learning_rate": 0.0004863289657260399, "loss": 2.2114, "step": 457280 }, { "epoch": 1.7677552535139398, "grad_norm": 0.12512290477752686, "learning_rate": 0.00048619684238669934, "loss": 2.1954, "step": 457290 }, { "epoch": 1.767793910717323, "grad_norm": 0.14459888637065887, "learning_rate": 0.0004860647305779551, "loss": 2.2088, "step": 457300 }, { "epoch": 1.7678325679207063, "grad_norm": 0.13872483372688293, "learning_rate": 0.0004859326302967888, "loss": 2.179, "step": 457310 }, { "epoch": 1.7678712251240896, "grad_norm": 0.13036368787288666, "learning_rate": 0.00048580054154018405, "loss": 2.1955, "step": 457320 }, { "epoch": 1.7679098823274728, "grad_norm": 0.128155916929245, "learning_rate": 0.0004856684643051246, "loss": 2.2028, "step": 457330 }, { "epoch": 1.767948539530856, "grad_norm": 0.13497613370418549, "learning_rate": 0.0004855363985885959, "loss": 2.1886, "step": 457340 }, { "epoch": 1.7679871967342393, "grad_norm": 0.12754927575588226, "learning_rate": 0.0004854043443875853, "loss": 2.1898, "step": 457350 }, { "epoch": 1.7680258539376226, "grad_norm": 0.13196970522403717, "learning_rate": 0.000485272301699081, "loss": 2.2183, "step": 457360 }, { "epoch": 1.768064511141006, "grad_norm": 0.1224970743060112, "learning_rate": 0.0004851402705200723, "loss": 2.199, "step": 457370 }, { "epoch": 1.7681031683443893, "grad_norm": 0.12491404265165329, "learning_rate": 0.00048500825084754997, "loss": 2.203, "step": 457380 }, { "epoch": 1.7681418255477725, "grad_norm": 0.13020554184913635, "learning_rate": 0.0004848762426785065, "loss": 2.1925, "step": 457390 }, { "epoch": 1.7681804827511558, "grad_norm": 0.12115182727575302, "learning_rate": 0.00048474424600993514, "loss": 2.1961, "step": 457400 }, { "epoch": 1.7682191399545393, "grad_norm": 0.12314828485250473, "learning_rate": 0.0004846122608388306, "loss": 2.1979, "step": 457410 }, { "epoch": 1.7682577971579225, "grad_norm": 0.13972881436347961, "learning_rate": 0.0004844802871621894, "loss": 2.1956, "step": 457420 }, { "epoch": 1.7682964543613058, "grad_norm": 0.14746752381324768, "learning_rate": 0.0004843483249770084, "loss": 2.1894, "step": 457430 }, { "epoch": 1.768335111564689, "grad_norm": 0.1263071745634079, "learning_rate": 0.00048421637428028676, "loss": 2.2025, "step": 457440 }, { "epoch": 1.7683737687680723, "grad_norm": 0.12901432812213898, "learning_rate": 0.0004840844350690241, "loss": 2.1954, "step": 457450 }, { "epoch": 1.7684124259714555, "grad_norm": 0.1326717585325241, "learning_rate": 0.00048395250734022176, "loss": 2.194, "step": 457460 }, { "epoch": 1.7684510831748388, "grad_norm": 0.13518276810646057, "learning_rate": 0.0004838205910908828, "loss": 2.196, "step": 457470 }, { "epoch": 1.768489740378222, "grad_norm": 0.1248217448592186, "learning_rate": 0.0004836886863180108, "loss": 2.2143, "step": 457480 }, { "epoch": 1.7685283975816053, "grad_norm": 0.13813568651676178, "learning_rate": 0.0004835567930186109, "loss": 2.1984, "step": 457490 }, { "epoch": 1.7685670547849885, "grad_norm": 0.12963175773620605, "learning_rate": 0.00048342491118969, "loss": 2.203, "step": 457500 }, { "epoch": 1.7686057119883718, "grad_norm": 0.13578617572784424, "learning_rate": 0.0004832930408282554, "loss": 2.2018, "step": 457510 }, { "epoch": 1.768644369191755, "grad_norm": 0.1339896023273468, "learning_rate": 0.00048316118193131666, "loss": 2.2028, "step": 457520 }, { "epoch": 1.7686830263951385, "grad_norm": 0.1255003660917282, "learning_rate": 0.0004830293344958843, "loss": 2.2142, "step": 457530 }, { "epoch": 1.7687216835985218, "grad_norm": 0.1350245177745819, "learning_rate": 0.0004828974985189696, "loss": 2.2086, "step": 457540 }, { "epoch": 1.768760340801905, "grad_norm": 0.13166548311710358, "learning_rate": 0.00048276567399758584, "loss": 2.187, "step": 457550 }, { "epoch": 1.7687989980052883, "grad_norm": 0.12755592167377472, "learning_rate": 0.0004826338609287473, "loss": 2.1914, "step": 457560 }, { "epoch": 1.7688376552086715, "grad_norm": 0.13070148229599, "learning_rate": 0.00048250205930946977, "loss": 2.2107, "step": 457570 }, { "epoch": 1.768876312412055, "grad_norm": 0.12268534302711487, "learning_rate": 0.00048237026913677, "loss": 2.1851, "step": 457580 }, { "epoch": 1.7689149696154383, "grad_norm": 0.12911690771579742, "learning_rate": 0.0004822384904076662, "loss": 2.195, "step": 457590 }, { "epoch": 1.7689536268188215, "grad_norm": 0.127009779214859, "learning_rate": 0.00048210672311917803, "loss": 2.1929, "step": 457600 }, { "epoch": 1.7689922840222048, "grad_norm": 0.12624113261699677, "learning_rate": 0.000481974967268326, "loss": 2.1953, "step": 457610 }, { "epoch": 1.769030941225588, "grad_norm": 0.14546051621437073, "learning_rate": 0.00048184322285213234, "loss": 2.186, "step": 457620 }, { "epoch": 1.7690695984289713, "grad_norm": 0.12668795883655548, "learning_rate": 0.0004817114898676207, "loss": 2.1864, "step": 457630 }, { "epoch": 1.7691082556323545, "grad_norm": 0.13258390128612518, "learning_rate": 0.0004815797683118155, "loss": 2.208, "step": 457640 }, { "epoch": 1.7691469128357378, "grad_norm": 0.13000395894050598, "learning_rate": 0.00048144805818174267, "loss": 2.1986, "step": 457650 }, { "epoch": 1.769185570039121, "grad_norm": 0.136027991771698, "learning_rate": 0.00048131635947442963, "loss": 2.1954, "step": 457660 }, { "epoch": 1.7692242272425043, "grad_norm": 0.13348853588104248, "learning_rate": 0.0004811846721869049, "loss": 2.2083, "step": 457670 }, { "epoch": 1.7692628844458875, "grad_norm": 0.12772707641124725, "learning_rate": 0.00048105299631619804, "loss": 2.2012, "step": 457680 }, { "epoch": 1.7693015416492708, "grad_norm": 0.1307777315378189, "learning_rate": 0.0004809213318593406, "loss": 2.178, "step": 457690 }, { "epoch": 1.7693401988526543, "grad_norm": 0.13382311165332794, "learning_rate": 0.00048078967881336453, "loss": 2.2047, "step": 457700 }, { "epoch": 1.7693788560560375, "grad_norm": 0.13161014020442963, "learning_rate": 0.000480658037175304, "loss": 2.1928, "step": 457710 }, { "epoch": 1.7694175132594208, "grad_norm": 0.14786985516548157, "learning_rate": 0.00048052640694219376, "loss": 2.1747, "step": 457720 }, { "epoch": 1.769456170462804, "grad_norm": 0.1263905167579651, "learning_rate": 0.00048039478811107, "loss": 2.2018, "step": 457730 }, { "epoch": 1.7694948276661873, "grad_norm": 0.1310981661081314, "learning_rate": 0.00048026318067897035, "loss": 2.1885, "step": 457740 }, { "epoch": 1.7695334848695707, "grad_norm": 0.1322363317012787, "learning_rate": 0.0004801315846429337, "loss": 2.1894, "step": 457750 }, { "epoch": 1.769572142072954, "grad_norm": 0.12454366683959961, "learning_rate": 0.00048, "loss": 2.1919, "step": 457760 }, { "epoch": 1.7696107992763372, "grad_norm": 0.12184811383485794, "learning_rate": 0.00047986842674721086, "loss": 2.1935, "step": 457770 }, { "epoch": 1.7696494564797205, "grad_norm": 0.13372832536697388, "learning_rate": 0.00047973686488160874, "loss": 2.1851, "step": 457780 }, { "epoch": 1.7696881136831037, "grad_norm": 0.12762166559696198, "learning_rate": 0.0004796053144002377, "loss": 2.1971, "step": 457790 }, { "epoch": 1.769726770886487, "grad_norm": 0.1250433623790741, "learning_rate": 0.000479473775300143, "loss": 2.2086, "step": 457800 }, { "epoch": 1.7697654280898703, "grad_norm": 0.12746527791023254, "learning_rate": 0.00047934224757837106, "loss": 2.1934, "step": 457810 }, { "epoch": 1.7698040852932535, "grad_norm": 0.12750469148159027, "learning_rate": 0.00047921073123196977, "loss": 2.1892, "step": 457820 }, { "epoch": 1.7698427424966368, "grad_norm": 0.13376954197883606, "learning_rate": 0.0004790792262579882, "loss": 2.1917, "step": 457830 }, { "epoch": 1.76988139970002, "grad_norm": 0.12152202427387238, "learning_rate": 0.0004789477326534766, "loss": 2.191, "step": 457840 }, { "epoch": 1.7699200569034033, "grad_norm": 0.1229873076081276, "learning_rate": 0.0004788162504154865, "loss": 2.2052, "step": 457850 }, { "epoch": 1.7699587141067865, "grad_norm": 0.12747801840305328, "learning_rate": 0.0004786847795410709, "loss": 2.1836, "step": 457860 }, { "epoch": 1.76999737131017, "grad_norm": 0.13957123458385468, "learning_rate": 0.000478553320027284, "loss": 2.1998, "step": 457870 }, { "epoch": 1.7700360285135532, "grad_norm": 0.14057600498199463, "learning_rate": 0.00047842187187118124, "loss": 2.1965, "step": 457880 }, { "epoch": 1.7700746857169365, "grad_norm": 0.12310191988945007, "learning_rate": 0.0004782904350698194, "loss": 2.1866, "step": 457890 }, { "epoch": 1.7701133429203197, "grad_norm": 0.13113313913345337, "learning_rate": 0.00047815900962025617, "loss": 2.2088, "step": 457900 }, { "epoch": 1.770152000123703, "grad_norm": 0.12382102012634277, "learning_rate": 0.000478027595519551, "loss": 2.1849, "step": 457910 }, { "epoch": 1.7701906573270865, "grad_norm": 0.12879589200019836, "learning_rate": 0.00047789619276476406, "loss": 2.189, "step": 457920 }, { "epoch": 1.7702293145304697, "grad_norm": 0.1366998553276062, "learning_rate": 0.0004777648013529579, "loss": 2.1999, "step": 457930 }, { "epoch": 1.770267971733853, "grad_norm": 0.13403339684009552, "learning_rate": 0.0004776334212811948, "loss": 2.2097, "step": 457940 }, { "epoch": 1.7703066289372362, "grad_norm": 0.13324300944805145, "learning_rate": 0.0004775020525465394, "loss": 2.193, "step": 457950 }, { "epoch": 1.7703452861406195, "grad_norm": 0.13828282058238983, "learning_rate": 0.0004773706951460575, "loss": 2.1866, "step": 457960 }, { "epoch": 1.7703839433440027, "grad_norm": 0.14581134915351868, "learning_rate": 0.00047723934907681565, "loss": 2.1945, "step": 457970 }, { "epoch": 1.770422600547386, "grad_norm": 0.1256742626428604, "learning_rate": 0.00047710801433588216, "loss": 2.2081, "step": 457980 }, { "epoch": 1.7704612577507692, "grad_norm": 0.1261265128850937, "learning_rate": 0.00047697669092032616, "loss": 2.1937, "step": 457990 }, { "epoch": 1.7704999149541525, "grad_norm": 0.13095766305923462, "learning_rate": 0.00047684537882721845, "loss": 2.1984, "step": 458000 }, { "epoch": 1.7705385721575357, "grad_norm": 0.12314862012863159, "learning_rate": 0.000476714078053631, "loss": 2.1993, "step": 458010 }, { "epoch": 1.770577229360919, "grad_norm": 0.13177534937858582, "learning_rate": 0.00047658278859663653, "loss": 2.1924, "step": 458020 }, { "epoch": 1.7706158865643022, "grad_norm": 0.12194767594337463, "learning_rate": 0.00047645151045331023, "loss": 2.2062, "step": 458030 }, { "epoch": 1.7706545437676857, "grad_norm": 0.12695930898189545, "learning_rate": 0.00047632024362072724, "loss": 2.1973, "step": 458040 }, { "epoch": 1.770693200971069, "grad_norm": 0.13179419934749603, "learning_rate": 0.0004761889880959647, "loss": 2.1966, "step": 458050 }, { "epoch": 1.7707318581744522, "grad_norm": 0.14159278571605682, "learning_rate": 0.0004760577438761009, "loss": 2.1981, "step": 458060 }, { "epoch": 1.7707705153778355, "grad_norm": 0.12899576127529144, "learning_rate": 0.0004759265109582149, "loss": 2.1843, "step": 458070 }, { "epoch": 1.770809172581219, "grad_norm": 0.12268215417861938, "learning_rate": 0.000475795289339388, "loss": 2.195, "step": 458080 }, { "epoch": 1.7708478297846022, "grad_norm": 0.11523349583148956, "learning_rate": 0.0004756640790167017, "loss": 2.1898, "step": 458090 }, { "epoch": 1.7708864869879855, "grad_norm": 0.14718125760555267, "learning_rate": 0.00047553287998723957, "loss": 2.1935, "step": 458100 }, { "epoch": 1.7709251441913687, "grad_norm": 0.14676856994628906, "learning_rate": 0.0004754016922480859, "loss": 2.2079, "step": 458110 }, { "epoch": 1.770963801394752, "grad_norm": 0.1454949676990509, "learning_rate": 0.0004752705157963266, "loss": 2.2149, "step": 458120 }, { "epoch": 1.7710024585981352, "grad_norm": 0.1337183564901352, "learning_rate": 0.00047513935062904844, "loss": 2.2025, "step": 458130 }, { "epoch": 1.7710411158015185, "grad_norm": 0.13073621690273285, "learning_rate": 0.00047500819674333995, "loss": 2.2021, "step": 458140 }, { "epoch": 1.7710797730049017, "grad_norm": 0.12698140740394592, "learning_rate": 0.00047487705413629035, "loss": 2.1931, "step": 458150 }, { "epoch": 1.771118430208285, "grad_norm": 0.1372763216495514, "learning_rate": 0.0004747459228049904, "loss": 2.1923, "step": 458160 }, { "epoch": 1.7711570874116682, "grad_norm": 0.1382165402173996, "learning_rate": 0.00047461480274653243, "loss": 2.1947, "step": 458170 }, { "epoch": 1.7711957446150515, "grad_norm": 0.134132981300354, "learning_rate": 0.0004744836939580095, "loss": 2.1894, "step": 458180 }, { "epoch": 1.7712344018184347, "grad_norm": 0.1326516568660736, "learning_rate": 0.00047435259643651605, "loss": 2.1872, "step": 458190 }, { "epoch": 1.771273059021818, "grad_norm": 0.12394417822360992, "learning_rate": 0.00047422151017914804, "loss": 2.1994, "step": 458200 }, { "epoch": 1.7713117162252014, "grad_norm": 0.13924540579319, "learning_rate": 0.0004740904351830022, "loss": 2.1929, "step": 458210 }, { "epoch": 1.7713503734285847, "grad_norm": 0.1372320055961609, "learning_rate": 0.0004739593714451766, "loss": 2.1995, "step": 458220 }, { "epoch": 1.771389030631968, "grad_norm": 0.12690575420856476, "learning_rate": 0.00047382831896277143, "loss": 2.1852, "step": 458230 }, { "epoch": 1.7714276878353512, "grad_norm": 0.12741948664188385, "learning_rate": 0.00047369727773288696, "loss": 2.1826, "step": 458240 }, { "epoch": 1.7714663450387347, "grad_norm": 0.13204288482666016, "learning_rate": 0.00047356624775262524, "loss": 2.1832, "step": 458250 }, { "epoch": 1.771505002242118, "grad_norm": 0.13010390102863312, "learning_rate": 0.0004734352290190893, "loss": 2.1916, "step": 458260 }, { "epoch": 1.7715436594455012, "grad_norm": 0.12276510894298553, "learning_rate": 0.0004733042215293841, "loss": 2.1894, "step": 458270 }, { "epoch": 1.7715823166488844, "grad_norm": 0.12860916554927826, "learning_rate": 0.00047317322528061493, "loss": 2.2065, "step": 458280 }, { "epoch": 1.7716209738522677, "grad_norm": 0.143928661942482, "learning_rate": 0.00047304224026988885, "loss": 2.1892, "step": 458290 }, { "epoch": 1.771659631055651, "grad_norm": 0.1406896710395813, "learning_rate": 0.00047291126649431404, "loss": 2.2035, "step": 458300 }, { "epoch": 1.7716982882590342, "grad_norm": 0.12735261023044586, "learning_rate": 0.0004727803039510001, "loss": 2.2184, "step": 458310 }, { "epoch": 1.7717369454624174, "grad_norm": 0.13912999629974365, "learning_rate": 0.0004726493526370572, "loss": 2.1947, "step": 458320 }, { "epoch": 1.7717756026658007, "grad_norm": 0.1308005154132843, "learning_rate": 0.00047251841254959806, "loss": 2.1873, "step": 458330 }, { "epoch": 1.771814259869184, "grad_norm": 0.12966060638427734, "learning_rate": 0.00047238748368573513, "loss": 2.1975, "step": 458340 }, { "epoch": 1.7718529170725672, "grad_norm": 0.13258548080921173, "learning_rate": 0.00047225656604258325, "loss": 2.1979, "step": 458350 }, { "epoch": 1.7718915742759505, "grad_norm": 0.14349143207073212, "learning_rate": 0.0004721256596172576, "loss": 2.1986, "step": 458360 }, { "epoch": 1.7719302314793337, "grad_norm": 0.13455979526042938, "learning_rate": 0.0004719947644068754, "loss": 2.189, "step": 458370 }, { "epoch": 1.7719688886827172, "grad_norm": 0.12838658690452576, "learning_rate": 0.00047186388040855465, "loss": 2.1984, "step": 458380 }, { "epoch": 1.7720075458861004, "grad_norm": 0.13850080966949463, "learning_rate": 0.0004717330076194146, "loss": 2.1958, "step": 458390 }, { "epoch": 1.7720462030894837, "grad_norm": 0.12754419445991516, "learning_rate": 0.000471602146036576, "loss": 2.191, "step": 458400 }, { "epoch": 1.772084860292867, "grad_norm": 0.132229745388031, "learning_rate": 0.00047147129565716054, "loss": 2.1912, "step": 458410 }, { "epoch": 1.7721235174962504, "grad_norm": 0.12338346987962723, "learning_rate": 0.0004713404564782908, "loss": 2.1925, "step": 458420 }, { "epoch": 1.7721621746996337, "grad_norm": 0.13899773359298706, "learning_rate": 0.0004712096284970917, "loss": 2.2055, "step": 458430 }, { "epoch": 1.772200831903017, "grad_norm": 0.12873870134353638, "learning_rate": 0.00047107881171068856, "loss": 2.2032, "step": 458440 }, { "epoch": 1.7722394891064002, "grad_norm": 0.12777495384216309, "learning_rate": 0.0004709480061162079, "loss": 2.2002, "step": 458450 }, { "epoch": 1.7722781463097834, "grad_norm": 0.1315607875585556, "learning_rate": 0.00047081721171077787, "loss": 2.1856, "step": 458460 }, { "epoch": 1.7723168035131667, "grad_norm": 0.12554654479026794, "learning_rate": 0.0004706864284915275, "loss": 2.1838, "step": 458470 }, { "epoch": 1.77235546071655, "grad_norm": 0.11828701198101044, "learning_rate": 0.00047055565645558704, "loss": 2.1797, "step": 458480 }, { "epoch": 1.7723941179199332, "grad_norm": 0.13972051441669464, "learning_rate": 0.0004704248956000885, "loss": 2.216, "step": 458490 }, { "epoch": 1.7724327751233164, "grad_norm": 0.13295525312423706, "learning_rate": 0.00047029414592216455, "loss": 2.1843, "step": 458500 }, { "epoch": 1.7724714323266997, "grad_norm": 0.1282622516155243, "learning_rate": 0.0004701634074189494, "loss": 2.1813, "step": 458510 }, { "epoch": 1.772510089530083, "grad_norm": 0.1341368854045868, "learning_rate": 0.00047003268008757784, "loss": 2.1947, "step": 458520 }, { "epoch": 1.7725487467334662, "grad_norm": 0.1381879597902298, "learning_rate": 0.0004699019639251869, "loss": 2.19, "step": 458530 }, { "epoch": 1.7725874039368494, "grad_norm": 0.13804525136947632, "learning_rate": 0.00046977125892891424, "loss": 2.1871, "step": 458540 }, { "epoch": 1.772626061140233, "grad_norm": 0.12451229244470596, "learning_rate": 0.0004696405650958988, "loss": 2.1922, "step": 458550 }, { "epoch": 1.7726647183436162, "grad_norm": 0.13908781111240387, "learning_rate": 0.0004695098824232806, "loss": 2.2021, "step": 458560 }, { "epoch": 1.7727033755469994, "grad_norm": 0.1446731686592102, "learning_rate": 0.00046937921090820157, "loss": 2.2011, "step": 458570 }, { "epoch": 1.7727420327503827, "grad_norm": 0.13333015143871307, "learning_rate": 0.0004692485505478037, "loss": 2.1967, "step": 458580 }, { "epoch": 1.7727806899537661, "grad_norm": 0.13534389436244965, "learning_rate": 0.0004691179013392313, "loss": 2.1932, "step": 458590 }, { "epoch": 1.7728193471571494, "grad_norm": 0.1271008998155594, "learning_rate": 0.00046898726327962903, "loss": 2.1878, "step": 458600 }, { "epoch": 1.7728580043605326, "grad_norm": 0.13144579529762268, "learning_rate": 0.0004688566363661435, "loss": 2.1871, "step": 458610 }, { "epoch": 1.772896661563916, "grad_norm": 0.11865829676389694, "learning_rate": 0.0004687260205959221, "loss": 2.1993, "step": 458620 }, { "epoch": 1.7729353187672992, "grad_norm": 0.12198822945356369, "learning_rate": 0.00046859541596611364, "loss": 2.191, "step": 458630 }, { "epoch": 1.7729739759706824, "grad_norm": 0.14284397661685944, "learning_rate": 0.0004684648224738681, "loss": 2.1975, "step": 458640 }, { "epoch": 1.7730126331740657, "grad_norm": 0.12675946950912476, "learning_rate": 0.00046833424011633664, "loss": 2.1877, "step": 458650 }, { "epoch": 1.773051290377449, "grad_norm": 0.14137062430381775, "learning_rate": 0.00046820366889067146, "loss": 2.2019, "step": 458660 }, { "epoch": 1.7730899475808322, "grad_norm": 0.12649844586849213, "learning_rate": 0.000468073108794026, "loss": 2.1905, "step": 458670 }, { "epoch": 1.7731286047842154, "grad_norm": 0.1343609243631363, "learning_rate": 0.0004679425598235554, "loss": 2.1982, "step": 458680 }, { "epoch": 1.7731672619875987, "grad_norm": 0.13129524886608124, "learning_rate": 0.0004678120219764157, "loss": 2.2044, "step": 458690 }, { "epoch": 1.773205919190982, "grad_norm": 0.1215507909655571, "learning_rate": 0.0004676814952497637, "loss": 2.1676, "step": 458700 }, { "epoch": 1.7732445763943652, "grad_norm": 0.13051870465278625, "learning_rate": 0.0004675509796407582, "loss": 2.193, "step": 458710 }, { "epoch": 1.7732832335977486, "grad_norm": 0.12099869549274445, "learning_rate": 0.0004674204751465585, "loss": 2.2044, "step": 458720 }, { "epoch": 1.773321890801132, "grad_norm": 0.13086313009262085, "learning_rate": 0.0004672899817643259, "loss": 2.2017, "step": 458730 }, { "epoch": 1.7733605480045151, "grad_norm": 0.13035260140895844, "learning_rate": 0.0004671594994912223, "loss": 2.2122, "step": 458740 }, { "epoch": 1.7733992052078984, "grad_norm": 0.13473574817180634, "learning_rate": 0.00046702902832441073, "loss": 2.1852, "step": 458750 }, { "epoch": 1.7734378624112819, "grad_norm": 0.12415792793035507, "learning_rate": 0.00046689856826105604, "loss": 2.1954, "step": 458760 }, { "epoch": 1.7734765196146651, "grad_norm": 0.13153572380542755, "learning_rate": 0.0004667681192983235, "loss": 2.1873, "step": 458770 }, { "epoch": 1.7735151768180484, "grad_norm": 0.13177259266376495, "learning_rate": 0.00046663768143338015, "loss": 2.2101, "step": 458780 }, { "epoch": 1.7735538340214316, "grad_norm": 0.13995657861232758, "learning_rate": 0.0004665072546633944, "loss": 2.1976, "step": 458790 }, { "epoch": 1.7735924912248149, "grad_norm": 0.13670065999031067, "learning_rate": 0.00046637683898553497, "loss": 2.1987, "step": 458800 }, { "epoch": 1.7736311484281981, "grad_norm": 0.13310128450393677, "learning_rate": 0.0004662464343969728, "loss": 2.2139, "step": 458810 }, { "epoch": 1.7736698056315814, "grad_norm": 0.130756676197052, "learning_rate": 0.00046611604089487926, "loss": 2.1914, "step": 458820 }, { "epoch": 1.7737084628349646, "grad_norm": 0.12607665359973907, "learning_rate": 0.0004659856584764273, "loss": 2.1856, "step": 458830 }, { "epoch": 1.773747120038348, "grad_norm": 0.12528374791145325, "learning_rate": 0.0004658552871387913, "loss": 2.2033, "step": 458840 }, { "epoch": 1.7737857772417311, "grad_norm": 0.12461727857589722, "learning_rate": 0.00046572492687914656, "loss": 2.1869, "step": 458850 }, { "epoch": 1.7738244344451144, "grad_norm": 0.1290518194437027, "learning_rate": 0.0004655945776946693, "loss": 2.182, "step": 458860 }, { "epoch": 1.7738630916484976, "grad_norm": 0.1273990273475647, "learning_rate": 0.00046546423958253747, "loss": 2.2012, "step": 458870 }, { "epoch": 1.773901748851881, "grad_norm": 0.1488787829875946, "learning_rate": 0.00046533391253992986, "loss": 2.211, "step": 458880 }, { "epoch": 1.7739404060552644, "grad_norm": 0.12724000215530396, "learning_rate": 0.00046520359656402645, "loss": 2.1899, "step": 458890 }, { "epoch": 1.7739790632586476, "grad_norm": 0.12783779203891754, "learning_rate": 0.0004650732916520086, "loss": 2.1994, "step": 458900 }, { "epoch": 1.7740177204620309, "grad_norm": 0.12095726281404495, "learning_rate": 0.0004649429978010589, "loss": 2.1844, "step": 458910 }, { "epoch": 1.7740563776654141, "grad_norm": 0.12530633807182312, "learning_rate": 0.000464812715008361, "loss": 2.1985, "step": 458920 }, { "epoch": 1.7740950348687976, "grad_norm": 0.12608347833156586, "learning_rate": 0.00046468244327109963, "loss": 2.1984, "step": 458930 }, { "epoch": 1.7741336920721809, "grad_norm": 0.12942135334014893, "learning_rate": 0.00046455218258646116, "loss": 2.2085, "step": 458940 }, { "epoch": 1.774172349275564, "grad_norm": 0.13217510282993317, "learning_rate": 0.0004644219329516326, "loss": 2.1949, "step": 458950 }, { "epoch": 1.7742110064789474, "grad_norm": 0.12174969911575317, "learning_rate": 0.0004642916943638027, "loss": 2.2047, "step": 458960 }, { "epoch": 1.7742496636823306, "grad_norm": 0.1340894103050232, "learning_rate": 0.0004641614668201608, "loss": 2.1964, "step": 458970 }, { "epoch": 1.7742883208857139, "grad_norm": 0.1508149355649948, "learning_rate": 0.0004640312503178978, "loss": 2.1945, "step": 458980 }, { "epoch": 1.7743269780890971, "grad_norm": 0.12747126817703247, "learning_rate": 0.00046390104485420603, "loss": 2.2081, "step": 458990 }, { "epoch": 1.7743656352924804, "grad_norm": 0.13547492027282715, "learning_rate": 0.0004637708504262783, "loss": 2.1964, "step": 459000 }, { "epoch": 1.7744042924958636, "grad_norm": 0.1237700879573822, "learning_rate": 0.00046364066703130934, "loss": 2.1918, "step": 459010 }, { "epoch": 1.7744429496992469, "grad_norm": 0.12852251529693604, "learning_rate": 0.00046351049466649476, "loss": 2.1965, "step": 459020 }, { "epoch": 1.7744816069026301, "grad_norm": 0.12711748480796814, "learning_rate": 0.00046338033332903093, "loss": 2.1877, "step": 459030 }, { "epoch": 1.7745202641060134, "grad_norm": 0.13563001155853271, "learning_rate": 0.0004632501830161162, "loss": 2.1793, "step": 459040 }, { "epoch": 1.7745589213093966, "grad_norm": 0.1352197676897049, "learning_rate": 0.00046312004372494985, "loss": 2.1924, "step": 459050 }, { "epoch": 1.77459757851278, "grad_norm": 0.1381702423095703, "learning_rate": 0.0004629899154527319, "loss": 2.1913, "step": 459060 }, { "epoch": 1.7746362357161634, "grad_norm": 0.13926814496517181, "learning_rate": 0.00046285979819666424, "loss": 2.1881, "step": 459070 }, { "epoch": 1.7746748929195466, "grad_norm": 0.1228603944182396, "learning_rate": 0.0004627296919539492, "loss": 2.1881, "step": 459080 }, { "epoch": 1.7747135501229299, "grad_norm": 0.13640454411506653, "learning_rate": 0.00046259959672179086, "loss": 2.2026, "step": 459090 }, { "epoch": 1.7747522073263133, "grad_norm": 0.13040600717067719, "learning_rate": 0.00046246951249739453, "loss": 2.1888, "step": 459100 }, { "epoch": 1.7747908645296966, "grad_norm": 0.1405375450849533, "learning_rate": 0.0004623394392779661, "loss": 2.1886, "step": 459110 }, { "epoch": 1.7748295217330798, "grad_norm": 0.1282537281513214, "learning_rate": 0.0004622093770607132, "loss": 2.1804, "step": 459120 }, { "epoch": 1.774868178936463, "grad_norm": 0.136846661567688, "learning_rate": 0.0004620793258428444, "loss": 2.1889, "step": 459130 }, { "epoch": 1.7749068361398463, "grad_norm": 0.13499949872493744, "learning_rate": 0.0004619492856215697, "loss": 2.1955, "step": 459140 }, { "epoch": 1.7749454933432296, "grad_norm": 0.13540257513523102, "learning_rate": 0.0004618192563940997, "loss": 2.1899, "step": 459150 }, { "epoch": 1.7749841505466128, "grad_norm": 0.1366579830646515, "learning_rate": 0.0004616892381576472, "loss": 2.211, "step": 459160 }, { "epoch": 1.775022807749996, "grad_norm": 0.13207294046878815, "learning_rate": 0.000461559230909425, "loss": 2.1789, "step": 459170 }, { "epoch": 1.7750614649533794, "grad_norm": 0.13058124482631683, "learning_rate": 0.0004614292346466478, "loss": 2.1933, "step": 459180 }, { "epoch": 1.7751001221567626, "grad_norm": 0.12599864602088928, "learning_rate": 0.00046129924936653136, "loss": 2.1844, "step": 459190 }, { "epoch": 1.7751387793601459, "grad_norm": 0.27933165431022644, "learning_rate": 0.0004611692750662924, "loss": 2.1865, "step": 459200 }, { "epoch": 1.775177436563529, "grad_norm": 0.13544274866580963, "learning_rate": 0.00046103931174314925, "loss": 2.19, "step": 459210 }, { "epoch": 1.7752160937669124, "grad_norm": 0.13466085493564606, "learning_rate": 0.0004609093593943208, "loss": 2.2043, "step": 459220 }, { "epoch": 1.7752547509702958, "grad_norm": 0.12241952866315842, "learning_rate": 0.0004607794180170277, "loss": 2.1852, "step": 459230 }, { "epoch": 1.775293408173679, "grad_norm": 0.14127551019191742, "learning_rate": 0.00046064948760849144, "loss": 2.2023, "step": 459240 }, { "epoch": 1.7753320653770623, "grad_norm": 0.13037101924419403, "learning_rate": 0.00046051956816593466, "loss": 2.1943, "step": 459250 }, { "epoch": 1.7753707225804456, "grad_norm": 0.13260316848754883, "learning_rate": 0.0004603896596865815, "loss": 2.1981, "step": 459260 }, { "epoch": 1.775409379783829, "grad_norm": 0.13140569627285004, "learning_rate": 0.000460259762167657, "loss": 2.2033, "step": 459270 }, { "epoch": 1.7754480369872123, "grad_norm": 0.13945728540420532, "learning_rate": 0.0004601298756063874, "loss": 2.2103, "step": 459280 }, { "epoch": 1.7754866941905956, "grad_norm": 0.1657353937625885, "learning_rate": 0.00045999999999999996, "loss": 2.1856, "step": 459290 }, { "epoch": 1.7755253513939788, "grad_norm": 0.1261647492647171, "learning_rate": 0.00045987013534572353, "loss": 2.1814, "step": 459300 }, { "epoch": 1.775564008597362, "grad_norm": 0.1261998414993286, "learning_rate": 0.0004597402816407878, "loss": 2.1964, "step": 459310 }, { "epoch": 1.7756026658007453, "grad_norm": 0.12563207745552063, "learning_rate": 0.0004596104388824236, "loss": 2.1929, "step": 459320 }, { "epoch": 1.7756413230041286, "grad_norm": 0.13220050930976868, "learning_rate": 0.000459480607067863, "loss": 2.1892, "step": 459330 }, { "epoch": 1.7756799802075118, "grad_norm": 0.12477528303861618, "learning_rate": 0.00045935078619433936, "loss": 2.1909, "step": 459340 }, { "epoch": 1.775718637410895, "grad_norm": 0.12958276271820068, "learning_rate": 0.00045922097625908734, "loss": 2.188, "step": 459350 }, { "epoch": 1.7757572946142783, "grad_norm": 0.14196142554283142, "learning_rate": 0.00045909117725934225, "loss": 2.1954, "step": 459360 }, { "epoch": 1.7757959518176616, "grad_norm": 0.12300693243741989, "learning_rate": 0.00045896138919234076, "loss": 2.1936, "step": 459370 }, { "epoch": 1.7758346090210448, "grad_norm": 0.13028477132320404, "learning_rate": 0.0004588316120553213, "loss": 2.1722, "step": 459380 }, { "epoch": 1.7758732662244283, "grad_norm": 0.134907066822052, "learning_rate": 0.0004587018458455223, "loss": 2.2024, "step": 459390 }, { "epoch": 1.7759119234278116, "grad_norm": 0.13216833770275116, "learning_rate": 0.00045857209056018465, "loss": 2.1715, "step": 459400 }, { "epoch": 1.7759505806311948, "grad_norm": 0.12976202368736267, "learning_rate": 0.0004584423461965492, "loss": 2.1829, "step": 459410 }, { "epoch": 1.775989237834578, "grad_norm": 0.1588214933872223, "learning_rate": 0.00045831261275185885, "loss": 2.1936, "step": 459420 }, { "epoch": 1.7760278950379613, "grad_norm": 0.13371814787387848, "learning_rate": 0.0004581828902233571, "loss": 2.1858, "step": 459430 }, { "epoch": 1.7760665522413448, "grad_norm": 0.13082599639892578, "learning_rate": 0.00045805317860828933, "loss": 2.1968, "step": 459440 }, { "epoch": 1.776105209444728, "grad_norm": 0.2999109625816345, "learning_rate": 0.0004579234779039012, "loss": 2.1796, "step": 459450 }, { "epoch": 1.7761438666481113, "grad_norm": 0.13211677968502045, "learning_rate": 0.0004577937881074399, "loss": 2.1919, "step": 459460 }, { "epoch": 1.7761825238514946, "grad_norm": 0.12072337418794632, "learning_rate": 0.00045766410921615396, "loss": 2.1943, "step": 459470 }, { "epoch": 1.7762211810548778, "grad_norm": 0.13391290605068207, "learning_rate": 0.0004575344412272928, "loss": 2.1965, "step": 459480 }, { "epoch": 1.776259838258261, "grad_norm": 0.12569762766361237, "learning_rate": 0.00045740478413810705, "loss": 2.1858, "step": 459490 }, { "epoch": 1.7762984954616443, "grad_norm": 0.12477441877126694, "learning_rate": 0.00045727513794584885, "loss": 2.1926, "step": 459500 }, { "epoch": 1.7763371526650276, "grad_norm": 0.1322563886642456, "learning_rate": 0.0004571455026477709, "loss": 2.1936, "step": 459510 }, { "epoch": 1.7763758098684108, "grad_norm": 0.12976054847240448, "learning_rate": 0.00045701587824112735, "loss": 2.1956, "step": 459520 }, { "epoch": 1.776414467071794, "grad_norm": 0.13775162398815155, "learning_rate": 0.0004568862647231733, "loss": 2.1815, "step": 459530 }, { "epoch": 1.7764531242751773, "grad_norm": 0.13193078339099884, "learning_rate": 0.00045675666209116586, "loss": 2.2069, "step": 459540 }, { "epoch": 1.7764917814785606, "grad_norm": 0.14443346858024597, "learning_rate": 0.00045662707034236207, "loss": 2.1823, "step": 459550 }, { "epoch": 1.776530438681944, "grad_norm": 0.12272831797599792, "learning_rate": 0.00045649748947402104, "loss": 2.196, "step": 459560 }, { "epoch": 1.7765690958853273, "grad_norm": 0.1343587189912796, "learning_rate": 0.0004563679194834023, "loss": 2.1984, "step": 459570 }, { "epoch": 1.7766077530887106, "grad_norm": 0.14258071780204773, "learning_rate": 0.0004562383603677671, "loss": 2.1905, "step": 459580 }, { "epoch": 1.7766464102920938, "grad_norm": 0.13907821476459503, "learning_rate": 0.0004561088121243777, "loss": 2.1901, "step": 459590 }, { "epoch": 1.776685067495477, "grad_norm": 0.12912242114543915, "learning_rate": 0.0004559792747504974, "loss": 2.2063, "step": 459600 }, { "epoch": 1.7767237246988605, "grad_norm": 0.13161329925060272, "learning_rate": 0.0004558497482433908, "loss": 2.1862, "step": 459610 }, { "epoch": 1.7767623819022438, "grad_norm": 0.1229773536324501, "learning_rate": 0.0004557202326003231, "loss": 2.2066, "step": 459620 }, { "epoch": 1.776801039105627, "grad_norm": 0.12915240228176117, "learning_rate": 0.00045559072781856157, "loss": 2.1936, "step": 459630 }, { "epoch": 1.7768396963090103, "grad_norm": 0.12329524755477905, "learning_rate": 0.0004554612338953741, "loss": 2.1853, "step": 459640 }, { "epoch": 1.7768783535123935, "grad_norm": 0.12746720016002655, "learning_rate": 0.0004553317508280297, "loss": 2.198, "step": 459650 }, { "epoch": 1.7769170107157768, "grad_norm": 0.14305472373962402, "learning_rate": 0.0004552022786137986, "loss": 2.1832, "step": 459660 }, { "epoch": 1.77695566791916, "grad_norm": 0.13622303307056427, "learning_rate": 0.00045507281724995206, "loss": 2.2125, "step": 459670 }, { "epoch": 1.7769943251225433, "grad_norm": 0.12965580821037292, "learning_rate": 0.0004549433667337628, "loss": 2.1963, "step": 459680 }, { "epoch": 1.7770329823259265, "grad_norm": 0.13008244335651398, "learning_rate": 0.0004548139270625042, "loss": 2.1986, "step": 459690 }, { "epoch": 1.7770716395293098, "grad_norm": 0.13434603810310364, "learning_rate": 0.00045468449823345147, "loss": 2.1983, "step": 459700 }, { "epoch": 1.777110296732693, "grad_norm": 0.13101531565189362, "learning_rate": 0.00045455508024388005, "loss": 2.1924, "step": 459710 }, { "epoch": 1.7771489539360763, "grad_norm": 0.13777051866054535, "learning_rate": 0.0004544256730910674, "loss": 2.203, "step": 459720 }, { "epoch": 1.7771876111394598, "grad_norm": 0.12869952619075775, "learning_rate": 0.0004542962767722916, "loss": 2.1834, "step": 459730 }, { "epoch": 1.777226268342843, "grad_norm": 0.14540624618530273, "learning_rate": 0.000454166891284832, "loss": 2.1836, "step": 459740 }, { "epoch": 1.7772649255462263, "grad_norm": 0.14128723740577698, "learning_rate": 0.0004540375166259694, "loss": 2.1843, "step": 459750 }, { "epoch": 1.7773035827496095, "grad_norm": 0.12434931099414825, "learning_rate": 0.0004539081527929849, "loss": 2.1968, "step": 459760 }, { "epoch": 1.7773422399529928, "grad_norm": 0.1366758644580841, "learning_rate": 0.00045377879978316174, "loss": 2.1951, "step": 459770 }, { "epoch": 1.7773808971563763, "grad_norm": 0.13118357956409454, "learning_rate": 0.00045364945759378354, "loss": 2.18, "step": 459780 }, { "epoch": 1.7774195543597595, "grad_norm": 0.13108068704605103, "learning_rate": 0.00045352012622213535, "loss": 2.1932, "step": 459790 }, { "epoch": 1.7774582115631428, "grad_norm": 0.13078756630420685, "learning_rate": 0.0004533908056655036, "loss": 2.1999, "step": 459800 }, { "epoch": 1.777496868766526, "grad_norm": 0.12737669050693512, "learning_rate": 0.0004532614959211754, "loss": 2.1839, "step": 459810 }, { "epoch": 1.7775355259699093, "grad_norm": 0.13121169805526733, "learning_rate": 0.0004531321969864395, "loss": 2.1935, "step": 459820 }, { "epoch": 1.7775741831732925, "grad_norm": 0.13091163337230682, "learning_rate": 0.00045300290885858473, "loss": 2.1976, "step": 459830 }, { "epoch": 1.7776128403766758, "grad_norm": 0.1273505985736847, "learning_rate": 0.0004528736315349027, "loss": 2.2018, "step": 459840 }, { "epoch": 1.777651497580059, "grad_norm": 0.13054147362709045, "learning_rate": 0.0004527443650126848, "loss": 2.2042, "step": 459850 }, { "epoch": 1.7776901547834423, "grad_norm": 0.13942508399486542, "learning_rate": 0.0004526151092892241, "loss": 2.1913, "step": 459860 }, { "epoch": 1.7777288119868255, "grad_norm": 0.12371347099542618, "learning_rate": 0.00045248586436181463, "loss": 2.1769, "step": 459870 }, { "epoch": 1.7777674691902088, "grad_norm": 0.12571966648101807, "learning_rate": 0.00045235663022775177, "loss": 2.2091, "step": 459880 }, { "epoch": 1.777806126393592, "grad_norm": 0.13976377248764038, "learning_rate": 0.0004522274068843317, "loss": 2.1819, "step": 459890 }, { "epoch": 1.7778447835969755, "grad_norm": 0.13170628249645233, "learning_rate": 0.0004520981943288522, "loss": 2.2026, "step": 459900 }, { "epoch": 1.7778834408003588, "grad_norm": 0.1292218416929245, "learning_rate": 0.00045196899255861166, "loss": 2.1987, "step": 459910 }, { "epoch": 1.777922098003742, "grad_norm": 0.12546850740909576, "learning_rate": 0.00045183980157090974, "loss": 2.1963, "step": 459920 }, { "epoch": 1.7779607552071253, "grad_norm": 0.1390175074338913, "learning_rate": 0.0004517106213630475, "loss": 2.2001, "step": 459930 }, { "epoch": 1.7779994124105087, "grad_norm": 0.1215895414352417, "learning_rate": 0.0004515814519323271, "loss": 2.199, "step": 459940 }, { "epoch": 1.778038069613892, "grad_norm": 0.12747113406658173, "learning_rate": 0.0004514522932760514, "loss": 2.1829, "step": 459950 }, { "epoch": 1.7780767268172752, "grad_norm": 0.13074922561645508, "learning_rate": 0.0004513231453915247, "loss": 2.1869, "step": 459960 }, { "epoch": 1.7781153840206585, "grad_norm": 0.1335902363061905, "learning_rate": 0.0004511940082760526, "loss": 2.1893, "step": 459970 }, { "epoch": 1.7781540412240417, "grad_norm": 0.1362961083650589, "learning_rate": 0.0004510648819269414, "loss": 2.2001, "step": 459980 }, { "epoch": 1.778192698427425, "grad_norm": 0.137433722615242, "learning_rate": 0.0004509357663414988, "loss": 2.1871, "step": 459990 }, { "epoch": 1.7782313556308083, "grad_norm": 0.12900055944919586, "learning_rate": 0.0004508066615170332, "loss": 2.1741, "step": 460000 }, { "epoch": 1.7782700128341915, "grad_norm": 0.12328225374221802, "learning_rate": 0.000450677567450855, "loss": 2.1998, "step": 460010 }, { "epoch": 1.7783086700375748, "grad_norm": 0.13213379681110382, "learning_rate": 0.00045054848414027496, "loss": 2.1951, "step": 460020 }, { "epoch": 1.778347327240958, "grad_norm": 0.13553999364376068, "learning_rate": 0.00045041941158260505, "loss": 2.1872, "step": 460030 }, { "epoch": 1.7783859844443413, "grad_norm": 0.13455605506896973, "learning_rate": 0.0004502903497751587, "loss": 2.1847, "step": 460040 }, { "epoch": 1.7784246416477245, "grad_norm": 0.1322959065437317, "learning_rate": 0.00045016129871525016, "loss": 2.1975, "step": 460050 }, { "epoch": 1.7784632988511078, "grad_norm": 0.129024937748909, "learning_rate": 0.000450032258400195, "loss": 2.1846, "step": 460060 }, { "epoch": 1.7785019560544912, "grad_norm": 0.14066551625728607, "learning_rate": 0.0004499032288273097, "loss": 2.1748, "step": 460070 }, { "epoch": 1.7785406132578745, "grad_norm": 0.14260342717170715, "learning_rate": 0.00044977420999391197, "loss": 2.2117, "step": 460080 }, { "epoch": 1.7785792704612577, "grad_norm": 0.12708856165409088, "learning_rate": 0.0004496452018973205, "loss": 2.195, "step": 460090 }, { "epoch": 1.778617927664641, "grad_norm": 0.12720675766468048, "learning_rate": 0.00044951620453485553, "loss": 2.1898, "step": 460100 }, { "epoch": 1.7786565848680245, "grad_norm": 0.1320163458585739, "learning_rate": 0.00044938721790383786, "loss": 2.215, "step": 460110 }, { "epoch": 1.7786952420714077, "grad_norm": 0.12699101865291595, "learning_rate": 0.00044925824200158984, "loss": 2.1794, "step": 460120 }, { "epoch": 1.778733899274791, "grad_norm": 0.1358613818883896, "learning_rate": 0.00044912927682543447, "loss": 2.202, "step": 460130 }, { "epoch": 1.7787725564781742, "grad_norm": 0.12044976651668549, "learning_rate": 0.00044900032237269616, "loss": 2.1867, "step": 460140 }, { "epoch": 1.7788112136815575, "grad_norm": 0.13260480761528015, "learning_rate": 0.0004488713786407008, "loss": 2.1866, "step": 460150 }, { "epoch": 1.7788498708849407, "grad_norm": 0.13614635169506073, "learning_rate": 0.0004487424456267748, "loss": 2.2049, "step": 460160 }, { "epoch": 1.778888528088324, "grad_norm": 0.17420963943004608, "learning_rate": 0.0004486135233282456, "loss": 2.1924, "step": 460170 }, { "epoch": 1.7789271852917072, "grad_norm": 0.13680464029312134, "learning_rate": 0.00044848461174244237, "loss": 2.1879, "step": 460180 }, { "epoch": 1.7789658424950905, "grad_norm": 0.1447361707687378, "learning_rate": 0.00044835571086669493, "loss": 2.2007, "step": 460190 }, { "epoch": 1.7790044996984737, "grad_norm": 0.12781181931495667, "learning_rate": 0.0004482268206983342, "loss": 2.1904, "step": 460200 }, { "epoch": 1.779043156901857, "grad_norm": 0.12790387868881226, "learning_rate": 0.0004480979412346926, "loss": 2.1889, "step": 460210 }, { "epoch": 1.7790818141052402, "grad_norm": 0.13949060440063477, "learning_rate": 0.00044796907247310314, "loss": 2.2021, "step": 460220 }, { "epoch": 1.7791204713086235, "grad_norm": 0.12806452810764313, "learning_rate": 0.0004478402144109004, "loss": 2.2124, "step": 460230 }, { "epoch": 1.779159128512007, "grad_norm": 0.13862504065036774, "learning_rate": 0.0004477113670454196, "loss": 2.1974, "step": 460240 }, { "epoch": 1.7791977857153902, "grad_norm": 0.13251705467700958, "learning_rate": 0.00044758253037399757, "loss": 2.1849, "step": 460250 }, { "epoch": 1.7792364429187735, "grad_norm": 0.12595920264720917, "learning_rate": 0.0004474537043939719, "loss": 2.1959, "step": 460260 }, { "epoch": 1.7792751001221567, "grad_norm": 0.12909527122974396, "learning_rate": 0.00044732488910268154, "loss": 2.1947, "step": 460270 }, { "epoch": 1.7793137573255402, "grad_norm": 0.13403162360191345, "learning_rate": 0.0004471960844974663, "loss": 2.1987, "step": 460280 }, { "epoch": 1.7793524145289235, "grad_norm": 0.12949995696544647, "learning_rate": 0.000447067290575667, "loss": 2.1963, "step": 460290 }, { "epoch": 1.7793910717323067, "grad_norm": 0.13066048920154572, "learning_rate": 0.00044693850733462594, "loss": 2.1802, "step": 460300 }, { "epoch": 1.77942972893569, "grad_norm": 0.12212751060724258, "learning_rate": 0.0004468097347716862, "loss": 2.1827, "step": 460310 }, { "epoch": 1.7794683861390732, "grad_norm": 0.12501434981822968, "learning_rate": 0.00044668097288419207, "loss": 2.1835, "step": 460320 }, { "epoch": 1.7795070433424565, "grad_norm": 0.1318371295928955, "learning_rate": 0.00044655222166948926, "loss": 2.1791, "step": 460330 }, { "epoch": 1.7795457005458397, "grad_norm": 0.1317320317029953, "learning_rate": 0.0004464234811249237, "loss": 2.1771, "step": 460340 }, { "epoch": 1.779584357749223, "grad_norm": 0.12879504263401031, "learning_rate": 0.0004462947512478437, "loss": 2.1777, "step": 460350 }, { "epoch": 1.7796230149526062, "grad_norm": 0.13029487431049347, "learning_rate": 0.0004461660320355974, "loss": 2.1747, "step": 460360 }, { "epoch": 1.7796616721559895, "grad_norm": 0.13985006511211395, "learning_rate": 0.00044603732348553484, "loss": 2.1885, "step": 460370 }, { "epoch": 1.7797003293593727, "grad_norm": 0.14560475945472717, "learning_rate": 0.0004459086255950071, "loss": 2.1876, "step": 460380 }, { "epoch": 1.779738986562756, "grad_norm": 0.1299740970134735, "learning_rate": 0.00044577993836136587, "loss": 2.1706, "step": 460390 }, { "epoch": 1.7797776437661392, "grad_norm": 0.12563489377498627, "learning_rate": 0.00044565126178196414, "loss": 2.1969, "step": 460400 }, { "epoch": 1.7798163009695227, "grad_norm": 0.12972305715084076, "learning_rate": 0.00044552259585415666, "loss": 2.1906, "step": 460410 }, { "epoch": 1.779854958172906, "grad_norm": 0.1361573189496994, "learning_rate": 0.000445393940575298, "loss": 2.1895, "step": 460420 }, { "epoch": 1.7798936153762892, "grad_norm": 0.13461749255657196, "learning_rate": 0.0004452652959427452, "loss": 2.1983, "step": 460430 }, { "epoch": 1.7799322725796725, "grad_norm": 0.12253117561340332, "learning_rate": 0.00044513666195385507, "loss": 2.1851, "step": 460440 }, { "epoch": 1.779970929783056, "grad_norm": 0.14229539036750793, "learning_rate": 0.00044500803860598694, "loss": 2.1843, "step": 460450 }, { "epoch": 1.7800095869864392, "grad_norm": 0.13668429851531982, "learning_rate": 0.0004448794258964999, "loss": 2.1747, "step": 460460 }, { "epoch": 1.7800482441898224, "grad_norm": 0.13260917365550995, "learning_rate": 0.0004447508238227547, "loss": 2.1825, "step": 460470 }, { "epoch": 1.7800869013932057, "grad_norm": 0.12992557883262634, "learning_rate": 0.00044462223238211365, "loss": 2.1952, "step": 460480 }, { "epoch": 1.780125558596589, "grad_norm": 0.13101063668727875, "learning_rate": 0.0004444936515719391, "loss": 2.2045, "step": 460490 }, { "epoch": 1.7801642157999722, "grad_norm": 0.125391885638237, "learning_rate": 0.0004443650813895954, "loss": 2.1854, "step": 460500 }, { "epoch": 1.7802028730033554, "grad_norm": 0.13517551124095917, "learning_rate": 0.0004442365218324478, "loss": 2.1908, "step": 460510 }, { "epoch": 1.7802415302067387, "grad_norm": 0.1373213678598404, "learning_rate": 0.0004441079728978621, "loss": 2.1948, "step": 460520 }, { "epoch": 1.780280187410122, "grad_norm": 0.13275998830795288, "learning_rate": 0.0004439794345832058, "loss": 2.1953, "step": 460530 }, { "epoch": 1.7803188446135052, "grad_norm": 0.13309723138809204, "learning_rate": 0.00044385090688584715, "loss": 2.1857, "step": 460540 }, { "epoch": 1.7803575018168885, "grad_norm": 0.12807965278625488, "learning_rate": 0.0004437223898031559, "loss": 2.1946, "step": 460550 }, { "epoch": 1.7803961590202717, "grad_norm": 0.13153384625911713, "learning_rate": 0.0004435938833325024, "loss": 2.2084, "step": 460560 }, { "epoch": 1.780434816223655, "grad_norm": 0.13783849775791168, "learning_rate": 0.0004434653874712582, "loss": 2.1858, "step": 460570 }, { "epoch": 1.7804734734270384, "grad_norm": 0.13290105760097504, "learning_rate": 0.00044333690221679635, "loss": 2.2028, "step": 460580 }, { "epoch": 1.7805121306304217, "grad_norm": 0.14658907055854797, "learning_rate": 0.00044320842756649025, "loss": 2.2051, "step": 460590 }, { "epoch": 1.780550787833805, "grad_norm": 0.14052848517894745, "learning_rate": 0.00044307996351771497, "loss": 2.2021, "step": 460600 }, { "epoch": 1.7805894450371882, "grad_norm": 0.12826259434223175, "learning_rate": 0.00044295151006784627, "loss": 2.2002, "step": 460610 }, { "epoch": 1.7806281022405717, "grad_norm": 0.1517890989780426, "learning_rate": 0.0004428230672142617, "loss": 2.1957, "step": 460620 }, { "epoch": 1.780666759443955, "grad_norm": 0.14213670790195465, "learning_rate": 0.0004426946349543388, "loss": 2.1969, "step": 460630 }, { "epoch": 1.7807054166473382, "grad_norm": 0.12808117270469666, "learning_rate": 0.0004425662132854571, "loss": 2.1873, "step": 460640 }, { "epoch": 1.7807440738507214, "grad_norm": 0.12215771526098251, "learning_rate": 0.0004424378022049971, "loss": 2.1998, "step": 460650 }, { "epoch": 1.7807827310541047, "grad_norm": 0.12312013655900955, "learning_rate": 0.00044230940171033974, "loss": 2.1841, "step": 460660 }, { "epoch": 1.780821388257488, "grad_norm": 0.12614500522613525, "learning_rate": 0.0004421810117988676, "loss": 2.1705, "step": 460670 }, { "epoch": 1.7808600454608712, "grad_norm": 0.13631653785705566, "learning_rate": 0.00044205263246796436, "loss": 2.2122, "step": 460680 }, { "epoch": 1.7808987026642544, "grad_norm": 0.1294577717781067, "learning_rate": 0.00044192426371501446, "loss": 2.1958, "step": 460690 }, { "epoch": 1.7809373598676377, "grad_norm": 0.12741263210773468, "learning_rate": 0.00044179590553740367, "loss": 2.1946, "step": 460700 }, { "epoch": 1.780976017071021, "grad_norm": 0.13759537041187286, "learning_rate": 0.0004416675579325187, "loss": 2.1925, "step": 460710 }, { "epoch": 1.7810146742744042, "grad_norm": 0.14019958674907684, "learning_rate": 0.00044153922089774754, "loss": 2.2015, "step": 460720 }, { "epoch": 1.7810533314777874, "grad_norm": 0.13502752780914307, "learning_rate": 0.0004414108944304789, "loss": 2.1901, "step": 460730 }, { "epoch": 1.7810919886811707, "grad_norm": 0.1355697512626648, "learning_rate": 0.00044128257852810274, "loss": 2.193, "step": 460740 }, { "epoch": 1.7811306458845542, "grad_norm": 0.12670107185840607, "learning_rate": 0.0004411542731880105, "loss": 2.197, "step": 460750 }, { "epoch": 1.7811693030879374, "grad_norm": 0.1347462683916092, "learning_rate": 0.0004410259784075938, "loss": 2.1899, "step": 460760 }, { "epoch": 1.7812079602913207, "grad_norm": 0.13940757513046265, "learning_rate": 0.0004408976941842462, "loss": 2.1961, "step": 460770 }, { "epoch": 1.781246617494704, "grad_norm": 0.13067248463630676, "learning_rate": 0.00044076942051536204, "loss": 2.1989, "step": 460780 }, { "epoch": 1.7812852746980874, "grad_norm": 0.21473339200019836, "learning_rate": 0.0004406411573983364, "loss": 2.1975, "step": 460790 }, { "epoch": 1.7813239319014706, "grad_norm": 0.13648192584514618, "learning_rate": 0.00044051290483056583, "loss": 2.1845, "step": 460800 }, { "epoch": 1.781362589104854, "grad_norm": 0.14370782673358917, "learning_rate": 0.0004403846628094479, "loss": 2.2032, "step": 460810 }, { "epoch": 1.7814012463082372, "grad_norm": 0.13856147229671478, "learning_rate": 0.0004402564313323809, "loss": 2.1891, "step": 460820 }, { "epoch": 1.7814399035116204, "grad_norm": 0.12482549995183945, "learning_rate": 0.00044012821039676476, "loss": 2.1885, "step": 460830 }, { "epoch": 1.7814785607150037, "grad_norm": 0.13354481756687164, "learning_rate": 0.00043999999999999996, "loss": 2.1937, "step": 460840 }, { "epoch": 1.781517217918387, "grad_norm": 0.12710081040859222, "learning_rate": 0.00043987180013948856, "loss": 2.1897, "step": 460850 }, { "epoch": 1.7815558751217702, "grad_norm": 0.13189521431922913, "learning_rate": 0.00043974361081263315, "loss": 2.1795, "step": 460860 }, { "epoch": 1.7815945323251534, "grad_norm": 0.12823714315891266, "learning_rate": 0.00043961543201683775, "loss": 2.1916, "step": 460870 }, { "epoch": 1.7816331895285367, "grad_norm": 0.14169198274612427, "learning_rate": 0.00043948726374950733, "loss": 2.1928, "step": 460880 }, { "epoch": 1.78167184673192, "grad_norm": 0.12744873762130737, "learning_rate": 0.0004393591060080477, "loss": 2.1961, "step": 460890 }, { "epoch": 1.7817105039353032, "grad_norm": 0.12496272474527359, "learning_rate": 0.0004392309587898664, "loss": 2.1796, "step": 460900 }, { "epoch": 1.7817491611386864, "grad_norm": 0.12871375679969788, "learning_rate": 0.0004391028220923712, "loss": 2.1775, "step": 460910 }, { "epoch": 1.78178781834207, "grad_norm": 0.1288425326347351, "learning_rate": 0.00043897469591297147, "loss": 2.1783, "step": 460920 }, { "epoch": 1.7818264755454531, "grad_norm": 0.13534577190876007, "learning_rate": 0.0004388465802490775, "loss": 2.1882, "step": 460930 }, { "epoch": 1.7818651327488364, "grad_norm": 0.13280139863491058, "learning_rate": 0.0004387184750981006, "loss": 2.1937, "step": 460940 }, { "epoch": 1.7819037899522197, "grad_norm": 0.1339811384677887, "learning_rate": 0.00043859038045745334, "loss": 2.1827, "step": 460950 }, { "epoch": 1.7819424471556031, "grad_norm": 0.12586566805839539, "learning_rate": 0.00043846229632454924, "loss": 2.1885, "step": 460960 }, { "epoch": 1.7819811043589864, "grad_norm": 0.130745530128479, "learning_rate": 0.0004383342226968026, "loss": 2.1948, "step": 460970 }, { "epoch": 1.7820197615623696, "grad_norm": 0.12698744237422943, "learning_rate": 0.0004382061595716289, "loss": 2.1902, "step": 460980 }, { "epoch": 1.7820584187657529, "grad_norm": 0.13514763116836548, "learning_rate": 0.00043807810694644525, "loss": 2.1903, "step": 460990 }, { "epoch": 1.7820970759691361, "grad_norm": 0.12937365472316742, "learning_rate": 0.00043795006481866914, "loss": 2.1831, "step": 461000 }, { "epoch": 1.7821357331725194, "grad_norm": 0.13314153254032135, "learning_rate": 0.00043782203318571924, "loss": 2.1806, "step": 461010 }, { "epoch": 1.7821743903759026, "grad_norm": 0.13866551220417023, "learning_rate": 0.0004376940120450157, "loss": 2.1906, "step": 461020 }, { "epoch": 1.782213047579286, "grad_norm": 0.14468303322792053, "learning_rate": 0.00043756600139397886, "loss": 2.1916, "step": 461030 }, { "epoch": 1.7822517047826691, "grad_norm": 0.14432694017887115, "learning_rate": 0.00043743800123003117, "loss": 2.1851, "step": 461040 }, { "epoch": 1.7822903619860524, "grad_norm": 0.12961898744106293, "learning_rate": 0.00043731001155059544, "loss": 2.1895, "step": 461050 }, { "epoch": 1.7823290191894356, "grad_norm": 0.13149994611740112, "learning_rate": 0.0004371820323530957, "loss": 2.1945, "step": 461060 }, { "epoch": 1.782367676392819, "grad_norm": 0.13549242913722992, "learning_rate": 0.00043705406363495736, "loss": 2.1876, "step": 461070 }, { "epoch": 1.7824063335962022, "grad_norm": 0.12361189723014832, "learning_rate": 0.00043692610539360625, "loss": 2.1817, "step": 461080 }, { "epoch": 1.7824449907995856, "grad_norm": 0.1255415827035904, "learning_rate": 0.00043679815762646945, "loss": 2.1865, "step": 461090 }, { "epoch": 1.7824836480029689, "grad_norm": 0.13414831459522247, "learning_rate": 0.0004366702203309758, "loss": 2.1791, "step": 461100 }, { "epoch": 1.7825223052063521, "grad_norm": 0.13580499589443207, "learning_rate": 0.000436542293504554, "loss": 2.1991, "step": 461110 }, { "epoch": 1.7825609624097354, "grad_norm": 0.13255441188812256, "learning_rate": 0.00043641437714463495, "loss": 2.203, "step": 461120 }, { "epoch": 1.7825996196131189, "grad_norm": 0.14729438722133636, "learning_rate": 0.00043628647124864983, "loss": 2.1803, "step": 461130 }, { "epoch": 1.782638276816502, "grad_norm": 0.13216754794120789, "learning_rate": 0.00043615857581403097, "loss": 2.1881, "step": 461140 }, { "epoch": 1.7826769340198854, "grad_norm": 0.13753969967365265, "learning_rate": 0.0004360306908382121, "loss": 2.1955, "step": 461150 }, { "epoch": 1.7827155912232686, "grad_norm": 0.12598957121372223, "learning_rate": 0.0004359028163186278, "loss": 2.1878, "step": 461160 }, { "epoch": 1.7827542484266519, "grad_norm": 0.1264917254447937, "learning_rate": 0.0004357749522527137, "loss": 2.1832, "step": 461170 }, { "epoch": 1.7827929056300351, "grad_norm": 0.12438520789146423, "learning_rate": 0.00043564709863790643, "loss": 2.2021, "step": 461180 }, { "epoch": 1.7828315628334184, "grad_norm": 0.11906236410140991, "learning_rate": 0.0004355192554716436, "loss": 2.2008, "step": 461190 }, { "epoch": 1.7828702200368016, "grad_norm": 0.135292649269104, "learning_rate": 0.0004353914227513642, "loss": 2.1814, "step": 461200 }, { "epoch": 1.7829088772401849, "grad_norm": 0.12936165928840637, "learning_rate": 0.0004352636004745081, "loss": 2.2004, "step": 461210 }, { "epoch": 1.7829475344435681, "grad_norm": 0.1327020227909088, "learning_rate": 0.0004351357886385159, "loss": 2.1905, "step": 461220 }, { "epoch": 1.7829861916469514, "grad_norm": 0.1360083669424057, "learning_rate": 0.00043500798724082946, "loss": 2.1856, "step": 461230 }, { "epoch": 1.7830248488503346, "grad_norm": 0.13193684816360474, "learning_rate": 0.00043488019627889175, "loss": 2.1823, "step": 461240 }, { "epoch": 1.7830635060537179, "grad_norm": 0.13851018249988556, "learning_rate": 0.00043475241575014725, "loss": 2.1703, "step": 461250 }, { "epoch": 1.7831021632571014, "grad_norm": 0.1312793642282486, "learning_rate": 0.0004346246456520404, "loss": 2.1843, "step": 461260 }, { "epoch": 1.7831408204604846, "grad_norm": 0.14227105677127838, "learning_rate": 0.00043449688598201755, "loss": 2.1856, "step": 461270 }, { "epoch": 1.7831794776638679, "grad_norm": 0.13117855787277222, "learning_rate": 0.00043436913673752575, "loss": 2.18, "step": 461280 }, { "epoch": 1.7832181348672511, "grad_norm": 0.13411663472652435, "learning_rate": 0.0004342413979160134, "loss": 2.1885, "step": 461290 }, { "epoch": 1.7832567920706346, "grad_norm": 0.11938449740409851, "learning_rate": 0.00043411366951492925, "loss": 2.1939, "step": 461300 }, { "epoch": 1.7832954492740178, "grad_norm": 0.12993189692497253, "learning_rate": 0.0004339859515317239, "loss": 2.1905, "step": 461310 }, { "epoch": 1.783334106477401, "grad_norm": 0.12578348815441132, "learning_rate": 0.00043385824396384855, "loss": 2.1811, "step": 461320 }, { "epoch": 1.7833727636807843, "grad_norm": 0.1200806200504303, "learning_rate": 0.00043373054680875557, "loss": 2.2001, "step": 461330 }, { "epoch": 1.7834114208841676, "grad_norm": 0.28573447465896606, "learning_rate": 0.00043360286006389814, "loss": 2.209, "step": 461340 }, { "epoch": 1.7834500780875508, "grad_norm": 0.5567190051078796, "learning_rate": 0.0004334751837267308, "loss": 2.2089, "step": 461350 }, { "epoch": 1.783488735290934, "grad_norm": 0.12286386638879776, "learning_rate": 0.0004333475177947088, "loss": 2.1917, "step": 461360 }, { "epoch": 1.7835273924943174, "grad_norm": 0.12691837549209595, "learning_rate": 0.000433219862265289, "loss": 2.1982, "step": 461370 }, { "epoch": 1.7835660496977006, "grad_norm": 0.13498446345329285, "learning_rate": 0.0004330922171359286, "loss": 2.1868, "step": 461380 }, { "epoch": 1.7836047069010839, "grad_norm": 0.13531361520290375, "learning_rate": 0.0004329645824040862, "loss": 2.1926, "step": 461390 }, { "epoch": 1.783643364104467, "grad_norm": 0.13043451309204102, "learning_rate": 0.00043283695806722134, "loss": 2.1929, "step": 461400 }, { "epoch": 1.7836820213078504, "grad_norm": 0.1304284781217575, "learning_rate": 0.00043270934412279496, "loss": 2.1833, "step": 461410 }, { "epoch": 1.7837206785112338, "grad_norm": 0.12887679040431976, "learning_rate": 0.00043258174056826814, "loss": 2.2014, "step": 461420 }, { "epoch": 1.783759335714617, "grad_norm": 0.13068842887878418, "learning_rate": 0.0004324541474011039, "loss": 2.1896, "step": 461430 }, { "epoch": 1.7837979929180003, "grad_norm": 0.1315651834011078, "learning_rate": 0.0004323265646187662, "loss": 2.1961, "step": 461440 }, { "epoch": 1.7838366501213836, "grad_norm": 0.14479106664657593, "learning_rate": 0.00043219899221871904, "loss": 2.1788, "step": 461450 }, { "epoch": 1.7838753073247668, "grad_norm": 0.14688260853290558, "learning_rate": 0.000432071430198429, "loss": 2.1864, "step": 461460 }, { "epoch": 1.7839139645281503, "grad_norm": 0.13669735193252563, "learning_rate": 0.0004319438785553624, "loss": 2.2074, "step": 461470 }, { "epoch": 1.7839526217315336, "grad_norm": 0.12653201818466187, "learning_rate": 0.0004318163372869872, "loss": 2.19, "step": 461480 }, { "epoch": 1.7839912789349168, "grad_norm": 0.14503104984760284, "learning_rate": 0.00043168880639077247, "loss": 2.1842, "step": 461490 }, { "epoch": 1.7840299361383, "grad_norm": 0.12771466374397278, "learning_rate": 0.00043156128586418775, "loss": 2.1907, "step": 461500 }, { "epoch": 1.7840685933416833, "grad_norm": 0.1463581770658493, "learning_rate": 0.0004314337757047042, "loss": 2.1919, "step": 461510 }, { "epoch": 1.7841072505450666, "grad_norm": 0.1495421826839447, "learning_rate": 0.0004313062759097939, "loss": 2.176, "step": 461520 }, { "epoch": 1.7841459077484498, "grad_norm": 0.11844052374362946, "learning_rate": 0.0004311787864769294, "loss": 2.1923, "step": 461530 }, { "epoch": 1.784184564951833, "grad_norm": 0.13675814867019653, "learning_rate": 0.0004310513074035851, "loss": 2.1839, "step": 461540 }, { "epoch": 1.7842232221552163, "grad_norm": 0.13174942135810852, "learning_rate": 0.0004309238386872356, "loss": 2.1829, "step": 461550 }, { "epoch": 1.7842618793585996, "grad_norm": 0.12208150327205658, "learning_rate": 0.0004307963803253574, "loss": 2.1899, "step": 461560 }, { "epoch": 1.7843005365619828, "grad_norm": 0.1265152543783188, "learning_rate": 0.0004306689323154276, "loss": 2.1816, "step": 461570 }, { "epoch": 1.784339193765366, "grad_norm": 0.13260005414485931, "learning_rate": 0.000430541494654924, "loss": 2.181, "step": 461580 }, { "epoch": 1.7843778509687496, "grad_norm": 0.13829654455184937, "learning_rate": 0.00043041406734132593, "loss": 2.1941, "step": 461590 }, { "epoch": 1.7844165081721328, "grad_norm": 0.13675826787948608, "learning_rate": 0.00043028665037211325, "loss": 2.1791, "step": 461600 }, { "epoch": 1.784455165375516, "grad_norm": 0.2085188925266266, "learning_rate": 0.0004301592437447677, "loss": 2.1903, "step": 461610 }, { "epoch": 1.7844938225788993, "grad_norm": 0.13831783831119537, "learning_rate": 0.0004300318474567708, "loss": 2.1827, "step": 461620 }, { "epoch": 1.7845324797822826, "grad_norm": 0.142887681722641, "learning_rate": 0.00042990446150560646, "loss": 2.1777, "step": 461630 }, { "epoch": 1.784571136985666, "grad_norm": 0.13441495597362518, "learning_rate": 0.0004297770858887584, "loss": 2.1861, "step": 461640 }, { "epoch": 1.7846097941890493, "grad_norm": 0.13365398347377777, "learning_rate": 0.0004296497206037118, "loss": 2.1861, "step": 461650 }, { "epoch": 1.7846484513924326, "grad_norm": 0.13537412881851196, "learning_rate": 0.00042952236564795343, "loss": 2.1866, "step": 461660 }, { "epoch": 1.7846871085958158, "grad_norm": 0.13806648552417755, "learning_rate": 0.00042939502101897053, "loss": 2.1839, "step": 461670 }, { "epoch": 1.784725765799199, "grad_norm": 0.1359514445066452, "learning_rate": 0.0004292676867142511, "loss": 2.1896, "step": 461680 }, { "epoch": 1.7847644230025823, "grad_norm": 0.1382725089788437, "learning_rate": 0.0004291403627312846, "loss": 2.1877, "step": 461690 }, { "epoch": 1.7848030802059656, "grad_norm": 0.1247827336192131, "learning_rate": 0.00042901304906756144, "loss": 2.1927, "step": 461700 }, { "epoch": 1.7848417374093488, "grad_norm": 0.13510502874851227, "learning_rate": 0.000428885745720573, "loss": 2.1909, "step": 461710 }, { "epoch": 1.784880394612732, "grad_norm": 0.13972440361976624, "learning_rate": 0.00042875845268781166, "loss": 2.1741, "step": 461720 }, { "epoch": 1.7849190518161153, "grad_norm": 0.1431395560503006, "learning_rate": 0.00042863116996677065, "loss": 2.1991, "step": 461730 }, { "epoch": 1.7849577090194986, "grad_norm": 0.13320407271385193, "learning_rate": 0.0004285038975549447, "loss": 2.1836, "step": 461740 }, { "epoch": 1.7849963662228818, "grad_norm": 0.13148362934589386, "learning_rate": 0.00042837663544982883, "loss": 2.1817, "step": 461750 }, { "epoch": 1.7850350234262653, "grad_norm": 0.12751802802085876, "learning_rate": 0.00042824938364891983, "loss": 2.1946, "step": 461760 }, { "epoch": 1.7850736806296486, "grad_norm": 0.17811429500579834, "learning_rate": 0.00042812214214971525, "loss": 2.1768, "step": 461770 }, { "epoch": 1.7851123378330318, "grad_norm": 0.1396486610174179, "learning_rate": 0.0004279949109497132, "loss": 2.1838, "step": 461780 }, { "epoch": 1.785150995036415, "grad_norm": 0.1367500275373459, "learning_rate": 0.00042786769004641336, "loss": 2.1765, "step": 461790 }, { "epoch": 1.7851896522397985, "grad_norm": 0.1433398723602295, "learning_rate": 0.0004277404794373163, "loss": 2.201, "step": 461800 }, { "epoch": 1.7852283094431818, "grad_norm": 0.13393816351890564, "learning_rate": 0.00042761327911992344, "loss": 2.1902, "step": 461810 }, { "epoch": 1.785266966646565, "grad_norm": 0.13377831876277924, "learning_rate": 0.0004274860890917371, "loss": 2.1767, "step": 461820 }, { "epoch": 1.7853056238499483, "grad_norm": 0.13527078926563263, "learning_rate": 0.00042735890935026124, "loss": 2.1768, "step": 461830 }, { "epoch": 1.7853442810533315, "grad_norm": 0.12564711272716522, "learning_rate": 0.00042723173989300014, "loss": 2.1718, "step": 461840 }, { "epoch": 1.7853829382567148, "grad_norm": 0.13481220602989197, "learning_rate": 0.0004271045807174592, "loss": 2.1888, "step": 461850 }, { "epoch": 1.785421595460098, "grad_norm": 0.1479419767856598, "learning_rate": 0.00042697743182114524, "loss": 2.1969, "step": 461860 }, { "epoch": 1.7854602526634813, "grad_norm": 0.1323142945766449, "learning_rate": 0.00042685029320156567, "loss": 2.1843, "step": 461870 }, { "epoch": 1.7854989098668645, "grad_norm": 0.1364038586616516, "learning_rate": 0.0004267231648562291, "loss": 2.1845, "step": 461880 }, { "epoch": 1.7855375670702478, "grad_norm": 0.1245933398604393, "learning_rate": 0.0004265960467826453, "loss": 2.197, "step": 461890 }, { "epoch": 1.785576224273631, "grad_norm": 0.2402573972940445, "learning_rate": 0.0004264689389783245, "loss": 2.2002, "step": 461900 }, { "epoch": 1.7856148814770143, "grad_norm": 0.14551228284835815, "learning_rate": 0.0004263418414407785, "loss": 2.1978, "step": 461910 }, { "epoch": 1.7856535386803976, "grad_norm": 0.12514843046665192, "learning_rate": 0.0004262147541675199, "loss": 2.1786, "step": 461920 }, { "epoch": 1.785692195883781, "grad_norm": 0.12956567108631134, "learning_rate": 0.00042608767715606225, "loss": 2.1979, "step": 461930 }, { "epoch": 1.7857308530871643, "grad_norm": 0.1494515985250473, "learning_rate": 0.0004259606104039202, "loss": 2.1979, "step": 461940 }, { "epoch": 1.7857695102905475, "grad_norm": 0.13267038762569427, "learning_rate": 0.0004258335539086091, "loss": 2.1715, "step": 461950 }, { "epoch": 1.7858081674939308, "grad_norm": 0.12581340968608856, "learning_rate": 0.00042570650766764586, "loss": 2.1653, "step": 461960 }, { "epoch": 1.7858468246973143, "grad_norm": 0.1374005228281021, "learning_rate": 0.00042557947167854793, "loss": 2.1893, "step": 461970 }, { "epoch": 1.7858854819006975, "grad_norm": 0.13171625137329102, "learning_rate": 0.00042545244593883403, "loss": 2.1875, "step": 461980 }, { "epoch": 1.7859241391040808, "grad_norm": 0.1323147863149643, "learning_rate": 0.0004253254304460239, "loss": 2.1985, "step": 461990 }, { "epoch": 1.785962796307464, "grad_norm": 0.1326800435781479, "learning_rate": 0.00042519842519763774, "loss": 2.1843, "step": 462000 }, { "epoch": 1.7860014535108473, "grad_norm": 0.13129490613937378, "learning_rate": 0.00042507143019119756, "loss": 2.1899, "step": 462010 }, { "epoch": 1.7860401107142305, "grad_norm": 0.1310659795999527, "learning_rate": 0.00042494444542422573, "loss": 2.19, "step": 462020 }, { "epoch": 1.7860787679176138, "grad_norm": 0.1303727775812149, "learning_rate": 0.0004248174708942458, "loss": 2.1927, "step": 462030 }, { "epoch": 1.786117425120997, "grad_norm": 0.13262943923473358, "learning_rate": 0.0004246905065987827, "loss": 2.1929, "step": 462040 }, { "epoch": 1.7861560823243803, "grad_norm": 0.11869736760854721, "learning_rate": 0.00042456355253536174, "loss": 2.1919, "step": 462050 }, { "epoch": 1.7861947395277635, "grad_norm": 0.1460510939359665, "learning_rate": 0.0004244366087015095, "loss": 2.1887, "step": 462060 }, { "epoch": 1.7862333967311468, "grad_norm": 0.12706270813941956, "learning_rate": 0.0004243096750947539, "loss": 2.1937, "step": 462070 }, { "epoch": 1.78627205393453, "grad_norm": 0.1253349334001541, "learning_rate": 0.00042418275171262335, "loss": 2.1891, "step": 462080 }, { "epoch": 1.7863107111379133, "grad_norm": 0.3475554883480072, "learning_rate": 0.0004240558385526472, "loss": 2.1786, "step": 462090 }, { "epoch": 1.7863493683412968, "grad_norm": 0.131262868642807, "learning_rate": 0.00042392893561235637, "loss": 2.1959, "step": 462100 }, { "epoch": 1.78638802554468, "grad_norm": 0.12694989144802094, "learning_rate": 0.0004238020428892824, "loss": 2.1898, "step": 462110 }, { "epoch": 1.7864266827480633, "grad_norm": 0.13427241146564484, "learning_rate": 0.00042367516038095763, "loss": 2.1913, "step": 462120 }, { "epoch": 1.7864653399514465, "grad_norm": 0.13424880802631378, "learning_rate": 0.0004235482880849157, "loss": 2.184, "step": 462130 }, { "epoch": 1.78650399715483, "grad_norm": 0.13431230187416077, "learning_rate": 0.0004234214259986913, "loss": 2.179, "step": 462140 }, { "epoch": 1.7865426543582132, "grad_norm": 0.12677377462387085, "learning_rate": 0.00042329457411982, "loss": 2.1846, "step": 462150 }, { "epoch": 1.7865813115615965, "grad_norm": 0.14376206696033478, "learning_rate": 0.00042316773244583804, "loss": 2.1829, "step": 462160 }, { "epoch": 1.7866199687649797, "grad_norm": 0.15024666488170624, "learning_rate": 0.00042304090097428326, "loss": 2.1885, "step": 462170 }, { "epoch": 1.786658625968363, "grad_norm": 0.15297283232212067, "learning_rate": 0.00042291407970269424, "loss": 2.178, "step": 462180 }, { "epoch": 1.7866972831717463, "grad_norm": 0.14141041040420532, "learning_rate": 0.00042278726862861007, "loss": 2.1869, "step": 462190 }, { "epoch": 1.7867359403751295, "grad_norm": 0.12803156673908234, "learning_rate": 0.00042266046774957176, "loss": 2.2057, "step": 462200 }, { "epoch": 1.7867745975785128, "grad_norm": 0.13150155544281006, "learning_rate": 0.0004225336770631203, "loss": 2.1765, "step": 462210 }, { "epoch": 1.786813254781896, "grad_norm": 0.13283614814281464, "learning_rate": 0.0004224068965667987, "loss": 2.1888, "step": 462220 }, { "epoch": 1.7868519119852793, "grad_norm": 0.13864447176456451, "learning_rate": 0.0004222801262581497, "loss": 2.1821, "step": 462230 }, { "epoch": 1.7868905691886625, "grad_norm": 0.12497738748788834, "learning_rate": 0.00042215336613471855, "loss": 2.196, "step": 462240 }, { "epoch": 1.7869292263920458, "grad_norm": 0.1317017376422882, "learning_rate": 0.00042202661619405004, "loss": 2.1909, "step": 462250 }, { "epoch": 1.786967883595429, "grad_norm": 0.12532906234264374, "learning_rate": 0.00042189987643369075, "loss": 2.1832, "step": 462260 }, { "epoch": 1.7870065407988125, "grad_norm": 0.13995838165283203, "learning_rate": 0.0004217731468511885, "loss": 2.1884, "step": 462270 }, { "epoch": 1.7870451980021957, "grad_norm": 0.13947032392024994, "learning_rate": 0.00042164642744409143, "loss": 2.1886, "step": 462280 }, { "epoch": 1.787083855205579, "grad_norm": 0.13160671293735504, "learning_rate": 0.0004215197182099486, "loss": 2.191, "step": 462290 }, { "epoch": 1.7871225124089622, "grad_norm": 0.12940038740634918, "learning_rate": 0.00042139301914631067, "loss": 2.1905, "step": 462300 }, { "epoch": 1.7871611696123457, "grad_norm": 0.14498552680015564, "learning_rate": 0.0004212663302507291, "loss": 2.1898, "step": 462310 }, { "epoch": 1.787199826815729, "grad_norm": 0.12709790468215942, "learning_rate": 0.000421139651520756, "loss": 2.191, "step": 462320 }, { "epoch": 1.7872384840191122, "grad_norm": 0.13836999237537384, "learning_rate": 0.00042101298295394465, "loss": 2.1813, "step": 462330 }, { "epoch": 1.7872771412224955, "grad_norm": 0.15703056752681732, "learning_rate": 0.00042088632454784935, "loss": 2.1906, "step": 462340 }, { "epoch": 1.7873157984258787, "grad_norm": 0.13247722387313843, "learning_rate": 0.0004207596763000254, "loss": 2.1802, "step": 462350 }, { "epoch": 1.787354455629262, "grad_norm": 0.13744854927062988, "learning_rate": 0.00042063303820802925, "loss": 2.173, "step": 462360 }, { "epoch": 1.7873931128326452, "grad_norm": 0.12450604140758514, "learning_rate": 0.00042050641026941806, "loss": 2.1786, "step": 462370 }, { "epoch": 1.7874317700360285, "grad_norm": 0.13161182403564453, "learning_rate": 0.0004203797924817496, "loss": 2.1803, "step": 462380 }, { "epoch": 1.7874704272394117, "grad_norm": 0.13816915452480316, "learning_rate": 0.00042025318484258367, "loss": 2.1695, "step": 462390 }, { "epoch": 1.787509084442795, "grad_norm": 0.13009148836135864, "learning_rate": 0.0004201265873494802, "loss": 2.1891, "step": 462400 }, { "epoch": 1.7875477416461782, "grad_norm": 0.13122321665287018, "learning_rate": 0.00041999999999999996, "loss": 2.2087, "step": 462410 }, { "epoch": 1.7875863988495615, "grad_norm": 0.13132932782173157, "learning_rate": 0.0004198734227917056, "loss": 2.1995, "step": 462420 }, { "epoch": 1.7876250560529447, "grad_norm": 0.13773812353610992, "learning_rate": 0.00041974685572216, "loss": 2.175, "step": 462430 }, { "epoch": 1.7876637132563282, "grad_norm": 0.1318158358335495, "learning_rate": 0.00041962029878892725, "loss": 2.179, "step": 462440 }, { "epoch": 1.7877023704597115, "grad_norm": 0.13777051866054535, "learning_rate": 0.00041949375198957207, "loss": 2.1998, "step": 462450 }, { "epoch": 1.7877410276630947, "grad_norm": 0.13231171667575836, "learning_rate": 0.000419367215321661, "loss": 2.1742, "step": 462460 }, { "epoch": 1.787779684866478, "grad_norm": 0.13529814779758453, "learning_rate": 0.0004192406887827609, "loss": 2.1972, "step": 462470 }, { "epoch": 1.7878183420698615, "grad_norm": 0.1315818727016449, "learning_rate": 0.0004191141723704397, "loss": 2.1843, "step": 462480 }, { "epoch": 1.7878569992732447, "grad_norm": 0.15160876512527466, "learning_rate": 0.0004189876660822662, "loss": 2.1826, "step": 462490 }, { "epoch": 1.787895656476628, "grad_norm": 0.14363962411880493, "learning_rate": 0.00041886116991581024, "loss": 2.1811, "step": 462500 }, { "epoch": 1.7879343136800112, "grad_norm": 0.13013380765914917, "learning_rate": 0.0004187346838686432, "loss": 2.1926, "step": 462510 }, { "epoch": 1.7879729708833945, "grad_norm": 0.14183056354522705, "learning_rate": 0.0004186082079383364, "loss": 2.1842, "step": 462520 }, { "epoch": 1.7880116280867777, "grad_norm": 0.1285829097032547, "learning_rate": 0.00041848174212246294, "loss": 2.1828, "step": 462530 }, { "epoch": 1.788050285290161, "grad_norm": 0.2322116643190384, "learning_rate": 0.00041835528641859644, "loss": 2.1752, "step": 462540 }, { "epoch": 1.7880889424935442, "grad_norm": 0.1346760243177414, "learning_rate": 0.000418228840824312, "loss": 2.1777, "step": 462550 }, { "epoch": 1.7881275996969275, "grad_norm": 0.13815739750862122, "learning_rate": 0.00041810240533718493, "loss": 2.1846, "step": 462560 }, { "epoch": 1.7881662569003107, "grad_norm": 0.13315899670124054, "learning_rate": 0.0004179759799547922, "loss": 2.1878, "step": 462570 }, { "epoch": 1.788204914103694, "grad_norm": 0.13540950417518616, "learning_rate": 0.00041784956467471137, "loss": 2.201, "step": 462580 }, { "epoch": 1.7882435713070772, "grad_norm": 0.1370074301958084, "learning_rate": 0.0004177231594945212, "loss": 2.1808, "step": 462590 }, { "epoch": 1.7882822285104605, "grad_norm": 0.15163642168045044, "learning_rate": 0.0004175967644118015, "loss": 2.1865, "step": 462600 }, { "epoch": 1.788320885713844, "grad_norm": 0.12375643104314804, "learning_rate": 0.00041747037942413233, "loss": 2.1816, "step": 462610 }, { "epoch": 1.7883595429172272, "grad_norm": 0.132793590426445, "learning_rate": 0.00041734400452909546, "loss": 2.1728, "step": 462620 }, { "epoch": 1.7883982001206105, "grad_norm": 0.1314646154642105, "learning_rate": 0.0004172176397242735, "loss": 2.1967, "step": 462630 }, { "epoch": 1.7884368573239937, "grad_norm": 0.14199237525463104, "learning_rate": 0.0004170912850072497, "loss": 2.192, "step": 462640 }, { "epoch": 1.7884755145273772, "grad_norm": 0.12800417840480804, "learning_rate": 0.00041696494037560884, "loss": 2.1822, "step": 462650 }, { "epoch": 1.7885141717307604, "grad_norm": 0.13144774734973907, "learning_rate": 0.00041683860582693576, "loss": 2.1896, "step": 462660 }, { "epoch": 1.7885528289341437, "grad_norm": 0.13051064312458038, "learning_rate": 0.00041671228135881756, "loss": 2.1785, "step": 462670 }, { "epoch": 1.788591486137527, "grad_norm": 0.13029839098453522, "learning_rate": 0.000416585966968841, "loss": 2.2002, "step": 462680 }, { "epoch": 1.7886301433409102, "grad_norm": 0.13918250799179077, "learning_rate": 0.00041645966265459465, "loss": 2.1887, "step": 462690 }, { "epoch": 1.7886688005442934, "grad_norm": 0.13804082572460175, "learning_rate": 0.00041633336841366764, "loss": 2.1915, "step": 462700 }, { "epoch": 1.7887074577476767, "grad_norm": 0.1327112466096878, "learning_rate": 0.0004162070842436503, "loss": 2.1787, "step": 462710 }, { "epoch": 1.78874611495106, "grad_norm": 0.13810524344444275, "learning_rate": 0.0004160808101421336, "loss": 2.1799, "step": 462720 }, { "epoch": 1.7887847721544432, "grad_norm": 0.13068757951259613, "learning_rate": 0.0004159545461067098, "loss": 2.1706, "step": 462730 }, { "epoch": 1.7888234293578265, "grad_norm": 0.1321135014295578, "learning_rate": 0.00041582829213497184, "loss": 2.1871, "step": 462740 }, { "epoch": 1.7888620865612097, "grad_norm": 0.13681761920452118, "learning_rate": 0.00041570204822451417, "loss": 2.1958, "step": 462750 }, { "epoch": 1.788900743764593, "grad_norm": 0.13599014282226562, "learning_rate": 0.00041557581437293113, "loss": 2.1828, "step": 462760 }, { "epoch": 1.7889394009679762, "grad_norm": 0.13342493772506714, "learning_rate": 0.0004154495905778195, "loss": 2.1702, "step": 462770 }, { "epoch": 1.7889780581713597, "grad_norm": 0.13734008371829987, "learning_rate": 0.0004153233768367755, "loss": 2.1812, "step": 462780 }, { "epoch": 1.789016715374743, "grad_norm": 0.1402251124382019, "learning_rate": 0.0004151971731473976, "loss": 2.2038, "step": 462790 }, { "epoch": 1.7890553725781262, "grad_norm": 0.12966884672641754, "learning_rate": 0.000415070979507284, "loss": 2.1917, "step": 462800 }, { "epoch": 1.7890940297815094, "grad_norm": 0.12560537457466125, "learning_rate": 0.00041494479591403514, "loss": 2.1611, "step": 462810 }, { "epoch": 1.789132686984893, "grad_norm": 0.1583850383758545, "learning_rate": 0.00041481862236525125, "loss": 2.1862, "step": 462820 }, { "epoch": 1.7891713441882762, "grad_norm": 0.1309800148010254, "learning_rate": 0.0004146924588585344, "loss": 2.2087, "step": 462830 }, { "epoch": 1.7892100013916594, "grad_norm": 0.14885538816452026, "learning_rate": 0.000414566305391487, "loss": 2.1931, "step": 462840 }, { "epoch": 1.7892486585950427, "grad_norm": 0.1438407003879547, "learning_rate": 0.0004144401619617126, "loss": 2.2001, "step": 462850 }, { "epoch": 1.789287315798426, "grad_norm": 0.13319040834903717, "learning_rate": 0.0004143140285668161, "loss": 2.1934, "step": 462860 }, { "epoch": 1.7893259730018092, "grad_norm": 0.14416548609733582, "learning_rate": 0.00041418790520440286, "loss": 2.1855, "step": 462870 }, { "epoch": 1.7893646302051924, "grad_norm": 0.141445130109787, "learning_rate": 0.00041406179187207927, "loss": 2.1994, "step": 462880 }, { "epoch": 1.7894032874085757, "grad_norm": 0.12892618775367737, "learning_rate": 0.00041393568856745277, "loss": 2.1844, "step": 462890 }, { "epoch": 1.789441944611959, "grad_norm": 0.12439202517271042, "learning_rate": 0.0004138095952881318, "loss": 2.1728, "step": 462900 }, { "epoch": 1.7894806018153422, "grad_norm": 0.12896130979061127, "learning_rate": 0.0004136835120317257, "loss": 2.1916, "step": 462910 }, { "epoch": 1.7895192590187254, "grad_norm": 0.13055801391601562, "learning_rate": 0.0004135574387958447, "loss": 2.1768, "step": 462920 }, { "epoch": 1.7895579162221087, "grad_norm": 0.13586017489433289, "learning_rate": 0.00041343137557810005, "loss": 2.1835, "step": 462930 }, { "epoch": 1.789596573425492, "grad_norm": 0.12946003675460815, "learning_rate": 0.0004133053223761039, "loss": 2.1799, "step": 462940 }, { "epoch": 1.7896352306288754, "grad_norm": 0.13411448895931244, "learning_rate": 0.0004131792791874691, "loss": 2.1871, "step": 462950 }, { "epoch": 1.7896738878322587, "grad_norm": 0.1489628553390503, "learning_rate": 0.0004130532460098102, "loss": 2.1704, "step": 462960 }, { "epoch": 1.789712545035642, "grad_norm": 0.13088008761405945, "learning_rate": 0.0004129272228407419, "loss": 2.1948, "step": 462970 }, { "epoch": 1.7897512022390252, "grad_norm": 0.14513364434242249, "learning_rate": 0.0004128012096778804, "loss": 2.1811, "step": 462980 }, { "epoch": 1.7897898594424086, "grad_norm": 0.14233975112438202, "learning_rate": 0.00041267520651884216, "loss": 2.1659, "step": 462990 }, { "epoch": 1.789828516645792, "grad_norm": 0.14351864159107208, "learning_rate": 0.0004125492133612456, "loss": 2.2001, "step": 463000 }, { "epoch": 1.7898671738491752, "grad_norm": 0.13553811609745026, "learning_rate": 0.00041242323020270936, "loss": 2.1786, "step": 463010 }, { "epoch": 1.7899058310525584, "grad_norm": 0.1296354979276657, "learning_rate": 0.00041229725704085274, "loss": 2.1878, "step": 463020 }, { "epoch": 1.7899444882559417, "grad_norm": 0.13192026317119598, "learning_rate": 0.00041217129387329687, "loss": 2.1856, "step": 463030 }, { "epoch": 1.789983145459325, "grad_norm": 0.14609040319919586, "learning_rate": 0.00041204534069766343, "loss": 2.1879, "step": 463040 }, { "epoch": 1.7900218026627082, "grad_norm": 0.12881425023078918, "learning_rate": 0.0004119193975115747, "loss": 2.1879, "step": 463050 }, { "epoch": 1.7900604598660914, "grad_norm": 0.13772840797901154, "learning_rate": 0.00041179346431265417, "loss": 2.1975, "step": 463060 }, { "epoch": 1.7900991170694747, "grad_norm": 0.12743765115737915, "learning_rate": 0.0004116675410985269, "loss": 2.1731, "step": 463070 }, { "epoch": 1.790137774272858, "grad_norm": 0.1352182924747467, "learning_rate": 0.0004115416278668176, "loss": 2.1944, "step": 463080 }, { "epoch": 1.7901764314762412, "grad_norm": 0.13374656438827515, "learning_rate": 0.00041141572461515286, "loss": 2.1887, "step": 463090 }, { "epoch": 1.7902150886796244, "grad_norm": 0.13277378678321838, "learning_rate": 0.00041128983134115993, "loss": 2.1919, "step": 463100 }, { "epoch": 1.7902537458830077, "grad_norm": 0.13288132846355438, "learning_rate": 0.00041116394804246716, "loss": 2.1795, "step": 463110 }, { "epoch": 1.7902924030863911, "grad_norm": 0.12260843068361282, "learning_rate": 0.00041103807471670376, "loss": 2.1794, "step": 463120 }, { "epoch": 1.7903310602897744, "grad_norm": 0.13363111019134521, "learning_rate": 0.00041091221136149936, "loss": 2.1825, "step": 463130 }, { "epoch": 1.7903697174931577, "grad_norm": 0.13432249426841736, "learning_rate": 0.00041078635797448573, "loss": 2.1814, "step": 463140 }, { "epoch": 1.790408374696541, "grad_norm": 0.1417021006345749, "learning_rate": 0.0004106605145532942, "loss": 2.1927, "step": 463150 }, { "epoch": 1.7904470318999244, "grad_norm": 0.13484080135822296, "learning_rate": 0.0004105346810955577, "loss": 2.1915, "step": 463160 }, { "epoch": 1.7904856891033076, "grad_norm": 0.13451482355594635, "learning_rate": 0.0004104088575989107, "loss": 2.1812, "step": 463170 }, { "epoch": 1.7905243463066909, "grad_norm": 0.1374129354953766, "learning_rate": 0.0004102830440609875, "loss": 2.1901, "step": 463180 }, { "epoch": 1.7905630035100741, "grad_norm": 0.14045169949531555, "learning_rate": 0.00041015724047942405, "loss": 2.1895, "step": 463190 }, { "epoch": 1.7906016607134574, "grad_norm": 0.13701249659061432, "learning_rate": 0.00041003144685185667, "loss": 2.1921, "step": 463200 }, { "epoch": 1.7906403179168406, "grad_norm": 0.13817450404167175, "learning_rate": 0.0004099056631759235, "loss": 2.1708, "step": 463210 }, { "epoch": 1.790678975120224, "grad_norm": 0.13564909994602203, "learning_rate": 0.0004097798894492624, "loss": 2.195, "step": 463220 }, { "epoch": 1.7907176323236071, "grad_norm": 0.13489286601543427, "learning_rate": 0.00040965412566951344, "loss": 2.1775, "step": 463230 }, { "epoch": 1.7907562895269904, "grad_norm": 0.1390005499124527, "learning_rate": 0.0004095283718343166, "loss": 2.1749, "step": 463240 }, { "epoch": 1.7907949467303736, "grad_norm": 0.1344020962715149, "learning_rate": 0.00040940262794131343, "loss": 2.1894, "step": 463250 }, { "epoch": 1.790833603933757, "grad_norm": 0.13207466900348663, "learning_rate": 0.0004092768939881459, "loss": 2.1758, "step": 463260 }, { "epoch": 1.7908722611371402, "grad_norm": 0.13027574121952057, "learning_rate": 0.0004091511699724577, "loss": 2.1879, "step": 463270 }, { "epoch": 1.7909109183405236, "grad_norm": 0.1281670182943344, "learning_rate": 0.0004090254558918927, "loss": 2.1701, "step": 463280 }, { "epoch": 1.7909495755439069, "grad_norm": 0.15068063139915466, "learning_rate": 0.0004088997517440958, "loss": 2.1783, "step": 463290 }, { "epoch": 1.7909882327472901, "grad_norm": 0.13794660568237305, "learning_rate": 0.00040877405752671314, "loss": 2.1744, "step": 463300 }, { "epoch": 1.7910268899506734, "grad_norm": 0.13627295196056366, "learning_rate": 0.0004086483732373916, "loss": 2.1758, "step": 463310 }, { "epoch": 1.7910655471540566, "grad_norm": 0.13109062612056732, "learning_rate": 0.00040852269887377914, "loss": 2.1872, "step": 463320 }, { "epoch": 1.79110420435744, "grad_norm": 0.14107844233512878, "learning_rate": 0.0004083970344335244, "loss": 2.1697, "step": 463330 }, { "epoch": 1.7911428615608234, "grad_norm": 0.13397075235843658, "learning_rate": 0.000408271379914277, "loss": 2.1803, "step": 463340 }, { "epoch": 1.7911815187642066, "grad_norm": 0.14661332964897156, "learning_rate": 0.0004081457353136877, "loss": 2.1796, "step": 463350 }, { "epoch": 1.7912201759675899, "grad_norm": 0.1381164938211441, "learning_rate": 0.00040802010062940795, "loss": 2.1815, "step": 463360 }, { "epoch": 1.7912588331709731, "grad_norm": 0.13051655888557434, "learning_rate": 0.0004078944758590906, "loss": 2.1773, "step": 463370 }, { "epoch": 1.7912974903743564, "grad_norm": 0.12441668659448624, "learning_rate": 0.0004077688610003885, "loss": 2.1906, "step": 463380 }, { "epoch": 1.7913361475777396, "grad_norm": 0.15715846419334412, "learning_rate": 0.00040764325605095644, "loss": 2.1769, "step": 463390 }, { "epoch": 1.7913748047811229, "grad_norm": 0.12852106988430023, "learning_rate": 0.00040751766100844945, "loss": 2.1843, "step": 463400 }, { "epoch": 1.7914134619845061, "grad_norm": 0.13250230252742767, "learning_rate": 0.00040739207587052364, "loss": 2.1805, "step": 463410 }, { "epoch": 1.7914521191878894, "grad_norm": 0.14088231325149536, "learning_rate": 0.0004072665006348364, "loss": 2.1739, "step": 463420 }, { "epoch": 1.7914907763912726, "grad_norm": 0.13542966544628143, "learning_rate": 0.0004071409352990454, "loss": 2.1783, "step": 463430 }, { "epoch": 1.7915294335946559, "grad_norm": 0.14837566018104553, "learning_rate": 0.00040701537986080985, "loss": 2.1788, "step": 463440 }, { "epoch": 1.7915680907980394, "grad_norm": 0.1346118003129959, "learning_rate": 0.00040688983431778957, "loss": 2.1652, "step": 463450 }, { "epoch": 1.7916067480014226, "grad_norm": 0.13045181334018707, "learning_rate": 0.0004067642986676452, "loss": 2.1795, "step": 463460 }, { "epoch": 1.7916454052048059, "grad_norm": 0.14252230525016785, "learning_rate": 0.00040663877290803897, "loss": 2.1939, "step": 463470 }, { "epoch": 1.7916840624081891, "grad_norm": 0.1370854675769806, "learning_rate": 0.00040651325703663276, "loss": 2.1773, "step": 463480 }, { "epoch": 1.7917227196115724, "grad_norm": 0.1398986577987671, "learning_rate": 0.0004063877510510909, "loss": 2.1854, "step": 463490 }, { "epoch": 1.7917613768149558, "grad_norm": 0.13030345737934113, "learning_rate": 0.0004062622549490773, "loss": 2.1822, "step": 463500 }, { "epoch": 1.791800034018339, "grad_norm": 0.1358768492937088, "learning_rate": 0.00040613676872825757, "loss": 2.1887, "step": 463510 }, { "epoch": 1.7918386912217223, "grad_norm": 0.13510680198669434, "learning_rate": 0.00040601129238629797, "loss": 2.1733, "step": 463520 }, { "epoch": 1.7918773484251056, "grad_norm": 0.1370605230331421, "learning_rate": 0.00040588582592086597, "loss": 2.1913, "step": 463530 }, { "epoch": 1.7919160056284889, "grad_norm": 0.13525953888893127, "learning_rate": 0.00040576036932962945, "loss": 2.1771, "step": 463540 }, { "epoch": 1.791954662831872, "grad_norm": 0.13494880497455597, "learning_rate": 0.0004056349226102574, "loss": 2.1689, "step": 463550 }, { "epoch": 1.7919933200352554, "grad_norm": 0.13117605447769165, "learning_rate": 0.00040550948576042003, "loss": 2.1933, "step": 463560 }, { "epoch": 1.7920319772386386, "grad_norm": 0.14315210282802582, "learning_rate": 0.0004053840587777884, "loss": 2.1824, "step": 463570 }, { "epoch": 1.7920706344420219, "grad_norm": 0.13220641016960144, "learning_rate": 0.0004052586416600339, "loss": 2.1862, "step": 463580 }, { "epoch": 1.7921092916454051, "grad_norm": 0.14106839895248413, "learning_rate": 0.00040513323440482976, "loss": 2.1879, "step": 463590 }, { "epoch": 1.7921479488487884, "grad_norm": 0.13774384558200836, "learning_rate": 0.0004050078370098491, "loss": 2.1773, "step": 463600 }, { "epoch": 1.7921866060521716, "grad_norm": 0.13437210023403168, "learning_rate": 0.00040488244947276697, "loss": 2.2018, "step": 463610 }, { "epoch": 1.792225263255555, "grad_norm": 0.14692780375480652, "learning_rate": 0.00040475707179125854, "loss": 2.1843, "step": 463620 }, { "epoch": 1.7922639204589383, "grad_norm": 0.1312679797410965, "learning_rate": 0.00040463170396300033, "loss": 2.1742, "step": 463630 }, { "epoch": 1.7923025776623216, "grad_norm": 0.14086544513702393, "learning_rate": 0.0004045063459856697, "loss": 2.1907, "step": 463640 }, { "epoch": 1.7923412348657048, "grad_norm": 0.13703036308288574, "learning_rate": 0.0004043809978569446, "loss": 2.1835, "step": 463650 }, { "epoch": 1.792379892069088, "grad_norm": 0.1371353417634964, "learning_rate": 0.0004042556595745044, "loss": 2.1831, "step": 463660 }, { "epoch": 1.7924185492724716, "grad_norm": 0.1327003687620163, "learning_rate": 0.00040413033113602913, "loss": 2.1728, "step": 463670 }, { "epoch": 1.7924572064758548, "grad_norm": 0.12607765197753906, "learning_rate": 0.00040400501253919964, "loss": 2.1761, "step": 463680 }, { "epoch": 1.792495863679238, "grad_norm": 0.1389392912387848, "learning_rate": 0.0004038797037816979, "loss": 2.1914, "step": 463690 }, { "epoch": 1.7925345208826213, "grad_norm": 0.1530158966779709, "learning_rate": 0.0004037544048612067, "loss": 2.182, "step": 463700 }, { "epoch": 1.7925731780860046, "grad_norm": 0.13505354523658752, "learning_rate": 0.00040362911577540973, "loss": 2.1846, "step": 463710 }, { "epoch": 1.7926118352893878, "grad_norm": 0.14173811674118042, "learning_rate": 0.00040350383652199164, "loss": 2.1878, "step": 463720 }, { "epoch": 1.792650492492771, "grad_norm": 0.14959704875946045, "learning_rate": 0.0004033785670986376, "loss": 2.1822, "step": 463730 }, { "epoch": 1.7926891496961543, "grad_norm": 0.13770392537117004, "learning_rate": 0.0004032533075030342, "loss": 2.187, "step": 463740 }, { "epoch": 1.7927278068995376, "grad_norm": 0.14019890129566193, "learning_rate": 0.0004031280577328689, "loss": 2.1767, "step": 463750 }, { "epoch": 1.7927664641029208, "grad_norm": 0.12711156904697418, "learning_rate": 0.0004030028177858298, "loss": 2.18, "step": 463760 }, { "epoch": 1.792805121306304, "grad_norm": 0.12975165247917175, "learning_rate": 0.0004028775876596058, "loss": 2.1658, "step": 463770 }, { "epoch": 1.7928437785096873, "grad_norm": 0.19896434247493744, "learning_rate": 0.00040275236735188734, "loss": 2.1886, "step": 463780 }, { "epoch": 1.7928824357130708, "grad_norm": 0.14500820636749268, "learning_rate": 0.0004026271568603652, "loss": 2.1842, "step": 463790 }, { "epoch": 1.792921092916454, "grad_norm": 0.15005642175674438, "learning_rate": 0.00040250195618273145, "loss": 2.188, "step": 463800 }, { "epoch": 1.7929597501198373, "grad_norm": 0.13256584107875824, "learning_rate": 0.00040237676531667833, "loss": 2.1978, "step": 463810 }, { "epoch": 1.7929984073232206, "grad_norm": 0.14087162911891937, "learning_rate": 0.00040225158425989996, "loss": 2.166, "step": 463820 }, { "epoch": 1.793037064526604, "grad_norm": 0.12890873849391937, "learning_rate": 0.0004021264130100906, "loss": 2.1969, "step": 463830 }, { "epoch": 1.7930757217299873, "grad_norm": 0.13375651836395264, "learning_rate": 0.0004020012515649458, "loss": 2.1854, "step": 463840 }, { "epoch": 1.7931143789333706, "grad_norm": 0.12807835638523102, "learning_rate": 0.00040187609992216203, "loss": 2.1808, "step": 463850 }, { "epoch": 1.7931530361367538, "grad_norm": 0.14423508942127228, "learning_rate": 0.0004017509580794363, "loss": 2.1934, "step": 463860 }, { "epoch": 1.793191693340137, "grad_norm": 0.13110533356666565, "learning_rate": 0.000401625826034467, "loss": 2.1802, "step": 463870 }, { "epoch": 1.7932303505435203, "grad_norm": 0.1442173719406128, "learning_rate": 0.00040150070378495317, "loss": 2.1688, "step": 463880 }, { "epoch": 1.7932690077469036, "grad_norm": 0.15013518929481506, "learning_rate": 0.0004013755913285948, "loss": 2.1949, "step": 463890 }, { "epoch": 1.7933076649502868, "grad_norm": 0.13914164900779724, "learning_rate": 0.00040125048866309254, "loss": 2.1994, "step": 463900 }, { "epoch": 1.79334632215367, "grad_norm": 0.12703953683376312, "learning_rate": 0.0004011253957861485, "loss": 2.1705, "step": 463910 }, { "epoch": 1.7933849793570533, "grad_norm": 0.12627583742141724, "learning_rate": 0.0004010003126954653, "loss": 2.1905, "step": 463920 }, { "epoch": 1.7934236365604366, "grad_norm": 0.1557985097169876, "learning_rate": 0.00040087523938874627, "loss": 2.1901, "step": 463930 }, { "epoch": 1.7934622937638198, "grad_norm": 0.1482551395893097, "learning_rate": 0.0004007501758636958, "loss": 2.1745, "step": 463940 }, { "epoch": 1.793500950967203, "grad_norm": 0.14754800498485565, "learning_rate": 0.00040062512211801947, "loss": 2.1624, "step": 463950 }, { "epoch": 1.7935396081705866, "grad_norm": 0.1354418247938156, "learning_rate": 0.0004005000781494237, "loss": 2.1827, "step": 463960 }, { "epoch": 1.7935782653739698, "grad_norm": 0.13364222645759583, "learning_rate": 0.0004003750439556151, "loss": 2.1831, "step": 463970 }, { "epoch": 1.793616922577353, "grad_norm": 0.14250802993774414, "learning_rate": 0.0004002500195343024, "loss": 2.1793, "step": 463980 }, { "epoch": 1.7936555797807363, "grad_norm": 0.1395851969718933, "learning_rate": 0.00040012500488319393, "loss": 2.1871, "step": 463990 }, { "epoch": 1.7936942369841198, "grad_norm": 0.14435814321041107, "learning_rate": 0.0003999999999999999, "loss": 2.1865, "step": 464000 }, { "epoch": 1.793732894187503, "grad_norm": 0.1335323601961136, "learning_rate": 0.0003998750048824311, "loss": 2.1904, "step": 464010 }, { "epoch": 1.7937715513908863, "grad_norm": 0.13234373927116394, "learning_rate": 0.00039975001952819887, "loss": 2.1629, "step": 464020 }, { "epoch": 1.7938102085942695, "grad_norm": 0.13462892174720764, "learning_rate": 0.00039962504393501597, "loss": 2.1843, "step": 464030 }, { "epoch": 1.7938488657976528, "grad_norm": 0.12947219610214233, "learning_rate": 0.0003995000781005955, "loss": 2.1693, "step": 464040 }, { "epoch": 1.793887523001036, "grad_norm": 0.144515722990036, "learning_rate": 0.000399375122022652, "loss": 2.1667, "step": 464050 }, { "epoch": 1.7939261802044193, "grad_norm": 0.13721828162670135, "learning_rate": 0.0003992501756989009, "loss": 2.1779, "step": 464060 }, { "epoch": 1.7939648374078025, "grad_norm": 0.1374235600233078, "learning_rate": 0.0003991252391270577, "loss": 2.1787, "step": 464070 }, { "epoch": 1.7940034946111858, "grad_norm": 0.1416328251361847, "learning_rate": 0.00039900031230484, "loss": 2.189, "step": 464080 }, { "epoch": 1.794042151814569, "grad_norm": 0.14912135899066925, "learning_rate": 0.0003988753952299653, "loss": 2.1813, "step": 464090 }, { "epoch": 1.7940808090179523, "grad_norm": 0.13194890320301056, "learning_rate": 0.0003987504879001524, "loss": 2.187, "step": 464100 }, { "epoch": 1.7941194662213356, "grad_norm": 0.13360001146793365, "learning_rate": 0.0003986255903131211, "loss": 2.1908, "step": 464110 }, { "epoch": 1.7941581234247188, "grad_norm": 0.13524281978607178, "learning_rate": 0.0003985007024665919, "loss": 2.1818, "step": 464120 }, { "epoch": 1.7941967806281023, "grad_norm": 0.12458459287881851, "learning_rate": 0.000398375824358286, "loss": 2.1824, "step": 464130 }, { "epoch": 1.7942354378314855, "grad_norm": 0.14129717648029327, "learning_rate": 0.000398250955985926, "loss": 2.1798, "step": 464140 }, { "epoch": 1.7942740950348688, "grad_norm": 0.13172756135463715, "learning_rate": 0.00039812609734723515, "loss": 2.1876, "step": 464150 }, { "epoch": 1.794312752238252, "grad_norm": 0.14094604551792145, "learning_rate": 0.0003980012484399371, "loss": 2.1692, "step": 464160 }, { "epoch": 1.7943514094416355, "grad_norm": 0.14977578818798065, "learning_rate": 0.00039787640926175727, "loss": 2.1665, "step": 464170 }, { "epoch": 1.7943900666450188, "grad_norm": 0.1476397067308426, "learning_rate": 0.0003977515798104214, "loss": 2.1795, "step": 464180 }, { "epoch": 1.794428723848402, "grad_norm": 0.13331814110279083, "learning_rate": 0.00039762676008365627, "loss": 2.1777, "step": 464190 }, { "epoch": 1.7944673810517853, "grad_norm": 0.1343361735343933, "learning_rate": 0.00039750195007918967, "loss": 2.1882, "step": 464200 }, { "epoch": 1.7945060382551685, "grad_norm": 0.14114736020565033, "learning_rate": 0.0003973771497947498, "loss": 2.1801, "step": 464210 }, { "epoch": 1.7945446954585518, "grad_norm": 0.14076592028141022, "learning_rate": 0.00039725235922806614, "loss": 2.1783, "step": 464220 }, { "epoch": 1.794583352661935, "grad_norm": 0.1326855570077896, "learning_rate": 0.00039712757837686907, "loss": 2.1827, "step": 464230 }, { "epoch": 1.7946220098653183, "grad_norm": 0.14001700282096863, "learning_rate": 0.0003970028072388898, "loss": 2.1758, "step": 464240 }, { "epoch": 1.7946606670687015, "grad_norm": 0.12555120885372162, "learning_rate": 0.00039687804581186036, "loss": 2.1796, "step": 464250 }, { "epoch": 1.7946993242720848, "grad_norm": 0.13253574073314667, "learning_rate": 0.00039675329409351366, "loss": 2.2041, "step": 464260 }, { "epoch": 1.794737981475468, "grad_norm": 0.14871980249881744, "learning_rate": 0.0003966285520815833, "loss": 2.1794, "step": 464270 }, { "epoch": 1.7947766386788513, "grad_norm": 0.16246652603149414, "learning_rate": 0.00039650381977380443, "loss": 2.1792, "step": 464280 }, { "epoch": 1.7948152958822345, "grad_norm": 0.137555330991745, "learning_rate": 0.0003963790971679122, "loss": 2.1873, "step": 464290 }, { "epoch": 1.794853953085618, "grad_norm": 0.14714735746383667, "learning_rate": 0.0003962543842616435, "loss": 2.1782, "step": 464300 }, { "epoch": 1.7948926102890013, "grad_norm": 0.13641484081745148, "learning_rate": 0.0003961296810527355, "loss": 2.1937, "step": 464310 }, { "epoch": 1.7949312674923845, "grad_norm": 0.12888626754283905, "learning_rate": 0.0003960049875389262, "loss": 2.1855, "step": 464320 }, { "epoch": 1.7949699246957678, "grad_norm": 0.13622303307056427, "learning_rate": 0.000395880303717955, "loss": 2.1954, "step": 464330 }, { "epoch": 1.7950085818991512, "grad_norm": 0.13224974274635315, "learning_rate": 0.00039575562958756196, "loss": 2.1882, "step": 464340 }, { "epoch": 1.7950472391025345, "grad_norm": 0.12995639443397522, "learning_rate": 0.00039563096514548746, "loss": 2.1894, "step": 464350 }, { "epoch": 1.7950858963059177, "grad_norm": 0.13467252254486084, "learning_rate": 0.00039550631038947384, "loss": 2.1652, "step": 464360 }, { "epoch": 1.795124553509301, "grad_norm": 0.14471621811389923, "learning_rate": 0.0003953816653172631, "loss": 2.1806, "step": 464370 }, { "epoch": 1.7951632107126843, "grad_norm": 0.1446365863084793, "learning_rate": 0.00039525702992659916, "loss": 2.1708, "step": 464380 }, { "epoch": 1.7952018679160675, "grad_norm": 0.14779379963874817, "learning_rate": 0.0003951324042152262, "loss": 2.1792, "step": 464390 }, { "epoch": 1.7952405251194508, "grad_norm": 0.13273638486862183, "learning_rate": 0.00039500778818088954, "loss": 2.1942, "step": 464400 }, { "epoch": 1.795279182322834, "grad_norm": 0.14872750639915466, "learning_rate": 0.00039488318182133545, "loss": 2.1748, "step": 464410 }, { "epoch": 1.7953178395262173, "grad_norm": 0.1282881498336792, "learning_rate": 0.00039475858513431073, "loss": 2.1705, "step": 464420 }, { "epoch": 1.7953564967296005, "grad_norm": 0.13489757478237152, "learning_rate": 0.0003946339981175633, "loss": 2.188, "step": 464430 }, { "epoch": 1.7953951539329838, "grad_norm": 0.14069312810897827, "learning_rate": 0.0003945094207688418, "loss": 2.1712, "step": 464440 }, { "epoch": 1.795433811136367, "grad_norm": 0.14453202486038208, "learning_rate": 0.000394384853085896, "loss": 2.1757, "step": 464450 }, { "epoch": 1.7954724683397503, "grad_norm": 0.13123004138469696, "learning_rate": 0.0003942602950664762, "loss": 2.1844, "step": 464460 }, { "epoch": 1.7955111255431337, "grad_norm": 0.12672416865825653, "learning_rate": 0.00039413574670833394, "loss": 2.1749, "step": 464470 }, { "epoch": 1.795549782746517, "grad_norm": 0.13173723220825195, "learning_rate": 0.00039401120800922155, "loss": 2.1734, "step": 464480 }, { "epoch": 1.7955884399499003, "grad_norm": 0.13183413445949554, "learning_rate": 0.0003938866789668918, "loss": 2.1802, "step": 464490 }, { "epoch": 1.7956270971532835, "grad_norm": 0.1551782786846161, "learning_rate": 0.000393762159579099, "loss": 2.1851, "step": 464500 }, { "epoch": 1.795665754356667, "grad_norm": 0.14472754299640656, "learning_rate": 0.0003936376498435976, "loss": 2.1806, "step": 464510 }, { "epoch": 1.7957044115600502, "grad_norm": 0.12884870171546936, "learning_rate": 0.0003935131497581439, "loss": 2.1793, "step": 464520 }, { "epoch": 1.7957430687634335, "grad_norm": 0.1283208727836609, "learning_rate": 0.0003933886593204938, "loss": 2.1979, "step": 464530 }, { "epoch": 1.7957817259668167, "grad_norm": 0.12944021821022034, "learning_rate": 0.00039326417852840543, "loss": 2.1765, "step": 464540 }, { "epoch": 1.7958203831702, "grad_norm": 0.13233332335948944, "learning_rate": 0.00039313970737963656, "loss": 2.1776, "step": 464550 }, { "epoch": 1.7958590403735832, "grad_norm": 0.13329827785491943, "learning_rate": 0.0003930152458719467, "loss": 2.1701, "step": 464560 }, { "epoch": 1.7958976975769665, "grad_norm": 0.14701420068740845, "learning_rate": 0.0003928907940030957, "loss": 2.1762, "step": 464570 }, { "epoch": 1.7959363547803497, "grad_norm": 0.1379716545343399, "learning_rate": 0.0003927663517708446, "loss": 2.1741, "step": 464580 }, { "epoch": 1.795975011983733, "grad_norm": 0.14706137776374817, "learning_rate": 0.00039264191917295536, "loss": 2.177, "step": 464590 }, { "epoch": 1.7960136691871162, "grad_norm": 0.14323627948760986, "learning_rate": 0.00039251749620719046, "loss": 2.1757, "step": 464600 }, { "epoch": 1.7960523263904995, "grad_norm": 0.14055584371089935, "learning_rate": 0.0003923930828713134, "loss": 2.1903, "step": 464610 }, { "epoch": 1.7960909835938828, "grad_norm": 0.14579209685325623, "learning_rate": 0.0003922686791630885, "loss": 2.1685, "step": 464620 }, { "epoch": 1.796129640797266, "grad_norm": 0.14397454261779785, "learning_rate": 0.0003921442850802812, "loss": 2.1694, "step": 464630 }, { "epoch": 1.7961682980006495, "grad_norm": 0.13405250012874603, "learning_rate": 0.0003920199006206575, "loss": 2.1845, "step": 464640 }, { "epoch": 1.7962069552040327, "grad_norm": 0.12927570939064026, "learning_rate": 0.00039189552578198453, "loss": 2.1854, "step": 464650 }, { "epoch": 1.796245612407416, "grad_norm": 0.12672555446624756, "learning_rate": 0.00039177116056203, "loss": 2.1822, "step": 464660 }, { "epoch": 1.7962842696107992, "grad_norm": 0.12893100082874298, "learning_rate": 0.00039164680495856264, "loss": 2.1945, "step": 464670 }, { "epoch": 1.7963229268141827, "grad_norm": 0.15712900459766388, "learning_rate": 0.000391522458969352, "loss": 2.188, "step": 464680 }, { "epoch": 1.796361584017566, "grad_norm": 0.13878193497657776, "learning_rate": 0.00039139812259216856, "loss": 2.189, "step": 464690 }, { "epoch": 1.7964002412209492, "grad_norm": 0.13633514940738678, "learning_rate": 0.0003912737958247836, "loss": 2.169, "step": 464700 }, { "epoch": 1.7964388984243325, "grad_norm": 0.14353691041469574, "learning_rate": 0.0003911494786649694, "loss": 2.1845, "step": 464710 }, { "epoch": 1.7964775556277157, "grad_norm": 0.18341241776943207, "learning_rate": 0.00039102517111049863, "loss": 2.1812, "step": 464720 }, { "epoch": 1.796516212831099, "grad_norm": 0.13173289597034454, "learning_rate": 0.0003909008731591457, "loss": 2.168, "step": 464730 }, { "epoch": 1.7965548700344822, "grad_norm": 0.12816593050956726, "learning_rate": 0.00039077658480868484, "loss": 2.1878, "step": 464740 }, { "epoch": 1.7965935272378655, "grad_norm": 0.1346024125814438, "learning_rate": 0.0003906523060568918, "loss": 2.1862, "step": 464750 }, { "epoch": 1.7966321844412487, "grad_norm": 0.14077845215797424, "learning_rate": 0.00039052803690154336, "loss": 2.1944, "step": 464760 }, { "epoch": 1.796670841644632, "grad_norm": 0.1459149569272995, "learning_rate": 0.0003904037773404163, "loss": 2.1837, "step": 464770 }, { "epoch": 1.7967094988480152, "grad_norm": 0.13631995022296906, "learning_rate": 0.00039027952737128916, "loss": 2.1902, "step": 464780 }, { "epoch": 1.7967481560513985, "grad_norm": 0.14161813259124756, "learning_rate": 0.0003901552869919409, "loss": 2.1842, "step": 464790 }, { "epoch": 1.7967868132547817, "grad_norm": 0.13641470670700073, "learning_rate": 0.00039003105620015146, "loss": 2.1826, "step": 464800 }, { "epoch": 1.7968254704581652, "grad_norm": 0.13865359127521515, "learning_rate": 0.0003899068349937014, "loss": 2.1938, "step": 464810 }, { "epoch": 1.7968641276615485, "grad_norm": 0.1396956443786621, "learning_rate": 0.0003897826233703725, "loss": 2.1741, "step": 464820 }, { "epoch": 1.7969027848649317, "grad_norm": 0.13643811643123627, "learning_rate": 0.00038965842132794706, "loss": 2.1956, "step": 464830 }, { "epoch": 1.796941442068315, "grad_norm": 0.13304957747459412, "learning_rate": 0.0003895342288642083, "loss": 2.1757, "step": 464840 }, { "epoch": 1.7969800992716984, "grad_norm": 0.1358797699213028, "learning_rate": 0.00038941004597694076, "loss": 2.1777, "step": 464850 }, { "epoch": 1.7970187564750817, "grad_norm": 0.139873206615448, "learning_rate": 0.00038928587266392925, "loss": 2.1837, "step": 464860 }, { "epoch": 1.797057413678465, "grad_norm": 0.14250683784484863, "learning_rate": 0.00038916170892295957, "loss": 2.1822, "step": 464870 }, { "epoch": 1.7970960708818482, "grad_norm": 0.1290065497159958, "learning_rate": 0.00038903755475181855, "loss": 2.1697, "step": 464880 }, { "epoch": 1.7971347280852314, "grad_norm": 0.1323602795600891, "learning_rate": 0.00038891341014829365, "loss": 2.1984, "step": 464890 }, { "epoch": 1.7971733852886147, "grad_norm": 0.13214319944381714, "learning_rate": 0.0003887892751101736, "loss": 2.171, "step": 464900 }, { "epoch": 1.797212042491998, "grad_norm": 0.1308826357126236, "learning_rate": 0.00038866514963524734, "loss": 2.17, "step": 464910 }, { "epoch": 1.7972506996953812, "grad_norm": 0.13486738502979279, "learning_rate": 0.0003885410337213051, "loss": 2.1815, "step": 464920 }, { "epoch": 1.7972893568987645, "grad_norm": 0.13937675952911377, "learning_rate": 0.00038841692736613797, "loss": 2.1841, "step": 464930 }, { "epoch": 1.7973280141021477, "grad_norm": 0.1473415046930313, "learning_rate": 0.00038829283056753775, "loss": 2.1837, "step": 464940 }, { "epoch": 1.797366671305531, "grad_norm": 0.1369243860244751, "learning_rate": 0.000388168743323297, "loss": 2.1827, "step": 464950 }, { "epoch": 1.7974053285089142, "grad_norm": 0.13933010399341583, "learning_rate": 0.0003880446656312093, "loss": 2.1799, "step": 464960 }, { "epoch": 1.7974439857122975, "grad_norm": 0.13083308935165405, "learning_rate": 0.00038792059748906895, "loss": 2.183, "step": 464970 }, { "epoch": 1.797482642915681, "grad_norm": 0.13724645972251892, "learning_rate": 0.0003877965388946716, "loss": 2.1802, "step": 464980 }, { "epoch": 1.7975213001190642, "grad_norm": 0.1350887417793274, "learning_rate": 0.00038767248984581304, "loss": 2.1946, "step": 464990 }, { "epoch": 1.7975599573224474, "grad_norm": 0.1418774127960205, "learning_rate": 0.00038754845034028994, "loss": 2.1783, "step": 465000 }, { "epoch": 1.7975986145258307, "grad_norm": 0.14340370893478394, "learning_rate": 0.0003874244203759005, "loss": 2.1918, "step": 465010 }, { "epoch": 1.7976372717292142, "grad_norm": 0.1408160775899887, "learning_rate": 0.0003873003999504434, "loss": 2.1886, "step": 465020 }, { "epoch": 1.7976759289325974, "grad_norm": 0.14962686598300934, "learning_rate": 0.0003871763890617177, "loss": 2.1787, "step": 465030 }, { "epoch": 1.7977145861359807, "grad_norm": 0.12789714336395264, "learning_rate": 0.00038705238770752383, "loss": 2.1711, "step": 465040 }, { "epoch": 1.797753243339364, "grad_norm": 0.13508401811122894, "learning_rate": 0.0003869283958856631, "loss": 2.1685, "step": 465050 }, { "epoch": 1.7977919005427472, "grad_norm": 0.14755527675151825, "learning_rate": 0.00038680441359393745, "loss": 2.1775, "step": 465060 }, { "epoch": 1.7978305577461304, "grad_norm": 0.1413993090391159, "learning_rate": 0.00038668044083014987, "loss": 2.1773, "step": 465070 }, { "epoch": 1.7978692149495137, "grad_norm": 0.13822470605373383, "learning_rate": 0.00038655647759210354, "loss": 2.1674, "step": 465080 }, { "epoch": 1.797907872152897, "grad_norm": 0.1406533569097519, "learning_rate": 0.00038643252387760365, "loss": 2.1813, "step": 465090 }, { "epoch": 1.7979465293562802, "grad_norm": 0.13797791302204132, "learning_rate": 0.0003863085796844552, "loss": 2.1735, "step": 465100 }, { "epoch": 1.7979851865596634, "grad_norm": 0.13203437626361847, "learning_rate": 0.00038618464501046466, "loss": 2.1778, "step": 465110 }, { "epoch": 1.7980238437630467, "grad_norm": 0.13330654799938202, "learning_rate": 0.00038606071985343893, "loss": 2.1701, "step": 465120 }, { "epoch": 1.79806250096643, "grad_norm": 0.12800458073616028, "learning_rate": 0.00038593680421118574, "loss": 2.1876, "step": 465130 }, { "epoch": 1.7981011581698134, "grad_norm": 0.14644229412078857, "learning_rate": 0.0003858128980815143, "loss": 2.1572, "step": 465140 }, { "epoch": 1.7981398153731967, "grad_norm": 0.13411371409893036, "learning_rate": 0.00038568900146223386, "loss": 2.1805, "step": 465150 }, { "epoch": 1.79817847257658, "grad_norm": 0.13013438880443573, "learning_rate": 0.00038556511435115494, "loss": 2.184, "step": 465160 }, { "epoch": 1.7982171297799632, "grad_norm": 0.1430242508649826, "learning_rate": 0.00038544123674608865, "loss": 2.1809, "step": 465170 }, { "epoch": 1.7982557869833464, "grad_norm": 0.13126738369464874, "learning_rate": 0.0003853173686448472, "loss": 2.1838, "step": 465180 }, { "epoch": 1.79829444418673, "grad_norm": 0.13454361259937286, "learning_rate": 0.0003851935100452437, "loss": 2.1774, "step": 465190 }, { "epoch": 1.7983331013901132, "grad_norm": 0.1343718320131302, "learning_rate": 0.0003850696609450921, "loss": 2.1715, "step": 465200 }, { "epoch": 1.7983717585934964, "grad_norm": 0.13827650249004364, "learning_rate": 0.0003849458213422066, "loss": 2.1833, "step": 465210 }, { "epoch": 1.7984104157968797, "grad_norm": 0.15048812329769135, "learning_rate": 0.00038482199123440264, "loss": 2.1765, "step": 465220 }, { "epoch": 1.798449073000263, "grad_norm": 0.13728807866573334, "learning_rate": 0.000384698170619497, "loss": 2.1933, "step": 465230 }, { "epoch": 1.7984877302036462, "grad_norm": 0.13223780691623688, "learning_rate": 0.0003845743594953064, "loss": 2.1672, "step": 465240 }, { "epoch": 1.7985263874070294, "grad_norm": 0.13138167560100555, "learning_rate": 0.0003844505578596489, "loss": 2.1835, "step": 465250 }, { "epoch": 1.7985650446104127, "grad_norm": 0.13611911237239838, "learning_rate": 0.0003843267657103433, "loss": 2.179, "step": 465260 }, { "epoch": 1.798603701813796, "grad_norm": 0.14531941711902618, "learning_rate": 0.0003842029830452094, "loss": 2.1804, "step": 465270 }, { "epoch": 1.7986423590171792, "grad_norm": 0.1326284259557724, "learning_rate": 0.00038407920986206756, "loss": 2.1778, "step": 465280 }, { "epoch": 1.7986810162205624, "grad_norm": 0.13530728220939636, "learning_rate": 0.00038395544615873914, "loss": 2.1922, "step": 465290 }, { "epoch": 1.7987196734239457, "grad_norm": 0.13688497245311737, "learning_rate": 0.00038383169193304624, "loss": 2.1731, "step": 465300 }, { "epoch": 1.7987583306273291, "grad_norm": 0.13359713554382324, "learning_rate": 0.000383707947182812, "loss": 2.1863, "step": 465310 }, { "epoch": 1.7987969878307124, "grad_norm": 0.13442204892635345, "learning_rate": 0.0003835842119058599, "loss": 2.1773, "step": 465320 }, { "epoch": 1.7988356450340957, "grad_norm": 0.13345547020435333, "learning_rate": 0.000383460486100015, "loss": 2.1906, "step": 465330 }, { "epoch": 1.798874302237479, "grad_norm": 0.13196028769016266, "learning_rate": 0.0003833367697631025, "loss": 2.1886, "step": 465340 }, { "epoch": 1.7989129594408622, "grad_norm": 0.1448703408241272, "learning_rate": 0.00038321306289294886, "loss": 2.185, "step": 465350 }, { "epoch": 1.7989516166442456, "grad_norm": 0.1323598027229309, "learning_rate": 0.00038308936548738106, "loss": 2.1767, "step": 465360 }, { "epoch": 1.7989902738476289, "grad_norm": 0.13242816925048828, "learning_rate": 0.0003829656775442274, "loss": 2.1704, "step": 465370 }, { "epoch": 1.7990289310510121, "grad_norm": 0.13908936083316803, "learning_rate": 0.00038284199906131613, "loss": 2.178, "step": 465380 }, { "epoch": 1.7990675882543954, "grad_norm": 0.14099274575710297, "learning_rate": 0.0003827183300364776, "loss": 2.1736, "step": 465390 }, { "epoch": 1.7991062454577786, "grad_norm": 0.13117602467536926, "learning_rate": 0.0003825946704675416, "loss": 2.1667, "step": 465400 }, { "epoch": 1.799144902661162, "grad_norm": 0.13193495571613312, "learning_rate": 0.00038247102035234004, "loss": 2.1999, "step": 465410 }, { "epoch": 1.7991835598645451, "grad_norm": 0.13038615882396698, "learning_rate": 0.0003823473796887047, "loss": 2.1899, "step": 465420 }, { "epoch": 1.7992222170679284, "grad_norm": 0.13985690474510193, "learning_rate": 0.0003822237484744684, "loss": 2.1819, "step": 465430 }, { "epoch": 1.7992608742713116, "grad_norm": 0.13930624723434448, "learning_rate": 0.0003821001267074653, "loss": 2.1834, "step": 465440 }, { "epoch": 1.799299531474695, "grad_norm": 0.1398736536502838, "learning_rate": 0.00038197651438552985, "loss": 2.1811, "step": 465450 }, { "epoch": 1.7993381886780782, "grad_norm": 0.14476174116134644, "learning_rate": 0.00038185291150649725, "loss": 2.1798, "step": 465460 }, { "epoch": 1.7993768458814614, "grad_norm": 0.13657523691654205, "learning_rate": 0.00038172931806820397, "loss": 2.1647, "step": 465470 }, { "epoch": 1.7994155030848449, "grad_norm": 0.13949531316757202, "learning_rate": 0.0003816057340684873, "loss": 2.1851, "step": 465480 }, { "epoch": 1.7994541602882281, "grad_norm": 0.1438172459602356, "learning_rate": 0.0003814821595051847, "loss": 2.1769, "step": 465490 }, { "epoch": 1.7994928174916114, "grad_norm": 0.14232169091701508, "learning_rate": 0.00038135859437613554, "loss": 2.1664, "step": 465500 }, { "epoch": 1.7995314746949946, "grad_norm": 0.14083006978034973, "learning_rate": 0.00038123503867917873, "loss": 2.1734, "step": 465510 }, { "epoch": 1.799570131898378, "grad_norm": 0.2224435806274414, "learning_rate": 0.00038111149241215505, "loss": 2.1903, "step": 465520 }, { "epoch": 1.7996087891017614, "grad_norm": 0.13325290381908417, "learning_rate": 0.0003809879555729057, "loss": 2.1779, "step": 465530 }, { "epoch": 1.7996474463051446, "grad_norm": 0.13875515758991241, "learning_rate": 0.0003808644281592724, "loss": 2.1839, "step": 465540 }, { "epoch": 1.7996861035085279, "grad_norm": 0.12978360056877136, "learning_rate": 0.0003807409101690984, "loss": 2.1805, "step": 465550 }, { "epoch": 1.7997247607119111, "grad_norm": 0.1447114795446396, "learning_rate": 0.0003806174016002273, "loss": 2.1888, "step": 465560 }, { "epoch": 1.7997634179152944, "grad_norm": 0.14308002591133118, "learning_rate": 0.0003804939024505034, "loss": 2.1888, "step": 465570 }, { "epoch": 1.7998020751186776, "grad_norm": 0.131975919008255, "learning_rate": 0.00038037041271777206, "loss": 2.1964, "step": 465580 }, { "epoch": 1.7998407323220609, "grad_norm": 0.1352776437997818, "learning_rate": 0.0003802469323998796, "loss": 2.1995, "step": 465590 }, { "epoch": 1.7998793895254441, "grad_norm": 0.1364874243736267, "learning_rate": 0.00038012346149467294, "loss": 2.1741, "step": 465600 }, { "epoch": 1.7999180467288274, "grad_norm": 0.13249623775482178, "learning_rate": 0.0003799999999999999, "loss": 2.1845, "step": 465610 }, { "epoch": 1.7999567039322106, "grad_norm": 0.13581779599189758, "learning_rate": 0.00037987654791370916, "loss": 2.1756, "step": 465620 }, { "epoch": 1.7999953611355939, "grad_norm": 0.13477849960327148, "learning_rate": 0.0003797531052336498, "loss": 2.1781, "step": 465630 }, { "epoch": 1.8000340183389771, "grad_norm": 0.14062079787254333, "learning_rate": 0.00037962967195767264, "loss": 2.1815, "step": 465640 }, { "epoch": 1.8000726755423606, "grad_norm": 0.13062138855457306, "learning_rate": 0.00037950624808362823, "loss": 2.1799, "step": 465650 }, { "epoch": 1.8001113327457439, "grad_norm": 0.1472616344690323, "learning_rate": 0.00037938283360936854, "loss": 2.1665, "step": 465660 }, { "epoch": 1.8001499899491271, "grad_norm": 0.1320866197347641, "learning_rate": 0.0003792594285327464, "loss": 2.1736, "step": 465670 }, { "epoch": 1.8001886471525104, "grad_norm": 0.131913959980011, "learning_rate": 0.0003791360328516151, "loss": 2.1878, "step": 465680 }, { "epoch": 1.8002273043558938, "grad_norm": 0.1491318941116333, "learning_rate": 0.0003790126465638295, "loss": 2.1772, "step": 465690 }, { "epoch": 1.800265961559277, "grad_norm": 0.1818619966506958, "learning_rate": 0.00037888926966724436, "loss": 2.1752, "step": 465700 }, { "epoch": 1.8003046187626603, "grad_norm": 0.14308425784111023, "learning_rate": 0.00037876590215971584, "loss": 2.1712, "step": 465710 }, { "epoch": 1.8003432759660436, "grad_norm": 0.1418374925851822, "learning_rate": 0.0003786425440391006, "loss": 2.1756, "step": 465720 }, { "epoch": 1.8003819331694269, "grad_norm": 0.12860672175884247, "learning_rate": 0.00037851919530325606, "loss": 2.1726, "step": 465730 }, { "epoch": 1.80042059037281, "grad_norm": 0.13387353718280792, "learning_rate": 0.0003783958559500411, "loss": 2.189, "step": 465740 }, { "epoch": 1.8004592475761934, "grad_norm": 0.14423732459545135, "learning_rate": 0.0003782725259773145, "loss": 2.1842, "step": 465750 }, { "epoch": 1.8004979047795766, "grad_norm": 0.13406185805797577, "learning_rate": 0.0003781492053829367, "loss": 2.1937, "step": 465760 }, { "epoch": 1.8005365619829599, "grad_norm": 0.15299859642982483, "learning_rate": 0.0003780258941647683, "loss": 2.1682, "step": 465770 }, { "epoch": 1.8005752191863431, "grad_norm": 0.14690442383289337, "learning_rate": 0.0003779025923206707, "loss": 2.1801, "step": 465780 }, { "epoch": 1.8006138763897264, "grad_norm": 0.1272706538438797, "learning_rate": 0.000377779299848507, "loss": 2.1661, "step": 465790 }, { "epoch": 1.8006525335931096, "grad_norm": 0.13635335862636566, "learning_rate": 0.00037765601674614026, "loss": 2.1781, "step": 465800 }, { "epoch": 1.8006911907964929, "grad_norm": 0.1357850581407547, "learning_rate": 0.00037753274301143436, "loss": 2.1726, "step": 465810 }, { "epoch": 1.8007298479998763, "grad_norm": 0.14069999754428864, "learning_rate": 0.0003774094786422546, "loss": 2.1902, "step": 465820 }, { "epoch": 1.8007685052032596, "grad_norm": 0.15152522921562195, "learning_rate": 0.00037728622363646645, "loss": 2.1753, "step": 465830 }, { "epoch": 1.8008071624066428, "grad_norm": 0.13653448224067688, "learning_rate": 0.00037716297799193657, "loss": 2.1876, "step": 465840 }, { "epoch": 1.800845819610026, "grad_norm": 0.13473238050937653, "learning_rate": 0.0003770397417065321, "loss": 2.1756, "step": 465850 }, { "epoch": 1.8008844768134096, "grad_norm": 0.14700768887996674, "learning_rate": 0.0003769165147781215, "loss": 2.1749, "step": 465860 }, { "epoch": 1.8009231340167928, "grad_norm": 0.13021835684776306, "learning_rate": 0.00037679329720457356, "loss": 2.1691, "step": 465870 }, { "epoch": 1.800961791220176, "grad_norm": 0.1447163224220276, "learning_rate": 0.000376670088983758, "loss": 2.1935, "step": 465880 }, { "epoch": 1.8010004484235593, "grad_norm": 0.13230176270008087, "learning_rate": 0.0003765468901135456, "loss": 2.1703, "step": 465890 }, { "epoch": 1.8010391056269426, "grad_norm": 0.13884708285331726, "learning_rate": 0.00037642370059180763, "loss": 2.1827, "step": 465900 }, { "epoch": 1.8010777628303258, "grad_norm": 0.1411489099264145, "learning_rate": 0.0003763005204164165, "loss": 2.1745, "step": 465910 }, { "epoch": 1.801116420033709, "grad_norm": 0.1436185985803604, "learning_rate": 0.000376177349585245, "loss": 2.1955, "step": 465920 }, { "epoch": 1.8011550772370923, "grad_norm": 0.1410207450389862, "learning_rate": 0.00037605418809616675, "loss": 2.1815, "step": 465930 }, { "epoch": 1.8011937344404756, "grad_norm": 0.1412554234266281, "learning_rate": 0.0003759310359470569, "loss": 2.1914, "step": 465940 }, { "epoch": 1.8012323916438588, "grad_norm": 0.14789709448814392, "learning_rate": 0.00037580789313579045, "loss": 2.1907, "step": 465950 }, { "epoch": 1.801271048847242, "grad_norm": 0.14720627665519714, "learning_rate": 0.0003756847596602437, "loss": 2.1826, "step": 465960 }, { "epoch": 1.8013097060506253, "grad_norm": 0.1361972987651825, "learning_rate": 0.00037556163551829403, "loss": 2.1794, "step": 465970 }, { "epoch": 1.8013483632540086, "grad_norm": 0.14298291504383087, "learning_rate": 0.0003754385207078188, "loss": 2.1696, "step": 465980 }, { "epoch": 1.801387020457392, "grad_norm": 0.13563038408756256, "learning_rate": 0.00037531541522669686, "loss": 2.1697, "step": 465990 }, { "epoch": 1.8014256776607753, "grad_norm": 0.1419105976819992, "learning_rate": 0.00037519231907280794, "loss": 2.1756, "step": 466000 }, { "epoch": 1.8014643348641586, "grad_norm": 0.13791169226169586, "learning_rate": 0.0003750692322440319, "loss": 2.1956, "step": 466010 }, { "epoch": 1.8015029920675418, "grad_norm": 0.12917031347751617, "learning_rate": 0.00037494615473825, "loss": 2.175, "step": 466020 }, { "epoch": 1.8015416492709253, "grad_norm": 0.13964608311653137, "learning_rate": 0.00037482308655334396, "loss": 2.1758, "step": 466030 }, { "epoch": 1.8015803064743086, "grad_norm": 0.15289010107517242, "learning_rate": 0.00037470002768719656, "loss": 2.1872, "step": 466040 }, { "epoch": 1.8016189636776918, "grad_norm": 0.14537134766578674, "learning_rate": 0.00037457697813769107, "loss": 2.181, "step": 466050 }, { "epoch": 1.801657620881075, "grad_norm": 0.13884811103343964, "learning_rate": 0.0003744539379027121, "loss": 2.1666, "step": 466060 }, { "epoch": 1.8016962780844583, "grad_norm": 0.14185583591461182, "learning_rate": 0.0003743309069801444, "loss": 2.1697, "step": 466070 }, { "epoch": 1.8017349352878416, "grad_norm": 0.12399603426456451, "learning_rate": 0.0003742078853678739, "loss": 2.1698, "step": 466080 }, { "epoch": 1.8017735924912248, "grad_norm": 0.13981971144676208, "learning_rate": 0.00037408487306378734, "loss": 2.1871, "step": 466090 }, { "epoch": 1.801812249694608, "grad_norm": 0.13385088741779327, "learning_rate": 0.0003739618700657723, "loss": 2.1776, "step": 466100 }, { "epoch": 1.8018509068979913, "grad_norm": 0.14517062902450562, "learning_rate": 0.0003738388763717169, "loss": 2.1799, "step": 466110 }, { "epoch": 1.8018895641013746, "grad_norm": 0.1450577825307846, "learning_rate": 0.00037371589197951027, "loss": 2.1745, "step": 466120 }, { "epoch": 1.8019282213047578, "grad_norm": 0.13384157419204712, "learning_rate": 0.000373592916887042, "loss": 2.1802, "step": 466130 }, { "epoch": 1.801966878508141, "grad_norm": 0.14221005141735077, "learning_rate": 0.0003734699510922033, "loss": 2.1944, "step": 466140 }, { "epoch": 1.8020055357115243, "grad_norm": 0.1455065906047821, "learning_rate": 0.0003733469945928849, "loss": 2.1854, "step": 466150 }, { "epoch": 1.8020441929149078, "grad_norm": 0.138362854719162, "learning_rate": 0.00037322404738697967, "loss": 2.1867, "step": 466160 }, { "epoch": 1.802082850118291, "grad_norm": 0.14707984030246735, "learning_rate": 0.0003731011094723804, "loss": 2.1681, "step": 466170 }, { "epoch": 1.8021215073216743, "grad_norm": 0.13016606867313385, "learning_rate": 0.0003729781808469808, "loss": 2.1755, "step": 466180 }, { "epoch": 1.8021601645250576, "grad_norm": 0.1483168751001358, "learning_rate": 0.0003728552615086758, "loss": 2.1808, "step": 466190 }, { "epoch": 1.802198821728441, "grad_norm": 0.13777334988117218, "learning_rate": 0.00037273235145536066, "loss": 2.1752, "step": 466200 }, { "epoch": 1.8022374789318243, "grad_norm": 0.13940338790416718, "learning_rate": 0.0003726094506849316, "loss": 2.1796, "step": 466210 }, { "epoch": 1.8022761361352075, "grad_norm": 0.13678772747516632, "learning_rate": 0.0003724865591952857, "loss": 2.1863, "step": 466220 }, { "epoch": 1.8023147933385908, "grad_norm": 0.13423606753349304, "learning_rate": 0.00037236367698432083, "loss": 2.1723, "step": 466230 }, { "epoch": 1.802353450541974, "grad_norm": 0.14014889299869537, "learning_rate": 0.00037224080404993565, "loss": 2.1795, "step": 466240 }, { "epoch": 1.8023921077453573, "grad_norm": 0.14486335217952728, "learning_rate": 0.00037211794039002945, "loss": 2.1824, "step": 466250 }, { "epoch": 1.8024307649487405, "grad_norm": 0.14388778805732727, "learning_rate": 0.00037199508600250234, "loss": 2.1685, "step": 466260 }, { "epoch": 1.8024694221521238, "grad_norm": 0.13235856592655182, "learning_rate": 0.0003718722408852555, "loss": 2.1741, "step": 466270 }, { "epoch": 1.802508079355507, "grad_norm": 0.1384734809398651, "learning_rate": 0.00037174940503619045, "loss": 2.1746, "step": 466280 }, { "epoch": 1.8025467365588903, "grad_norm": 0.13937100768089294, "learning_rate": 0.0003716265784532098, "loss": 2.1831, "step": 466290 }, { "epoch": 1.8025853937622736, "grad_norm": 0.14748027920722961, "learning_rate": 0.00037150376113421736, "loss": 2.1725, "step": 466300 }, { "epoch": 1.8026240509656568, "grad_norm": 0.1350601464509964, "learning_rate": 0.00037138095307711683, "loss": 2.193, "step": 466310 }, { "epoch": 1.80266270816904, "grad_norm": 0.14014708995819092, "learning_rate": 0.00037125815427981344, "loss": 2.1666, "step": 466320 }, { "epoch": 1.8027013653724235, "grad_norm": 0.13212034106254578, "learning_rate": 0.0003711353647402127, "loss": 2.1827, "step": 466330 }, { "epoch": 1.8027400225758068, "grad_norm": 0.13790425658226013, "learning_rate": 0.0003710125844562211, "loss": 2.1748, "step": 466340 }, { "epoch": 1.80277867977919, "grad_norm": 0.1323886662721634, "learning_rate": 0.00037088981342574636, "loss": 2.1774, "step": 466350 }, { "epoch": 1.8028173369825733, "grad_norm": 0.1366249918937683, "learning_rate": 0.0003707670516466959, "loss": 2.1653, "step": 466360 }, { "epoch": 1.8028559941859568, "grad_norm": 0.13770009577274323, "learning_rate": 0.0003706442991169792, "loss": 2.1673, "step": 466370 }, { "epoch": 1.80289465138934, "grad_norm": 0.13911324739456177, "learning_rate": 0.00037052155583450566, "loss": 2.1823, "step": 466380 }, { "epoch": 1.8029333085927233, "grad_norm": 0.13929787278175354, "learning_rate": 0.0003703988217971859, "loss": 2.1691, "step": 466390 }, { "epoch": 1.8029719657961065, "grad_norm": 0.13730750977993011, "learning_rate": 0.00037027609700293086, "loss": 2.1581, "step": 466400 }, { "epoch": 1.8030106229994898, "grad_norm": 0.13073213398456573, "learning_rate": 0.00037015338144965313, "loss": 2.1739, "step": 466410 }, { "epoch": 1.803049280202873, "grad_norm": 0.16473360359668732, "learning_rate": 0.0003700306751352649, "loss": 2.1711, "step": 466420 }, { "epoch": 1.8030879374062563, "grad_norm": 0.12526480853557587, "learning_rate": 0.0003699079780576804, "loss": 2.16, "step": 466430 }, { "epoch": 1.8031265946096395, "grad_norm": 0.13086701929569244, "learning_rate": 0.0003697852902148136, "loss": 2.1803, "step": 466440 }, { "epoch": 1.8031652518130228, "grad_norm": 0.14068730175495148, "learning_rate": 0.0003696626116045796, "loss": 2.1749, "step": 466450 }, { "epoch": 1.803203909016406, "grad_norm": 0.12961159646511078, "learning_rate": 0.0003695399422248946, "loss": 2.186, "step": 466460 }, { "epoch": 1.8032425662197893, "grad_norm": 0.13988232612609863, "learning_rate": 0.00036941728207367543, "loss": 2.1866, "step": 466470 }, { "epoch": 1.8032812234231725, "grad_norm": 0.1488853394985199, "learning_rate": 0.00036929463114883946, "loss": 2.1771, "step": 466480 }, { "epoch": 1.8033198806265558, "grad_norm": 0.13858400285243988, "learning_rate": 0.00036917198944830476, "loss": 2.1578, "step": 466490 }, { "epoch": 1.8033585378299393, "grad_norm": 0.1406715363264084, "learning_rate": 0.0003690493569699909, "loss": 2.1727, "step": 466500 }, { "epoch": 1.8033971950333225, "grad_norm": 0.14598716795444489, "learning_rate": 0.00036892673371181765, "loss": 2.1956, "step": 466510 }, { "epoch": 1.8034358522367058, "grad_norm": 0.1434890627861023, "learning_rate": 0.00036880411967170535, "loss": 2.1911, "step": 466520 }, { "epoch": 1.803474509440089, "grad_norm": 0.13857607543468475, "learning_rate": 0.00036868151484757575, "loss": 2.165, "step": 466530 }, { "epoch": 1.8035131666434725, "grad_norm": 0.13821934163570404, "learning_rate": 0.00036855891923735106, "loss": 2.1851, "step": 466540 }, { "epoch": 1.8035518238468558, "grad_norm": 0.15694352984428406, "learning_rate": 0.0003684363328389542, "loss": 2.1821, "step": 466550 }, { "epoch": 1.803590481050239, "grad_norm": 0.24491091072559357, "learning_rate": 0.00036831375565030887, "loss": 2.1643, "step": 466560 }, { "epoch": 1.8036291382536223, "grad_norm": 0.13402949273586273, "learning_rate": 0.0003681911876693398, "loss": 2.1775, "step": 466570 }, { "epoch": 1.8036677954570055, "grad_norm": 0.1315721869468689, "learning_rate": 0.0003680686288939723, "loss": 2.1647, "step": 466580 }, { "epoch": 1.8037064526603888, "grad_norm": 0.1430567055940628, "learning_rate": 0.0003679460793221321, "loss": 2.1823, "step": 466590 }, { "epoch": 1.803745109863772, "grad_norm": 0.15081314742565155, "learning_rate": 0.0003678235389517468, "loss": 2.1719, "step": 466600 }, { "epoch": 1.8037837670671553, "grad_norm": 0.36761045455932617, "learning_rate": 0.00036770100778074367, "loss": 2.1738, "step": 466610 }, { "epoch": 1.8038224242705385, "grad_norm": 0.13689853250980377, "learning_rate": 0.000367578485807051, "loss": 2.1746, "step": 466620 }, { "epoch": 1.8038610814739218, "grad_norm": 0.14220766723155975, "learning_rate": 0.00036745597302859843, "loss": 2.1867, "step": 466630 }, { "epoch": 1.803899738677305, "grad_norm": 0.13954247534275055, "learning_rate": 0.00036733346944331594, "loss": 2.1849, "step": 466640 }, { "epoch": 1.8039383958806883, "grad_norm": 0.14342482388019562, "learning_rate": 0.0003672109750491339, "loss": 2.1891, "step": 466650 }, { "epoch": 1.8039770530840715, "grad_norm": 0.13386207818984985, "learning_rate": 0.00036708848984398434, "loss": 2.1792, "step": 466660 }, { "epoch": 1.804015710287455, "grad_norm": 0.14620913565158844, "learning_rate": 0.0003669660138257993, "loss": 2.1828, "step": 466670 }, { "epoch": 1.8040543674908383, "grad_norm": 0.13415135443210602, "learning_rate": 0.0003668435469925118, "loss": 2.1802, "step": 466680 }, { "epoch": 1.8040930246942215, "grad_norm": 0.14544934034347534, "learning_rate": 0.0003667210893420561, "loss": 2.1764, "step": 466690 }, { "epoch": 1.8041316818976048, "grad_norm": 0.1273856908082962, "learning_rate": 0.0003665986408723665, "loss": 2.179, "step": 466700 }, { "epoch": 1.8041703391009882, "grad_norm": 0.13600336015224457, "learning_rate": 0.0003664762015813787, "loss": 2.1661, "step": 466710 }, { "epoch": 1.8042089963043715, "grad_norm": 0.14356358349323273, "learning_rate": 0.0003663537714670291, "loss": 2.1596, "step": 466720 }, { "epoch": 1.8042476535077547, "grad_norm": 0.1356818675994873, "learning_rate": 0.0003662313505272541, "loss": 2.1816, "step": 466730 }, { "epoch": 1.804286310711138, "grad_norm": 0.13973554968833923, "learning_rate": 0.00036610893875999186, "loss": 2.1774, "step": 466740 }, { "epoch": 1.8043249679145212, "grad_norm": 0.1400384157896042, "learning_rate": 0.0003659865361631809, "loss": 2.1816, "step": 466750 }, { "epoch": 1.8043636251179045, "grad_norm": 0.13273154199123383, "learning_rate": 0.0003658641427347602, "loss": 2.1762, "step": 466760 }, { "epoch": 1.8044022823212877, "grad_norm": 0.1496812403202057, "learning_rate": 0.00036574175847267035, "loss": 2.1769, "step": 466770 }, { "epoch": 1.804440939524671, "grad_norm": 0.1461144983768463, "learning_rate": 0.0003656193833748518, "loss": 2.1795, "step": 466780 }, { "epoch": 1.8044795967280542, "grad_norm": 0.13886010646820068, "learning_rate": 0.000365497017439246, "loss": 2.1781, "step": 466790 }, { "epoch": 1.8045182539314375, "grad_norm": 0.1413482129573822, "learning_rate": 0.000365374660663796, "loss": 2.1923, "step": 466800 }, { "epoch": 1.8045569111348208, "grad_norm": 0.1350710391998291, "learning_rate": 0.00036525231304644444, "loss": 2.1937, "step": 466810 }, { "epoch": 1.804595568338204, "grad_norm": 0.14257410168647766, "learning_rate": 0.0003651299745851353, "loss": 2.1789, "step": 466820 }, { "epoch": 1.8046342255415873, "grad_norm": 0.1451081931591034, "learning_rate": 0.0003650076452778135, "loss": 2.1837, "step": 466830 }, { "epoch": 1.8046728827449707, "grad_norm": 0.13941356539726257, "learning_rate": 0.00036488532512242444, "loss": 2.1796, "step": 466840 }, { "epoch": 1.804711539948354, "grad_norm": 0.14537939429283142, "learning_rate": 0.00036476301411691406, "loss": 2.1741, "step": 466850 }, { "epoch": 1.8047501971517372, "grad_norm": 0.14455877244472504, "learning_rate": 0.0003646407122592297, "loss": 2.1726, "step": 466860 }, { "epoch": 1.8047888543551205, "grad_norm": 0.1392398625612259, "learning_rate": 0.0003645184195473188, "loss": 2.1761, "step": 466870 }, { "epoch": 1.804827511558504, "grad_norm": 0.13840122520923615, "learning_rate": 0.00036439613597913037, "loss": 2.1871, "step": 466880 }, { "epoch": 1.8048661687618872, "grad_norm": 0.13393983244895935, "learning_rate": 0.0003642738615526131, "loss": 2.1587, "step": 466890 }, { "epoch": 1.8049048259652705, "grad_norm": 0.12925367057323456, "learning_rate": 0.0003641515962657176, "loss": 2.1718, "step": 466900 }, { "epoch": 1.8049434831686537, "grad_norm": 0.13216130435466766, "learning_rate": 0.00036402934011639434, "loss": 2.1776, "step": 466910 }, { "epoch": 1.804982140372037, "grad_norm": 0.1310267150402069, "learning_rate": 0.0003639070931025952, "loss": 2.1727, "step": 466920 }, { "epoch": 1.8050207975754202, "grad_norm": 0.14622503519058228, "learning_rate": 0.0003637848552222725, "loss": 2.1599, "step": 466930 }, { "epoch": 1.8050594547788035, "grad_norm": 0.1343652307987213, "learning_rate": 0.0003636626264733791, "loss": 2.1721, "step": 466940 }, { "epoch": 1.8050981119821867, "grad_norm": 0.1314699500799179, "learning_rate": 0.00036354040685386926, "loss": 2.1659, "step": 466950 }, { "epoch": 1.80513676918557, "grad_norm": 0.13724282383918762, "learning_rate": 0.00036341819636169736, "loss": 2.177, "step": 466960 }, { "epoch": 1.8051754263889532, "grad_norm": 0.13903264701366425, "learning_rate": 0.00036329599499481893, "loss": 2.1807, "step": 466970 }, { "epoch": 1.8052140835923365, "grad_norm": 0.13293766975402832, "learning_rate": 0.0003631738027511902, "loss": 2.1769, "step": 466980 }, { "epoch": 1.8052527407957197, "grad_norm": 0.13913320004940033, "learning_rate": 0.00036305161962876787, "loss": 2.1863, "step": 466990 }, { "epoch": 1.805291397999103, "grad_norm": 0.14354361593723297, "learning_rate": 0.0003629294456255099, "loss": 2.1992, "step": 467000 }, { "epoch": 1.8053300552024865, "grad_norm": 0.14252910017967224, "learning_rate": 0.000362807280739375, "loss": 2.1758, "step": 467010 }, { "epoch": 1.8053687124058697, "grad_norm": 0.14904700219631195, "learning_rate": 0.00036268512496832184, "loss": 2.1866, "step": 467020 }, { "epoch": 1.805407369609253, "grad_norm": 0.12930892407894135, "learning_rate": 0.0003625629783103106, "loss": 2.1848, "step": 467030 }, { "epoch": 1.8054460268126362, "grad_norm": 0.13121457397937775, "learning_rate": 0.0003624408407633024, "loss": 2.1684, "step": 467040 }, { "epoch": 1.8054846840160197, "grad_norm": 0.1382637470960617, "learning_rate": 0.0003623187123252583, "loss": 2.171, "step": 467050 }, { "epoch": 1.805523341219403, "grad_norm": 0.12879937887191772, "learning_rate": 0.0003621965929941409, "loss": 2.1775, "step": 467060 }, { "epoch": 1.8055619984227862, "grad_norm": 0.13088010251522064, "learning_rate": 0.000362074482767913, "loss": 2.1683, "step": 467070 }, { "epoch": 1.8056006556261694, "grad_norm": 0.1338772475719452, "learning_rate": 0.0003619523816445385, "loss": 2.1611, "step": 467080 }, { "epoch": 1.8056393128295527, "grad_norm": 0.13470415771007538, "learning_rate": 0.00036183028962198183, "loss": 2.1836, "step": 467090 }, { "epoch": 1.805677970032936, "grad_norm": 0.13731078803539276, "learning_rate": 0.0003617082066982085, "loss": 2.1805, "step": 467100 }, { "epoch": 1.8057166272363192, "grad_norm": 0.1420268565416336, "learning_rate": 0.0003615861328711845, "loss": 2.1831, "step": 467110 }, { "epoch": 1.8057552844397025, "grad_norm": 0.1401713788509369, "learning_rate": 0.0003614640681388766, "loss": 2.1768, "step": 467120 }, { "epoch": 1.8057939416430857, "grad_norm": 0.12639404833316803, "learning_rate": 0.00036134201249925257, "loss": 2.1889, "step": 467130 }, { "epoch": 1.805832598846469, "grad_norm": 0.14168035984039307, "learning_rate": 0.0003612199659502804, "loss": 2.1738, "step": 467140 }, { "epoch": 1.8058712560498522, "grad_norm": 0.12691108882427216, "learning_rate": 0.00036109792848992964, "loss": 2.1743, "step": 467150 }, { "epoch": 1.8059099132532355, "grad_norm": 0.15322566032409668, "learning_rate": 0.0003609759001161699, "loss": 2.1749, "step": 467160 }, { "epoch": 1.805948570456619, "grad_norm": 0.1456574946641922, "learning_rate": 0.0003608538808269717, "loss": 2.1733, "step": 467170 }, { "epoch": 1.8059872276600022, "grad_norm": 0.14703112840652466, "learning_rate": 0.0003607318706203064, "loss": 2.1685, "step": 467180 }, { "epoch": 1.8060258848633854, "grad_norm": 0.6242895126342773, "learning_rate": 0.0003606098694941462, "loss": 2.1638, "step": 467190 }, { "epoch": 1.8060645420667687, "grad_norm": 0.1482149362564087, "learning_rate": 0.00036048787744646417, "loss": 2.17, "step": 467200 }, { "epoch": 1.806103199270152, "grad_norm": 0.13098423182964325, "learning_rate": 0.0003603658944752337, "loss": 2.1666, "step": 467210 }, { "epoch": 1.8061418564735354, "grad_norm": 0.13495689630508423, "learning_rate": 0.00036024392057842935, "loss": 2.1674, "step": 467220 }, { "epoch": 1.8061805136769187, "grad_norm": 0.13893181085586548, "learning_rate": 0.0003601219557540263, "loss": 2.1786, "step": 467230 }, { "epoch": 1.806219170880302, "grad_norm": 0.13123945891857147, "learning_rate": 0.00036000000000000013, "loss": 2.1702, "step": 467240 }, { "epoch": 1.8062578280836852, "grad_norm": 0.13648271560668945, "learning_rate": 0.00035987805331432756, "loss": 2.1849, "step": 467250 }, { "epoch": 1.8062964852870684, "grad_norm": 0.14485813677310944, "learning_rate": 0.000359756115694986, "loss": 2.1662, "step": 467260 }, { "epoch": 1.8063351424904517, "grad_norm": 0.14141114056110382, "learning_rate": 0.000359634187139954, "loss": 2.1704, "step": 467270 }, { "epoch": 1.806373799693835, "grad_norm": 0.14906097948551178, "learning_rate": 0.00035951226764720977, "loss": 2.1653, "step": 467280 }, { "epoch": 1.8064124568972182, "grad_norm": 0.13113735616207123, "learning_rate": 0.00035939035721473345, "loss": 2.1679, "step": 467290 }, { "epoch": 1.8064511141006014, "grad_norm": 0.13685385882854462, "learning_rate": 0.00035926845584050525, "loss": 2.1739, "step": 467300 }, { "epoch": 1.8064897713039847, "grad_norm": 0.1268870234489441, "learning_rate": 0.0003591465635225064, "loss": 2.1784, "step": 467310 }, { "epoch": 1.806528428507368, "grad_norm": 0.13800711929798126, "learning_rate": 0.0003590246802587187, "loss": 2.1711, "step": 467320 }, { "epoch": 1.8065670857107512, "grad_norm": 0.1398317515850067, "learning_rate": 0.0003589028060471251, "loss": 2.1831, "step": 467330 }, { "epoch": 1.8066057429141347, "grad_norm": 0.23167477548122406, "learning_rate": 0.00035878094088570854, "loss": 2.1715, "step": 467340 }, { "epoch": 1.806644400117518, "grad_norm": 0.14315100014209747, "learning_rate": 0.0003586590847724536, "loss": 2.1893, "step": 467350 }, { "epoch": 1.8066830573209012, "grad_norm": 0.14273662865161896, "learning_rate": 0.00035853723770534485, "loss": 2.1801, "step": 467360 }, { "epoch": 1.8067217145242844, "grad_norm": 0.13973486423492432, "learning_rate": 0.00035841539968236804, "loss": 2.1668, "step": 467370 }, { "epoch": 1.8067603717276677, "grad_norm": 0.12567126750946045, "learning_rate": 0.00035829357070150936, "loss": 2.162, "step": 467380 }, { "epoch": 1.8067990289310512, "grad_norm": 0.14240488409996033, "learning_rate": 0.00035817175076075625, "loss": 2.1722, "step": 467390 }, { "epoch": 1.8068376861344344, "grad_norm": 0.14131249487400055, "learning_rate": 0.00035804993985809655, "loss": 2.1719, "step": 467400 }, { "epoch": 1.8068763433378177, "grad_norm": 0.13042914867401123, "learning_rate": 0.0003579281379915189, "loss": 2.1837, "step": 467410 }, { "epoch": 1.806915000541201, "grad_norm": 0.13685810565948486, "learning_rate": 0.00035780634515901255, "loss": 2.1775, "step": 467420 }, { "epoch": 1.8069536577445842, "grad_norm": 0.12795154750347137, "learning_rate": 0.00035768456135856774, "loss": 2.1866, "step": 467430 }, { "epoch": 1.8069923149479674, "grad_norm": 0.14272700250148773, "learning_rate": 0.0003575627865881752, "loss": 2.1739, "step": 467440 }, { "epoch": 1.8070309721513507, "grad_norm": 0.267858624458313, "learning_rate": 0.00035744102084582676, "loss": 2.1697, "step": 467450 }, { "epoch": 1.807069629354734, "grad_norm": 0.13735882937908173, "learning_rate": 0.0003573192641295144, "loss": 2.1647, "step": 467460 }, { "epoch": 1.8071082865581172, "grad_norm": 0.14670196175575256, "learning_rate": 0.00035719751643723165, "loss": 2.1725, "step": 467470 }, { "epoch": 1.8071469437615004, "grad_norm": 0.1381799876689911, "learning_rate": 0.00035707577776697197, "loss": 2.1564, "step": 467480 }, { "epoch": 1.8071856009648837, "grad_norm": 0.14322498440742493, "learning_rate": 0.00035695404811673015, "loss": 2.1885, "step": 467490 }, { "epoch": 1.807224258168267, "grad_norm": 0.15391521155834198, "learning_rate": 0.00035683232748450155, "loss": 2.1776, "step": 467500 }, { "epoch": 1.8072629153716504, "grad_norm": 0.13412201404571533, "learning_rate": 0.00035671061586828225, "loss": 2.1704, "step": 467510 }, { "epoch": 1.8073015725750337, "grad_norm": 0.13290143013000488, "learning_rate": 0.0003565889132660689, "loss": 2.1778, "step": 467520 }, { "epoch": 1.807340229778417, "grad_norm": 0.1417669951915741, "learning_rate": 0.0003564672196758594, "loss": 2.1694, "step": 467530 }, { "epoch": 1.8073788869818002, "grad_norm": 0.13997195661067963, "learning_rate": 0.0003563455350956515, "loss": 2.1687, "step": 467540 }, { "epoch": 1.8074175441851836, "grad_norm": 0.14239998161792755, "learning_rate": 0.00035622385952344484, "loss": 2.1887, "step": 467550 }, { "epoch": 1.8074562013885669, "grad_norm": 0.13999323546886444, "learning_rate": 0.0003561021929572386, "loss": 2.1588, "step": 467560 }, { "epoch": 1.8074948585919501, "grad_norm": 0.1441546231508255, "learning_rate": 0.00035598053539503385, "loss": 2.1766, "step": 467570 }, { "epoch": 1.8075335157953334, "grad_norm": 0.13809379935264587, "learning_rate": 0.00035585888683483134, "loss": 2.1899, "step": 467580 }, { "epoch": 1.8075721729987166, "grad_norm": 0.13348519802093506, "learning_rate": 0.0003557372472746334, "loss": 2.1814, "step": 467590 }, { "epoch": 1.8076108302021, "grad_norm": 0.13278010487556458, "learning_rate": 0.0003556156167124427, "loss": 2.1669, "step": 467600 }, { "epoch": 1.8076494874054831, "grad_norm": 0.1412791609764099, "learning_rate": 0.0003554939951462628, "loss": 2.1643, "step": 467610 }, { "epoch": 1.8076881446088664, "grad_norm": 0.16468937695026398, "learning_rate": 0.00035537238257409774, "loss": 2.1733, "step": 467620 }, { "epoch": 1.8077268018122497, "grad_norm": 0.14936642348766327, "learning_rate": 0.0003552507789939525, "loss": 2.1666, "step": 467630 }, { "epoch": 1.807765459015633, "grad_norm": 0.1426035612821579, "learning_rate": 0.0003551291844038329, "loss": 2.1801, "step": 467640 }, { "epoch": 1.8078041162190162, "grad_norm": 0.1369057148694992, "learning_rate": 0.0003550075988017452, "loss": 2.1801, "step": 467650 }, { "epoch": 1.8078427734223994, "grad_norm": 0.16422870755195618, "learning_rate": 0.00035488602218569664, "loss": 2.1837, "step": 467660 }, { "epoch": 1.8078814306257827, "grad_norm": 0.12718096375465393, "learning_rate": 0.00035476445455369523, "loss": 2.1663, "step": 467670 }, { "epoch": 1.8079200878291661, "grad_norm": 0.13378946483135223, "learning_rate": 0.0003546428959037495, "loss": 2.1678, "step": 467680 }, { "epoch": 1.8079587450325494, "grad_norm": 0.1549948751926422, "learning_rate": 0.0003545213462338688, "loss": 2.1786, "step": 467690 }, { "epoch": 1.8079974022359326, "grad_norm": 0.13492682576179504, "learning_rate": 0.000354399805542063, "loss": 2.1713, "step": 467700 }, { "epoch": 1.808036059439316, "grad_norm": 0.14016228914260864, "learning_rate": 0.00035427827382634324, "loss": 2.1588, "step": 467710 }, { "epoch": 1.8080747166426994, "grad_norm": 0.1409558802843094, "learning_rate": 0.0003541567510847212, "loss": 2.1883, "step": 467720 }, { "epoch": 1.8081133738460826, "grad_norm": 0.14013123512268066, "learning_rate": 0.00035403523731520914, "loss": 2.1679, "step": 467730 }, { "epoch": 1.8081520310494659, "grad_norm": 0.13527102768421173, "learning_rate": 0.00035391373251581973, "loss": 2.1759, "step": 467740 }, { "epoch": 1.8081906882528491, "grad_norm": 0.13502882421016693, "learning_rate": 0.00035379223668456714, "loss": 2.163, "step": 467750 }, { "epoch": 1.8082293454562324, "grad_norm": 0.13633443415164948, "learning_rate": 0.0003536707498194658, "loss": 2.1783, "step": 467760 }, { "epoch": 1.8082680026596156, "grad_norm": 0.13043592870235443, "learning_rate": 0.0003535492719185309, "loss": 2.1717, "step": 467770 }, { "epoch": 1.8083066598629989, "grad_norm": 0.14720727503299713, "learning_rate": 0.0003534278029797784, "loss": 2.1837, "step": 467780 }, { "epoch": 1.8083453170663821, "grad_norm": 0.1348484605550766, "learning_rate": 0.00035330634300122487, "loss": 2.1618, "step": 467790 }, { "epoch": 1.8083839742697654, "grad_norm": 0.1376534253358841, "learning_rate": 0.0003531848919808878, "loss": 2.1715, "step": 467800 }, { "epoch": 1.8084226314731486, "grad_norm": 0.15582869946956635, "learning_rate": 0.0003530634499167855, "loss": 2.1807, "step": 467810 }, { "epoch": 1.8084612886765319, "grad_norm": 0.1453525424003601, "learning_rate": 0.00035294201680693705, "loss": 2.1715, "step": 467820 }, { "epoch": 1.8084999458799151, "grad_norm": 0.13753698766231537, "learning_rate": 0.0003528205926493617, "loss": 2.1673, "step": 467830 }, { "epoch": 1.8085386030832984, "grad_norm": 0.1529999077320099, "learning_rate": 0.00035269917744208, "loss": 2.1673, "step": 467840 }, { "epoch": 1.8085772602866819, "grad_norm": 0.13831478357315063, "learning_rate": 0.00035257777118311283, "loss": 2.164, "step": 467850 }, { "epoch": 1.8086159174900651, "grad_norm": 0.13578453660011292, "learning_rate": 0.0003524563738704822, "loss": 2.1694, "step": 467860 }, { "epoch": 1.8086545746934484, "grad_norm": 0.1331843137741089, "learning_rate": 0.00035233498550221086, "loss": 2.1779, "step": 467870 }, { "epoch": 1.8086932318968316, "grad_norm": 0.15052971243858337, "learning_rate": 0.0003522136060763217, "loss": 2.1831, "step": 467880 }, { "epoch": 1.808731889100215, "grad_norm": 0.14466112852096558, "learning_rate": 0.0003520922355908387, "loss": 2.1874, "step": 467890 }, { "epoch": 1.8087705463035983, "grad_norm": 0.12750951945781708, "learning_rate": 0.0003519708740437868, "loss": 2.1797, "step": 467900 }, { "epoch": 1.8088092035069816, "grad_norm": 0.1318550854921341, "learning_rate": 0.0003518495214331914, "loss": 2.1606, "step": 467910 }, { "epoch": 1.8088478607103649, "grad_norm": 0.13449975848197937, "learning_rate": 0.0003517281777570789, "loss": 2.1563, "step": 467920 }, { "epoch": 1.808886517913748, "grad_norm": 0.15311302244663239, "learning_rate": 0.0003516068430134758, "loss": 2.1857, "step": 467930 }, { "epoch": 1.8089251751171314, "grad_norm": 0.14139986038208008, "learning_rate": 0.0003514855172004099, "loss": 2.1793, "step": 467940 }, { "epoch": 1.8089638323205146, "grad_norm": 0.14440183341503143, "learning_rate": 0.00035136420031590964, "loss": 2.1634, "step": 467950 }, { "epoch": 1.8090024895238979, "grad_norm": 0.14607249200344086, "learning_rate": 0.0003512428923580042, "loss": 2.1742, "step": 467960 }, { "epoch": 1.8090411467272811, "grad_norm": 0.14335627853870392, "learning_rate": 0.00035112159332472315, "loss": 2.1647, "step": 467970 }, { "epoch": 1.8090798039306644, "grad_norm": 0.14517302811145782, "learning_rate": 0.00035100030321409713, "loss": 2.1622, "step": 467980 }, { "epoch": 1.8091184611340476, "grad_norm": 0.14808966219425201, "learning_rate": 0.00035087902202415736, "loss": 2.1737, "step": 467990 }, { "epoch": 1.8091571183374309, "grad_norm": 0.14284609258174896, "learning_rate": 0.0003507577497529357, "loss": 2.1733, "step": 468000 }, { "epoch": 1.8091957755408141, "grad_norm": 0.13765506446361542, "learning_rate": 0.0003506364863984652, "loss": 2.1635, "step": 468010 }, { "epoch": 1.8092344327441976, "grad_norm": 0.14198483526706696, "learning_rate": 0.0003505152319587792, "loss": 2.1669, "step": 468020 }, { "epoch": 1.8092730899475808, "grad_norm": 0.1519058495759964, "learning_rate": 0.0003503939864319117, "loss": 2.1671, "step": 468030 }, { "epoch": 1.809311747150964, "grad_norm": 0.14194536209106445, "learning_rate": 0.0003502727498158975, "loss": 2.1746, "step": 468040 }, { "epoch": 1.8093504043543474, "grad_norm": 0.13198499381542206, "learning_rate": 0.00035015152210877255, "loss": 2.15, "step": 468050 }, { "epoch": 1.8093890615577308, "grad_norm": 0.13800212740898132, "learning_rate": 0.000350030303308573, "loss": 2.1659, "step": 468060 }, { "epoch": 1.809427718761114, "grad_norm": 0.14429043233394623, "learning_rate": 0.0003499090934133355, "loss": 2.1731, "step": 468070 }, { "epoch": 1.8094663759644973, "grad_norm": 0.13230136036872864, "learning_rate": 0.00034978789242109867, "loss": 2.1691, "step": 468080 }, { "epoch": 1.8095050331678806, "grad_norm": 0.14464101195335388, "learning_rate": 0.00034966670032990013, "loss": 2.1799, "step": 468090 }, { "epoch": 1.8095436903712638, "grad_norm": 0.1464579999446869, "learning_rate": 0.00034954551713777926, "loss": 2.1786, "step": 468100 }, { "epoch": 1.809582347574647, "grad_norm": 0.1446916162967682, "learning_rate": 0.00034942434284277656, "loss": 2.1968, "step": 468110 }, { "epoch": 1.8096210047780303, "grad_norm": 0.1317659467458725, "learning_rate": 0.00034930317744293207, "loss": 2.1828, "step": 468120 }, { "epoch": 1.8096596619814136, "grad_norm": 0.14055480062961578, "learning_rate": 0.0003491820209362875, "loss": 2.1885, "step": 468130 }, { "epoch": 1.8096983191847968, "grad_norm": 0.13689568638801575, "learning_rate": 0.0003490608733208847, "loss": 2.1813, "step": 468140 }, { "epoch": 1.80973697638818, "grad_norm": 0.1372600942850113, "learning_rate": 0.00034893973459476646, "loss": 2.1628, "step": 468150 }, { "epoch": 1.8097756335915633, "grad_norm": 0.12364128232002258, "learning_rate": 0.0003488186047559765, "loss": 2.1619, "step": 468160 }, { "epoch": 1.8098142907949466, "grad_norm": 0.13477951288223267, "learning_rate": 0.00034869748380255897, "loss": 2.1813, "step": 468170 }, { "epoch": 1.8098529479983299, "grad_norm": 0.1300884187221527, "learning_rate": 0.00034857637173255876, "loss": 2.1549, "step": 468180 }, { "epoch": 1.8098916052017133, "grad_norm": 0.12880156934261322, "learning_rate": 0.00034845526854402167, "loss": 2.171, "step": 468190 }, { "epoch": 1.8099302624050966, "grad_norm": 0.15667806565761566, "learning_rate": 0.0003483341742349937, "loss": 2.178, "step": 468200 }, { "epoch": 1.8099689196084798, "grad_norm": 0.1452578604221344, "learning_rate": 0.0003482130888035224, "loss": 2.1857, "step": 468210 }, { "epoch": 1.810007576811863, "grad_norm": 0.14344045519828796, "learning_rate": 0.0003480920122476554, "loss": 2.1651, "step": 468220 }, { "epoch": 1.8100462340152466, "grad_norm": 0.13554508984088898, "learning_rate": 0.00034797094456544134, "loss": 2.1758, "step": 468230 }, { "epoch": 1.8100848912186298, "grad_norm": 0.1435360163450241, "learning_rate": 0.00034784988575492927, "loss": 2.1536, "step": 468240 }, { "epoch": 1.810123548422013, "grad_norm": 0.12887118756771088, "learning_rate": 0.00034772883581416945, "loss": 2.164, "step": 468250 }, { "epoch": 1.8101622056253963, "grad_norm": 0.14329533278942108, "learning_rate": 0.0003476077947412122, "loss": 2.1841, "step": 468260 }, { "epoch": 1.8102008628287796, "grad_norm": 0.14588004350662231, "learning_rate": 0.00034748676253410914, "loss": 2.159, "step": 468270 }, { "epoch": 1.8102395200321628, "grad_norm": 0.1433817744255066, "learning_rate": 0.0003473657391909122, "loss": 2.1649, "step": 468280 }, { "epoch": 1.810278177235546, "grad_norm": 0.13485339283943176, "learning_rate": 0.0003472447247096744, "loss": 2.1753, "step": 468290 }, { "epoch": 1.8103168344389293, "grad_norm": 0.1369163542985916, "learning_rate": 0.00034712371908844907, "loss": 2.1589, "step": 468300 }, { "epoch": 1.8103554916423126, "grad_norm": 0.13304588198661804, "learning_rate": 0.00034700272232529074, "loss": 2.1755, "step": 468310 }, { "epoch": 1.8103941488456958, "grad_norm": 0.12897934019565582, "learning_rate": 0.00034688173441825397, "loss": 2.1661, "step": 468320 }, { "epoch": 1.810432806049079, "grad_norm": 0.1374320536851883, "learning_rate": 0.00034676075536539485, "loss": 2.1839, "step": 468330 }, { "epoch": 1.8104714632524623, "grad_norm": 0.13342377543449402, "learning_rate": 0.00034663978516476934, "loss": 2.171, "step": 468340 }, { "epoch": 1.8105101204558456, "grad_norm": 0.14364196360111237, "learning_rate": 0.0003465188238144348, "loss": 2.175, "step": 468350 }, { "epoch": 1.810548777659229, "grad_norm": 0.13240283727645874, "learning_rate": 0.0003463978713124489, "loss": 2.1552, "step": 468360 }, { "epoch": 1.8105874348626123, "grad_norm": 0.13555388152599335, "learning_rate": 0.00034627692765687044, "loss": 2.1646, "step": 468370 }, { "epoch": 1.8106260920659956, "grad_norm": 0.1307956725358963, "learning_rate": 0.00034615599284575825, "loss": 2.1846, "step": 468380 }, { "epoch": 1.8106647492693788, "grad_norm": 0.13531222939491272, "learning_rate": 0.00034603506687717233, "loss": 2.1697, "step": 468390 }, { "epoch": 1.8107034064727623, "grad_norm": 0.14121176302433014, "learning_rate": 0.00034591414974917336, "loss": 2.1897, "step": 468400 }, { "epoch": 1.8107420636761455, "grad_norm": 0.15340138971805573, "learning_rate": 0.00034579324145982284, "loss": 2.1687, "step": 468410 }, { "epoch": 1.8107807208795288, "grad_norm": 0.13489243388175964, "learning_rate": 0.00034567234200718255, "loss": 2.1619, "step": 468420 }, { "epoch": 1.810819378082912, "grad_norm": 0.14203903079032898, "learning_rate": 0.00034555145138931567, "loss": 2.1627, "step": 468430 }, { "epoch": 1.8108580352862953, "grad_norm": 0.16522355377674103, "learning_rate": 0.0003454305696042852, "loss": 2.167, "step": 468440 }, { "epoch": 1.8108966924896785, "grad_norm": 0.1337309032678604, "learning_rate": 0.00034530969665015566, "loss": 2.174, "step": 468450 }, { "epoch": 1.8109353496930618, "grad_norm": 0.1513516902923584, "learning_rate": 0.00034518883252499167, "loss": 2.1809, "step": 468460 }, { "epoch": 1.810974006896445, "grad_norm": 0.1391746699810028, "learning_rate": 0.000345067977226859, "loss": 2.1625, "step": 468470 }, { "epoch": 1.8110126640998283, "grad_norm": 0.14154408872127533, "learning_rate": 0.0003449471307538239, "loss": 2.1849, "step": 468480 }, { "epoch": 1.8110513213032116, "grad_norm": 0.13919097185134888, "learning_rate": 0.0003448262931039534, "loss": 2.1749, "step": 468490 }, { "epoch": 1.8110899785065948, "grad_norm": 0.13251088559627533, "learning_rate": 0.00034470546427531513, "loss": 2.1609, "step": 468500 }, { "epoch": 1.811128635709978, "grad_norm": 0.13987651467323303, "learning_rate": 0.0003445846442659775, "loss": 2.1784, "step": 468510 }, { "epoch": 1.8111672929133613, "grad_norm": 0.13735197484493256, "learning_rate": 0.00034446383307400977, "loss": 2.1842, "step": 468520 }, { "epoch": 1.8112059501167448, "grad_norm": 0.14323697984218597, "learning_rate": 0.00034434303069748177, "loss": 2.1885, "step": 468530 }, { "epoch": 1.811244607320128, "grad_norm": 0.1448112279176712, "learning_rate": 0.00034422223713446363, "loss": 2.1646, "step": 468540 }, { "epoch": 1.8112832645235113, "grad_norm": 0.1370207518339157, "learning_rate": 0.0003441014523830273, "loss": 2.1916, "step": 468550 }, { "epoch": 1.8113219217268945, "grad_norm": 0.143899604678154, "learning_rate": 0.00034398067644124386, "loss": 2.1805, "step": 468560 }, { "epoch": 1.811360578930278, "grad_norm": 0.12973351776599884, "learning_rate": 0.0003438599093071868, "loss": 2.1663, "step": 468570 }, { "epoch": 1.8113992361336613, "grad_norm": 0.13267174363136292, "learning_rate": 0.0003437391509789287, "loss": 2.1478, "step": 468580 }, { "epoch": 1.8114378933370445, "grad_norm": 0.13831119239330292, "learning_rate": 0.00034361840145454405, "loss": 2.1778, "step": 468590 }, { "epoch": 1.8114765505404278, "grad_norm": 0.15274249017238617, "learning_rate": 0.00034349766073210744, "loss": 2.1776, "step": 468600 }, { "epoch": 1.811515207743811, "grad_norm": 0.12548744678497314, "learning_rate": 0.0003433769288096944, "loss": 2.1673, "step": 468610 }, { "epoch": 1.8115538649471943, "grad_norm": 0.14468833804130554, "learning_rate": 0.00034325620568538117, "loss": 2.1685, "step": 468620 }, { "epoch": 1.8115925221505775, "grad_norm": 0.14792367815971375, "learning_rate": 0.00034313549135724444, "loss": 2.1722, "step": 468630 }, { "epoch": 1.8116311793539608, "grad_norm": 0.14683176577091217, "learning_rate": 0.00034301478582336167, "loss": 2.1904, "step": 468640 }, { "epoch": 1.811669836557344, "grad_norm": 0.14537638425827026, "learning_rate": 0.0003428940890818113, "loss": 2.1845, "step": 468650 }, { "epoch": 1.8117084937607273, "grad_norm": 0.14245370030403137, "learning_rate": 0.00034277340113067224, "loss": 2.1565, "step": 468660 }, { "epoch": 1.8117471509641105, "grad_norm": 0.14482244849205017, "learning_rate": 0.000342652721968024, "loss": 2.1833, "step": 468670 }, { "epoch": 1.8117858081674938, "grad_norm": 0.13195568323135376, "learning_rate": 0.00034253205159194723, "loss": 2.1453, "step": 468680 }, { "epoch": 1.811824465370877, "grad_norm": 0.15000492334365845, "learning_rate": 0.0003424113900005226, "loss": 2.1771, "step": 468690 }, { "epoch": 1.8118631225742605, "grad_norm": 0.13357426226139069, "learning_rate": 0.0003422907371918318, "loss": 2.1556, "step": 468700 }, { "epoch": 1.8119017797776438, "grad_norm": 0.13514158129692078, "learning_rate": 0.00034217009316395776, "loss": 2.1813, "step": 468710 }, { "epoch": 1.811940436981027, "grad_norm": 0.13902096450328827, "learning_rate": 0.0003420494579149835, "loss": 2.1665, "step": 468720 }, { "epoch": 1.8119790941844103, "grad_norm": 0.1330592930316925, "learning_rate": 0.0003419288314429927, "loss": 2.1671, "step": 468730 }, { "epoch": 1.8120177513877938, "grad_norm": 0.133706197142601, "learning_rate": 0.00034180821374606965, "loss": 2.1595, "step": 468740 }, { "epoch": 1.812056408591177, "grad_norm": 0.13246847689151764, "learning_rate": 0.0003416876048223001, "loss": 2.1694, "step": 468750 }, { "epoch": 1.8120950657945603, "grad_norm": 0.13621580600738525, "learning_rate": 0.00034156700466976965, "loss": 2.1671, "step": 468760 }, { "epoch": 1.8121337229979435, "grad_norm": 0.14098776876926422, "learning_rate": 0.0003414464132865649, "loss": 2.16, "step": 468770 }, { "epoch": 1.8121723802013268, "grad_norm": 0.15192024409770966, "learning_rate": 0.0003413258306707734, "loss": 2.1746, "step": 468780 }, { "epoch": 1.81221103740471, "grad_norm": 0.14788542687892914, "learning_rate": 0.0003412052568204831, "loss": 2.1594, "step": 468790 }, { "epoch": 1.8122496946080933, "grad_norm": 0.13699501752853394, "learning_rate": 0.0003410846917337824, "loss": 2.1766, "step": 468800 }, { "epoch": 1.8122883518114765, "grad_norm": 0.14366194605827332, "learning_rate": 0.00034096413540876094, "loss": 2.1682, "step": 468810 }, { "epoch": 1.8123270090148598, "grad_norm": 0.9558488726615906, "learning_rate": 0.000340843587843509, "loss": 2.1689, "step": 468820 }, { "epoch": 1.812365666218243, "grad_norm": 0.16100719571113586, "learning_rate": 0.00034072304903611704, "loss": 2.1775, "step": 468830 }, { "epoch": 1.8124043234216263, "grad_norm": 0.14918309450149536, "learning_rate": 0.00034060251898467686, "loss": 2.1761, "step": 468840 }, { "epoch": 1.8124429806250095, "grad_norm": 0.14374400675296783, "learning_rate": 0.0003404819976872804, "loss": 2.1675, "step": 468850 }, { "epoch": 1.8124816378283928, "grad_norm": 0.13421660661697388, "learning_rate": 0.0003403614851420205, "loss": 2.1881, "step": 468860 }, { "epoch": 1.8125202950317763, "grad_norm": 0.14474283158779144, "learning_rate": 0.0003402409813469909, "loss": 2.1628, "step": 468870 }, { "epoch": 1.8125589522351595, "grad_norm": 0.1359967589378357, "learning_rate": 0.0003401204863002858, "loss": 2.1704, "step": 468880 }, { "epoch": 1.8125976094385428, "grad_norm": 0.13450078666210175, "learning_rate": 0.0003400000000000001, "loss": 2.1702, "step": 468890 }, { "epoch": 1.812636266641926, "grad_norm": 0.13271546363830566, "learning_rate": 0.0003398795224442295, "loss": 2.1689, "step": 468900 }, { "epoch": 1.8126749238453095, "grad_norm": 0.1268029659986496, "learning_rate": 0.00033975905363106993, "loss": 2.1692, "step": 468910 }, { "epoch": 1.8127135810486927, "grad_norm": 0.14645883440971375, "learning_rate": 0.00033963859355861926, "loss": 2.173, "step": 468920 }, { "epoch": 1.812752238252076, "grad_norm": 0.13335394859313965, "learning_rate": 0.00033951814222497444, "loss": 2.1859, "step": 468930 }, { "epoch": 1.8127908954554592, "grad_norm": 0.15273331105709076, "learning_rate": 0.0003393976996282342, "loss": 2.1771, "step": 468940 }, { "epoch": 1.8128295526588425, "grad_norm": 0.145224928855896, "learning_rate": 0.00033927726576649776, "loss": 2.1615, "step": 468950 }, { "epoch": 1.8128682098622257, "grad_norm": 0.14967118203639984, "learning_rate": 0.00033915684063786446, "loss": 2.1573, "step": 468960 }, { "epoch": 1.812906867065609, "grad_norm": 0.14878715574741364, "learning_rate": 0.0003390364242404351, "loss": 2.1606, "step": 468970 }, { "epoch": 1.8129455242689922, "grad_norm": 0.13415095210075378, "learning_rate": 0.0003389160165723106, "loss": 2.1638, "step": 468980 }, { "epoch": 1.8129841814723755, "grad_norm": 0.13814513385295868, "learning_rate": 0.0003387956176315932, "loss": 2.1731, "step": 468990 }, { "epoch": 1.8130228386757588, "grad_norm": 0.1519918441772461, "learning_rate": 0.0003386752274163851, "loss": 2.1596, "step": 469000 }, { "epoch": 1.813061495879142, "grad_norm": 0.13185402750968933, "learning_rate": 0.00033855484592478933, "loss": 2.1661, "step": 469010 }, { "epoch": 1.8131001530825253, "grad_norm": 0.13390794396400452, "learning_rate": 0.00033843447315491047, "loss": 2.1766, "step": 469020 }, { "epoch": 1.8131388102859087, "grad_norm": 0.14997097849845886, "learning_rate": 0.00033831410910485247, "loss": 2.1679, "step": 469030 }, { "epoch": 1.813177467489292, "grad_norm": 0.14121918380260468, "learning_rate": 0.00033819375377272087, "loss": 2.1631, "step": 469040 }, { "epoch": 1.8132161246926752, "grad_norm": 0.1386699229478836, "learning_rate": 0.0003380734071566218, "loss": 2.1764, "step": 469050 }, { "epoch": 1.8132547818960585, "grad_norm": 0.13607648015022278, "learning_rate": 0.0003379530692546615, "loss": 2.1766, "step": 469060 }, { "epoch": 1.8132934390994417, "grad_norm": 0.1364724189043045, "learning_rate": 0.0003378327400649477, "loss": 2.1688, "step": 469070 }, { "epoch": 1.8133320963028252, "grad_norm": 0.13681846857070923, "learning_rate": 0.00033771241958558806, "loss": 2.1722, "step": 469080 }, { "epoch": 1.8133707535062085, "grad_norm": 0.14456836879253387, "learning_rate": 0.00033759210781469175, "loss": 2.1639, "step": 469090 }, { "epoch": 1.8134094107095917, "grad_norm": 0.13703112304210663, "learning_rate": 0.0003374718047503675, "loss": 2.158, "step": 469100 }, { "epoch": 1.813448067912975, "grad_norm": 0.1477336883544922, "learning_rate": 0.0003373515103907261, "loss": 2.1578, "step": 469110 }, { "epoch": 1.8134867251163582, "grad_norm": 0.145752415060997, "learning_rate": 0.0003372312247338778, "loss": 2.1589, "step": 469120 }, { "epoch": 1.8135253823197415, "grad_norm": 0.14201414585113525, "learning_rate": 0.00033711094777793415, "loss": 2.1756, "step": 469130 }, { "epoch": 1.8135640395231247, "grad_norm": 0.1439470797777176, "learning_rate": 0.0003369906795210076, "loss": 2.1647, "step": 469140 }, { "epoch": 1.813602696726508, "grad_norm": 0.13308900594711304, "learning_rate": 0.0003368704199612107, "loss": 2.1674, "step": 469150 }, { "epoch": 1.8136413539298912, "grad_norm": 0.1444803923368454, "learning_rate": 0.0003367501690966568, "loss": 2.1459, "step": 469160 }, { "epoch": 1.8136800111332745, "grad_norm": 0.13094887137413025, "learning_rate": 0.00033662992692546023, "loss": 2.1732, "step": 469170 }, { "epoch": 1.8137186683366577, "grad_norm": 0.13859966397285461, "learning_rate": 0.0003365096934457359, "loss": 2.1773, "step": 469180 }, { "epoch": 1.813757325540041, "grad_norm": 0.14027716219425201, "learning_rate": 0.00033638946865559906, "loss": 2.1661, "step": 469190 }, { "epoch": 1.8137959827434245, "grad_norm": 0.13711123168468475, "learning_rate": 0.0003362692525531663, "loss": 2.1804, "step": 469200 }, { "epoch": 1.8138346399468077, "grad_norm": 0.1386299878358841, "learning_rate": 0.0003361490451365543, "loss": 2.185, "step": 469210 }, { "epoch": 1.813873297150191, "grad_norm": 0.1353200078010559, "learning_rate": 0.0003360288464038808, "loss": 2.1647, "step": 469220 }, { "epoch": 1.8139119543535742, "grad_norm": 0.13703496754169464, "learning_rate": 0.00033590865635326384, "loss": 2.17, "step": 469230 }, { "epoch": 1.8139506115569575, "grad_norm": 0.142286017537117, "learning_rate": 0.0003357884749828224, "loss": 2.1633, "step": 469240 }, { "epoch": 1.813989268760341, "grad_norm": 0.13622930645942688, "learning_rate": 0.0003356683022906761, "loss": 2.1674, "step": 469250 }, { "epoch": 1.8140279259637242, "grad_norm": 0.23010209202766418, "learning_rate": 0.00033554813827494544, "loss": 2.1682, "step": 469260 }, { "epoch": 1.8140665831671074, "grad_norm": 0.14340844750404358, "learning_rate": 0.0003354279829337512, "loss": 2.1779, "step": 469270 }, { "epoch": 1.8141052403704907, "grad_norm": 0.1332901418209076, "learning_rate": 0.0003353078362652151, "loss": 2.169, "step": 469280 }, { "epoch": 1.814143897573874, "grad_norm": 0.12993653118610382, "learning_rate": 0.00033518769826745934, "loss": 2.1811, "step": 469290 }, { "epoch": 1.8141825547772572, "grad_norm": 0.14110475778579712, "learning_rate": 0.00033506756893860715, "loss": 2.1651, "step": 469300 }, { "epoch": 1.8142212119806405, "grad_norm": 0.14228901267051697, "learning_rate": 0.0003349474482767818, "loss": 2.1658, "step": 469310 }, { "epoch": 1.8142598691840237, "grad_norm": 0.13981829583644867, "learning_rate": 0.0003348273362801082, "loss": 2.1691, "step": 469320 }, { "epoch": 1.814298526387407, "grad_norm": 0.13701368868350983, "learning_rate": 0.0003347072329467109, "loss": 2.1591, "step": 469330 }, { "epoch": 1.8143371835907902, "grad_norm": 0.14498670399188995, "learning_rate": 0.00033458713827471586, "loss": 2.1706, "step": 469340 }, { "epoch": 1.8143758407941735, "grad_norm": 0.1400134116411209, "learning_rate": 0.0003344670522622495, "loss": 2.1715, "step": 469350 }, { "epoch": 1.8144144979975567, "grad_norm": 0.15969963371753693, "learning_rate": 0.0003343469749074388, "loss": 2.1675, "step": 469360 }, { "epoch": 1.8144531552009402, "grad_norm": 0.1523132175207138, "learning_rate": 0.00033422690620841156, "loss": 2.1751, "step": 469370 }, { "epoch": 1.8144918124043234, "grad_norm": 0.1331949532032013, "learning_rate": 0.00033410684616329614, "loss": 2.1861, "step": 469380 }, { "epoch": 1.8145304696077067, "grad_norm": 0.14200444519519806, "learning_rate": 0.0003339867947702215, "loss": 2.1604, "step": 469390 }, { "epoch": 1.81456912681109, "grad_norm": 0.1326495110988617, "learning_rate": 0.00033386675202731774, "loss": 2.1813, "step": 469400 }, { "epoch": 1.8146077840144732, "grad_norm": 0.14433759450912476, "learning_rate": 0.00033374671793271474, "loss": 2.1593, "step": 469410 }, { "epoch": 1.8146464412178567, "grad_norm": 0.1497911512851715, "learning_rate": 0.00033362669248454416, "loss": 2.1541, "step": 469420 }, { "epoch": 1.81468509842124, "grad_norm": 0.14449547231197357, "learning_rate": 0.0003335066756809375, "loss": 2.1869, "step": 469430 }, { "epoch": 1.8147237556246232, "grad_norm": 0.14384634792804718, "learning_rate": 0.00033338666752002723, "loss": 2.1579, "step": 469440 }, { "epoch": 1.8147624128280064, "grad_norm": 0.13797500729560852, "learning_rate": 0.0003332666679999468, "loss": 2.1512, "step": 469450 }, { "epoch": 1.8148010700313897, "grad_norm": 0.1408056914806366, "learning_rate": 0.0003331466771188294, "loss": 2.1769, "step": 469460 }, { "epoch": 1.814839727234773, "grad_norm": 0.1476975828409195, "learning_rate": 0.00033302669487481, "loss": 2.1623, "step": 469470 }, { "epoch": 1.8148783844381562, "grad_norm": 0.14249102771282196, "learning_rate": 0.0003329067212660235, "loss": 2.1705, "step": 469480 }, { "epoch": 1.8149170416415394, "grad_norm": 0.1411779820919037, "learning_rate": 0.0003327867562906057, "loss": 2.1735, "step": 469490 }, { "epoch": 1.8149556988449227, "grad_norm": 0.14658279716968536, "learning_rate": 0.0003326667999466935, "loss": 2.1882, "step": 469500 }, { "epoch": 1.814994356048306, "grad_norm": 0.1431387960910797, "learning_rate": 0.00033254685223242333, "loss": 2.17, "step": 469510 }, { "epoch": 1.8150330132516892, "grad_norm": 0.13615825772285461, "learning_rate": 0.0003324269131459334, "loss": 2.183, "step": 469520 }, { "epoch": 1.8150716704550725, "grad_norm": 0.139374777674675, "learning_rate": 0.00033230698268536243, "loss": 2.1777, "step": 469530 }, { "epoch": 1.815110327658456, "grad_norm": 0.13844828307628632, "learning_rate": 0.0003321870608488493, "loss": 2.166, "step": 469540 }, { "epoch": 1.8151489848618392, "grad_norm": 0.14447201788425446, "learning_rate": 0.00033206714763453385, "loss": 2.1627, "step": 469550 }, { "epoch": 1.8151876420652224, "grad_norm": 0.14412400126457214, "learning_rate": 0.00033194724304055656, "loss": 2.1684, "step": 469560 }, { "epoch": 1.8152262992686057, "grad_norm": 0.1350231021642685, "learning_rate": 0.0003318273470650588, "loss": 2.1779, "step": 469570 }, { "epoch": 1.8152649564719892, "grad_norm": 0.14220920205116272, "learning_rate": 0.00033170745970618223, "loss": 2.1611, "step": 469580 }, { "epoch": 1.8153036136753724, "grad_norm": 0.13567551970481873, "learning_rate": 0.0003315875809620692, "loss": 2.1804, "step": 469590 }, { "epoch": 1.8153422708787557, "grad_norm": 0.14831498265266418, "learning_rate": 0.00033146771083086325, "loss": 2.165, "step": 469600 }, { "epoch": 1.815380928082139, "grad_norm": 0.15242017805576324, "learning_rate": 0.0003313478493107078, "loss": 2.1671, "step": 469610 }, { "epoch": 1.8154195852855222, "grad_norm": 0.1507246196269989, "learning_rate": 0.0003312279963997478, "loss": 2.1537, "step": 469620 }, { "epoch": 1.8154582424889054, "grad_norm": 0.13301242887973785, "learning_rate": 0.0003311081520961281, "loss": 2.1598, "step": 469630 }, { "epoch": 1.8154968996922887, "grad_norm": 0.14124217629432678, "learning_rate": 0.00033098831639799477, "loss": 2.1686, "step": 469640 }, { "epoch": 1.815535556895672, "grad_norm": 0.138587087392807, "learning_rate": 0.0003308684893034941, "loss": 2.171, "step": 469650 }, { "epoch": 1.8155742140990552, "grad_norm": 0.147024005651474, "learning_rate": 0.0003307486708107734, "loss": 2.1485, "step": 469660 }, { "epoch": 1.8156128713024384, "grad_norm": 0.14949698746204376, "learning_rate": 0.00033062886091798037, "loss": 2.1686, "step": 469670 }, { "epoch": 1.8156515285058217, "grad_norm": 0.13735251128673553, "learning_rate": 0.0003305090596232636, "loss": 2.1759, "step": 469680 }, { "epoch": 1.815690185709205, "grad_norm": 0.13064594566822052, "learning_rate": 0.0003303892669247721, "loss": 2.1772, "step": 469690 }, { "epoch": 1.8157288429125882, "grad_norm": 0.15359649062156677, "learning_rate": 0.00033026948282065586, "loss": 2.1486, "step": 469700 }, { "epoch": 1.8157675001159717, "grad_norm": 0.14719156920909882, "learning_rate": 0.00033014970730906533, "loss": 2.1517, "step": 469710 }, { "epoch": 1.815806157319355, "grad_norm": 0.14220820367336273, "learning_rate": 0.0003300299403881517, "loss": 2.1609, "step": 469720 }, { "epoch": 1.8158448145227382, "grad_norm": 0.13633151352405548, "learning_rate": 0.00032991018205606683, "loss": 2.1561, "step": 469730 }, { "epoch": 1.8158834717261214, "grad_norm": 0.14592762291431427, "learning_rate": 0.00032979043231096285, "loss": 2.1697, "step": 469740 }, { "epoch": 1.8159221289295049, "grad_norm": 0.13614079356193542, "learning_rate": 0.0003296706911509935, "loss": 2.1678, "step": 469750 }, { "epoch": 1.8159607861328881, "grad_norm": 0.1432075947523117, "learning_rate": 0.00032955095857431195, "loss": 2.1694, "step": 469760 }, { "epoch": 1.8159994433362714, "grad_norm": 0.1378885954618454, "learning_rate": 0.0003294312345790731, "loss": 2.1932, "step": 469770 }, { "epoch": 1.8160381005396546, "grad_norm": 0.14316721260547638, "learning_rate": 0.00032931151916343196, "loss": 2.1704, "step": 469780 }, { "epoch": 1.816076757743038, "grad_norm": 0.1418437510728836, "learning_rate": 0.0003291918123255442, "loss": 2.1502, "step": 469790 }, { "epoch": 1.8161154149464211, "grad_norm": 0.14487609267234802, "learning_rate": 0.0003290721140635662, "loss": 2.1786, "step": 469800 }, { "epoch": 1.8161540721498044, "grad_norm": 0.13837496936321259, "learning_rate": 0.0003289524243756552, "loss": 2.1862, "step": 469810 }, { "epoch": 1.8161927293531877, "grad_norm": 0.14798998832702637, "learning_rate": 0.00032883274325996913, "loss": 2.1732, "step": 469820 }, { "epoch": 1.816231386556571, "grad_norm": 0.1464812457561493, "learning_rate": 0.0003287130707146662, "loss": 2.173, "step": 469830 }, { "epoch": 1.8162700437599542, "grad_norm": 0.1402280181646347, "learning_rate": 0.00032859340673790574, "loss": 2.1594, "step": 469840 }, { "epoch": 1.8163087009633374, "grad_norm": 0.13324207067489624, "learning_rate": 0.0003284737513278471, "loss": 2.1681, "step": 469850 }, { "epoch": 1.8163473581667207, "grad_norm": 0.1384410262107849, "learning_rate": 0.00032835410448265104, "loss": 2.1794, "step": 469860 }, { "epoch": 1.816386015370104, "grad_norm": 0.13632450997829437, "learning_rate": 0.0003282344662004781, "loss": 2.1626, "step": 469870 }, { "epoch": 1.8164246725734874, "grad_norm": 0.14713577926158905, "learning_rate": 0.0003281148364794908, "loss": 2.1591, "step": 469880 }, { "epoch": 1.8164633297768706, "grad_norm": 0.13500018417835236, "learning_rate": 0.00032799521531785093, "loss": 2.1738, "step": 469890 }, { "epoch": 1.816501986980254, "grad_norm": 0.12875080108642578, "learning_rate": 0.00032787560271372153, "loss": 2.173, "step": 469900 }, { "epoch": 1.8165406441836371, "grad_norm": 0.1497087925672531, "learning_rate": 0.0003277559986652665, "loss": 2.1654, "step": 469910 }, { "epoch": 1.8165793013870206, "grad_norm": 0.14243261516094208, "learning_rate": 0.00032763640317065026, "loss": 2.1692, "step": 469920 }, { "epoch": 1.8166179585904039, "grad_norm": 0.14686591923236847, "learning_rate": 0.00032751681622803755, "loss": 2.1697, "step": 469930 }, { "epoch": 1.8166566157937871, "grad_norm": 0.1393844187259674, "learning_rate": 0.00032739723783559404, "loss": 2.1706, "step": 469940 }, { "epoch": 1.8166952729971704, "grad_norm": 0.15019193291664124, "learning_rate": 0.0003272776679914864, "loss": 2.1591, "step": 469950 }, { "epoch": 1.8167339302005536, "grad_norm": 0.15770284831523895, "learning_rate": 0.00032715810669388135, "loss": 2.1699, "step": 469960 }, { "epoch": 1.8167725874039369, "grad_norm": 0.16133318841457367, "learning_rate": 0.00032703855394094637, "loss": 2.1562, "step": 469970 }, { "epoch": 1.8168112446073201, "grad_norm": 0.14682245254516602, "learning_rate": 0.00032691900973084987, "loss": 2.1715, "step": 469980 }, { "epoch": 1.8168499018107034, "grad_norm": 0.1428011804819107, "learning_rate": 0.00032679947406176105, "loss": 2.1799, "step": 469990 }, { "epoch": 1.8168885590140866, "grad_norm": 0.14194734394550323, "learning_rate": 0.00032667994693184887, "loss": 2.1743, "step": 470000 }, { "epoch": 1.8169272162174699, "grad_norm": 0.14284037053585052, "learning_rate": 0.0003265604283392842, "loss": 2.1708, "step": 470010 }, { "epoch": 1.8169658734208531, "grad_norm": 0.16350673139095306, "learning_rate": 0.00032644091828223763, "loss": 2.1756, "step": 470020 }, { "epoch": 1.8170045306242364, "grad_norm": 0.1494877189397812, "learning_rate": 0.00032632141675888085, "loss": 2.1886, "step": 470030 }, { "epoch": 1.8170431878276196, "grad_norm": 0.15065732598304749, "learning_rate": 0.0003262019237673859, "loss": 2.1627, "step": 470040 }, { "epoch": 1.8170818450310031, "grad_norm": 0.15042832493782043, "learning_rate": 0.0003260824393059256, "loss": 2.1633, "step": 470050 }, { "epoch": 1.8171205022343864, "grad_norm": 0.14380717277526855, "learning_rate": 0.00032596296337267374, "loss": 2.1701, "step": 470060 }, { "epoch": 1.8171591594377696, "grad_norm": 0.13390575349330902, "learning_rate": 0.0003258434959658043, "loss": 2.1673, "step": 470070 }, { "epoch": 1.8171978166411529, "grad_norm": 0.14788520336151123, "learning_rate": 0.000325724037083492, "loss": 2.191, "step": 470080 }, { "epoch": 1.8172364738445363, "grad_norm": 0.14955569803714752, "learning_rate": 0.00032560458672391257, "loss": 2.1561, "step": 470090 }, { "epoch": 1.8172751310479196, "grad_norm": 0.1507478654384613, "learning_rate": 0.00032548514488524186, "loss": 2.1806, "step": 470100 }, { "epoch": 1.8173137882513029, "grad_norm": 0.15433341264724731, "learning_rate": 0.00032536571156565653, "loss": 2.1841, "step": 470110 }, { "epoch": 1.817352445454686, "grad_norm": 0.14390775561332703, "learning_rate": 0.0003252462867633341, "loss": 2.1729, "step": 470120 }, { "epoch": 1.8173911026580694, "grad_norm": 0.15695568919181824, "learning_rate": 0.0003251268704764529, "loss": 2.1697, "step": 470130 }, { "epoch": 1.8174297598614526, "grad_norm": 0.13125157356262207, "learning_rate": 0.0003250074627031916, "loss": 2.1614, "step": 470140 }, { "epoch": 1.8174684170648359, "grad_norm": 0.13682635128498077, "learning_rate": 0.00032488806344172926, "loss": 2.1589, "step": 470150 }, { "epoch": 1.8175070742682191, "grad_norm": 0.12663866579532623, "learning_rate": 0.00032476867269024614, "loss": 2.1648, "step": 470160 }, { "epoch": 1.8175457314716024, "grad_norm": 0.13196326792240143, "learning_rate": 0.00032464929044692247, "loss": 2.1604, "step": 470170 }, { "epoch": 1.8175843886749856, "grad_norm": 0.13578322529792786, "learning_rate": 0.00032452991670994027, "loss": 2.1697, "step": 470180 }, { "epoch": 1.8176230458783689, "grad_norm": 0.13914400339126587, "learning_rate": 0.0003244105514774809, "loss": 2.1602, "step": 470190 }, { "epoch": 1.8176617030817521, "grad_norm": 0.13743595778942108, "learning_rate": 0.00032429119474772716, "loss": 2.1734, "step": 470200 }, { "epoch": 1.8177003602851354, "grad_norm": 0.129056915640831, "learning_rate": 0.00032417184651886233, "loss": 2.1721, "step": 470210 }, { "epoch": 1.8177390174885188, "grad_norm": 0.14306758344173431, "learning_rate": 0.00032405250678907004, "loss": 2.1706, "step": 470220 }, { "epoch": 1.817777674691902, "grad_norm": 0.1493530571460724, "learning_rate": 0.00032393317555653534, "loss": 2.1721, "step": 470230 }, { "epoch": 1.8178163318952854, "grad_norm": 0.13991063833236694, "learning_rate": 0.000323813852819443, "loss": 2.1594, "step": 470240 }, { "epoch": 1.8178549890986686, "grad_norm": 0.14696717262268066, "learning_rate": 0.0003236945385759791, "loss": 2.1738, "step": 470250 }, { "epoch": 1.817893646302052, "grad_norm": 0.15113307535648346, "learning_rate": 0.0003235752328243298, "loss": 2.1661, "step": 470260 }, { "epoch": 1.8179323035054353, "grad_norm": 0.13656733930110931, "learning_rate": 0.0003234559355626825, "loss": 2.1601, "step": 470270 }, { "epoch": 1.8179709607088186, "grad_norm": 0.15864388644695282, "learning_rate": 0.0003233366467892251, "loss": 2.1721, "step": 470280 }, { "epoch": 1.8180096179122018, "grad_norm": 0.13841305673122406, "learning_rate": 0.00032321736650214563, "loss": 2.1593, "step": 470290 }, { "epoch": 1.818048275115585, "grad_norm": 0.15041330456733704, "learning_rate": 0.00032309809469963335, "loss": 2.1737, "step": 470300 }, { "epoch": 1.8180869323189683, "grad_norm": 0.14067049324512482, "learning_rate": 0.00032297883137987805, "loss": 2.1657, "step": 470310 }, { "epoch": 1.8181255895223516, "grad_norm": 0.13357412815093994, "learning_rate": 0.00032285957654106954, "loss": 2.1665, "step": 470320 }, { "epoch": 1.8181642467257348, "grad_norm": 0.14067021012306213, "learning_rate": 0.0003227403301813996, "loss": 2.1581, "step": 470330 }, { "epoch": 1.818202903929118, "grad_norm": 0.15399391949176788, "learning_rate": 0.00032262109229905955, "loss": 2.1756, "step": 470340 }, { "epoch": 1.8182415611325013, "grad_norm": 0.13756081461906433, "learning_rate": 0.0003225018628922416, "loss": 2.186, "step": 470350 }, { "epoch": 1.8182802183358846, "grad_norm": 0.14441081881523132, "learning_rate": 0.0003223826419591385, "loss": 2.1658, "step": 470360 }, { "epoch": 1.8183188755392679, "grad_norm": 0.13834241032600403, "learning_rate": 0.0003222634294979441, "loss": 2.1676, "step": 470370 }, { "epoch": 1.818357532742651, "grad_norm": 0.15014494955539703, "learning_rate": 0.0003221442255068525, "loss": 2.1694, "step": 470380 }, { "epoch": 1.8183961899460346, "grad_norm": 0.14847639203071594, "learning_rate": 0.00032202502998405857, "loss": 2.1727, "step": 470390 }, { "epoch": 1.8184348471494178, "grad_norm": 0.1385437697172165, "learning_rate": 0.0003219058429277575, "loss": 2.1695, "step": 470400 }, { "epoch": 1.818473504352801, "grad_norm": 0.14720351994037628, "learning_rate": 0.000321786664336146, "loss": 2.1796, "step": 470410 }, { "epoch": 1.8185121615561843, "grad_norm": 0.15029869973659515, "learning_rate": 0.00032166749420742003, "loss": 2.1467, "step": 470420 }, { "epoch": 1.8185508187595678, "grad_norm": 0.14885807037353516, "learning_rate": 0.0003215483325397779, "loss": 2.1716, "step": 470430 }, { "epoch": 1.818589475962951, "grad_norm": 0.14331059157848358, "learning_rate": 0.00032142917933141704, "loss": 2.1625, "step": 470440 }, { "epoch": 1.8186281331663343, "grad_norm": 0.14600177109241486, "learning_rate": 0.0003213100345805364, "loss": 2.1658, "step": 470450 }, { "epoch": 1.8186667903697176, "grad_norm": 0.14313966035842896, "learning_rate": 0.0003211908982853351, "loss": 2.1583, "step": 470460 }, { "epoch": 1.8187054475731008, "grad_norm": 0.13352936506271362, "learning_rate": 0.00032107177044401336, "loss": 2.1777, "step": 470470 }, { "epoch": 1.818744104776484, "grad_norm": 0.15081366896629333, "learning_rate": 0.00032095265105477156, "loss": 2.1828, "step": 470480 }, { "epoch": 1.8187827619798673, "grad_norm": 0.15778934955596924, "learning_rate": 0.0003208335401158109, "loss": 2.175, "step": 470490 }, { "epoch": 1.8188214191832506, "grad_norm": 0.15886171162128448, "learning_rate": 0.0003207144376253335, "loss": 2.1649, "step": 470500 }, { "epoch": 1.8188600763866338, "grad_norm": 0.14672130346298218, "learning_rate": 0.00032059534358154187, "loss": 2.1552, "step": 470510 }, { "epoch": 1.818898733590017, "grad_norm": 0.14366315305233002, "learning_rate": 0.0003204762579826386, "loss": 2.166, "step": 470520 }, { "epoch": 1.8189373907934003, "grad_norm": 0.1349872350692749, "learning_rate": 0.0003203571808268282, "loss": 2.1711, "step": 470530 }, { "epoch": 1.8189760479967836, "grad_norm": 0.14480504393577576, "learning_rate": 0.0003202381121123148, "loss": 2.1762, "step": 470540 }, { "epoch": 1.8190147052001668, "grad_norm": 0.14662893116474152, "learning_rate": 0.0003201190518373036, "loss": 2.1742, "step": 470550 }, { "epoch": 1.8190533624035503, "grad_norm": 0.1451336145401001, "learning_rate": 0.0003200000000000001, "loss": 2.1774, "step": 470560 }, { "epoch": 1.8190920196069336, "grad_norm": 0.1518847644329071, "learning_rate": 0.00031988095659861047, "loss": 2.1701, "step": 470570 }, { "epoch": 1.8191306768103168, "grad_norm": 0.16146059334278107, "learning_rate": 0.0003197619216313421, "loss": 2.1677, "step": 470580 }, { "epoch": 1.8191693340137, "grad_norm": 0.15619412064552307, "learning_rate": 0.0003196428950964025, "loss": 2.1668, "step": 470590 }, { "epoch": 1.8192079912170835, "grad_norm": 0.13538570702075958, "learning_rate": 0.0003195238769919997, "loss": 2.1662, "step": 470600 }, { "epoch": 1.8192466484204668, "grad_norm": 0.15582145750522614, "learning_rate": 0.0003194048673163428, "loss": 2.1905, "step": 470610 }, { "epoch": 1.81928530562385, "grad_norm": 0.14375025033950806, "learning_rate": 0.00031928586606764077, "loss": 2.1657, "step": 470620 }, { "epoch": 1.8193239628272333, "grad_norm": 0.14095117151737213, "learning_rate": 0.00031916687324410464, "loss": 2.1675, "step": 470630 }, { "epoch": 1.8193626200306166, "grad_norm": 0.1542339324951172, "learning_rate": 0.00031904788884394456, "loss": 2.1613, "step": 470640 }, { "epoch": 1.8194012772339998, "grad_norm": 0.1529356837272644, "learning_rate": 0.00031892891286537206, "loss": 2.1627, "step": 470650 }, { "epoch": 1.819439934437383, "grad_norm": 0.14034295082092285, "learning_rate": 0.00031880994530659914, "loss": 2.1585, "step": 470660 }, { "epoch": 1.8194785916407663, "grad_norm": 0.14266914129257202, "learning_rate": 0.0003186909861658387, "loss": 2.1699, "step": 470670 }, { "epoch": 1.8195172488441496, "grad_norm": 0.14826495945453644, "learning_rate": 0.00031857203544130374, "loss": 2.1701, "step": 470680 }, { "epoch": 1.8195559060475328, "grad_norm": 0.13871616125106812, "learning_rate": 0.00031845309313120865, "loss": 2.1656, "step": 470690 }, { "epoch": 1.819594563250916, "grad_norm": 0.14421504735946655, "learning_rate": 0.0003183341592337674, "loss": 2.1667, "step": 470700 }, { "epoch": 1.8196332204542993, "grad_norm": 0.13885298371315002, "learning_rate": 0.00031821523374719575, "loss": 2.143, "step": 470710 }, { "epoch": 1.8196718776576826, "grad_norm": 0.16003422439098358, "learning_rate": 0.0003180963166697088, "loss": 2.163, "step": 470720 }, { "epoch": 1.819710534861066, "grad_norm": 0.15020252764225006, "learning_rate": 0.0003179774079995239, "loss": 2.1784, "step": 470730 }, { "epoch": 1.8197491920644493, "grad_norm": 0.14752747118473053, "learning_rate": 0.0003178585077348577, "loss": 2.16, "step": 470740 }, { "epoch": 1.8197878492678325, "grad_norm": 0.14104293286800385, "learning_rate": 0.00031773961587392785, "loss": 2.1707, "step": 470750 }, { "epoch": 1.8198265064712158, "grad_norm": 0.141326904296875, "learning_rate": 0.0003176207324149527, "loss": 2.1736, "step": 470760 }, { "epoch": 1.8198651636745993, "grad_norm": 0.14769048988819122, "learning_rate": 0.00031750185735615144, "loss": 2.1854, "step": 470770 }, { "epoch": 1.8199038208779825, "grad_norm": 0.14957557618618011, "learning_rate": 0.0003173829906957437, "loss": 2.182, "step": 470780 }, { "epoch": 1.8199424780813658, "grad_norm": 0.1409343183040619, "learning_rate": 0.0003172641324319494, "loss": 2.1707, "step": 470790 }, { "epoch": 1.819981135284749, "grad_norm": 0.14448200166225433, "learning_rate": 0.0003171452825629897, "loss": 2.1599, "step": 470800 }, { "epoch": 1.8200197924881323, "grad_norm": 0.13009198009967804, "learning_rate": 0.000317026441087086, "loss": 2.1677, "step": 470810 }, { "epoch": 1.8200584496915155, "grad_norm": 0.1416071653366089, "learning_rate": 0.00031690760800246023, "loss": 2.1593, "step": 470820 }, { "epoch": 1.8200971068948988, "grad_norm": 0.1369924396276474, "learning_rate": 0.00031678878330733553, "loss": 2.167, "step": 470830 }, { "epoch": 1.820135764098282, "grad_norm": 0.13908712565898895, "learning_rate": 0.0003166699669999349, "loss": 2.1813, "step": 470840 }, { "epoch": 1.8201744213016653, "grad_norm": 0.13555458188056946, "learning_rate": 0.00031655115907848263, "loss": 2.1665, "step": 470850 }, { "epoch": 1.8202130785050485, "grad_norm": 0.15544474124908447, "learning_rate": 0.0003164323595412033, "loss": 2.1582, "step": 470860 }, { "epoch": 1.8202517357084318, "grad_norm": 0.13911612331867218, "learning_rate": 0.0003163135683863221, "loss": 2.1752, "step": 470870 }, { "epoch": 1.820290392911815, "grad_norm": 0.14102821052074432, "learning_rate": 0.00031619478561206504, "loss": 2.1728, "step": 470880 }, { "epoch": 1.8203290501151985, "grad_norm": 0.1417911797761917, "learning_rate": 0.0003160760112166583, "loss": 2.1681, "step": 470890 }, { "epoch": 1.8203677073185818, "grad_norm": 0.1564634144306183, "learning_rate": 0.00031595724519832926, "loss": 2.1569, "step": 470900 }, { "epoch": 1.820406364521965, "grad_norm": 0.15098914504051208, "learning_rate": 0.00031583848755530573, "loss": 2.1558, "step": 470910 }, { "epoch": 1.8204450217253483, "grad_norm": 0.1503758728504181, "learning_rate": 0.0003157197382858159, "loss": 2.1759, "step": 470920 }, { "epoch": 1.8204836789287315, "grad_norm": 0.1641359031200409, "learning_rate": 0.00031560099738808914, "loss": 2.1705, "step": 470930 }, { "epoch": 1.820522336132115, "grad_norm": 0.13702145218849182, "learning_rate": 0.0003154822648603548, "loss": 2.176, "step": 470940 }, { "epoch": 1.8205609933354983, "grad_norm": 0.1344425529241562, "learning_rate": 0.0003153635407008431, "loss": 2.1762, "step": 470950 }, { "epoch": 1.8205996505388815, "grad_norm": 0.14092884957790375, "learning_rate": 0.00031524482490778504, "loss": 2.1645, "step": 470960 }, { "epoch": 1.8206383077422648, "grad_norm": 0.14377425611019135, "learning_rate": 0.000315126117479412, "loss": 2.1598, "step": 470970 }, { "epoch": 1.820676964945648, "grad_norm": 0.16156238317489624, "learning_rate": 0.00031500741841395644, "loss": 2.1744, "step": 470980 }, { "epoch": 1.8207156221490313, "grad_norm": 0.14735539257526398, "learning_rate": 0.0003148887277096506, "loss": 2.1714, "step": 470990 }, { "epoch": 1.8207542793524145, "grad_norm": 0.13479958474636078, "learning_rate": 0.00031477004536472844, "loss": 2.1592, "step": 471000 }, { "epoch": 1.8207929365557978, "grad_norm": 0.14318402111530304, "learning_rate": 0.0003146513713774233, "loss": 2.1753, "step": 471010 }, { "epoch": 1.820831593759181, "grad_norm": 0.1418886035680771, "learning_rate": 0.00031453270574597014, "loss": 2.1701, "step": 471020 }, { "epoch": 1.8208702509625643, "grad_norm": 0.155859112739563, "learning_rate": 0.00031441404846860446, "loss": 2.1639, "step": 471030 }, { "epoch": 1.8209089081659475, "grad_norm": 0.15436893701553345, "learning_rate": 0.0003142953995435618, "loss": 2.1652, "step": 471040 }, { "epoch": 1.8209475653693308, "grad_norm": 0.13911384344100952, "learning_rate": 0.0003141767589690787, "loss": 2.158, "step": 471050 }, { "epoch": 1.8209862225727143, "grad_norm": 0.14979267120361328, "learning_rate": 0.0003140581267433922, "loss": 2.1578, "step": 471060 }, { "epoch": 1.8210248797760975, "grad_norm": 0.14796827733516693, "learning_rate": 0.00031393950286474003, "loss": 2.1664, "step": 471070 }, { "epoch": 1.8210635369794808, "grad_norm": 0.1441972851753235, "learning_rate": 0.0003138208873313606, "loss": 2.1759, "step": 471080 }, { "epoch": 1.821102194182864, "grad_norm": 0.1467493325471878, "learning_rate": 0.000313702280141493, "loss": 2.1713, "step": 471090 }, { "epoch": 1.8211408513862473, "grad_norm": 0.14196404814720154, "learning_rate": 0.00031358368129337657, "loss": 2.1555, "step": 471100 }, { "epoch": 1.8211795085896307, "grad_norm": 0.14079461991786957, "learning_rate": 0.0003134650907852516, "loss": 2.1676, "step": 471110 }, { "epoch": 1.821218165793014, "grad_norm": 0.14870962500572205, "learning_rate": 0.00031334650861535864, "loss": 2.1714, "step": 471120 }, { "epoch": 1.8212568229963972, "grad_norm": 0.14794059097766876, "learning_rate": 0.0003132279347819398, "loss": 2.1574, "step": 471130 }, { "epoch": 1.8212954801997805, "grad_norm": 0.15221253037452698, "learning_rate": 0.00031310936928323653, "loss": 2.1821, "step": 471140 }, { "epoch": 1.8213341374031637, "grad_norm": 0.14122171700000763, "learning_rate": 0.0003129908121174918, "loss": 2.1542, "step": 471150 }, { "epoch": 1.821372794606547, "grad_norm": 0.14007976651191711, "learning_rate": 0.0003128722632829486, "loss": 2.1689, "step": 471160 }, { "epoch": 1.8214114518099302, "grad_norm": 0.13979344069957733, "learning_rate": 0.00031275372277785117, "loss": 2.1767, "step": 471170 }, { "epoch": 1.8214501090133135, "grad_norm": 0.14779804646968842, "learning_rate": 0.0003126351906004441, "loss": 2.1651, "step": 471180 }, { "epoch": 1.8214887662166968, "grad_norm": 0.14292143285274506, "learning_rate": 0.00031251666674897205, "loss": 2.1753, "step": 471190 }, { "epoch": 1.82152742342008, "grad_norm": 0.14197584986686707, "learning_rate": 0.00031239815122168115, "loss": 2.1574, "step": 471200 }, { "epoch": 1.8215660806234633, "grad_norm": 0.13902144134044647, "learning_rate": 0.0003122796440168176, "loss": 2.1556, "step": 471210 }, { "epoch": 1.8216047378268465, "grad_norm": 0.15372341871261597, "learning_rate": 0.0003121611451326285, "loss": 2.1737, "step": 471220 }, { "epoch": 1.82164339503023, "grad_norm": 0.14466264843940735, "learning_rate": 0.00031204265456736136, "loss": 2.1771, "step": 471230 }, { "epoch": 1.8216820522336132, "grad_norm": 0.13506931066513062, "learning_rate": 0.00031192417231926453, "loss": 2.1691, "step": 471240 }, { "epoch": 1.8217207094369965, "grad_norm": 0.14005401730537415, "learning_rate": 0.0003118056983865867, "loss": 2.1636, "step": 471250 }, { "epoch": 1.8217593666403797, "grad_norm": 0.16184751689434052, "learning_rate": 0.0003116872327675775, "loss": 2.1734, "step": 471260 }, { "epoch": 1.821798023843763, "grad_norm": 0.13978254795074463, "learning_rate": 0.0003115687754604868, "loss": 2.1618, "step": 471270 }, { "epoch": 1.8218366810471465, "grad_norm": 0.13510258495807648, "learning_rate": 0.0003114503264635653, "loss": 2.173, "step": 471280 }, { "epoch": 1.8218753382505297, "grad_norm": 0.14572641253471375, "learning_rate": 0.00031133188577506444, "loss": 2.1782, "step": 471290 }, { "epoch": 1.821913995453913, "grad_norm": 0.15020966529846191, "learning_rate": 0.00031121345339323606, "loss": 2.164, "step": 471300 }, { "epoch": 1.8219526526572962, "grad_norm": 0.15157094597816467, "learning_rate": 0.0003110950293163326, "loss": 2.1667, "step": 471310 }, { "epoch": 1.8219913098606795, "grad_norm": 0.15115141868591309, "learning_rate": 0.0003109766135426071, "loss": 2.1664, "step": 471320 }, { "epoch": 1.8220299670640627, "grad_norm": 0.14831262826919556, "learning_rate": 0.00031085820607031333, "loss": 2.1798, "step": 471330 }, { "epoch": 1.822068624267446, "grad_norm": 0.14008532464504242, "learning_rate": 0.00031073980689770586, "loss": 2.1711, "step": 471340 }, { "epoch": 1.8221072814708292, "grad_norm": 0.14013555645942688, "learning_rate": 0.0003106214160230396, "loss": 2.1672, "step": 471350 }, { "epoch": 1.8221459386742125, "grad_norm": 0.14341752231121063, "learning_rate": 0.00031050303344457, "loss": 2.1691, "step": 471360 }, { "epoch": 1.8221845958775957, "grad_norm": 0.14467793703079224, "learning_rate": 0.00031038465916055326, "loss": 2.1627, "step": 471370 }, { "epoch": 1.822223253080979, "grad_norm": 0.13100607693195343, "learning_rate": 0.00031026629316924616, "loss": 2.1567, "step": 471380 }, { "epoch": 1.8222619102843622, "grad_norm": 0.13663366436958313, "learning_rate": 0.0003101479354689063, "loss": 2.1653, "step": 471390 }, { "epoch": 1.8223005674877457, "grad_norm": 0.13873040676116943, "learning_rate": 0.0003100295860577913, "loss": 2.1536, "step": 471400 }, { "epoch": 1.822339224691129, "grad_norm": 0.1375930905342102, "learning_rate": 0.00030991124493416036, "loss": 2.1583, "step": 471410 }, { "epoch": 1.8223778818945122, "grad_norm": 0.14047600328922272, "learning_rate": 0.00030979291209627213, "loss": 2.1596, "step": 471420 }, { "epoch": 1.8224165390978955, "grad_norm": 0.14657500386238098, "learning_rate": 0.0003096745875423868, "loss": 2.182, "step": 471430 }, { "epoch": 1.822455196301279, "grad_norm": 0.1562751680612564, "learning_rate": 0.0003095562712707649, "loss": 2.1657, "step": 471440 }, { "epoch": 1.8224938535046622, "grad_norm": 0.1424303650856018, "learning_rate": 0.0003094379632796671, "loss": 2.1518, "step": 471450 }, { "epoch": 1.8225325107080455, "grad_norm": 0.15109124779701233, "learning_rate": 0.0003093196635673554, "loss": 2.1767, "step": 471460 }, { "epoch": 1.8225711679114287, "grad_norm": 0.15089304745197296, "learning_rate": 0.0003092013721320921, "loss": 2.1718, "step": 471470 }, { "epoch": 1.822609825114812, "grad_norm": 0.13845579326152802, "learning_rate": 0.00030908308897213986, "loss": 2.1584, "step": 471480 }, { "epoch": 1.8226484823181952, "grad_norm": 0.15574099123477936, "learning_rate": 0.0003089648140857624, "loss": 2.163, "step": 471490 }, { "epoch": 1.8226871395215785, "grad_norm": 0.1501399129629135, "learning_rate": 0.0003088465474712237, "loss": 2.1597, "step": 471500 }, { "epoch": 1.8227257967249617, "grad_norm": 0.19525304436683655, "learning_rate": 0.0003087282891267886, "loss": 2.1577, "step": 471510 }, { "epoch": 1.822764453928345, "grad_norm": 0.14497607946395874, "learning_rate": 0.00030861003905072204, "loss": 2.161, "step": 471520 }, { "epoch": 1.8228031111317282, "grad_norm": 0.13903570175170898, "learning_rate": 0.0003084917972412904, "loss": 2.1695, "step": 471530 }, { "epoch": 1.8228417683351115, "grad_norm": 0.1286279857158661, "learning_rate": 0.00030837356369676, "loss": 2.176, "step": 471540 }, { "epoch": 1.8228804255384947, "grad_norm": 0.7849547266960144, "learning_rate": 0.0003082553384153979, "loss": 2.1549, "step": 471550 }, { "epoch": 1.822919082741878, "grad_norm": 0.1463264673948288, "learning_rate": 0.000308137121395472, "loss": 2.154, "step": 471560 }, { "epoch": 1.8229577399452614, "grad_norm": 0.14043350517749786, "learning_rate": 0.0003080189126352506, "loss": 2.1559, "step": 471570 }, { "epoch": 1.8229963971486447, "grad_norm": 0.13505004346370697, "learning_rate": 0.00030790071213300264, "loss": 2.1785, "step": 471580 }, { "epoch": 1.823035054352028, "grad_norm": 0.1397033929824829, "learning_rate": 0.0003077825198869975, "loss": 2.151, "step": 471590 }, { "epoch": 1.8230737115554112, "grad_norm": 0.16114629805088043, "learning_rate": 0.00030766433589550556, "loss": 2.1602, "step": 471600 }, { "epoch": 1.8231123687587947, "grad_norm": 0.1364261955022812, "learning_rate": 0.0003075461601567977, "loss": 2.16, "step": 471610 }, { "epoch": 1.823151025962178, "grad_norm": 0.1511559635400772, "learning_rate": 0.000307427992669145, "loss": 2.1632, "step": 471620 }, { "epoch": 1.8231896831655612, "grad_norm": 0.1392008364200592, "learning_rate": 0.00030730983343081955, "loss": 2.1617, "step": 471630 }, { "epoch": 1.8232283403689444, "grad_norm": 0.14022274315357208, "learning_rate": 0.00030719168244009375, "loss": 2.1701, "step": 471640 }, { "epoch": 1.8232669975723277, "grad_norm": 0.14056052267551422, "learning_rate": 0.0003070735396952411, "loss": 2.1341, "step": 471650 }, { "epoch": 1.823305654775711, "grad_norm": 0.15311531722545624, "learning_rate": 0.00030695540519453535, "loss": 2.1791, "step": 471660 }, { "epoch": 1.8233443119790942, "grad_norm": 0.14211353659629822, "learning_rate": 0.0003068372789362506, "loss": 2.1611, "step": 471670 }, { "epoch": 1.8233829691824774, "grad_norm": 0.14266042411327362, "learning_rate": 0.00030671916091866213, "loss": 2.1623, "step": 471680 }, { "epoch": 1.8234216263858607, "grad_norm": 0.1391574889421463, "learning_rate": 0.0003066010511400452, "loss": 2.1728, "step": 471690 }, { "epoch": 1.823460283589244, "grad_norm": 0.13404053449630737, "learning_rate": 0.0003064829495986763, "loss": 2.1765, "step": 471700 }, { "epoch": 1.8234989407926272, "grad_norm": 0.148732990026474, "learning_rate": 0.00030636485629283183, "loss": 2.1709, "step": 471710 }, { "epoch": 1.8235375979960105, "grad_norm": 0.15154549479484558, "learning_rate": 0.0003062467712207897, "loss": 2.1499, "step": 471720 }, { "epoch": 1.8235762551993937, "grad_norm": 0.14113524556159973, "learning_rate": 0.0003061286943808275, "loss": 2.1826, "step": 471730 }, { "epoch": 1.8236149124027772, "grad_norm": 0.15453845262527466, "learning_rate": 0.00030601062577122407, "loss": 2.165, "step": 471740 }, { "epoch": 1.8236535696061604, "grad_norm": 0.1487610638141632, "learning_rate": 0.0003058925653902585, "loss": 2.1573, "step": 471750 }, { "epoch": 1.8236922268095437, "grad_norm": 0.14542990922927856, "learning_rate": 0.0003057745132362104, "loss": 2.167, "step": 471760 }, { "epoch": 1.823730884012927, "grad_norm": 0.14575399458408356, "learning_rate": 0.00030565646930736024, "loss": 2.1603, "step": 471770 }, { "epoch": 1.8237695412163104, "grad_norm": 0.1531772017478943, "learning_rate": 0.0003055384336019893, "loss": 2.1539, "step": 471780 }, { "epoch": 1.8238081984196937, "grad_norm": 0.1494482457637787, "learning_rate": 0.000305420406118379, "loss": 2.1679, "step": 471790 }, { "epoch": 1.823846855623077, "grad_norm": 0.15587127208709717, "learning_rate": 0.0003053023868548113, "loss": 2.1581, "step": 471800 }, { "epoch": 1.8238855128264602, "grad_norm": 0.1492954045534134, "learning_rate": 0.00030518437580956913, "loss": 2.1635, "step": 471810 }, { "epoch": 1.8239241700298434, "grad_norm": 0.1406371295452118, "learning_rate": 0.00030506637298093595, "loss": 2.1721, "step": 471820 }, { "epoch": 1.8239628272332267, "grad_norm": 0.1496431976556778, "learning_rate": 0.0003049483783671956, "loss": 2.1673, "step": 471830 }, { "epoch": 1.82400148443661, "grad_norm": 0.1430736482143402, "learning_rate": 0.0003048303919666327, "loss": 2.1649, "step": 471840 }, { "epoch": 1.8240401416399932, "grad_norm": 0.1506548672914505, "learning_rate": 0.00030471241377753253, "loss": 2.1603, "step": 471850 }, { "epoch": 1.8240787988433764, "grad_norm": 0.14081627130508423, "learning_rate": 0.0003045944437981807, "loss": 2.1693, "step": 471860 }, { "epoch": 1.8241174560467597, "grad_norm": 0.13509422540664673, "learning_rate": 0.0003044764820268637, "loss": 2.1568, "step": 471870 }, { "epoch": 1.824156113250143, "grad_norm": 0.14616185426712036, "learning_rate": 0.00030435852846186863, "loss": 2.1586, "step": 471880 }, { "epoch": 1.8241947704535262, "grad_norm": 0.1500072479248047, "learning_rate": 0.0003042405831014825, "loss": 2.1719, "step": 471890 }, { "epoch": 1.8242334276569094, "grad_norm": 0.14021477103233337, "learning_rate": 0.0003041226459439941, "loss": 2.1569, "step": 471900 }, { "epoch": 1.824272084860293, "grad_norm": 0.14883461594581604, "learning_rate": 0.00030400471698769163, "loss": 2.1632, "step": 471910 }, { "epoch": 1.8243107420636762, "grad_norm": 0.1428908258676529, "learning_rate": 0.0003038867962308649, "loss": 2.1644, "step": 471920 }, { "epoch": 1.8243493992670594, "grad_norm": 0.1492062509059906, "learning_rate": 0.0003037688836718033, "loss": 2.1625, "step": 471930 }, { "epoch": 1.8243880564704427, "grad_norm": 0.1407235711812973, "learning_rate": 0.00030365097930879804, "loss": 2.1627, "step": 471940 }, { "epoch": 1.8244267136738261, "grad_norm": 0.14518225193023682, "learning_rate": 0.0003035330831401397, "loss": 2.182, "step": 471950 }, { "epoch": 1.8244653708772094, "grad_norm": 0.14669685065746307, "learning_rate": 0.0003034151951641202, "loss": 2.167, "step": 471960 }, { "epoch": 1.8245040280805926, "grad_norm": 0.14908838272094727, "learning_rate": 0.0003032973153790319, "loss": 2.1547, "step": 471970 }, { "epoch": 1.824542685283976, "grad_norm": 0.15010426938533783, "learning_rate": 0.00030317944378316787, "loss": 2.1593, "step": 471980 }, { "epoch": 1.8245813424873591, "grad_norm": 0.15162095427513123, "learning_rate": 0.0003030615803748211, "loss": 2.1476, "step": 471990 }, { "epoch": 1.8246199996907424, "grad_norm": 0.17372465133666992, "learning_rate": 0.00030294372515228595, "loss": 2.1694, "step": 472000 }, { "epoch": 1.8246586568941257, "grad_norm": 0.1417848914861679, "learning_rate": 0.0003028258781138573, "loss": 2.1725, "step": 472010 }, { "epoch": 1.824697314097509, "grad_norm": 0.138796865940094, "learning_rate": 0.00030270803925783006, "loss": 2.1666, "step": 472020 }, { "epoch": 1.8247359713008922, "grad_norm": 0.13671287894248962, "learning_rate": 0.0003025902085825003, "loss": 2.1578, "step": 472030 }, { "epoch": 1.8247746285042754, "grad_norm": 0.14566947519779205, "learning_rate": 0.0003024723860861645, "loss": 2.1563, "step": 472040 }, { "epoch": 1.8248132857076587, "grad_norm": 0.14366553723812103, "learning_rate": 0.00030235457176711945, "loss": 2.1647, "step": 472050 }, { "epoch": 1.824851942911042, "grad_norm": 0.14583460986614227, "learning_rate": 0.0003022367656236633, "loss": 2.1619, "step": 472060 }, { "epoch": 1.8248906001144252, "grad_norm": 0.144693523645401, "learning_rate": 0.0003021189676540939, "loss": 2.1452, "step": 472070 }, { "epoch": 1.8249292573178086, "grad_norm": 0.15990732610225677, "learning_rate": 0.00030200117785671, "loss": 2.1751, "step": 472080 }, { "epoch": 1.824967914521192, "grad_norm": 0.16300183534622192, "learning_rate": 0.0003018833962298113, "loss": 2.1598, "step": 472090 }, { "epoch": 1.8250065717245751, "grad_norm": 0.14283479750156403, "learning_rate": 0.0003017656227716976, "loss": 2.1696, "step": 472100 }, { "epoch": 1.8250452289279584, "grad_norm": 0.13620631396770477, "learning_rate": 0.0003016478574806696, "loss": 2.173, "step": 472110 }, { "epoch": 1.8250838861313419, "grad_norm": 0.1480875462293625, "learning_rate": 0.00030153010035502837, "loss": 2.1708, "step": 472120 }, { "epoch": 1.8251225433347251, "grad_norm": 0.13537877798080444, "learning_rate": 0.00030141235139307575, "loss": 2.1646, "step": 472130 }, { "epoch": 1.8251612005381084, "grad_norm": 0.14668239653110504, "learning_rate": 0.000301294610593114, "loss": 2.1873, "step": 472140 }, { "epoch": 1.8251998577414916, "grad_norm": 0.14213785529136658, "learning_rate": 0.0003011768779534463, "loss": 2.1644, "step": 472150 }, { "epoch": 1.8252385149448749, "grad_norm": 0.13207575678825378, "learning_rate": 0.00030105915347237587, "loss": 2.1637, "step": 472160 }, { "epoch": 1.8252771721482581, "grad_norm": 0.12869346141815186, "learning_rate": 0.0003009414371482071, "loss": 2.161, "step": 472170 }, { "epoch": 1.8253158293516414, "grad_norm": 0.12761442363262177, "learning_rate": 0.00030082372897924457, "loss": 2.1647, "step": 472180 }, { "epoch": 1.8253544865550246, "grad_norm": 0.14196434617042542, "learning_rate": 0.0003007060289637935, "loss": 2.1608, "step": 472190 }, { "epoch": 1.8253931437584079, "grad_norm": 0.15592649579048157, "learning_rate": 0.00030058833710015987, "loss": 2.1646, "step": 472200 }, { "epoch": 1.8254318009617911, "grad_norm": 0.1407962143421173, "learning_rate": 0.0003004706533866501, "loss": 2.1762, "step": 472210 }, { "epoch": 1.8254704581651744, "grad_norm": 0.14168062806129456, "learning_rate": 0.0003003529778215715, "loss": 2.1481, "step": 472220 }, { "epoch": 1.8255091153685576, "grad_norm": 0.13927967846393585, "learning_rate": 0.00030023531040323116, "loss": 2.1729, "step": 472230 }, { "epoch": 1.825547772571941, "grad_norm": 0.14393939077854156, "learning_rate": 0.0003001176511299377, "loss": 2.1701, "step": 472240 }, { "epoch": 1.8255864297753244, "grad_norm": 0.14461536705493927, "learning_rate": 0.00030000000000000003, "loss": 2.1705, "step": 472250 }, { "epoch": 1.8256250869787076, "grad_norm": 0.1421518325805664, "learning_rate": 0.00029988235701172727, "loss": 2.1574, "step": 472260 }, { "epoch": 1.8256637441820909, "grad_norm": 0.14677967131137848, "learning_rate": 0.0002997647221634294, "loss": 2.1507, "step": 472270 }, { "epoch": 1.8257024013854741, "grad_norm": 0.14067254960536957, "learning_rate": 0.0002996470954534174, "loss": 2.1773, "step": 472280 }, { "epoch": 1.8257410585888576, "grad_norm": 0.13654285669326782, "learning_rate": 0.0002995294768800019, "loss": 2.1803, "step": 472290 }, { "epoch": 1.8257797157922409, "grad_norm": 0.13923801481723785, "learning_rate": 0.00029941186644149486, "loss": 2.1612, "step": 472300 }, { "epoch": 1.825818372995624, "grad_norm": 0.14260360598564148, "learning_rate": 0.00029929426413620864, "loss": 2.1781, "step": 472310 }, { "epoch": 1.8258570301990074, "grad_norm": 0.14026546478271484, "learning_rate": 0.0002991766699624561, "loss": 2.1452, "step": 472320 }, { "epoch": 1.8258956874023906, "grad_norm": 0.14311176538467407, "learning_rate": 0.0002990590839185505, "loss": 2.1521, "step": 472330 }, { "epoch": 1.8259343446057739, "grad_norm": 0.13906440138816833, "learning_rate": 0.00029894150600280644, "loss": 2.1628, "step": 472340 }, { "epoch": 1.8259730018091571, "grad_norm": 0.13940848410129547, "learning_rate": 0.0002988239362135381, "loss": 2.1673, "step": 472350 }, { "epoch": 1.8260116590125404, "grad_norm": 0.15167121589183807, "learning_rate": 0.000298706374549061, "loss": 2.166, "step": 472360 }, { "epoch": 1.8260503162159236, "grad_norm": 0.14164578914642334, "learning_rate": 0.00029858882100769057, "loss": 2.1489, "step": 472370 }, { "epoch": 1.8260889734193069, "grad_norm": 0.15085890889167786, "learning_rate": 0.0002984712755877437, "loss": 2.1669, "step": 472380 }, { "epoch": 1.8261276306226901, "grad_norm": 0.329809308052063, "learning_rate": 0.00029835373828753696, "loss": 2.1522, "step": 472390 }, { "epoch": 1.8261662878260734, "grad_norm": 0.14183588325977325, "learning_rate": 0.0002982362091053883, "loss": 2.1665, "step": 472400 }, { "epoch": 1.8262049450294566, "grad_norm": 0.15209904313087463, "learning_rate": 0.0002981186880396154, "loss": 2.1621, "step": 472410 }, { "epoch": 1.82624360223284, "grad_norm": 0.1430646777153015, "learning_rate": 0.0002980011750885374, "loss": 2.1497, "step": 472420 }, { "epoch": 1.8262822594362234, "grad_norm": 0.14137636125087738, "learning_rate": 0.00029788367025047305, "loss": 2.1704, "step": 472430 }, { "epoch": 1.8263209166396066, "grad_norm": 0.14333899319171906, "learning_rate": 0.00029776617352374294, "loss": 2.1547, "step": 472440 }, { "epoch": 1.8263595738429899, "grad_norm": 0.13744285702705383, "learning_rate": 0.0002976486849066671, "loss": 2.1571, "step": 472450 }, { "epoch": 1.8263982310463733, "grad_norm": 0.15237878262996674, "learning_rate": 0.00029753120439756666, "loss": 2.1533, "step": 472460 }, { "epoch": 1.8264368882497566, "grad_norm": 0.14781443774700165, "learning_rate": 0.0002974137319947632, "loss": 2.1643, "step": 472470 }, { "epoch": 1.8264755454531398, "grad_norm": 0.15223272144794464, "learning_rate": 0.00029729626769657894, "loss": 2.1584, "step": 472480 }, { "epoch": 1.826514202656523, "grad_norm": 0.1459142416715622, "learning_rate": 0.00029717881150133677, "loss": 2.1711, "step": 472490 }, { "epoch": 1.8265528598599063, "grad_norm": 0.1432747095823288, "learning_rate": 0.00029706136340736, "loss": 2.1697, "step": 472500 }, { "epoch": 1.8265915170632896, "grad_norm": 0.14006705582141876, "learning_rate": 0.00029694392341297226, "loss": 2.15, "step": 472510 }, { "epoch": 1.8266301742666728, "grad_norm": 0.3814711570739746, "learning_rate": 0.00029682649151649865, "loss": 2.1609, "step": 472520 }, { "epoch": 1.826668831470056, "grad_norm": 0.14432662725448608, "learning_rate": 0.0002967090677162636, "loss": 2.1637, "step": 472530 }, { "epoch": 1.8267074886734394, "grad_norm": 0.13059882819652557, "learning_rate": 0.0002965916520105931, "loss": 2.1574, "step": 472540 }, { "epoch": 1.8267461458768226, "grad_norm": 0.1524466872215271, "learning_rate": 0.0002964742443978137, "loss": 2.1566, "step": 472550 }, { "epoch": 1.8267848030802059, "grad_norm": 0.1440562754869461, "learning_rate": 0.0002963568448762517, "loss": 2.1619, "step": 472560 }, { "epoch": 1.826823460283589, "grad_norm": 0.14504368603229523, "learning_rate": 0.00029623945344423475, "loss": 2.1673, "step": 472570 }, { "epoch": 1.8268621174869724, "grad_norm": 0.13357733190059662, "learning_rate": 0.00029612207010009083, "loss": 2.1599, "step": 472580 }, { "epoch": 1.8269007746903558, "grad_norm": 0.13890889286994934, "learning_rate": 0.00029600469484214844, "loss": 2.1698, "step": 472590 }, { "epoch": 1.826939431893739, "grad_norm": 0.15237276256084442, "learning_rate": 0.0002958873276687366, "loss": 2.1601, "step": 472600 }, { "epoch": 1.8269780890971223, "grad_norm": 0.14198938012123108, "learning_rate": 0.0002957699685781852, "loss": 2.1589, "step": 472610 }, { "epoch": 1.8270167463005056, "grad_norm": 0.13609854876995087, "learning_rate": 0.00029565261756882435, "loss": 2.1449, "step": 472620 }, { "epoch": 1.827055403503889, "grad_norm": 0.14549599587917328, "learning_rate": 0.0002955352746389852, "loss": 2.1497, "step": 472630 }, { "epoch": 1.8270940607072723, "grad_norm": 0.14228083193302155, "learning_rate": 0.00029541793978699873, "loss": 2.1675, "step": 472640 }, { "epoch": 1.8271327179106556, "grad_norm": 0.14949370920658112, "learning_rate": 0.00029530061301119727, "loss": 2.1674, "step": 472650 }, { "epoch": 1.8271713751140388, "grad_norm": 0.1407162845134735, "learning_rate": 0.0002951832943099131, "loss": 2.1596, "step": 472660 }, { "epoch": 1.827210032317422, "grad_norm": 0.14520780742168427, "learning_rate": 0.00029506598368147976, "loss": 2.1736, "step": 472670 }, { "epoch": 1.8272486895208053, "grad_norm": 0.14568758010864258, "learning_rate": 0.0002949486811242308, "loss": 2.1858, "step": 472680 }, { "epoch": 1.8272873467241886, "grad_norm": 0.14513273537158966, "learning_rate": 0.00029483138663650044, "loss": 2.1635, "step": 472690 }, { "epoch": 1.8273260039275718, "grad_norm": 0.13524672389030457, "learning_rate": 0.0002947141002166236, "loss": 2.1727, "step": 472700 }, { "epoch": 1.827364661130955, "grad_norm": 0.14150987565517426, "learning_rate": 0.00029459682186293556, "loss": 2.1465, "step": 472710 }, { "epoch": 1.8274033183343383, "grad_norm": 0.14861683547496796, "learning_rate": 0.0002944795515737726, "loss": 2.1769, "step": 472720 }, { "epoch": 1.8274419755377216, "grad_norm": 0.14497342705726624, "learning_rate": 0.0002943622893474711, "loss": 2.1703, "step": 472730 }, { "epoch": 1.8274806327411048, "grad_norm": 0.14805848896503448, "learning_rate": 0.00029424503518236825, "loss": 2.1594, "step": 472740 }, { "epoch": 1.827519289944488, "grad_norm": 0.14520274102687836, "learning_rate": 0.00029412778907680193, "loss": 2.173, "step": 472750 }, { "epoch": 1.8275579471478716, "grad_norm": 0.14054147899150848, "learning_rate": 0.00029401055102911024, "loss": 2.1616, "step": 472760 }, { "epoch": 1.8275966043512548, "grad_norm": 0.14863263070583344, "learning_rate": 0.00029389332103763223, "loss": 2.1564, "step": 472770 }, { "epoch": 1.827635261554638, "grad_norm": 0.14384257793426514, "learning_rate": 0.000293776099100707, "loss": 2.1452, "step": 472780 }, { "epoch": 1.8276739187580213, "grad_norm": 0.13806447386741638, "learning_rate": 0.00029365888521667505, "loss": 2.1665, "step": 472790 }, { "epoch": 1.8277125759614048, "grad_norm": 0.13954965770244598, "learning_rate": 0.00029354167938387677, "loss": 2.1549, "step": 472800 }, { "epoch": 1.827751233164788, "grad_norm": 0.13952240347862244, "learning_rate": 0.00029342448160065305, "loss": 2.1762, "step": 472810 }, { "epoch": 1.8277898903681713, "grad_norm": 0.36426278948783875, "learning_rate": 0.0002933072918653459, "loss": 2.1747, "step": 472820 }, { "epoch": 1.8278285475715546, "grad_norm": 0.14163519442081451, "learning_rate": 0.0002931901101762975, "loss": 2.1594, "step": 472830 }, { "epoch": 1.8278672047749378, "grad_norm": 0.1430559605360031, "learning_rate": 0.0002930729365318505, "loss": 2.155, "step": 472840 }, { "epoch": 1.827905861978321, "grad_norm": 0.14050045609474182, "learning_rate": 0.00029295577093034876, "loss": 2.1582, "step": 472850 }, { "epoch": 1.8279445191817043, "grad_norm": 0.1458028256893158, "learning_rate": 0.00029283861337013595, "loss": 2.1622, "step": 472860 }, { "epoch": 1.8279831763850876, "grad_norm": 0.14921119809150696, "learning_rate": 0.00029272146384955676, "loss": 2.1689, "step": 472870 }, { "epoch": 1.8280218335884708, "grad_norm": 0.15343713760375977, "learning_rate": 0.00029260432236695634, "loss": 2.1886, "step": 472880 }, { "epoch": 1.828060490791854, "grad_norm": 0.1427895873785019, "learning_rate": 0.0002924871889206804, "loss": 2.1567, "step": 472890 }, { "epoch": 1.8280991479952373, "grad_norm": 0.14036491513252258, "learning_rate": 0.00029237006350907514, "loss": 2.1473, "step": 472900 }, { "epoch": 1.8281378051986206, "grad_norm": 0.1467023640871048, "learning_rate": 0.0002922529461304875, "loss": 2.1488, "step": 472910 }, { "epoch": 1.828176462402004, "grad_norm": 0.1514258086681366, "learning_rate": 0.0002921358367832645, "loss": 2.1603, "step": 472920 }, { "epoch": 1.8282151196053873, "grad_norm": 0.1391729712486267, "learning_rate": 0.00029201873546575466, "loss": 2.1634, "step": 472930 }, { "epoch": 1.8282537768087705, "grad_norm": 0.14794260263442993, "learning_rate": 0.0002919016421763061, "loss": 2.159, "step": 472940 }, { "epoch": 1.8282924340121538, "grad_norm": 0.14104720950126648, "learning_rate": 0.000291784556913268, "loss": 2.1398, "step": 472950 }, { "epoch": 1.828331091215537, "grad_norm": 0.141220822930336, "learning_rate": 0.00029166747967499006, "loss": 2.1565, "step": 472960 }, { "epoch": 1.8283697484189205, "grad_norm": 0.1357167363166809, "learning_rate": 0.00029155041045982277, "loss": 2.1559, "step": 472970 }, { "epoch": 1.8284084056223038, "grad_norm": 0.1552206575870514, "learning_rate": 0.00029143334926611653, "loss": 2.1579, "step": 472980 }, { "epoch": 1.828447062825687, "grad_norm": 0.1342216283082962, "learning_rate": 0.00029131629609222286, "loss": 2.1659, "step": 472990 }, { "epoch": 1.8284857200290703, "grad_norm": 0.13896004855632782, "learning_rate": 0.0002911992509364938, "loss": 2.1699, "step": 473000 }, { "epoch": 1.8285243772324535, "grad_norm": 0.145687997341156, "learning_rate": 0.0002910822137972817, "loss": 2.145, "step": 473010 }, { "epoch": 1.8285630344358368, "grad_norm": 0.13861437141895294, "learning_rate": 0.00029096518467293956, "loss": 2.1561, "step": 473020 }, { "epoch": 1.82860169163922, "grad_norm": 0.13986791670322418, "learning_rate": 0.00029084816356182095, "loss": 2.1615, "step": 473030 }, { "epoch": 1.8286403488426033, "grad_norm": 0.14401181042194366, "learning_rate": 0.0002907311504622803, "loss": 2.1583, "step": 473040 }, { "epoch": 1.8286790060459865, "grad_norm": 0.13906337320804596, "learning_rate": 0.00029061414537267204, "loss": 2.1475, "step": 473050 }, { "epoch": 1.8287176632493698, "grad_norm": 0.14993421733379364, "learning_rate": 0.0002904971482913519, "loss": 2.1652, "step": 473060 }, { "epoch": 1.828756320452753, "grad_norm": 0.13433660566806793, "learning_rate": 0.0002903801592166755, "loss": 2.1577, "step": 473070 }, { "epoch": 1.8287949776561363, "grad_norm": 0.1434684544801712, "learning_rate": 0.0002902631781469991, "loss": 2.15, "step": 473080 }, { "epoch": 1.8288336348595198, "grad_norm": 0.13965515792369843, "learning_rate": 0.00029014620508068, "loss": 2.1687, "step": 473090 }, { "epoch": 1.828872292062903, "grad_norm": 0.15121091902256012, "learning_rate": 0.00029002924001607556, "loss": 2.1612, "step": 473100 }, { "epoch": 1.8289109492662863, "grad_norm": 0.13976910710334778, "learning_rate": 0.00028991228295154416, "loss": 2.1516, "step": 473110 }, { "epoch": 1.8289496064696695, "grad_norm": 0.13840851187705994, "learning_rate": 0.000289795333885444, "loss": 2.1733, "step": 473120 }, { "epoch": 1.8289882636730528, "grad_norm": 0.1366550624370575, "learning_rate": 0.00028967839281613484, "loss": 2.1458, "step": 473130 }, { "epoch": 1.8290269208764363, "grad_norm": 0.147894486784935, "learning_rate": 0.000289561459741976, "loss": 2.1685, "step": 473140 }, { "epoch": 1.8290655780798195, "grad_norm": 0.16173036396503448, "learning_rate": 0.00028944453466132815, "loss": 2.1608, "step": 473150 }, { "epoch": 1.8291042352832028, "grad_norm": 0.1482090801000595, "learning_rate": 0.0002893276175725523, "loss": 2.1696, "step": 473160 }, { "epoch": 1.829142892486586, "grad_norm": 0.13548068702220917, "learning_rate": 0.00028921070847400963, "loss": 2.1496, "step": 473170 }, { "epoch": 1.8291815496899693, "grad_norm": 0.18090428411960602, "learning_rate": 0.0002890938073640623, "loss": 2.1535, "step": 473180 }, { "epoch": 1.8292202068933525, "grad_norm": 0.14325076341629028, "learning_rate": 0.0002889769142410732, "loss": 2.1672, "step": 473190 }, { "epoch": 1.8292588640967358, "grad_norm": 0.15077604353427887, "learning_rate": 0.0002888600291034049, "loss": 2.168, "step": 473200 }, { "epoch": 1.829297521300119, "grad_norm": 0.15352006256580353, "learning_rate": 0.0002887431519494217, "loss": 2.1481, "step": 473210 }, { "epoch": 1.8293361785035023, "grad_norm": 0.13646052777767181, "learning_rate": 0.00028862628277748773, "loss": 2.156, "step": 473220 }, { "epoch": 1.8293748357068855, "grad_norm": 0.14072224497795105, "learning_rate": 0.00028850942158596736, "loss": 2.1543, "step": 473230 }, { "epoch": 1.8294134929102688, "grad_norm": 0.13621889054775238, "learning_rate": 0.00028839256837322647, "loss": 2.1632, "step": 473240 }, { "epoch": 1.829452150113652, "grad_norm": 0.1435336321592331, "learning_rate": 0.00028827572313763094, "loss": 2.1498, "step": 473250 }, { "epoch": 1.8294908073170355, "grad_norm": 0.14366935193538666, "learning_rate": 0.00028815888587754726, "loss": 2.1547, "step": 473260 }, { "epoch": 1.8295294645204188, "grad_norm": 0.13832125067710876, "learning_rate": 0.00028804205659134263, "loss": 2.1409, "step": 473270 }, { "epoch": 1.829568121723802, "grad_norm": 0.1476607769727707, "learning_rate": 0.00028792523527738424, "loss": 2.1582, "step": 473280 }, { "epoch": 1.8296067789271853, "grad_norm": 0.14731153845787048, "learning_rate": 0.0002878084219340407, "loss": 2.1598, "step": 473290 }, { "epoch": 1.8296454361305687, "grad_norm": 0.1447271704673767, "learning_rate": 0.00028769161655968056, "loss": 2.1614, "step": 473300 }, { "epoch": 1.829684093333952, "grad_norm": 0.14205624163150787, "learning_rate": 0.00028757481915267323, "loss": 2.1652, "step": 473310 }, { "epoch": 1.8297227505373352, "grad_norm": 0.14254549145698547, "learning_rate": 0.00028745802971138844, "loss": 2.1605, "step": 473320 }, { "epoch": 1.8297614077407185, "grad_norm": 0.15251408517360687, "learning_rate": 0.0002873412482341966, "loss": 2.1681, "step": 473330 }, { "epoch": 1.8298000649441017, "grad_norm": 0.14379853010177612, "learning_rate": 0.00028722447471946853, "loss": 2.1687, "step": 473340 }, { "epoch": 1.829838722147485, "grad_norm": 0.1437206268310547, "learning_rate": 0.0002871077091655763, "loss": 2.1617, "step": 473350 }, { "epoch": 1.8298773793508682, "grad_norm": 0.14462733268737793, "learning_rate": 0.0002869909515708915, "loss": 2.1615, "step": 473360 }, { "epoch": 1.8299160365542515, "grad_norm": 0.1429223120212555, "learning_rate": 0.00028687420193378665, "loss": 2.1718, "step": 473370 }, { "epoch": 1.8299546937576348, "grad_norm": 0.4351711571216583, "learning_rate": 0.0002867574602526355, "loss": 2.1731, "step": 473380 }, { "epoch": 1.829993350961018, "grad_norm": 0.1433011293411255, "learning_rate": 0.0002866407265258113, "loss": 2.1601, "step": 473390 }, { "epoch": 1.8300320081644013, "grad_norm": 0.1451789289712906, "learning_rate": 0.0002865240007516885, "loss": 2.1515, "step": 473400 }, { "epoch": 1.8300706653677845, "grad_norm": 0.14553236961364746, "learning_rate": 0.0002864072829286417, "loss": 2.1798, "step": 473410 }, { "epoch": 1.8301093225711678, "grad_norm": 0.14178434014320374, "learning_rate": 0.0002862905730550469, "loss": 2.1588, "step": 473420 }, { "epoch": 1.8301479797745512, "grad_norm": 0.1531856209039688, "learning_rate": 0.0002861738711292794, "loss": 2.156, "step": 473430 }, { "epoch": 1.8301866369779345, "grad_norm": 0.13813577592372894, "learning_rate": 0.000286057177149716, "loss": 2.1485, "step": 473440 }, { "epoch": 1.8302252941813177, "grad_norm": 0.1400258094072342, "learning_rate": 0.0002859404911147336, "loss": 2.1445, "step": 473450 }, { "epoch": 1.830263951384701, "grad_norm": 0.17947034537792206, "learning_rate": 0.00028582381302271024, "loss": 2.1659, "step": 473460 }, { "epoch": 1.8303026085880845, "grad_norm": 0.14751413464546204, "learning_rate": 0.0002857071428720237, "loss": 2.1496, "step": 473470 }, { "epoch": 1.8303412657914677, "grad_norm": 0.14646713435649872, "learning_rate": 0.00028559048066105274, "loss": 2.1789, "step": 473480 }, { "epoch": 1.830379922994851, "grad_norm": 0.14492836594581604, "learning_rate": 0.00028547382638817665, "loss": 2.1544, "step": 473490 }, { "epoch": 1.8304185801982342, "grad_norm": 0.1351613849401474, "learning_rate": 0.0002853571800517754, "loss": 2.1534, "step": 473500 }, { "epoch": 1.8304572374016175, "grad_norm": 0.13662733137607574, "learning_rate": 0.00028524054165022904, "loss": 2.1562, "step": 473510 }, { "epoch": 1.8304958946050007, "grad_norm": 0.14270558953285217, "learning_rate": 0.00028512391118191863, "loss": 2.1563, "step": 473520 }, { "epoch": 1.830534551808384, "grad_norm": 0.14456135034561157, "learning_rate": 0.0002850072886452257, "loss": 2.1608, "step": 473530 }, { "epoch": 1.8305732090117672, "grad_norm": 0.15161548554897308, "learning_rate": 0.0002848906740385322, "loss": 2.1488, "step": 473540 }, { "epoch": 1.8306118662151505, "grad_norm": 0.15137378871440887, "learning_rate": 0.0002847740673602208, "loss": 2.1451, "step": 473550 }, { "epoch": 1.8306505234185337, "grad_norm": 0.1438361555337906, "learning_rate": 0.0002846574686086747, "loss": 2.1628, "step": 473560 }, { "epoch": 1.830689180621917, "grad_norm": 0.13977812230587006, "learning_rate": 0.0002845408777822771, "loss": 2.1502, "step": 473570 }, { "epoch": 1.8307278378253002, "grad_norm": 0.14614924788475037, "learning_rate": 0.0002844242948794127, "loss": 2.1742, "step": 473580 }, { "epoch": 1.8307664950286835, "grad_norm": 0.15732614696025848, "learning_rate": 0.0002843077198984663, "loss": 2.183, "step": 473590 }, { "epoch": 1.830805152232067, "grad_norm": 0.14953894913196564, "learning_rate": 0.0002841911528378227, "loss": 2.16, "step": 473600 }, { "epoch": 1.8308438094354502, "grad_norm": 0.1374642252922058, "learning_rate": 0.0002840745936958682, "loss": 2.1552, "step": 473610 }, { "epoch": 1.8308824666388335, "grad_norm": 0.14025695621967316, "learning_rate": 0.00028395804247098914, "loss": 2.1515, "step": 473620 }, { "epoch": 1.8309211238422167, "grad_norm": 0.14120370149612427, "learning_rate": 0.00028384149916157233, "loss": 2.1632, "step": 473630 }, { "epoch": 1.8309597810456002, "grad_norm": 0.14266963303089142, "learning_rate": 0.0002837249637660051, "loss": 2.1542, "step": 473640 }, { "epoch": 1.8309984382489835, "grad_norm": 0.14860790967941284, "learning_rate": 0.0002836084362826761, "loss": 2.1692, "step": 473650 }, { "epoch": 1.8310370954523667, "grad_norm": 0.1417301744222641, "learning_rate": 0.00028349191670997364, "loss": 2.1607, "step": 473660 }, { "epoch": 1.83107575265575, "grad_norm": 0.14226599037647247, "learning_rate": 0.00028337540504628665, "loss": 2.1626, "step": 473670 }, { "epoch": 1.8311144098591332, "grad_norm": 0.15236929059028625, "learning_rate": 0.0002832589012900053, "loss": 2.1684, "step": 473680 }, { "epoch": 1.8311530670625165, "grad_norm": 0.13800197839736938, "learning_rate": 0.0002831424054395193, "loss": 2.1689, "step": 473690 }, { "epoch": 1.8311917242658997, "grad_norm": 0.14995115995407104, "learning_rate": 0.0002830259174932197, "loss": 2.1602, "step": 473700 }, { "epoch": 1.831230381469283, "grad_norm": 0.13879883289337158, "learning_rate": 0.0002829094374494978, "loss": 2.1448, "step": 473710 }, { "epoch": 1.8312690386726662, "grad_norm": 0.14781266450881958, "learning_rate": 0.0002827929653067456, "loss": 2.1695, "step": 473720 }, { "epoch": 1.8313076958760495, "grad_norm": 0.1535464972257614, "learning_rate": 0.0002826765010633554, "loss": 2.1707, "step": 473730 }, { "epoch": 1.8313463530794327, "grad_norm": 0.14041906595230103, "learning_rate": 0.0002825600447177199, "loss": 2.1556, "step": 473740 }, { "epoch": 1.831385010282816, "grad_norm": 0.1497502326965332, "learning_rate": 0.00028244359626823325, "loss": 2.1657, "step": 473750 }, { "epoch": 1.8314236674861992, "grad_norm": 0.13972197473049164, "learning_rate": 0.0002823271557132889, "loss": 2.1658, "step": 473760 }, { "epoch": 1.8314623246895827, "grad_norm": 0.13600200414657593, "learning_rate": 0.0002822107230512818, "loss": 2.1512, "step": 473770 }, { "epoch": 1.831500981892966, "grad_norm": 0.14934469759464264, "learning_rate": 0.00028209429828060696, "loss": 2.1513, "step": 473780 }, { "epoch": 1.8315396390963492, "grad_norm": 0.15474063158035278, "learning_rate": 0.0002819778813996603, "loss": 2.1744, "step": 473790 }, { "epoch": 1.8315782962997325, "grad_norm": 0.14638902246952057, "learning_rate": 0.00028186147240683756, "loss": 2.1552, "step": 473800 }, { "epoch": 1.831616953503116, "grad_norm": 0.14121192693710327, "learning_rate": 0.0002817450713005361, "loss": 2.1506, "step": 473810 }, { "epoch": 1.8316556107064992, "grad_norm": 0.14948327839374542, "learning_rate": 0.0002816286780791528, "loss": 2.1494, "step": 473820 }, { "epoch": 1.8316942679098824, "grad_norm": 0.14478008449077606, "learning_rate": 0.0002815122927410858, "loss": 2.1566, "step": 473830 }, { "epoch": 1.8317329251132657, "grad_norm": 0.1355316936969757, "learning_rate": 0.0002813959152847332, "loss": 2.1698, "step": 473840 }, { "epoch": 1.831771582316649, "grad_norm": 0.14989060163497925, "learning_rate": 0.00028127954570849424, "loss": 2.1495, "step": 473850 }, { "epoch": 1.8318102395200322, "grad_norm": 0.14782382547855377, "learning_rate": 0.00028116318401076823, "loss": 2.161, "step": 473860 }, { "epoch": 1.8318488967234154, "grad_norm": 0.14131376147270203, "learning_rate": 0.0002810468301899554, "loss": 2.1479, "step": 473870 }, { "epoch": 1.8318875539267987, "grad_norm": 0.1483166664838791, "learning_rate": 0.0002809304842444562, "loss": 2.1772, "step": 473880 }, { "epoch": 1.831926211130182, "grad_norm": 0.14277474582195282, "learning_rate": 0.0002808141461726716, "loss": 2.1587, "step": 473890 }, { "epoch": 1.8319648683335652, "grad_norm": 0.16562721133232117, "learning_rate": 0.0002806978159730036, "loss": 2.1527, "step": 473900 }, { "epoch": 1.8320035255369485, "grad_norm": 0.15126599371433258, "learning_rate": 0.000280581493643854, "loss": 2.1806, "step": 473910 }, { "epoch": 1.8320421827403317, "grad_norm": 0.14657704532146454, "learning_rate": 0.0002804651791836259, "loss": 2.158, "step": 473920 }, { "epoch": 1.832080839943715, "grad_norm": 0.1424272060394287, "learning_rate": 0.00028034887259072237, "loss": 2.1626, "step": 473930 }, { "epoch": 1.8321194971470984, "grad_norm": 0.13862474262714386, "learning_rate": 0.0002802325738635472, "loss": 2.1462, "step": 473940 }, { "epoch": 1.8321581543504817, "grad_norm": 0.1364509016275406, "learning_rate": 0.0002801162830005046, "loss": 2.1579, "step": 473950 }, { "epoch": 1.832196811553865, "grad_norm": 0.15476275980472565, "learning_rate": 0.00028000000000000003, "loss": 2.1475, "step": 473960 }, { "epoch": 1.8322354687572482, "grad_norm": 0.15473146736621857, "learning_rate": 0.00027988372486043847, "loss": 2.159, "step": 473970 }, { "epoch": 1.8322741259606317, "grad_norm": 0.14409764111042023, "learning_rate": 0.0002797674575802258, "loss": 2.1618, "step": 473980 }, { "epoch": 1.832312783164015, "grad_norm": 0.15211471915245056, "learning_rate": 0.00027965119815776896, "loss": 2.1623, "step": 473990 }, { "epoch": 1.8323514403673982, "grad_norm": 0.14995014667510986, "learning_rate": 0.0002795349465914747, "loss": 2.1575, "step": 474000 }, { "epoch": 1.8323900975707814, "grad_norm": 0.1347171515226364, "learning_rate": 0.0002794187028797506, "loss": 2.1663, "step": 474010 }, { "epoch": 1.8324287547741647, "grad_norm": 0.15486112236976624, "learning_rate": 0.0002793024670210049, "loss": 2.1725, "step": 474020 }, { "epoch": 1.832467411977548, "grad_norm": 0.14291568100452423, "learning_rate": 0.0002791862390136461, "loss": 2.1361, "step": 474030 }, { "epoch": 1.8325060691809312, "grad_norm": 0.32501938939094543, "learning_rate": 0.00027907001885608376, "loss": 2.161, "step": 474040 }, { "epoch": 1.8325447263843144, "grad_norm": 0.15452173352241516, "learning_rate": 0.0002789538065467272, "loss": 2.1501, "step": 474050 }, { "epoch": 1.8325833835876977, "grad_norm": 0.15415510535240173, "learning_rate": 0.0002788376020839869, "loss": 2.1625, "step": 474060 }, { "epoch": 1.832622040791081, "grad_norm": 0.1350860446691513, "learning_rate": 0.0002787214054662737, "loss": 2.1674, "step": 474070 }, { "epoch": 1.8326606979944642, "grad_norm": 0.14305320382118225, "learning_rate": 0.000278605216691999, "loss": 2.1698, "step": 474080 }, { "epoch": 1.8326993551978474, "grad_norm": 0.14777852594852448, "learning_rate": 0.00027848903575957443, "loss": 2.1546, "step": 474090 }, { "epoch": 1.8327380124012307, "grad_norm": 0.1491856426000595, "learning_rate": 0.0002783728626674125, "loss": 2.1722, "step": 474100 }, { "epoch": 1.8327766696046142, "grad_norm": 0.14523689448833466, "learning_rate": 0.00027825669741392647, "loss": 2.1536, "step": 474110 }, { "epoch": 1.8328153268079974, "grad_norm": 0.1457177847623825, "learning_rate": 0.0002781405399975294, "loss": 2.1333, "step": 474120 }, { "epoch": 1.8328539840113807, "grad_norm": 0.14650383591651917, "learning_rate": 0.0002780243904166355, "loss": 2.1628, "step": 474130 }, { "epoch": 1.832892641214764, "grad_norm": 0.15087856352329254, "learning_rate": 0.00027790824866965935, "loss": 2.1577, "step": 474140 }, { "epoch": 1.8329312984181474, "grad_norm": 0.16552071273326874, "learning_rate": 0.000277792114755016, "loss": 2.1607, "step": 474150 }, { "epoch": 1.8329699556215306, "grad_norm": 0.15123656392097473, "learning_rate": 0.0002776759886711211, "loss": 2.1612, "step": 474160 }, { "epoch": 1.833008612824914, "grad_norm": 0.14551308751106262, "learning_rate": 0.00027755987041639085, "loss": 2.1521, "step": 474170 }, { "epoch": 1.8330472700282971, "grad_norm": 0.15437181293964386, "learning_rate": 0.0002774437599892421, "loss": 2.1716, "step": 474180 }, { "epoch": 1.8330859272316804, "grad_norm": 0.14891056716442108, "learning_rate": 0.0002773276573880916, "loss": 2.1543, "step": 474190 }, { "epoch": 1.8331245844350637, "grad_norm": 0.1399906873703003, "learning_rate": 0.0002772115626113576, "loss": 2.1685, "step": 474200 }, { "epoch": 1.833163241638447, "grad_norm": 0.14022347331047058, "learning_rate": 0.0002770954756574582, "loss": 2.16, "step": 474210 }, { "epoch": 1.8332018988418302, "grad_norm": 0.1491684913635254, "learning_rate": 0.0002769793965248124, "loss": 2.1424, "step": 474220 }, { "epoch": 1.8332405560452134, "grad_norm": 0.14810223877429962, "learning_rate": 0.0002768633252118391, "loss": 2.1571, "step": 474230 }, { "epoch": 1.8332792132485967, "grad_norm": 0.14386752247810364, "learning_rate": 0.00027674726171695885, "loss": 2.168, "step": 474240 }, { "epoch": 1.83331787045198, "grad_norm": 0.13856728374958038, "learning_rate": 0.00027663120603859135, "loss": 2.1629, "step": 474250 }, { "epoch": 1.8333565276553632, "grad_norm": 0.14329712092876434, "learning_rate": 0.00027651515817515816, "loss": 2.1705, "step": 474260 }, { "epoch": 1.8333951848587464, "grad_norm": 0.15207403898239136, "learning_rate": 0.00027639911812508044, "loss": 2.1651, "step": 474270 }, { "epoch": 1.83343384206213, "grad_norm": 0.142039954662323, "learning_rate": 0.00027628308588678043, "loss": 2.1834, "step": 474280 }, { "epoch": 1.8334724992655131, "grad_norm": 0.155516117811203, "learning_rate": 0.00027616706145868066, "loss": 2.1513, "step": 474290 }, { "epoch": 1.8335111564688964, "grad_norm": 0.15602277219295502, "learning_rate": 0.00027605104483920416, "loss": 2.1713, "step": 474300 }, { "epoch": 1.8335498136722796, "grad_norm": 0.15286743640899658, "learning_rate": 0.0002759350360267743, "loss": 2.1688, "step": 474310 }, { "epoch": 1.8335884708756631, "grad_norm": 0.157220259308815, "learning_rate": 0.0002758190350198155, "loss": 2.1447, "step": 474320 }, { "epoch": 1.8336271280790464, "grad_norm": 0.14431683719158173, "learning_rate": 0.00027570304181675256, "loss": 2.1547, "step": 474330 }, { "epoch": 1.8336657852824296, "grad_norm": 0.1535961776971817, "learning_rate": 0.00027558705641601036, "loss": 2.1531, "step": 474340 }, { "epoch": 1.8337044424858129, "grad_norm": 0.14194528758525848, "learning_rate": 0.00027547107881601465, "loss": 2.1503, "step": 474350 }, { "epoch": 1.8337430996891961, "grad_norm": 0.14567318558692932, "learning_rate": 0.000275355109015192, "loss": 2.1482, "step": 474360 }, { "epoch": 1.8337817568925794, "grad_norm": 0.19744686782360077, "learning_rate": 0.00027523914701196904, "loss": 2.1688, "step": 474370 }, { "epoch": 1.8338204140959626, "grad_norm": 0.14790453016757965, "learning_rate": 0.00027512319280477307, "loss": 2.1721, "step": 474380 }, { "epoch": 1.8338590712993459, "grad_norm": 0.14539088308811188, "learning_rate": 0.0002750072463920319, "loss": 2.1573, "step": 474390 }, { "epoch": 1.8338977285027291, "grad_norm": 0.14718958735466003, "learning_rate": 0.000274891307772174, "loss": 2.1648, "step": 474400 }, { "epoch": 1.8339363857061124, "grad_norm": 0.13314035534858704, "learning_rate": 0.0002747753769436283, "loss": 2.1405, "step": 474410 }, { "epoch": 1.8339750429094956, "grad_norm": 0.14325682818889618, "learning_rate": 0.00027465945390482417, "loss": 2.1694, "step": 474420 }, { "epoch": 1.834013700112879, "grad_norm": 0.15003050863742828, "learning_rate": 0.0002745435386541915, "loss": 2.178, "step": 474430 }, { "epoch": 1.8340523573162621, "grad_norm": 0.15796884894371033, "learning_rate": 0.0002744276311901608, "loss": 2.1636, "step": 474440 }, { "epoch": 1.8340910145196456, "grad_norm": 0.15004146099090576, "learning_rate": 0.00027431173151116296, "loss": 2.1625, "step": 474450 }, { "epoch": 1.8341296717230289, "grad_norm": 0.14282776415348053, "learning_rate": 0.0002741958396156301, "loss": 2.1452, "step": 474460 }, { "epoch": 1.8341683289264121, "grad_norm": 0.5124521255493164, "learning_rate": 0.0002740799555019937, "loss": 2.1463, "step": 474470 }, { "epoch": 1.8342069861297954, "grad_norm": 0.1407298743724823, "learning_rate": 0.0002739640791686866, "loss": 2.1404, "step": 474480 }, { "epoch": 1.8342456433331789, "grad_norm": 0.14269711077213287, "learning_rate": 0.0002738482106141418, "loss": 2.1486, "step": 474490 }, { "epoch": 1.834284300536562, "grad_norm": 0.13416120409965515, "learning_rate": 0.00027373234983679316, "loss": 2.1663, "step": 474500 }, { "epoch": 1.8343229577399454, "grad_norm": 0.16566786170005798, "learning_rate": 0.0002736164968350747, "loss": 2.1441, "step": 474510 }, { "epoch": 1.8343616149433286, "grad_norm": 0.14914767444133759, "learning_rate": 0.000273500651607421, "loss": 2.1547, "step": 474520 }, { "epoch": 1.8344002721467119, "grad_norm": 0.14149288833141327, "learning_rate": 0.00027338481415226746, "loss": 2.1503, "step": 474530 }, { "epoch": 1.8344389293500951, "grad_norm": 0.15202617645263672, "learning_rate": 0.00027326898446804985, "loss": 2.1671, "step": 474540 }, { "epoch": 1.8344775865534784, "grad_norm": 0.14977170526981354, "learning_rate": 0.00027315316255320423, "loss": 2.1531, "step": 474550 }, { "epoch": 1.8345162437568616, "grad_norm": 0.14531210064888, "learning_rate": 0.0002730373484061677, "loss": 2.158, "step": 474560 }, { "epoch": 1.8345549009602449, "grad_norm": 0.14642809331417084, "learning_rate": 0.00027292154202537746, "loss": 2.1638, "step": 474570 }, { "epoch": 1.8345935581636281, "grad_norm": 0.1511840671300888, "learning_rate": 0.00027280574340927125, "loss": 2.1561, "step": 474580 }, { "epoch": 1.8346322153670114, "grad_norm": 0.1489248126745224, "learning_rate": 0.0002726899525562876, "loss": 2.1671, "step": 474590 }, { "epoch": 1.8346708725703946, "grad_norm": 0.1497693657875061, "learning_rate": 0.00027257416946486535, "loss": 2.1526, "step": 474600 }, { "epoch": 1.8347095297737779, "grad_norm": 0.15171314775943756, "learning_rate": 0.00027245839413344376, "loss": 2.155, "step": 474610 }, { "epoch": 1.8347481869771614, "grad_norm": 0.14315788447856903, "learning_rate": 0.0002723426265604629, "loss": 2.1467, "step": 474620 }, { "epoch": 1.8347868441805446, "grad_norm": 0.1636415421962738, "learning_rate": 0.0002722268667443635, "loss": 2.1516, "step": 474630 }, { "epoch": 1.8348255013839279, "grad_norm": 0.147812619805336, "learning_rate": 0.00027211111468358594, "loss": 2.1596, "step": 474640 }, { "epoch": 1.8348641585873111, "grad_norm": 0.1467977911233902, "learning_rate": 0.00027199537037657205, "loss": 2.1551, "step": 474650 }, { "epoch": 1.8349028157906946, "grad_norm": 0.1470346301794052, "learning_rate": 0.00027187963382176416, "loss": 2.159, "step": 474660 }, { "epoch": 1.8349414729940778, "grad_norm": 0.14632827043533325, "learning_rate": 0.0002717639050176044, "loss": 2.1762, "step": 474670 }, { "epoch": 1.834980130197461, "grad_norm": 0.13886310160160065, "learning_rate": 0.000271648183962536, "loss": 2.1751, "step": 474680 }, { "epoch": 1.8350187874008443, "grad_norm": 0.14693546295166016, "learning_rate": 0.00027153247065500244, "loss": 2.1516, "step": 474690 }, { "epoch": 1.8350574446042276, "grad_norm": 0.14294764399528503, "learning_rate": 0.00027141676509344803, "loss": 2.1513, "step": 474700 }, { "epoch": 1.8350961018076108, "grad_norm": 0.15899953246116638, "learning_rate": 0.0002713010672763172, "loss": 2.1759, "step": 474710 }, { "epoch": 1.835134759010994, "grad_norm": 0.15766823291778564, "learning_rate": 0.00027118537720205536, "loss": 2.1411, "step": 474720 }, { "epoch": 1.8351734162143774, "grad_norm": 0.14663904905319214, "learning_rate": 0.0002710696948691078, "loss": 2.1547, "step": 474730 }, { "epoch": 1.8352120734177606, "grad_norm": 0.14257924258708954, "learning_rate": 0.0002709540202759211, "loss": 2.1337, "step": 474740 }, { "epoch": 1.8352507306211439, "grad_norm": 0.1540602296590805, "learning_rate": 0.0002708383534209418, "loss": 2.1465, "step": 474750 }, { "epoch": 1.835289387824527, "grad_norm": 0.14710083603858948, "learning_rate": 0.00027072269430261707, "loss": 2.1615, "step": 474760 }, { "epoch": 1.8353280450279104, "grad_norm": 0.1486217975616455, "learning_rate": 0.00027060704291939473, "loss": 2.1634, "step": 474770 }, { "epoch": 1.8353667022312938, "grad_norm": 0.14736615121364594, "learning_rate": 0.0002704913992697231, "loss": 2.1638, "step": 474780 }, { "epoch": 1.835405359434677, "grad_norm": 0.2241804301738739, "learning_rate": 0.00027037576335205116, "loss": 2.1621, "step": 474790 }, { "epoch": 1.8354440166380603, "grad_norm": 0.14994165301322937, "learning_rate": 0.0002702601351648277, "loss": 2.1686, "step": 474800 }, { "epoch": 1.8354826738414436, "grad_norm": 0.15455268323421478, "learning_rate": 0.00027014451470650294, "loss": 2.1466, "step": 474810 }, { "epoch": 1.8355213310448268, "grad_norm": 0.13080264627933502, "learning_rate": 0.0002700289019755273, "loss": 2.158, "step": 474820 }, { "epoch": 1.8355599882482103, "grad_norm": 0.1570408046245575, "learning_rate": 0.0002699132969703513, "loss": 2.1711, "step": 474830 }, { "epoch": 1.8355986454515936, "grad_norm": 0.14790542423725128, "learning_rate": 0.00026979769968942647, "loss": 2.166, "step": 474840 }, { "epoch": 1.8356373026549768, "grad_norm": 0.1418340802192688, "learning_rate": 0.0002696821101312048, "loss": 2.1674, "step": 474850 }, { "epoch": 1.83567595985836, "grad_norm": 0.14686687290668488, "learning_rate": 0.00026956652829413863, "loss": 2.1502, "step": 474860 }, { "epoch": 1.8357146170617433, "grad_norm": 0.14338235557079315, "learning_rate": 0.000269450954176681, "loss": 2.1498, "step": 474870 }, { "epoch": 1.8357532742651266, "grad_norm": 0.14893607795238495, "learning_rate": 0.00026933538777728505, "loss": 2.1666, "step": 474880 }, { "epoch": 1.8357919314685098, "grad_norm": 0.13746413588523865, "learning_rate": 0.00026921982909440523, "loss": 2.1737, "step": 474890 }, { "epoch": 1.835830588671893, "grad_norm": 0.16587093472480774, "learning_rate": 0.0002691042781264956, "loss": 2.1414, "step": 474900 }, { "epoch": 1.8358692458752763, "grad_norm": 0.16657157242298126, "learning_rate": 0.00026898873487201123, "loss": 2.149, "step": 474910 }, { "epoch": 1.8359079030786596, "grad_norm": 0.18725548684597015, "learning_rate": 0.000268873199329408, "loss": 2.1519, "step": 474920 }, { "epoch": 1.8359465602820428, "grad_norm": 0.14422450959682465, "learning_rate": 0.00026875767149714137, "loss": 2.1467, "step": 474930 }, { "epoch": 1.835985217485426, "grad_norm": 0.14225982129573822, "learning_rate": 0.0002686421513736683, "loss": 2.1649, "step": 474940 }, { "epoch": 1.8360238746888096, "grad_norm": 0.14490465819835663, "learning_rate": 0.00026852663895744544, "loss": 2.1558, "step": 474950 }, { "epoch": 1.8360625318921928, "grad_norm": 0.14130234718322754, "learning_rate": 0.0002684111342469309, "loss": 2.1651, "step": 474960 }, { "epoch": 1.836101189095576, "grad_norm": 0.14048299193382263, "learning_rate": 0.0002682956372405825, "loss": 2.1677, "step": 474970 }, { "epoch": 1.8361398462989593, "grad_norm": 0.14509908854961395, "learning_rate": 0.00026818014793685886, "loss": 2.1716, "step": 474980 }, { "epoch": 1.8361785035023426, "grad_norm": 0.14281538128852844, "learning_rate": 0.000268064666334219, "loss": 2.1586, "step": 474990 }, { "epoch": 1.836217160705726, "grad_norm": 0.14713968336582184, "learning_rate": 0.0002679491924311228, "loss": 2.1496, "step": 475000 }, { "epoch": 1.8362558179091093, "grad_norm": 0.14357225596904755, "learning_rate": 0.00026783372622603, "loss": 2.1569, "step": 475010 }, { "epoch": 1.8362944751124926, "grad_norm": 0.1509816199541092, "learning_rate": 0.0002677182677174017, "loss": 2.1676, "step": 475020 }, { "epoch": 1.8363331323158758, "grad_norm": 0.16111452877521515, "learning_rate": 0.0002676028169036986, "loss": 2.155, "step": 475030 }, { "epoch": 1.836371789519259, "grad_norm": 0.14163239300251007, "learning_rate": 0.00026748737378338294, "loss": 2.1634, "step": 475040 }, { "epoch": 1.8364104467226423, "grad_norm": 0.14450347423553467, "learning_rate": 0.00026737193835491647, "loss": 2.1466, "step": 475050 }, { "epoch": 1.8364491039260256, "grad_norm": 0.15021710097789764, "learning_rate": 0.0002672565106167617, "loss": 2.1522, "step": 475060 }, { "epoch": 1.8364877611294088, "grad_norm": 0.14771442115306854, "learning_rate": 0.00026714109056738257, "loss": 2.1503, "step": 475070 }, { "epoch": 1.836526418332792, "grad_norm": 0.1410464346408844, "learning_rate": 0.0002670256782052425, "loss": 2.1549, "step": 475080 }, { "epoch": 1.8365650755361753, "grad_norm": 0.17191557586193085, "learning_rate": 0.0002669102735288054, "loss": 2.1592, "step": 475090 }, { "epoch": 1.8366037327395586, "grad_norm": 0.15708066523075104, "learning_rate": 0.00026679487653653644, "loss": 2.1602, "step": 475100 }, { "epoch": 1.8366423899429418, "grad_norm": 0.15195932984352112, "learning_rate": 0.0002666794872269007, "loss": 2.1613, "step": 475110 }, { "epoch": 1.8366810471463253, "grad_norm": 0.1657049059867859, "learning_rate": 0.0002665641055983639, "loss": 2.147, "step": 475120 }, { "epoch": 1.8367197043497085, "grad_norm": 0.15128254890441895, "learning_rate": 0.0002664487316493924, "loss": 2.1517, "step": 475130 }, { "epoch": 1.8367583615530918, "grad_norm": 0.17023751139640808, "learning_rate": 0.0002663333653784532, "loss": 2.1521, "step": 475140 }, { "epoch": 1.836797018756475, "grad_norm": 0.14957919716835022, "learning_rate": 0.0002662180067840132, "loss": 2.1559, "step": 475150 }, { "epoch": 1.8368356759598583, "grad_norm": 0.1486700475215912, "learning_rate": 0.0002661026558645405, "loss": 2.1676, "step": 475160 }, { "epoch": 1.8368743331632418, "grad_norm": 0.1451827436685562, "learning_rate": 0.00026598731261850327, "loss": 2.1485, "step": 475170 }, { "epoch": 1.836912990366625, "grad_norm": 0.1410922110080719, "learning_rate": 0.0002658719770443705, "loss": 2.1614, "step": 475180 }, { "epoch": 1.8369516475700083, "grad_norm": 0.14330267906188965, "learning_rate": 0.0002657566491406116, "loss": 2.151, "step": 475190 }, { "epoch": 1.8369903047733915, "grad_norm": 0.15906070172786713, "learning_rate": 0.00026564132890569603, "loss": 2.1485, "step": 475200 }, { "epoch": 1.8370289619767748, "grad_norm": 0.2338830679655075, "learning_rate": 0.0002655260163380946, "loss": 2.153, "step": 475210 }, { "epoch": 1.837067619180158, "grad_norm": 0.1482182890176773, "learning_rate": 0.00026541071143627783, "loss": 2.1493, "step": 475220 }, { "epoch": 1.8371062763835413, "grad_norm": 0.14705368876457214, "learning_rate": 0.0002652954141987172, "loss": 2.1634, "step": 475230 }, { "epoch": 1.8371449335869245, "grad_norm": 0.15626183152198792, "learning_rate": 0.00026518012462388473, "loss": 2.1447, "step": 475240 }, { "epoch": 1.8371835907903078, "grad_norm": 0.15654116868972778, "learning_rate": 0.0002650648427102529, "loss": 2.1463, "step": 475250 }, { "epoch": 1.837222247993691, "grad_norm": 0.1517390012741089, "learning_rate": 0.00026494956845629414, "loss": 2.1582, "step": 475260 }, { "epoch": 1.8372609051970743, "grad_norm": 0.15306983888149261, "learning_rate": 0.00026483430186048217, "loss": 2.1513, "step": 475270 }, { "epoch": 1.8372995624004576, "grad_norm": 0.14637787640094757, "learning_rate": 0.0002647190429212911, "loss": 2.1519, "step": 475280 }, { "epoch": 1.837338219603841, "grad_norm": 0.15025612711906433, "learning_rate": 0.00026460379163719506, "loss": 2.1491, "step": 475290 }, { "epoch": 1.8373768768072243, "grad_norm": 0.14811120927333832, "learning_rate": 0.000264488548006669, "loss": 2.1278, "step": 475300 }, { "epoch": 1.8374155340106075, "grad_norm": 0.14240843057632446, "learning_rate": 0.0002643733120281886, "loss": 2.1479, "step": 475310 }, { "epoch": 1.8374541912139908, "grad_norm": 0.14492589235305786, "learning_rate": 0.00026425808370022933, "loss": 2.1516, "step": 475320 }, { "epoch": 1.8374928484173743, "grad_norm": 0.14916452765464783, "learning_rate": 0.0002641428630212681, "loss": 2.1487, "step": 475330 }, { "epoch": 1.8375315056207575, "grad_norm": 0.15178216993808746, "learning_rate": 0.0002640276499897818, "loss": 2.1499, "step": 475340 }, { "epoch": 1.8375701628241408, "grad_norm": 0.13903997838497162, "learning_rate": 0.00026391244460424756, "loss": 2.1368, "step": 475350 }, { "epoch": 1.837608820027524, "grad_norm": 0.1431676298379898, "learning_rate": 0.00026379724686314356, "loss": 2.1494, "step": 475360 }, { "epoch": 1.8376474772309073, "grad_norm": 0.1526685506105423, "learning_rate": 0.00026368205676494827, "loss": 2.1425, "step": 475370 }, { "epoch": 1.8376861344342905, "grad_norm": 0.146949902176857, "learning_rate": 0.00026356687430814077, "loss": 2.1652, "step": 475380 }, { "epoch": 1.8377247916376738, "grad_norm": 0.14898286759853363, "learning_rate": 0.0002634516994912004, "loss": 2.1543, "step": 475390 }, { "epoch": 1.837763448841057, "grad_norm": 0.1496376097202301, "learning_rate": 0.0002633365323126071, "loss": 2.1489, "step": 475400 }, { "epoch": 1.8378021060444403, "grad_norm": 0.15171100199222565, "learning_rate": 0.0002632213727708417, "loss": 2.1625, "step": 475410 }, { "epoch": 1.8378407632478235, "grad_norm": 0.14547541737556458, "learning_rate": 0.0002631062208643846, "loss": 2.1543, "step": 475420 }, { "epoch": 1.8378794204512068, "grad_norm": 0.14050796627998352, "learning_rate": 0.00026299107659171763, "loss": 2.1584, "step": 475430 }, { "epoch": 1.83791807765459, "grad_norm": 0.14410996437072754, "learning_rate": 0.00026287593995132297, "loss": 2.1416, "step": 475440 }, { "epoch": 1.8379567348579733, "grad_norm": 0.14802516996860504, "learning_rate": 0.0002627608109416828, "loss": 2.1649, "step": 475450 }, { "epoch": 1.8379953920613568, "grad_norm": 0.14655569195747375, "learning_rate": 0.0002626456895612801, "loss": 2.1495, "step": 475460 }, { "epoch": 1.83803404926474, "grad_norm": 0.14457468688488007, "learning_rate": 0.0002625305758085985, "loss": 2.1561, "step": 475470 }, { "epoch": 1.8380727064681233, "grad_norm": 0.15370580554008484, "learning_rate": 0.00026241546968212213, "loss": 2.1535, "step": 475480 }, { "epoch": 1.8381113636715065, "grad_norm": 0.15351314842700958, "learning_rate": 0.00026230037118033536, "loss": 2.146, "step": 475490 }, { "epoch": 1.83815002087489, "grad_norm": 0.14985795319080353, "learning_rate": 0.0002621852803017233, "loss": 2.1583, "step": 475500 }, { "epoch": 1.8381886780782732, "grad_norm": 0.1384783238172531, "learning_rate": 0.00026207019704477144, "loss": 2.145, "step": 475510 }, { "epoch": 1.8382273352816565, "grad_norm": 0.14273719489574432, "learning_rate": 0.0002619551214079654, "loss": 2.1598, "step": 475520 }, { "epoch": 1.8382659924850397, "grad_norm": 0.1650056093931198, "learning_rate": 0.0002618400533897924, "loss": 2.1493, "step": 475530 }, { "epoch": 1.838304649688423, "grad_norm": 0.14638406038284302, "learning_rate": 0.0002617249929887389, "loss": 2.1714, "step": 475540 }, { "epoch": 1.8383433068918063, "grad_norm": 0.16371501982212067, "learning_rate": 0.00026160994020329255, "loss": 2.1756, "step": 475550 }, { "epoch": 1.8383819640951895, "grad_norm": 0.1557331085205078, "learning_rate": 0.00026149489503194155, "loss": 2.1417, "step": 475560 }, { "epoch": 1.8384206212985728, "grad_norm": 0.14200273156166077, "learning_rate": 0.0002613798574731743, "loss": 2.1578, "step": 475570 }, { "epoch": 1.838459278501956, "grad_norm": 0.1394173502922058, "learning_rate": 0.00026126482752547966, "loss": 2.1599, "step": 475580 }, { "epoch": 1.8384979357053393, "grad_norm": 0.14956103265285492, "learning_rate": 0.0002611498051873473, "loss": 2.1487, "step": 475590 }, { "epoch": 1.8385365929087225, "grad_norm": 0.14384682476520538, "learning_rate": 0.0002610347904572674, "loss": 2.155, "step": 475600 }, { "epoch": 1.8385752501121058, "grad_norm": 0.14592944085597992, "learning_rate": 0.0002609197833337302, "loss": 2.1555, "step": 475610 }, { "epoch": 1.838613907315489, "grad_norm": 0.14544503390789032, "learning_rate": 0.00026080478381522676, "loss": 2.1542, "step": 475620 }, { "epoch": 1.8386525645188725, "grad_norm": 0.15256692469120026, "learning_rate": 0.00026068979190024886, "loss": 2.1349, "step": 475630 }, { "epoch": 1.8386912217222557, "grad_norm": 0.1543371081352234, "learning_rate": 0.000260574807587288, "loss": 2.1641, "step": 475640 }, { "epoch": 1.838729878925639, "grad_norm": 0.1660279631614685, "learning_rate": 0.00026045983087483713, "loss": 2.1526, "step": 475650 }, { "epoch": 1.8387685361290222, "grad_norm": 0.15747512876987457, "learning_rate": 0.00026034486176138927, "loss": 2.1638, "step": 475660 }, { "epoch": 1.8388071933324057, "grad_norm": 0.14996637403964996, "learning_rate": 0.00026022990024543755, "loss": 2.1445, "step": 475670 }, { "epoch": 1.838845850535789, "grad_norm": 0.16035395860671997, "learning_rate": 0.0002601149463254764, "loss": 2.1599, "step": 475680 }, { "epoch": 1.8388845077391722, "grad_norm": 0.1412738412618637, "learning_rate": 0.00026000000000000003, "loss": 2.1543, "step": 475690 }, { "epoch": 1.8389231649425555, "grad_norm": 0.14722250401973724, "learning_rate": 0.00025988506126750345, "loss": 2.1565, "step": 475700 }, { "epoch": 1.8389618221459387, "grad_norm": 0.14353296160697937, "learning_rate": 0.00025977013012648233, "loss": 2.1579, "step": 475710 }, { "epoch": 1.839000479349322, "grad_norm": 0.15804100036621094, "learning_rate": 0.0002596552065754327, "loss": 2.1548, "step": 475720 }, { "epoch": 1.8390391365527052, "grad_norm": 0.14293187856674194, "learning_rate": 0.0002595402906128508, "loss": 2.1488, "step": 475730 }, { "epoch": 1.8390777937560885, "grad_norm": 0.1571645587682724, "learning_rate": 0.00025942538223723365, "loss": 2.1635, "step": 475740 }, { "epoch": 1.8391164509594717, "grad_norm": 0.15808333456516266, "learning_rate": 0.00025931048144707904, "loss": 2.1539, "step": 475750 }, { "epoch": 1.839155108162855, "grad_norm": 0.14674799144268036, "learning_rate": 0.00025919558824088454, "loss": 2.1502, "step": 475760 }, { "epoch": 1.8391937653662382, "grad_norm": 0.14407147467136383, "learning_rate": 0.0002590807026171489, "loss": 2.1639, "step": 475770 }, { "epoch": 1.8392324225696215, "grad_norm": 0.14988110959529877, "learning_rate": 0.0002589658245743709, "loss": 2.1673, "step": 475780 }, { "epoch": 1.8392710797730047, "grad_norm": 0.15999698638916016, "learning_rate": 0.0002588509541110502, "loss": 2.1353, "step": 475790 }, { "epoch": 1.8393097369763882, "grad_norm": 0.15566125512123108, "learning_rate": 0.00025873609122568664, "loss": 2.1478, "step": 475800 }, { "epoch": 1.8393483941797715, "grad_norm": 0.14536809921264648, "learning_rate": 0.0002586212359167808, "loss": 2.1529, "step": 475810 }, { "epoch": 1.8393870513831547, "grad_norm": 0.15134699642658234, "learning_rate": 0.00025850638818283357, "loss": 2.1546, "step": 475820 }, { "epoch": 1.839425708586538, "grad_norm": 0.1482597142457962, "learning_rate": 0.0002583915480223462, "loss": 2.1701, "step": 475830 }, { "epoch": 1.8394643657899215, "grad_norm": 0.15543188154697418, "learning_rate": 0.0002582767154338208, "loss": 2.1482, "step": 475840 }, { "epoch": 1.8395030229933047, "grad_norm": 0.14815393090248108, "learning_rate": 0.00025816189041575965, "loss": 2.1483, "step": 475850 }, { "epoch": 1.839541680196688, "grad_norm": 0.1632230579853058, "learning_rate": 0.000258047072966666, "loss": 2.1672, "step": 475860 }, { "epoch": 1.8395803374000712, "grad_norm": 0.1543683409690857, "learning_rate": 0.0002579322630850429, "loss": 2.1607, "step": 475870 }, { "epoch": 1.8396189946034545, "grad_norm": 0.14933699369430542, "learning_rate": 0.0002578174607693946, "loss": 2.1641, "step": 475880 }, { "epoch": 1.8396576518068377, "grad_norm": 0.1431177854537964, "learning_rate": 0.0002577026660182251, "loss": 2.1481, "step": 475890 }, { "epoch": 1.839696309010221, "grad_norm": 0.152797669172287, "learning_rate": 0.00025758787883003987, "loss": 2.1494, "step": 475900 }, { "epoch": 1.8397349662136042, "grad_norm": 0.15314161777496338, "learning_rate": 0.0002574730992033438, "loss": 2.1549, "step": 475910 }, { "epoch": 1.8397736234169875, "grad_norm": 0.1449500471353531, "learning_rate": 0.0002573583271366429, "loss": 2.1419, "step": 475920 }, { "epoch": 1.8398122806203707, "grad_norm": 0.14397546648979187, "learning_rate": 0.0002572435626284435, "loss": 2.154, "step": 475930 }, { "epoch": 1.839850937823754, "grad_norm": 0.15783585608005524, "learning_rate": 0.00025712880567725275, "loss": 2.1552, "step": 475940 }, { "epoch": 1.8398895950271372, "grad_norm": 0.15068823099136353, "learning_rate": 0.0002570140562815775, "loss": 2.1508, "step": 475950 }, { "epoch": 1.8399282522305205, "grad_norm": 0.14194492995738983, "learning_rate": 0.00025689931443992586, "loss": 2.1682, "step": 475960 }, { "epoch": 1.839966909433904, "grad_norm": 0.14788579940795898, "learning_rate": 0.00025678458015080644, "loss": 2.14, "step": 475970 }, { "epoch": 1.8400055666372872, "grad_norm": 0.14433981478214264, "learning_rate": 0.00025666985341272786, "loss": 2.1504, "step": 475980 }, { "epoch": 1.8400442238406705, "grad_norm": 0.14616207778453827, "learning_rate": 0.0002565551342241992, "loss": 2.1583, "step": 475990 }, { "epoch": 1.8400828810440537, "grad_norm": 0.1440388709306717, "learning_rate": 0.0002564404225837307, "loss": 2.133, "step": 476000 }, { "epoch": 1.8401215382474372, "grad_norm": 0.14986035227775574, "learning_rate": 0.0002563257184898322, "loss": 2.1477, "step": 476010 }, { "epoch": 1.8401601954508204, "grad_norm": 0.14334015548229218, "learning_rate": 0.00025621102194101476, "loss": 2.1548, "step": 476020 }, { "epoch": 1.8401988526542037, "grad_norm": 0.18292531371116638, "learning_rate": 0.0002560963329357897, "loss": 2.1629, "step": 476030 }, { "epoch": 1.840237509857587, "grad_norm": 0.15646730363368988, "learning_rate": 0.00025598165147266874, "loss": 2.1462, "step": 476040 }, { "epoch": 1.8402761670609702, "grad_norm": 0.1612592488527298, "learning_rate": 0.0002558669775501641, "loss": 2.1633, "step": 476050 }, { "epoch": 1.8403148242643534, "grad_norm": 0.16252127289772034, "learning_rate": 0.00025575231116678834, "loss": 2.1448, "step": 476060 }, { "epoch": 1.8403534814677367, "grad_norm": 0.15415285527706146, "learning_rate": 0.00025563765232105505, "loss": 2.1516, "step": 476070 }, { "epoch": 1.84039213867112, "grad_norm": 0.1514085829257965, "learning_rate": 0.00025552300101147797, "loss": 2.1596, "step": 476080 }, { "epoch": 1.8404307958745032, "grad_norm": 0.14109759032726288, "learning_rate": 0.00025540835723657083, "loss": 2.1461, "step": 476090 }, { "epoch": 1.8404694530778865, "grad_norm": 0.13598160445690155, "learning_rate": 0.0002552937209948489, "loss": 2.153, "step": 476100 }, { "epoch": 1.8405081102812697, "grad_norm": 0.14688196778297424, "learning_rate": 0.00025517909228482715, "loss": 2.1528, "step": 476110 }, { "epoch": 1.840546767484653, "grad_norm": 0.14169596135616302, "learning_rate": 0.00025506447110502093, "loss": 2.1507, "step": 476120 }, { "epoch": 1.8405854246880362, "grad_norm": 0.16858428716659546, "learning_rate": 0.00025494985745394707, "loss": 2.1603, "step": 476130 }, { "epoch": 1.8406240818914197, "grad_norm": 0.146564781665802, "learning_rate": 0.0002548352513301215, "loss": 2.1703, "step": 476140 }, { "epoch": 1.840662739094803, "grad_norm": 0.15180619060993195, "learning_rate": 0.00025472065273206203, "loss": 2.1593, "step": 476150 }, { "epoch": 1.8407013962981862, "grad_norm": 0.15303488075733185, "learning_rate": 0.0002546060616582857, "loss": 2.1517, "step": 476160 }, { "epoch": 1.8407400535015694, "grad_norm": 0.15305596590042114, "learning_rate": 0.0002544914781073109, "loss": 2.1556, "step": 476170 }, { "epoch": 1.840778710704953, "grad_norm": 0.14316235482692719, "learning_rate": 0.0002543769020776563, "loss": 2.1535, "step": 476180 }, { "epoch": 1.8408173679083362, "grad_norm": 0.14286579191684723, "learning_rate": 0.0002542623335678409, "loss": 2.1468, "step": 476190 }, { "epoch": 1.8408560251117194, "grad_norm": 0.1483672559261322, "learning_rate": 0.00025414777257638435, "loss": 2.1534, "step": 476200 }, { "epoch": 1.8408946823151027, "grad_norm": 0.153128981590271, "learning_rate": 0.00025403321910180666, "loss": 2.1568, "step": 476210 }, { "epoch": 1.840933339518486, "grad_norm": 0.16412784159183502, "learning_rate": 0.0002539186731426282, "loss": 2.1495, "step": 476220 }, { "epoch": 1.8409719967218692, "grad_norm": 0.14767330884933472, "learning_rate": 0.0002538041346973703, "loss": 2.1457, "step": 476230 }, { "epoch": 1.8410106539252524, "grad_norm": 0.15422873198986053, "learning_rate": 0.00025368960376455417, "loss": 2.1722, "step": 476240 }, { "epoch": 1.8410493111286357, "grad_norm": 0.14106683433055878, "learning_rate": 0.0002535750803427019, "loss": 2.1421, "step": 476250 }, { "epoch": 1.841087968332019, "grad_norm": 0.1555287390947342, "learning_rate": 0.0002534605644303363, "loss": 2.1435, "step": 476260 }, { "epoch": 1.8411266255354022, "grad_norm": 0.1445557177066803, "learning_rate": 0.0002533460560259797, "loss": 2.1498, "step": 476270 }, { "epoch": 1.8411652827387854, "grad_norm": 0.1519489884376526, "learning_rate": 0.0002532315551281561, "loss": 2.1547, "step": 476280 }, { "epoch": 1.8412039399421687, "grad_norm": 0.1514035314321518, "learning_rate": 0.0002531170617353893, "loss": 2.1446, "step": 476290 }, { "epoch": 1.841242597145552, "grad_norm": 0.14062903821468353, "learning_rate": 0.00025300257584620357, "loss": 2.1401, "step": 476300 }, { "epoch": 1.8412812543489354, "grad_norm": 0.13704952597618103, "learning_rate": 0.0002528880974591239, "loss": 2.1568, "step": 476310 }, { "epoch": 1.8413199115523187, "grad_norm": 0.17359179258346558, "learning_rate": 0.00025277362657267566, "loss": 2.1533, "step": 476320 }, { "epoch": 1.841358568755702, "grad_norm": 0.20773059129714966, "learning_rate": 0.0002526591631853847, "loss": 2.1696, "step": 476330 }, { "epoch": 1.8413972259590852, "grad_norm": 0.15593963861465454, "learning_rate": 0.0002525447072957776, "loss": 2.1463, "step": 476340 }, { "epoch": 1.8414358831624686, "grad_norm": 0.13709397614002228, "learning_rate": 0.0002524302589023808, "loss": 2.1573, "step": 476350 }, { "epoch": 1.841474540365852, "grad_norm": 0.14867442846298218, "learning_rate": 0.00025231581800372197, "loss": 2.143, "step": 476360 }, { "epoch": 1.8415131975692351, "grad_norm": 0.15370690822601318, "learning_rate": 0.0002522013845983284, "loss": 2.1557, "step": 476370 }, { "epoch": 1.8415518547726184, "grad_norm": 0.15814514458179474, "learning_rate": 0.000252086958684729, "loss": 2.163, "step": 476380 }, { "epoch": 1.8415905119760017, "grad_norm": 0.15417690575122833, "learning_rate": 0.0002519725402614523, "loss": 2.1483, "step": 476390 }, { "epoch": 1.841629169179385, "grad_norm": 0.16330553591251373, "learning_rate": 0.0002518581293270274, "loss": 2.1412, "step": 476400 }, { "epoch": 1.8416678263827682, "grad_norm": 0.15288779139518738, "learning_rate": 0.0002517437258799842, "loss": 2.1471, "step": 476410 }, { "epoch": 1.8417064835861514, "grad_norm": 0.1717262715101242, "learning_rate": 0.0002516293299188528, "loss": 2.1599, "step": 476420 }, { "epoch": 1.8417451407895347, "grad_norm": 0.15698036551475525, "learning_rate": 0.0002515149414421638, "loss": 2.1335, "step": 476430 }, { "epoch": 1.841783797992918, "grad_norm": 0.16390347480773926, "learning_rate": 0.0002514005604484486, "loss": 2.1474, "step": 476440 }, { "epoch": 1.8418224551963012, "grad_norm": 0.1516304314136505, "learning_rate": 0.0002512861869362386, "loss": 2.146, "step": 476450 }, { "epoch": 1.8418611123996844, "grad_norm": 0.19226273894309998, "learning_rate": 0.0002511718209040661, "loss": 2.149, "step": 476460 }, { "epoch": 1.8418997696030677, "grad_norm": 0.1427430957555771, "learning_rate": 0.0002510574623504636, "loss": 2.1615, "step": 476470 }, { "epoch": 1.8419384268064511, "grad_norm": 0.159335657954216, "learning_rate": 0.00025094311127396416, "loss": 2.1554, "step": 476480 }, { "epoch": 1.8419770840098344, "grad_norm": 0.14797185361385345, "learning_rate": 0.0002508287676731016, "loss": 2.1598, "step": 476490 }, { "epoch": 1.8420157412132176, "grad_norm": 0.16486115753650665, "learning_rate": 0.00025071443154640985, "loss": 2.1591, "step": 476500 }, { "epoch": 1.842054398416601, "grad_norm": 0.14702408015727997, "learning_rate": 0.00025060010289242317, "loss": 2.1509, "step": 476510 }, { "epoch": 1.8420930556199844, "grad_norm": 0.16612829267978668, "learning_rate": 0.0002504857817096771, "loss": 2.1511, "step": 476520 }, { "epoch": 1.8421317128233676, "grad_norm": 0.15480519831180573, "learning_rate": 0.00025037146799670643, "loss": 2.1573, "step": 476530 }, { "epoch": 1.8421703700267509, "grad_norm": 0.15102191269397736, "learning_rate": 0.0002502571617520477, "loss": 2.1603, "step": 476540 }, { "epoch": 1.8422090272301341, "grad_norm": 0.15874643623828888, "learning_rate": 0.00025014286297423706, "loss": 2.1558, "step": 476550 }, { "epoch": 1.8422476844335174, "grad_norm": 0.13553157448768616, "learning_rate": 0.0002500285716618114, "loss": 2.1539, "step": 476560 }, { "epoch": 1.8422863416369006, "grad_norm": 0.822083592414856, "learning_rate": 0.0002499142878133083, "loss": 2.1482, "step": 476570 }, { "epoch": 1.842324998840284, "grad_norm": 0.16487015783786774, "learning_rate": 0.0002498000114272654, "loss": 2.1404, "step": 476580 }, { "epoch": 1.8423636560436671, "grad_norm": 0.15109044313430786, "learning_rate": 0.0002496857425022214, "loss": 2.1623, "step": 476590 }, { "epoch": 1.8424023132470504, "grad_norm": 0.15568329393863678, "learning_rate": 0.0002495714810367149, "loss": 2.1414, "step": 476600 }, { "epoch": 1.8424409704504336, "grad_norm": 0.14634571969509125, "learning_rate": 0.000249457227029285, "loss": 2.1592, "step": 476610 }, { "epoch": 1.842479627653817, "grad_norm": 0.15658384561538696, "learning_rate": 0.0002493429804784719, "loss": 2.1577, "step": 476620 }, { "epoch": 1.8425182848572002, "grad_norm": 0.14044654369354248, "learning_rate": 0.0002492287413828156, "loss": 2.147, "step": 476630 }, { "epoch": 1.8425569420605836, "grad_norm": 0.13307639956474304, "learning_rate": 0.00024911450974085693, "loss": 2.1446, "step": 476640 }, { "epoch": 1.8425955992639669, "grad_norm": 0.14166012406349182, "learning_rate": 0.0002490002855511371, "loss": 2.1486, "step": 476650 }, { "epoch": 1.8426342564673501, "grad_norm": 0.15263959765434265, "learning_rate": 0.00024888606881219745, "loss": 2.1615, "step": 476660 }, { "epoch": 1.8426729136707334, "grad_norm": 0.15352514386177063, "learning_rate": 0.0002487718595225805, "loss": 2.1607, "step": 476670 }, { "epoch": 1.8427115708741166, "grad_norm": 0.15273882448673248, "learning_rate": 0.000248657657680829, "loss": 2.1578, "step": 476680 }, { "epoch": 1.8427502280775, "grad_norm": 0.15000106394290924, "learning_rate": 0.0002485434632854859, "loss": 2.1488, "step": 476690 }, { "epoch": 1.8427888852808834, "grad_norm": 0.14709743857383728, "learning_rate": 0.0002484292763350946, "loss": 2.1539, "step": 476700 }, { "epoch": 1.8428275424842666, "grad_norm": 0.14307551085948944, "learning_rate": 0.0002483150968281995, "loss": 2.1502, "step": 476710 }, { "epoch": 1.8428661996876499, "grad_norm": 0.1431547999382019, "learning_rate": 0.000248200924763345, "loss": 2.1427, "step": 476720 }, { "epoch": 1.8429048568910331, "grad_norm": 0.1426902562379837, "learning_rate": 0.00024808676013907593, "loss": 2.1508, "step": 476730 }, { "epoch": 1.8429435140944164, "grad_norm": 0.1503293514251709, "learning_rate": 0.00024797260295393776, "loss": 2.1488, "step": 476740 }, { "epoch": 1.8429821712977996, "grad_norm": 0.15397608280181885, "learning_rate": 0.00024785845320647696, "loss": 2.1451, "step": 476750 }, { "epoch": 1.8430208285011829, "grad_norm": 0.14943160116672516, "learning_rate": 0.0002477443108952393, "loss": 2.1624, "step": 476760 }, { "epoch": 1.8430594857045661, "grad_norm": 0.14495742321014404, "learning_rate": 0.000247630176018772, "loss": 2.1507, "step": 476770 }, { "epoch": 1.8430981429079494, "grad_norm": 0.15321022272109985, "learning_rate": 0.0002475160485756225, "loss": 2.1644, "step": 476780 }, { "epoch": 1.8431368001113326, "grad_norm": 0.1492360681295395, "learning_rate": 0.0002474019285643385, "loss": 2.1533, "step": 476790 }, { "epoch": 1.8431754573147159, "grad_norm": 0.1437566727399826, "learning_rate": 0.0002472878159834684, "loss": 2.1566, "step": 476800 }, { "epoch": 1.8432141145180994, "grad_norm": 0.14607855677604675, "learning_rate": 0.00024717371083156104, "loss": 2.1383, "step": 476810 }, { "epoch": 1.8432527717214826, "grad_norm": 0.14503221213817596, "learning_rate": 0.0002470596131071656, "loss": 2.1485, "step": 476820 }, { "epoch": 1.8432914289248659, "grad_norm": 0.15728530287742615, "learning_rate": 0.00024694552280883175, "loss": 2.1448, "step": 476830 }, { "epoch": 1.8433300861282491, "grad_norm": 0.1535523235797882, "learning_rate": 0.0002468314399351099, "loss": 2.1411, "step": 476840 }, { "epoch": 1.8433687433316324, "grad_norm": 0.15958794951438904, "learning_rate": 0.00024671736448455064, "loss": 2.1587, "step": 476850 }, { "epoch": 1.8434074005350158, "grad_norm": 0.14848648011684418, "learning_rate": 0.0002466032964557052, "loss": 2.1426, "step": 476860 }, { "epoch": 1.843446057738399, "grad_norm": 0.15634022653102875, "learning_rate": 0.0002464892358471249, "loss": 2.1768, "step": 476870 }, { "epoch": 1.8434847149417823, "grad_norm": 0.14585278928279877, "learning_rate": 0.0002463751826573621, "loss": 2.1488, "step": 476880 }, { "epoch": 1.8435233721451656, "grad_norm": 0.1499786376953125, "learning_rate": 0.0002462611368849694, "loss": 2.156, "step": 476890 }, { "epoch": 1.8435620293485488, "grad_norm": 0.15097768604755402, "learning_rate": 0.0002461470985284997, "loss": 2.1508, "step": 476900 }, { "epoch": 1.843600686551932, "grad_norm": 0.1578410118818283, "learning_rate": 0.0002460330675865066, "loss": 2.1508, "step": 476910 }, { "epoch": 1.8436393437553154, "grad_norm": 0.14859865605831146, "learning_rate": 0.00024591904405754406, "loss": 2.1512, "step": 476920 }, { "epoch": 1.8436780009586986, "grad_norm": 0.1477453112602234, "learning_rate": 0.00024580502794016644, "loss": 2.1369, "step": 476930 }, { "epoch": 1.8437166581620819, "grad_norm": 0.15519174933433533, "learning_rate": 0.0002456910192329289, "loss": 2.1551, "step": 476940 }, { "epoch": 1.843755315365465, "grad_norm": 0.1488887071609497, "learning_rate": 0.0002455770179343866, "loss": 2.1478, "step": 476950 }, { "epoch": 1.8437939725688484, "grad_norm": 0.14880669116973877, "learning_rate": 0.0002454630240430953, "loss": 2.1417, "step": 476960 }, { "epoch": 1.8438326297722316, "grad_norm": 0.14693772792816162, "learning_rate": 0.0002453490375576115, "loss": 2.161, "step": 476970 }, { "epoch": 1.843871286975615, "grad_norm": 0.16454418003559113, "learning_rate": 0.0002452350584764922, "loss": 2.152, "step": 476980 }, { "epoch": 1.8439099441789983, "grad_norm": 0.17832502722740173, "learning_rate": 0.00024512108679829425, "loss": 2.1435, "step": 476990 }, { "epoch": 1.8439486013823816, "grad_norm": 0.14790251851081848, "learning_rate": 0.0002450071225215755, "loss": 2.1578, "step": 477000 }, { "epoch": 1.8439872585857648, "grad_norm": 0.142452210187912, "learning_rate": 0.0002448931656448945, "loss": 2.1419, "step": 477010 }, { "epoch": 1.844025915789148, "grad_norm": 0.14764165878295898, "learning_rate": 0.0002447792161668094, "loss": 2.1409, "step": 477020 }, { "epoch": 1.8440645729925316, "grad_norm": 0.14111246168613434, "learning_rate": 0.0002446652740858797, "loss": 2.1565, "step": 477030 }, { "epoch": 1.8441032301959148, "grad_norm": 0.16016855835914612, "learning_rate": 0.0002445513394006649, "loss": 2.1595, "step": 477040 }, { "epoch": 1.844141887399298, "grad_norm": 0.15253598988056183, "learning_rate": 0.00024443741210972484, "loss": 2.1467, "step": 477050 }, { "epoch": 1.8441805446026813, "grad_norm": 0.14610204100608826, "learning_rate": 0.00024432349221162043, "loss": 2.1543, "step": 477060 }, { "epoch": 1.8442192018060646, "grad_norm": 0.14825892448425293, "learning_rate": 0.00024420957970491244, "loss": 2.1495, "step": 477070 }, { "epoch": 1.8442578590094478, "grad_norm": 0.142495796084404, "learning_rate": 0.0002440956745881624, "loss": 2.1487, "step": 477080 }, { "epoch": 1.844296516212831, "grad_norm": 0.14968812465667725, "learning_rate": 0.0002439817768599324, "loss": 2.1572, "step": 477090 }, { "epoch": 1.8443351734162143, "grad_norm": 0.16449612379074097, "learning_rate": 0.00024386788651878467, "loss": 2.1353, "step": 477100 }, { "epoch": 1.8443738306195976, "grad_norm": 0.14865903556346893, "learning_rate": 0.00024375400356328215, "loss": 2.1476, "step": 477110 }, { "epoch": 1.8444124878229808, "grad_norm": 0.14438387751579285, "learning_rate": 0.0002436401279919882, "loss": 2.1591, "step": 477120 }, { "epoch": 1.844451145026364, "grad_norm": 0.13989832997322083, "learning_rate": 0.00024352625980346643, "loss": 2.1306, "step": 477130 }, { "epoch": 1.8444898022297473, "grad_norm": 0.1448381245136261, "learning_rate": 0.0002434123989962813, "loss": 2.1626, "step": 477140 }, { "epoch": 1.8445284594331308, "grad_norm": 0.14926588535308838, "learning_rate": 0.00024329854556899734, "loss": 2.1455, "step": 477150 }, { "epoch": 1.844567116636514, "grad_norm": 0.14922012388706207, "learning_rate": 0.00024318469952018008, "loss": 2.16, "step": 477160 }, { "epoch": 1.8446057738398973, "grad_norm": 0.1519775688648224, "learning_rate": 0.00024307086084839492, "loss": 2.1492, "step": 477170 }, { "epoch": 1.8446444310432806, "grad_norm": 0.1431160271167755, "learning_rate": 0.00024295702955220812, "loss": 2.1519, "step": 477180 }, { "epoch": 1.844683088246664, "grad_norm": 0.15909992158412933, "learning_rate": 0.00024284320563018613, "loss": 2.1432, "step": 477190 }, { "epoch": 1.8447217454500473, "grad_norm": 0.15186427533626556, "learning_rate": 0.00024272938908089637, "loss": 2.1466, "step": 477200 }, { "epoch": 1.8447604026534306, "grad_norm": 0.14806345105171204, "learning_rate": 0.00024261557990290573, "loss": 2.146, "step": 477210 }, { "epoch": 1.8447990598568138, "grad_norm": 0.15865075588226318, "learning_rate": 0.00024250177809478292, "loss": 2.1494, "step": 477220 }, { "epoch": 1.844837717060197, "grad_norm": 0.1485503762960434, "learning_rate": 0.00024238798365509574, "loss": 2.1387, "step": 477230 }, { "epoch": 1.8448763742635803, "grad_norm": 0.13728691637516022, "learning_rate": 0.00024227419658241357, "loss": 2.1608, "step": 477240 }, { "epoch": 1.8449150314669636, "grad_norm": 0.1566370725631714, "learning_rate": 0.00024216041687530554, "loss": 2.1623, "step": 477250 }, { "epoch": 1.8449536886703468, "grad_norm": 0.13936889171600342, "learning_rate": 0.0002420466445323415, "loss": 2.1483, "step": 477260 }, { "epoch": 1.84499234587373, "grad_norm": 0.15174809098243713, "learning_rate": 0.00024193287955209166, "loss": 2.1379, "step": 477270 }, { "epoch": 1.8450310030771133, "grad_norm": 0.1539214551448822, "learning_rate": 0.00024181912193312717, "loss": 2.1543, "step": 477280 }, { "epoch": 1.8450696602804966, "grad_norm": 0.14683173596858978, "learning_rate": 0.00024170537167401897, "loss": 2.1538, "step": 477290 }, { "epoch": 1.8451083174838798, "grad_norm": 0.1581607162952423, "learning_rate": 0.0002415916287733386, "loss": 2.1513, "step": 477300 }, { "epoch": 1.845146974687263, "grad_norm": 0.14941240847110748, "learning_rate": 0.00024147789322965862, "loss": 2.1475, "step": 477310 }, { "epoch": 1.8451856318906465, "grad_norm": 0.15306486189365387, "learning_rate": 0.00024136416504155124, "loss": 2.1584, "step": 477320 }, { "epoch": 1.8452242890940298, "grad_norm": 0.15010662376880646, "learning_rate": 0.0002412504442075898, "loss": 2.1582, "step": 477330 }, { "epoch": 1.845262946297413, "grad_norm": 0.8174676299095154, "learning_rate": 0.0002411367307263479, "loss": 2.1466, "step": 477340 }, { "epoch": 1.8453016035007963, "grad_norm": 0.1684281975030899, "learning_rate": 0.00024102302459639912, "loss": 2.1379, "step": 477350 }, { "epoch": 1.8453402607041798, "grad_norm": 0.15354377031326294, "learning_rate": 0.0002409093258163182, "loss": 2.1526, "step": 477360 }, { "epoch": 1.845378917907563, "grad_norm": 0.15894003212451935, "learning_rate": 0.00024079563438468, "loss": 2.1566, "step": 477370 }, { "epoch": 1.8454175751109463, "grad_norm": 0.14311885833740234, "learning_rate": 0.00024068195030006013, "loss": 2.1597, "step": 477380 }, { "epoch": 1.8454562323143295, "grad_norm": 0.13516804575920105, "learning_rate": 0.00024056827356103415, "loss": 2.1488, "step": 477390 }, { "epoch": 1.8454948895177128, "grad_norm": 0.15137454867362976, "learning_rate": 0.00024045460416617838, "loss": 2.1505, "step": 477400 }, { "epoch": 1.845533546721096, "grad_norm": 0.15255650877952576, "learning_rate": 0.00024034094211406964, "loss": 2.1423, "step": 477410 }, { "epoch": 1.8455722039244793, "grad_norm": 0.1438567042350769, "learning_rate": 0.00024022728740328515, "loss": 2.1501, "step": 477420 }, { "epoch": 1.8456108611278625, "grad_norm": 0.15280750393867493, "learning_rate": 0.00024011364003240267, "loss": 2.1642, "step": 477430 }, { "epoch": 1.8456495183312458, "grad_norm": 0.14523179829120636, "learning_rate": 0.00024, "loss": 2.1465, "step": 477440 }, { "epoch": 1.845688175534629, "grad_norm": 0.14758449792861938, "learning_rate": 0.00023988636730465606, "loss": 2.151, "step": 477450 }, { "epoch": 1.8457268327380123, "grad_norm": 0.13859793543815613, "learning_rate": 0.00023977274194495002, "loss": 2.1414, "step": 477460 }, { "epoch": 1.8457654899413956, "grad_norm": 0.15651504695415497, "learning_rate": 0.00023965912391946077, "loss": 2.1559, "step": 477470 }, { "epoch": 1.8458041471447788, "grad_norm": 0.14840462803840637, "learning_rate": 0.000239545513226769, "loss": 2.1443, "step": 477480 }, { "epoch": 1.8458428043481623, "grad_norm": 0.15093280375003815, "learning_rate": 0.0002394319098654547, "loss": 2.1402, "step": 477490 }, { "epoch": 1.8458814615515455, "grad_norm": 0.15339349210262299, "learning_rate": 0.0002393183138340991, "loss": 2.1493, "step": 477500 }, { "epoch": 1.8459201187549288, "grad_norm": 0.1480063647031784, "learning_rate": 0.00023920472513128322, "loss": 2.1406, "step": 477510 }, { "epoch": 1.845958775958312, "grad_norm": 0.1451997458934784, "learning_rate": 0.00023909114375558893, "loss": 2.1465, "step": 477520 }, { "epoch": 1.8459974331616955, "grad_norm": 0.15897716581821442, "learning_rate": 0.00023897756970559848, "loss": 2.1402, "step": 477530 }, { "epoch": 1.8460360903650788, "grad_norm": 0.15453803539276123, "learning_rate": 0.00023886400297989474, "loss": 2.1535, "step": 477540 }, { "epoch": 1.846074747568462, "grad_norm": 0.15898756682872772, "learning_rate": 0.00023875044357706088, "loss": 2.1513, "step": 477550 }, { "epoch": 1.8461134047718453, "grad_norm": 0.13958440721035004, "learning_rate": 0.00023863689149568025, "loss": 2.1457, "step": 477560 }, { "epoch": 1.8461520619752285, "grad_norm": 0.1507442146539688, "learning_rate": 0.0002385233467343373, "loss": 2.1382, "step": 477570 }, { "epoch": 1.8461907191786118, "grad_norm": 0.14807669818401337, "learning_rate": 0.00023840980929161625, "loss": 2.1518, "step": 477580 }, { "epoch": 1.846229376381995, "grad_norm": 0.1578161120414734, "learning_rate": 0.0002382962791661023, "loss": 2.1465, "step": 477590 }, { "epoch": 1.8462680335853783, "grad_norm": 0.13840286433696747, "learning_rate": 0.00023818275635638077, "loss": 2.1467, "step": 477600 }, { "epoch": 1.8463066907887615, "grad_norm": 0.14774125814437866, "learning_rate": 0.00023806924086103764, "loss": 2.1564, "step": 477610 }, { "epoch": 1.8463453479921448, "grad_norm": 0.14336572587490082, "learning_rate": 0.00023795573267865945, "loss": 2.1463, "step": 477620 }, { "epoch": 1.846384005195528, "grad_norm": 0.15975713729858398, "learning_rate": 0.00023784223180783282, "loss": 2.1378, "step": 477630 }, { "epoch": 1.8464226623989113, "grad_norm": 0.14392030239105225, "learning_rate": 0.0002377287382471449, "loss": 2.1564, "step": 477640 }, { "epoch": 1.8464613196022945, "grad_norm": 0.15305523574352264, "learning_rate": 0.00023761525199518375, "loss": 2.143, "step": 477650 }, { "epoch": 1.846499976805678, "grad_norm": 0.1496932953596115, "learning_rate": 0.00023750177305053734, "loss": 2.1506, "step": 477660 }, { "epoch": 1.8465386340090613, "grad_norm": 0.14829660952091217, "learning_rate": 0.00023738830141179434, "loss": 2.1441, "step": 477670 }, { "epoch": 1.8465772912124445, "grad_norm": 0.1704322248697281, "learning_rate": 0.00023727483707754372, "loss": 2.1422, "step": 477680 }, { "epoch": 1.8466159484158278, "grad_norm": 0.14436282217502594, "learning_rate": 0.00023716138004637523, "loss": 2.1489, "step": 477690 }, { "epoch": 1.8466546056192112, "grad_norm": 0.45129719376564026, "learning_rate": 0.00023704793031687887, "loss": 2.1324, "step": 477700 }, { "epoch": 1.8466932628225945, "grad_norm": 0.1636328250169754, "learning_rate": 0.00023693448788764517, "loss": 2.1626, "step": 477710 }, { "epoch": 1.8467319200259777, "grad_norm": 0.14438024163246155, "learning_rate": 0.00023682105275726474, "loss": 2.1558, "step": 477720 }, { "epoch": 1.846770577229361, "grad_norm": 0.1593109369277954, "learning_rate": 0.00023670762492432917, "loss": 2.1418, "step": 477730 }, { "epoch": 1.8468092344327443, "grad_norm": 0.14541137218475342, "learning_rate": 0.00023659420438743028, "loss": 2.1584, "step": 477740 }, { "epoch": 1.8468478916361275, "grad_norm": 0.1478029489517212, "learning_rate": 0.0002364807911451603, "loss": 2.1348, "step": 477750 }, { "epoch": 1.8468865488395108, "grad_norm": 0.15339234471321106, "learning_rate": 0.0002363673851961119, "loss": 2.1536, "step": 477760 }, { "epoch": 1.846925206042894, "grad_norm": 0.16422609984874725, "learning_rate": 0.0002362539865388782, "loss": 2.1419, "step": 477770 }, { "epoch": 1.8469638632462773, "grad_norm": 0.1466737985610962, "learning_rate": 0.00023614059517205278, "loss": 2.1402, "step": 477780 }, { "epoch": 1.8470025204496605, "grad_norm": 0.1492961049079895, "learning_rate": 0.00023602721109423008, "loss": 2.1619, "step": 477790 }, { "epoch": 1.8470411776530438, "grad_norm": 0.15063001215457916, "learning_rate": 0.00023591383430400438, "loss": 2.1351, "step": 477800 }, { "epoch": 1.847079834856427, "grad_norm": 0.1395963877439499, "learning_rate": 0.00023580046479997052, "loss": 2.1354, "step": 477810 }, { "epoch": 1.8471184920598103, "grad_norm": 0.15143616497516632, "learning_rate": 0.00023568710258072413, "loss": 2.1403, "step": 477820 }, { "epoch": 1.8471571492631937, "grad_norm": 0.15706034004688263, "learning_rate": 0.00023557374764486116, "loss": 2.136, "step": 477830 }, { "epoch": 1.847195806466577, "grad_norm": 0.14363554120063782, "learning_rate": 0.0002354603999909779, "loss": 2.1397, "step": 477840 }, { "epoch": 1.8472344636699602, "grad_norm": 0.13538502156734467, "learning_rate": 0.000235347059617671, "loss": 2.1375, "step": 477850 }, { "epoch": 1.8472731208733435, "grad_norm": 0.1521271914243698, "learning_rate": 0.0002352337265235376, "loss": 2.1448, "step": 477860 }, { "epoch": 1.847311778076727, "grad_norm": 0.1534687876701355, "learning_rate": 0.0002351204007071759, "loss": 2.1527, "step": 477870 }, { "epoch": 1.8473504352801102, "grad_norm": 0.14379604160785675, "learning_rate": 0.00023500708216718326, "loss": 2.1533, "step": 477880 }, { "epoch": 1.8473890924834935, "grad_norm": 0.1465234011411667, "learning_rate": 0.00023489377090215902, "loss": 2.1589, "step": 477890 }, { "epoch": 1.8474277496868767, "grad_norm": 0.13903623819351196, "learning_rate": 0.00023478046691070164, "loss": 2.1462, "step": 477900 }, { "epoch": 1.84746640689026, "grad_norm": 0.15117527544498444, "learning_rate": 0.00023466717019141116, "loss": 2.1504, "step": 477910 }, { "epoch": 1.8475050640936432, "grad_norm": 0.15345846116542816, "learning_rate": 0.00023455388074288707, "loss": 2.1339, "step": 477920 }, { "epoch": 1.8475437212970265, "grad_norm": 0.1577267050743103, "learning_rate": 0.00023444059856373011, "loss": 2.1497, "step": 477930 }, { "epoch": 1.8475823785004097, "grad_norm": 0.15517856180667877, "learning_rate": 0.00023432732365254072, "loss": 2.1436, "step": 477940 }, { "epoch": 1.847621035703793, "grad_norm": 0.14545536041259766, "learning_rate": 0.00023421405600792045, "loss": 2.1409, "step": 477950 }, { "epoch": 1.8476596929071762, "grad_norm": 0.14651986956596375, "learning_rate": 0.00023410079562847087, "loss": 2.1468, "step": 477960 }, { "epoch": 1.8476983501105595, "grad_norm": 0.16297149658203125, "learning_rate": 0.0002339875425127942, "loss": 2.1414, "step": 477970 }, { "epoch": 1.8477370073139427, "grad_norm": 0.1577177792787552, "learning_rate": 0.0002338742966594931, "loss": 2.1626, "step": 477980 }, { "epoch": 1.847775664517326, "grad_norm": 0.15041273832321167, "learning_rate": 0.00023376105806717073, "loss": 2.1421, "step": 477990 }, { "epoch": 1.8478143217207095, "grad_norm": 0.14626824855804443, "learning_rate": 0.00023364782673443064, "loss": 2.1448, "step": 478000 }, { "epoch": 1.8478529789240927, "grad_norm": 0.14885634183883667, "learning_rate": 0.00023353460265987657, "loss": 2.1398, "step": 478010 }, { "epoch": 1.847891636127476, "grad_norm": 0.1460237354040146, "learning_rate": 0.00023342138584211326, "loss": 2.1466, "step": 478020 }, { "epoch": 1.8479302933308592, "grad_norm": 0.15810991823673248, "learning_rate": 0.0002333081762797451, "loss": 2.1635, "step": 478030 }, { "epoch": 1.8479689505342427, "grad_norm": 0.15092025697231293, "learning_rate": 0.0002331949739713779, "loss": 2.1538, "step": 478040 }, { "epoch": 1.848007607737626, "grad_norm": 0.15626902878284454, "learning_rate": 0.00023308177891561723, "loss": 2.1489, "step": 478050 }, { "epoch": 1.8480462649410092, "grad_norm": 0.16233813762664795, "learning_rate": 0.00023296859111106928, "loss": 2.1525, "step": 478060 }, { "epoch": 1.8480849221443925, "grad_norm": 0.16185061633586884, "learning_rate": 0.00023285541055634052, "loss": 2.1612, "step": 478070 }, { "epoch": 1.8481235793477757, "grad_norm": 0.16262325644493103, "learning_rate": 0.0002327422372500383, "loss": 2.1439, "step": 478080 }, { "epoch": 1.848162236551159, "grad_norm": 0.1457241326570511, "learning_rate": 0.00023262907119077014, "loss": 2.1635, "step": 478090 }, { "epoch": 1.8482008937545422, "grad_norm": 0.14732667803764343, "learning_rate": 0.00023251591237714387, "loss": 2.1589, "step": 478100 }, { "epoch": 1.8482395509579255, "grad_norm": 0.1515752077102661, "learning_rate": 0.00023240276080776812, "loss": 2.1383, "step": 478110 }, { "epoch": 1.8482782081613087, "grad_norm": 0.1464495062828064, "learning_rate": 0.0002322896164812518, "loss": 2.1357, "step": 478120 }, { "epoch": 1.848316865364692, "grad_norm": 0.15452329814434052, "learning_rate": 0.000232176479396204, "loss": 2.1674, "step": 478130 }, { "epoch": 1.8483555225680752, "grad_norm": 0.15739069879055023, "learning_rate": 0.00023206334955123476, "loss": 2.1432, "step": 478140 }, { "epoch": 1.8483941797714585, "grad_norm": 0.15375475585460663, "learning_rate": 0.00023195022694495404, "loss": 2.1417, "step": 478150 }, { "epoch": 1.8484328369748417, "grad_norm": 0.1497449278831482, "learning_rate": 0.00023183711157597254, "loss": 2.1378, "step": 478160 }, { "epoch": 1.8484714941782252, "grad_norm": 0.14955320954322815, "learning_rate": 0.00023172400344290156, "loss": 2.1485, "step": 478170 }, { "epoch": 1.8485101513816085, "grad_norm": 0.15689685940742493, "learning_rate": 0.00023161090254435223, "loss": 2.1523, "step": 478180 }, { "epoch": 1.8485488085849917, "grad_norm": 0.16159173846244812, "learning_rate": 0.00023149780887893724, "loss": 2.1467, "step": 478190 }, { "epoch": 1.848587465788375, "grad_norm": 0.1486079841852188, "learning_rate": 0.00023138472244526854, "loss": 2.1461, "step": 478200 }, { "epoch": 1.8486261229917584, "grad_norm": 0.1565522402524948, "learning_rate": 0.00023127164324195903, "loss": 2.1311, "step": 478210 }, { "epoch": 1.8486647801951417, "grad_norm": 0.13985569775104523, "learning_rate": 0.00023115857126762207, "loss": 2.1518, "step": 478220 }, { "epoch": 1.848703437398525, "grad_norm": 0.14702102541923523, "learning_rate": 0.00023104550652087164, "loss": 2.1481, "step": 478230 }, { "epoch": 1.8487420946019082, "grad_norm": 0.14980337023735046, "learning_rate": 0.0002309324490003215, "loss": 2.1523, "step": 478240 }, { "epoch": 1.8487807518052914, "grad_norm": 0.1474117487668991, "learning_rate": 0.0002308193987045868, "loss": 2.146, "step": 478250 }, { "epoch": 1.8488194090086747, "grad_norm": 0.14778390526771545, "learning_rate": 0.00023070635563228237, "loss": 2.1527, "step": 478260 }, { "epoch": 1.848858066212058, "grad_norm": 0.14890675246715546, "learning_rate": 0.0002305933197820238, "loss": 2.1535, "step": 478270 }, { "epoch": 1.8488967234154412, "grad_norm": 0.14470405876636505, "learning_rate": 0.00023048029115242685, "loss": 2.1449, "step": 478280 }, { "epoch": 1.8489353806188245, "grad_norm": 0.15589329600334167, "learning_rate": 0.0002303672697421082, "loss": 2.1545, "step": 478290 }, { "epoch": 1.8489740378222077, "grad_norm": 0.15257033705711365, "learning_rate": 0.00023025425554968492, "loss": 2.1412, "step": 478300 }, { "epoch": 1.849012695025591, "grad_norm": 0.16834686696529388, "learning_rate": 0.00023014124857377395, "loss": 2.1404, "step": 478310 }, { "epoch": 1.8490513522289742, "grad_norm": 0.15387968719005585, "learning_rate": 0.000230028248812993, "loss": 2.1511, "step": 478320 }, { "epoch": 1.8490900094323575, "grad_norm": 0.1577366292476654, "learning_rate": 0.00022991525626596054, "loss": 2.1405, "step": 478330 }, { "epoch": 1.849128666635741, "grad_norm": 0.15258517861366272, "learning_rate": 0.000229802270931295, "loss": 2.1339, "step": 478340 }, { "epoch": 1.8491673238391242, "grad_norm": 0.16167719662189484, "learning_rate": 0.00022968929280761574, "loss": 2.1293, "step": 478350 }, { "epoch": 1.8492059810425074, "grad_norm": 0.17660821974277496, "learning_rate": 0.00022957632189354182, "loss": 2.1588, "step": 478360 }, { "epoch": 1.8492446382458907, "grad_norm": 0.15384353697299957, "learning_rate": 0.0002294633581876935, "loss": 2.1441, "step": 478370 }, { "epoch": 1.8492832954492742, "grad_norm": 0.15233395993709564, "learning_rate": 0.00022935040168869093, "loss": 2.1485, "step": 478380 }, { "epoch": 1.8493219526526574, "grad_norm": 0.15865083038806915, "learning_rate": 0.00022923745239515525, "loss": 2.1522, "step": 478390 }, { "epoch": 1.8493606098560407, "grad_norm": 0.14701934158802032, "learning_rate": 0.00022912451030570759, "loss": 2.1356, "step": 478400 }, { "epoch": 1.849399267059424, "grad_norm": 0.15493252873420715, "learning_rate": 0.00022901157541896965, "loss": 2.1524, "step": 478410 }, { "epoch": 1.8494379242628072, "grad_norm": 0.1526908129453659, "learning_rate": 0.0002288986477335635, "loss": 2.1404, "step": 478420 }, { "epoch": 1.8494765814661904, "grad_norm": 0.15958495438098907, "learning_rate": 0.00022878572724811198, "loss": 2.1437, "step": 478430 }, { "epoch": 1.8495152386695737, "grad_norm": 0.15282100439071655, "learning_rate": 0.00022867281396123773, "loss": 2.1558, "step": 478440 }, { "epoch": 1.849553895872957, "grad_norm": 0.19952651858329773, "learning_rate": 0.00022855990787156455, "loss": 2.1479, "step": 478450 }, { "epoch": 1.8495925530763402, "grad_norm": 0.15583156049251556, "learning_rate": 0.0002284470089777162, "loss": 2.1595, "step": 478460 }, { "epoch": 1.8496312102797234, "grad_norm": 0.14816786348819733, "learning_rate": 0.0002283341172783171, "loss": 2.1473, "step": 478470 }, { "epoch": 1.8496698674831067, "grad_norm": 0.14721480011940002, "learning_rate": 0.00022822123277199192, "loss": 2.1432, "step": 478480 }, { "epoch": 1.84970852468649, "grad_norm": 0.15829156339168549, "learning_rate": 0.00022810835545736597, "loss": 2.1543, "step": 478490 }, { "epoch": 1.8497471818898732, "grad_norm": 0.14759312570095062, "learning_rate": 0.000227995485333065, "loss": 2.1546, "step": 478500 }, { "epoch": 1.8497858390932567, "grad_norm": 0.14392654597759247, "learning_rate": 0.00022788262239771485, "loss": 2.1652, "step": 478510 }, { "epoch": 1.84982449629664, "grad_norm": 0.1517169177532196, "learning_rate": 0.00022776976664994232, "loss": 2.1496, "step": 478520 }, { "epoch": 1.8498631535000232, "grad_norm": 0.14544710516929626, "learning_rate": 0.00022765691808837409, "loss": 2.1537, "step": 478530 }, { "epoch": 1.8499018107034064, "grad_norm": 0.15497958660125732, "learning_rate": 0.0002275440767116379, "loss": 2.1317, "step": 478540 }, { "epoch": 1.84994046790679, "grad_norm": 0.15696972608566284, "learning_rate": 0.00022743124251836157, "loss": 2.1529, "step": 478550 }, { "epoch": 1.8499791251101732, "grad_norm": 0.1588466912508011, "learning_rate": 0.00022731841550717303, "loss": 2.1546, "step": 478560 }, { "epoch": 1.8500177823135564, "grad_norm": 0.15451645851135254, "learning_rate": 0.00022720559567670118, "loss": 2.1448, "step": 478570 }, { "epoch": 1.8500564395169397, "grad_norm": 0.15711510181427002, "learning_rate": 0.00022709278302557513, "loss": 2.1333, "step": 478580 }, { "epoch": 1.850095096720323, "grad_norm": 0.14721792936325073, "learning_rate": 0.00022697997755242484, "loss": 2.151, "step": 478590 }, { "epoch": 1.8501337539237062, "grad_norm": 0.1567068099975586, "learning_rate": 0.00022686717925587984, "loss": 2.1518, "step": 478600 }, { "epoch": 1.8501724111270894, "grad_norm": 0.14560212194919586, "learning_rate": 0.00022675438813457083, "loss": 2.1303, "step": 478610 }, { "epoch": 1.8502110683304727, "grad_norm": 0.15781927108764648, "learning_rate": 0.00022664160418712888, "loss": 2.1512, "step": 478620 }, { "epoch": 1.850249725533856, "grad_norm": 0.14911913871765137, "learning_rate": 0.00022652882741218484, "loss": 2.1596, "step": 478630 }, { "epoch": 1.8502883827372392, "grad_norm": 0.17982690036296844, "learning_rate": 0.00022641605780837095, "loss": 2.1453, "step": 478640 }, { "epoch": 1.8503270399406224, "grad_norm": 0.31263604760169983, "learning_rate": 0.00022630329537431915, "loss": 2.1547, "step": 478650 }, { "epoch": 1.8503656971440057, "grad_norm": 0.14549441635608673, "learning_rate": 0.00022619054010866214, "loss": 2.1579, "step": 478660 }, { "epoch": 1.8504043543473891, "grad_norm": 0.1487957090139389, "learning_rate": 0.00022607779201003298, "loss": 2.1394, "step": 478670 }, { "epoch": 1.8504430115507724, "grad_norm": 0.14924855530261993, "learning_rate": 0.00022596505107706522, "loss": 2.1412, "step": 478680 }, { "epoch": 1.8504816687541557, "grad_norm": 0.14954110980033875, "learning_rate": 0.0002258523173083926, "loss": 2.1458, "step": 478690 }, { "epoch": 1.850520325957539, "grad_norm": 0.16220064461231232, "learning_rate": 0.00022573959070265004, "loss": 2.1553, "step": 478700 }, { "epoch": 1.8505589831609222, "grad_norm": 0.14964133501052856, "learning_rate": 0.0002256268712584717, "loss": 2.149, "step": 478710 }, { "epoch": 1.8505976403643056, "grad_norm": 0.1590881198644638, "learning_rate": 0.00022551415897449336, "loss": 2.1485, "step": 478720 }, { "epoch": 1.8506362975676889, "grad_norm": 0.1411176323890686, "learning_rate": 0.0002254014538493503, "loss": 2.1484, "step": 478730 }, { "epoch": 1.8506749547710721, "grad_norm": 0.15405212342739105, "learning_rate": 0.00022528875588167873, "loss": 2.1365, "step": 478740 }, { "epoch": 1.8507136119744554, "grad_norm": 0.14678388833999634, "learning_rate": 0.00022517606507011512, "loss": 2.172, "step": 478750 }, { "epoch": 1.8507522691778386, "grad_norm": 0.14999248087406158, "learning_rate": 0.00022506338141329674, "loss": 2.1521, "step": 478760 }, { "epoch": 1.850790926381222, "grad_norm": 0.14902278780937195, "learning_rate": 0.00022495070490986092, "loss": 2.1409, "step": 478770 }, { "epoch": 1.8508295835846051, "grad_norm": 0.14862895011901855, "learning_rate": 0.00022483803555844518, "loss": 2.1455, "step": 478780 }, { "epoch": 1.8508682407879884, "grad_norm": 0.1663714349269867, "learning_rate": 0.00022472537335768795, "loss": 2.1408, "step": 478790 }, { "epoch": 1.8509068979913716, "grad_norm": 0.16073864698410034, "learning_rate": 0.0002246127183062283, "loss": 2.1484, "step": 478800 }, { "epoch": 1.850945555194755, "grad_norm": 0.1747768223285675, "learning_rate": 0.00022450007040270491, "loss": 2.1543, "step": 478810 }, { "epoch": 1.8509842123981382, "grad_norm": 0.1444845199584961, "learning_rate": 0.00022438742964575755, "loss": 2.1471, "step": 478820 }, { "epoch": 1.8510228696015214, "grad_norm": 0.14794333279132843, "learning_rate": 0.0002242747960340259, "loss": 2.1572, "step": 478830 }, { "epoch": 1.8510615268049049, "grad_norm": 0.16190718114376068, "learning_rate": 0.0002241621695661509, "loss": 2.1573, "step": 478840 }, { "epoch": 1.8511001840082881, "grad_norm": 0.17460119724273682, "learning_rate": 0.00022404955024077312, "loss": 2.1372, "step": 478850 }, { "epoch": 1.8511388412116714, "grad_norm": 0.1538202315568924, "learning_rate": 0.00022393693805653392, "loss": 2.1665, "step": 478860 }, { "epoch": 1.8511774984150546, "grad_norm": 0.14792491495609283, "learning_rate": 0.00022382433301207505, "loss": 2.1484, "step": 478870 }, { "epoch": 1.8512161556184379, "grad_norm": 0.14176231622695923, "learning_rate": 0.0002237117351060387, "loss": 2.1504, "step": 478880 }, { "epoch": 1.8512548128218214, "grad_norm": 0.166255384683609, "learning_rate": 0.00022359914433706706, "loss": 2.1418, "step": 478890 }, { "epoch": 1.8512934700252046, "grad_norm": 0.15698324143886566, "learning_rate": 0.00022348656070380368, "loss": 2.131, "step": 478900 }, { "epoch": 1.8513321272285879, "grad_norm": 0.15468591451644897, "learning_rate": 0.00022337398420489187, "loss": 2.1589, "step": 478910 }, { "epoch": 1.8513707844319711, "grad_norm": 0.14415162801742554, "learning_rate": 0.00022326141483897533, "loss": 2.1583, "step": 478920 }, { "epoch": 1.8514094416353544, "grad_norm": 0.1514313519001007, "learning_rate": 0.0002231488526046983, "loss": 2.1489, "step": 478930 }, { "epoch": 1.8514480988387376, "grad_norm": 0.14063210785388947, "learning_rate": 0.00022303629750070587, "loss": 2.1363, "step": 478940 }, { "epoch": 1.8514867560421209, "grad_norm": 0.1474284678697586, "learning_rate": 0.00022292374952564287, "loss": 2.1437, "step": 478950 }, { "epoch": 1.8515254132455041, "grad_norm": 0.15619979798793793, "learning_rate": 0.00022281120867815508, "loss": 2.1506, "step": 478960 }, { "epoch": 1.8515640704488874, "grad_norm": 0.14546486735343933, "learning_rate": 0.00022269867495688844, "loss": 2.1491, "step": 478970 }, { "epoch": 1.8516027276522706, "grad_norm": 0.15706144273281097, "learning_rate": 0.00022258614836048962, "loss": 2.1321, "step": 478980 }, { "epoch": 1.8516413848556539, "grad_norm": 0.1643100529909134, "learning_rate": 0.0002224736288876048, "loss": 2.1422, "step": 478990 }, { "epoch": 1.8516800420590371, "grad_norm": 0.15362891554832458, "learning_rate": 0.0002223611165368822, "loss": 2.1395, "step": 479000 }, { "epoch": 1.8517186992624206, "grad_norm": 0.13917003571987152, "learning_rate": 0.00022224861130696905, "loss": 2.17, "step": 479010 }, { "epoch": 1.8517573564658039, "grad_norm": 0.15380679070949554, "learning_rate": 0.0002221361131965136, "loss": 2.148, "step": 479020 }, { "epoch": 1.8517960136691871, "grad_norm": 0.15027354657649994, "learning_rate": 0.0002220236222041643, "loss": 2.1452, "step": 479030 }, { "epoch": 1.8518346708725704, "grad_norm": 0.1432102769613266, "learning_rate": 0.00022191113832857056, "loss": 2.142, "step": 479040 }, { "epoch": 1.8518733280759538, "grad_norm": 0.1524096429347992, "learning_rate": 0.00022179866156838134, "loss": 2.1489, "step": 479050 }, { "epoch": 1.851911985279337, "grad_norm": 0.15654858946800232, "learning_rate": 0.0002216861919222468, "loss": 2.1513, "step": 479060 }, { "epoch": 1.8519506424827203, "grad_norm": 0.14918921887874603, "learning_rate": 0.00022157372938881714, "loss": 2.1352, "step": 479070 }, { "epoch": 1.8519892996861036, "grad_norm": 0.1569262593984604, "learning_rate": 0.0002214612739667432, "loss": 2.1396, "step": 479080 }, { "epoch": 1.8520279568894868, "grad_norm": 0.15429821610450745, "learning_rate": 0.00022134882565467607, "loss": 2.1583, "step": 479090 }, { "epoch": 1.85206661409287, "grad_norm": 0.15140283107757568, "learning_rate": 0.00022123638445126727, "loss": 2.1419, "step": 479100 }, { "epoch": 1.8521052712962534, "grad_norm": 0.15845070779323578, "learning_rate": 0.00022112395035516874, "loss": 2.1437, "step": 479110 }, { "epoch": 1.8521439284996366, "grad_norm": 0.15516333281993866, "learning_rate": 0.00022101152336503316, "loss": 2.1388, "step": 479120 }, { "epoch": 1.8521825857030199, "grad_norm": 0.14900943636894226, "learning_rate": 0.00022089910347951314, "loss": 2.1492, "step": 479130 }, { "epoch": 1.852221242906403, "grad_norm": 0.15713250637054443, "learning_rate": 0.0002207866906972622, "loss": 2.153, "step": 479140 }, { "epoch": 1.8522599001097864, "grad_norm": 0.15991628170013428, "learning_rate": 0.00022067428501693366, "loss": 2.1466, "step": 479150 }, { "epoch": 1.8522985573131696, "grad_norm": 0.14617495238780975, "learning_rate": 0.00022056188643718213, "loss": 2.1565, "step": 479160 }, { "epoch": 1.8523372145165529, "grad_norm": 0.14892008900642395, "learning_rate": 0.0002204494949566618, "loss": 2.128, "step": 479170 }, { "epoch": 1.8523758717199363, "grad_norm": 0.15686427056789398, "learning_rate": 0.00022033711057402794, "loss": 2.1391, "step": 479180 }, { "epoch": 1.8524145289233196, "grad_norm": 0.1545068472623825, "learning_rate": 0.00022022473328793546, "loss": 2.1442, "step": 479190 }, { "epoch": 1.8524531861267028, "grad_norm": 0.14783914387226105, "learning_rate": 0.00022011236309704075, "loss": 2.1486, "step": 479200 }, { "epoch": 1.852491843330086, "grad_norm": 0.15659219026565552, "learning_rate": 0.00021999999999999998, "loss": 2.14, "step": 479210 }, { "epoch": 1.8525305005334696, "grad_norm": 0.14546005427837372, "learning_rate": 0.00021988764399546957, "loss": 2.1278, "step": 479220 }, { "epoch": 1.8525691577368528, "grad_norm": 0.15138238668441772, "learning_rate": 0.0002197752950821068, "loss": 2.1388, "step": 479230 }, { "epoch": 1.852607814940236, "grad_norm": 0.14894860982894897, "learning_rate": 0.000219662953258569, "loss": 2.1373, "step": 479240 }, { "epoch": 1.8526464721436193, "grad_norm": 0.1465761661529541, "learning_rate": 0.00021955061852351455, "loss": 2.1502, "step": 479250 }, { "epoch": 1.8526851293470026, "grad_norm": 0.14742115139961243, "learning_rate": 0.0002194382908756012, "loss": 2.1348, "step": 479260 }, { "epoch": 1.8527237865503858, "grad_norm": 0.1882600635290146, "learning_rate": 0.00021932597031348844, "loss": 2.1405, "step": 479270 }, { "epoch": 1.852762443753769, "grad_norm": 0.1500731259584427, "learning_rate": 0.00021921365683583494, "loss": 2.1383, "step": 479280 }, { "epoch": 1.8528011009571523, "grad_norm": 0.14603668451309204, "learning_rate": 0.0002191013504413004, "loss": 2.1354, "step": 479290 }, { "epoch": 1.8528397581605356, "grad_norm": 0.14398865401744843, "learning_rate": 0.00021898905112854506, "loss": 2.1507, "step": 479300 }, { "epoch": 1.8528784153639188, "grad_norm": 0.28502047061920166, "learning_rate": 0.00021887675889622948, "loss": 2.1256, "step": 479310 }, { "epoch": 1.852917072567302, "grad_norm": 0.1565953642129898, "learning_rate": 0.00021876447374301455, "loss": 2.1344, "step": 479320 }, { "epoch": 1.8529557297706853, "grad_norm": 0.15478111803531647, "learning_rate": 0.00021865219566756134, "loss": 2.1355, "step": 479330 }, { "epoch": 1.8529943869740686, "grad_norm": 0.15464521944522858, "learning_rate": 0.000218539924668532, "loss": 2.1406, "step": 479340 }, { "epoch": 1.853033044177452, "grad_norm": 0.15871332585811615, "learning_rate": 0.00021842766074458832, "loss": 2.1531, "step": 479350 }, { "epoch": 1.8530717013808353, "grad_norm": 0.14946506917476654, "learning_rate": 0.0002183154038943931, "loss": 2.1345, "step": 479360 }, { "epoch": 1.8531103585842186, "grad_norm": 0.1543097198009491, "learning_rate": 0.0002182031541166092, "loss": 2.1536, "step": 479370 }, { "epoch": 1.8531490157876018, "grad_norm": 0.15099318325519562, "learning_rate": 0.00021809091140990034, "loss": 2.1269, "step": 479380 }, { "epoch": 1.8531876729909853, "grad_norm": 0.15252339839935303, "learning_rate": 0.00021797867577293007, "loss": 2.1417, "step": 479390 }, { "epoch": 1.8532263301943686, "grad_norm": 0.16234450042247772, "learning_rate": 0.00021786644720436278, "loss": 2.1313, "step": 479400 }, { "epoch": 1.8532649873977518, "grad_norm": 0.1449269950389862, "learning_rate": 0.0002177542257028633, "loss": 2.1406, "step": 479410 }, { "epoch": 1.853303644601135, "grad_norm": 0.15894246101379395, "learning_rate": 0.0002176420112670967, "loss": 2.1507, "step": 479420 }, { "epoch": 1.8533423018045183, "grad_norm": 0.15004245936870575, "learning_rate": 0.00021752980389572852, "loss": 2.1223, "step": 479430 }, { "epoch": 1.8533809590079016, "grad_norm": 0.17616984248161316, "learning_rate": 0.0002174176035874247, "loss": 2.1495, "step": 479440 }, { "epoch": 1.8534196162112848, "grad_norm": 0.14092867076396942, "learning_rate": 0.00021730541034085138, "loss": 2.1402, "step": 479450 }, { "epoch": 1.853458273414668, "grad_norm": 0.15265598893165588, "learning_rate": 0.0002171932241546759, "loss": 2.1377, "step": 479460 }, { "epoch": 1.8534969306180513, "grad_norm": 0.1434614360332489, "learning_rate": 0.00021708104502756486, "loss": 2.1522, "step": 479470 }, { "epoch": 1.8535355878214346, "grad_norm": 0.14725279808044434, "learning_rate": 0.00021696887295818645, "loss": 2.1534, "step": 479480 }, { "epoch": 1.8535742450248178, "grad_norm": 0.15050674974918365, "learning_rate": 0.0002168567079452084, "loss": 2.1455, "step": 479490 }, { "epoch": 1.853612902228201, "grad_norm": 0.14720620214939117, "learning_rate": 0.00021674454998729908, "loss": 2.1453, "step": 479500 }, { "epoch": 1.8536515594315843, "grad_norm": 0.1538514643907547, "learning_rate": 0.00021663239908312781, "loss": 2.1589, "step": 479510 }, { "epoch": 1.8536902166349678, "grad_norm": 0.15909264981746674, "learning_rate": 0.00021652025523136365, "loss": 2.1354, "step": 479520 }, { "epoch": 1.853728873838351, "grad_norm": 0.15823258459568024, "learning_rate": 0.0002164081184306763, "loss": 2.148, "step": 479530 }, { "epoch": 1.8537675310417343, "grad_norm": 0.15360794961452484, "learning_rate": 0.0002162959886797362, "loss": 2.1573, "step": 479540 }, { "epoch": 1.8538061882451176, "grad_norm": 0.15805944800376892, "learning_rate": 0.00021618386597721352, "loss": 2.149, "step": 479550 }, { "epoch": 1.853844845448501, "grad_norm": 0.164720356464386, "learning_rate": 0.0002160717503217793, "loss": 2.1375, "step": 479560 }, { "epoch": 1.8538835026518843, "grad_norm": 0.15219347178936005, "learning_rate": 0.00021595964171210524, "loss": 2.1444, "step": 479570 }, { "epoch": 1.8539221598552675, "grad_norm": 0.15331251919269562, "learning_rate": 0.00021584754014686292, "loss": 2.1411, "step": 479580 }, { "epoch": 1.8539608170586508, "grad_norm": 0.15167184174060822, "learning_rate": 0.00021573544562472448, "loss": 2.1376, "step": 479590 }, { "epoch": 1.853999474262034, "grad_norm": 0.16140606999397278, "learning_rate": 0.00021562335814436295, "loss": 2.122, "step": 479600 }, { "epoch": 1.8540381314654173, "grad_norm": 0.15602107346057892, "learning_rate": 0.00021551127770445122, "loss": 2.1376, "step": 479610 }, { "epoch": 1.8540767886688005, "grad_norm": 0.15653519332408905, "learning_rate": 0.0002153992043036628, "loss": 2.1518, "step": 479620 }, { "epoch": 1.8541154458721838, "grad_norm": 0.15672744810581207, "learning_rate": 0.00021528713794067135, "loss": 2.1379, "step": 479630 }, { "epoch": 1.854154103075567, "grad_norm": 0.15469038486480713, "learning_rate": 0.00021517507861415152, "loss": 2.146, "step": 479640 }, { "epoch": 1.8541927602789503, "grad_norm": 0.15438590943813324, "learning_rate": 0.00021506302632277795, "loss": 2.1518, "step": 479650 }, { "epoch": 1.8542314174823336, "grad_norm": 0.1710808128118515, "learning_rate": 0.00021495098106522571, "loss": 2.1442, "step": 479660 }, { "epoch": 1.8542700746857168, "grad_norm": 0.14917579293251038, "learning_rate": 0.00021483894284017046, "loss": 2.1487, "step": 479670 }, { "epoch": 1.8543087318891, "grad_norm": 0.9882127046585083, "learning_rate": 0.0002147269116462882, "loss": 2.1286, "step": 479680 }, { "epoch": 1.8543473890924835, "grad_norm": 0.17200686037540436, "learning_rate": 0.00021461488748225532, "loss": 2.1462, "step": 479690 }, { "epoch": 1.8543860462958668, "grad_norm": 0.15341956913471222, "learning_rate": 0.00021450287034674843, "loss": 2.1443, "step": 479700 }, { "epoch": 1.85442470349925, "grad_norm": 0.15414680540561676, "learning_rate": 0.000214390860238445, "loss": 2.1424, "step": 479710 }, { "epoch": 1.8544633607026333, "grad_norm": 0.15845142304897308, "learning_rate": 0.0002142788571560228, "loss": 2.1278, "step": 479720 }, { "epoch": 1.8545020179060168, "grad_norm": 0.14842304587364197, "learning_rate": 0.00021416686109815953, "loss": 2.1378, "step": 479730 }, { "epoch": 1.8545406751094, "grad_norm": 0.1431240290403366, "learning_rate": 0.00021405487206353402, "loss": 2.129, "step": 479740 }, { "epoch": 1.8545793323127833, "grad_norm": 0.1469450742006302, "learning_rate": 0.0002139428900508249, "loss": 2.1358, "step": 479750 }, { "epoch": 1.8546179895161665, "grad_norm": 0.1524389535188675, "learning_rate": 0.00021383091505871145, "loss": 2.1401, "step": 479760 }, { "epoch": 1.8546566467195498, "grad_norm": 0.14780215919017792, "learning_rate": 0.0002137189470858736, "loss": 2.1581, "step": 479770 }, { "epoch": 1.854695303922933, "grad_norm": 0.16302737593650818, "learning_rate": 0.0002136069861309915, "loss": 2.1486, "step": 479780 }, { "epoch": 1.8547339611263163, "grad_norm": 0.15350155532360077, "learning_rate": 0.00021349503219274536, "loss": 2.151, "step": 479790 }, { "epoch": 1.8547726183296995, "grad_norm": 0.14787808060646057, "learning_rate": 0.00021338308526981642, "loss": 2.1374, "step": 479800 }, { "epoch": 1.8548112755330828, "grad_norm": 0.1476603001356125, "learning_rate": 0.000213271145360886, "loss": 2.1433, "step": 479810 }, { "epoch": 1.854849932736466, "grad_norm": 0.1481381356716156, "learning_rate": 0.00021315921246463598, "loss": 2.1312, "step": 479820 }, { "epoch": 1.8548885899398493, "grad_norm": 0.1528703272342682, "learning_rate": 0.00021304728657974836, "loss": 2.1444, "step": 479830 }, { "epoch": 1.8549272471432325, "grad_norm": 0.14263570308685303, "learning_rate": 0.00021293536770490595, "loss": 2.1385, "step": 479840 }, { "epoch": 1.8549659043466158, "grad_norm": 0.15099699795246124, "learning_rate": 0.00021282345583879158, "loss": 2.1437, "step": 479850 }, { "epoch": 1.8550045615499993, "grad_norm": 0.1449032425880432, "learning_rate": 0.00021271155098008876, "loss": 2.1295, "step": 479860 }, { "epoch": 1.8550432187533825, "grad_norm": 0.15079474449157715, "learning_rate": 0.0002125996531274814, "loss": 2.1551, "step": 479870 }, { "epoch": 1.8550818759567658, "grad_norm": 0.16255615651607513, "learning_rate": 0.0002124877622796537, "loss": 2.1425, "step": 479880 }, { "epoch": 1.855120533160149, "grad_norm": 0.16530774533748627, "learning_rate": 0.00021237587843529027, "loss": 2.142, "step": 479890 }, { "epoch": 1.8551591903635325, "grad_norm": 0.15740294754505157, "learning_rate": 0.00021226400159307635, "loss": 2.1331, "step": 479900 }, { "epoch": 1.8551978475669157, "grad_norm": 0.1708087921142578, "learning_rate": 0.00021215213175169745, "loss": 2.1379, "step": 479910 }, { "epoch": 1.855236504770299, "grad_norm": 0.16706566512584686, "learning_rate": 0.00021204026890983928, "loss": 2.1447, "step": 479920 }, { "epoch": 1.8552751619736823, "grad_norm": 0.14972372353076935, "learning_rate": 0.00021192841306618828, "loss": 2.1391, "step": 479930 }, { "epoch": 1.8553138191770655, "grad_norm": 0.14616578817367554, "learning_rate": 0.0002118165642194312, "loss": 2.1469, "step": 479940 }, { "epoch": 1.8553524763804488, "grad_norm": 0.15004681050777435, "learning_rate": 0.0002117047223682549, "loss": 2.1505, "step": 479950 }, { "epoch": 1.855391133583832, "grad_norm": 0.15200355648994446, "learning_rate": 0.00021159288751134754, "loss": 2.1349, "step": 479960 }, { "epoch": 1.8554297907872153, "grad_norm": 0.15411987900733948, "learning_rate": 0.0002114810596473964, "loss": 2.1361, "step": 479970 }, { "epoch": 1.8554684479905985, "grad_norm": 0.1675427407026291, "learning_rate": 0.0002113692387750903, "loss": 2.1356, "step": 479980 }, { "epoch": 1.8555071051939818, "grad_norm": 0.15278521180152893, "learning_rate": 0.00021125742489311783, "loss": 2.1384, "step": 479990 }, { "epoch": 1.855545762397365, "grad_norm": 0.14866431057453156, "learning_rate": 0.0002111456180001683, "loss": 2.1452, "step": 480000 }, { "epoch": 1.8555844196007483, "grad_norm": 0.15397635102272034, "learning_rate": 0.00021103381809493116, "loss": 2.1495, "step": 480010 }, { "epoch": 1.8556230768041315, "grad_norm": 0.15433016419410706, "learning_rate": 0.00021092202517609637, "loss": 2.1346, "step": 480020 }, { "epoch": 1.855661734007515, "grad_norm": 0.1563122272491455, "learning_rate": 0.00021081023924235476, "loss": 2.1481, "step": 480030 }, { "epoch": 1.8557003912108982, "grad_norm": 0.1498311460018158, "learning_rate": 0.0002106984602923967, "loss": 2.1397, "step": 480040 }, { "epoch": 1.8557390484142815, "grad_norm": 0.15273213386535645, "learning_rate": 0.0002105866883249137, "loss": 2.1441, "step": 480050 }, { "epoch": 1.8557777056176648, "grad_norm": 0.1529931277036667, "learning_rate": 0.0002104749233385972, "loss": 2.1401, "step": 480060 }, { "epoch": 1.8558163628210482, "grad_norm": 0.155166894197464, "learning_rate": 0.00021036316533213918, "loss": 2.1409, "step": 480070 }, { "epoch": 1.8558550200244315, "grad_norm": 0.14800570905208588, "learning_rate": 0.00021025141430423266, "loss": 2.1492, "step": 480080 }, { "epoch": 1.8558936772278147, "grad_norm": 0.14278216660022736, "learning_rate": 0.00021013967025356982, "loss": 2.1379, "step": 480090 }, { "epoch": 1.855932334431198, "grad_norm": 0.1603323519229889, "learning_rate": 0.00021002793317884418, "loss": 2.1262, "step": 480100 }, { "epoch": 1.8559709916345812, "grad_norm": 0.16060198843479156, "learning_rate": 0.00020991620307874958, "loss": 2.1357, "step": 480110 }, { "epoch": 1.8560096488379645, "grad_norm": 0.14317700266838074, "learning_rate": 0.00020980447995198027, "loss": 2.1536, "step": 480120 }, { "epoch": 1.8560483060413477, "grad_norm": 0.22249913215637207, "learning_rate": 0.00020969276379723034, "loss": 2.1612, "step": 480130 }, { "epoch": 1.856086963244731, "grad_norm": 0.16858729720115662, "learning_rate": 0.00020958105461319466, "loss": 2.1437, "step": 480140 }, { "epoch": 1.8561256204481142, "grad_norm": 0.14766691625118256, "learning_rate": 0.0002094693523985689, "loss": 2.145, "step": 480150 }, { "epoch": 1.8561642776514975, "grad_norm": 0.15140976011753082, "learning_rate": 0.0002093576571520488, "loss": 2.1551, "step": 480160 }, { "epoch": 1.8562029348548807, "grad_norm": 0.1595422774553299, "learning_rate": 0.00020924596887233004, "loss": 2.139, "step": 480170 }, { "epoch": 1.856241592058264, "grad_norm": 0.14984270930290222, "learning_rate": 0.0002091342875581095, "loss": 2.1372, "step": 480180 }, { "epoch": 1.8562802492616473, "grad_norm": 0.1510619968175888, "learning_rate": 0.00020902261320808414, "loss": 2.1542, "step": 480190 }, { "epoch": 1.8563189064650307, "grad_norm": 0.1541844755411148, "learning_rate": 0.00020891094582095105, "loss": 2.1303, "step": 480200 }, { "epoch": 1.856357563668414, "grad_norm": 0.16086439788341522, "learning_rate": 0.00020879928539540814, "loss": 2.1366, "step": 480210 }, { "epoch": 1.8563962208717972, "grad_norm": 0.14684398472309113, "learning_rate": 0.00020868763193015384, "loss": 2.1436, "step": 480220 }, { "epoch": 1.8564348780751805, "grad_norm": 0.15152642130851746, "learning_rate": 0.00020857598542388622, "loss": 2.1465, "step": 480230 }, { "epoch": 1.856473535278564, "grad_norm": 0.156929150223732, "learning_rate": 0.0002084643458753046, "loss": 2.1514, "step": 480240 }, { "epoch": 1.8565121924819472, "grad_norm": 0.1541062295436859, "learning_rate": 0.00020835271328310822, "loss": 2.1311, "step": 480250 }, { "epoch": 1.8565508496853305, "grad_norm": 0.17336539924144745, "learning_rate": 0.00020824108764599703, "loss": 2.1542, "step": 480260 }, { "epoch": 1.8565895068887137, "grad_norm": 0.16037580370903015, "learning_rate": 0.00020812946896267093, "loss": 2.158, "step": 480270 }, { "epoch": 1.856628164092097, "grad_norm": 0.15069545805454254, "learning_rate": 0.00020801785723183053, "loss": 2.1451, "step": 480280 }, { "epoch": 1.8566668212954802, "grad_norm": 0.147901713848114, "learning_rate": 0.00020790625245217708, "loss": 2.1389, "step": 480290 }, { "epoch": 1.8567054784988635, "grad_norm": 0.14777696132659912, "learning_rate": 0.00020779465462241188, "loss": 2.1386, "step": 480300 }, { "epoch": 1.8567441357022467, "grad_norm": 0.1483861654996872, "learning_rate": 0.00020768306374123658, "loss": 2.127, "step": 480310 }, { "epoch": 1.85678279290563, "grad_norm": 0.1482858806848526, "learning_rate": 0.00020757147980735358, "loss": 2.1259, "step": 480320 }, { "epoch": 1.8568214501090132, "grad_norm": 0.14977093040943146, "learning_rate": 0.00020745990281946546, "loss": 2.1416, "step": 480330 }, { "epoch": 1.8568601073123965, "grad_norm": 0.1569179892539978, "learning_rate": 0.0002073483327762753, "loss": 2.1518, "step": 480340 }, { "epoch": 1.8568987645157797, "grad_norm": 0.15524955093860626, "learning_rate": 0.0002072367696764863, "loss": 2.1476, "step": 480350 }, { "epoch": 1.856937421719163, "grad_norm": 0.14991699159145355, "learning_rate": 0.00020712521351880242, "loss": 2.1576, "step": 480360 }, { "epoch": 1.8569760789225465, "grad_norm": 0.13605119287967682, "learning_rate": 0.00020701366430192782, "loss": 2.1511, "step": 480370 }, { "epoch": 1.8570147361259297, "grad_norm": 0.14542028307914734, "learning_rate": 0.00020690212202456725, "loss": 2.1367, "step": 480380 }, { "epoch": 1.857053393329313, "grad_norm": 0.15281639993190765, "learning_rate": 0.0002067905866854256, "loss": 2.1535, "step": 480390 }, { "epoch": 1.8570920505326962, "grad_norm": 0.15086878836154938, "learning_rate": 0.00020667905828320832, "loss": 2.1308, "step": 480400 }, { "epoch": 1.8571307077360797, "grad_norm": 0.15849731862545013, "learning_rate": 0.00020656753681662154, "loss": 2.1441, "step": 480410 }, { "epoch": 1.857169364939463, "grad_norm": 0.17104901373386383, "learning_rate": 0.00020645602228437122, "loss": 2.1438, "step": 480420 }, { "epoch": 1.8572080221428462, "grad_norm": 0.14817239344120026, "learning_rate": 0.00020634451468516392, "loss": 2.1279, "step": 480430 }, { "epoch": 1.8572466793462294, "grad_norm": 0.1596451699733734, "learning_rate": 0.0002062330140177069, "loss": 2.1289, "step": 480440 }, { "epoch": 1.8572853365496127, "grad_norm": 0.16045700013637543, "learning_rate": 0.00020612152028070762, "loss": 2.1445, "step": 480450 }, { "epoch": 1.857323993752996, "grad_norm": 0.1484665721654892, "learning_rate": 0.0002060100334728736, "loss": 2.1343, "step": 480460 }, { "epoch": 1.8573626509563792, "grad_norm": 0.15261681377887726, "learning_rate": 0.00020589855359291344, "loss": 2.1466, "step": 480470 }, { "epoch": 1.8574013081597625, "grad_norm": 0.14725467562675476, "learning_rate": 0.00020578708063953566, "loss": 2.1402, "step": 480480 }, { "epoch": 1.8574399653631457, "grad_norm": 0.14944353699684143, "learning_rate": 0.00020567561461144935, "loss": 2.145, "step": 480490 }, { "epoch": 1.857478622566529, "grad_norm": 0.15037965774536133, "learning_rate": 0.00020556415550736397, "loss": 2.1352, "step": 480500 }, { "epoch": 1.8575172797699122, "grad_norm": 0.1716088354587555, "learning_rate": 0.0002054527033259892, "loss": 2.1406, "step": 480510 }, { "epoch": 1.8575559369732955, "grad_norm": 0.1456490159034729, "learning_rate": 0.00020534125806603542, "loss": 2.1293, "step": 480520 }, { "epoch": 1.857594594176679, "grad_norm": 0.1489306390285492, "learning_rate": 0.00020522981972621347, "loss": 2.1434, "step": 480530 }, { "epoch": 1.8576332513800622, "grad_norm": 0.1532936990261078, "learning_rate": 0.00020511838830523411, "loss": 2.1411, "step": 480540 }, { "epoch": 1.8576719085834454, "grad_norm": 0.14928655326366425, "learning_rate": 0.00020500696380180904, "loss": 2.1499, "step": 480550 }, { "epoch": 1.8577105657868287, "grad_norm": 0.1661611646413803, "learning_rate": 0.00020489554621465, "loss": 2.1282, "step": 480560 }, { "epoch": 1.857749222990212, "grad_norm": 0.1443454474210739, "learning_rate": 0.0002047841355424691, "loss": 2.1462, "step": 480570 }, { "epoch": 1.8577878801935954, "grad_norm": 0.15595643222332, "learning_rate": 0.00020467273178397915, "loss": 2.1332, "step": 480580 }, { "epoch": 1.8578265373969787, "grad_norm": 0.15329024195671082, "learning_rate": 0.0002045613349378932, "loss": 2.1271, "step": 480590 }, { "epoch": 1.857865194600362, "grad_norm": 0.1506737768650055, "learning_rate": 0.00020444994500292467, "loss": 2.1498, "step": 480600 }, { "epoch": 1.8579038518037452, "grad_norm": 0.15003903210163116, "learning_rate": 0.00020433856197778756, "loss": 2.1247, "step": 480610 }, { "epoch": 1.8579425090071284, "grad_norm": 0.15266703069210052, "learning_rate": 0.00020422718586119592, "loss": 2.1401, "step": 480620 }, { "epoch": 1.8579811662105117, "grad_norm": 0.1718612164258957, "learning_rate": 0.0002041158166518644, "loss": 2.1377, "step": 480630 }, { "epoch": 1.858019823413895, "grad_norm": 0.15592265129089355, "learning_rate": 0.00020400445434850823, "loss": 2.127, "step": 480640 }, { "epoch": 1.8580584806172782, "grad_norm": 0.1493699699640274, "learning_rate": 0.00020389309894984264, "loss": 2.1341, "step": 480650 }, { "epoch": 1.8580971378206614, "grad_norm": 0.17006298899650574, "learning_rate": 0.0002037817504545836, "loss": 2.1658, "step": 480660 }, { "epoch": 1.8581357950240447, "grad_norm": 0.1559170037508011, "learning_rate": 0.0002036704088614474, "loss": 2.14, "step": 480670 }, { "epoch": 1.858174452227428, "grad_norm": 0.2897215485572815, "learning_rate": 0.00020355907416915042, "loss": 2.1544, "step": 480680 }, { "epoch": 1.8582131094308112, "grad_norm": 0.15754903852939606, "learning_rate": 0.00020344774637641016, "loss": 2.1396, "step": 480690 }, { "epoch": 1.8582517666341947, "grad_norm": 0.16809256374835968, "learning_rate": 0.0002033364254819434, "loss": 2.1651, "step": 480700 }, { "epoch": 1.858290423837578, "grad_norm": 0.16307581961154938, "learning_rate": 0.00020322511148446854, "loss": 2.1433, "step": 480710 }, { "epoch": 1.8583290810409612, "grad_norm": 0.16375359892845154, "learning_rate": 0.00020311380438270388, "loss": 2.143, "step": 480720 }, { "epoch": 1.8583677382443444, "grad_norm": 0.14777526259422302, "learning_rate": 0.00020300250417536758, "loss": 2.1462, "step": 480730 }, { "epoch": 1.8584063954477277, "grad_norm": 0.15680406987667084, "learning_rate": 0.0002028912108611789, "loss": 2.1271, "step": 480740 }, { "epoch": 1.8584450526511112, "grad_norm": 0.1567578762769699, "learning_rate": 0.00020277992443885708, "loss": 2.1343, "step": 480750 }, { "epoch": 1.8584837098544944, "grad_norm": 0.15893380343914032, "learning_rate": 0.00020266864490712223, "loss": 2.1406, "step": 480760 }, { "epoch": 1.8585223670578777, "grad_norm": 0.14979642629623413, "learning_rate": 0.0002025573722646945, "loss": 2.1323, "step": 480770 }, { "epoch": 1.858561024261261, "grad_norm": 0.15911221504211426, "learning_rate": 0.0002024461065102945, "loss": 2.1474, "step": 480780 }, { "epoch": 1.8585996814646442, "grad_norm": 0.1472899615764618, "learning_rate": 0.00020233484764264298, "loss": 2.1277, "step": 480790 }, { "epoch": 1.8586383386680274, "grad_norm": 0.5327991843223572, "learning_rate": 0.00020222359566046146, "loss": 2.1463, "step": 480800 }, { "epoch": 1.8586769958714107, "grad_norm": 0.15671391785144806, "learning_rate": 0.0002021123505624718, "loss": 2.1622, "step": 480810 }, { "epoch": 1.858715653074794, "grad_norm": 0.15824978053569794, "learning_rate": 0.00020200111234739638, "loss": 2.1395, "step": 480820 }, { "epoch": 1.8587543102781772, "grad_norm": 0.15751320123672485, "learning_rate": 0.00020188988101395755, "loss": 2.1479, "step": 480830 }, { "epoch": 1.8587929674815604, "grad_norm": 0.14931145310401917, "learning_rate": 0.00020177865656087813, "loss": 2.1417, "step": 480840 }, { "epoch": 1.8588316246849437, "grad_norm": 0.15421828627586365, "learning_rate": 0.000201667438986882, "loss": 2.1507, "step": 480850 }, { "epoch": 1.858870281888327, "grad_norm": 0.14969541132450104, "learning_rate": 0.00020155622829069243, "loss": 2.1333, "step": 480860 }, { "epoch": 1.8589089390917104, "grad_norm": 0.1477525681257248, "learning_rate": 0.00020144502447103397, "loss": 2.1481, "step": 480870 }, { "epoch": 1.8589475962950937, "grad_norm": 0.17129698395729065, "learning_rate": 0.00020133382752663076, "loss": 2.1463, "step": 480880 }, { "epoch": 1.858986253498477, "grad_norm": 0.15016396343708038, "learning_rate": 0.00020122263745620827, "loss": 2.1285, "step": 480890 }, { "epoch": 1.8590249107018602, "grad_norm": 0.1594838947057724, "learning_rate": 0.0002011114542584913, "loss": 2.1379, "step": 480900 }, { "epoch": 1.8590635679052434, "grad_norm": 0.16034048795700073, "learning_rate": 0.00020100027793220598, "loss": 2.1364, "step": 480910 }, { "epoch": 1.8591022251086269, "grad_norm": 0.15773534774780273, "learning_rate": 0.00020088910847607843, "loss": 2.125, "step": 480920 }, { "epoch": 1.8591408823120101, "grad_norm": 0.15313485264778137, "learning_rate": 0.00020077794588883502, "loss": 2.133, "step": 480930 }, { "epoch": 1.8591795395153934, "grad_norm": 0.1616487205028534, "learning_rate": 0.00020066679016920275, "loss": 2.1419, "step": 480940 }, { "epoch": 1.8592181967187766, "grad_norm": 0.22435157001018524, "learning_rate": 0.00020055564131590887, "loss": 2.1246, "step": 480950 }, { "epoch": 1.85925685392216, "grad_norm": 0.15276563167572021, "learning_rate": 0.00020044449932768126, "loss": 2.138, "step": 480960 }, { "epoch": 1.8592955111255431, "grad_norm": 0.14432063698768616, "learning_rate": 0.00020033336420324788, "loss": 2.14, "step": 480970 }, { "epoch": 1.8593341683289264, "grad_norm": 0.14203238487243652, "learning_rate": 0.00020022223594133725, "loss": 2.1249, "step": 480980 }, { "epoch": 1.8593728255323096, "grad_norm": 0.15544167160987854, "learning_rate": 0.0002001111145406782, "loss": 2.1445, "step": 480990 }, { "epoch": 1.859411482735693, "grad_norm": 0.16453050076961517, "learning_rate": 0.00019999999999999996, "loss": 2.1366, "step": 481000 }, { "epoch": 1.8594501399390762, "grad_norm": 0.15054762363433838, "learning_rate": 0.00019988889231803243, "loss": 2.1366, "step": 481010 }, { "epoch": 1.8594887971424594, "grad_norm": 0.15232422947883606, "learning_rate": 0.00019977779149350571, "loss": 2.1546, "step": 481020 }, { "epoch": 1.8595274543458427, "grad_norm": 0.15228308737277985, "learning_rate": 0.00019966669752514998, "loss": 2.1237, "step": 481030 }, { "epoch": 1.8595661115492261, "grad_norm": 0.16331073641777039, "learning_rate": 0.00019955561041169623, "loss": 2.1459, "step": 481040 }, { "epoch": 1.8596047687526094, "grad_norm": 0.16497546434402466, "learning_rate": 0.0001994445301518757, "loss": 2.1263, "step": 481050 }, { "epoch": 1.8596434259559926, "grad_norm": 0.1411234438419342, "learning_rate": 0.00019933345674441982, "loss": 2.1437, "step": 481060 }, { "epoch": 1.8596820831593759, "grad_norm": 0.15147987008094788, "learning_rate": 0.000199222390188061, "loss": 2.1351, "step": 481070 }, { "epoch": 1.8597207403627594, "grad_norm": 0.16037048399448395, "learning_rate": 0.00019911133048153152, "loss": 2.1301, "step": 481080 }, { "epoch": 1.8597593975661426, "grad_norm": 0.15522290766239166, "learning_rate": 0.000199000277623564, "loss": 2.1439, "step": 481090 }, { "epoch": 1.8597980547695259, "grad_norm": 0.14487279951572418, "learning_rate": 0.00019888923161289163, "loss": 2.1494, "step": 481100 }, { "epoch": 1.8598367119729091, "grad_norm": 0.14845119416713715, "learning_rate": 0.00019877819244824813, "loss": 2.1363, "step": 481110 }, { "epoch": 1.8598753691762924, "grad_norm": 0.16259387135505676, "learning_rate": 0.0001986671601283676, "loss": 2.1332, "step": 481120 }, { "epoch": 1.8599140263796756, "grad_norm": 0.16065792739391327, "learning_rate": 0.00019855613465198418, "loss": 2.1547, "step": 481130 }, { "epoch": 1.8599526835830589, "grad_norm": 0.1511327475309372, "learning_rate": 0.00019844511601783266, "loss": 2.1412, "step": 481140 }, { "epoch": 1.8599913407864421, "grad_norm": 0.14757372438907623, "learning_rate": 0.00019833410422464849, "loss": 2.1191, "step": 481150 }, { "epoch": 1.8600299979898254, "grad_norm": 0.14953935146331787, "learning_rate": 0.00019822309927116667, "loss": 2.1367, "step": 481160 }, { "epoch": 1.8600686551932086, "grad_norm": 0.14788267016410828, "learning_rate": 0.00019811210115612356, "loss": 2.146, "step": 481170 }, { "epoch": 1.8601073123965919, "grad_norm": 0.14757679402828217, "learning_rate": 0.00019800110987825526, "loss": 2.1438, "step": 481180 }, { "epoch": 1.8601459695999751, "grad_norm": 0.1558070331811905, "learning_rate": 0.00019789012543629857, "loss": 2.1433, "step": 481190 }, { "epoch": 1.8601846268033584, "grad_norm": 0.16387951374053955, "learning_rate": 0.00019777914782899032, "loss": 2.1311, "step": 481200 }, { "epoch": 1.8602232840067419, "grad_norm": 0.26343652606010437, "learning_rate": 0.00019766817705506835, "loss": 2.147, "step": 481210 }, { "epoch": 1.8602619412101251, "grad_norm": 0.17362500727176666, "learning_rate": 0.00019755721311327058, "loss": 2.1419, "step": 481220 }, { "epoch": 1.8603005984135084, "grad_norm": 0.16361446678638458, "learning_rate": 0.0001974462560023349, "loss": 2.1311, "step": 481230 }, { "epoch": 1.8603392556168916, "grad_norm": 0.14905600249767303, "learning_rate": 0.00019733530572100012, "loss": 2.1439, "step": 481240 }, { "epoch": 1.860377912820275, "grad_norm": 0.15537752211093903, "learning_rate": 0.00019722436226800544, "loss": 2.1553, "step": 481250 }, { "epoch": 1.8604165700236583, "grad_norm": 0.16758646070957184, "learning_rate": 0.0001971134256420899, "loss": 2.135, "step": 481260 }, { "epoch": 1.8604552272270416, "grad_norm": 0.15464277565479279, "learning_rate": 0.00019700249584199382, "loss": 2.1376, "step": 481270 }, { "epoch": 1.8604938844304248, "grad_norm": 0.1499345600605011, "learning_rate": 0.00019689157286645686, "loss": 2.1457, "step": 481280 }, { "epoch": 1.860532541633808, "grad_norm": 0.15644638240337372, "learning_rate": 0.00019678065671422008, "loss": 2.1401, "step": 481290 }, { "epoch": 1.8605711988371914, "grad_norm": 0.15277767181396484, "learning_rate": 0.00019666974738402398, "loss": 2.142, "step": 481300 }, { "epoch": 1.8606098560405746, "grad_norm": 0.14947441220283508, "learning_rate": 0.0001965588448746103, "loss": 2.1266, "step": 481310 }, { "epoch": 1.8606485132439579, "grad_norm": 0.14855043590068817, "learning_rate": 0.00019644794918472063, "loss": 2.1375, "step": 481320 }, { "epoch": 1.860687170447341, "grad_norm": 0.1475961059331894, "learning_rate": 0.00019633706031309717, "loss": 2.14, "step": 481330 }, { "epoch": 1.8607258276507244, "grad_norm": 0.15531527996063232, "learning_rate": 0.00019622617825848244, "loss": 2.14, "step": 481340 }, { "epoch": 1.8607644848541076, "grad_norm": 0.15467391908168793, "learning_rate": 0.0001961153030196192, "loss": 2.1358, "step": 481350 }, { "epoch": 1.8608031420574909, "grad_norm": 0.15794637799263, "learning_rate": 0.00019600443459525096, "loss": 2.1215, "step": 481360 }, { "epoch": 1.8608417992608741, "grad_norm": 0.15984663367271423, "learning_rate": 0.00019589357298412135, "loss": 2.1246, "step": 481370 }, { "epoch": 1.8608804564642576, "grad_norm": 0.15682539343833923, "learning_rate": 0.00019578271818497428, "loss": 2.1351, "step": 481380 }, { "epoch": 1.8609191136676408, "grad_norm": 0.16392043232917786, "learning_rate": 0.0001956718701965543, "loss": 2.1516, "step": 481390 }, { "epoch": 1.860957770871024, "grad_norm": 0.16104425489902496, "learning_rate": 0.00019556102901760643, "loss": 2.1396, "step": 481400 }, { "epoch": 1.8609964280744073, "grad_norm": 0.17536848783493042, "learning_rate": 0.00019545019464687542, "loss": 2.1361, "step": 481410 }, { "epoch": 1.8610350852777908, "grad_norm": 0.16459685564041138, "learning_rate": 0.00019533936708310718, "loss": 2.1469, "step": 481420 }, { "epoch": 1.861073742481174, "grad_norm": 0.15408708155155182, "learning_rate": 0.00019522854632504782, "loss": 2.1438, "step": 481430 }, { "epoch": 1.8611123996845573, "grad_norm": 0.16289669275283813, "learning_rate": 0.0001951177323714435, "loss": 2.1419, "step": 481440 }, { "epoch": 1.8611510568879406, "grad_norm": 0.15478117763996124, "learning_rate": 0.00019500692522104112, "loss": 2.132, "step": 481450 }, { "epoch": 1.8611897140913238, "grad_norm": 0.14389732480049133, "learning_rate": 0.0001948961248725878, "loss": 2.1376, "step": 481460 }, { "epoch": 1.861228371294707, "grad_norm": 0.17540700733661652, "learning_rate": 0.00019478533132483112, "loss": 2.1362, "step": 481470 }, { "epoch": 1.8612670284980903, "grad_norm": 0.14640533924102783, "learning_rate": 0.0001946745445765188, "loss": 2.1416, "step": 481480 }, { "epoch": 1.8613056857014736, "grad_norm": 0.15137052536010742, "learning_rate": 0.00019456376462639936, "loss": 2.141, "step": 481490 }, { "epoch": 1.8613443429048568, "grad_norm": 0.14634105563163757, "learning_rate": 0.00019445299147322117, "loss": 2.1271, "step": 481500 }, { "epoch": 1.86138300010824, "grad_norm": 0.17296701669692993, "learning_rate": 0.00019434222511573363, "loss": 2.1336, "step": 481510 }, { "epoch": 1.8614216573116233, "grad_norm": 0.1500747799873352, "learning_rate": 0.000194231465552686, "loss": 2.1351, "step": 481520 }, { "epoch": 1.8614603145150066, "grad_norm": 0.15063370764255524, "learning_rate": 0.00019412071278282838, "loss": 2.1338, "step": 481530 }, { "epoch": 1.8614989717183898, "grad_norm": 0.15135610103607178, "learning_rate": 0.00019400996680491068, "loss": 2.1433, "step": 481540 }, { "epoch": 1.8615376289217733, "grad_norm": 0.15572667121887207, "learning_rate": 0.00019389922761768363, "loss": 2.1276, "step": 481550 }, { "epoch": 1.8615762861251566, "grad_norm": 0.14076967537403107, "learning_rate": 0.00019378849521989804, "loss": 2.1277, "step": 481560 }, { "epoch": 1.8616149433285398, "grad_norm": 0.15520797669887543, "learning_rate": 0.0001936777696103056, "loss": 2.1461, "step": 481570 }, { "epoch": 1.861653600531923, "grad_norm": 0.14745649695396423, "learning_rate": 0.00019356705078765792, "loss": 2.1442, "step": 481580 }, { "epoch": 1.8616922577353066, "grad_norm": 0.14961227774620056, "learning_rate": 0.00019345633875070712, "loss": 2.1143, "step": 481590 }, { "epoch": 1.8617309149386898, "grad_norm": 0.20407278835773468, "learning_rate": 0.00019334563349820578, "loss": 2.1338, "step": 481600 }, { "epoch": 1.861769572142073, "grad_norm": 0.17995503544807434, "learning_rate": 0.00019323493502890643, "loss": 2.1313, "step": 481610 }, { "epoch": 1.8618082293454563, "grad_norm": 0.161366268992424, "learning_rate": 0.00019312424334156275, "loss": 2.133, "step": 481620 }, { "epoch": 1.8618468865488396, "grad_norm": 0.151927188038826, "learning_rate": 0.00019301355843492842, "loss": 2.131, "step": 481630 }, { "epoch": 1.8618855437522228, "grad_norm": 0.1566513329744339, "learning_rate": 0.00019290288030775728, "loss": 2.1357, "step": 481640 }, { "epoch": 1.861924200955606, "grad_norm": 0.15415827929973602, "learning_rate": 0.00019279220895880366, "loss": 2.1348, "step": 481650 }, { "epoch": 1.8619628581589893, "grad_norm": 0.1509864628314972, "learning_rate": 0.00019268154438682283, "loss": 2.1459, "step": 481660 }, { "epoch": 1.8620015153623726, "grad_norm": 0.16013085842132568, "learning_rate": 0.00019257088659056955, "loss": 2.1362, "step": 481670 }, { "epoch": 1.8620401725657558, "grad_norm": 0.16000637412071228, "learning_rate": 0.00019246023556879942, "loss": 2.1395, "step": 481680 }, { "epoch": 1.862078829769139, "grad_norm": 0.1579359471797943, "learning_rate": 0.00019234959132026863, "loss": 2.152, "step": 481690 }, { "epoch": 1.8621174869725223, "grad_norm": 0.17409813404083252, "learning_rate": 0.00019223895384373323, "loss": 2.1409, "step": 481700 }, { "epoch": 1.8621561441759056, "grad_norm": 0.15212517976760864, "learning_rate": 0.00019212832313795003, "loss": 2.1281, "step": 481710 }, { "epoch": 1.862194801379289, "grad_norm": 0.15253092348575592, "learning_rate": 0.00019201769920167621, "loss": 2.1231, "step": 481720 }, { "epoch": 1.8622334585826723, "grad_norm": 0.16168487071990967, "learning_rate": 0.00019190708203366925, "loss": 2.136, "step": 481730 }, { "epoch": 1.8622721157860556, "grad_norm": 0.16396203637123108, "learning_rate": 0.000191796471632687, "loss": 2.1247, "step": 481740 }, { "epoch": 1.8623107729894388, "grad_norm": 0.1663297414779663, "learning_rate": 0.00019168586799748756, "loss": 2.1506, "step": 481750 }, { "epoch": 1.8623494301928223, "grad_norm": 0.16218312084674835, "learning_rate": 0.0001915752711268295, "loss": 2.1432, "step": 481760 }, { "epoch": 1.8623880873962055, "grad_norm": 0.1498250812292099, "learning_rate": 0.000191464681019472, "loss": 2.113, "step": 481770 }, { "epoch": 1.8624267445995888, "grad_norm": 0.15257258713245392, "learning_rate": 0.00019135409767417433, "loss": 2.1467, "step": 481780 }, { "epoch": 1.862465401802972, "grad_norm": 0.16256940364837646, "learning_rate": 0.00019124352108969633, "loss": 2.1341, "step": 481790 }, { "epoch": 1.8625040590063553, "grad_norm": 0.17142386734485626, "learning_rate": 0.00019113295126479792, "loss": 2.1369, "step": 481800 }, { "epoch": 1.8625427162097385, "grad_norm": 0.1537153124809265, "learning_rate": 0.0001910223881982398, "loss": 2.1435, "step": 481810 }, { "epoch": 1.8625813734131218, "grad_norm": 0.36844369769096375, "learning_rate": 0.0001909118318887826, "loss": 2.1321, "step": 481820 }, { "epoch": 1.862620030616505, "grad_norm": 0.146562859416008, "learning_rate": 0.0001908012823351879, "loss": 2.145, "step": 481830 }, { "epoch": 1.8626586878198883, "grad_norm": 0.16262121498584747, "learning_rate": 0.0001906907395362174, "loss": 2.133, "step": 481840 }, { "epoch": 1.8626973450232716, "grad_norm": 0.15142381191253662, "learning_rate": 0.00019058020349063277, "loss": 2.1399, "step": 481850 }, { "epoch": 1.8627360022266548, "grad_norm": 0.16121938824653625, "learning_rate": 0.00019046967419719652, "loss": 2.1323, "step": 481860 }, { "epoch": 1.862774659430038, "grad_norm": 0.14631405472755432, "learning_rate": 0.00019035915165467165, "loss": 2.1298, "step": 481870 }, { "epoch": 1.8628133166334213, "grad_norm": 0.16886377334594727, "learning_rate": 0.00019024863586182095, "loss": 2.1239, "step": 481880 }, { "epoch": 1.8628519738368048, "grad_norm": 0.1621556133031845, "learning_rate": 0.0001901381268174083, "loss": 2.1285, "step": 481890 }, { "epoch": 1.862890631040188, "grad_norm": 0.16947074234485626, "learning_rate": 0.00019002762452019728, "loss": 2.139, "step": 481900 }, { "epoch": 1.8629292882435713, "grad_norm": 0.14935055375099182, "learning_rate": 0.0001899171289689523, "loss": 2.1452, "step": 481910 }, { "epoch": 1.8629679454469545, "grad_norm": 0.15513372421264648, "learning_rate": 0.00018980664016243832, "loss": 2.1468, "step": 481920 }, { "epoch": 1.863006602650338, "grad_norm": 0.16348998248577118, "learning_rate": 0.00018969615809942008, "loss": 2.1398, "step": 481930 }, { "epoch": 1.8630452598537213, "grad_norm": 0.1574879288673401, "learning_rate": 0.00018958568277866283, "loss": 2.1301, "step": 481940 }, { "epoch": 1.8630839170571045, "grad_norm": 0.15516617894172668, "learning_rate": 0.00018947521419893288, "loss": 2.1426, "step": 481950 }, { "epoch": 1.8631225742604878, "grad_norm": 0.15340623259544373, "learning_rate": 0.0001893647523589961, "loss": 2.1258, "step": 481960 }, { "epoch": 1.863161231463871, "grad_norm": 0.16473540663719177, "learning_rate": 0.00018925429725761878, "loss": 2.1442, "step": 481970 }, { "epoch": 1.8631998886672543, "grad_norm": 0.15988117456436157, "learning_rate": 0.00018914384889356817, "loss": 2.1216, "step": 481980 }, { "epoch": 1.8632385458706375, "grad_norm": 0.14998415112495422, "learning_rate": 0.0001890334072656117, "loss": 2.1168, "step": 481990 }, { "epoch": 1.8632772030740208, "grad_norm": 0.1660185307264328, "learning_rate": 0.00018892297237251677, "loss": 2.137, "step": 482000 }, { "epoch": 1.863315860277404, "grad_norm": 0.14230751991271973, "learning_rate": 0.00018881254421305129, "loss": 2.1246, "step": 482010 }, { "epoch": 1.8633545174807873, "grad_norm": 0.15070542693138123, "learning_rate": 0.0001887021227859842, "loss": 2.1282, "step": 482020 }, { "epoch": 1.8633931746841705, "grad_norm": 0.1525546759366989, "learning_rate": 0.00018859170809008385, "loss": 2.1445, "step": 482030 }, { "epoch": 1.8634318318875538, "grad_norm": 0.20553191006183624, "learning_rate": 0.00018848130012411968, "loss": 2.1378, "step": 482040 }, { "epoch": 1.863470489090937, "grad_norm": 0.15216852724552155, "learning_rate": 0.0001883708988868611, "loss": 2.1275, "step": 482050 }, { "epoch": 1.8635091462943205, "grad_norm": 0.15563897788524628, "learning_rate": 0.000188260504377078, "loss": 2.1381, "step": 482060 }, { "epoch": 1.8635478034977038, "grad_norm": 0.1471768468618393, "learning_rate": 0.00018815011659354086, "loss": 2.1353, "step": 482070 }, { "epoch": 1.863586460701087, "grad_norm": 0.16196849942207336, "learning_rate": 0.00018803973553502008, "loss": 2.1457, "step": 482080 }, { "epoch": 1.8636251179044703, "grad_norm": 0.15377961099147797, "learning_rate": 0.00018792936120028703, "loss": 2.1387, "step": 482090 }, { "epoch": 1.8636637751078537, "grad_norm": 0.1568869948387146, "learning_rate": 0.00018781899358811293, "loss": 2.1265, "step": 482100 }, { "epoch": 1.863702432311237, "grad_norm": 0.1527637392282486, "learning_rate": 0.00018770863269726968, "loss": 2.1321, "step": 482110 }, { "epoch": 1.8637410895146203, "grad_norm": 0.1602158546447754, "learning_rate": 0.00018759827852652934, "loss": 2.1186, "step": 482120 }, { "epoch": 1.8637797467180035, "grad_norm": 0.1528051197528839, "learning_rate": 0.00018748793107466443, "loss": 2.1397, "step": 482130 }, { "epoch": 1.8638184039213868, "grad_norm": 0.15877701342105865, "learning_rate": 0.0001873775903404482, "loss": 2.1318, "step": 482140 }, { "epoch": 1.86385706112477, "grad_norm": 0.14863698184490204, "learning_rate": 0.00018726725632265363, "loss": 2.1431, "step": 482150 }, { "epoch": 1.8638957183281533, "grad_norm": 0.15921323001384735, "learning_rate": 0.00018715692902005433, "loss": 2.1409, "step": 482160 }, { "epoch": 1.8639343755315365, "grad_norm": 0.15810279548168182, "learning_rate": 0.00018704660843142464, "loss": 2.1297, "step": 482170 }, { "epoch": 1.8639730327349198, "grad_norm": 0.16936606168746948, "learning_rate": 0.00018693629455553886, "loss": 2.1365, "step": 482180 }, { "epoch": 1.864011689938303, "grad_norm": 0.15929560363292694, "learning_rate": 0.0001868259873911715, "loss": 2.12, "step": 482190 }, { "epoch": 1.8640503471416863, "grad_norm": 0.15395177900791168, "learning_rate": 0.00018671568693709806, "loss": 2.1392, "step": 482200 }, { "epoch": 1.8640890043450695, "grad_norm": 0.15042053163051605, "learning_rate": 0.0001866053931920939, "loss": 2.1473, "step": 482210 }, { "epoch": 1.8641276615484528, "grad_norm": 0.14917847514152527, "learning_rate": 0.00018649510615493514, "loss": 2.1431, "step": 482220 }, { "epoch": 1.8641663187518362, "grad_norm": 0.15905718505382538, "learning_rate": 0.00018638482582439763, "loss": 2.1237, "step": 482230 }, { "epoch": 1.8642049759552195, "grad_norm": 0.14712104201316833, "learning_rate": 0.0001862745521992586, "loss": 2.1291, "step": 482240 }, { "epoch": 1.8642436331586028, "grad_norm": 0.15073151886463165, "learning_rate": 0.00018616428527829453, "loss": 2.1296, "step": 482250 }, { "epoch": 1.864282290361986, "grad_norm": 0.1549181491136551, "learning_rate": 0.00018605402506028312, "loss": 2.132, "step": 482260 }, { "epoch": 1.8643209475653695, "grad_norm": 0.15898975729942322, "learning_rate": 0.000185943771544002, "loss": 2.1344, "step": 482270 }, { "epoch": 1.8643596047687527, "grad_norm": 0.18511076271533966, "learning_rate": 0.00018583352472822945, "loss": 2.1376, "step": 482280 }, { "epoch": 1.864398261972136, "grad_norm": 0.14872094988822937, "learning_rate": 0.0001857232846117438, "loss": 2.1394, "step": 482290 }, { "epoch": 1.8644369191755192, "grad_norm": 0.15533463656902313, "learning_rate": 0.00018561305119332383, "loss": 2.1179, "step": 482300 }, { "epoch": 1.8644755763789025, "grad_norm": 0.14731545746326447, "learning_rate": 0.00018550282447174916, "loss": 2.1482, "step": 482310 }, { "epoch": 1.8645142335822857, "grad_norm": 0.17277966439723969, "learning_rate": 0.0001853926044457992, "loss": 2.1355, "step": 482320 }, { "epoch": 1.864552890785669, "grad_norm": 0.1634880006313324, "learning_rate": 0.00018528239111425382, "loss": 2.137, "step": 482330 }, { "epoch": 1.8645915479890522, "grad_norm": 0.15768516063690186, "learning_rate": 0.00018517218447589357, "loss": 2.1376, "step": 482340 }, { "epoch": 1.8646302051924355, "grad_norm": 0.15683893859386444, "learning_rate": 0.0001850619845294992, "loss": 2.1432, "step": 482350 }, { "epoch": 1.8646688623958187, "grad_norm": 0.1512300670146942, "learning_rate": 0.00018495179127385166, "loss": 2.1321, "step": 482360 }, { "epoch": 1.864707519599202, "grad_norm": 0.15822456777095795, "learning_rate": 0.00018484160470773236, "loss": 2.1379, "step": 482370 }, { "epoch": 1.8647461768025853, "grad_norm": 0.17078734934329987, "learning_rate": 0.00018473142482992345, "loss": 2.117, "step": 482380 }, { "epoch": 1.8647848340059685, "grad_norm": 0.15186278522014618, "learning_rate": 0.00018462125163920694, "loss": 2.1434, "step": 482390 }, { "epoch": 1.864823491209352, "grad_norm": 0.1607029289007187, "learning_rate": 0.0001845110851343652, "loss": 2.1275, "step": 482400 }, { "epoch": 1.8648621484127352, "grad_norm": 0.1595838963985443, "learning_rate": 0.00018440092531418162, "loss": 2.1437, "step": 482410 }, { "epoch": 1.8649008056161185, "grad_norm": 0.17397913336753845, "learning_rate": 0.00018429077217743916, "loss": 2.1392, "step": 482420 }, { "epoch": 1.8649394628195017, "grad_norm": 0.15208283066749573, "learning_rate": 0.0001841806257229217, "loss": 2.1398, "step": 482430 }, { "epoch": 1.8649781200228852, "grad_norm": 0.149830162525177, "learning_rate": 0.00018407048594941332, "loss": 2.1394, "step": 482440 }, { "epoch": 1.8650167772262685, "grad_norm": 0.14657558500766754, "learning_rate": 0.00018396035285569835, "loss": 2.1315, "step": 482450 }, { "epoch": 1.8650554344296517, "grad_norm": 0.15106050670146942, "learning_rate": 0.00018385022644056147, "loss": 2.128, "step": 482460 }, { "epoch": 1.865094091633035, "grad_norm": 0.15019604563713074, "learning_rate": 0.00018374010670278796, "loss": 2.1319, "step": 482470 }, { "epoch": 1.8651327488364182, "grad_norm": 0.15791895985603333, "learning_rate": 0.0001836299936411634, "loss": 2.1241, "step": 482480 }, { "epoch": 1.8651714060398015, "grad_norm": 0.1745760440826416, "learning_rate": 0.00018351988725447366, "loss": 2.133, "step": 482490 }, { "epoch": 1.8652100632431847, "grad_norm": 0.1689693182706833, "learning_rate": 0.00018340978754150506, "loss": 2.1328, "step": 482500 }, { "epoch": 1.865248720446568, "grad_norm": 0.14829005300998688, "learning_rate": 0.00018329969450104413, "loss": 2.1249, "step": 482510 }, { "epoch": 1.8652873776499512, "grad_norm": 0.15087290108203888, "learning_rate": 0.0001831896081318778, "loss": 2.1148, "step": 482520 }, { "epoch": 1.8653260348533345, "grad_norm": 0.14755970239639282, "learning_rate": 0.0001830795284327935, "loss": 2.1343, "step": 482530 }, { "epoch": 1.8653646920567177, "grad_norm": 0.15620015561580658, "learning_rate": 0.00018296945540257915, "loss": 2.1269, "step": 482540 }, { "epoch": 1.865403349260101, "grad_norm": 0.16158314049243927, "learning_rate": 0.00018285938904002253, "loss": 2.1348, "step": 482550 }, { "epoch": 1.8654420064634845, "grad_norm": 0.1560443490743637, "learning_rate": 0.00018274932934391242, "loss": 2.1357, "step": 482560 }, { "epoch": 1.8654806636668677, "grad_norm": 0.15698698163032532, "learning_rate": 0.00018263927631303757, "loss": 2.1244, "step": 482570 }, { "epoch": 1.865519320870251, "grad_norm": 0.1533084660768509, "learning_rate": 0.00018252922994618692, "loss": 2.1444, "step": 482580 }, { "epoch": 1.8655579780736342, "grad_norm": 0.168225958943367, "learning_rate": 0.00018241919024215058, "loss": 2.1364, "step": 482590 }, { "epoch": 1.8655966352770175, "grad_norm": 0.15649709105491638, "learning_rate": 0.00018230915719971797, "loss": 2.1379, "step": 482600 }, { "epoch": 1.865635292480401, "grad_norm": 0.15371111035346985, "learning_rate": 0.00018219913081767957, "loss": 2.1213, "step": 482610 }, { "epoch": 1.8656739496837842, "grad_norm": 0.1561652272939682, "learning_rate": 0.00018208911109482596, "loss": 2.1355, "step": 482620 }, { "epoch": 1.8657126068871674, "grad_norm": 0.15515419840812683, "learning_rate": 0.00018197909802994828, "loss": 2.1387, "step": 482630 }, { "epoch": 1.8657512640905507, "grad_norm": 0.16376587748527527, "learning_rate": 0.00018186909162183818, "loss": 2.1385, "step": 482640 }, { "epoch": 1.865789921293934, "grad_norm": 0.15734699368476868, "learning_rate": 0.00018175909186928709, "loss": 2.1345, "step": 482650 }, { "epoch": 1.8658285784973172, "grad_norm": 0.15997950732707977, "learning_rate": 0.00018164909877108705, "loss": 2.1414, "step": 482660 }, { "epoch": 1.8658672357007005, "grad_norm": 0.15235339105129242, "learning_rate": 0.0001815391123260308, "loss": 2.1272, "step": 482670 }, { "epoch": 1.8659058929040837, "grad_norm": 0.15580111742019653, "learning_rate": 0.00018142913253291115, "loss": 2.1336, "step": 482680 }, { "epoch": 1.865944550107467, "grad_norm": 0.1720065474510193, "learning_rate": 0.00018131915939052147, "loss": 2.1288, "step": 482690 }, { "epoch": 1.8659832073108502, "grad_norm": 0.17029879987239838, "learning_rate": 0.00018120919289765493, "loss": 2.1259, "step": 482700 }, { "epoch": 1.8660218645142335, "grad_norm": 0.1510152667760849, "learning_rate": 0.00018109923305310583, "loss": 2.1421, "step": 482710 }, { "epoch": 1.8660605217176167, "grad_norm": 0.19208844006061554, "learning_rate": 0.00018098927985566826, "loss": 2.1118, "step": 482720 }, { "epoch": 1.8660991789210002, "grad_norm": 0.1666448712348938, "learning_rate": 0.0001808793333041372, "loss": 2.1489, "step": 482730 }, { "epoch": 1.8661378361243834, "grad_norm": 0.16235436499118805, "learning_rate": 0.00018076939339730757, "loss": 2.1442, "step": 482740 }, { "epoch": 1.8661764933277667, "grad_norm": 0.14573925733566284, "learning_rate": 0.00018065946013397482, "loss": 2.1479, "step": 482750 }, { "epoch": 1.86621515053115, "grad_norm": 0.1504787802696228, "learning_rate": 0.00018054953351293458, "loss": 2.1493, "step": 482760 }, { "epoch": 1.8662538077345332, "grad_norm": 0.15526950359344482, "learning_rate": 0.0001804396135329831, "loss": 2.1348, "step": 482770 }, { "epoch": 1.8662924649379167, "grad_norm": 0.15732213854789734, "learning_rate": 0.00018032970019291695, "loss": 2.1192, "step": 482780 }, { "epoch": 1.8663311221413, "grad_norm": 0.16246534883975983, "learning_rate": 0.00018021979349153262, "loss": 2.1264, "step": 482790 }, { "epoch": 1.8663697793446832, "grad_norm": 0.15312378108501434, "learning_rate": 0.00018010989342762796, "loss": 2.1406, "step": 482800 }, { "epoch": 1.8664084365480664, "grad_norm": 0.17585042119026184, "learning_rate": 0.00018000000000000017, "loss": 2.15, "step": 482810 }, { "epoch": 1.8664470937514497, "grad_norm": 0.15485194325447083, "learning_rate": 0.0001798901132074471, "loss": 2.1439, "step": 482820 }, { "epoch": 1.866485750954833, "grad_norm": 0.19659042358398438, "learning_rate": 0.00017978023304876723, "loss": 2.1181, "step": 482830 }, { "epoch": 1.8665244081582162, "grad_norm": 0.14731131494045258, "learning_rate": 0.00017967035952275935, "loss": 2.1382, "step": 482840 }, { "epoch": 1.8665630653615994, "grad_norm": 0.15818408131599426, "learning_rate": 0.0001795604926282224, "loss": 2.1297, "step": 482850 }, { "epoch": 1.8666017225649827, "grad_norm": 0.17280857264995575, "learning_rate": 0.00017945063236395598, "loss": 2.1388, "step": 482860 }, { "epoch": 1.866640379768366, "grad_norm": 0.15899436175823212, "learning_rate": 0.00017934077872875954, "loss": 2.1233, "step": 482870 }, { "epoch": 1.8666790369717492, "grad_norm": 0.1656186729669571, "learning_rate": 0.00017923093172143335, "loss": 2.1503, "step": 482880 }, { "epoch": 1.8667176941751324, "grad_norm": 0.1577361524105072, "learning_rate": 0.00017912109134077793, "loss": 2.1376, "step": 482890 }, { "epoch": 1.866756351378516, "grad_norm": 0.17275647819042206, "learning_rate": 0.000179011257585594, "loss": 2.1421, "step": 482900 }, { "epoch": 1.8667950085818992, "grad_norm": 0.16903147101402283, "learning_rate": 0.000178901430454683, "loss": 2.1134, "step": 482910 }, { "epoch": 1.8668336657852824, "grad_norm": 0.14542759954929352, "learning_rate": 0.0001787916099468463, "loss": 2.1384, "step": 482920 }, { "epoch": 1.8668723229886657, "grad_norm": 0.1691199541091919, "learning_rate": 0.00017868179606088597, "loss": 2.1316, "step": 482930 }, { "epoch": 1.8669109801920492, "grad_norm": 0.15983478724956512, "learning_rate": 0.00017857198879560432, "loss": 2.1406, "step": 482940 }, { "epoch": 1.8669496373954324, "grad_norm": 0.15318359434604645, "learning_rate": 0.00017846218814980409, "loss": 2.1402, "step": 482950 }, { "epoch": 1.8669882945988157, "grad_norm": 0.16408123075962067, "learning_rate": 0.00017835239412228798, "loss": 2.1361, "step": 482960 }, { "epoch": 1.867026951802199, "grad_norm": 0.15106911957263947, "learning_rate": 0.00017824260671185966, "loss": 2.1272, "step": 482970 }, { "epoch": 1.8670656090055822, "grad_norm": 0.16219568252563477, "learning_rate": 0.00017813282591732293, "loss": 2.1442, "step": 482980 }, { "epoch": 1.8671042662089654, "grad_norm": 0.16399811208248138, "learning_rate": 0.00017802305173748146, "loss": 2.1214, "step": 482990 }, { "epoch": 1.8671429234123487, "grad_norm": 0.15034039318561554, "learning_rate": 0.00017791328417114015, "loss": 2.127, "step": 483000 }, { "epoch": 1.867181580615732, "grad_norm": 0.15108898282051086, "learning_rate": 0.0001778035232171038, "loss": 2.129, "step": 483010 }, { "epoch": 1.8672202378191152, "grad_norm": 0.1576581746339798, "learning_rate": 0.00017769376887417733, "loss": 2.141, "step": 483020 }, { "epoch": 1.8672588950224984, "grad_norm": 0.14400140941143036, "learning_rate": 0.00017758402114116656, "loss": 2.1375, "step": 483030 }, { "epoch": 1.8672975522258817, "grad_norm": 0.16434095799922943, "learning_rate": 0.00017747428001687715, "loss": 2.1233, "step": 483040 }, { "epoch": 1.867336209429265, "grad_norm": 0.16243363916873932, "learning_rate": 0.0001773645455001156, "loss": 2.1175, "step": 483050 }, { "epoch": 1.8673748666326482, "grad_norm": 0.1503385305404663, "learning_rate": 0.00017725481758968842, "loss": 2.1224, "step": 483060 }, { "epoch": 1.8674135238360317, "grad_norm": 0.1493840366601944, "learning_rate": 0.00017714509628440257, "loss": 2.1218, "step": 483070 }, { "epoch": 1.867452181039415, "grad_norm": 0.15031930804252625, "learning_rate": 0.0001770353815830654, "loss": 2.1392, "step": 483080 }, { "epoch": 1.8674908382427982, "grad_norm": 0.1668921858072281, "learning_rate": 0.00017692567348448463, "loss": 2.1309, "step": 483090 }, { "epoch": 1.8675294954461814, "grad_norm": 0.16633087396621704, "learning_rate": 0.00017681597198746825, "loss": 2.1331, "step": 483100 }, { "epoch": 1.8675681526495649, "grad_norm": 0.17308935523033142, "learning_rate": 0.00017670627709082497, "loss": 2.1349, "step": 483110 }, { "epoch": 1.8676068098529481, "grad_norm": 0.15083816647529602, "learning_rate": 0.00017659658879336294, "loss": 2.1346, "step": 483120 }, { "epoch": 1.8676454670563314, "grad_norm": 0.14988155663013458, "learning_rate": 0.00017648690709389192, "loss": 2.1369, "step": 483130 }, { "epoch": 1.8676841242597146, "grad_norm": 0.14918486773967743, "learning_rate": 0.00017637723199122137, "loss": 2.1328, "step": 483140 }, { "epoch": 1.867722781463098, "grad_norm": 0.1509108543395996, "learning_rate": 0.00017626756348416062, "loss": 2.1115, "step": 483150 }, { "epoch": 1.8677614386664811, "grad_norm": 0.16556978225708008, "learning_rate": 0.00017615790157152046, "loss": 2.1411, "step": 483160 }, { "epoch": 1.8678000958698644, "grad_norm": 0.15764233469963074, "learning_rate": 0.0001760482462521109, "loss": 2.1285, "step": 483170 }, { "epoch": 1.8678387530732476, "grad_norm": 0.1732935756444931, "learning_rate": 0.00017593859752474338, "loss": 2.1386, "step": 483180 }, { "epoch": 1.867877410276631, "grad_norm": 0.14939621090888977, "learning_rate": 0.00017582895538822908, "loss": 2.1402, "step": 483190 }, { "epoch": 1.8679160674800142, "grad_norm": 0.16969427466392517, "learning_rate": 0.00017571931984137934, "loss": 2.1324, "step": 483200 }, { "epoch": 1.8679547246833974, "grad_norm": 0.1577702909708023, "learning_rate": 0.00017560969088300626, "loss": 2.127, "step": 483210 }, { "epoch": 1.8679933818867807, "grad_norm": 0.15833325684070587, "learning_rate": 0.00017550006851192236, "loss": 2.1255, "step": 483220 }, { "epoch": 1.868032039090164, "grad_norm": 0.15044455230236053, "learning_rate": 0.00017539045272694031, "loss": 2.1314, "step": 483230 }, { "epoch": 1.8680706962935474, "grad_norm": 0.15325351059436798, "learning_rate": 0.0001752808435268729, "loss": 2.1401, "step": 483240 }, { "epoch": 1.8681093534969306, "grad_norm": 0.15859206020832062, "learning_rate": 0.00017517124091053415, "loss": 2.1297, "step": 483250 }, { "epoch": 1.8681480107003139, "grad_norm": 0.1479908525943756, "learning_rate": 0.00017506164487673725, "loss": 2.1147, "step": 483260 }, { "epoch": 1.8681866679036971, "grad_norm": 0.15474596619606018, "learning_rate": 0.00017495205542429647, "loss": 2.1334, "step": 483270 }, { "epoch": 1.8682253251070806, "grad_norm": 0.14719825983047485, "learning_rate": 0.00017484247255202657, "loss": 2.1269, "step": 483280 }, { "epoch": 1.8682639823104639, "grad_norm": 0.15452922880649567, "learning_rate": 0.000174732896258742, "loss": 2.1139, "step": 483290 }, { "epoch": 1.8683026395138471, "grad_norm": 0.160796120762825, "learning_rate": 0.00017462332654325841, "loss": 2.133, "step": 483300 }, { "epoch": 1.8683412967172304, "grad_norm": 0.14414744079113007, "learning_rate": 0.00017451376340439095, "loss": 2.1281, "step": 483310 }, { "epoch": 1.8683799539206136, "grad_norm": 0.15399828553199768, "learning_rate": 0.0001744042068409557, "loss": 2.1407, "step": 483320 }, { "epoch": 1.8684186111239969, "grad_norm": 0.15108740329742432, "learning_rate": 0.00017429465685176894, "loss": 2.1304, "step": 483330 }, { "epoch": 1.8684572683273801, "grad_norm": 0.14604243636131287, "learning_rate": 0.00017418511343564736, "loss": 2.1389, "step": 483340 }, { "epoch": 1.8684959255307634, "grad_norm": 0.1571522355079651, "learning_rate": 0.00017407557659140772, "loss": 2.139, "step": 483350 }, { "epoch": 1.8685345827341466, "grad_norm": 0.15916845202445984, "learning_rate": 0.0001739660463178676, "loss": 2.1364, "step": 483360 }, { "epoch": 1.8685732399375299, "grad_norm": 0.1621374934911728, "learning_rate": 0.00017385652261384465, "loss": 2.1282, "step": 483370 }, { "epoch": 1.8686118971409131, "grad_norm": 0.15086057782173157, "learning_rate": 0.0001737470054781567, "loss": 2.1474, "step": 483380 }, { "epoch": 1.8686505543442964, "grad_norm": 0.15980279445648193, "learning_rate": 0.0001736374949096222, "loss": 2.1328, "step": 483390 }, { "epoch": 1.8686892115476796, "grad_norm": 0.16501730680465698, "learning_rate": 0.00017352799090706016, "loss": 2.1457, "step": 483400 }, { "epoch": 1.8687278687510631, "grad_norm": 0.14960528910160065, "learning_rate": 0.00017341849346928952, "loss": 2.129, "step": 483410 }, { "epoch": 1.8687665259544464, "grad_norm": 0.1552843600511551, "learning_rate": 0.0001733090025951296, "loss": 2.143, "step": 483420 }, { "epoch": 1.8688051831578296, "grad_norm": 0.14932730793952942, "learning_rate": 0.00017319951828340054, "loss": 2.1336, "step": 483430 }, { "epoch": 1.8688438403612129, "grad_norm": 0.15046526491641998, "learning_rate": 0.0001730900405329221, "loss": 2.132, "step": 483440 }, { "epoch": 1.8688824975645963, "grad_norm": 0.17451153695583344, "learning_rate": 0.00017298056934251504, "loss": 2.1268, "step": 483450 }, { "epoch": 1.8689211547679796, "grad_norm": 0.151564359664917, "learning_rate": 0.00017287110471100032, "loss": 2.127, "step": 483460 }, { "epoch": 1.8689598119713628, "grad_norm": 0.15798701345920563, "learning_rate": 0.00017276164663719906, "loss": 2.1415, "step": 483470 }, { "epoch": 1.868998469174746, "grad_norm": 0.16805607080459595, "learning_rate": 0.0001726521951199329, "loss": 2.1163, "step": 483480 }, { "epoch": 1.8690371263781294, "grad_norm": 0.15856941044330597, "learning_rate": 0.00017254275015802346, "loss": 2.125, "step": 483490 }, { "epoch": 1.8690757835815126, "grad_norm": 0.1525784581899643, "learning_rate": 0.0001724333117502934, "loss": 2.1211, "step": 483500 }, { "epoch": 1.8691144407848959, "grad_norm": 0.16771401464939117, "learning_rate": 0.00017232387989556531, "loss": 2.1177, "step": 483510 }, { "epoch": 1.869153097988279, "grad_norm": 0.15492326021194458, "learning_rate": 0.00017221445459266206, "loss": 2.1077, "step": 483520 }, { "epoch": 1.8691917551916624, "grad_norm": 0.15133053064346313, "learning_rate": 0.00017210503584040682, "loss": 2.1411, "step": 483530 }, { "epoch": 1.8692304123950456, "grad_norm": 0.16135741770267487, "learning_rate": 0.00017199562363762368, "loss": 2.1339, "step": 483540 }, { "epoch": 1.8692690695984289, "grad_norm": 0.15097425878047943, "learning_rate": 0.00017188621798313664, "loss": 2.1395, "step": 483550 }, { "epoch": 1.8693077268018121, "grad_norm": 0.1603614091873169, "learning_rate": 0.0001717768188757698, "loss": 2.1365, "step": 483560 }, { "epoch": 1.8693463840051954, "grad_norm": 0.14693760871887207, "learning_rate": 0.00017166742631434808, "loss": 2.1465, "step": 483570 }, { "epoch": 1.8693850412085788, "grad_norm": 0.150224968791008, "learning_rate": 0.00017155804029769662, "loss": 2.1161, "step": 483580 }, { "epoch": 1.869423698411962, "grad_norm": 1.3554350137710571, "learning_rate": 0.0001714486608246406, "loss": 2.1413, "step": 483590 }, { "epoch": 1.8694623556153454, "grad_norm": 0.18808268010616302, "learning_rate": 0.00017133928789400633, "loss": 2.1193, "step": 483600 }, { "epoch": 1.8695010128187286, "grad_norm": 0.15480872988700867, "learning_rate": 0.00017122992150461958, "loss": 2.1132, "step": 483610 }, { "epoch": 1.869539670022112, "grad_norm": 0.15553432703018188, "learning_rate": 0.00017112056165530687, "loss": 2.1279, "step": 483620 }, { "epoch": 1.8695783272254953, "grad_norm": 0.15232661366462708, "learning_rate": 0.00017101120834489537, "loss": 2.1363, "step": 483630 }, { "epoch": 1.8696169844288786, "grad_norm": 0.15363068878650665, "learning_rate": 0.00017090186157221176, "loss": 2.1229, "step": 483640 }, { "epoch": 1.8696556416322618, "grad_norm": 0.15250281989574432, "learning_rate": 0.00017079252133608413, "loss": 2.1319, "step": 483650 }, { "epoch": 1.869694298835645, "grad_norm": 0.15061458945274353, "learning_rate": 0.00017068318763534008, "loss": 2.125, "step": 483660 }, { "epoch": 1.8697329560390283, "grad_norm": 0.14238150417804718, "learning_rate": 0.00017057386046880808, "loss": 2.1277, "step": 483670 }, { "epoch": 1.8697716132424116, "grad_norm": 0.16396325826644897, "learning_rate": 0.0001704645398353166, "loss": 2.1408, "step": 483680 }, { "epoch": 1.8698102704457948, "grad_norm": 0.16763406991958618, "learning_rate": 0.00017035522573369445, "loss": 2.1272, "step": 483690 }, { "epoch": 1.869848927649178, "grad_norm": 0.16632625460624695, "learning_rate": 0.00017024591816277134, "loss": 2.1199, "step": 483700 }, { "epoch": 1.8698875848525613, "grad_norm": 0.15391650795936584, "learning_rate": 0.0001701366171213765, "loss": 2.1323, "step": 483710 }, { "epoch": 1.8699262420559446, "grad_norm": 0.16232500970363617, "learning_rate": 0.0001700273226083402, "loss": 2.1286, "step": 483720 }, { "epoch": 1.8699648992593279, "grad_norm": 0.16050100326538086, "learning_rate": 0.00016991803462249267, "loss": 2.1351, "step": 483730 }, { "epoch": 1.870003556462711, "grad_norm": 0.15308740735054016, "learning_rate": 0.00016980875316266464, "loss": 2.1319, "step": 483740 }, { "epoch": 1.8700422136660946, "grad_norm": 0.1580066829919815, "learning_rate": 0.00016969947822768728, "loss": 2.1328, "step": 483750 }, { "epoch": 1.8700808708694778, "grad_norm": 0.1556428223848343, "learning_rate": 0.00016959020981639194, "loss": 2.1202, "step": 483760 }, { "epoch": 1.870119528072861, "grad_norm": 0.1538323163986206, "learning_rate": 0.00016948094792761025, "loss": 2.1229, "step": 483770 }, { "epoch": 1.8701581852762443, "grad_norm": 0.16740500926971436, "learning_rate": 0.0001693716925601745, "loss": 2.1457, "step": 483780 }, { "epoch": 1.8701968424796278, "grad_norm": 0.15987759828567505, "learning_rate": 0.0001692624437129171, "loss": 2.14, "step": 483790 }, { "epoch": 1.870235499683011, "grad_norm": 0.1688595712184906, "learning_rate": 0.00016915320138467083, "loss": 2.1329, "step": 483800 }, { "epoch": 1.8702741568863943, "grad_norm": 0.19591599702835083, "learning_rate": 0.00016904396557426859, "loss": 2.1394, "step": 483810 }, { "epoch": 1.8703128140897776, "grad_norm": 0.17649315297603607, "learning_rate": 0.00016893473628054424, "loss": 2.1205, "step": 483820 }, { "epoch": 1.8703514712931608, "grad_norm": 0.16197006404399872, "learning_rate": 0.00016882551350233134, "loss": 2.1503, "step": 483830 }, { "epoch": 1.870390128496544, "grad_norm": 0.15415135025978088, "learning_rate": 0.00016871629723846438, "loss": 2.1454, "step": 483840 }, { "epoch": 1.8704287856999273, "grad_norm": 0.16244566440582275, "learning_rate": 0.00016860708748777785, "loss": 2.1232, "step": 483850 }, { "epoch": 1.8704674429033106, "grad_norm": 0.1580551117658615, "learning_rate": 0.0001684978842491063, "loss": 2.1333, "step": 483860 }, { "epoch": 1.8705061001066938, "grad_norm": 0.16132232546806335, "learning_rate": 0.0001683886875212852, "loss": 2.121, "step": 483870 }, { "epoch": 1.870544757310077, "grad_norm": 0.1563108265399933, "learning_rate": 0.00016827949730315029, "loss": 2.1359, "step": 483880 }, { "epoch": 1.8705834145134603, "grad_norm": 0.16171641647815704, "learning_rate": 0.0001681703135935373, "loss": 2.1247, "step": 483890 }, { "epoch": 1.8706220717168436, "grad_norm": 0.1576695740222931, "learning_rate": 0.00016806113639128228, "loss": 2.1313, "step": 483900 }, { "epoch": 1.8706607289202268, "grad_norm": 0.15826772153377533, "learning_rate": 0.00016795196569522242, "loss": 2.1241, "step": 483910 }, { "epoch": 1.8706993861236103, "grad_norm": 0.16920413076877594, "learning_rate": 0.00016784280150419417, "loss": 2.1433, "step": 483920 }, { "epoch": 1.8707380433269936, "grad_norm": 0.15005506575107574, "learning_rate": 0.00016773364381703494, "loss": 2.138, "step": 483930 }, { "epoch": 1.8707767005303768, "grad_norm": 0.16162391006946564, "learning_rate": 0.00016762449263258274, "loss": 2.1464, "step": 483940 }, { "epoch": 1.87081535773376, "grad_norm": 0.15866687893867493, "learning_rate": 0.00016751534794967515, "loss": 2.1179, "step": 483950 }, { "epoch": 1.8708540149371435, "grad_norm": 0.15613588690757751, "learning_rate": 0.00016740620976715071, "loss": 2.1181, "step": 483960 }, { "epoch": 1.8708926721405268, "grad_norm": 0.15326961874961853, "learning_rate": 0.00016729707808384831, "loss": 2.1349, "step": 483970 }, { "epoch": 1.87093132934391, "grad_norm": 0.16684074699878693, "learning_rate": 0.00016718795289860666, "loss": 2.1445, "step": 483980 }, { "epoch": 1.8709699865472933, "grad_norm": 0.17308743298053741, "learning_rate": 0.00016707883421026538, "loss": 2.1417, "step": 483990 }, { "epoch": 1.8710086437506765, "grad_norm": 0.16576573252677917, "learning_rate": 0.00016696972201766402, "loss": 2.1312, "step": 484000 }, { "epoch": 1.8710473009540598, "grad_norm": 0.15447202324867249, "learning_rate": 0.00016686061631964288, "loss": 2.1339, "step": 484010 }, { "epoch": 1.871085958157443, "grad_norm": 0.14893104135990143, "learning_rate": 0.000166751517115042, "loss": 2.1237, "step": 484020 }, { "epoch": 1.8711246153608263, "grad_norm": 0.1605764925479889, "learning_rate": 0.0001666424244027025, "loss": 2.1284, "step": 484030 }, { "epoch": 1.8711632725642096, "grad_norm": 0.15590053796768188, "learning_rate": 0.00016653333818146554, "loss": 2.124, "step": 484040 }, { "epoch": 1.8712019297675928, "grad_norm": 0.17661923170089722, "learning_rate": 0.0001664242584501725, "loss": 2.1364, "step": 484050 }, { "epoch": 1.871240586970976, "grad_norm": 0.16185982525348663, "learning_rate": 0.00016631518520766498, "loss": 2.1356, "step": 484060 }, { "epoch": 1.8712792441743593, "grad_norm": 0.16207298636436462, "learning_rate": 0.00016620611845278544, "loss": 2.1341, "step": 484070 }, { "epoch": 1.8713179013777426, "grad_norm": 0.17160029709339142, "learning_rate": 0.00016609705818437592, "loss": 2.1346, "step": 484080 }, { "epoch": 1.871356558581126, "grad_norm": 0.16804343461990356, "learning_rate": 0.00016598800440127982, "loss": 2.1449, "step": 484090 }, { "epoch": 1.8713952157845093, "grad_norm": 0.14551687240600586, "learning_rate": 0.00016587895710234, "loss": 2.1222, "step": 484100 }, { "epoch": 1.8714338729878925, "grad_norm": 0.1561463177204132, "learning_rate": 0.00016576991628640015, "loss": 2.1394, "step": 484110 }, { "epoch": 1.8714725301912758, "grad_norm": 0.1606985479593277, "learning_rate": 0.0001656608819523038, "loss": 2.1411, "step": 484120 }, { "epoch": 1.8715111873946593, "grad_norm": 0.1565343588590622, "learning_rate": 0.0001655518540988954, "loss": 2.1393, "step": 484130 }, { "epoch": 1.8715498445980425, "grad_norm": 0.15693065524101257, "learning_rate": 0.0001654428327250197, "loss": 2.1288, "step": 484140 }, { "epoch": 1.8715885018014258, "grad_norm": 0.1524079442024231, "learning_rate": 0.0001653338178295214, "loss": 2.131, "step": 484150 }, { "epoch": 1.871627159004809, "grad_norm": 0.15368495881557465, "learning_rate": 0.00016522480941124563, "loss": 2.1196, "step": 484160 }, { "epoch": 1.8716658162081923, "grad_norm": 0.15699228644371033, "learning_rate": 0.00016511580746903821, "loss": 2.1361, "step": 484170 }, { "epoch": 1.8717044734115755, "grad_norm": 0.15115079283714294, "learning_rate": 0.00016500681200174472, "loss": 2.1361, "step": 484180 }, { "epoch": 1.8717431306149588, "grad_norm": 0.1602882593870163, "learning_rate": 0.00016489782300821188, "loss": 2.1227, "step": 484190 }, { "epoch": 1.871781787818342, "grad_norm": 0.1549602597951889, "learning_rate": 0.00016478884048728616, "loss": 2.1352, "step": 484200 }, { "epoch": 1.8718204450217253, "grad_norm": 0.15959705412387848, "learning_rate": 0.00016467986443781423, "loss": 2.1341, "step": 484210 }, { "epoch": 1.8718591022251085, "grad_norm": 0.16147272288799286, "learning_rate": 0.00016457089485864373, "loss": 2.1306, "step": 484220 }, { "epoch": 1.8718977594284918, "grad_norm": 0.2113785594701767, "learning_rate": 0.00016446193174862202, "loss": 2.1235, "step": 484230 }, { "epoch": 1.871936416631875, "grad_norm": 0.16358286142349243, "learning_rate": 0.00016435297510659753, "loss": 2.1388, "step": 484240 }, { "epoch": 1.8719750738352583, "grad_norm": 0.15772676467895508, "learning_rate": 0.0001642440249314181, "loss": 2.1265, "step": 484250 }, { "epoch": 1.8720137310386418, "grad_norm": 0.14948904514312744, "learning_rate": 0.00016413508122193265, "loss": 2.1163, "step": 484260 }, { "epoch": 1.872052388242025, "grad_norm": 0.154491126537323, "learning_rate": 0.0001640261439769901, "loss": 2.1356, "step": 484270 }, { "epoch": 1.8720910454454083, "grad_norm": 0.18150001764297485, "learning_rate": 0.00016391721319543984, "loss": 2.1355, "step": 484280 }, { "epoch": 1.8721297026487915, "grad_norm": 0.16020137071609497, "learning_rate": 0.00016380828887613163, "loss": 2.1295, "step": 484290 }, { "epoch": 1.872168359852175, "grad_norm": 0.14903488755226135, "learning_rate": 0.00016369937101791532, "loss": 2.1428, "step": 484300 }, { "epoch": 1.8722070170555583, "grad_norm": 0.15942107141017914, "learning_rate": 0.00016359045961964136, "loss": 2.1186, "step": 484310 }, { "epoch": 1.8722456742589415, "grad_norm": 0.15722288191318512, "learning_rate": 0.00016348155468016068, "loss": 2.124, "step": 484320 }, { "epoch": 1.8722843314623248, "grad_norm": 0.15270255506038666, "learning_rate": 0.00016337265619832398, "loss": 2.1225, "step": 484330 }, { "epoch": 1.872322988665708, "grad_norm": 0.1539790779352188, "learning_rate": 0.00016326376417298284, "loss": 2.1341, "step": 484340 }, { "epoch": 1.8723616458690913, "grad_norm": 0.16061554849147797, "learning_rate": 0.00016315487860298905, "loss": 2.1399, "step": 484350 }, { "epoch": 1.8724003030724745, "grad_norm": 0.33266738057136536, "learning_rate": 0.00016304599948719446, "loss": 2.1191, "step": 484360 }, { "epoch": 1.8724389602758578, "grad_norm": 0.18792271614074707, "learning_rate": 0.00016293712682445194, "loss": 2.1264, "step": 484370 }, { "epoch": 1.872477617479241, "grad_norm": 0.21823568642139435, "learning_rate": 0.00016282826061361379, "loss": 2.1211, "step": 484380 }, { "epoch": 1.8725162746826243, "grad_norm": 0.14920276403427124, "learning_rate": 0.0001627194008535333, "loss": 2.1313, "step": 484390 }, { "epoch": 1.8725549318860075, "grad_norm": 0.16141065955162048, "learning_rate": 0.0001626105475430637, "loss": 2.1345, "step": 484400 }, { "epoch": 1.8725935890893908, "grad_norm": 0.15054553747177124, "learning_rate": 0.0001625017006810592, "loss": 2.1196, "step": 484410 }, { "epoch": 1.8726322462927742, "grad_norm": 0.16641885042190552, "learning_rate": 0.00016239286026637357, "loss": 2.1425, "step": 484420 }, { "epoch": 1.8726709034961575, "grad_norm": 0.17120453715324402, "learning_rate": 0.00016228402629786109, "loss": 2.1292, "step": 484430 }, { "epoch": 1.8727095606995408, "grad_norm": 0.16046109795570374, "learning_rate": 0.00016217519877437714, "loss": 2.1344, "step": 484440 }, { "epoch": 1.872748217902924, "grad_norm": 0.15624983608722687, "learning_rate": 0.00016206637769477638, "loss": 2.1161, "step": 484450 }, { "epoch": 1.8727868751063073, "grad_norm": 0.15509746968746185, "learning_rate": 0.00016195756305791465, "loss": 2.1338, "step": 484460 }, { "epoch": 1.8728255323096907, "grad_norm": 0.16712747514247894, "learning_rate": 0.00016184875486264727, "loss": 2.1228, "step": 484470 }, { "epoch": 1.872864189513074, "grad_norm": 0.18799036741256714, "learning_rate": 0.00016173995310783074, "loss": 2.1281, "step": 484480 }, { "epoch": 1.8729028467164572, "grad_norm": 0.15011072158813477, "learning_rate": 0.00016163115779232152, "loss": 2.1337, "step": 484490 }, { "epoch": 1.8729415039198405, "grad_norm": 0.15797634422779083, "learning_rate": 0.00016152236891497652, "loss": 2.1344, "step": 484500 }, { "epoch": 1.8729801611232237, "grad_norm": 0.1545293778181076, "learning_rate": 0.00016141358647465265, "loss": 2.1312, "step": 484510 }, { "epoch": 1.873018818326607, "grad_norm": 0.16722118854522705, "learning_rate": 0.00016130481047020752, "loss": 2.1385, "step": 484520 }, { "epoch": 1.8730574755299902, "grad_norm": 0.15909487009048462, "learning_rate": 0.00016119604090049865, "loss": 2.1127, "step": 484530 }, { "epoch": 1.8730961327333735, "grad_norm": 0.16210758686065674, "learning_rate": 0.00016108727776438503, "loss": 2.1274, "step": 484540 }, { "epoch": 1.8731347899367567, "grad_norm": 0.16574212908744812, "learning_rate": 0.00016097852106072442, "loss": 2.133, "step": 484550 }, { "epoch": 1.87317344714014, "grad_norm": 0.15301044285297394, "learning_rate": 0.00016086977078837617, "loss": 2.1248, "step": 484560 }, { "epoch": 1.8732121043435233, "grad_norm": 0.1614498645067215, "learning_rate": 0.00016076102694619922, "loss": 2.1401, "step": 484570 }, { "epoch": 1.8732507615469065, "grad_norm": 0.15972541272640228, "learning_rate": 0.0001606522895330529, "loss": 2.1208, "step": 484580 }, { "epoch": 1.87328941875029, "grad_norm": 0.17323851585388184, "learning_rate": 0.00016054355854779745, "loss": 2.1266, "step": 484590 }, { "epoch": 1.8733280759536732, "grad_norm": 0.14773207902908325, "learning_rate": 0.00016043483398929292, "loss": 2.1314, "step": 484600 }, { "epoch": 1.8733667331570565, "grad_norm": 0.15913040935993195, "learning_rate": 0.0001603261158564, "loss": 2.1275, "step": 484610 }, { "epoch": 1.8734053903604397, "grad_norm": 0.15760380029678345, "learning_rate": 0.00016021740414797937, "loss": 2.13, "step": 484620 }, { "epoch": 1.873444047563823, "grad_norm": 0.15176202356815338, "learning_rate": 0.00016010869886289193, "loss": 2.1198, "step": 484630 }, { "epoch": 1.8734827047672065, "grad_norm": 0.1578439623117447, "learning_rate": 0.00015999999999999993, "loss": 2.126, "step": 484640 }, { "epoch": 1.8735213619705897, "grad_norm": 0.15178169310092926, "learning_rate": 0.00015989130755816495, "loss": 2.1295, "step": 484650 }, { "epoch": 1.873560019173973, "grad_norm": 0.1513904482126236, "learning_rate": 0.000159782621536249, "loss": 2.1362, "step": 484660 }, { "epoch": 1.8735986763773562, "grad_norm": 0.15508714318275452, "learning_rate": 0.00015967394193311502, "loss": 2.1361, "step": 484670 }, { "epoch": 1.8736373335807395, "grad_norm": 0.1674482524394989, "learning_rate": 0.00015956526874762544, "loss": 2.1354, "step": 484680 }, { "epoch": 1.8736759907841227, "grad_norm": 0.15818580985069275, "learning_rate": 0.00015945660197864387, "loss": 2.1283, "step": 484690 }, { "epoch": 1.873714647987506, "grad_norm": 0.15632577240467072, "learning_rate": 0.00015934794162503386, "loss": 2.1207, "step": 484700 }, { "epoch": 1.8737533051908892, "grad_norm": 0.1655329465866089, "learning_rate": 0.000159239287685659, "loss": 2.1354, "step": 484710 }, { "epoch": 1.8737919623942725, "grad_norm": 0.1501639187335968, "learning_rate": 0.00015913064015938395, "loss": 2.1357, "step": 484720 }, { "epoch": 1.8738306195976557, "grad_norm": 0.16501399874687195, "learning_rate": 0.00015902199904507276, "loss": 2.1292, "step": 484730 }, { "epoch": 1.873869276801039, "grad_norm": 0.15810200572013855, "learning_rate": 0.00015891336434159075, "loss": 2.1133, "step": 484740 }, { "epoch": 1.8739079340044222, "grad_norm": 0.15270966291427612, "learning_rate": 0.0001588047360478031, "loss": 2.1288, "step": 484750 }, { "epoch": 1.8739465912078057, "grad_norm": 0.16167369484901428, "learning_rate": 0.00015869611416257533, "loss": 2.1416, "step": 484760 }, { "epoch": 1.873985248411189, "grad_norm": 0.1594432145357132, "learning_rate": 0.0001585874986847733, "loss": 2.1347, "step": 484770 }, { "epoch": 1.8740239056145722, "grad_norm": 0.1732734590768814, "learning_rate": 0.00015847888961326316, "loss": 2.1336, "step": 484780 }, { "epoch": 1.8740625628179555, "grad_norm": 0.15716975927352905, "learning_rate": 0.0001583702869469119, "loss": 2.128, "step": 484790 }, { "epoch": 1.874101220021339, "grad_norm": 0.14907538890838623, "learning_rate": 0.00015826169068458595, "loss": 2.1324, "step": 484800 }, { "epoch": 1.8741398772247222, "grad_norm": 0.1508820801973343, "learning_rate": 0.00015815310082515266, "loss": 2.14, "step": 484810 }, { "epoch": 1.8741785344281054, "grad_norm": 0.16098782420158386, "learning_rate": 0.00015804451736748005, "loss": 2.1336, "step": 484820 }, { "epoch": 1.8742171916314887, "grad_norm": 0.19443218410015106, "learning_rate": 0.00015793594031043523, "loss": 2.119, "step": 484830 }, { "epoch": 1.874255848834872, "grad_norm": 0.1581234484910965, "learning_rate": 0.00015782736965288714, "loss": 2.1249, "step": 484840 }, { "epoch": 1.8742945060382552, "grad_norm": 0.14153549075126648, "learning_rate": 0.0001577188053937042, "loss": 2.1368, "step": 484850 }, { "epoch": 1.8743331632416385, "grad_norm": 0.1574030965566635, "learning_rate": 0.00015761024753175535, "loss": 2.1379, "step": 484860 }, { "epoch": 1.8743718204450217, "grad_norm": 0.17852358520030975, "learning_rate": 0.00015750169606590947, "loss": 2.1255, "step": 484870 }, { "epoch": 1.874410477648405, "grad_norm": 0.15847718715667725, "learning_rate": 0.0001573931509950366, "loss": 2.1324, "step": 484880 }, { "epoch": 1.8744491348517882, "grad_norm": 0.1581220179796219, "learning_rate": 0.00015728461231800651, "loss": 2.1266, "step": 484890 }, { "epoch": 1.8744877920551715, "grad_norm": 0.1563778966665268, "learning_rate": 0.0001571760800336892, "loss": 2.1276, "step": 484900 }, { "epoch": 1.8745264492585547, "grad_norm": 0.15693436563014984, "learning_rate": 0.00015706755414095563, "loss": 2.1275, "step": 484910 }, { "epoch": 1.874565106461938, "grad_norm": 0.16170282661914825, "learning_rate": 0.00015695903463867645, "loss": 2.1097, "step": 484920 }, { "epoch": 1.8746037636653214, "grad_norm": 0.1586095094680786, "learning_rate": 0.00015685052152572277, "loss": 2.114, "step": 484930 }, { "epoch": 1.8746424208687047, "grad_norm": 0.16190335154533386, "learning_rate": 0.0001567420148009666, "loss": 2.1227, "step": 484940 }, { "epoch": 1.874681078072088, "grad_norm": 0.15662255883216858, "learning_rate": 0.00015663351446327956, "loss": 2.1219, "step": 484950 }, { "epoch": 1.8747197352754712, "grad_norm": 0.1621389091014862, "learning_rate": 0.00015652502051153406, "loss": 2.1306, "step": 484960 }, { "epoch": 1.8747583924788547, "grad_norm": 0.1581934094429016, "learning_rate": 0.00015641653294460255, "loss": 2.1447, "step": 484970 }, { "epoch": 1.874797049682238, "grad_norm": 0.15592055022716522, "learning_rate": 0.00015630805176135775, "loss": 2.1279, "step": 484980 }, { "epoch": 1.8748357068856212, "grad_norm": 0.167986199259758, "learning_rate": 0.00015619957696067345, "loss": 2.1249, "step": 484990 }, { "epoch": 1.8748743640890044, "grad_norm": 0.16340020298957825, "learning_rate": 0.0001560911085414225, "loss": 2.1389, "step": 485000 }, { "epoch": 1.8749130212923877, "grad_norm": 0.15751740336418152, "learning_rate": 0.00015598264650247938, "loss": 2.1055, "step": 485010 }, { "epoch": 1.874951678495771, "grad_norm": 0.16410748660564423, "learning_rate": 0.00015587419084271814, "loss": 2.1328, "step": 485020 }, { "epoch": 1.8749903356991542, "grad_norm": 0.16635240614414215, "learning_rate": 0.0001557657415610132, "loss": 2.1311, "step": 485030 }, { "epoch": 1.8750289929025374, "grad_norm": 0.15139134228229523, "learning_rate": 0.00015565729865623946, "loss": 2.1283, "step": 485040 }, { "epoch": 1.8750676501059207, "grad_norm": 0.16576196253299713, "learning_rate": 0.00015554886212727225, "loss": 2.124, "step": 485050 }, { "epoch": 1.875106307309304, "grad_norm": 0.1514219492673874, "learning_rate": 0.00015544043197298718, "loss": 2.1285, "step": 485060 }, { "epoch": 1.8751449645126872, "grad_norm": 0.18162968754768372, "learning_rate": 0.00015533200819226002, "loss": 2.1296, "step": 485070 }, { "epoch": 1.8751836217160704, "grad_norm": 0.1785215139389038, "learning_rate": 0.0001552235907839672, "loss": 2.1444, "step": 485080 }, { "epoch": 1.8752222789194537, "grad_norm": 0.15658053755760193, "learning_rate": 0.00015511517974698498, "loss": 2.1308, "step": 485090 }, { "epoch": 1.8752609361228372, "grad_norm": 0.15919972956180573, "learning_rate": 0.00015500677508019023, "loss": 2.1163, "step": 485100 }, { "epoch": 1.8752995933262204, "grad_norm": 1.2751634120941162, "learning_rate": 0.00015489837678246031, "loss": 2.1182, "step": 485110 }, { "epoch": 1.8753382505296037, "grad_norm": 0.15868133306503296, "learning_rate": 0.00015478998485267281, "loss": 2.1322, "step": 485120 }, { "epoch": 1.875376907732987, "grad_norm": 0.1583377718925476, "learning_rate": 0.00015468159928970525, "loss": 2.1199, "step": 485130 }, { "epoch": 1.8754155649363704, "grad_norm": 0.1486121267080307, "learning_rate": 0.00015457322009243614, "loss": 2.1289, "step": 485140 }, { "epoch": 1.8754542221397537, "grad_norm": 0.15714432299137115, "learning_rate": 0.00015446484725974407, "loss": 2.1399, "step": 485150 }, { "epoch": 1.875492879343137, "grad_norm": 0.1527232676744461, "learning_rate": 0.00015435648079050758, "loss": 2.1301, "step": 485160 }, { "epoch": 1.8755315365465202, "grad_norm": 0.15908005833625793, "learning_rate": 0.00015424812068360615, "loss": 2.1265, "step": 485170 }, { "epoch": 1.8755701937499034, "grad_norm": 0.16998544335365295, "learning_rate": 0.00015413976693791898, "loss": 2.1127, "step": 485180 }, { "epoch": 1.8756088509532867, "grad_norm": 0.15736450254917145, "learning_rate": 0.00015403141955232623, "loss": 2.1282, "step": 485190 }, { "epoch": 1.87564750815667, "grad_norm": 0.16208943724632263, "learning_rate": 0.00015392307852570776, "loss": 2.1342, "step": 485200 }, { "epoch": 1.8756861653600532, "grad_norm": 0.17129312455654144, "learning_rate": 0.00015381474385694439, "loss": 2.1387, "step": 485210 }, { "epoch": 1.8757248225634364, "grad_norm": 0.17631563544273376, "learning_rate": 0.0001537064155449166, "loss": 2.1307, "step": 485220 }, { "epoch": 1.8757634797668197, "grad_norm": 0.16245117783546448, "learning_rate": 0.00015359809358850597, "loss": 2.1263, "step": 485230 }, { "epoch": 1.875802136970203, "grad_norm": 0.1504349410533905, "learning_rate": 0.00015348977798659337, "loss": 2.1237, "step": 485240 }, { "epoch": 1.8758407941735862, "grad_norm": 0.17673885822296143, "learning_rate": 0.00015338146873806124, "loss": 2.1216, "step": 485250 }, { "epoch": 1.8758794513769694, "grad_norm": 0.17802615463733673, "learning_rate": 0.0001532731658417912, "loss": 2.1193, "step": 485260 }, { "epoch": 1.875918108580353, "grad_norm": 0.6284236311912537, "learning_rate": 0.0001531648692966663, "loss": 2.1408, "step": 485270 }, { "epoch": 1.8759567657837362, "grad_norm": 0.1544286012649536, "learning_rate": 0.00015305657910156877, "loss": 2.0998, "step": 485280 }, { "epoch": 1.8759954229871194, "grad_norm": 0.16721487045288086, "learning_rate": 0.00015294829525538178, "loss": 2.1216, "step": 485290 }, { "epoch": 1.8760340801905027, "grad_norm": 0.16701914370059967, "learning_rate": 0.0001528400177569893, "loss": 2.1176, "step": 485300 }, { "epoch": 1.8760727373938861, "grad_norm": 0.14934012293815613, "learning_rate": 0.00015273174660527446, "loss": 2.1364, "step": 485310 }, { "epoch": 1.8761113945972694, "grad_norm": 0.15575703978538513, "learning_rate": 0.00015262348179912165, "loss": 2.1406, "step": 485320 }, { "epoch": 1.8761500518006526, "grad_norm": 0.20635825395584106, "learning_rate": 0.00015251522333741541, "loss": 2.124, "step": 485330 }, { "epoch": 1.876188709004036, "grad_norm": 0.15215198695659637, "learning_rate": 0.00015240697121904034, "loss": 2.1229, "step": 485340 }, { "epoch": 1.8762273662074191, "grad_norm": 0.16519252955913544, "learning_rate": 0.00015229872544288159, "loss": 2.128, "step": 485350 }, { "epoch": 1.8762660234108024, "grad_norm": 0.17716598510742188, "learning_rate": 0.00015219048600782447, "loss": 2.1367, "step": 485360 }, { "epoch": 1.8763046806141856, "grad_norm": 0.1608271300792694, "learning_rate": 0.00015208225291275478, "loss": 2.1294, "step": 485370 }, { "epoch": 1.876343337817569, "grad_norm": 0.15679559111595154, "learning_rate": 0.00015197402615655853, "loss": 2.1329, "step": 485380 }, { "epoch": 1.8763819950209522, "grad_norm": 0.14653101563453674, "learning_rate": 0.00015186580573812235, "loss": 2.1109, "step": 485390 }, { "epoch": 1.8764206522243354, "grad_norm": 0.16167862713336945, "learning_rate": 0.00015175759165633252, "loss": 2.1208, "step": 485400 }, { "epoch": 1.8764593094277187, "grad_norm": 0.15243785083293915, "learning_rate": 0.00015164938391007654, "loss": 2.124, "step": 485410 }, { "epoch": 1.876497966631102, "grad_norm": 0.17141318321228027, "learning_rate": 0.00015154118249824134, "loss": 2.1336, "step": 485420 }, { "epoch": 1.8765366238344852, "grad_norm": 0.15932010114192963, "learning_rate": 0.0001514329874197149, "loss": 2.1272, "step": 485430 }, { "epoch": 1.8765752810378686, "grad_norm": 0.1552756428718567, "learning_rate": 0.000151324798673385, "loss": 2.1196, "step": 485440 }, { "epoch": 1.8766139382412519, "grad_norm": 0.16936630010604858, "learning_rate": 0.00015121661625814033, "loss": 2.1271, "step": 485450 }, { "epoch": 1.8766525954446351, "grad_norm": 0.15281683206558228, "learning_rate": 0.0001511084401728693, "loss": 2.129, "step": 485460 }, { "epoch": 1.8766912526480184, "grad_norm": 0.15728114545345306, "learning_rate": 0.00015100027041646102, "loss": 2.1303, "step": 485470 }, { "epoch": 1.8767299098514019, "grad_norm": 0.1558963656425476, "learning_rate": 0.00015089210698780486, "loss": 2.137, "step": 485480 }, { "epoch": 1.8767685670547851, "grad_norm": 0.15373340249061584, "learning_rate": 0.00015078394988579015, "loss": 2.1224, "step": 485490 }, { "epoch": 1.8768072242581684, "grad_norm": 0.15859030187129974, "learning_rate": 0.00015067579910930706, "loss": 2.1145, "step": 485500 }, { "epoch": 1.8768458814615516, "grad_norm": 0.17240332067012787, "learning_rate": 0.00015056765465724586, "loss": 2.1175, "step": 485510 }, { "epoch": 1.8768845386649349, "grad_norm": 0.16093796491622925, "learning_rate": 0.00015045951652849744, "loss": 2.1186, "step": 485520 }, { "epoch": 1.8769231958683181, "grad_norm": 0.15308769047260284, "learning_rate": 0.0001503513847219522, "loss": 2.1228, "step": 485530 }, { "epoch": 1.8769618530717014, "grad_norm": 0.15490011870861053, "learning_rate": 0.0001502432592365015, "loss": 2.1218, "step": 485540 }, { "epoch": 1.8770005102750846, "grad_norm": 0.15395839512348175, "learning_rate": 0.00015013514007103756, "loss": 2.1373, "step": 485550 }, { "epoch": 1.8770391674784679, "grad_norm": 0.15664257109165192, "learning_rate": 0.00015002702722445149, "loss": 2.1299, "step": 485560 }, { "epoch": 1.8770778246818511, "grad_norm": 0.15513335168361664, "learning_rate": 0.00014991892069563618, "loss": 2.1108, "step": 485570 }, { "epoch": 1.8771164818852344, "grad_norm": 0.15297462046146393, "learning_rate": 0.0001498108204834836, "loss": 2.1336, "step": 485580 }, { "epoch": 1.8771551390886176, "grad_norm": 0.16893671452999115, "learning_rate": 0.0001497027265868871, "loss": 2.1354, "step": 485590 }, { "epoch": 1.877193796292001, "grad_norm": 0.1585252583026886, "learning_rate": 0.00014959463900473958, "loss": 2.1261, "step": 485600 }, { "epoch": 1.8772324534953844, "grad_norm": 0.1679084599018097, "learning_rate": 0.00014948655773593477, "loss": 2.1334, "step": 485610 }, { "epoch": 1.8772711106987676, "grad_norm": 0.15539462864398956, "learning_rate": 0.0001493784827793665, "loss": 2.1448, "step": 485620 }, { "epoch": 1.8773097679021509, "grad_norm": 0.15404072403907776, "learning_rate": 0.00014927041413392871, "loss": 2.132, "step": 485630 }, { "epoch": 1.8773484251055341, "grad_norm": 0.17238987982273102, "learning_rate": 0.00014916235179851612, "loss": 2.1136, "step": 485640 }, { "epoch": 1.8773870823089176, "grad_norm": 0.15702857077121735, "learning_rate": 0.00014905429577202333, "loss": 2.1272, "step": 485650 }, { "epoch": 1.8774257395123009, "grad_norm": 0.1531582623720169, "learning_rate": 0.00014894624605334594, "loss": 2.1256, "step": 485660 }, { "epoch": 1.877464396715684, "grad_norm": 0.16453592479228973, "learning_rate": 0.00014883820264137903, "loss": 2.1306, "step": 485670 }, { "epoch": 1.8775030539190674, "grad_norm": 0.1602281630039215, "learning_rate": 0.00014873016553501839, "loss": 2.1412, "step": 485680 }, { "epoch": 1.8775417111224506, "grad_norm": 0.15847299993038177, "learning_rate": 0.00014862213473316045, "loss": 2.1222, "step": 485690 }, { "epoch": 1.8775803683258339, "grad_norm": 0.15805181860923767, "learning_rate": 0.00014851411023470118, "loss": 2.1125, "step": 485700 }, { "epoch": 1.8776190255292171, "grad_norm": 0.1721765697002411, "learning_rate": 0.00014840609203853772, "loss": 2.1191, "step": 485710 }, { "epoch": 1.8776576827326004, "grad_norm": 0.16002759337425232, "learning_rate": 0.00014829808014356694, "loss": 2.1325, "step": 485720 }, { "epoch": 1.8776963399359836, "grad_norm": 0.1535620242357254, "learning_rate": 0.00014819007454868637, "loss": 2.1066, "step": 485730 }, { "epoch": 1.8777349971393669, "grad_norm": 0.1585405021905899, "learning_rate": 0.00014808207525279337, "loss": 2.1305, "step": 485740 }, { "epoch": 1.8777736543427501, "grad_norm": 0.15157096087932587, "learning_rate": 0.0001479740822547866, "loss": 2.1197, "step": 485750 }, { "epoch": 1.8778123115461334, "grad_norm": 0.15094926953315735, "learning_rate": 0.000147866095553564, "loss": 2.1225, "step": 485760 }, { "epoch": 1.8778509687495166, "grad_norm": 0.1576872318983078, "learning_rate": 0.00014775811514802428, "loss": 2.1347, "step": 485770 }, { "epoch": 1.8778896259529, "grad_norm": 0.16082343459129333, "learning_rate": 0.00014765014103706631, "loss": 2.1338, "step": 485780 }, { "epoch": 1.8779282831562834, "grad_norm": 0.1641700267791748, "learning_rate": 0.00014754217321958984, "loss": 2.1274, "step": 485790 }, { "epoch": 1.8779669403596666, "grad_norm": 0.15711650252342224, "learning_rate": 0.00014743421169449424, "loss": 2.1242, "step": 485800 }, { "epoch": 1.8780055975630499, "grad_norm": 0.16527198255062103, "learning_rate": 0.00014732625646067944, "loss": 2.1207, "step": 485810 }, { "epoch": 1.8780442547664333, "grad_norm": 0.1620732545852661, "learning_rate": 0.0001472183075170459, "loss": 2.1259, "step": 485820 }, { "epoch": 1.8780829119698166, "grad_norm": 0.15371723473072052, "learning_rate": 0.00014711036486249408, "loss": 2.1207, "step": 485830 }, { "epoch": 1.8781215691731998, "grad_norm": 0.15590305626392365, "learning_rate": 0.00014700242849592483, "loss": 2.1269, "step": 485840 }, { "epoch": 1.878160226376583, "grad_norm": 0.18628154695034027, "learning_rate": 0.00014689449841623968, "loss": 2.1157, "step": 485850 }, { "epoch": 1.8781988835799663, "grad_norm": 0.16660727560520172, "learning_rate": 0.00014678657462234, "loss": 2.1185, "step": 485860 }, { "epoch": 1.8782375407833496, "grad_norm": 0.16076427698135376, "learning_rate": 0.00014667865711312755, "loss": 2.1309, "step": 485870 }, { "epoch": 1.8782761979867328, "grad_norm": 0.16650605201721191, "learning_rate": 0.00014657074588750497, "loss": 2.1267, "step": 485880 }, { "epoch": 1.878314855190116, "grad_norm": 0.15219688415527344, "learning_rate": 0.00014646284094437423, "loss": 2.1206, "step": 485890 }, { "epoch": 1.8783535123934993, "grad_norm": 0.15262344479560852, "learning_rate": 0.00014635494228263868, "loss": 2.1256, "step": 485900 }, { "epoch": 1.8783921695968826, "grad_norm": 0.15145814418792725, "learning_rate": 0.00014624704990120118, "loss": 2.1177, "step": 485910 }, { "epoch": 1.8784308268002659, "grad_norm": 0.16400784254074097, "learning_rate": 0.0001461391637989653, "loss": 2.1161, "step": 485920 }, { "epoch": 1.878469484003649, "grad_norm": 0.15878716111183167, "learning_rate": 0.00014603128397483477, "loss": 2.1112, "step": 485930 }, { "epoch": 1.8785081412070324, "grad_norm": 0.16059978306293488, "learning_rate": 0.00014592341042771363, "loss": 2.119, "step": 485940 }, { "epoch": 1.8785467984104158, "grad_norm": 0.16229532659053802, "learning_rate": 0.00014581554315650668, "loss": 2.1209, "step": 485950 }, { "epoch": 1.878585455613799, "grad_norm": 0.1519196480512619, "learning_rate": 0.00014570768216011842, "loss": 2.1122, "step": 485960 }, { "epoch": 1.8786241128171823, "grad_norm": 0.1595258116722107, "learning_rate": 0.00014559982743745414, "loss": 2.1316, "step": 485970 }, { "epoch": 1.8786627700205656, "grad_norm": 0.17319795489311218, "learning_rate": 0.00014549197898741894, "loss": 2.1407, "step": 485980 }, { "epoch": 1.878701427223949, "grad_norm": 0.15375776588916779, "learning_rate": 0.0001453841368089186, "loss": 2.1252, "step": 485990 }, { "epoch": 1.8787400844273323, "grad_norm": 0.1607261598110199, "learning_rate": 0.0001452763009008593, "loss": 2.1285, "step": 486000 }, { "epoch": 1.8787787416307156, "grad_norm": 0.15706588327884674, "learning_rate": 0.00014516847126214727, "loss": 2.114, "step": 486010 }, { "epoch": 1.8788173988340988, "grad_norm": 0.17170752584934235, "learning_rate": 0.00014506064789168916, "loss": 2.1228, "step": 486020 }, { "epoch": 1.878856056037482, "grad_norm": 0.1548837274312973, "learning_rate": 0.00014495283078839205, "loss": 2.1338, "step": 486030 }, { "epoch": 1.8788947132408653, "grad_norm": 0.15781624615192413, "learning_rate": 0.00014484501995116306, "loss": 2.1202, "step": 486040 }, { "epoch": 1.8789333704442486, "grad_norm": 0.16095009446144104, "learning_rate": 0.00014473721537891016, "loss": 2.125, "step": 486050 }, { "epoch": 1.8789720276476318, "grad_norm": 0.14927291870117188, "learning_rate": 0.00014462941707054112, "loss": 2.1126, "step": 486060 }, { "epoch": 1.879010684851015, "grad_norm": 0.15893907845020294, "learning_rate": 0.00014452162502496412, "loss": 2.1177, "step": 486070 }, { "epoch": 1.8790493420543983, "grad_norm": 0.16693106293678284, "learning_rate": 0.00014441383924108765, "loss": 2.1403, "step": 486080 }, { "epoch": 1.8790879992577816, "grad_norm": 0.15528330206871033, "learning_rate": 0.00014430605971782075, "loss": 2.1151, "step": 486090 }, { "epoch": 1.8791266564611648, "grad_norm": 0.16108617186546326, "learning_rate": 0.00014419828645407273, "loss": 2.1045, "step": 486100 }, { "epoch": 1.879165313664548, "grad_norm": 0.16097860038280487, "learning_rate": 0.00014409051944875296, "loss": 2.1268, "step": 486110 }, { "epoch": 1.8792039708679316, "grad_norm": 0.16463223099708557, "learning_rate": 0.00014398275870077115, "loss": 2.1292, "step": 486120 }, { "epoch": 1.8792426280713148, "grad_norm": 0.14849969744682312, "learning_rate": 0.00014387500420903775, "loss": 2.1199, "step": 486130 }, { "epoch": 1.879281285274698, "grad_norm": 0.1572045236825943, "learning_rate": 0.00014376725597246276, "loss": 2.1243, "step": 486140 }, { "epoch": 1.8793199424780813, "grad_norm": 0.15840677917003632, "learning_rate": 0.0001436595139899577, "loss": 2.1358, "step": 486150 }, { "epoch": 1.8793585996814648, "grad_norm": 0.18019139766693115, "learning_rate": 0.00014355177826043318, "loss": 2.1158, "step": 486160 }, { "epoch": 1.879397256884848, "grad_norm": 0.15955372154712677, "learning_rate": 0.00014344404878280058, "loss": 2.1315, "step": 486170 }, { "epoch": 1.8794359140882313, "grad_norm": 0.16374975442886353, "learning_rate": 0.00014333632555597187, "loss": 2.103, "step": 486180 }, { "epoch": 1.8794745712916145, "grad_norm": 0.1583992838859558, "learning_rate": 0.000143228608578859, "loss": 2.1356, "step": 486190 }, { "epoch": 1.8795132284949978, "grad_norm": 0.16933327913284302, "learning_rate": 0.0001431208978503744, "loss": 2.1388, "step": 486200 }, { "epoch": 1.879551885698381, "grad_norm": 0.15624232590198517, "learning_rate": 0.00014301319336943052, "loss": 2.1248, "step": 486210 }, { "epoch": 1.8795905429017643, "grad_norm": 0.16327272355556488, "learning_rate": 0.00014290549513494067, "loss": 2.1302, "step": 486220 }, { "epoch": 1.8796292001051476, "grad_norm": 0.15281060338020325, "learning_rate": 0.00014279780314581793, "loss": 2.1258, "step": 486230 }, { "epoch": 1.8796678573085308, "grad_norm": 0.1542748659849167, "learning_rate": 0.00014269011740097604, "loss": 2.1019, "step": 486240 }, { "epoch": 1.879706514511914, "grad_norm": 0.15855109691619873, "learning_rate": 0.000142582437899329, "loss": 2.1122, "step": 486250 }, { "epoch": 1.8797451717152973, "grad_norm": 0.16607624292373657, "learning_rate": 0.00014247476463979104, "loss": 2.1334, "step": 486260 }, { "epoch": 1.8797838289186806, "grad_norm": 0.1596800535917282, "learning_rate": 0.00014236709762127653, "loss": 2.1187, "step": 486270 }, { "epoch": 1.879822486122064, "grad_norm": 0.1803329885005951, "learning_rate": 0.0001422594368427006, "loss": 2.1246, "step": 486280 }, { "epoch": 1.8798611433254473, "grad_norm": 0.1732296645641327, "learning_rate": 0.0001421517823029783, "loss": 2.1277, "step": 486290 }, { "epoch": 1.8798998005288305, "grad_norm": 0.159649059176445, "learning_rate": 0.00014204413400102523, "loss": 2.1386, "step": 486300 }, { "epoch": 1.8799384577322138, "grad_norm": 0.15590937435626984, "learning_rate": 0.0001419364919357573, "loss": 2.1168, "step": 486310 }, { "epoch": 1.879977114935597, "grad_norm": 0.16680322587490082, "learning_rate": 0.00014182885610609054, "loss": 2.1175, "step": 486320 }, { "epoch": 1.8800157721389805, "grad_norm": 0.19824039936065674, "learning_rate": 0.00014172122651094155, "loss": 2.1226, "step": 486330 }, { "epoch": 1.8800544293423638, "grad_norm": 0.16763262450695038, "learning_rate": 0.00014161360314922679, "loss": 2.122, "step": 486340 }, { "epoch": 1.880093086545747, "grad_norm": 0.15511512756347656, "learning_rate": 0.0001415059860198633, "loss": 2.1234, "step": 486350 }, { "epoch": 1.8801317437491303, "grad_norm": 0.155037060379982, "learning_rate": 0.00014139837512176911, "loss": 2.1265, "step": 486360 }, { "epoch": 1.8801704009525135, "grad_norm": 0.16194379329681396, "learning_rate": 0.00014129077045386151, "loss": 2.1276, "step": 486370 }, { "epoch": 1.8802090581558968, "grad_norm": 0.1541333943605423, "learning_rate": 0.00014118317201505847, "loss": 2.1315, "step": 486380 }, { "epoch": 1.88024771535928, "grad_norm": 0.16888245940208435, "learning_rate": 0.00014107557980427843, "loss": 2.1098, "step": 486390 }, { "epoch": 1.8802863725626633, "grad_norm": 0.15296173095703125, "learning_rate": 0.00014096799382044, "loss": 2.1297, "step": 486400 }, { "epoch": 1.8803250297660465, "grad_norm": 0.1709115356206894, "learning_rate": 0.00014086041406246208, "loss": 2.1034, "step": 486410 }, { "epoch": 1.8803636869694298, "grad_norm": 0.15439197421073914, "learning_rate": 0.00014075284052926417, "loss": 2.1248, "step": 486420 }, { "epoch": 1.880402344172813, "grad_norm": 0.16216754913330078, "learning_rate": 0.0001406452732197656, "loss": 2.1247, "step": 486430 }, { "epoch": 1.8804410013761963, "grad_norm": 0.16694015264511108, "learning_rate": 0.00014053771213288658, "loss": 2.1192, "step": 486440 }, { "epoch": 1.8804796585795798, "grad_norm": 0.16613997519016266, "learning_rate": 0.00014043015726754703, "loss": 2.1379, "step": 486450 }, { "epoch": 1.880518315782963, "grad_norm": 0.16087213158607483, "learning_rate": 0.00014032260862266766, "loss": 2.1101, "step": 486460 }, { "epoch": 1.8805569729863463, "grad_norm": 0.1598331481218338, "learning_rate": 0.00014021506619716907, "loss": 2.1167, "step": 486470 }, { "epoch": 1.8805956301897295, "grad_norm": 0.15491698682308197, "learning_rate": 0.00014010752998997277, "loss": 2.111, "step": 486480 }, { "epoch": 1.8806342873931128, "grad_norm": 0.16934970021247864, "learning_rate": 0.0001399999999999999, "loss": 2.1386, "step": 486490 }, { "epoch": 1.8806729445964963, "grad_norm": 0.15546073019504547, "learning_rate": 0.00013989247622617261, "loss": 2.1342, "step": 486500 }, { "epoch": 1.8807116017998795, "grad_norm": 0.15205851197242737, "learning_rate": 0.00013978495866741246, "loss": 2.1296, "step": 486510 }, { "epoch": 1.8807502590032628, "grad_norm": 0.16739751398563385, "learning_rate": 0.0001396774473226423, "loss": 2.112, "step": 486520 }, { "epoch": 1.880788916206646, "grad_norm": 0.15638157725334167, "learning_rate": 0.0001395699421907848, "loss": 2.1197, "step": 486530 }, { "epoch": 1.8808275734100293, "grad_norm": 0.16130010783672333, "learning_rate": 0.00013946244327076273, "loss": 2.132, "step": 486540 }, { "epoch": 1.8808662306134125, "grad_norm": 0.1580820232629776, "learning_rate": 0.0001393549505614995, "loss": 2.1167, "step": 486550 }, { "epoch": 1.8809048878167958, "grad_norm": 0.15250080823898315, "learning_rate": 0.0001392474640619188, "loss": 2.1264, "step": 486560 }, { "epoch": 1.880943545020179, "grad_norm": 0.1518588364124298, "learning_rate": 0.0001391399837709446, "loss": 2.1198, "step": 486570 }, { "epoch": 1.8809822022235623, "grad_norm": 0.161702960729599, "learning_rate": 0.00013903250968750136, "loss": 2.1373, "step": 486580 }, { "epoch": 1.8810208594269455, "grad_norm": 0.16377724707126617, "learning_rate": 0.00013892504181051325, "loss": 2.1427, "step": 486590 }, { "epoch": 1.8810595166303288, "grad_norm": 0.15726323425769806, "learning_rate": 0.00013881758013890532, "loss": 2.1385, "step": 486600 }, { "epoch": 1.881098173833712, "grad_norm": 0.16078706085681915, "learning_rate": 0.00013871012467160293, "loss": 2.1095, "step": 486610 }, { "epoch": 1.8811368310370955, "grad_norm": 0.16197741031646729, "learning_rate": 0.00013860267540753135, "loss": 2.1102, "step": 486620 }, { "epoch": 1.8811754882404788, "grad_norm": 0.15010589361190796, "learning_rate": 0.00013849523234561655, "loss": 2.1181, "step": 486630 }, { "epoch": 1.881214145443862, "grad_norm": 0.168479323387146, "learning_rate": 0.00013838779548478475, "loss": 2.1193, "step": 486640 }, { "epoch": 1.8812528026472453, "grad_norm": 0.3779033422470093, "learning_rate": 0.00013828036482396188, "loss": 2.1389, "step": 486650 }, { "epoch": 1.8812914598506285, "grad_norm": 0.1739717423915863, "learning_rate": 0.00013817294036207528, "loss": 2.1246, "step": 486660 }, { "epoch": 1.881330117054012, "grad_norm": 0.1617521196603775, "learning_rate": 0.0001380655220980518, "loss": 2.1083, "step": 486670 }, { "epoch": 1.8813687742573952, "grad_norm": 0.17464694380760193, "learning_rate": 0.00013795811003081894, "loss": 2.1222, "step": 486680 }, { "epoch": 1.8814074314607785, "grad_norm": 0.17022061347961426, "learning_rate": 0.00013785070415930402, "loss": 2.129, "step": 486690 }, { "epoch": 1.8814460886641617, "grad_norm": 0.16917157173156738, "learning_rate": 0.0001377433044824352, "loss": 2.1218, "step": 486700 }, { "epoch": 1.881484745867545, "grad_norm": 0.15667511522769928, "learning_rate": 0.00013763591099914097, "loss": 2.1158, "step": 486710 }, { "epoch": 1.8815234030709282, "grad_norm": 0.16120067238807678, "learning_rate": 0.00013752852370834946, "loss": 2.1243, "step": 486720 }, { "epoch": 1.8815620602743115, "grad_norm": 0.17816998064517975, "learning_rate": 0.0001374211426089902, "loss": 2.1227, "step": 486730 }, { "epoch": 1.8816007174776948, "grad_norm": 0.1660146415233612, "learning_rate": 0.0001373137676999918, "loss": 2.1195, "step": 486740 }, { "epoch": 1.881639374681078, "grad_norm": 0.15490961074829102, "learning_rate": 0.00013720639898028407, "loss": 2.123, "step": 486750 }, { "epoch": 1.8816780318844613, "grad_norm": 0.1545129418373108, "learning_rate": 0.00013709903644879717, "loss": 2.1264, "step": 486760 }, { "epoch": 1.8817166890878445, "grad_norm": 0.1632499098777771, "learning_rate": 0.0001369916801044606, "loss": 2.1117, "step": 486770 }, { "epoch": 1.8817553462912278, "grad_norm": 0.1726263165473938, "learning_rate": 0.0001368843299462055, "loss": 2.1142, "step": 486780 }, { "epoch": 1.8817940034946112, "grad_norm": 0.16340069472789764, "learning_rate": 0.00013677698597296196, "loss": 2.1167, "step": 486790 }, { "epoch": 1.8818326606979945, "grad_norm": 0.1490408033132553, "learning_rate": 0.00013666964818366157, "loss": 2.1218, "step": 486800 }, { "epoch": 1.8818713179013777, "grad_norm": 0.16850118339061737, "learning_rate": 0.00013656231657723517, "loss": 2.1352, "step": 486810 }, { "epoch": 1.881909975104761, "grad_norm": 0.16097033023834229, "learning_rate": 0.00013645499115261516, "loss": 2.1231, "step": 486820 }, { "epoch": 1.8819486323081445, "grad_norm": 0.163710355758667, "learning_rate": 0.00013634767190873288, "loss": 2.1355, "step": 486830 }, { "epoch": 1.8819872895115277, "grad_norm": 0.15214866399765015, "learning_rate": 0.0001362403588445209, "loss": 2.1244, "step": 486840 }, { "epoch": 1.882025946714911, "grad_norm": 0.15033996105194092, "learning_rate": 0.0001361330519589119, "loss": 2.1294, "step": 486850 }, { "epoch": 1.8820646039182942, "grad_norm": 0.1694653034210205, "learning_rate": 0.0001360257512508387, "loss": 2.1317, "step": 486860 }, { "epoch": 1.8821032611216775, "grad_norm": 0.1655498743057251, "learning_rate": 0.00013591845671923465, "loss": 2.1338, "step": 486870 }, { "epoch": 1.8821419183250607, "grad_norm": 0.17144210636615753, "learning_rate": 0.00013581116836303297, "loss": 2.1327, "step": 486880 }, { "epoch": 1.882180575528444, "grad_norm": 0.17053934931755066, "learning_rate": 0.0001357038861811679, "loss": 2.1095, "step": 486890 }, { "epoch": 1.8822192327318272, "grad_norm": 0.1776016503572464, "learning_rate": 0.00013559661017257317, "loss": 2.1428, "step": 486900 }, { "epoch": 1.8822578899352105, "grad_norm": 0.1560843139886856, "learning_rate": 0.00013548934033618366, "loss": 2.1265, "step": 486910 }, { "epoch": 1.8822965471385937, "grad_norm": 0.16443794965744019, "learning_rate": 0.0001353820766709337, "loss": 2.1147, "step": 486920 }, { "epoch": 1.882335204341977, "grad_norm": 0.15588612854480743, "learning_rate": 0.00013527481917575867, "loss": 2.1185, "step": 486930 }, { "epoch": 1.8823738615453602, "grad_norm": 0.16323307156562805, "learning_rate": 0.0001351675678495936, "loss": 2.1171, "step": 486940 }, { "epoch": 1.8824125187487435, "grad_norm": 0.1513441652059555, "learning_rate": 0.00013506032269137446, "loss": 2.1351, "step": 486950 }, { "epoch": 1.882451175952127, "grad_norm": 0.15196464955806732, "learning_rate": 0.00013495308370003724, "loss": 2.1327, "step": 486960 }, { "epoch": 1.8824898331555102, "grad_norm": 0.1596112847328186, "learning_rate": 0.00013484585087451828, "loss": 2.1268, "step": 486970 }, { "epoch": 1.8825284903588935, "grad_norm": 0.16076111793518066, "learning_rate": 0.00013473862421375382, "loss": 2.1334, "step": 486980 }, { "epoch": 1.8825671475622767, "grad_norm": 0.15090005099773407, "learning_rate": 0.00013463140371668093, "loss": 2.121, "step": 486990 }, { "epoch": 1.8826058047656602, "grad_norm": 0.15267503261566162, "learning_rate": 0.0001345241893822371, "loss": 2.1141, "step": 487000 }, { "epoch": 1.8826444619690434, "grad_norm": 0.1471244841814041, "learning_rate": 0.00013441698120935942, "loss": 2.1237, "step": 487010 }, { "epoch": 1.8826831191724267, "grad_norm": 0.16960223019123077, "learning_rate": 0.00013430977919698584, "loss": 2.1114, "step": 487020 }, { "epoch": 1.88272177637581, "grad_norm": 0.16533735394477844, "learning_rate": 0.00013420258334405455, "loss": 2.1116, "step": 487030 }, { "epoch": 1.8827604335791932, "grad_norm": 0.1614953726530075, "learning_rate": 0.00013409539364950395, "loss": 2.1298, "step": 487040 }, { "epoch": 1.8827990907825765, "grad_norm": 0.15685488283634186, "learning_rate": 0.00013398821011227248, "loss": 2.1154, "step": 487050 }, { "epoch": 1.8828377479859597, "grad_norm": 0.14576545357704163, "learning_rate": 0.00013388103273129982, "loss": 2.1209, "step": 487060 }, { "epoch": 1.882876405189343, "grad_norm": 0.158021941781044, "learning_rate": 0.00013377386150552483, "loss": 2.1167, "step": 487070 }, { "epoch": 1.8829150623927262, "grad_norm": 0.16273944079875946, "learning_rate": 0.0001336666964338873, "loss": 2.1141, "step": 487080 }, { "epoch": 1.8829537195961095, "grad_norm": 0.16825802624225616, "learning_rate": 0.0001335595375153269, "loss": 2.1288, "step": 487090 }, { "epoch": 1.8829923767994927, "grad_norm": 0.16017933189868927, "learning_rate": 0.0001334523847487843, "loss": 2.1084, "step": 487100 }, { "epoch": 1.883031034002876, "grad_norm": 0.16350767016410828, "learning_rate": 0.00013334523813319986, "loss": 2.1136, "step": 487110 }, { "epoch": 1.8830696912062592, "grad_norm": 0.17448604106903076, "learning_rate": 0.0001332380976675145, "loss": 2.1318, "step": 487120 }, { "epoch": 1.8831083484096427, "grad_norm": 0.16670966148376465, "learning_rate": 0.00013313096335066944, "loss": 2.1139, "step": 487130 }, { "epoch": 1.883147005613026, "grad_norm": 0.1731451004743576, "learning_rate": 0.00013302383518160578, "loss": 2.1107, "step": 487140 }, { "epoch": 1.8831856628164092, "grad_norm": 0.1635160744190216, "learning_rate": 0.00013291671315926568, "loss": 2.1325, "step": 487150 }, { "epoch": 1.8832243200197925, "grad_norm": 0.15141884982585907, "learning_rate": 0.0001328095972825909, "loss": 2.1392, "step": 487160 }, { "epoch": 1.883262977223176, "grad_norm": 0.15572725236415863, "learning_rate": 0.00013270248755052426, "loss": 2.1259, "step": 487170 }, { "epoch": 1.8833016344265592, "grad_norm": 0.15225672721862793, "learning_rate": 0.000132595383962008, "loss": 2.1211, "step": 487180 }, { "epoch": 1.8833402916299424, "grad_norm": 0.40289080142974854, "learning_rate": 0.00013248828651598555, "loss": 2.1192, "step": 487190 }, { "epoch": 1.8833789488333257, "grad_norm": 0.20261716842651367, "learning_rate": 0.00013238119521139958, "loss": 2.1045, "step": 487200 }, { "epoch": 1.883417606036709, "grad_norm": 0.1588941514492035, "learning_rate": 0.00013227411004719425, "loss": 2.1217, "step": 487210 }, { "epoch": 1.8834562632400922, "grad_norm": 0.16519004106521606, "learning_rate": 0.0001321670310223133, "loss": 2.1242, "step": 487220 }, { "epoch": 1.8834949204434754, "grad_norm": 0.15652592480182648, "learning_rate": 0.0001320599581357007, "loss": 2.1223, "step": 487230 }, { "epoch": 1.8835335776468587, "grad_norm": 0.18390634655952454, "learning_rate": 0.0001319528913863013, "loss": 2.121, "step": 487240 }, { "epoch": 1.883572234850242, "grad_norm": 0.15679307281970978, "learning_rate": 0.00013184583077305946, "loss": 2.1028, "step": 487250 }, { "epoch": 1.8836108920536252, "grad_norm": 0.17130286991596222, "learning_rate": 0.0001317387762949207, "loss": 2.1373, "step": 487260 }, { "epoch": 1.8836495492570084, "grad_norm": 0.15090951323509216, "learning_rate": 0.00013163172795083034, "loss": 2.1299, "step": 487270 }, { "epoch": 1.8836882064603917, "grad_norm": 0.15132005512714386, "learning_rate": 0.00013152468573973387, "loss": 2.143, "step": 487280 }, { "epoch": 1.883726863663775, "grad_norm": 0.15275931358337402, "learning_rate": 0.00013141764966057769, "loss": 2.1084, "step": 487290 }, { "epoch": 1.8837655208671584, "grad_norm": 0.1515328586101532, "learning_rate": 0.0001313106197123075, "loss": 2.1221, "step": 487300 }, { "epoch": 1.8838041780705417, "grad_norm": 0.153660848736763, "learning_rate": 0.00013120359589387042, "loss": 2.1173, "step": 487310 }, { "epoch": 1.883842835273925, "grad_norm": 0.15133818984031677, "learning_rate": 0.00013109657820421327, "loss": 2.1224, "step": 487320 }, { "epoch": 1.8838814924773082, "grad_norm": 0.16636015474796295, "learning_rate": 0.00013098956664228334, "loss": 2.1232, "step": 487330 }, { "epoch": 1.8839201496806917, "grad_norm": 0.1621353030204773, "learning_rate": 0.0001308825612070279, "loss": 2.1188, "step": 487340 }, { "epoch": 1.883958806884075, "grad_norm": 0.15565310418605804, "learning_rate": 0.0001307755618973947, "loss": 2.1248, "step": 487350 }, { "epoch": 1.8839974640874582, "grad_norm": 0.15307669341564178, "learning_rate": 0.00013066856871233234, "loss": 2.1392, "step": 487360 }, { "epoch": 1.8840361212908414, "grad_norm": 0.16902467608451843, "learning_rate": 0.0001305615816507888, "loss": 2.1208, "step": 487370 }, { "epoch": 1.8840747784942247, "grad_norm": 0.1794981211423874, "learning_rate": 0.0001304546007117131, "loss": 2.1186, "step": 487380 }, { "epoch": 1.884113435697608, "grad_norm": 0.16142766177654266, "learning_rate": 0.00013034762589405415, "loss": 2.139, "step": 487390 }, { "epoch": 1.8841520929009912, "grad_norm": 0.1620454490184784, "learning_rate": 0.00013024065719676115, "loss": 2.131, "step": 487400 }, { "epoch": 1.8841907501043744, "grad_norm": 0.17644056677818298, "learning_rate": 0.00013013369461878366, "loss": 2.1261, "step": 487410 }, { "epoch": 1.8842294073077577, "grad_norm": 0.15617609024047852, "learning_rate": 0.00013002673815907208, "loss": 2.1163, "step": 487420 }, { "epoch": 1.884268064511141, "grad_norm": 0.15311667323112488, "learning_rate": 0.0001299197878165761, "loss": 2.1354, "step": 487430 }, { "epoch": 1.8843067217145242, "grad_norm": 0.15431946516036987, "learning_rate": 0.00012981284359024658, "loss": 2.1246, "step": 487440 }, { "epoch": 1.8843453789179074, "grad_norm": 0.15608011186122894, "learning_rate": 0.00012970590547903393, "loss": 2.1194, "step": 487450 }, { "epoch": 1.8843840361212907, "grad_norm": 0.16052739322185516, "learning_rate": 0.00012959897348188988, "loss": 2.1343, "step": 487460 }, { "epoch": 1.8844226933246742, "grad_norm": 0.15716208517551422, "learning_rate": 0.00012949204759776545, "loss": 2.1354, "step": 487470 }, { "epoch": 1.8844613505280574, "grad_norm": 0.16176652908325195, "learning_rate": 0.0001293851278256124, "loss": 2.1224, "step": 487480 }, { "epoch": 1.8845000077314407, "grad_norm": 0.1501695066690445, "learning_rate": 0.0001292782141643829, "loss": 2.1173, "step": 487490 }, { "epoch": 1.884538664934824, "grad_norm": 0.1616787314414978, "learning_rate": 0.00012917130661302935, "loss": 2.1343, "step": 487500 }, { "epoch": 1.8845773221382074, "grad_norm": 0.15648533403873444, "learning_rate": 0.0001290644051705041, "loss": 2.1268, "step": 487510 }, { "epoch": 1.8846159793415906, "grad_norm": 0.17075617611408234, "learning_rate": 0.00012895750983576005, "loss": 2.1178, "step": 487520 }, { "epoch": 1.884654636544974, "grad_norm": 0.15663281083106995, "learning_rate": 0.00012885062060775067, "loss": 2.1132, "step": 487530 }, { "epoch": 1.8846932937483571, "grad_norm": 0.1682283878326416, "learning_rate": 0.00012874373748542924, "loss": 2.1176, "step": 487540 }, { "epoch": 1.8847319509517404, "grad_norm": 0.1556764394044876, "learning_rate": 0.00012863686046774993, "loss": 2.1112, "step": 487550 }, { "epoch": 1.8847706081551237, "grad_norm": 0.1526879519224167, "learning_rate": 0.00012852998955366647, "loss": 2.1326, "step": 487560 }, { "epoch": 1.884809265358507, "grad_norm": 0.17758522927761078, "learning_rate": 0.00012842312474213325, "loss": 2.1105, "step": 487570 }, { "epoch": 1.8848479225618902, "grad_norm": 0.16019943356513977, "learning_rate": 0.00012831626603210556, "loss": 2.1063, "step": 487580 }, { "epoch": 1.8848865797652734, "grad_norm": 0.16548269987106323, "learning_rate": 0.00012820941342253777, "loss": 2.1245, "step": 487590 }, { "epoch": 1.8849252369686567, "grad_norm": 0.16766045987606049, "learning_rate": 0.00012810256691238563, "loss": 2.1243, "step": 487600 }, { "epoch": 1.88496389417204, "grad_norm": 0.16872750222682953, "learning_rate": 0.00012799572650060444, "loss": 2.1228, "step": 487610 }, { "epoch": 1.8850025513754232, "grad_norm": 0.15807810425758362, "learning_rate": 0.0001278888921861503, "loss": 2.1236, "step": 487620 }, { "epoch": 1.8850412085788064, "grad_norm": 0.27638283371925354, "learning_rate": 0.0001277820639679792, "loss": 2.1273, "step": 487630 }, { "epoch": 1.88507986578219, "grad_norm": 0.17451395094394684, "learning_rate": 0.00012767524184504798, "loss": 2.1293, "step": 487640 }, { "epoch": 1.8851185229855731, "grad_norm": 0.16688884794712067, "learning_rate": 0.000127568425816313, "loss": 2.1357, "step": 487650 }, { "epoch": 1.8851571801889564, "grad_norm": 0.15373960137367249, "learning_rate": 0.0001274616158807318, "loss": 2.1121, "step": 487660 }, { "epoch": 1.8851958373923396, "grad_norm": 0.15518179535865784, "learning_rate": 0.0001273548120372616, "loss": 2.1217, "step": 487670 }, { "epoch": 1.8852344945957231, "grad_norm": 0.15639948844909668, "learning_rate": 0.00012724801428486, "loss": 2.1219, "step": 487680 }, { "epoch": 1.8852731517991064, "grad_norm": 0.1561547964811325, "learning_rate": 0.00012714122262248506, "loss": 2.1041, "step": 487690 }, { "epoch": 1.8853118090024896, "grad_norm": 0.15862923860549927, "learning_rate": 0.00012703443704909523, "loss": 2.1182, "step": 487700 }, { "epoch": 1.8853504662058729, "grad_norm": 0.1520431488752365, "learning_rate": 0.00012692765756364865, "loss": 2.1168, "step": 487710 }, { "epoch": 1.8853891234092561, "grad_norm": 0.1558041274547577, "learning_rate": 0.00012682088416510485, "loss": 2.1045, "step": 487720 }, { "epoch": 1.8854277806126394, "grad_norm": 0.15232814848423004, "learning_rate": 0.0001267141168524224, "loss": 2.1187, "step": 487730 }, { "epoch": 1.8854664378160226, "grad_norm": 0.15680523216724396, "learning_rate": 0.00012660735562456104, "loss": 2.1226, "step": 487740 }, { "epoch": 1.8855050950194059, "grad_norm": 0.15346527099609375, "learning_rate": 0.0001265006004804805, "loss": 2.1253, "step": 487750 }, { "epoch": 1.8855437522227891, "grad_norm": 0.16200987994670868, "learning_rate": 0.00012639385141914095, "loss": 2.1183, "step": 487760 }, { "epoch": 1.8855824094261724, "grad_norm": 0.159101665019989, "learning_rate": 0.00012628710843950276, "loss": 2.1051, "step": 487770 }, { "epoch": 1.8856210666295556, "grad_norm": 0.16451914608478546, "learning_rate": 0.0001261803715405263, "loss": 2.112, "step": 487780 }, { "epoch": 1.885659723832939, "grad_norm": 0.15994106233119965, "learning_rate": 0.00012607364072117288, "loss": 2.13, "step": 487790 }, { "epoch": 1.8856983810363221, "grad_norm": 0.16974908113479614, "learning_rate": 0.00012596691598040354, "loss": 2.1299, "step": 487800 }, { "epoch": 1.8857370382397056, "grad_norm": 0.1618840992450714, "learning_rate": 0.00012586019731717978, "loss": 2.1022, "step": 487810 }, { "epoch": 1.8857756954430889, "grad_norm": 0.15879476070404053, "learning_rate": 0.00012575348473046376, "loss": 2.1283, "step": 487820 }, { "epoch": 1.8858143526464721, "grad_norm": 0.16188471019268036, "learning_rate": 0.00012564677821921743, "loss": 2.1182, "step": 487830 }, { "epoch": 1.8858530098498554, "grad_norm": 0.16069914400577545, "learning_rate": 0.00012554007778240296, "loss": 2.12, "step": 487840 }, { "epoch": 1.8858916670532389, "grad_norm": 0.1658526360988617, "learning_rate": 0.0001254333834189836, "loss": 2.1085, "step": 487850 }, { "epoch": 1.885930324256622, "grad_norm": 0.1735226958990097, "learning_rate": 0.0001253266951279217, "loss": 2.1164, "step": 487860 }, { "epoch": 1.8859689814600054, "grad_norm": 0.16433432698249817, "learning_rate": 0.0001252200129081813, "loss": 2.1188, "step": 487870 }, { "epoch": 1.8860076386633886, "grad_norm": 0.15564902126789093, "learning_rate": 0.0001251133367587256, "loss": 2.1229, "step": 487880 }, { "epoch": 1.8860462958667719, "grad_norm": 0.16110435128211975, "learning_rate": 0.00012500666667851856, "loss": 2.126, "step": 487890 }, { "epoch": 1.8860849530701551, "grad_norm": 0.14982998371124268, "learning_rate": 0.00012490000266652434, "loss": 2.121, "step": 487900 }, { "epoch": 1.8861236102735384, "grad_norm": 0.17189742624759674, "learning_rate": 0.00012479334472170777, "loss": 2.118, "step": 487910 }, { "epoch": 1.8861622674769216, "grad_norm": 0.16747061908245087, "learning_rate": 0.00012468669284303324, "loss": 2.1028, "step": 487920 }, { "epoch": 1.8862009246803049, "grad_norm": 0.1881030797958374, "learning_rate": 0.00012458004702946602, "loss": 2.108, "step": 487930 }, { "epoch": 1.8862395818836881, "grad_norm": 0.16692771017551422, "learning_rate": 0.00012447340727997136, "loss": 2.1305, "step": 487940 }, { "epoch": 1.8862782390870714, "grad_norm": 0.15471774339675903, "learning_rate": 0.00012436677359351522, "loss": 2.1135, "step": 487950 }, { "epoch": 1.8863168962904546, "grad_norm": 0.1992231011390686, "learning_rate": 0.0001242601459690631, "loss": 2.125, "step": 487960 }, { "epoch": 1.8863555534938379, "grad_norm": 0.17142538726329803, "learning_rate": 0.0001241535244055818, "loss": 2.1121, "step": 487970 }, { "epoch": 1.8863942106972214, "grad_norm": 0.17059116065502167, "learning_rate": 0.0001240469089020375, "loss": 2.0921, "step": 487980 }, { "epoch": 1.8864328679006046, "grad_norm": 0.15251795947551727, "learning_rate": 0.00012394029945739726, "loss": 2.128, "step": 487990 }, { "epoch": 1.8864715251039879, "grad_norm": 0.14697898924350739, "learning_rate": 0.00012383369607062812, "loss": 2.1197, "step": 488000 }, { "epoch": 1.886510182307371, "grad_norm": 0.15622594952583313, "learning_rate": 0.00012372709874069755, "loss": 2.117, "step": 488010 }, { "epoch": 1.8865488395107546, "grad_norm": 0.15641766786575317, "learning_rate": 0.00012362050746657328, "loss": 2.127, "step": 488020 }, { "epoch": 1.8865874967141378, "grad_norm": 0.16108526289463043, "learning_rate": 0.00012351392224722323, "loss": 2.1187, "step": 488030 }, { "epoch": 1.886626153917521, "grad_norm": 0.16521552205085754, "learning_rate": 0.00012340734308161606, "loss": 2.1193, "step": 488040 }, { "epoch": 1.8866648111209043, "grad_norm": 0.15821205079555511, "learning_rate": 0.00012330076996871987, "loss": 2.1193, "step": 488050 }, { "epoch": 1.8867034683242876, "grad_norm": 0.16402612626552582, "learning_rate": 0.0001231942029075037, "loss": 2.104, "step": 488060 }, { "epoch": 1.8867421255276708, "grad_norm": 0.16501112282276154, "learning_rate": 0.0001230876418969371, "loss": 2.1198, "step": 488070 }, { "epoch": 1.886780782731054, "grad_norm": 0.16025055944919586, "learning_rate": 0.0001229810869359893, "loss": 2.1342, "step": 488080 }, { "epoch": 1.8868194399344373, "grad_norm": 0.15322361886501312, "learning_rate": 0.00012287453802363002, "loss": 2.1132, "step": 488090 }, { "epoch": 1.8868580971378206, "grad_norm": 0.15714363753795624, "learning_rate": 0.00012276799515882964, "loss": 2.116, "step": 488100 }, { "epoch": 1.8868967543412039, "grad_norm": 0.15222831070423126, "learning_rate": 0.0001226614583405581, "loss": 2.1186, "step": 488110 }, { "epoch": 1.886935411544587, "grad_norm": 0.1677834838628769, "learning_rate": 0.00012255492756778642, "loss": 2.1143, "step": 488120 }, { "epoch": 1.8869740687479704, "grad_norm": 0.15755745768547058, "learning_rate": 0.00012244840283948523, "loss": 2.1114, "step": 488130 }, { "epoch": 1.8870127259513536, "grad_norm": 0.17090429365634918, "learning_rate": 0.000122341884154626, "loss": 2.1224, "step": 488140 }, { "epoch": 1.887051383154737, "grad_norm": 0.17305462062358856, "learning_rate": 0.00012223537151218023, "loss": 2.1288, "step": 488150 }, { "epoch": 1.8870900403581203, "grad_norm": 0.17158415913581848, "learning_rate": 0.00012212886491111963, "loss": 2.1184, "step": 488160 }, { "epoch": 1.8871286975615036, "grad_norm": 0.17280597984790802, "learning_rate": 0.00012202236435041637, "loss": 2.1083, "step": 488170 }, { "epoch": 1.8871673547648868, "grad_norm": 0.16423514485359192, "learning_rate": 0.00012191586982904301, "loss": 2.1219, "step": 488180 }, { "epoch": 1.8872060119682703, "grad_norm": 0.16097402572631836, "learning_rate": 0.00012180938134597219, "loss": 2.1161, "step": 488190 }, { "epoch": 1.8872446691716536, "grad_norm": 0.15495575964450836, "learning_rate": 0.00012170289890017671, "loss": 2.1266, "step": 488200 }, { "epoch": 1.8872833263750368, "grad_norm": 0.15620481967926025, "learning_rate": 0.00012159642249062985, "loss": 2.1178, "step": 488210 }, { "epoch": 1.88732198357842, "grad_norm": 0.15706951916217804, "learning_rate": 0.00012148995211630553, "loss": 2.1418, "step": 488220 }, { "epoch": 1.8873606407818033, "grad_norm": 0.16063298285007477, "learning_rate": 0.00012138348777617747, "loss": 2.112, "step": 488230 }, { "epoch": 1.8873992979851866, "grad_norm": 0.16350902616977692, "learning_rate": 0.0001212770294692196, "loss": 2.108, "step": 488240 }, { "epoch": 1.8874379551885698, "grad_norm": 0.17477919161319733, "learning_rate": 0.0001211705771944065, "loss": 2.1117, "step": 488250 }, { "epoch": 1.887476612391953, "grad_norm": 0.16410206258296967, "learning_rate": 0.00012106413095071278, "loss": 2.1094, "step": 488260 }, { "epoch": 1.8875152695953363, "grad_norm": 0.1584874391555786, "learning_rate": 0.00012095769073711371, "loss": 2.1284, "step": 488270 }, { "epoch": 1.8875539267987196, "grad_norm": 0.1609467715024948, "learning_rate": 0.00012085125655258455, "loss": 2.1154, "step": 488280 }, { "epoch": 1.8875925840021028, "grad_norm": 0.15845824778079987, "learning_rate": 0.00012074482839610102, "loss": 2.1077, "step": 488290 }, { "epoch": 1.887631241205486, "grad_norm": 0.15312768518924713, "learning_rate": 0.00012063840626663858, "loss": 2.1206, "step": 488300 }, { "epoch": 1.8876698984088696, "grad_norm": 0.40977486968040466, "learning_rate": 0.00012053199016317384, "loss": 2.116, "step": 488310 }, { "epoch": 1.8877085556122528, "grad_norm": 0.1777162402868271, "learning_rate": 0.00012042558008468318, "loss": 2.115, "step": 488320 }, { "epoch": 1.887747212815636, "grad_norm": 0.1626431792974472, "learning_rate": 0.00012031917603014319, "loss": 2.108, "step": 488330 }, { "epoch": 1.8877858700190193, "grad_norm": 0.16696617007255554, "learning_rate": 0.00012021277799853114, "loss": 2.1241, "step": 488340 }, { "epoch": 1.8878245272224026, "grad_norm": 0.15991130471229553, "learning_rate": 0.00012010638598882407, "loss": 2.105, "step": 488350 }, { "epoch": 1.887863184425786, "grad_norm": 0.16908201575279236, "learning_rate": 0.00011999999999999988, "loss": 2.099, "step": 488360 }, { "epoch": 1.8879018416291693, "grad_norm": 0.15702217817306519, "learning_rate": 0.00011989362003103655, "loss": 2.1133, "step": 488370 }, { "epoch": 1.8879404988325525, "grad_norm": 0.15526017546653748, "learning_rate": 0.00011978724608091218, "loss": 2.118, "step": 488380 }, { "epoch": 1.8879791560359358, "grad_norm": 0.1556904911994934, "learning_rate": 0.00011968087814860539, "loss": 2.1328, "step": 488390 }, { "epoch": 1.888017813239319, "grad_norm": 0.15495745837688446, "learning_rate": 0.00011957451623309457, "loss": 2.1317, "step": 488400 }, { "epoch": 1.8880564704427023, "grad_norm": 0.16102412343025208, "learning_rate": 0.00011946816033335938, "loss": 2.1058, "step": 488410 }, { "epoch": 1.8880951276460856, "grad_norm": 0.16249670088291168, "learning_rate": 0.00011936181044837868, "loss": 2.1146, "step": 488420 }, { "epoch": 1.8881337848494688, "grad_norm": 0.165382981300354, "learning_rate": 0.00011925546657713238, "loss": 2.1091, "step": 488430 }, { "epoch": 1.888172442052852, "grad_norm": 0.16189248859882355, "learning_rate": 0.0001191491287186004, "loss": 2.1206, "step": 488440 }, { "epoch": 1.8882110992562353, "grad_norm": 0.1849163919687271, "learning_rate": 0.00011904279687176312, "loss": 2.1092, "step": 488450 }, { "epoch": 1.8882497564596186, "grad_norm": 0.17459073662757874, "learning_rate": 0.00011893647103560046, "loss": 2.1263, "step": 488460 }, { "epoch": 1.8882884136630018, "grad_norm": 0.1747400015592575, "learning_rate": 0.00011883015120909391, "loss": 2.1259, "step": 488470 }, { "epoch": 1.8883270708663853, "grad_norm": 0.16780216991901398, "learning_rate": 0.0001187238373912245, "loss": 2.123, "step": 488480 }, { "epoch": 1.8883657280697685, "grad_norm": 0.17317448556423187, "learning_rate": 0.00011861752958097328, "loss": 2.101, "step": 488490 }, { "epoch": 1.8884043852731518, "grad_norm": 0.1796831637620926, "learning_rate": 0.00011851122777732215, "loss": 2.1058, "step": 488500 }, { "epoch": 1.888443042476535, "grad_norm": 0.18265679478645325, "learning_rate": 0.00011840493197925285, "loss": 2.1085, "step": 488510 }, { "epoch": 1.8884816996799183, "grad_norm": 0.15292461216449738, "learning_rate": 0.00011829864218574792, "loss": 2.1173, "step": 488520 }, { "epoch": 1.8885203568833018, "grad_norm": 0.15771964192390442, "learning_rate": 0.0001181923583957898, "loss": 2.1086, "step": 488530 }, { "epoch": 1.888559014086685, "grad_norm": 0.16015884280204773, "learning_rate": 0.00011808608060836123, "loss": 2.1265, "step": 488540 }, { "epoch": 1.8885976712900683, "grad_norm": 0.1658799648284912, "learning_rate": 0.0001179798088224453, "loss": 2.1318, "step": 488550 }, { "epoch": 1.8886363284934515, "grad_norm": 0.23712924122810364, "learning_rate": 0.00011787354303702525, "loss": 2.127, "step": 488560 }, { "epoch": 1.8886749856968348, "grad_norm": 0.17107848823070526, "learning_rate": 0.00011776728325108521, "loss": 2.1213, "step": 488570 }, { "epoch": 1.888713642900218, "grad_norm": 0.16752935945987701, "learning_rate": 0.00011766102946360912, "loss": 2.1112, "step": 488580 }, { "epoch": 1.8887523001036013, "grad_norm": 0.1616651713848114, "learning_rate": 0.00011755478167358069, "loss": 2.1214, "step": 488590 }, { "epoch": 1.8887909573069845, "grad_norm": 0.15499405562877655, "learning_rate": 0.00011744853987998516, "loss": 2.1187, "step": 488600 }, { "epoch": 1.8888296145103678, "grad_norm": 0.16054315865039825, "learning_rate": 0.00011734230408180691, "loss": 2.1052, "step": 488610 }, { "epoch": 1.888868271713751, "grad_norm": 0.15929390490055084, "learning_rate": 0.00011723607427803095, "loss": 2.0946, "step": 488620 }, { "epoch": 1.8889069289171343, "grad_norm": 0.15963678061962128, "learning_rate": 0.00011712985046764325, "loss": 2.1204, "step": 488630 }, { "epoch": 1.8889455861205176, "grad_norm": 0.17510229349136353, "learning_rate": 0.00011702363264962879, "loss": 2.1243, "step": 488640 }, { "epoch": 1.888984243323901, "grad_norm": 0.15754762291908264, "learning_rate": 0.00011691742082297418, "loss": 2.1106, "step": 488650 }, { "epoch": 1.8890229005272843, "grad_norm": 0.1671770066022873, "learning_rate": 0.00011681121498666514, "loss": 2.1041, "step": 488660 }, { "epoch": 1.8890615577306675, "grad_norm": 0.17678804695606232, "learning_rate": 0.00011670501513968867, "loss": 2.1089, "step": 488670 }, { "epoch": 1.8891002149340508, "grad_norm": 0.16856589913368225, "learning_rate": 0.00011659882128103139, "loss": 2.1023, "step": 488680 }, { "epoch": 1.8891388721374343, "grad_norm": 0.17763420939445496, "learning_rate": 0.00011649263340968052, "loss": 2.1089, "step": 488690 }, { "epoch": 1.8891775293408175, "grad_norm": 0.15996591746807098, "learning_rate": 0.00011638645152462335, "loss": 2.1165, "step": 488700 }, { "epoch": 1.8892161865442008, "grad_norm": 0.16765794157981873, "learning_rate": 0.00011628027562484755, "loss": 2.1063, "step": 488710 }, { "epoch": 1.889254843747584, "grad_norm": 0.16311784088611603, "learning_rate": 0.00011617410570934128, "loss": 2.1214, "step": 488720 }, { "epoch": 1.8892935009509673, "grad_norm": 0.16353066265583038, "learning_rate": 0.00011606794177709268, "loss": 2.1171, "step": 488730 }, { "epoch": 1.8893321581543505, "grad_norm": 0.15941888093948364, "learning_rate": 0.00011596178382709033, "loss": 2.0929, "step": 488740 }, { "epoch": 1.8893708153577338, "grad_norm": 0.15811793506145477, "learning_rate": 0.00011585563185832282, "loss": 2.1072, "step": 488750 }, { "epoch": 1.889409472561117, "grad_norm": 0.16331081092357635, "learning_rate": 0.00011574948586977962, "loss": 2.1119, "step": 488760 }, { "epoch": 1.8894481297645003, "grad_norm": 0.1618635654449463, "learning_rate": 0.00011564334586044978, "loss": 2.1161, "step": 488770 }, { "epoch": 1.8894867869678835, "grad_norm": 0.17881222069263458, "learning_rate": 0.00011553721182932343, "loss": 2.1156, "step": 488780 }, { "epoch": 1.8895254441712668, "grad_norm": 0.18215249478816986, "learning_rate": 0.00011543108377539024, "loss": 2.1133, "step": 488790 }, { "epoch": 1.88956410137465, "grad_norm": 0.17007042467594147, "learning_rate": 0.00011532496169764061, "loss": 2.1209, "step": 488800 }, { "epoch": 1.8896027585780333, "grad_norm": 0.16164295375347137, "learning_rate": 0.0001152188455950649, "loss": 2.1067, "step": 488810 }, { "epoch": 1.8896414157814168, "grad_norm": 0.17763203382492065, "learning_rate": 0.0001151127354666539, "loss": 2.1166, "step": 488820 }, { "epoch": 1.8896800729848, "grad_norm": 0.1676315814256668, "learning_rate": 0.0001150066313113991, "loss": 2.1156, "step": 488830 }, { "epoch": 1.8897187301881833, "grad_norm": 0.15571266412734985, "learning_rate": 0.00011490053312829151, "loss": 2.1094, "step": 488840 }, { "epoch": 1.8897573873915665, "grad_norm": 0.17012977600097656, "learning_rate": 0.00011479444091632285, "loss": 2.1101, "step": 488850 }, { "epoch": 1.88979604459495, "grad_norm": 0.16374844312667847, "learning_rate": 0.00011468835467448524, "loss": 2.1116, "step": 488860 }, { "epoch": 1.8898347017983332, "grad_norm": 0.15436828136444092, "learning_rate": 0.0001145822744017706, "loss": 2.1159, "step": 488870 }, { "epoch": 1.8898733590017165, "grad_norm": 0.15185752511024475, "learning_rate": 0.00011447620009717197, "loss": 2.1062, "step": 488880 }, { "epoch": 1.8899120162050997, "grad_norm": 0.16675709187984467, "learning_rate": 0.0001143701317596817, "loss": 2.0991, "step": 488890 }, { "epoch": 1.889950673408483, "grad_norm": 0.15582484006881714, "learning_rate": 0.00011426406938829326, "loss": 2.1184, "step": 488900 }, { "epoch": 1.8899893306118662, "grad_norm": 0.1695159524679184, "learning_rate": 0.00011415801298199969, "loss": 2.1161, "step": 488910 }, { "epoch": 1.8900279878152495, "grad_norm": 0.1637667566537857, "learning_rate": 0.00011405196253979466, "loss": 2.1202, "step": 488920 }, { "epoch": 1.8900666450186328, "grad_norm": 0.16691024601459503, "learning_rate": 0.00011394591806067233, "loss": 2.1157, "step": 488930 }, { "epoch": 1.890105302222016, "grad_norm": 0.1548158973455429, "learning_rate": 0.00011383987954362685, "loss": 2.1125, "step": 488940 }, { "epoch": 1.8901439594253993, "grad_norm": 0.17079788446426392, "learning_rate": 0.00011373384698765276, "loss": 2.1317, "step": 488950 }, { "epoch": 1.8901826166287825, "grad_norm": 0.1615532487630844, "learning_rate": 0.00011362782039174468, "loss": 2.1065, "step": 488960 }, { "epoch": 1.8902212738321658, "grad_norm": 0.1652381718158722, "learning_rate": 0.00011352179975489763, "loss": 2.1217, "step": 488970 }, { "epoch": 1.890259931035549, "grad_norm": 0.15695932507514954, "learning_rate": 0.0001134157850761075, "loss": 2.1181, "step": 488980 }, { "epoch": 1.8902985882389325, "grad_norm": 0.15450656414031982, "learning_rate": 0.00011330977635436934, "loss": 2.0999, "step": 488990 }, { "epoch": 1.8903372454423157, "grad_norm": 0.16795946657657623, "learning_rate": 0.00011320377358867929, "loss": 2.1087, "step": 489000 }, { "epoch": 1.890375902645699, "grad_norm": 0.18700319528579712, "learning_rate": 0.00011309777677803346, "loss": 2.0991, "step": 489010 }, { "epoch": 1.8904145598490822, "grad_norm": 0.15968748927116394, "learning_rate": 0.00011299178592142844, "loss": 2.1284, "step": 489020 }, { "epoch": 1.8904532170524657, "grad_norm": 0.1658526510000229, "learning_rate": 0.00011288580101786105, "loss": 2.1193, "step": 489030 }, { "epoch": 1.890491874255849, "grad_norm": 0.1720876395702362, "learning_rate": 0.00011277982206632808, "loss": 2.1051, "step": 489040 }, { "epoch": 1.8905305314592322, "grad_norm": 0.17556461691856384, "learning_rate": 0.00011267384906582723, "loss": 2.107, "step": 489050 }, { "epoch": 1.8905691886626155, "grad_norm": 0.16108305752277374, "learning_rate": 0.00011256788201535573, "loss": 2.1214, "step": 489060 }, { "epoch": 1.8906078458659987, "grad_norm": 0.169228196144104, "learning_rate": 0.00011246192091391172, "loss": 2.1144, "step": 489070 }, { "epoch": 1.890646503069382, "grad_norm": 0.16416077315807343, "learning_rate": 0.00011235596576049334, "loss": 2.0981, "step": 489080 }, { "epoch": 1.8906851602727652, "grad_norm": 0.15246742963790894, "learning_rate": 0.00011225001655409894, "loss": 2.1026, "step": 489090 }, { "epoch": 1.8907238174761485, "grad_norm": 0.1551797091960907, "learning_rate": 0.00011214407329372711, "loss": 2.1206, "step": 489100 }, { "epoch": 1.8907624746795317, "grad_norm": 0.16857977211475372, "learning_rate": 0.00011203813597837731, "loss": 2.1246, "step": 489110 }, { "epoch": 1.890801131882915, "grad_norm": 0.1534964144229889, "learning_rate": 0.00011193220460704856, "loss": 2.0995, "step": 489120 }, { "epoch": 1.8908397890862982, "grad_norm": 0.16405372321605682, "learning_rate": 0.00011182627917874033, "loss": 2.0984, "step": 489130 }, { "epoch": 1.8908784462896815, "grad_norm": 0.16919684410095215, "learning_rate": 0.00011172035969245275, "loss": 2.1236, "step": 489140 }, { "epoch": 1.8909171034930647, "grad_norm": 0.17017854750156403, "learning_rate": 0.00011161444614718574, "loss": 2.1196, "step": 489150 }, { "epoch": 1.8909557606964482, "grad_norm": 0.17277006804943085, "learning_rate": 0.00011150853854193987, "loss": 2.1239, "step": 489160 }, { "epoch": 1.8909944178998315, "grad_norm": 0.17439673840999603, "learning_rate": 0.0001114026368757155, "loss": 2.0974, "step": 489170 }, { "epoch": 1.8910330751032147, "grad_norm": 0.17001686990261078, "learning_rate": 0.00011129674114751409, "loss": 2.1178, "step": 489180 }, { "epoch": 1.891071732306598, "grad_norm": 0.17020079493522644, "learning_rate": 0.00011119085135633666, "loss": 2.1116, "step": 489190 }, { "epoch": 1.8911103895099814, "grad_norm": 0.1608460545539856, "learning_rate": 0.0001110849675011849, "loss": 2.1123, "step": 489200 }, { "epoch": 1.8911490467133647, "grad_norm": 0.16076086461544037, "learning_rate": 0.00011097908958106028, "loss": 2.1265, "step": 489210 }, { "epoch": 1.891187703916748, "grad_norm": 0.17049437761306763, "learning_rate": 0.00011087321759496517, "loss": 2.1173, "step": 489220 }, { "epoch": 1.8912263611201312, "grad_norm": 0.1605737954378128, "learning_rate": 0.00011076735154190188, "loss": 2.1261, "step": 489230 }, { "epoch": 1.8912650183235145, "grad_norm": 0.16839636862277985, "learning_rate": 0.000110661491420873, "loss": 2.1043, "step": 489240 }, { "epoch": 1.8913036755268977, "grad_norm": 0.16663509607315063, "learning_rate": 0.00011055563723088157, "loss": 2.1162, "step": 489250 }, { "epoch": 1.891342332730281, "grad_norm": 0.17225117981433868, "learning_rate": 0.00011044978897093084, "loss": 2.1134, "step": 489260 }, { "epoch": 1.8913809899336642, "grad_norm": 0.1735590398311615, "learning_rate": 0.000110343946640024, "loss": 2.1098, "step": 489270 }, { "epoch": 1.8914196471370475, "grad_norm": 0.16959644854068756, "learning_rate": 0.00011023811023716523, "loss": 2.1175, "step": 489280 }, { "epoch": 1.8914583043404307, "grad_norm": 0.17331750690937042, "learning_rate": 0.00011013227976135842, "loss": 2.1147, "step": 489290 }, { "epoch": 1.891496961543814, "grad_norm": 0.18145471811294556, "learning_rate": 0.00011002645521160792, "loss": 2.1365, "step": 489300 }, { "epoch": 1.8915356187471972, "grad_norm": 0.159023255109787, "learning_rate": 0.00010992063658691832, "loss": 2.0984, "step": 489310 }, { "epoch": 1.8915742759505805, "grad_norm": 0.21775251626968384, "learning_rate": 0.00010981482388629438, "loss": 2.1036, "step": 489320 }, { "epoch": 1.891612933153964, "grad_norm": 0.16009607911109924, "learning_rate": 0.00010970901710874159, "loss": 2.1108, "step": 489330 }, { "epoch": 1.8916515903573472, "grad_norm": 0.17309287190437317, "learning_rate": 0.00010960321625326497, "loss": 2.1237, "step": 489340 }, { "epoch": 1.8916902475607305, "grad_norm": 0.15677177906036377, "learning_rate": 0.00010949742131887063, "loss": 2.1011, "step": 489350 }, { "epoch": 1.8917289047641137, "grad_norm": 0.16687935590744019, "learning_rate": 0.00010939163230456428, "loss": 2.1016, "step": 489360 }, { "epoch": 1.8917675619674972, "grad_norm": 0.17260977625846863, "learning_rate": 0.00010928584920935225, "loss": 2.1076, "step": 489370 }, { "epoch": 1.8918062191708804, "grad_norm": 0.17035962641239166, "learning_rate": 0.00010918007203224135, "loss": 2.1092, "step": 489380 }, { "epoch": 1.8918448763742637, "grad_norm": 0.17869456112384796, "learning_rate": 0.00010907430077223812, "loss": 2.1203, "step": 489390 }, { "epoch": 1.891883533577647, "grad_norm": 0.17196069657802582, "learning_rate": 0.00010896853542834984, "loss": 2.1163, "step": 489400 }, { "epoch": 1.8919221907810302, "grad_norm": 0.17762623727321625, "learning_rate": 0.00010886277599958394, "loss": 2.1064, "step": 489410 }, { "epoch": 1.8919608479844134, "grad_norm": 0.16974352300167084, "learning_rate": 0.00010875702248494789, "loss": 2.1059, "step": 489420 }, { "epoch": 1.8919995051877967, "grad_norm": 0.16500453650951385, "learning_rate": 0.00010865127488344984, "loss": 2.1321, "step": 489430 }, { "epoch": 1.89203816239118, "grad_norm": 0.1621197909116745, "learning_rate": 0.00010854553319409788, "loss": 2.1056, "step": 489440 }, { "epoch": 1.8920768195945632, "grad_norm": 0.178715780377388, "learning_rate": 0.00010843979741590037, "loss": 2.0964, "step": 489450 }, { "epoch": 1.8921154767979464, "grad_norm": 0.17246749997138977, "learning_rate": 0.00010833406754786656, "loss": 2.1143, "step": 489460 }, { "epoch": 1.8921541340013297, "grad_norm": 0.1624779850244522, "learning_rate": 0.00010822834358900479, "loss": 2.1088, "step": 489470 }, { "epoch": 1.892192791204713, "grad_norm": 0.16980381309986115, "learning_rate": 0.00010812262553832519, "loss": 2.129, "step": 489480 }, { "epoch": 1.8922314484080962, "grad_norm": 0.1774771809577942, "learning_rate": 0.000108016913394837, "loss": 2.1068, "step": 489490 }, { "epoch": 1.8922701056114797, "grad_norm": 0.182191401720047, "learning_rate": 0.00010791120715754987, "loss": 2.1016, "step": 489500 }, { "epoch": 1.892308762814863, "grad_norm": 0.17115147411823273, "learning_rate": 0.00010780550682547441, "loss": 2.1183, "step": 489510 }, { "epoch": 1.8923474200182462, "grad_norm": 0.1588856279850006, "learning_rate": 0.00010769981239762072, "loss": 2.1241, "step": 489520 }, { "epoch": 1.8923860772216294, "grad_norm": 0.15776441991329193, "learning_rate": 0.0001075941238729996, "loss": 2.1039, "step": 489530 }, { "epoch": 1.892424734425013, "grad_norm": 0.15810024738311768, "learning_rate": 0.00010748844125062207, "loss": 2.1067, "step": 489540 }, { "epoch": 1.8924633916283962, "grad_norm": 0.15623559057712555, "learning_rate": 0.00010738276452949957, "loss": 2.1129, "step": 489550 }, { "epoch": 1.8925020488317794, "grad_norm": 0.16129539906978607, "learning_rate": 0.00010727709370864335, "loss": 2.1111, "step": 489560 }, { "epoch": 1.8925407060351627, "grad_norm": 0.1808207482099533, "learning_rate": 0.0001071714287870651, "loss": 2.1164, "step": 489570 }, { "epoch": 1.892579363238546, "grad_norm": 0.16985240578651428, "learning_rate": 0.00010706576976377757, "loss": 2.1112, "step": 489580 }, { "epoch": 1.8926180204419292, "grad_norm": 0.16655011475086212, "learning_rate": 0.00010696011663779248, "loss": 2.1204, "step": 489590 }, { "epoch": 1.8926566776453124, "grad_norm": 0.16127444803714752, "learning_rate": 0.00010685446940812282, "loss": 2.1041, "step": 489600 }, { "epoch": 1.8926953348486957, "grad_norm": 0.17058631777763367, "learning_rate": 0.0001067488280737814, "loss": 2.1279, "step": 489610 }, { "epoch": 1.892733992052079, "grad_norm": 0.1634225845336914, "learning_rate": 0.00010664319263378142, "loss": 2.1183, "step": 489620 }, { "epoch": 1.8927726492554622, "grad_norm": 0.16177918016910553, "learning_rate": 0.00010653756308713635, "loss": 2.1147, "step": 489630 }, { "epoch": 1.8928113064588454, "grad_norm": 0.16086721420288086, "learning_rate": 0.00010643193943286011, "loss": 2.1138, "step": 489640 }, { "epoch": 1.8928499636622287, "grad_norm": 0.1561380922794342, "learning_rate": 0.00010632632166996636, "loss": 2.1159, "step": 489650 }, { "epoch": 1.892888620865612, "grad_norm": 0.17296133935451508, "learning_rate": 0.00010622070979746967, "loss": 2.1123, "step": 489660 }, { "epoch": 1.8929272780689954, "grad_norm": 0.16407518088817596, "learning_rate": 0.00010611510381438439, "loss": 2.1127, "step": 489670 }, { "epoch": 1.8929659352723787, "grad_norm": 0.16518047451972961, "learning_rate": 0.00010600950371972551, "loss": 2.12, "step": 489680 }, { "epoch": 1.893004592475762, "grad_norm": 0.1828446090221405, "learning_rate": 0.00010590390951250828, "loss": 2.1121, "step": 489690 }, { "epoch": 1.8930432496791452, "grad_norm": 0.17029117047786713, "learning_rate": 0.00010579832119174814, "loss": 2.1005, "step": 489700 }, { "epoch": 1.8930819068825286, "grad_norm": 0.15997372567653656, "learning_rate": 0.00010569273875646035, "loss": 2.1075, "step": 489710 }, { "epoch": 1.893120564085912, "grad_norm": 0.1557878702878952, "learning_rate": 0.00010558716220566122, "loss": 2.1279, "step": 489720 }, { "epoch": 1.8931592212892951, "grad_norm": 0.18081702291965485, "learning_rate": 0.00010548159153836667, "loss": 2.1233, "step": 489730 }, { "epoch": 1.8931978784926784, "grad_norm": 0.16168901324272156, "learning_rate": 0.00010537602675359348, "loss": 2.1285, "step": 489740 }, { "epoch": 1.8932365356960617, "grad_norm": 0.16792845726013184, "learning_rate": 0.00010527046785035843, "loss": 2.1283, "step": 489750 }, { "epoch": 1.893275192899445, "grad_norm": 0.16445757448673248, "learning_rate": 0.00010516491482767832, "loss": 2.1007, "step": 489760 }, { "epoch": 1.8933138501028282, "grad_norm": 0.17205293476581573, "learning_rate": 0.00010505936768457036, "loss": 2.1099, "step": 489770 }, { "epoch": 1.8933525073062114, "grad_norm": 0.15777753293514252, "learning_rate": 0.0001049538264200527, "loss": 2.1143, "step": 489780 }, { "epoch": 1.8933911645095947, "grad_norm": 0.17058062553405762, "learning_rate": 0.00010484829103314253, "loss": 2.121, "step": 489790 }, { "epoch": 1.893429821712978, "grad_norm": 0.16718408465385437, "learning_rate": 0.00010474276152285867, "loss": 2.1058, "step": 489800 }, { "epoch": 1.8934684789163612, "grad_norm": 0.16895322501659393, "learning_rate": 0.00010463723788821899, "loss": 2.125, "step": 489810 }, { "epoch": 1.8935071361197444, "grad_norm": 0.1603923887014389, "learning_rate": 0.00010453172012824231, "loss": 2.1064, "step": 489820 }, { "epoch": 1.8935457933231277, "grad_norm": 0.16751614212989807, "learning_rate": 0.00010442620824194759, "loss": 2.0994, "step": 489830 }, { "epoch": 1.8935844505265111, "grad_norm": 0.16878701746463776, "learning_rate": 0.00010432070222835433, "loss": 2.1189, "step": 489840 }, { "epoch": 1.8936231077298944, "grad_norm": 0.1680416315793991, "learning_rate": 0.00010421520208648149, "loss": 2.1102, "step": 489850 }, { "epoch": 1.8936617649332776, "grad_norm": 0.18519482016563416, "learning_rate": 0.00010410970781534945, "loss": 2.1132, "step": 489860 }, { "epoch": 1.893700422136661, "grad_norm": 0.15986217558383942, "learning_rate": 0.00010400421941397764, "loss": 2.1003, "step": 489870 }, { "epoch": 1.8937390793400444, "grad_norm": 0.1842077672481537, "learning_rate": 0.00010389873688138684, "loss": 2.1031, "step": 489880 }, { "epoch": 1.8937777365434276, "grad_norm": 0.17824490368366241, "learning_rate": 0.00010379326021659763, "loss": 2.1104, "step": 489890 }, { "epoch": 1.8938163937468109, "grad_norm": 0.1671292632818222, "learning_rate": 0.00010368778941863055, "loss": 2.0957, "step": 489900 }, { "epoch": 1.8938550509501941, "grad_norm": 0.169941246509552, "learning_rate": 0.00010358232448650707, "loss": 2.1178, "step": 489910 }, { "epoch": 1.8938937081535774, "grad_norm": 0.16140219569206238, "learning_rate": 0.00010347686541924839, "loss": 2.1135, "step": 489920 }, { "epoch": 1.8939323653569606, "grad_norm": 0.15384657680988312, "learning_rate": 0.0001033714122158762, "loss": 2.1182, "step": 489930 }, { "epoch": 1.8939710225603439, "grad_norm": 0.1640327274799347, "learning_rate": 0.0001032659648754124, "loss": 2.108, "step": 489940 }, { "epoch": 1.8940096797637271, "grad_norm": 0.17281986773014069, "learning_rate": 0.00010316052339687953, "loss": 2.1032, "step": 489950 }, { "epoch": 1.8940483369671104, "grad_norm": 0.16244745254516602, "learning_rate": 0.00010305508777929995, "loss": 2.108, "step": 489960 }, { "epoch": 1.8940869941704936, "grad_norm": 0.17009977996349335, "learning_rate": 0.00010294965802169598, "loss": 2.1132, "step": 489970 }, { "epoch": 1.894125651373877, "grad_norm": 0.1579209715127945, "learning_rate": 0.0001028442341230913, "loss": 2.1107, "step": 489980 }, { "epoch": 1.8941643085772601, "grad_norm": 0.16433437168598175, "learning_rate": 0.00010273881608250891, "loss": 2.122, "step": 489990 }, { "epoch": 1.8942029657806434, "grad_norm": 0.1659168004989624, "learning_rate": 0.00010263340389897247, "loss": 2.1132, "step": 490000 }, { "epoch": 1.8942416229840269, "grad_norm": 0.16217505931854248, "learning_rate": 0.00010252799757150566, "loss": 2.1159, "step": 490010 }, { "epoch": 1.8942802801874101, "grad_norm": 0.15367664396762848, "learning_rate": 0.00010242259709913282, "loss": 2.1236, "step": 490020 }, { "epoch": 1.8943189373907934, "grad_norm": 0.1673082560300827, "learning_rate": 0.00010231720248087829, "loss": 2.1194, "step": 490030 }, { "epoch": 1.8943575945941766, "grad_norm": 0.16598571836948395, "learning_rate": 0.0001022118137157666, "loss": 2.1027, "step": 490040 }, { "epoch": 1.89439625179756, "grad_norm": 0.8749808669090271, "learning_rate": 0.00010210643080282301, "loss": 2.1101, "step": 490050 }, { "epoch": 1.8944349090009434, "grad_norm": 0.16333632171154022, "learning_rate": 0.00010200105374107228, "loss": 2.1011, "step": 490060 }, { "epoch": 1.8944735662043266, "grad_norm": 0.17187359929084778, "learning_rate": 0.00010189568252954029, "loss": 2.09, "step": 490070 }, { "epoch": 1.8945122234077099, "grad_norm": 0.16790197789669037, "learning_rate": 0.00010179031716725252, "loss": 2.1102, "step": 490080 }, { "epoch": 1.8945508806110931, "grad_norm": 0.16747283935546875, "learning_rate": 0.00010168495765323504, "loss": 2.0999, "step": 490090 }, { "epoch": 1.8945895378144764, "grad_norm": 0.16838395595550537, "learning_rate": 0.00010157960398651423, "loss": 2.1348, "step": 490100 }, { "epoch": 1.8946281950178596, "grad_norm": 0.16698074340820312, "learning_rate": 0.00010147425616611661, "loss": 2.0991, "step": 490110 }, { "epoch": 1.8946668522212429, "grad_norm": 0.15873688459396362, "learning_rate": 0.00010136891419106898, "loss": 2.1061, "step": 490120 }, { "epoch": 1.8947055094246261, "grad_norm": 0.17689259350299835, "learning_rate": 0.00010126357806039855, "loss": 2.1158, "step": 490130 }, { "epoch": 1.8947441666280094, "grad_norm": 0.15214355289936066, "learning_rate": 0.00010115824777313253, "loss": 2.1043, "step": 490140 }, { "epoch": 1.8947828238313926, "grad_norm": 0.15863355994224548, "learning_rate": 0.00010105292332829885, "loss": 2.1053, "step": 490150 }, { "epoch": 1.8948214810347759, "grad_norm": 0.1705903857946396, "learning_rate": 0.00010094760472492493, "loss": 2.1247, "step": 490160 }, { "epoch": 1.8948601382381594, "grad_norm": 0.16898466646671295, "learning_rate": 0.00010084229196203931, "loss": 2.1054, "step": 490170 }, { "epoch": 1.8948987954415426, "grad_norm": 0.16580814123153687, "learning_rate": 0.00010073698503867035, "loss": 2.1213, "step": 490180 }, { "epoch": 1.8949374526449259, "grad_norm": 0.15544813871383667, "learning_rate": 0.00010063168395384658, "loss": 2.1152, "step": 490190 }, { "epoch": 1.894976109848309, "grad_norm": 0.15902158617973328, "learning_rate": 0.00010052638870659747, "loss": 2.0942, "step": 490200 }, { "epoch": 1.8950147670516924, "grad_norm": 0.1620321124792099, "learning_rate": 0.00010042109929595178, "loss": 2.0955, "step": 490210 }, { "epoch": 1.8950534242550758, "grad_norm": 0.17381994426250458, "learning_rate": 0.0001003158157209394, "loss": 2.1169, "step": 490220 }, { "epoch": 1.895092081458459, "grad_norm": 0.16555550694465637, "learning_rate": 0.00010021053798058977, "loss": 2.0989, "step": 490230 }, { "epoch": 1.8951307386618423, "grad_norm": 0.1549934446811676, "learning_rate": 0.00010010526607393322, "loss": 2.1271, "step": 490240 }, { "epoch": 1.8951693958652256, "grad_norm": 0.16366319358348846, "learning_rate": 0.00010000000000000009, "loss": 2.1254, "step": 490250 }, { "epoch": 1.8952080530686088, "grad_norm": 0.1647455394268036, "learning_rate": 9.98947397578207e-05, "loss": 2.1154, "step": 490260 }, { "epoch": 1.895246710271992, "grad_norm": 0.16468089818954468, "learning_rate": 9.978948534642629e-05, "loss": 2.1167, "step": 490270 }, { "epoch": 1.8952853674753753, "grad_norm": 0.1661677360534668, "learning_rate": 9.96842367648474e-05, "loss": 2.111, "step": 490280 }, { "epoch": 1.8953240246787586, "grad_norm": 0.1536356657743454, "learning_rate": 9.957899401211634e-05, "loss": 2.1241, "step": 490290 }, { "epoch": 1.8953626818821419, "grad_norm": 0.16869594156742096, "learning_rate": 9.947375708726392e-05, "loss": 2.1159, "step": 490300 }, { "epoch": 1.895401339085525, "grad_norm": 0.16110627353191376, "learning_rate": 9.936852598932267e-05, "loss": 2.1121, "step": 490310 }, { "epoch": 1.8954399962889084, "grad_norm": 0.1599881798028946, "learning_rate": 9.926330071732448e-05, "loss": 2.117, "step": 490320 }, { "epoch": 1.8954786534922916, "grad_norm": 0.18143880367279053, "learning_rate": 9.91580812703019e-05, "loss": 2.1017, "step": 490330 }, { "epoch": 1.895517310695675, "grad_norm": 0.16994333267211914, "learning_rate": 9.905286764728772e-05, "loss": 2.1139, "step": 490340 }, { "epoch": 1.8955559678990583, "grad_norm": 0.1837039440870285, "learning_rate": 9.894765984731513e-05, "loss": 2.1103, "step": 490350 }, { "epoch": 1.8955946251024416, "grad_norm": 0.1612176150083542, "learning_rate": 9.884245786941693e-05, "loss": 2.1163, "step": 490360 }, { "epoch": 1.8956332823058248, "grad_norm": 0.19064176082611084, "learning_rate": 9.873726171262698e-05, "loss": 2.099, "step": 490370 }, { "epoch": 1.895671939509208, "grad_norm": 0.16493946313858032, "learning_rate": 9.863207137597897e-05, "loss": 2.1121, "step": 490380 }, { "epoch": 1.8957105967125916, "grad_norm": 0.18231101334095, "learning_rate": 9.852688685850719e-05, "loss": 2.1142, "step": 490390 }, { "epoch": 1.8957492539159748, "grad_norm": 0.15567000210285187, "learning_rate": 9.8421708159246e-05, "loss": 2.1014, "step": 490400 }, { "epoch": 1.895787911119358, "grad_norm": 0.1578628122806549, "learning_rate": 9.831653527722973e-05, "loss": 2.098, "step": 490410 }, { "epoch": 1.8958265683227413, "grad_norm": 0.1570051908493042, "learning_rate": 9.821136821149334e-05, "loss": 2.1152, "step": 490420 }, { "epoch": 1.8958652255261246, "grad_norm": 0.1664205938577652, "learning_rate": 9.810620696107209e-05, "loss": 2.0934, "step": 490430 }, { "epoch": 1.8959038827295078, "grad_norm": 0.15952792763710022, "learning_rate": 9.80010515250016e-05, "loss": 2.1018, "step": 490440 }, { "epoch": 1.895942539932891, "grad_norm": 0.17305637896060944, "learning_rate": 9.789590190231712e-05, "loss": 2.1097, "step": 490450 }, { "epoch": 1.8959811971362743, "grad_norm": 0.17136140167713165, "learning_rate": 9.779075809205473e-05, "loss": 2.1068, "step": 490460 }, { "epoch": 1.8960198543396576, "grad_norm": 0.16530878841876984, "learning_rate": 9.768562009325077e-05, "loss": 2.1179, "step": 490470 }, { "epoch": 1.8960585115430408, "grad_norm": 0.16368193924427032, "learning_rate": 9.758048790494156e-05, "loss": 2.1151, "step": 490480 }, { "epoch": 1.896097168746424, "grad_norm": 0.18392740190029144, "learning_rate": 9.747536152616409e-05, "loss": 2.1116, "step": 490490 }, { "epoch": 1.8961358259498073, "grad_norm": 0.17140942811965942, "learning_rate": 9.737024095595515e-05, "loss": 2.1141, "step": 490500 }, { "epoch": 1.8961744831531908, "grad_norm": 0.17512132227420807, "learning_rate": 9.726512619335215e-05, "loss": 2.1088, "step": 490510 }, { "epoch": 1.896213140356574, "grad_norm": 0.16664306819438934, "learning_rate": 9.716001723739254e-05, "loss": 2.099, "step": 490520 }, { "epoch": 1.8962517975599573, "grad_norm": 0.46299320459365845, "learning_rate": 9.70549140871142e-05, "loss": 2.0923, "step": 490530 }, { "epoch": 1.8962904547633406, "grad_norm": 0.17620672285556793, "learning_rate": 9.694981674155523e-05, "loss": 2.1093, "step": 490540 }, { "epoch": 1.896329111966724, "grad_norm": 0.16797108948230743, "learning_rate": 9.684472519975396e-05, "loss": 2.1117, "step": 490550 }, { "epoch": 1.8963677691701073, "grad_norm": 0.16022421419620514, "learning_rate": 9.673963946074893e-05, "loss": 2.1063, "step": 490560 }, { "epoch": 1.8964064263734906, "grad_norm": 0.16269107162952423, "learning_rate": 9.663455952357913e-05, "loss": 2.1209, "step": 490570 }, { "epoch": 1.8964450835768738, "grad_norm": 0.1669422686100006, "learning_rate": 9.652948538728334e-05, "loss": 2.1159, "step": 490580 }, { "epoch": 1.896483740780257, "grad_norm": 0.16729198396205902, "learning_rate": 9.642441705090143e-05, "loss": 2.1197, "step": 490590 }, { "epoch": 1.8965223979836403, "grad_norm": 0.1610100418329239, "learning_rate": 9.631935451347307e-05, "loss": 2.1063, "step": 490600 }, { "epoch": 1.8965610551870236, "grad_norm": 0.15145553648471832, "learning_rate": 9.62142977740379e-05, "loss": 2.1107, "step": 490610 }, { "epoch": 1.8965997123904068, "grad_norm": 0.16301782429218292, "learning_rate": 9.610924683163602e-05, "loss": 2.1093, "step": 490620 }, { "epoch": 1.89663836959379, "grad_norm": 0.1682334691286087, "learning_rate": 9.600420168530844e-05, "loss": 2.1127, "step": 490630 }, { "epoch": 1.8966770267971733, "grad_norm": 0.16489244997501373, "learning_rate": 9.589916233409523e-05, "loss": 2.0874, "step": 490640 }, { "epoch": 1.8967156840005566, "grad_norm": 0.15822367370128632, "learning_rate": 9.579412877703786e-05, "loss": 2.095, "step": 490650 }, { "epoch": 1.8967543412039398, "grad_norm": 0.15861819684505463, "learning_rate": 9.568910101317752e-05, "loss": 2.0985, "step": 490660 }, { "epoch": 1.896792998407323, "grad_norm": 0.16408208012580872, "learning_rate": 9.558407904155563e-05, "loss": 2.1079, "step": 490670 }, { "epoch": 1.8968316556107065, "grad_norm": 0.1616327166557312, "learning_rate": 9.547906286121389e-05, "loss": 2.0993, "step": 490680 }, { "epoch": 1.8968703128140898, "grad_norm": 0.15668755769729614, "learning_rate": 9.537405247119457e-05, "loss": 2.1131, "step": 490690 }, { "epoch": 1.896908970017473, "grad_norm": 0.17546828091144562, "learning_rate": 9.52690478705398e-05, "loss": 2.1081, "step": 490700 }, { "epoch": 1.8969476272208563, "grad_norm": 0.17848749458789825, "learning_rate": 9.516404905829234e-05, "loss": 2.1102, "step": 490710 }, { "epoch": 1.8969862844242398, "grad_norm": 0.15275801718235016, "learning_rate": 9.505905603349474e-05, "loss": 2.1089, "step": 490720 }, { "epoch": 1.897024941627623, "grad_norm": 0.17563579976558685, "learning_rate": 9.495406879519042e-05, "loss": 2.1168, "step": 490730 }, { "epoch": 1.8970635988310063, "grad_norm": 0.17075873911380768, "learning_rate": 9.484908734242259e-05, "loss": 2.1148, "step": 490740 }, { "epoch": 1.8971022560343895, "grad_norm": 0.168623149394989, "learning_rate": 9.474411167423491e-05, "loss": 2.1064, "step": 490750 }, { "epoch": 1.8971409132377728, "grad_norm": 0.16039146482944489, "learning_rate": 9.463914178967147e-05, "loss": 2.1172, "step": 490760 }, { "epoch": 1.897179570441156, "grad_norm": 0.16803433001041412, "learning_rate": 9.453417768777595e-05, "loss": 2.107, "step": 490770 }, { "epoch": 1.8972182276445393, "grad_norm": 0.18396279215812683, "learning_rate": 9.44292193675933e-05, "loss": 2.1161, "step": 490780 }, { "epoch": 1.8972568848479225, "grad_norm": 0.15813076496124268, "learning_rate": 9.432426682816786e-05, "loss": 2.0962, "step": 490790 }, { "epoch": 1.8972955420513058, "grad_norm": 0.15674901008605957, "learning_rate": 9.421932006854461e-05, "loss": 2.1119, "step": 490800 }, { "epoch": 1.897334199254689, "grad_norm": 0.169746994972229, "learning_rate": 9.411437908776899e-05, "loss": 2.108, "step": 490810 }, { "epoch": 1.8973728564580723, "grad_norm": 0.16584500670433044, "learning_rate": 9.400944388488641e-05, "loss": 2.1231, "step": 490820 }, { "epoch": 1.8974115136614556, "grad_norm": 0.1614682823419571, "learning_rate": 9.390451445894254e-05, "loss": 2.0992, "step": 490830 }, { "epoch": 1.8974501708648388, "grad_norm": 0.16342851519584656, "learning_rate": 9.379959080898325e-05, "loss": 2.0946, "step": 490840 }, { "epoch": 1.8974888280682223, "grad_norm": 0.16105535626411438, "learning_rate": 9.369467293405488e-05, "loss": 2.1039, "step": 490850 }, { "epoch": 1.8975274852716055, "grad_norm": 0.170151948928833, "learning_rate": 9.358976083320414e-05, "loss": 2.1133, "step": 490860 }, { "epoch": 1.8975661424749888, "grad_norm": 0.1685088872909546, "learning_rate": 9.348485450547761e-05, "loss": 2.1215, "step": 490870 }, { "epoch": 1.897604799678372, "grad_norm": 0.1650388389825821, "learning_rate": 9.337995394992227e-05, "loss": 2.1133, "step": 490880 }, { "epoch": 1.8976434568817555, "grad_norm": 0.43443021178245544, "learning_rate": 9.327505916558576e-05, "loss": 2.1133, "step": 490890 }, { "epoch": 1.8976821140851388, "grad_norm": 0.1729881912469864, "learning_rate": 9.317017015151552e-05, "loss": 2.1096, "step": 490900 }, { "epoch": 1.897720771288522, "grad_norm": 0.15954531729221344, "learning_rate": 9.306528690675942e-05, "loss": 2.1196, "step": 490910 }, { "epoch": 1.8977594284919053, "grad_norm": 0.16167645156383514, "learning_rate": 9.296040943036532e-05, "loss": 2.1126, "step": 490920 }, { "epoch": 1.8977980856952885, "grad_norm": 0.16091011464595795, "learning_rate": 9.285553772138178e-05, "loss": 2.1164, "step": 490930 }, { "epoch": 1.8978367428986718, "grad_norm": 0.16552941501140594, "learning_rate": 9.275067177885732e-05, "loss": 2.1084, "step": 490940 }, { "epoch": 1.897875400102055, "grad_norm": 0.16158799827098846, "learning_rate": 9.264581160184094e-05, "loss": 2.1159, "step": 490950 }, { "epoch": 1.8979140573054383, "grad_norm": 0.16633044183254242, "learning_rate": 9.254095718938182e-05, "loss": 2.102, "step": 490960 }, { "epoch": 1.8979527145088215, "grad_norm": 0.1752433031797409, "learning_rate": 9.243610854052919e-05, "loss": 2.1225, "step": 490970 }, { "epoch": 1.8979913717122048, "grad_norm": 0.17548854649066925, "learning_rate": 9.23312656543327e-05, "loss": 2.1005, "step": 490980 }, { "epoch": 1.898030028915588, "grad_norm": 0.15903107821941376, "learning_rate": 9.222642852984242e-05, "loss": 2.1208, "step": 490990 }, { "epoch": 1.8980686861189713, "grad_norm": 0.1658545732498169, "learning_rate": 9.212159716610868e-05, "loss": 2.1086, "step": 491000 }, { "epoch": 1.8981073433223545, "grad_norm": 0.1633501648902893, "learning_rate": 9.201677156218158e-05, "loss": 2.1149, "step": 491010 }, { "epoch": 1.898146000525738, "grad_norm": 0.19065724313259125, "learning_rate": 9.191195171711208e-05, "loss": 2.1066, "step": 491020 }, { "epoch": 1.8981846577291213, "grad_norm": 0.16385003924369812, "learning_rate": 9.180713762995119e-05, "loss": 2.1046, "step": 491030 }, { "epoch": 1.8982233149325045, "grad_norm": 0.1566624492406845, "learning_rate": 9.170232929974986e-05, "loss": 2.1091, "step": 491040 }, { "epoch": 1.8982619721358878, "grad_norm": 0.16979220509529114, "learning_rate": 9.159752672555999e-05, "loss": 2.1047, "step": 491050 }, { "epoch": 1.8983006293392712, "grad_norm": 0.15770958364009857, "learning_rate": 9.149272990643298e-05, "loss": 2.0884, "step": 491060 }, { "epoch": 1.8983392865426545, "grad_norm": 0.18209083378314972, "learning_rate": 9.138793884142093e-05, "loss": 2.1155, "step": 491070 }, { "epoch": 1.8983779437460377, "grad_norm": 0.17145898938179016, "learning_rate": 9.128315352957595e-05, "loss": 2.1227, "step": 491080 }, { "epoch": 1.898416600949421, "grad_norm": 0.16826361417770386, "learning_rate": 9.1178373969951e-05, "loss": 2.0928, "step": 491090 }, { "epoch": 1.8984552581528042, "grad_norm": 0.17281651496887207, "learning_rate": 9.10736001615986e-05, "loss": 2.1212, "step": 491100 }, { "epoch": 1.8984939153561875, "grad_norm": 0.1669350415468216, "learning_rate": 9.096883210357199e-05, "loss": 2.1081, "step": 491110 }, { "epoch": 1.8985325725595708, "grad_norm": 0.16174283623695374, "learning_rate": 9.086406979492412e-05, "loss": 2.1074, "step": 491120 }, { "epoch": 1.898571229762954, "grad_norm": 0.18184956908226013, "learning_rate": 9.07593132347091e-05, "loss": 2.108, "step": 491130 }, { "epoch": 1.8986098869663373, "grad_norm": 0.1940072625875473, "learning_rate": 9.065456242198011e-05, "loss": 2.1051, "step": 491140 }, { "epoch": 1.8986485441697205, "grad_norm": 0.17223842442035675, "learning_rate": 9.054981735579193e-05, "loss": 2.1127, "step": 491150 }, { "epoch": 1.8986872013731038, "grad_norm": 0.1719387173652649, "learning_rate": 9.044507803519841e-05, "loss": 2.1026, "step": 491160 }, { "epoch": 1.898725858576487, "grad_norm": 0.16838513314723969, "learning_rate": 9.034034445925433e-05, "loss": 2.1093, "step": 491170 }, { "epoch": 1.8987645157798703, "grad_norm": 0.165732741355896, "learning_rate": 9.02356166270144e-05, "loss": 2.1217, "step": 491180 }, { "epoch": 1.8988031729832537, "grad_norm": 0.17605352401733398, "learning_rate": 9.013089453753387e-05, "loss": 2.0982, "step": 491190 }, { "epoch": 1.898841830186637, "grad_norm": 0.16870661079883575, "learning_rate": 9.002617818986836e-05, "loss": 2.111, "step": 491200 }, { "epoch": 1.8988804873900202, "grad_norm": 0.1646031141281128, "learning_rate": 8.992146758307352e-05, "loss": 2.1055, "step": 491210 }, { "epoch": 1.8989191445934035, "grad_norm": 0.18385101854801178, "learning_rate": 8.981676271620476e-05, "loss": 2.1093, "step": 491220 }, { "epoch": 1.898957801796787, "grad_norm": 0.1688939929008484, "learning_rate": 8.971206358831862e-05, "loss": 2.1119, "step": 491230 }, { "epoch": 1.8989964590001702, "grad_norm": 0.18524891138076782, "learning_rate": 8.960737019847143e-05, "loss": 2.1087, "step": 491240 }, { "epoch": 1.8990351162035535, "grad_norm": 0.16723163425922394, "learning_rate": 8.950268254571992e-05, "loss": 2.1083, "step": 491250 }, { "epoch": 1.8990737734069367, "grad_norm": 0.17184731364250183, "learning_rate": 8.939800062912106e-05, "loss": 2.1076, "step": 491260 }, { "epoch": 1.89911243061032, "grad_norm": 0.1555802822113037, "learning_rate": 8.929332444773208e-05, "loss": 2.1193, "step": 491270 }, { "epoch": 1.8991510878137032, "grad_norm": 0.1600703001022339, "learning_rate": 8.918865400061016e-05, "loss": 2.1156, "step": 491280 }, { "epoch": 1.8991897450170865, "grad_norm": 0.15900921821594238, "learning_rate": 8.908398928681315e-05, "loss": 2.1085, "step": 491290 }, { "epoch": 1.8992284022204697, "grad_norm": 0.1766301989555359, "learning_rate": 8.89793303053994e-05, "loss": 2.1065, "step": 491300 }, { "epoch": 1.899267059423853, "grad_norm": 0.18237997591495514, "learning_rate": 8.887467705542651e-05, "loss": 2.1187, "step": 491310 }, { "epoch": 1.8993057166272362, "grad_norm": 0.17178525030612946, "learning_rate": 8.877002953595348e-05, "loss": 2.1063, "step": 491320 }, { "epoch": 1.8993443738306195, "grad_norm": 0.1610889583826065, "learning_rate": 8.866538774603883e-05, "loss": 2.1138, "step": 491330 }, { "epoch": 1.8993830310340027, "grad_norm": 0.17128078639507294, "learning_rate": 8.856075168474153e-05, "loss": 2.1002, "step": 491340 }, { "epoch": 1.899421688237386, "grad_norm": 0.20281025767326355, "learning_rate": 8.845612135112103e-05, "loss": 2.1107, "step": 491350 }, { "epoch": 1.8994603454407695, "grad_norm": 0.18568745255470276, "learning_rate": 8.835149674423671e-05, "loss": 2.1036, "step": 491360 }, { "epoch": 1.8994990026441527, "grad_norm": 0.16455897688865662, "learning_rate": 8.824687786314845e-05, "loss": 2.1048, "step": 491370 }, { "epoch": 1.899537659847536, "grad_norm": 0.16593965888023376, "learning_rate": 8.81422647069161e-05, "loss": 2.1175, "step": 491380 }, { "epoch": 1.8995763170509192, "grad_norm": 0.1637643575668335, "learning_rate": 8.80376572746e-05, "loss": 2.1058, "step": 491390 }, { "epoch": 1.8996149742543027, "grad_norm": 0.17839862406253815, "learning_rate": 8.793305556526087e-05, "loss": 2.1113, "step": 491400 }, { "epoch": 1.899653631457686, "grad_norm": 0.15830892324447632, "learning_rate": 8.782845957795927e-05, "loss": 2.1175, "step": 491410 }, { "epoch": 1.8996922886610692, "grad_norm": 0.1570470631122589, "learning_rate": 8.772386931175657e-05, "loss": 2.1049, "step": 491420 }, { "epoch": 1.8997309458644525, "grad_norm": 0.16065655648708344, "learning_rate": 8.761928476571379e-05, "loss": 2.107, "step": 491430 }, { "epoch": 1.8997696030678357, "grad_norm": 0.16688545048236847, "learning_rate": 8.751470593889277e-05, "loss": 2.1079, "step": 491440 }, { "epoch": 1.899808260271219, "grad_norm": 0.15952154994010925, "learning_rate": 8.741013283035515e-05, "loss": 2.1246, "step": 491450 }, { "epoch": 1.8998469174746022, "grad_norm": 0.1672104001045227, "learning_rate": 8.730556543916301e-05, "loss": 2.1136, "step": 491460 }, { "epoch": 1.8998855746779855, "grad_norm": 0.17419083416461945, "learning_rate": 8.72010037643789e-05, "loss": 2.1179, "step": 491470 }, { "epoch": 1.8999242318813687, "grad_norm": 0.15057621896266937, "learning_rate": 8.709644780506509e-05, "loss": 2.0982, "step": 491480 }, { "epoch": 1.899962889084752, "grad_norm": 0.15921354293823242, "learning_rate": 8.699189756028481e-05, "loss": 2.1102, "step": 491490 }, { "epoch": 1.9000015462881352, "grad_norm": 0.1713653802871704, "learning_rate": 8.688735302910078e-05, "loss": 2.1014, "step": 491500 }, { "epoch": 1.9000402034915185, "grad_norm": 0.16376575827598572, "learning_rate": 8.678281421057687e-05, "loss": 2.1036, "step": 491510 }, { "epoch": 1.9000788606949017, "grad_norm": 0.17783987522125244, "learning_rate": 8.66782811037763e-05, "loss": 2.1069, "step": 491520 }, { "epoch": 1.9001175178982852, "grad_norm": 0.1579967737197876, "learning_rate": 8.65737537077631e-05, "loss": 2.1106, "step": 491530 }, { "epoch": 1.9001561751016685, "grad_norm": 0.19459132850170135, "learning_rate": 8.64692320216014e-05, "loss": 2.1063, "step": 491540 }, { "epoch": 1.9001948323050517, "grad_norm": 0.1886938214302063, "learning_rate": 8.636471604435547e-05, "loss": 2.1083, "step": 491550 }, { "epoch": 1.900233489508435, "grad_norm": 0.18700428307056427, "learning_rate": 8.62602057750903e-05, "loss": 2.1215, "step": 491560 }, { "epoch": 1.9002721467118184, "grad_norm": 0.1653238981962204, "learning_rate": 8.615570121287042e-05, "loss": 2.1021, "step": 491570 }, { "epoch": 1.9003108039152017, "grad_norm": 0.15879376232624054, "learning_rate": 8.605120235676122e-05, "loss": 2.1079, "step": 491580 }, { "epoch": 1.900349461118585, "grad_norm": 0.15012842416763306, "learning_rate": 8.59467092058277e-05, "loss": 2.1037, "step": 491590 }, { "epoch": 1.9003881183219682, "grad_norm": 0.15867432951927185, "learning_rate": 8.584222175913614e-05, "loss": 2.1041, "step": 491600 }, { "epoch": 1.9004267755253514, "grad_norm": 0.16058456897735596, "learning_rate": 8.573774001575219e-05, "loss": 2.0914, "step": 491610 }, { "epoch": 1.9004654327287347, "grad_norm": 0.17358066141605377, "learning_rate": 8.563326397474191e-05, "loss": 2.0977, "step": 491620 }, { "epoch": 1.900504089932118, "grad_norm": 0.16065537929534912, "learning_rate": 8.552879363517185e-05, "loss": 2.1127, "step": 491630 }, { "epoch": 1.9005427471355012, "grad_norm": 0.16244420409202576, "learning_rate": 8.542432899610898e-05, "loss": 2.1001, "step": 491640 }, { "epoch": 1.9005814043388845, "grad_norm": 0.1713636815547943, "learning_rate": 8.531987005661957e-05, "loss": 2.1211, "step": 491650 }, { "epoch": 1.9006200615422677, "grad_norm": 0.15860594809055328, "learning_rate": 8.521541681577149e-05, "loss": 2.1229, "step": 491660 }, { "epoch": 1.900658718745651, "grad_norm": 0.16405101120471954, "learning_rate": 8.511096927263174e-05, "loss": 2.118, "step": 491670 }, { "epoch": 1.9006973759490342, "grad_norm": 0.153397798538208, "learning_rate": 8.500652742626836e-05, "loss": 2.108, "step": 491680 }, { "epoch": 1.9007360331524175, "grad_norm": 0.15481743216514587, "learning_rate": 8.490209127574877e-05, "loss": 2.1038, "step": 491690 }, { "epoch": 1.900774690355801, "grad_norm": 0.16705960035324097, "learning_rate": 8.479766082014196e-05, "loss": 2.1018, "step": 491700 }, { "epoch": 1.9008133475591842, "grad_norm": 0.16170643270015717, "learning_rate": 8.469323605851575e-05, "loss": 2.0994, "step": 491710 }, { "epoch": 1.9008520047625674, "grad_norm": 0.16278567910194397, "learning_rate": 8.458881698993936e-05, "loss": 2.1103, "step": 491720 }, { "epoch": 1.9008906619659507, "grad_norm": 0.16915789246559143, "learning_rate": 8.448440361348131e-05, "loss": 2.0999, "step": 491730 }, { "epoch": 1.9009293191693342, "grad_norm": 0.1592896431684494, "learning_rate": 8.437999592821121e-05, "loss": 2.0839, "step": 491740 }, { "epoch": 1.9009679763727174, "grad_norm": 0.16387787461280823, "learning_rate": 8.427559393319828e-05, "loss": 2.0966, "step": 491750 }, { "epoch": 1.9010066335761007, "grad_norm": 0.16786013543605804, "learning_rate": 8.417119762751257e-05, "loss": 2.1064, "step": 491760 }, { "epoch": 1.901045290779484, "grad_norm": 0.1598120778799057, "learning_rate": 8.406680701022352e-05, "loss": 2.1104, "step": 491770 }, { "epoch": 1.9010839479828672, "grad_norm": 0.1629783660173416, "learning_rate": 8.396242208040183e-05, "loss": 2.1139, "step": 491780 }, { "epoch": 1.9011226051862504, "grad_norm": 0.1871945708990097, "learning_rate": 8.385804283711784e-05, "loss": 2.1141, "step": 491790 }, { "epoch": 1.9011612623896337, "grad_norm": 0.16119788587093353, "learning_rate": 8.37536692794425e-05, "loss": 2.1084, "step": 491800 }, { "epoch": 1.901199919593017, "grad_norm": 0.83812016248703, "learning_rate": 8.364930140644655e-05, "loss": 2.0879, "step": 491810 }, { "epoch": 1.9012385767964002, "grad_norm": 0.17770658433437347, "learning_rate": 8.35449392172014e-05, "loss": 2.1137, "step": 491820 }, { "epoch": 1.9012772339997834, "grad_norm": 0.16469278931617737, "learning_rate": 8.344058271077847e-05, "loss": 2.1256, "step": 491830 }, { "epoch": 1.9013158912031667, "grad_norm": 0.15335942804813385, "learning_rate": 8.333623188624961e-05, "loss": 2.0877, "step": 491840 }, { "epoch": 1.90135454840655, "grad_norm": 0.17696666717529297, "learning_rate": 8.323188674268689e-05, "loss": 2.1227, "step": 491850 }, { "epoch": 1.9013932056099332, "grad_norm": 0.16875462234020233, "learning_rate": 8.312754727916238e-05, "loss": 2.1048, "step": 491860 }, { "epoch": 1.9014318628133167, "grad_norm": 0.16450636088848114, "learning_rate": 8.302321349474862e-05, "loss": 2.1136, "step": 491870 }, { "epoch": 1.9014705200167, "grad_norm": 0.17607928812503815, "learning_rate": 8.291888538851854e-05, "loss": 2.1152, "step": 491880 }, { "epoch": 1.9015091772200832, "grad_norm": 0.18208394944667816, "learning_rate": 8.281456295954515e-05, "loss": 2.1085, "step": 491890 }, { "epoch": 1.9015478344234664, "grad_norm": 0.16099147498607635, "learning_rate": 8.27102462069016e-05, "loss": 2.1094, "step": 491900 }, { "epoch": 1.90158649162685, "grad_norm": 0.1623336374759674, "learning_rate": 8.260593512966153e-05, "loss": 2.0994, "step": 491910 }, { "epoch": 1.9016251488302331, "grad_norm": 0.16568103432655334, "learning_rate": 8.250162972689856e-05, "loss": 2.0917, "step": 491920 }, { "epoch": 1.9016638060336164, "grad_norm": 0.16497556865215302, "learning_rate": 8.2397329997687e-05, "loss": 2.1014, "step": 491930 }, { "epoch": 1.9017024632369997, "grad_norm": 0.17084918916225433, "learning_rate": 8.229303594110093e-05, "loss": 2.1148, "step": 491940 }, { "epoch": 1.901741120440383, "grad_norm": 0.15589167177677155, "learning_rate": 8.218874755621485e-05, "loss": 2.0954, "step": 491950 }, { "epoch": 1.9017797776437662, "grad_norm": 0.16014504432678223, "learning_rate": 8.208446484210374e-05, "loss": 2.1057, "step": 491960 }, { "epoch": 1.9018184348471494, "grad_norm": 0.1599462628364563, "learning_rate": 8.198018779784234e-05, "loss": 2.1057, "step": 491970 }, { "epoch": 1.9018570920505327, "grad_norm": 0.16896983981132507, "learning_rate": 8.187591642250647e-05, "loss": 2.1032, "step": 491980 }, { "epoch": 1.901895749253916, "grad_norm": 0.1815507858991623, "learning_rate": 8.17716507151709e-05, "loss": 2.1013, "step": 491990 }, { "epoch": 1.9019344064572992, "grad_norm": 0.178026482462883, "learning_rate": 8.166739067491213e-05, "loss": 2.1065, "step": 492000 }, { "epoch": 1.9019730636606824, "grad_norm": 0.15985998511314392, "learning_rate": 8.156313630080603e-05, "loss": 2.1001, "step": 492010 }, { "epoch": 1.9020117208640657, "grad_norm": 0.1622258871793747, "learning_rate": 8.145888759192866e-05, "loss": 2.0882, "step": 492020 }, { "epoch": 1.9020503780674491, "grad_norm": 0.1622309535741806, "learning_rate": 8.135464454735675e-05, "loss": 2.1052, "step": 492030 }, { "epoch": 1.9020890352708324, "grad_norm": 0.16055290400981903, "learning_rate": 8.125040716616706e-05, "loss": 2.1015, "step": 492040 }, { "epoch": 1.9021276924742156, "grad_norm": 0.17899253964424133, "learning_rate": 8.114617544743653e-05, "loss": 2.1045, "step": 492050 }, { "epoch": 1.902166349677599, "grad_norm": 0.1712222844362259, "learning_rate": 8.104194939024278e-05, "loss": 2.104, "step": 492060 }, { "epoch": 1.9022050068809822, "grad_norm": 0.16439427435398102, "learning_rate": 8.093772899366303e-05, "loss": 2.1121, "step": 492070 }, { "epoch": 1.9022436640843656, "grad_norm": 0.16032835841178894, "learning_rate": 8.083351425677509e-05, "loss": 2.1072, "step": 492080 }, { "epoch": 1.9022823212877489, "grad_norm": 0.15671013295650482, "learning_rate": 8.072930517865706e-05, "loss": 2.0875, "step": 492090 }, { "epoch": 1.9023209784911321, "grad_norm": 0.16478151082992554, "learning_rate": 8.062510175838744e-05, "loss": 2.0919, "step": 492100 }, { "epoch": 1.9023596356945154, "grad_norm": 0.1695231795310974, "learning_rate": 8.052090399504475e-05, "loss": 2.1059, "step": 492110 }, { "epoch": 1.9023982928978986, "grad_norm": 0.16111020743846893, "learning_rate": 8.041671188770772e-05, "loss": 2.0965, "step": 492120 }, { "epoch": 1.9024369501012819, "grad_norm": 0.16548483073711395, "learning_rate": 8.031252543545509e-05, "loss": 2.0972, "step": 492130 }, { "epoch": 1.9024756073046651, "grad_norm": 0.1690864861011505, "learning_rate": 8.02083446373667e-05, "loss": 2.1142, "step": 492140 }, { "epoch": 1.9025142645080484, "grad_norm": 0.1572396159172058, "learning_rate": 8.010416949252175e-05, "loss": 2.094, "step": 492150 }, { "epoch": 1.9025529217114316, "grad_norm": 0.17758668959140778, "learning_rate": 8.000000000000007e-05, "loss": 2.1138, "step": 492160 }, { "epoch": 1.902591578914815, "grad_norm": 0.16696658730506897, "learning_rate": 7.989583615888174e-05, "loss": 2.1092, "step": 492170 }, { "epoch": 1.9026302361181981, "grad_norm": 0.17181764543056488, "learning_rate": 7.979167796824727e-05, "loss": 2.1121, "step": 492180 }, { "epoch": 1.9026688933215814, "grad_norm": 0.21159546077251434, "learning_rate": 7.968752542717673e-05, "loss": 2.1018, "step": 492190 }, { "epoch": 1.9027075505249649, "grad_norm": 0.16343888640403748, "learning_rate": 7.958337853475129e-05, "loss": 2.1122, "step": 492200 }, { "epoch": 1.9027462077283481, "grad_norm": 0.16192626953125, "learning_rate": 7.947923729005213e-05, "loss": 2.1112, "step": 492210 }, { "epoch": 1.9027848649317314, "grad_norm": 0.15736247599124908, "learning_rate": 7.937510169216022e-05, "loss": 2.1062, "step": 492220 }, { "epoch": 1.9028235221351146, "grad_norm": 0.15779156982898712, "learning_rate": 7.927097174015718e-05, "loss": 2.1118, "step": 492230 }, { "epoch": 1.9028621793384979, "grad_norm": 0.158307746052742, "learning_rate": 7.916684743312486e-05, "loss": 2.1033, "step": 492240 }, { "epoch": 1.9029008365418814, "grad_norm": 0.1720828264951706, "learning_rate": 7.906272877014531e-05, "loss": 2.0993, "step": 492250 }, { "epoch": 1.9029394937452646, "grad_norm": 0.17028604447841644, "learning_rate": 7.895861575030106e-05, "loss": 2.0985, "step": 492260 }, { "epoch": 1.9029781509486479, "grad_norm": 0.16534186899662018, "learning_rate": 7.885450837267416e-05, "loss": 2.1087, "step": 492270 }, { "epoch": 1.9030168081520311, "grad_norm": 0.17664501070976257, "learning_rate": 7.875040663634758e-05, "loss": 2.1143, "step": 492280 }, { "epoch": 1.9030554653554144, "grad_norm": 0.1606430858373642, "learning_rate": 7.864631054040427e-05, "loss": 2.1024, "step": 492290 }, { "epoch": 1.9030941225587976, "grad_norm": 0.16679500043392181, "learning_rate": 7.854222008392809e-05, "loss": 2.0931, "step": 492300 }, { "epoch": 1.9031327797621809, "grad_norm": 0.17149166762828827, "learning_rate": 7.843813526600197e-05, "loss": 2.1047, "step": 492310 }, { "epoch": 1.9031714369655641, "grad_norm": 0.17418958246707916, "learning_rate": 7.833405608570977e-05, "loss": 2.1088, "step": 492320 }, { "epoch": 1.9032100941689474, "grad_norm": 0.15961408615112305, "learning_rate": 7.822998254213576e-05, "loss": 2.1078, "step": 492330 }, { "epoch": 1.9032487513723306, "grad_norm": 0.1657746434211731, "learning_rate": 7.812591463436403e-05, "loss": 2.0973, "step": 492340 }, { "epoch": 1.9032874085757139, "grad_norm": 0.1687639206647873, "learning_rate": 7.802185236147908e-05, "loss": 2.1027, "step": 492350 }, { "epoch": 1.9033260657790971, "grad_norm": 0.15681514143943787, "learning_rate": 7.791779572256585e-05, "loss": 2.1186, "step": 492360 }, { "epoch": 1.9033647229824806, "grad_norm": 0.16729243099689484, "learning_rate": 7.781374471670933e-05, "loss": 2.0935, "step": 492370 }, { "epoch": 1.9034033801858639, "grad_norm": 0.16020803153514862, "learning_rate": 7.770969934299466e-05, "loss": 2.101, "step": 492380 }, { "epoch": 1.903442037389247, "grad_norm": 0.16960710287094116, "learning_rate": 7.760565960050747e-05, "loss": 2.0957, "step": 492390 }, { "epoch": 1.9034806945926304, "grad_norm": 0.16362504661083221, "learning_rate": 7.75016254883334e-05, "loss": 2.097, "step": 492400 }, { "epoch": 1.9035193517960136, "grad_norm": 0.3407345712184906, "learning_rate": 7.739759700555871e-05, "loss": 2.0968, "step": 492410 }, { "epoch": 1.903558008999397, "grad_norm": 0.16705606877803802, "learning_rate": 7.729357415126948e-05, "loss": 2.1039, "step": 492420 }, { "epoch": 1.9035966662027803, "grad_norm": 0.16231025755405426, "learning_rate": 7.71895569245522e-05, "loss": 2.1105, "step": 492430 }, { "epoch": 1.9036353234061636, "grad_norm": 0.161398783326149, "learning_rate": 7.708554532449364e-05, "loss": 2.1272, "step": 492440 }, { "epoch": 1.9036739806095468, "grad_norm": 0.45710331201553345, "learning_rate": 7.698153935018093e-05, "loss": 2.1006, "step": 492450 }, { "epoch": 1.90371263781293, "grad_norm": 0.17735600471496582, "learning_rate": 7.687753900070105e-05, "loss": 2.096, "step": 492460 }, { "epoch": 1.9037512950163133, "grad_norm": 0.15921702980995178, "learning_rate": 7.677354427514182e-05, "loss": 2.1209, "step": 492470 }, { "epoch": 1.9037899522196966, "grad_norm": 0.16269877552986145, "learning_rate": 7.666955517259089e-05, "loss": 2.1105, "step": 492480 }, { "epoch": 1.9038286094230799, "grad_norm": 0.1621716171503067, "learning_rate": 7.656557169213585e-05, "loss": 2.1091, "step": 492490 }, { "epoch": 1.903867266626463, "grad_norm": 0.1870255023241043, "learning_rate": 7.646159383286543e-05, "loss": 2.1107, "step": 492500 }, { "epoch": 1.9039059238298464, "grad_norm": 0.16632509231567383, "learning_rate": 7.635762159386816e-05, "loss": 2.1009, "step": 492510 }, { "epoch": 1.9039445810332296, "grad_norm": 0.17069365084171295, "learning_rate": 7.625365497423231e-05, "loss": 2.1023, "step": 492520 }, { "epoch": 1.9039832382366129, "grad_norm": 0.15745419263839722, "learning_rate": 7.614969397304727e-05, "loss": 2.1124, "step": 492530 }, { "epoch": 1.9040218954399963, "grad_norm": 0.16946125030517578, "learning_rate": 7.604573858940201e-05, "loss": 2.0966, "step": 492540 }, { "epoch": 1.9040605526433796, "grad_norm": 0.17170938849449158, "learning_rate": 7.59417888223859e-05, "loss": 2.109, "step": 492550 }, { "epoch": 1.9040992098467628, "grad_norm": 0.1586230844259262, "learning_rate": 7.583784467108901e-05, "loss": 2.1093, "step": 492560 }, { "epoch": 1.904137867050146, "grad_norm": 0.18770702183246613, "learning_rate": 7.573390613460118e-05, "loss": 2.1032, "step": 492570 }, { "epoch": 1.9041765242535296, "grad_norm": 0.16343411803245544, "learning_rate": 7.562997321201249e-05, "loss": 2.0975, "step": 492580 }, { "epoch": 1.9042151814569128, "grad_norm": 0.1600416898727417, "learning_rate": 7.552604590241342e-05, "loss": 2.108, "step": 492590 }, { "epoch": 1.904253838660296, "grad_norm": 0.16096729040145874, "learning_rate": 7.54221242048947e-05, "loss": 2.0926, "step": 492600 }, { "epoch": 1.9042924958636793, "grad_norm": 0.16454388201236725, "learning_rate": 7.531820811854705e-05, "loss": 2.1128, "step": 492610 }, { "epoch": 1.9043311530670626, "grad_norm": 0.15723025798797607, "learning_rate": 7.521429764246212e-05, "loss": 2.0927, "step": 492620 }, { "epoch": 1.9043698102704458, "grad_norm": 0.16144847869873047, "learning_rate": 7.511039277573107e-05, "loss": 2.1037, "step": 492630 }, { "epoch": 1.904408467473829, "grad_norm": 0.16370125114917755, "learning_rate": 7.500649351744571e-05, "loss": 2.0843, "step": 492640 }, { "epoch": 1.9044471246772123, "grad_norm": 0.17126250267028809, "learning_rate": 7.490259986669768e-05, "loss": 2.0975, "step": 492650 }, { "epoch": 1.9044857818805956, "grad_norm": 0.16658659279346466, "learning_rate": 7.479871182257924e-05, "loss": 2.0936, "step": 492660 }, { "epoch": 1.9045244390839788, "grad_norm": 0.17625750601291656, "learning_rate": 7.469482938418314e-05, "loss": 2.1041, "step": 492670 }, { "epoch": 1.904563096287362, "grad_norm": 0.17531739175319672, "learning_rate": 7.459095255060167e-05, "loss": 2.0967, "step": 492680 }, { "epoch": 1.9046017534907453, "grad_norm": 0.15542291104793549, "learning_rate": 7.448708132092774e-05, "loss": 2.0852, "step": 492690 }, { "epoch": 1.9046404106941286, "grad_norm": 0.15380744636058807, "learning_rate": 7.438321569425454e-05, "loss": 2.104, "step": 492700 }, { "epoch": 1.904679067897512, "grad_norm": 0.1589040756225586, "learning_rate": 7.427935566967547e-05, "loss": 2.0979, "step": 492710 }, { "epoch": 1.9047177251008953, "grad_norm": 0.16777478158473969, "learning_rate": 7.417550124628436e-05, "loss": 2.1063, "step": 492720 }, { "epoch": 1.9047563823042786, "grad_norm": 0.1524752974510193, "learning_rate": 7.407165242317481e-05, "loss": 2.0863, "step": 492730 }, { "epoch": 1.9047950395076618, "grad_norm": 0.16807228326797485, "learning_rate": 7.396780919944113e-05, "loss": 2.1105, "step": 492740 }, { "epoch": 1.9048336967110453, "grad_norm": 0.15382082760334015, "learning_rate": 7.38639715741778e-05, "loss": 2.117, "step": 492750 }, { "epoch": 1.9048723539144286, "grad_norm": 0.1633932739496231, "learning_rate": 7.37601395464791e-05, "loss": 2.1043, "step": 492760 }, { "epoch": 1.9049110111178118, "grad_norm": 0.16407723724842072, "learning_rate": 7.365631311543996e-05, "loss": 2.1054, "step": 492770 }, { "epoch": 1.904949668321195, "grad_norm": 0.17223787307739258, "learning_rate": 7.35524922801556e-05, "loss": 2.1032, "step": 492780 }, { "epoch": 1.9049883255245783, "grad_norm": 0.1645432412624359, "learning_rate": 7.344867703972136e-05, "loss": 2.1154, "step": 492790 }, { "epoch": 1.9050269827279616, "grad_norm": 0.15831483900547028, "learning_rate": 7.334486739323265e-05, "loss": 2.1056, "step": 492800 }, { "epoch": 1.9050656399313448, "grad_norm": 0.1638144552707672, "learning_rate": 7.324106333978552e-05, "loss": 2.1131, "step": 492810 }, { "epoch": 1.905104297134728, "grad_norm": 0.15509441494941711, "learning_rate": 7.313726487847605e-05, "loss": 2.1032, "step": 492820 }, { "epoch": 1.9051429543381113, "grad_norm": 0.17044693231582642, "learning_rate": 7.30334720084005e-05, "loss": 2.1115, "step": 492830 }, { "epoch": 1.9051816115414946, "grad_norm": 0.17525023221969604, "learning_rate": 7.292968472865536e-05, "loss": 2.1051, "step": 492840 }, { "epoch": 1.9052202687448778, "grad_norm": 0.1675274521112442, "learning_rate": 7.282590303833735e-05, "loss": 2.0874, "step": 492850 }, { "epoch": 1.905258925948261, "grad_norm": 0.16292007267475128, "learning_rate": 7.272212693654367e-05, "loss": 2.1031, "step": 492860 }, { "epoch": 1.9052975831516443, "grad_norm": 0.16628581285476685, "learning_rate": 7.26183564223717e-05, "loss": 2.0982, "step": 492870 }, { "epoch": 1.9053362403550278, "grad_norm": 0.1902909129858017, "learning_rate": 7.251459149491879e-05, "loss": 2.0997, "step": 492880 }, { "epoch": 1.905374897558411, "grad_norm": 0.18070019781589508, "learning_rate": 7.241083215328281e-05, "loss": 2.0984, "step": 492890 }, { "epoch": 1.9054135547617943, "grad_norm": 0.15961043536663055, "learning_rate": 7.230707839656159e-05, "loss": 2.0905, "step": 492900 }, { "epoch": 1.9054522119651776, "grad_norm": 0.15894865989685059, "learning_rate": 7.220333022385362e-05, "loss": 2.1048, "step": 492910 }, { "epoch": 1.905490869168561, "grad_norm": 0.16615809500217438, "learning_rate": 7.209958763425739e-05, "loss": 2.0916, "step": 492920 }, { "epoch": 1.9055295263719443, "grad_norm": 0.1639198213815689, "learning_rate": 7.199585062687164e-05, "loss": 2.1091, "step": 492930 }, { "epoch": 1.9055681835753275, "grad_norm": 0.1681961864233017, "learning_rate": 7.189211920079531e-05, "loss": 2.0868, "step": 492940 }, { "epoch": 1.9056068407787108, "grad_norm": 0.1636681854724884, "learning_rate": 7.178839335512755e-05, "loss": 2.1019, "step": 492950 }, { "epoch": 1.905645497982094, "grad_norm": 0.16586966812610626, "learning_rate": 7.168467308896797e-05, "loss": 2.0925, "step": 492960 }, { "epoch": 1.9056841551854773, "grad_norm": 0.17896217107772827, "learning_rate": 7.158095840141643e-05, "loss": 2.1119, "step": 492970 }, { "epoch": 1.9057228123888605, "grad_norm": 0.1667049378156662, "learning_rate": 7.147724929157251e-05, "loss": 2.0868, "step": 492980 }, { "epoch": 1.9057614695922438, "grad_norm": 0.15626761317253113, "learning_rate": 7.137354575853649e-05, "loss": 2.0883, "step": 492990 }, { "epoch": 1.905800126795627, "grad_norm": 0.1608096808195114, "learning_rate": 7.126984780140888e-05, "loss": 2.1107, "step": 493000 }, { "epoch": 1.9058387839990103, "grad_norm": 0.16706053912639618, "learning_rate": 7.116615541929061e-05, "loss": 2.1116, "step": 493010 }, { "epoch": 1.9058774412023936, "grad_norm": 0.17353135347366333, "learning_rate": 7.10624686112824e-05, "loss": 2.1139, "step": 493020 }, { "epoch": 1.9059160984057768, "grad_norm": 0.165201336145401, "learning_rate": 7.09587873764852e-05, "loss": 2.096, "step": 493030 }, { "epoch": 1.90595475560916, "grad_norm": 0.16129054129123688, "learning_rate": 7.085511171400083e-05, "loss": 2.1152, "step": 493040 }, { "epoch": 1.9059934128125435, "grad_norm": 0.15986932814121246, "learning_rate": 7.075144162293068e-05, "loss": 2.1079, "step": 493050 }, { "epoch": 1.9060320700159268, "grad_norm": 0.16731788218021393, "learning_rate": 7.06477771023768e-05, "loss": 2.0957, "step": 493060 }, { "epoch": 1.90607072721931, "grad_norm": 0.16379481554031372, "learning_rate": 7.054411815144123e-05, "loss": 2.0806, "step": 493070 }, { "epoch": 1.9061093844226933, "grad_norm": 0.17693264782428741, "learning_rate": 7.044046476922628e-05, "loss": 2.0933, "step": 493080 }, { "epoch": 1.9061480416260768, "grad_norm": 0.2025172859430313, "learning_rate": 7.033681695483463e-05, "loss": 2.1058, "step": 493090 }, { "epoch": 1.90618669882946, "grad_norm": 0.16205249726772308, "learning_rate": 7.0233174707369e-05, "loss": 2.1102, "step": 493100 }, { "epoch": 1.9062253560328433, "grad_norm": 0.16341358423233032, "learning_rate": 7.012953802593258e-05, "loss": 2.0891, "step": 493110 }, { "epoch": 1.9062640132362265, "grad_norm": 0.16129940748214722, "learning_rate": 7.002590690962896e-05, "loss": 2.0967, "step": 493120 }, { "epoch": 1.9063026704396098, "grad_norm": 0.16581223905086517, "learning_rate": 6.992228135756152e-05, "loss": 2.1091, "step": 493130 }, { "epoch": 1.906341327642993, "grad_norm": 0.185667023062706, "learning_rate": 6.981866136883408e-05, "loss": 2.1036, "step": 493140 }, { "epoch": 1.9063799848463763, "grad_norm": 0.16112111508846283, "learning_rate": 6.971504694255049e-05, "loss": 2.0944, "step": 493150 }, { "epoch": 1.9064186420497595, "grad_norm": 0.16273248195648193, "learning_rate": 6.961143807781545e-05, "loss": 2.0983, "step": 493160 }, { "epoch": 1.9064572992531428, "grad_norm": 0.16150085628032684, "learning_rate": 6.950783477373324e-05, "loss": 2.0926, "step": 493170 }, { "epoch": 1.906495956456526, "grad_norm": 0.17488321661949158, "learning_rate": 6.940423702940857e-05, "loss": 2.0894, "step": 493180 }, { "epoch": 1.9065346136599093, "grad_norm": 0.16473519802093506, "learning_rate": 6.930064484394683e-05, "loss": 2.1142, "step": 493190 }, { "epoch": 1.9065732708632925, "grad_norm": 0.17141981422901154, "learning_rate": 6.919705821645272e-05, "loss": 2.1066, "step": 493200 }, { "epoch": 1.9066119280666758, "grad_norm": 0.16588962078094482, "learning_rate": 6.909347714603232e-05, "loss": 2.1022, "step": 493210 }, { "epoch": 1.9066505852700593, "grad_norm": 0.179043710231781, "learning_rate": 6.898990163179097e-05, "loss": 2.1046, "step": 493220 }, { "epoch": 1.9066892424734425, "grad_norm": 0.20257137715816498, "learning_rate": 6.888633167283498e-05, "loss": 2.0871, "step": 493230 }, { "epoch": 1.9067278996768258, "grad_norm": 0.17999424040317535, "learning_rate": 6.878276726827015e-05, "loss": 2.1039, "step": 493240 }, { "epoch": 1.906766556880209, "grad_norm": 0.176579087972641, "learning_rate": 6.867920841720343e-05, "loss": 2.0887, "step": 493250 }, { "epoch": 1.9068052140835925, "grad_norm": 0.17156197130680084, "learning_rate": 6.857565511874109e-05, "loss": 2.1202, "step": 493260 }, { "epoch": 1.9068438712869757, "grad_norm": 0.16644462943077087, "learning_rate": 6.847210737199028e-05, "loss": 2.1053, "step": 493270 }, { "epoch": 1.906882528490359, "grad_norm": 0.17738530039787292, "learning_rate": 6.836856517605816e-05, "loss": 2.106, "step": 493280 }, { "epoch": 1.9069211856937422, "grad_norm": 0.17238637804985046, "learning_rate": 6.826502853005234e-05, "loss": 2.0958, "step": 493290 }, { "epoch": 1.9069598428971255, "grad_norm": 0.16516748070716858, "learning_rate": 6.816149743307998e-05, "loss": 2.0796, "step": 493300 }, { "epoch": 1.9069985001005088, "grad_norm": 0.1585695445537567, "learning_rate": 6.805797188424934e-05, "loss": 2.1046, "step": 493310 }, { "epoch": 1.907037157303892, "grad_norm": 0.15817442536354065, "learning_rate": 6.795445188266847e-05, "loss": 2.0786, "step": 493320 }, { "epoch": 1.9070758145072753, "grad_norm": 0.1786375343799591, "learning_rate": 6.785093742744586e-05, "loss": 2.0931, "step": 493330 }, { "epoch": 1.9071144717106585, "grad_norm": 0.18257126212120056, "learning_rate": 6.774742851768978e-05, "loss": 2.1082, "step": 493340 }, { "epoch": 1.9071531289140418, "grad_norm": 0.1689731925725937, "learning_rate": 6.764392515250962e-05, "loss": 2.1119, "step": 493350 }, { "epoch": 1.907191786117425, "grad_norm": 0.1615624576807022, "learning_rate": 6.754042733101406e-05, "loss": 2.096, "step": 493360 }, { "epoch": 1.9072304433208083, "grad_norm": 0.16443485021591187, "learning_rate": 6.743693505231252e-05, "loss": 2.1194, "step": 493370 }, { "epoch": 1.9072691005241915, "grad_norm": 0.1595473736524582, "learning_rate": 6.733344831551458e-05, "loss": 2.106, "step": 493380 }, { "epoch": 1.907307757727575, "grad_norm": 0.16571669280529022, "learning_rate": 6.722996711973006e-05, "loss": 2.1058, "step": 493390 }, { "epoch": 1.9073464149309582, "grad_norm": 0.169804185628891, "learning_rate": 6.712649146406879e-05, "loss": 2.1144, "step": 493400 }, { "epoch": 1.9073850721343415, "grad_norm": 0.15742474794387817, "learning_rate": 6.70230213476415e-05, "loss": 2.0847, "step": 493410 }, { "epoch": 1.9074237293377247, "grad_norm": 0.16608522832393646, "learning_rate": 6.691955676955841e-05, "loss": 2.1136, "step": 493420 }, { "epoch": 1.9074623865411082, "grad_norm": 0.1622288078069687, "learning_rate": 6.681609772893049e-05, "loss": 2.0849, "step": 493430 }, { "epoch": 1.9075010437444915, "grad_norm": 0.1680872142314911, "learning_rate": 6.671264422486845e-05, "loss": 2.0969, "step": 493440 }, { "epoch": 1.9075397009478747, "grad_norm": 0.17885079979896545, "learning_rate": 6.660919625648365e-05, "loss": 2.097, "step": 493450 }, { "epoch": 1.907578358151258, "grad_norm": 0.1733819842338562, "learning_rate": 6.65057538228877e-05, "loss": 2.0908, "step": 493460 }, { "epoch": 1.9076170153546412, "grad_norm": 0.1674387902021408, "learning_rate": 6.640231692319199e-05, "loss": 2.1053, "step": 493470 }, { "epoch": 1.9076556725580245, "grad_norm": 0.16638167202472687, "learning_rate": 6.629888555650876e-05, "loss": 2.0994, "step": 493480 }, { "epoch": 1.9076943297614077, "grad_norm": 0.18338817358016968, "learning_rate": 6.619545972195007e-05, "loss": 2.0999, "step": 493490 }, { "epoch": 1.907732986964791, "grad_norm": 0.1649668663740158, "learning_rate": 6.60920394186284e-05, "loss": 2.1204, "step": 493500 }, { "epoch": 1.9077716441681742, "grad_norm": 0.18231862783432007, "learning_rate": 6.598862464565625e-05, "loss": 2.1075, "step": 493510 }, { "epoch": 1.9078103013715575, "grad_norm": 0.1718713939189911, "learning_rate": 6.588521540214676e-05, "loss": 2.0866, "step": 493520 }, { "epoch": 1.9078489585749407, "grad_norm": 0.16804492473602295, "learning_rate": 6.578181168721287e-05, "loss": 2.119, "step": 493530 }, { "epoch": 1.907887615778324, "grad_norm": 0.16353757679462433, "learning_rate": 6.567841349996817e-05, "loss": 2.0941, "step": 493540 }, { "epoch": 1.9079262729817072, "grad_norm": 0.16315865516662598, "learning_rate": 6.557502083952605e-05, "loss": 2.1108, "step": 493550 }, { "epoch": 1.9079649301850907, "grad_norm": 0.17387837171554565, "learning_rate": 6.547163370500053e-05, "loss": 2.1107, "step": 493560 }, { "epoch": 1.908003587388474, "grad_norm": 0.16080337762832642, "learning_rate": 6.536825209550546e-05, "loss": 2.1074, "step": 493570 }, { "epoch": 1.9080422445918572, "grad_norm": 0.16609761118888855, "learning_rate": 6.526487601015529e-05, "loss": 2.1042, "step": 493580 }, { "epoch": 1.9080809017952405, "grad_norm": 0.16756406426429749, "learning_rate": 6.516150544806454e-05, "loss": 2.0849, "step": 493590 }, { "epoch": 1.908119558998624, "grad_norm": 0.16301332414150238, "learning_rate": 6.505814040834812e-05, "loss": 2.0829, "step": 493600 }, { "epoch": 1.9081582162020072, "grad_norm": 0.16793601214885712, "learning_rate": 6.495478089012097e-05, "loss": 2.103, "step": 493610 }, { "epoch": 1.9081968734053905, "grad_norm": 0.17147155106067657, "learning_rate": 6.485142689249823e-05, "loss": 2.0983, "step": 493620 }, { "epoch": 1.9082355306087737, "grad_norm": 0.15886180102825165, "learning_rate": 6.474807841459574e-05, "loss": 2.1134, "step": 493630 }, { "epoch": 1.908274187812157, "grad_norm": 0.1755588799715042, "learning_rate": 6.464473545552885e-05, "loss": 2.091, "step": 493640 }, { "epoch": 1.9083128450155402, "grad_norm": 0.2920505106449127, "learning_rate": 6.454139801441383e-05, "loss": 2.1106, "step": 493650 }, { "epoch": 1.9083515022189235, "grad_norm": 0.17460133135318756, "learning_rate": 6.443806609036674e-05, "loss": 2.0824, "step": 493660 }, { "epoch": 1.9083901594223067, "grad_norm": 0.16135340929031372, "learning_rate": 6.43347396825038e-05, "loss": 2.0928, "step": 493670 }, { "epoch": 1.90842881662569, "grad_norm": 0.16903318464756012, "learning_rate": 6.423141878994221e-05, "loss": 2.105, "step": 493680 }, { "epoch": 1.9084674738290732, "grad_norm": 0.22488346695899963, "learning_rate": 6.412810341179865e-05, "loss": 2.1125, "step": 493690 }, { "epoch": 1.9085061310324565, "grad_norm": 0.16939231753349304, "learning_rate": 6.402479354719004e-05, "loss": 2.0891, "step": 493700 }, { "epoch": 1.9085447882358397, "grad_norm": 0.16648931801319122, "learning_rate": 6.392148919523399e-05, "loss": 2.0998, "step": 493710 }, { "epoch": 1.908583445439223, "grad_norm": 0.1556132584810257, "learning_rate": 6.381819035504832e-05, "loss": 2.1057, "step": 493720 }, { "epoch": 1.9086221026426065, "grad_norm": 0.15572424232959747, "learning_rate": 6.37148970257504e-05, "loss": 2.1056, "step": 493730 }, { "epoch": 1.9086607598459897, "grad_norm": 0.18413236737251282, "learning_rate": 6.36116092064587e-05, "loss": 2.0767, "step": 493740 }, { "epoch": 1.908699417049373, "grad_norm": 0.1694517433643341, "learning_rate": 6.350832689629149e-05, "loss": 2.0982, "step": 493750 }, { "epoch": 1.9087380742527562, "grad_norm": 0.17999476194381714, "learning_rate": 6.340505009436726e-05, "loss": 2.092, "step": 493760 }, { "epoch": 1.9087767314561397, "grad_norm": 0.166117861866951, "learning_rate": 6.330177879980492e-05, "loss": 2.0978, "step": 493770 }, { "epoch": 1.908815388659523, "grad_norm": 0.1705954372882843, "learning_rate": 6.319851301172319e-05, "loss": 2.1018, "step": 493780 }, { "epoch": 1.9088540458629062, "grad_norm": 0.15947870910167694, "learning_rate": 6.309525272924166e-05, "loss": 2.0963, "step": 493790 }, { "epoch": 1.9088927030662894, "grad_norm": 0.156826451420784, "learning_rate": 6.299199795147992e-05, "loss": 2.0929, "step": 493800 }, { "epoch": 1.9089313602696727, "grad_norm": 0.15414969623088837, "learning_rate": 6.288874867755712e-05, "loss": 2.0806, "step": 493810 }, { "epoch": 1.908970017473056, "grad_norm": 0.15578539669513702, "learning_rate": 6.278550490659395e-05, "loss": 2.0833, "step": 493820 }, { "epoch": 1.9090086746764392, "grad_norm": 0.1562417447566986, "learning_rate": 6.268226663771026e-05, "loss": 2.1098, "step": 493830 }, { "epoch": 1.9090473318798225, "grad_norm": 0.16142068803310394, "learning_rate": 6.25790338700265e-05, "loss": 2.1012, "step": 493840 }, { "epoch": 1.9090859890832057, "grad_norm": 0.1723729819059372, "learning_rate": 6.247580660266339e-05, "loss": 2.1185, "step": 493850 }, { "epoch": 1.909124646286589, "grad_norm": 0.16679581999778748, "learning_rate": 6.237258483474184e-05, "loss": 2.0856, "step": 493860 }, { "epoch": 1.9091633034899722, "grad_norm": 0.16381263732910156, "learning_rate": 6.226936856538279e-05, "loss": 2.0949, "step": 493870 }, { "epoch": 1.9092019606933555, "grad_norm": 0.15974077582359314, "learning_rate": 6.216615779370805e-05, "loss": 2.0889, "step": 493880 }, { "epoch": 1.9092406178967387, "grad_norm": 0.1609431654214859, "learning_rate": 6.206295251883898e-05, "loss": 2.1091, "step": 493890 }, { "epoch": 1.9092792751001222, "grad_norm": 0.18349827826023102, "learning_rate": 6.19597527398974e-05, "loss": 2.1102, "step": 493900 }, { "epoch": 1.9093179323035054, "grad_norm": 0.15824545919895172, "learning_rate": 6.185655845600536e-05, "loss": 2.0911, "step": 493910 }, { "epoch": 1.9093565895068887, "grad_norm": 0.15692760050296783, "learning_rate": 6.175336966628508e-05, "loss": 2.1116, "step": 493920 }, { "epoch": 1.909395246710272, "grad_norm": 0.1580006331205368, "learning_rate": 6.165018636985953e-05, "loss": 2.0933, "step": 493930 }, { "epoch": 1.9094339039136554, "grad_norm": 0.17121177911758423, "learning_rate": 6.154700856585117e-05, "loss": 2.1123, "step": 493940 }, { "epoch": 1.9094725611170387, "grad_norm": 0.1740797758102417, "learning_rate": 6.144383625338312e-05, "loss": 2.0984, "step": 493950 }, { "epoch": 1.909511218320422, "grad_norm": 0.1799916923046112, "learning_rate": 6.134066943157856e-05, "loss": 2.1022, "step": 493960 }, { "epoch": 1.9095498755238052, "grad_norm": 0.18135613203048706, "learning_rate": 6.123750809956108e-05, "loss": 2.0922, "step": 493970 }, { "epoch": 1.9095885327271884, "grad_norm": 0.1724577099084854, "learning_rate": 6.113435225645403e-05, "loss": 2.0952, "step": 493980 }, { "epoch": 1.9096271899305717, "grad_norm": 0.16730991005897522, "learning_rate": 6.10312019013819e-05, "loss": 2.1004, "step": 493990 }, { "epoch": 1.909665847133955, "grad_norm": 0.17100889980793, "learning_rate": 6.092805703346849e-05, "loss": 2.0855, "step": 494000 }, { "epoch": 1.9097045043373382, "grad_norm": 0.16315822303295135, "learning_rate": 6.0824917651838285e-05, "loss": 2.1015, "step": 494010 }, { "epoch": 1.9097431615407214, "grad_norm": 0.15623895823955536, "learning_rate": 6.072178375561599e-05, "loss": 2.1154, "step": 494020 }, { "epoch": 1.9097818187441047, "grad_norm": 0.15995801985263824, "learning_rate": 6.061865534392652e-05, "loss": 2.0926, "step": 494030 }, { "epoch": 1.909820475947488, "grad_norm": 0.1566266119480133, "learning_rate": 6.0515532415894805e-05, "loss": 2.1107, "step": 494040 }, { "epoch": 1.9098591331508712, "grad_norm": 0.2434413731098175, "learning_rate": 6.0412414970646424e-05, "loss": 2.1014, "step": 494050 }, { "epoch": 1.9098977903542547, "grad_norm": 0.1829388439655304, "learning_rate": 6.030930300730675e-05, "loss": 2.1143, "step": 494060 }, { "epoch": 1.909936447557638, "grad_norm": 0.16349373757839203, "learning_rate": 6.020619652500181e-05, "loss": 2.1026, "step": 494070 }, { "epoch": 1.9099751047610212, "grad_norm": 0.1656537652015686, "learning_rate": 6.010309552285742e-05, "loss": 2.0957, "step": 494080 }, { "epoch": 1.9100137619644044, "grad_norm": 0.16540515422821045, "learning_rate": 6.0000000000000056e-05, "loss": 2.1104, "step": 494090 }, { "epoch": 1.9100524191677877, "grad_norm": 0.17239612340927124, "learning_rate": 5.9896909955555964e-05, "loss": 2.1086, "step": 494100 }, { "epoch": 1.9100910763711711, "grad_norm": 0.1950691193342209, "learning_rate": 5.979382538865208e-05, "loss": 2.1026, "step": 494110 }, { "epoch": 1.9101297335745544, "grad_norm": 0.17566072940826416, "learning_rate": 5.969074629841531e-05, "loss": 2.0855, "step": 494120 }, { "epoch": 1.9101683907779377, "grad_norm": 0.15897676348686218, "learning_rate": 5.958767268397303e-05, "loss": 2.0936, "step": 494130 }, { "epoch": 1.910207047981321, "grad_norm": 0.16137805581092834, "learning_rate": 5.948460454445237e-05, "loss": 2.1013, "step": 494140 }, { "epoch": 1.9102457051847042, "grad_norm": 0.19188115000724792, "learning_rate": 5.938154187898137e-05, "loss": 2.0886, "step": 494150 }, { "epoch": 1.9102843623880874, "grad_norm": 0.1605629324913025, "learning_rate": 5.9278484686687396e-05, "loss": 2.0894, "step": 494160 }, { "epoch": 1.9103230195914707, "grad_norm": 0.17366410791873932, "learning_rate": 5.9175432966699136e-05, "loss": 2.0834, "step": 494170 }, { "epoch": 1.910361676794854, "grad_norm": 0.16272400319576263, "learning_rate": 5.9072386718144635e-05, "loss": 2.0966, "step": 494180 }, { "epoch": 1.9104003339982372, "grad_norm": 0.17019392549991608, "learning_rate": 5.8969345940152356e-05, "loss": 2.1097, "step": 494190 }, { "epoch": 1.9104389912016204, "grad_norm": 0.16691423952579498, "learning_rate": 5.886631063185144e-05, "loss": 2.0996, "step": 494200 }, { "epoch": 1.9104776484050037, "grad_norm": 0.17281699180603027, "learning_rate": 5.876328079237081e-05, "loss": 2.0976, "step": 494210 }, { "epoch": 1.910516305608387, "grad_norm": 0.17887260019779205, "learning_rate": 5.866025642083961e-05, "loss": 2.1068, "step": 494220 }, { "epoch": 1.9105549628117704, "grad_norm": 0.16713376343250275, "learning_rate": 5.855723751638764e-05, "loss": 2.0981, "step": 494230 }, { "epoch": 1.9105936200151536, "grad_norm": 0.16143283247947693, "learning_rate": 5.8454224078144494e-05, "loss": 2.1031, "step": 494240 }, { "epoch": 1.910632277218537, "grad_norm": 0.16023682057857513, "learning_rate": 5.835121610524019e-05, "loss": 2.1022, "step": 494250 }, { "epoch": 1.9106709344219202, "grad_norm": 0.17260730266571045, "learning_rate": 5.824821359680477e-05, "loss": 2.0983, "step": 494260 }, { "epoch": 1.9107095916253034, "grad_norm": 0.1591053307056427, "learning_rate": 5.8145216551968916e-05, "loss": 2.1008, "step": 494270 }, { "epoch": 1.9107482488286869, "grad_norm": 0.16962049901485443, "learning_rate": 5.804222496986311e-05, "loss": 2.0997, "step": 494280 }, { "epoch": 1.9107869060320701, "grad_norm": 0.16175496578216553, "learning_rate": 5.793923884961827e-05, "loss": 2.0857, "step": 494290 }, { "epoch": 1.9108255632354534, "grad_norm": 0.1648041307926178, "learning_rate": 5.7836258190365755e-05, "loss": 2.0961, "step": 494300 }, { "epoch": 1.9108642204388366, "grad_norm": 0.1681378185749054, "learning_rate": 5.773328299123648e-05, "loss": 2.1007, "step": 494310 }, { "epoch": 1.9109028776422199, "grad_norm": 0.1582988053560257, "learning_rate": 5.7630313251362474e-05, "loss": 2.0963, "step": 494320 }, { "epoch": 1.9109415348456031, "grad_norm": 0.1673489660024643, "learning_rate": 5.752734896987533e-05, "loss": 2.1051, "step": 494330 }, { "epoch": 1.9109801920489864, "grad_norm": 0.17708514630794525, "learning_rate": 5.742439014590728e-05, "loss": 2.0814, "step": 494340 }, { "epoch": 1.9110188492523696, "grad_norm": 0.16237381100654602, "learning_rate": 5.7321436778590586e-05, "loss": 2.1114, "step": 494350 }, { "epoch": 1.911057506455753, "grad_norm": 0.16191226243972778, "learning_rate": 5.721848886705749e-05, "loss": 2.0904, "step": 494360 }, { "epoch": 1.9110961636591361, "grad_norm": 0.17363980412483215, "learning_rate": 5.711554641044092e-05, "loss": 2.0971, "step": 494370 }, { "epoch": 1.9111348208625194, "grad_norm": 0.16366997361183167, "learning_rate": 5.701260940787378e-05, "loss": 2.1044, "step": 494380 }, { "epoch": 1.9111734780659027, "grad_norm": 0.16033147275447845, "learning_rate": 5.6909677858489216e-05, "loss": 2.109, "step": 494390 }, { "epoch": 1.9112121352692861, "grad_norm": 0.17024444043636322, "learning_rate": 5.680675176142103e-05, "loss": 2.1059, "step": 494400 }, { "epoch": 1.9112507924726694, "grad_norm": 0.17983657121658325, "learning_rate": 5.670383111580235e-05, "loss": 2.1084, "step": 494410 }, { "epoch": 1.9112894496760526, "grad_norm": 0.15747319161891937, "learning_rate": 5.660091592076744e-05, "loss": 2.1059, "step": 494420 }, { "epoch": 1.9113281068794359, "grad_norm": 0.1654488891363144, "learning_rate": 5.649800617545031e-05, "loss": 2.0944, "step": 494430 }, { "epoch": 1.9113667640828194, "grad_norm": 0.16729286313056946, "learning_rate": 5.639510187898522e-05, "loss": 2.0839, "step": 494440 }, { "epoch": 1.9114054212862026, "grad_norm": 0.16759923100471497, "learning_rate": 5.629220303050686e-05, "loss": 2.0828, "step": 494450 }, { "epoch": 1.9114440784895859, "grad_norm": 0.1605670154094696, "learning_rate": 5.6189309629150146e-05, "loss": 2.0979, "step": 494460 }, { "epoch": 1.9114827356929691, "grad_norm": 0.16231954097747803, "learning_rate": 5.608642167404976e-05, "loss": 2.0906, "step": 494470 }, { "epoch": 1.9115213928963524, "grad_norm": 0.15948623418807983, "learning_rate": 5.598353916434129e-05, "loss": 2.0949, "step": 494480 }, { "epoch": 1.9115600500997356, "grad_norm": 0.17366357147693634, "learning_rate": 5.58806620991601e-05, "loss": 2.1019, "step": 494490 }, { "epoch": 1.9115987073031189, "grad_norm": 0.16815611720085144, "learning_rate": 5.577779047764198e-05, "loss": 2.1088, "step": 494500 }, { "epoch": 1.9116373645065021, "grad_norm": 0.16711877286434174, "learning_rate": 5.5674924298922735e-05, "loss": 2.0901, "step": 494510 }, { "epoch": 1.9116760217098854, "grad_norm": 0.17813915014266968, "learning_rate": 5.557206356213862e-05, "loss": 2.0913, "step": 494520 }, { "epoch": 1.9117146789132686, "grad_norm": 0.17400996387004852, "learning_rate": 5.54692082664261e-05, "loss": 2.0951, "step": 494530 }, { "epoch": 1.9117533361166519, "grad_norm": 0.17161357402801514, "learning_rate": 5.536635841092163e-05, "loss": 2.0766, "step": 494540 }, { "epoch": 1.9117919933200351, "grad_norm": 0.16898822784423828, "learning_rate": 5.526351399476237e-05, "loss": 2.0943, "step": 494550 }, { "epoch": 1.9118306505234184, "grad_norm": 0.16697081923484802, "learning_rate": 5.516067501708522e-05, "loss": 2.0733, "step": 494560 }, { "epoch": 1.9118693077268019, "grad_norm": 0.17042747139930725, "learning_rate": 5.5057841477027304e-05, "loss": 2.0913, "step": 494570 }, { "epoch": 1.911907964930185, "grad_norm": 0.17008812725543976, "learning_rate": 5.495501337372666e-05, "loss": 2.0995, "step": 494580 }, { "epoch": 1.9119466221335684, "grad_norm": 0.1692279428243637, "learning_rate": 5.485219070632064e-05, "loss": 2.0928, "step": 494590 }, { "epoch": 1.9119852793369516, "grad_norm": 0.16472196578979492, "learning_rate": 5.474937347394748e-05, "loss": 2.092, "step": 494600 }, { "epoch": 1.912023936540335, "grad_norm": 0.16748890280723572, "learning_rate": 5.464656167574522e-05, "loss": 2.1186, "step": 494610 }, { "epoch": 1.9120625937437183, "grad_norm": 0.15886230766773224, "learning_rate": 5.454375531085254e-05, "loss": 2.101, "step": 494620 }, { "epoch": 1.9121012509471016, "grad_norm": 0.15965144336223602, "learning_rate": 5.44409543784079e-05, "loss": 2.096, "step": 494630 }, { "epoch": 1.9121399081504848, "grad_norm": 0.1668359488248825, "learning_rate": 5.4338158877550446e-05, "loss": 2.0932, "step": 494640 }, { "epoch": 1.912178565353868, "grad_norm": 0.16767774522304535, "learning_rate": 5.4235368807419085e-05, "loss": 2.0969, "step": 494650 }, { "epoch": 1.9122172225572514, "grad_norm": 0.16439062356948853, "learning_rate": 5.413258416715339e-05, "loss": 2.0968, "step": 494660 }, { "epoch": 1.9122558797606346, "grad_norm": 0.16456596553325653, "learning_rate": 5.402980495589294e-05, "loss": 2.1074, "step": 494670 }, { "epoch": 1.9122945369640179, "grad_norm": 0.168012335896492, "learning_rate": 5.392703117277753e-05, "loss": 2.0912, "step": 494680 }, { "epoch": 1.912333194167401, "grad_norm": 0.15883708000183105, "learning_rate": 5.382426281694697e-05, "loss": 2.0853, "step": 494690 }, { "epoch": 1.9123718513707844, "grad_norm": 0.1676301658153534, "learning_rate": 5.372149988754193e-05, "loss": 2.0935, "step": 494700 }, { "epoch": 1.9124105085741676, "grad_norm": 0.15922707319259644, "learning_rate": 5.361874238370246e-05, "loss": 2.0872, "step": 494710 }, { "epoch": 1.9124491657775509, "grad_norm": 0.1661909520626068, "learning_rate": 5.351599030456966e-05, "loss": 2.1026, "step": 494720 }, { "epoch": 1.9124878229809341, "grad_norm": 0.18675149977207184, "learning_rate": 5.341324364928446e-05, "loss": 2.0991, "step": 494730 }, { "epoch": 1.9125264801843176, "grad_norm": 0.165910005569458, "learning_rate": 5.331050241698798e-05, "loss": 2.0866, "step": 494740 }, { "epoch": 1.9125651373877008, "grad_norm": 0.16928835213184357, "learning_rate": 5.320776660682158e-05, "loss": 2.0892, "step": 494750 }, { "epoch": 1.912603794591084, "grad_norm": 0.15087957680225372, "learning_rate": 5.310503621792684e-05, "loss": 2.0986, "step": 494760 }, { "epoch": 1.9126424517944673, "grad_norm": 0.15772745013237, "learning_rate": 5.3002311249445764e-05, "loss": 2.1019, "step": 494770 }, { "epoch": 1.9126811089978508, "grad_norm": 0.16974887251853943, "learning_rate": 5.289959170052039e-05, "loss": 2.112, "step": 494780 }, { "epoch": 1.912719766201234, "grad_norm": 0.1652223765850067, "learning_rate": 5.279687757029317e-05, "loss": 2.1034, "step": 494790 }, { "epoch": 1.9127584234046173, "grad_norm": 0.17936939001083374, "learning_rate": 5.269416885790634e-05, "loss": 2.0964, "step": 494800 }, { "epoch": 1.9127970806080006, "grad_norm": 0.16628216207027435, "learning_rate": 5.259146556250283e-05, "loss": 2.0861, "step": 494810 }, { "epoch": 1.9128357378113838, "grad_norm": 0.16931118071079254, "learning_rate": 5.248876768322597e-05, "loss": 2.0966, "step": 494820 }, { "epoch": 1.912874395014767, "grad_norm": 0.1719769537448883, "learning_rate": 5.238607521921846e-05, "loss": 2.1026, "step": 494830 }, { "epoch": 1.9129130522181503, "grad_norm": 0.1746273785829544, "learning_rate": 5.228338816962386e-05, "loss": 2.1045, "step": 494840 }, { "epoch": 1.9129517094215336, "grad_norm": 0.1762704700231552, "learning_rate": 5.2180706533585976e-05, "loss": 2.0949, "step": 494850 }, { "epoch": 1.9129903666249168, "grad_norm": 0.1763325184583664, "learning_rate": 5.207803031024882e-05, "loss": 2.1006, "step": 494860 }, { "epoch": 1.9130290238283, "grad_norm": 0.1622261106967926, "learning_rate": 5.19753594987562e-05, "loss": 2.102, "step": 494870 }, { "epoch": 1.9130676810316833, "grad_norm": 0.16686417162418365, "learning_rate": 5.187269409825279e-05, "loss": 2.0961, "step": 494880 }, { "epoch": 1.9131063382350666, "grad_norm": 0.1586511880159378, "learning_rate": 5.177003410788283e-05, "loss": 2.0864, "step": 494890 }, { "epoch": 1.9131449954384498, "grad_norm": 0.16049085557460785, "learning_rate": 5.1667379526791456e-05, "loss": 2.0958, "step": 494900 }, { "epoch": 1.9131836526418333, "grad_norm": 0.1666175276041031, "learning_rate": 5.156473035412335e-05, "loss": 2.1041, "step": 494910 }, { "epoch": 1.9132223098452166, "grad_norm": 0.17487689852714539, "learning_rate": 5.1462086589024074e-05, "loss": 2.1052, "step": 494920 }, { "epoch": 1.9132609670485998, "grad_norm": 0.16758491098880768, "learning_rate": 5.135944823063898e-05, "loss": 2.0988, "step": 494930 }, { "epoch": 1.913299624251983, "grad_norm": 0.17294859886169434, "learning_rate": 5.1256815278113654e-05, "loss": 2.0941, "step": 494940 }, { "epoch": 1.9133382814553666, "grad_norm": 0.17001833021640778, "learning_rate": 5.11541877305941e-05, "loss": 2.1009, "step": 494950 }, { "epoch": 1.9133769386587498, "grad_norm": 0.16715086996555328, "learning_rate": 5.105156558722679e-05, "loss": 2.0983, "step": 494960 }, { "epoch": 1.913415595862133, "grad_norm": 0.16920140385627747, "learning_rate": 5.09489488471575e-05, "loss": 2.0909, "step": 494970 }, { "epoch": 1.9134542530655163, "grad_norm": 0.17399254441261292, "learning_rate": 5.0846337509533384e-05, "loss": 2.0813, "step": 494980 }, { "epoch": 1.9134929102688996, "grad_norm": 0.15936678647994995, "learning_rate": 5.0743731573500874e-05, "loss": 2.0899, "step": 494990 }, { "epoch": 1.9135315674722828, "grad_norm": 0.1558026522397995, "learning_rate": 5.0641131038207336e-05, "loss": 2.0827, "step": 495000 }, { "epoch": 1.913570224675666, "grad_norm": 0.15770964324474335, "learning_rate": 5.0538535902799665e-05, "loss": 2.1019, "step": 495010 }, { "epoch": 1.9136088818790493, "grad_norm": 0.16118407249450684, "learning_rate": 5.043594616642566e-05, "loss": 2.1088, "step": 495020 }, { "epoch": 1.9136475390824326, "grad_norm": 0.15688639879226685, "learning_rate": 5.033336182823289e-05, "loss": 2.0929, "step": 495030 }, { "epoch": 1.9136861962858158, "grad_norm": 0.1677384227514267, "learning_rate": 5.0230782887369596e-05, "loss": 2.0859, "step": 495040 }, { "epoch": 1.913724853489199, "grad_norm": 0.16379329562187195, "learning_rate": 5.0128209342983566e-05, "loss": 2.1021, "step": 495050 }, { "epoch": 1.9137635106925823, "grad_norm": 0.1766320914030075, "learning_rate": 5.002564119422326e-05, "loss": 2.1001, "step": 495060 }, { "epoch": 1.9138021678959656, "grad_norm": 0.16742245852947235, "learning_rate": 4.992307844023758e-05, "loss": 2.0897, "step": 495070 }, { "epoch": 1.913840825099349, "grad_norm": 0.1736634373664856, "learning_rate": 4.982052108017499e-05, "loss": 2.1047, "step": 495080 }, { "epoch": 1.9138794823027323, "grad_norm": 0.16338351368904114, "learning_rate": 4.9717969113185044e-05, "loss": 2.0953, "step": 495090 }, { "epoch": 1.9139181395061156, "grad_norm": 0.16212569177150726, "learning_rate": 4.9615422538416445e-05, "loss": 2.0836, "step": 495100 }, { "epoch": 1.9139567967094988, "grad_norm": 0.15982629358768463, "learning_rate": 4.951288135501919e-05, "loss": 2.0809, "step": 495110 }, { "epoch": 1.9139954539128823, "grad_norm": 0.16392414271831512, "learning_rate": 4.941034556214263e-05, "loss": 2.0885, "step": 495120 }, { "epoch": 1.9140341111162655, "grad_norm": 0.16265085339546204, "learning_rate": 4.930781515893701e-05, "loss": 2.0921, "step": 495130 }, { "epoch": 1.9140727683196488, "grad_norm": 0.1701953262090683, "learning_rate": 4.9205290144552326e-05, "loss": 2.0745, "step": 495140 }, { "epoch": 1.914111425523032, "grad_norm": 0.17344048619270325, "learning_rate": 4.9102770518139274e-05, "loss": 2.0906, "step": 495150 }, { "epoch": 1.9141500827264153, "grad_norm": 0.16372932493686676, "learning_rate": 4.90002562788483e-05, "loss": 2.1067, "step": 495160 }, { "epoch": 1.9141887399297985, "grad_norm": 0.16525626182556152, "learning_rate": 4.8897747425830086e-05, "loss": 2.0958, "step": 495170 }, { "epoch": 1.9142273971331818, "grad_norm": 0.16770386695861816, "learning_rate": 4.879524395823598e-05, "loss": 2.1081, "step": 495180 }, { "epoch": 1.914266054336565, "grad_norm": 0.17800508439540863, "learning_rate": 4.8692745875217107e-05, "loss": 2.0979, "step": 495190 }, { "epoch": 1.9143047115399483, "grad_norm": 0.1625628024339676, "learning_rate": 4.8590253175925246e-05, "loss": 2.0977, "step": 495200 }, { "epoch": 1.9143433687433316, "grad_norm": 0.17888452112674713, "learning_rate": 4.848776585951176e-05, "loss": 2.1001, "step": 495210 }, { "epoch": 1.9143820259467148, "grad_norm": 0.15721629559993744, "learning_rate": 4.838528392512886e-05, "loss": 2.095, "step": 495220 }, { "epoch": 1.914420683150098, "grad_norm": 0.1689646989107132, "learning_rate": 4.8282807371928586e-05, "loss": 2.0966, "step": 495230 }, { "epoch": 1.9144593403534813, "grad_norm": 0.16454440355300903, "learning_rate": 4.8180336199063593e-05, "loss": 2.0996, "step": 495240 }, { "epoch": 1.9144979975568648, "grad_norm": 0.15929938852787018, "learning_rate": 4.8077870405686566e-05, "loss": 2.0977, "step": 495250 }, { "epoch": 1.914536654760248, "grad_norm": 0.16392184793949127, "learning_rate": 4.7975409990949957e-05, "loss": 2.097, "step": 495260 }, { "epoch": 1.9145753119636313, "grad_norm": 0.1720370650291443, "learning_rate": 4.787295495400712e-05, "loss": 2.1052, "step": 495270 }, { "epoch": 1.9146139691670145, "grad_norm": 0.17082928121089935, "learning_rate": 4.777050529401139e-05, "loss": 2.0957, "step": 495280 }, { "epoch": 1.914652626370398, "grad_norm": 0.16387730836868286, "learning_rate": 4.766806101011611e-05, "loss": 2.0829, "step": 495290 }, { "epoch": 1.9146912835737813, "grad_norm": 0.16461658477783203, "learning_rate": 4.756562210147508e-05, "loss": 2.089, "step": 495300 }, { "epoch": 1.9147299407771645, "grad_norm": 0.1617818921804428, "learning_rate": 4.746318856724252e-05, "loss": 2.1012, "step": 495310 }, { "epoch": 1.9147685979805478, "grad_norm": 0.16729439795017242, "learning_rate": 4.736076040657222e-05, "loss": 2.102, "step": 495320 }, { "epoch": 1.914807255183931, "grad_norm": 0.1714024394750595, "learning_rate": 4.725833761861886e-05, "loss": 2.1033, "step": 495330 }, { "epoch": 1.9148459123873143, "grad_norm": 0.16891701519489288, "learning_rate": 4.7155920202536896e-05, "loss": 2.0894, "step": 495340 }, { "epoch": 1.9148845695906975, "grad_norm": 0.16476847231388092, "learning_rate": 4.705350815748144e-05, "loss": 2.1021, "step": 495350 }, { "epoch": 1.9149232267940808, "grad_norm": 0.16403105854988098, "learning_rate": 4.6951101482607614e-05, "loss": 2.0897, "step": 495360 }, { "epoch": 1.914961883997464, "grad_norm": 0.1697767972946167, "learning_rate": 4.6848700177070324e-05, "loss": 2.0851, "step": 495370 }, { "epoch": 1.9150005412008473, "grad_norm": 0.16083191335201263, "learning_rate": 4.6746304240025354e-05, "loss": 2.0728, "step": 495380 }, { "epoch": 1.9150391984042305, "grad_norm": 0.17566703259944916, "learning_rate": 4.664391367062826e-05, "loss": 2.1028, "step": 495390 }, { "epoch": 1.9150778556076138, "grad_norm": 0.17670664191246033, "learning_rate": 4.6541528468035277e-05, "loss": 2.1006, "step": 495400 }, { "epoch": 1.915116512810997, "grad_norm": 0.15895481407642365, "learning_rate": 4.643914863140242e-05, "loss": 2.0972, "step": 495410 }, { "epoch": 1.9151551700143805, "grad_norm": 0.16283272206783295, "learning_rate": 4.633677415988613e-05, "loss": 2.0997, "step": 495420 }, { "epoch": 1.9151938272177638, "grad_norm": 0.16882751882076263, "learning_rate": 4.623440505264287e-05, "loss": 2.1107, "step": 495430 }, { "epoch": 1.915232484421147, "grad_norm": 0.16597266495227814, "learning_rate": 4.613204130882998e-05, "loss": 2.0941, "step": 495440 }, { "epoch": 1.9152711416245303, "grad_norm": 0.15502455830574036, "learning_rate": 4.60296829276039e-05, "loss": 2.086, "step": 495450 }, { "epoch": 1.9153097988279137, "grad_norm": 0.16382566094398499, "learning_rate": 4.592732990812243e-05, "loss": 2.0944, "step": 495460 }, { "epoch": 1.915348456031297, "grad_norm": 0.16698609292507172, "learning_rate": 4.582498224954268e-05, "loss": 2.1043, "step": 495470 }, { "epoch": 1.9153871132346802, "grad_norm": 0.1581621915102005, "learning_rate": 4.572263995102266e-05, "loss": 2.0727, "step": 495480 }, { "epoch": 1.9154257704380635, "grad_norm": 0.1631089299917221, "learning_rate": 4.562030301172038e-05, "loss": 2.101, "step": 495490 }, { "epoch": 1.9154644276414468, "grad_norm": 0.1734185665845871, "learning_rate": 4.551797143079361e-05, "loss": 2.1079, "step": 495500 }, { "epoch": 1.91550308484483, "grad_norm": 0.17021867632865906, "learning_rate": 4.541564520740127e-05, "loss": 2.0877, "step": 495510 }, { "epoch": 1.9155417420482133, "grad_norm": 0.17814646661281586, "learning_rate": 4.531332434070135e-05, "loss": 2.0873, "step": 495520 }, { "epoch": 1.9155803992515965, "grad_norm": 0.1666654497385025, "learning_rate": 4.521100882985318e-05, "loss": 2.1061, "step": 495530 }, { "epoch": 1.9156190564549798, "grad_norm": 0.1610458493232727, "learning_rate": 4.510869867401568e-05, "loss": 2.1013, "step": 495540 }, { "epoch": 1.915657713658363, "grad_norm": 0.2371460646390915, "learning_rate": 4.500639387234817e-05, "loss": 2.0977, "step": 495550 }, { "epoch": 1.9156963708617463, "grad_norm": 0.19147741794586182, "learning_rate": 4.490409442401e-05, "loss": 2.0862, "step": 495560 }, { "epoch": 1.9157350280651295, "grad_norm": 0.1672198474407196, "learning_rate": 4.4801800328161166e-05, "loss": 2.0855, "step": 495570 }, { "epoch": 1.9157736852685128, "grad_norm": 0.16370943188667297, "learning_rate": 4.4699511583961236e-05, "loss": 2.0843, "step": 495580 }, { "epoch": 1.9158123424718962, "grad_norm": 0.17125527560710907, "learning_rate": 4.4597228190570437e-05, "loss": 2.09, "step": 495590 }, { "epoch": 1.9158509996752795, "grad_norm": 0.1694050133228302, "learning_rate": 4.449495014714944e-05, "loss": 2.0969, "step": 495600 }, { "epoch": 1.9158896568786628, "grad_norm": 0.169652059674263, "learning_rate": 4.4392677452858464e-05, "loss": 2.1017, "step": 495610 }, { "epoch": 1.915928314082046, "grad_norm": 0.16704246401786804, "learning_rate": 4.4290410106858417e-05, "loss": 2.1011, "step": 495620 }, { "epoch": 1.9159669712854295, "grad_norm": 0.16456107795238495, "learning_rate": 4.41881481083104e-05, "loss": 2.095, "step": 495630 }, { "epoch": 1.9160056284888127, "grad_norm": 0.17001429200172424, "learning_rate": 4.408589145637576e-05, "loss": 2.084, "step": 495640 }, { "epoch": 1.916044285692196, "grad_norm": 0.18315348029136658, "learning_rate": 4.398364015021583e-05, "loss": 2.0921, "step": 495650 }, { "epoch": 1.9160829428955792, "grad_norm": 0.17084050178527832, "learning_rate": 4.38813941889924e-05, "loss": 2.0921, "step": 495660 }, { "epoch": 1.9161216000989625, "grad_norm": 0.16386601328849792, "learning_rate": 4.3779153571867234e-05, "loss": 2.0905, "step": 495670 }, { "epoch": 1.9161602573023457, "grad_norm": 0.16962173581123352, "learning_rate": 4.367691829800258e-05, "loss": 2.0901, "step": 495680 }, { "epoch": 1.916198914505729, "grad_norm": 0.16165487468242645, "learning_rate": 4.3574688366560645e-05, "loss": 2.0929, "step": 495690 }, { "epoch": 1.9162375717091122, "grad_norm": 0.1727007031440735, "learning_rate": 4.347246377670433e-05, "loss": 2.0901, "step": 495700 }, { "epoch": 1.9162762289124955, "grad_norm": 0.1733340471982956, "learning_rate": 4.337024452759586e-05, "loss": 2.1096, "step": 495710 }, { "epoch": 1.9163148861158787, "grad_norm": 0.16607238352298737, "learning_rate": 4.326803061839857e-05, "loss": 2.1001, "step": 495720 }, { "epoch": 1.916353543319262, "grad_norm": 0.165201336145401, "learning_rate": 4.3165822048275795e-05, "loss": 2.0872, "step": 495730 }, { "epoch": 1.9163922005226453, "grad_norm": 0.17623762786388397, "learning_rate": 4.306361881639087e-05, "loss": 2.1111, "step": 495740 }, { "epoch": 1.9164308577260285, "grad_norm": 0.1635725051164627, "learning_rate": 4.296142092190736e-05, "loss": 2.0938, "step": 495750 }, { "epoch": 1.916469514929412, "grad_norm": 0.17206135392189026, "learning_rate": 4.285922836398903e-05, "loss": 2.0796, "step": 495760 }, { "epoch": 1.9165081721327952, "grad_norm": 0.19353315234184265, "learning_rate": 4.2757041141800345e-05, "loss": 2.0963, "step": 495770 }, { "epoch": 1.9165468293361785, "grad_norm": 0.16452445089817047, "learning_rate": 4.265485925450552e-05, "loss": 2.1097, "step": 495780 }, { "epoch": 1.9165854865395617, "grad_norm": 0.16897344589233398, "learning_rate": 4.2552682701268776e-05, "loss": 2.1021, "step": 495790 }, { "epoch": 1.9166241437429452, "grad_norm": 0.15898294746875763, "learning_rate": 4.2450511481255e-05, "loss": 2.0935, "step": 495800 }, { "epoch": 1.9166628009463285, "grad_norm": 0.15929338335990906, "learning_rate": 4.2348345593629325e-05, "loss": 2.0823, "step": 495810 }, { "epoch": 1.9167014581497117, "grad_norm": 0.1751093715429306, "learning_rate": 4.224618503755684e-05, "loss": 2.0861, "step": 495820 }, { "epoch": 1.916740115353095, "grad_norm": 0.16839739680290222, "learning_rate": 4.2144029812202886e-05, "loss": 2.0883, "step": 495830 }, { "epoch": 1.9167787725564782, "grad_norm": 0.17080606520175934, "learning_rate": 4.204187991673325e-05, "loss": 2.1052, "step": 495840 }, { "epoch": 1.9168174297598615, "grad_norm": 0.16607625782489777, "learning_rate": 4.193973535031348e-05, "loss": 2.0959, "step": 495850 }, { "epoch": 1.9168560869632447, "grad_norm": 0.16274487972259521, "learning_rate": 4.183759611211002e-05, "loss": 2.0859, "step": 495860 }, { "epoch": 1.916894744166628, "grad_norm": 0.16844266653060913, "learning_rate": 4.173546220128888e-05, "loss": 2.0837, "step": 495870 }, { "epoch": 1.9169334013700112, "grad_norm": 0.1613076776266098, "learning_rate": 4.163333361701649e-05, "loss": 2.0909, "step": 495880 }, { "epoch": 1.9169720585733945, "grad_norm": 0.16152922809123993, "learning_rate": 4.153121035845975e-05, "loss": 2.0916, "step": 495890 }, { "epoch": 1.9170107157767777, "grad_norm": 0.1656840741634369, "learning_rate": 4.142909242478532e-05, "loss": 2.1162, "step": 495900 }, { "epoch": 1.917049372980161, "grad_norm": 0.16846653819084167, "learning_rate": 4.132697981516076e-05, "loss": 2.0992, "step": 495910 }, { "epoch": 1.9170880301835445, "grad_norm": 0.17735540866851807, "learning_rate": 4.1224872528752954e-05, "loss": 2.0961, "step": 495920 }, { "epoch": 1.9171266873869277, "grad_norm": 0.16371974349021912, "learning_rate": 4.112277056472991e-05, "loss": 2.0876, "step": 495930 }, { "epoch": 1.917165344590311, "grad_norm": 0.16141781210899353, "learning_rate": 4.1020673922258944e-05, "loss": 2.0841, "step": 495940 }, { "epoch": 1.9172040017936942, "grad_norm": 0.16501149535179138, "learning_rate": 4.091858260050873e-05, "loss": 2.093, "step": 495950 }, { "epoch": 1.9172426589970775, "grad_norm": 0.16135872900485992, "learning_rate": 4.081649659864684e-05, "loss": 2.0859, "step": 495960 }, { "epoch": 1.917281316200461, "grad_norm": 0.17437012493610382, "learning_rate": 4.071441591584213e-05, "loss": 2.0881, "step": 495970 }, { "epoch": 1.9173199734038442, "grad_norm": 0.170584574341774, "learning_rate": 4.061234055126306e-05, "loss": 2.1035, "step": 495980 }, { "epoch": 1.9173586306072274, "grad_norm": 0.1637963205575943, "learning_rate": 4.051027050407852e-05, "loss": 2.0935, "step": 495990 }, { "epoch": 1.9173972878106107, "grad_norm": 0.16705727577209473, "learning_rate": 4.040820577345761e-05, "loss": 2.0915, "step": 496000 }, { "epoch": 1.917435945013994, "grad_norm": 0.17151454091072083, "learning_rate": 4.030614635856966e-05, "loss": 2.0944, "step": 496010 }, { "epoch": 1.9174746022173772, "grad_norm": 0.16552621126174927, "learning_rate": 4.020409225858423e-05, "loss": 2.0838, "step": 496020 }, { "epoch": 1.9175132594207605, "grad_norm": 0.17188087105751038, "learning_rate": 4.010204347267088e-05, "loss": 2.1022, "step": 496030 }, { "epoch": 1.9175519166241437, "grad_norm": 0.1699785590171814, "learning_rate": 4.000000000000004e-05, "loss": 2.1053, "step": 496040 }, { "epoch": 1.917590573827527, "grad_norm": 0.17038536071777344, "learning_rate": 3.989796183974126e-05, "loss": 2.0966, "step": 496050 }, { "epoch": 1.9176292310309102, "grad_norm": 0.16701827943325043, "learning_rate": 3.979592899106543e-05, "loss": 2.0907, "step": 496060 }, { "epoch": 1.9176678882342935, "grad_norm": 0.16330760717391968, "learning_rate": 3.9693901453143e-05, "loss": 2.1054, "step": 496070 }, { "epoch": 1.9177065454376767, "grad_norm": 0.1569678634405136, "learning_rate": 3.959187922514462e-05, "loss": 2.0949, "step": 496080 }, { "epoch": 1.9177452026410602, "grad_norm": 0.16981206834316254, "learning_rate": 3.948986230624141e-05, "loss": 2.0887, "step": 496090 }, { "epoch": 1.9177838598444434, "grad_norm": 0.17285099625587463, "learning_rate": 3.938785069560491e-05, "loss": 2.083, "step": 496100 }, { "epoch": 1.9178225170478267, "grad_norm": 0.16686466336250305, "learning_rate": 3.928584439240623e-05, "loss": 2.0834, "step": 496110 }, { "epoch": 1.91786117425121, "grad_norm": 0.16524849832057953, "learning_rate": 3.918384339581693e-05, "loss": 2.1053, "step": 496120 }, { "epoch": 1.9178998314545932, "grad_norm": 0.17060840129852295, "learning_rate": 3.908184770500945e-05, "loss": 2.0982, "step": 496130 }, { "epoch": 1.9179384886579767, "grad_norm": 0.1700611114501953, "learning_rate": 3.897985731915532e-05, "loss": 2.1048, "step": 496140 }, { "epoch": 1.91797714586136, "grad_norm": 0.1688399761915207, "learning_rate": 3.887787223742745e-05, "loss": 2.0942, "step": 496150 }, { "epoch": 1.9180158030647432, "grad_norm": 0.35818973183631897, "learning_rate": 3.877589245899804e-05, "loss": 2.1014, "step": 496160 }, { "epoch": 1.9180544602681264, "grad_norm": 0.18928390741348267, "learning_rate": 3.867391798303976e-05, "loss": 2.1023, "step": 496170 }, { "epoch": 1.9180931174715097, "grad_norm": 0.17422586679458618, "learning_rate": 3.857194880872572e-05, "loss": 2.0758, "step": 496180 }, { "epoch": 1.918131774674893, "grad_norm": 0.159042626619339, "learning_rate": 3.846998493522924e-05, "loss": 2.0892, "step": 496190 }, { "epoch": 1.9181704318782762, "grad_norm": 0.17417706549167633, "learning_rate": 3.836802636172343e-05, "loss": 2.108, "step": 496200 }, { "epoch": 1.9182090890816594, "grad_norm": 0.16420163214206696, "learning_rate": 3.826607308738228e-05, "loss": 2.0893, "step": 496210 }, { "epoch": 1.9182477462850427, "grad_norm": 0.15608297288417816, "learning_rate": 3.8164125111379125e-05, "loss": 2.0745, "step": 496220 }, { "epoch": 1.918286403488426, "grad_norm": 0.20413453876972198, "learning_rate": 3.8062182432888616e-05, "loss": 2.0949, "step": 496230 }, { "epoch": 1.9183250606918092, "grad_norm": 0.16319338977336884, "learning_rate": 3.796024505108453e-05, "loss": 2.0946, "step": 496240 }, { "epoch": 1.9183637178951924, "grad_norm": 0.16638115048408508, "learning_rate": 3.785831296514153e-05, "loss": 2.0847, "step": 496250 }, { "epoch": 1.918402375098576, "grad_norm": 0.1976384073495865, "learning_rate": 3.775638617423449e-05, "loss": 2.0923, "step": 496260 }, { "epoch": 1.9184410323019592, "grad_norm": 0.16054300963878632, "learning_rate": 3.765446467753808e-05, "loss": 2.0683, "step": 496270 }, { "epoch": 1.9184796895053424, "grad_norm": 0.1680409014225006, "learning_rate": 3.75525484742274e-05, "loss": 2.1037, "step": 496280 }, { "epoch": 1.9185183467087257, "grad_norm": 0.16273841261863708, "learning_rate": 3.745063756347778e-05, "loss": 2.0923, "step": 496290 }, { "epoch": 1.918557003912109, "grad_norm": 0.17158521711826324, "learning_rate": 3.7348731944464974e-05, "loss": 2.09, "step": 496300 }, { "epoch": 1.9185956611154924, "grad_norm": 0.18540428578853607, "learning_rate": 3.724683161636455e-05, "loss": 2.0847, "step": 496310 }, { "epoch": 1.9186343183188757, "grad_norm": 0.1612491011619568, "learning_rate": 3.714493657835249e-05, "loss": 2.0827, "step": 496320 }, { "epoch": 1.918672975522259, "grad_norm": 0.1789754182100296, "learning_rate": 3.704304682960502e-05, "loss": 2.0907, "step": 496330 }, { "epoch": 1.9187116327256422, "grad_norm": 0.15669743716716766, "learning_rate": 3.694116236929878e-05, "loss": 2.1034, "step": 496340 }, { "epoch": 1.9187502899290254, "grad_norm": 0.15760864317417145, "learning_rate": 3.6839283196610006e-05, "loss": 2.0776, "step": 496350 }, { "epoch": 1.9187889471324087, "grad_norm": 0.166031152009964, "learning_rate": 3.6737409310715784e-05, "loss": 2.0842, "step": 496360 }, { "epoch": 1.918827604335792, "grad_norm": 0.16171342134475708, "learning_rate": 3.6635540710793005e-05, "loss": 2.0912, "step": 496370 }, { "epoch": 1.9188662615391752, "grad_norm": 0.16252924501895905, "learning_rate": 3.6533677396019205e-05, "loss": 2.0846, "step": 496380 }, { "epoch": 1.9189049187425584, "grad_norm": 0.1602102369070053, "learning_rate": 3.6431819365571496e-05, "loss": 2.1002, "step": 496390 }, { "epoch": 1.9189435759459417, "grad_norm": 0.16001828014850616, "learning_rate": 3.632996661862764e-05, "loss": 2.1107, "step": 496400 }, { "epoch": 1.918982233149325, "grad_norm": 0.16714394092559814, "learning_rate": 3.622811915436586e-05, "loss": 2.0719, "step": 496410 }, { "epoch": 1.9190208903527082, "grad_norm": 0.16935458779335022, "learning_rate": 3.612627697196391e-05, "loss": 2.0813, "step": 496420 }, { "epoch": 1.9190595475560916, "grad_norm": 0.16367019712924957, "learning_rate": 3.6024440070600016e-05, "loss": 2.0756, "step": 496430 }, { "epoch": 1.919098204759475, "grad_norm": 0.19588987529277802, "learning_rate": 3.592260844945305e-05, "loss": 2.0907, "step": 496440 }, { "epoch": 1.9191368619628582, "grad_norm": 0.15941786766052246, "learning_rate": 3.58207821077019e-05, "loss": 2.0899, "step": 496450 }, { "epoch": 1.9191755191662414, "grad_norm": 0.16553384065628052, "learning_rate": 3.5718961044524986e-05, "loss": 2.0874, "step": 496460 }, { "epoch": 1.9192141763696249, "grad_norm": 0.17352290451526642, "learning_rate": 3.561714525910209e-05, "loss": 2.0852, "step": 496470 }, { "epoch": 1.9192528335730081, "grad_norm": 0.15746286511421204, "learning_rate": 3.5515334750612085e-05, "loss": 2.1012, "step": 496480 }, { "epoch": 1.9192914907763914, "grad_norm": 0.1638580858707428, "learning_rate": 3.5413529518234735e-05, "loss": 2.0964, "step": 496490 }, { "epoch": 1.9193301479797746, "grad_norm": 0.16750258207321167, "learning_rate": 3.531172956115003e-05, "loss": 2.0837, "step": 496500 }, { "epoch": 1.9193688051831579, "grad_norm": 0.1676705926656723, "learning_rate": 3.5209934878537744e-05, "loss": 2.1003, "step": 496510 }, { "epoch": 1.9194074623865411, "grad_norm": 0.16694529354572296, "learning_rate": 3.51081454695783e-05, "loss": 2.0749, "step": 496520 }, { "epoch": 1.9194461195899244, "grad_norm": 0.17172051966190338, "learning_rate": 3.500636133345192e-05, "loss": 2.1072, "step": 496530 }, { "epoch": 1.9194847767933076, "grad_norm": 0.16720014810562134, "learning_rate": 3.49045824693397e-05, "loss": 2.1067, "step": 496540 }, { "epoch": 1.919523433996691, "grad_norm": 0.15233376622200012, "learning_rate": 3.480280887642206e-05, "loss": 2.0892, "step": 496550 }, { "epoch": 1.9195620912000741, "grad_norm": 0.16428092122077942, "learning_rate": 3.470104055388057e-05, "loss": 2.0915, "step": 496560 }, { "epoch": 1.9196007484034574, "grad_norm": 0.15058358013629913, "learning_rate": 3.459927750089609e-05, "loss": 2.0904, "step": 496570 }, { "epoch": 1.9196394056068407, "grad_norm": 0.16439568996429443, "learning_rate": 3.449751971665016e-05, "loss": 2.1122, "step": 496580 }, { "epoch": 1.919678062810224, "grad_norm": 0.16111652553081512, "learning_rate": 3.439576720032478e-05, "loss": 2.1009, "step": 496590 }, { "epoch": 1.9197167200136074, "grad_norm": 0.16498912870883942, "learning_rate": 3.4294019951101705e-05, "loss": 2.0852, "step": 496600 }, { "epoch": 1.9197553772169906, "grad_norm": 0.1640535145998001, "learning_rate": 3.4192277968163156e-05, "loss": 2.0939, "step": 496610 }, { "epoch": 1.9197940344203739, "grad_norm": 0.16125431656837463, "learning_rate": 3.4090541250691334e-05, "loss": 2.0965, "step": 496620 }, { "epoch": 1.9198326916237571, "grad_norm": 0.17195944488048553, "learning_rate": 3.398880979786889e-05, "loss": 2.0957, "step": 496630 }, { "epoch": 1.9198713488271406, "grad_norm": 0.16381843388080597, "learning_rate": 3.388708360887871e-05, "loss": 2.1053, "step": 496640 }, { "epoch": 1.9199100060305239, "grad_norm": 0.16488052904605865, "learning_rate": 3.378536268290389e-05, "loss": 2.0911, "step": 496650 }, { "epoch": 1.9199486632339071, "grad_norm": 0.15774224698543549, "learning_rate": 3.36836470191273e-05, "loss": 2.0915, "step": 496660 }, { "epoch": 1.9199873204372904, "grad_norm": 0.15254941582679749, "learning_rate": 3.358193661673248e-05, "loss": 2.0879, "step": 496670 }, { "epoch": 1.9200259776406736, "grad_norm": 0.1649966835975647, "learning_rate": 3.34802314749032e-05, "loss": 2.0844, "step": 496680 }, { "epoch": 1.9200646348440569, "grad_norm": 0.1633954644203186, "learning_rate": 3.3378531592823225e-05, "loss": 2.094, "step": 496690 }, { "epoch": 1.9201032920474401, "grad_norm": 0.1792258471250534, "learning_rate": 3.327683696967654e-05, "loss": 2.0967, "step": 496700 }, { "epoch": 1.9201419492508234, "grad_norm": 0.17409303784370422, "learning_rate": 3.317514760464757e-05, "loss": 2.0952, "step": 496710 }, { "epoch": 1.9201806064542066, "grad_norm": 0.16607849299907684, "learning_rate": 3.307346349692053e-05, "loss": 2.1021, "step": 496720 }, { "epoch": 1.9202192636575899, "grad_norm": 0.16597460210323334, "learning_rate": 3.297178464568029e-05, "loss": 2.1005, "step": 496730 }, { "epoch": 1.9202579208609731, "grad_norm": 0.1616782397031784, "learning_rate": 3.287011105011173e-05, "loss": 2.0854, "step": 496740 }, { "epoch": 1.9202965780643564, "grad_norm": 0.16032592952251434, "learning_rate": 3.276844270939971e-05, "loss": 2.1086, "step": 496750 }, { "epoch": 1.9203352352677396, "grad_norm": 0.16161899268627167, "learning_rate": 3.2666779622730017e-05, "loss": 2.1006, "step": 496760 }, { "epoch": 1.9203738924711231, "grad_norm": 0.1673613041639328, "learning_rate": 3.2565121789287725e-05, "loss": 2.0802, "step": 496770 }, { "epoch": 1.9204125496745064, "grad_norm": 0.16218461096286774, "learning_rate": 3.2463469208258826e-05, "loss": 2.0987, "step": 496780 }, { "epoch": 1.9204512068778896, "grad_norm": 0.15713539719581604, "learning_rate": 3.23618218788293e-05, "loss": 2.091, "step": 496790 }, { "epoch": 1.9204898640812729, "grad_norm": 0.16224777698516846, "learning_rate": 3.226017980018514e-05, "loss": 2.0786, "step": 496800 }, { "epoch": 1.9205285212846563, "grad_norm": 0.1700608879327774, "learning_rate": 3.215854297151277e-05, "loss": 2.0889, "step": 496810 }, { "epoch": 1.9205671784880396, "grad_norm": 0.16657862067222595, "learning_rate": 3.205691139199862e-05, "loss": 2.0996, "step": 496820 }, { "epoch": 1.9206058356914228, "grad_norm": 0.16817116737365723, "learning_rate": 3.1955285060829564e-05, "loss": 2.1092, "step": 496830 }, { "epoch": 1.920644492894806, "grad_norm": 0.15900260210037231, "learning_rate": 3.185366397719292e-05, "loss": 2.0807, "step": 496840 }, { "epoch": 1.9206831500981894, "grad_norm": 0.16846583783626556, "learning_rate": 3.175204814027555e-05, "loss": 2.0803, "step": 496850 }, { "epoch": 1.9207218073015726, "grad_norm": 0.16676563024520874, "learning_rate": 3.1650437549265e-05, "loss": 2.0818, "step": 496860 }, { "epoch": 1.9207604645049559, "grad_norm": 0.17843875288963318, "learning_rate": 3.154883220334881e-05, "loss": 2.0833, "step": 496870 }, { "epoch": 1.920799121708339, "grad_norm": 0.1726779043674469, "learning_rate": 3.144723210171474e-05, "loss": 2.0926, "step": 496880 }, { "epoch": 1.9208377789117224, "grad_norm": 0.1588815450668335, "learning_rate": 3.1345637243550997e-05, "loss": 2.0811, "step": 496890 }, { "epoch": 1.9208764361151056, "grad_norm": 0.16358856856822968, "learning_rate": 3.1244047628046e-05, "loss": 2.0864, "step": 496900 }, { "epoch": 1.9209150933184889, "grad_norm": 0.16491766273975372, "learning_rate": 3.1142463254387746e-05, "loss": 2.0988, "step": 496910 }, { "epoch": 1.9209537505218721, "grad_norm": 0.23247385025024414, "learning_rate": 3.104088412176531e-05, "loss": 2.0923, "step": 496920 }, { "epoch": 1.9209924077252554, "grad_norm": 0.1721058338880539, "learning_rate": 3.0939310229367135e-05, "loss": 2.0808, "step": 496930 }, { "epoch": 1.9210310649286388, "grad_norm": 0.15375511348247528, "learning_rate": 3.083774157638297e-05, "loss": 2.0904, "step": 496940 }, { "epoch": 1.921069722132022, "grad_norm": 0.15841242671012878, "learning_rate": 3.0736178162001474e-05, "loss": 2.0963, "step": 496950 }, { "epoch": 1.9211083793354053, "grad_norm": 0.1550142467021942, "learning_rate": 3.0634619985412617e-05, "loss": 2.0928, "step": 496960 }, { "epoch": 1.9211470365387886, "grad_norm": 0.1543474793434143, "learning_rate": 3.053306704580594e-05, "loss": 2.1007, "step": 496970 }, { "epoch": 1.921185693742172, "grad_norm": 0.1605292558670044, "learning_rate": 3.0431519342371205e-05, "loss": 2.0966, "step": 496980 }, { "epoch": 1.9212243509455553, "grad_norm": 0.16049958765506744, "learning_rate": 3.0329976874298837e-05, "loss": 2.0857, "step": 496990 }, { "epoch": 1.9212630081489386, "grad_norm": 0.15806148946285248, "learning_rate": 3.022843964077904e-05, "loss": 2.088, "step": 497000 }, { "epoch": 1.9213016653523218, "grad_norm": 0.15949906408786774, "learning_rate": 3.0126907641002456e-05, "loss": 2.0815, "step": 497010 }, { "epoch": 1.921340322555705, "grad_norm": 0.16571669280529022, "learning_rate": 3.0025380874159737e-05, "loss": 2.0862, "step": 497020 }, { "epoch": 1.9213789797590883, "grad_norm": 0.16648319363594055, "learning_rate": 2.992385933944153e-05, "loss": 2.0836, "step": 497030 }, { "epoch": 1.9214176369624716, "grad_norm": 0.16171889007091522, "learning_rate": 2.982234303603981e-05, "loss": 2.0988, "step": 497040 }, { "epoch": 1.9214562941658548, "grad_norm": 0.16150999069213867, "learning_rate": 2.9720831963145457e-05, "loss": 2.0727, "step": 497050 }, { "epoch": 1.921494951369238, "grad_norm": 0.16288860142230988, "learning_rate": 2.9619326119949996e-05, "loss": 2.0962, "step": 497060 }, { "epoch": 1.9215336085726213, "grad_norm": 0.16144005954265594, "learning_rate": 2.9517825505645414e-05, "loss": 2.0893, "step": 497070 }, { "epoch": 1.9215722657760046, "grad_norm": 0.15442635118961334, "learning_rate": 2.9416330119423684e-05, "loss": 2.0967, "step": 497080 }, { "epoch": 1.9216109229793878, "grad_norm": 0.15948958694934845, "learning_rate": 2.9314839960477014e-05, "loss": 2.0991, "step": 497090 }, { "epoch": 1.921649580182771, "grad_norm": 0.17574442923069, "learning_rate": 2.9213355027998045e-05, "loss": 2.0934, "step": 497100 }, { "epoch": 1.9216882373861546, "grad_norm": 0.15875700116157532, "learning_rate": 2.9111875321178983e-05, "loss": 2.0884, "step": 497110 }, { "epoch": 1.9217268945895378, "grad_norm": 0.16926713287830353, "learning_rate": 2.9010400839212915e-05, "loss": 2.0772, "step": 497120 }, { "epoch": 1.921765551792921, "grad_norm": 0.16093897819519043, "learning_rate": 2.890893158129293e-05, "loss": 2.0844, "step": 497130 }, { "epoch": 1.9218042089963043, "grad_norm": 0.15793362259864807, "learning_rate": 2.880746754661234e-05, "loss": 2.0753, "step": 497140 }, { "epoch": 1.9218428661996878, "grad_norm": 0.16137558221817017, "learning_rate": 2.870600873436424e-05, "loss": 2.081, "step": 497150 }, { "epoch": 1.921881523403071, "grad_norm": 0.16008661687374115, "learning_rate": 2.8604555143742827e-05, "loss": 2.0733, "step": 497160 }, { "epoch": 1.9219201806064543, "grad_norm": 0.17916366457939148, "learning_rate": 2.8503106773941635e-05, "loss": 2.0863, "step": 497170 }, { "epoch": 1.9219588378098376, "grad_norm": 0.16702742874622345, "learning_rate": 2.840166362415486e-05, "loss": 2.0998, "step": 497180 }, { "epoch": 1.9219974950132208, "grad_norm": 0.1600625365972519, "learning_rate": 2.8300225693576932e-05, "loss": 2.0912, "step": 497190 }, { "epoch": 1.922036152216604, "grad_norm": 0.1780082732439041, "learning_rate": 2.8198792981402042e-05, "loss": 2.0917, "step": 497200 }, { "epoch": 1.9220748094199873, "grad_norm": 0.16587743163108826, "learning_rate": 2.8097365486825065e-05, "loss": 2.0816, "step": 497210 }, { "epoch": 1.9221134666233706, "grad_norm": 0.16313040256500244, "learning_rate": 2.7995943209041086e-05, "loss": 2.0881, "step": 497220 }, { "epoch": 1.9221521238267538, "grad_norm": 0.16506393253803253, "learning_rate": 2.7894526147244748e-05, "loss": 2.0968, "step": 497230 }, { "epoch": 1.922190781030137, "grad_norm": 0.1675456464290619, "learning_rate": 2.779311430063203e-05, "loss": 2.0927, "step": 497240 }, { "epoch": 1.9222294382335203, "grad_norm": 0.1661851704120636, "learning_rate": 2.7691707668398015e-05, "loss": 2.0833, "step": 497250 }, { "epoch": 1.9222680954369036, "grad_norm": 0.1600489616394043, "learning_rate": 2.759030624973846e-05, "loss": 2.0867, "step": 497260 }, { "epoch": 1.9223067526402868, "grad_norm": 0.15950718522071838, "learning_rate": 2.7488910043849792e-05, "loss": 2.0789, "step": 497270 }, { "epoch": 1.9223454098436703, "grad_norm": 0.4952988624572754, "learning_rate": 2.7387519049927534e-05, "loss": 2.0819, "step": 497280 }, { "epoch": 1.9223840670470536, "grad_norm": 0.1711588203907013, "learning_rate": 2.7286133267168333e-05, "loss": 2.0835, "step": 497290 }, { "epoch": 1.9224227242504368, "grad_norm": 0.1545725017786026, "learning_rate": 2.7184752694769054e-05, "loss": 2.0985, "step": 497300 }, { "epoch": 1.92246138145382, "grad_norm": 0.16545742750167847, "learning_rate": 2.7083377331925896e-05, "loss": 2.0882, "step": 497310 }, { "epoch": 1.9225000386572035, "grad_norm": 0.17229430377483368, "learning_rate": 2.6982007177836166e-05, "loss": 2.0788, "step": 497320 }, { "epoch": 1.9225386958605868, "grad_norm": 0.16649629175662994, "learning_rate": 2.6880642231697172e-05, "loss": 2.0769, "step": 497330 }, { "epoch": 1.92257735306397, "grad_norm": 0.15658745169639587, "learning_rate": 2.6779282492706005e-05, "loss": 2.0893, "step": 497340 }, { "epoch": 1.9226160102673533, "grad_norm": 0.1738753765821457, "learning_rate": 2.6677927960060412e-05, "loss": 2.0972, "step": 497350 }, { "epoch": 1.9226546674707365, "grad_norm": 0.15449616312980652, "learning_rate": 2.657657863295837e-05, "loss": 2.0907, "step": 497360 }, { "epoch": 1.9226933246741198, "grad_norm": 0.16673849523067474, "learning_rate": 2.6475234510597857e-05, "loss": 2.0871, "step": 497370 }, { "epoch": 1.922731981877503, "grad_norm": 0.15777698159217834, "learning_rate": 2.6373895592176845e-05, "loss": 2.0953, "step": 497380 }, { "epoch": 1.9227706390808863, "grad_norm": 0.5646606087684631, "learning_rate": 2.6272561876893752e-05, "loss": 2.089, "step": 497390 }, { "epoch": 1.9228092962842696, "grad_norm": 0.17194247245788574, "learning_rate": 2.6171233363947667e-05, "loss": 2.0926, "step": 497400 }, { "epoch": 1.9228479534876528, "grad_norm": 0.16004107892513275, "learning_rate": 2.606991005253678e-05, "loss": 2.0919, "step": 497410 }, { "epoch": 1.922886610691036, "grad_norm": 0.16976672410964966, "learning_rate": 2.5968591941860853e-05, "loss": 2.0906, "step": 497420 }, { "epoch": 1.9229252678944193, "grad_norm": 0.165140762925148, "learning_rate": 2.5867279031118294e-05, "loss": 2.0898, "step": 497430 }, { "epoch": 1.9229639250978026, "grad_norm": 0.16237983107566833, "learning_rate": 2.5765971319509306e-05, "loss": 2.0822, "step": 497440 }, { "epoch": 1.923002582301186, "grad_norm": 0.15686587989330292, "learning_rate": 2.5664668806233414e-05, "loss": 2.087, "step": 497450 }, { "epoch": 1.9230412395045693, "grad_norm": 0.16701488196849823, "learning_rate": 2.556337149049015e-05, "loss": 2.0851, "step": 497460 }, { "epoch": 1.9230798967079525, "grad_norm": 0.16209928691387177, "learning_rate": 2.546207937147993e-05, "loss": 2.0915, "step": 497470 }, { "epoch": 1.9231185539113358, "grad_norm": 0.17187343537807465, "learning_rate": 2.5360792448402726e-05, "loss": 2.0916, "step": 497480 }, { "epoch": 1.9231572111147193, "grad_norm": 0.15735509991645813, "learning_rate": 2.525951072045918e-05, "loss": 2.0987, "step": 497490 }, { "epoch": 1.9231958683181025, "grad_norm": 0.16227933764457703, "learning_rate": 2.5158234186850148e-05, "loss": 2.0824, "step": 497500 }, { "epoch": 1.9232345255214858, "grad_norm": 0.168210968375206, "learning_rate": 2.5056962846776276e-05, "loss": 2.0978, "step": 497510 }, { "epoch": 1.923273182724869, "grad_norm": 0.15861283242702484, "learning_rate": 2.4955696699438647e-05, "loss": 2.1101, "step": 497520 }, { "epoch": 1.9233118399282523, "grad_norm": 0.1637565791606903, "learning_rate": 2.4854435744038785e-05, "loss": 2.0775, "step": 497530 }, { "epoch": 1.9233504971316355, "grad_norm": 0.15976262092590332, "learning_rate": 2.4753179979777774e-05, "loss": 2.0916, "step": 497540 }, { "epoch": 1.9233891543350188, "grad_norm": 0.16702741384506226, "learning_rate": 2.4651929405857808e-05, "loss": 2.0942, "step": 497550 }, { "epoch": 1.923427811538402, "grad_norm": 0.16337959468364716, "learning_rate": 2.4550684021480642e-05, "loss": 2.0812, "step": 497560 }, { "epoch": 1.9234664687417853, "grad_norm": 0.15780958533287048, "learning_rate": 2.4449443825848238e-05, "loss": 2.0807, "step": 497570 }, { "epoch": 1.9235051259451685, "grad_norm": 0.16224455833435059, "learning_rate": 2.4348208818163242e-05, "loss": 2.077, "step": 497580 }, { "epoch": 1.9235437831485518, "grad_norm": 0.1588510423898697, "learning_rate": 2.4246978997627843e-05, "loss": 2.0938, "step": 497590 }, { "epoch": 1.923582440351935, "grad_norm": 0.19597147405147552, "learning_rate": 2.4145754363444906e-05, "loss": 2.0695, "step": 497600 }, { "epoch": 1.9236210975553183, "grad_norm": 0.15698018670082092, "learning_rate": 2.4044534914817507e-05, "loss": 2.0844, "step": 497610 }, { "epoch": 1.9236597547587018, "grad_norm": 0.16293266415596008, "learning_rate": 2.394332065094851e-05, "loss": 2.0815, "step": 497620 }, { "epoch": 1.923698411962085, "grad_norm": 0.1649063676595688, "learning_rate": 2.3842111571041658e-05, "loss": 2.0947, "step": 497630 }, { "epoch": 1.9237370691654683, "grad_norm": 0.16432203352451324, "learning_rate": 2.3740907674299816e-05, "loss": 2.1011, "step": 497640 }, { "epoch": 1.9237757263688515, "grad_norm": 0.16237296164035797, "learning_rate": 2.3639708959927398e-05, "loss": 2.0957, "step": 497650 }, { "epoch": 1.923814383572235, "grad_norm": 0.17158690094947815, "learning_rate": 2.353851542712837e-05, "loss": 2.0853, "step": 497660 }, { "epoch": 1.9238530407756183, "grad_norm": 0.16722534596920013, "learning_rate": 2.3437327075106263e-05, "loss": 2.0818, "step": 497670 }, { "epoch": 1.9238916979790015, "grad_norm": 0.16253255307674408, "learning_rate": 2.3336143903066155e-05, "loss": 2.0944, "step": 497680 }, { "epoch": 1.9239303551823848, "grad_norm": 0.16351984441280365, "learning_rate": 2.3234965910212235e-05, "loss": 2.0991, "step": 497690 }, { "epoch": 1.923969012385768, "grad_norm": 0.18320319056510925, "learning_rate": 2.3133793095749146e-05, "loss": 2.0812, "step": 497700 }, { "epoch": 1.9240076695891513, "grad_norm": 0.16540493071079254, "learning_rate": 2.3032625458882405e-05, "loss": 2.0781, "step": 497710 }, { "epoch": 1.9240463267925345, "grad_norm": 0.16712576150894165, "learning_rate": 2.2931462998816655e-05, "loss": 2.0957, "step": 497720 }, { "epoch": 1.9240849839959178, "grad_norm": 0.1669510304927826, "learning_rate": 2.2830305714757417e-05, "loss": 2.0826, "step": 497730 }, { "epoch": 1.924123641199301, "grad_norm": 0.15979358553886414, "learning_rate": 2.2729153605910213e-05, "loss": 2.0826, "step": 497740 }, { "epoch": 1.9241622984026843, "grad_norm": 0.15580396354198456, "learning_rate": 2.2628006671481016e-05, "loss": 2.0935, "step": 497750 }, { "epoch": 1.9242009556060675, "grad_norm": 0.1805225908756256, "learning_rate": 2.2526864910676016e-05, "loss": 2.0809, "step": 497760 }, { "epoch": 1.9242396128094508, "grad_norm": 0.1583130806684494, "learning_rate": 2.2425728322700957e-05, "loss": 2.0912, "step": 497770 }, { "epoch": 1.9242782700128342, "grad_norm": 0.16896626353263855, "learning_rate": 2.232459690676225e-05, "loss": 2.087, "step": 497780 }, { "epoch": 1.9243169272162175, "grad_norm": 0.16624216735363007, "learning_rate": 2.222347066206698e-05, "loss": 2.0922, "step": 497790 }, { "epoch": 1.9243555844196008, "grad_norm": 0.17013777792453766, "learning_rate": 2.2122349587821554e-05, "loss": 2.0903, "step": 497800 }, { "epoch": 1.924394241622984, "grad_norm": 0.15819691121578217, "learning_rate": 2.2021233683232834e-05, "loss": 2.0977, "step": 497810 }, { "epoch": 1.9244328988263673, "grad_norm": 0.16058465838432312, "learning_rate": 2.192012294750856e-05, "loss": 2.0806, "step": 497820 }, { "epoch": 1.9244715560297507, "grad_norm": 0.1678999662399292, "learning_rate": 2.1819017379855587e-05, "loss": 2.1012, "step": 497830 }, { "epoch": 1.924510213233134, "grad_norm": 0.1562078595161438, "learning_rate": 2.1717916979481888e-05, "loss": 2.0852, "step": 497840 }, { "epoch": 1.9245488704365172, "grad_norm": 0.16949644684791565, "learning_rate": 2.1616821745595206e-05, "loss": 2.102, "step": 497850 }, { "epoch": 1.9245875276399005, "grad_norm": 0.15762582421302795, "learning_rate": 2.1515731677403506e-05, "loss": 2.0957, "step": 497860 }, { "epoch": 1.9246261848432837, "grad_norm": 0.1545087844133377, "learning_rate": 2.1414646774114977e-05, "loss": 2.0911, "step": 497870 }, { "epoch": 1.924664842046667, "grad_norm": 0.1587749570608139, "learning_rate": 2.1313567034938254e-05, "loss": 2.0966, "step": 497880 }, { "epoch": 1.9247034992500502, "grad_norm": 0.16813033819198608, "learning_rate": 2.1212492459081746e-05, "loss": 2.0915, "step": 497890 }, { "epoch": 1.9247421564534335, "grad_norm": 0.1571056842803955, "learning_rate": 2.1111423045754306e-05, "loss": 2.0834, "step": 497900 }, { "epoch": 1.9247808136568167, "grad_norm": 0.15674418210983276, "learning_rate": 2.101035879416502e-05, "loss": 2.0793, "step": 497910 }, { "epoch": 1.9248194708602, "grad_norm": 0.20706580579280853, "learning_rate": 2.0909299703522954e-05, "loss": 2.0638, "step": 497920 }, { "epoch": 1.9248581280635833, "grad_norm": 0.15839233994483948, "learning_rate": 2.0808245773037858e-05, "loss": 2.0728, "step": 497930 }, { "epoch": 1.9248967852669665, "grad_norm": 0.1598314344882965, "learning_rate": 2.070719700191903e-05, "loss": 2.1033, "step": 497940 }, { "epoch": 1.92493544247035, "grad_norm": 0.15691877901554108, "learning_rate": 2.060615338937666e-05, "loss": 2.0766, "step": 497950 }, { "epoch": 1.9249740996737332, "grad_norm": 0.16243983805179596, "learning_rate": 2.0505114934620484e-05, "loss": 2.0885, "step": 497960 }, { "epoch": 1.9250127568771165, "grad_norm": 0.16956232488155365, "learning_rate": 2.0404081636860917e-05, "loss": 2.0959, "step": 497970 }, { "epoch": 1.9250514140804997, "grad_norm": 0.16090889275074005, "learning_rate": 2.0303053495308364e-05, "loss": 2.0777, "step": 497980 }, { "epoch": 1.925090071283883, "grad_norm": 0.1664169579744339, "learning_rate": 2.0202030509173463e-05, "loss": 2.0855, "step": 497990 }, { "epoch": 1.9251287284872665, "grad_norm": 0.15896333754062653, "learning_rate": 2.010101267766684e-05, "loss": 2.0926, "step": 498000 }, { "epoch": 1.9251673856906497, "grad_norm": 0.17546389997005463, "learning_rate": 2.000000000000002e-05, "loss": 2.0943, "step": 498010 }, { "epoch": 1.925206042894033, "grad_norm": 0.15868791937828064, "learning_rate": 1.9898992475383847e-05, "loss": 2.0868, "step": 498020 }, { "epoch": 1.9252447000974162, "grad_norm": 0.15706300735473633, "learning_rate": 1.979799010303007e-05, "loss": 2.0817, "step": 498030 }, { "epoch": 1.9252833573007995, "grad_norm": 0.1700574904680252, "learning_rate": 1.9696992882149767e-05, "loss": 2.0733, "step": 498040 }, { "epoch": 1.9253220145041827, "grad_norm": 0.1693497747182846, "learning_rate": 1.959600081195556e-05, "loss": 2.0879, "step": 498050 }, { "epoch": 1.925360671707566, "grad_norm": 0.15419720113277435, "learning_rate": 1.9495013891658974e-05, "loss": 2.0951, "step": 498060 }, { "epoch": 1.9253993289109492, "grad_norm": 0.15349604189395905, "learning_rate": 1.9394032120472417e-05, "loss": 2.0951, "step": 498070 }, { "epoch": 1.9254379861143325, "grad_norm": 0.6273776888847351, "learning_rate": 1.9293055497608513e-05, "loss": 2.0877, "step": 498080 }, { "epoch": 1.9254766433177157, "grad_norm": 0.17664119601249695, "learning_rate": 1.9192084022279675e-05, "loss": 2.0932, "step": 498090 }, { "epoch": 1.925515300521099, "grad_norm": 0.16162990033626556, "learning_rate": 1.9091117693698758e-05, "loss": 2.0929, "step": 498100 }, { "epoch": 1.9255539577244822, "grad_norm": 0.1592620462179184, "learning_rate": 1.8990156511078826e-05, "loss": 2.0871, "step": 498110 }, { "epoch": 1.9255926149278657, "grad_norm": 0.16288858652114868, "learning_rate": 1.8889200473633406e-05, "loss": 2.0784, "step": 498120 }, { "epoch": 1.925631272131249, "grad_norm": 0.16478540003299713, "learning_rate": 1.8788249580575567e-05, "loss": 2.0965, "step": 498130 }, { "epoch": 1.9256699293346322, "grad_norm": 0.15600290894508362, "learning_rate": 1.8687303831119053e-05, "loss": 2.087, "step": 498140 }, { "epoch": 1.9257085865380155, "grad_norm": 0.15345881879329681, "learning_rate": 1.8586363224478043e-05, "loss": 2.0777, "step": 498150 }, { "epoch": 1.9257472437413987, "grad_norm": 0.16258493065834045, "learning_rate": 1.848542775986628e-05, "loss": 2.0898, "step": 498160 }, { "epoch": 1.9257859009447822, "grad_norm": 0.16715885698795319, "learning_rate": 1.838449743649817e-05, "loss": 2.0847, "step": 498170 }, { "epoch": 1.9258245581481654, "grad_norm": 0.15741226077079773, "learning_rate": 1.828357225358812e-05, "loss": 2.079, "step": 498180 }, { "epoch": 1.9258632153515487, "grad_norm": 0.15472184121608734, "learning_rate": 1.818265221035076e-05, "loss": 2.0921, "step": 498190 }, { "epoch": 1.925901872554932, "grad_norm": 0.1586213856935501, "learning_rate": 1.8081737306000713e-05, "loss": 2.0709, "step": 498200 }, { "epoch": 1.9259405297583152, "grad_norm": 0.16578112542629242, "learning_rate": 1.798082753975372e-05, "loss": 2.0695, "step": 498210 }, { "epoch": 1.9259791869616985, "grad_norm": 0.16239455342292786, "learning_rate": 1.7879922910824186e-05, "loss": 2.1012, "step": 498220 }, { "epoch": 1.9260178441650817, "grad_norm": 0.1665370762348175, "learning_rate": 1.7779023418428296e-05, "loss": 2.0751, "step": 498230 }, { "epoch": 1.926056501368465, "grad_norm": 0.1582210808992386, "learning_rate": 1.767812906178112e-05, "loss": 2.0871, "step": 498240 }, { "epoch": 1.9260951585718482, "grad_norm": 0.15214033424854279, "learning_rate": 1.757723984009907e-05, "loss": 2.0826, "step": 498250 }, { "epoch": 1.9261338157752315, "grad_norm": 0.156508669257164, "learning_rate": 1.7476355752597872e-05, "loss": 2.0822, "step": 498260 }, { "epoch": 1.9261724729786147, "grad_norm": 0.16116832196712494, "learning_rate": 1.737547679849372e-05, "loss": 2.092, "step": 498270 }, { "epoch": 1.926211130181998, "grad_norm": 0.1606052964925766, "learning_rate": 1.727460297700323e-05, "loss": 2.0812, "step": 498280 }, { "epoch": 1.9262497873853814, "grad_norm": 0.15373031795024872, "learning_rate": 1.7173734287343033e-05, "loss": 2.0877, "step": 498290 }, { "epoch": 1.9262884445887647, "grad_norm": 0.1582830548286438, "learning_rate": 1.7072870728729985e-05, "loss": 2.0819, "step": 498300 }, { "epoch": 1.926327101792148, "grad_norm": 0.15733368694782257, "learning_rate": 1.6972012300381145e-05, "loss": 2.0806, "step": 498310 }, { "epoch": 1.9263657589955312, "grad_norm": 0.15600501000881195, "learning_rate": 1.6871159001513594e-05, "loss": 2.0883, "step": 498320 }, { "epoch": 1.9264044161989147, "grad_norm": 0.15862053632736206, "learning_rate": 1.677031083134506e-05, "loss": 2.1008, "step": 498330 }, { "epoch": 1.926443073402298, "grad_norm": 0.16201405227184296, "learning_rate": 1.666946778909284e-05, "loss": 2.0811, "step": 498340 }, { "epoch": 1.9264817306056812, "grad_norm": 0.15795743465423584, "learning_rate": 1.6568629873975116e-05, "loss": 2.0822, "step": 498350 }, { "epoch": 1.9265203878090644, "grad_norm": 0.16118592023849487, "learning_rate": 1.6467797085209844e-05, "loss": 2.0861, "step": 498360 }, { "epoch": 1.9265590450124477, "grad_norm": 0.15352848172187805, "learning_rate": 1.636696942201521e-05, "loss": 2.094, "step": 498370 }, { "epoch": 1.926597702215831, "grad_norm": 0.15276506543159485, "learning_rate": 1.6266146883609835e-05, "loss": 2.0752, "step": 498380 }, { "epoch": 1.9266363594192142, "grad_norm": 0.16707296669483185, "learning_rate": 1.616532946921212e-05, "loss": 2.0875, "step": 498390 }, { "epoch": 1.9266750166225974, "grad_norm": 0.1634710729122162, "learning_rate": 1.6064517178040916e-05, "loss": 2.0779, "step": 498400 }, { "epoch": 1.9267136738259807, "grad_norm": 0.16529691219329834, "learning_rate": 1.5963710009315514e-05, "loss": 2.0951, "step": 498410 }, { "epoch": 1.926752331029364, "grad_norm": 0.21218083798885345, "learning_rate": 1.5862907962254757e-05, "loss": 2.0855, "step": 498420 }, { "epoch": 1.9267909882327472, "grad_norm": 0.16146108508110046, "learning_rate": 1.5762111036078385e-05, "loss": 2.0816, "step": 498430 }, { "epoch": 1.9268296454361304, "grad_norm": 0.16276432573795319, "learning_rate": 1.566131923000591e-05, "loss": 2.0884, "step": 498440 }, { "epoch": 1.9268683026395137, "grad_norm": 0.15712717175483704, "learning_rate": 1.5560532543257512e-05, "loss": 2.1012, "step": 498450 }, { "epoch": 1.9269069598428972, "grad_norm": 0.1626255065202713, "learning_rate": 1.54597509750527e-05, "loss": 2.0731, "step": 498460 }, { "epoch": 1.9269456170462804, "grad_norm": 0.1566796898841858, "learning_rate": 1.535897452461188e-05, "loss": 2.0855, "step": 498470 }, { "epoch": 1.9269842742496637, "grad_norm": 1.1111677885055542, "learning_rate": 1.5258203191155672e-05, "loss": 2.0843, "step": 498480 }, { "epoch": 1.927022931453047, "grad_norm": 0.16669593751430511, "learning_rate": 1.5157436973904481e-05, "loss": 2.0786, "step": 498490 }, { "epoch": 1.9270615886564304, "grad_norm": 0.15540610253810883, "learning_rate": 1.505667587207915e-05, "loss": 2.1004, "step": 498500 }, { "epoch": 1.9271002458598137, "grad_norm": 0.16334572434425354, "learning_rate": 1.495591988490097e-05, "loss": 2.0952, "step": 498510 }, { "epoch": 1.927138903063197, "grad_norm": 0.1581311970949173, "learning_rate": 1.4855169011590785e-05, "loss": 2.0911, "step": 498520 }, { "epoch": 1.9271775602665802, "grad_norm": 0.1556026041507721, "learning_rate": 1.4754423251370109e-05, "loss": 2.097, "step": 498530 }, { "epoch": 1.9272162174699634, "grad_norm": 0.34120458364486694, "learning_rate": 1.4653682603460672e-05, "loss": 2.085, "step": 498540 }, { "epoch": 1.9272548746733467, "grad_norm": 0.15523135662078857, "learning_rate": 1.4552947067084211e-05, "loss": 2.0765, "step": 498550 }, { "epoch": 1.92729353187673, "grad_norm": 0.15660513937473297, "learning_rate": 1.44522166414629e-05, "loss": 2.0961, "step": 498560 }, { "epoch": 1.9273321890801132, "grad_norm": 0.15083718299865723, "learning_rate": 1.43514913258187e-05, "loss": 2.0884, "step": 498570 }, { "epoch": 1.9273708462834964, "grad_norm": 0.16068926453590393, "learning_rate": 1.4250771119374228e-05, "loss": 2.0815, "step": 498580 }, { "epoch": 1.9274095034868797, "grad_norm": 0.1572009027004242, "learning_rate": 1.4150056021351886e-05, "loss": 2.0845, "step": 498590 }, { "epoch": 1.927448160690263, "grad_norm": 0.15406206250190735, "learning_rate": 1.404934603097452e-05, "loss": 2.0754, "step": 498600 }, { "epoch": 1.9274868178936462, "grad_norm": 0.15755899250507355, "learning_rate": 1.3948641147465191e-05, "loss": 2.0842, "step": 498610 }, { "epoch": 1.9275254750970294, "grad_norm": 0.15822505950927734, "learning_rate": 1.384794137004719e-05, "loss": 2.0812, "step": 498620 }, { "epoch": 1.927564132300413, "grad_norm": 0.15680760145187378, "learning_rate": 1.3747246697943583e-05, "loss": 2.0832, "step": 498630 }, { "epoch": 1.9276027895037962, "grad_norm": 0.16229304671287537, "learning_rate": 1.3646557130378101e-05, "loss": 2.0784, "step": 498640 }, { "epoch": 1.9276414467071794, "grad_norm": 0.15850351750850677, "learning_rate": 1.3545872666574699e-05, "loss": 2.0978, "step": 498650 }, { "epoch": 1.9276801039105627, "grad_norm": 0.15322215855121613, "learning_rate": 1.344519330575733e-05, "loss": 2.0911, "step": 498660 }, { "epoch": 1.9277187611139461, "grad_norm": 0.15965676307678223, "learning_rate": 1.334451904714995e-05, "loss": 2.0949, "step": 498670 }, { "epoch": 1.9277574183173294, "grad_norm": 0.1568049192428589, "learning_rate": 1.3243849889976956e-05, "loss": 2.0873, "step": 498680 }, { "epoch": 1.9277960755207126, "grad_norm": 0.1586667150259018, "learning_rate": 1.3143185833463188e-05, "loss": 2.085, "step": 498690 }, { "epoch": 1.927834732724096, "grad_norm": 0.15746428072452545, "learning_rate": 1.3042526876833271e-05, "loss": 2.0733, "step": 498700 }, { "epoch": 1.9278733899274791, "grad_norm": 0.1772247552871704, "learning_rate": 1.2941873019312045e-05, "loss": 2.0868, "step": 498710 }, { "epoch": 1.9279120471308624, "grad_norm": 0.1588650941848755, "learning_rate": 1.2841224260124795e-05, "loss": 2.1039, "step": 498720 }, { "epoch": 1.9279507043342456, "grad_norm": 0.15893688797950745, "learning_rate": 1.2740580598496809e-05, "loss": 2.0938, "step": 498730 }, { "epoch": 1.927989361537629, "grad_norm": 0.1555604785680771, "learning_rate": 1.2639942033653373e-05, "loss": 2.1021, "step": 498740 }, { "epoch": 1.9280280187410122, "grad_norm": 0.15307071805000305, "learning_rate": 1.2539308564820884e-05, "loss": 2.092, "step": 498750 }, { "epoch": 1.9280666759443954, "grad_norm": 0.15516358613967896, "learning_rate": 1.2438680191224849e-05, "loss": 2.0851, "step": 498760 }, { "epoch": 1.9281053331477787, "grad_norm": 0.15749025344848633, "learning_rate": 1.2338056912091445e-05, "loss": 2.0726, "step": 498770 }, { "epoch": 1.928143990351162, "grad_norm": 0.16453056037425995, "learning_rate": 1.2237438726647066e-05, "loss": 2.0833, "step": 498780 }, { "epoch": 1.9281826475545452, "grad_norm": 0.1561942994594574, "learning_rate": 1.2136825634118331e-05, "loss": 2.0889, "step": 498790 }, { "epoch": 1.9282213047579286, "grad_norm": 0.15563102066516876, "learning_rate": 1.2036217633731638e-05, "loss": 2.0907, "step": 498800 }, { "epoch": 1.9282599619613119, "grad_norm": 0.16227789223194122, "learning_rate": 1.1935614724714273e-05, "loss": 2.0934, "step": 498810 }, { "epoch": 1.9282986191646951, "grad_norm": 0.1553933024406433, "learning_rate": 1.1835016906293073e-05, "loss": 2.0954, "step": 498820 }, { "epoch": 1.9283372763680784, "grad_norm": 0.16049771010875702, "learning_rate": 1.1734424177695547e-05, "loss": 2.0913, "step": 498830 }, { "epoch": 1.9283759335714619, "grad_norm": 0.16263166069984436, "learning_rate": 1.16338365381492e-05, "loss": 2.0843, "step": 498840 }, { "epoch": 1.9284145907748451, "grad_norm": 0.16481728851795197, "learning_rate": 1.1533253986881543e-05, "loss": 2.1019, "step": 498850 }, { "epoch": 1.9284532479782284, "grad_norm": 0.16423772275447845, "learning_rate": 1.1432676523120522e-05, "loss": 2.0942, "step": 498860 }, { "epoch": 1.9284919051816116, "grad_norm": 0.15550151467323303, "learning_rate": 1.1332104146094536e-05, "loss": 2.094, "step": 498870 }, { "epoch": 1.9285305623849949, "grad_norm": 0.1563386619091034, "learning_rate": 1.1231536855031533e-05, "loss": 2.0742, "step": 498880 }, { "epoch": 1.9285692195883781, "grad_norm": 0.1562238335609436, "learning_rate": 1.1130974649160131e-05, "loss": 2.0853, "step": 498890 }, { "epoch": 1.9286078767917614, "grad_norm": 0.1578655242919922, "learning_rate": 1.1030417527708946e-05, "loss": 2.0899, "step": 498900 }, { "epoch": 1.9286465339951446, "grad_norm": 0.15448294579982758, "learning_rate": 1.092986548990682e-05, "loss": 2.0888, "step": 498910 }, { "epoch": 1.9286851911985279, "grad_norm": 0.15214316546916962, "learning_rate": 1.0829318534983035e-05, "loss": 2.0823, "step": 498920 }, { "epoch": 1.9287238484019111, "grad_norm": 0.15848049521446228, "learning_rate": 1.072877666216665e-05, "loss": 2.0806, "step": 498930 }, { "epoch": 1.9287625056052944, "grad_norm": 0.1562502384185791, "learning_rate": 1.0628239870687173e-05, "loss": 2.0774, "step": 498940 }, { "epoch": 1.9288011628086776, "grad_norm": 0.1534113883972168, "learning_rate": 1.052770815977433e-05, "loss": 2.0889, "step": 498950 }, { "epoch": 1.928839820012061, "grad_norm": 0.15739640593528748, "learning_rate": 1.0427181528657847e-05, "loss": 2.0891, "step": 498960 }, { "epoch": 1.9288784772154444, "grad_norm": 0.16295334696769714, "learning_rate": 1.0326659976567899e-05, "loss": 2.0795, "step": 498970 }, { "epoch": 1.9289171344188276, "grad_norm": 0.1570284366607666, "learning_rate": 1.0226143502734654e-05, "loss": 2.0894, "step": 498980 }, { "epoch": 1.9289557916222109, "grad_norm": 0.1605892926454544, "learning_rate": 1.0125632106388505e-05, "loss": 2.0887, "step": 498990 }, { "epoch": 1.9289944488255941, "grad_norm": 0.15600307285785675, "learning_rate": 1.002512578676007e-05, "loss": 2.0777, "step": 499000 }, { "epoch": 1.9290331060289776, "grad_norm": 0.1557171493768692, "learning_rate": 9.924624543080408e-06, "loss": 2.0935, "step": 499010 }, { "epoch": 1.9290717632323608, "grad_norm": 0.15279081463813782, "learning_rate": 9.824128374580132e-06, "loss": 2.0806, "step": 499020 }, { "epoch": 1.929110420435744, "grad_norm": 0.15580080449581146, "learning_rate": 9.72363728049075e-06, "loss": 2.0762, "step": 499030 }, { "epoch": 1.9291490776391274, "grad_norm": 0.1522652804851532, "learning_rate": 9.623151260043539e-06, "loss": 2.0978, "step": 499040 }, { "epoch": 1.9291877348425106, "grad_norm": 0.15721817314624786, "learning_rate": 9.522670312470006e-06, "loss": 2.0829, "step": 499050 }, { "epoch": 1.9292263920458939, "grad_norm": 0.16213186085224152, "learning_rate": 9.422194437002097e-06, "loss": 2.0832, "step": 499060 }, { "epoch": 1.929265049249277, "grad_norm": 0.157264843583107, "learning_rate": 9.321723632871982e-06, "loss": 2.0887, "step": 499070 }, { "epoch": 1.9293037064526604, "grad_norm": 0.15679962933063507, "learning_rate": 9.221257899311385e-06, "loss": 2.0768, "step": 499080 }, { "epoch": 1.9293423636560436, "grad_norm": 0.15212687849998474, "learning_rate": 9.120797235552925e-06, "loss": 2.0873, "step": 499090 }, { "epoch": 1.9293810208594269, "grad_norm": 0.1606370061635971, "learning_rate": 9.020341640829432e-06, "loss": 2.0871, "step": 499100 }, { "epoch": 1.9294196780628101, "grad_norm": 0.17525170743465424, "learning_rate": 8.91989111437308e-06, "loss": 2.0926, "step": 499110 }, { "epoch": 1.9294583352661934, "grad_norm": 0.15746445953845978, "learning_rate": 8.819445655417146e-06, "loss": 2.0828, "step": 499120 }, { "epoch": 1.9294969924695766, "grad_norm": 0.1544865220785141, "learning_rate": 8.719005263194912e-06, "loss": 2.0888, "step": 499130 }, { "epoch": 1.92953564967296, "grad_norm": 0.15426436066627502, "learning_rate": 8.618569936939436e-06, "loss": 2.0765, "step": 499140 }, { "epoch": 1.9295743068763433, "grad_norm": 0.15780779719352722, "learning_rate": 8.51813967588444e-06, "loss": 2.0805, "step": 499150 }, { "epoch": 1.9296129640797266, "grad_norm": 0.15918251872062683, "learning_rate": 8.41771447926365e-06, "loss": 2.075, "step": 499160 }, { "epoch": 1.9296516212831099, "grad_norm": 0.15235045552253723, "learning_rate": 8.317294346310789e-06, "loss": 2.0832, "step": 499170 }, { "epoch": 1.9296902784864933, "grad_norm": 0.15264438092708588, "learning_rate": 8.216879276260248e-06, "loss": 2.0832, "step": 499180 }, { "epoch": 1.9297289356898766, "grad_norm": 0.15535661578178406, "learning_rate": 8.116469268345971e-06, "loss": 2.0835, "step": 499190 }, { "epoch": 1.9297675928932598, "grad_norm": 0.16374972462654114, "learning_rate": 8.016064321802797e-06, "loss": 2.0684, "step": 499200 }, { "epoch": 1.929806250096643, "grad_norm": 0.1522805392742157, "learning_rate": 7.915664435865332e-06, "loss": 2.0914, "step": 499210 }, { "epoch": 1.9298449073000263, "grad_norm": 0.1582413613796234, "learning_rate": 7.815269609768417e-06, "loss": 2.0801, "step": 499220 }, { "epoch": 1.9298835645034096, "grad_norm": 0.15402774512767792, "learning_rate": 7.714879842747102e-06, "loss": 2.0744, "step": 499230 }, { "epoch": 1.9299222217067928, "grad_norm": 0.15144217014312744, "learning_rate": 7.61449513403667e-06, "loss": 2.078, "step": 499240 }, { "epoch": 1.929960878910176, "grad_norm": 0.14982645213603973, "learning_rate": 7.514115482872397e-06, "loss": 2.0867, "step": 499250 }, { "epoch": 1.9299995361135593, "grad_norm": 0.15028296411037445, "learning_rate": 7.41374088849045e-06, "loss": 2.084, "step": 499260 }, { "epoch": 1.9300381933169426, "grad_norm": 0.16110238432884216, "learning_rate": 7.313371350126108e-06, "loss": 2.0796, "step": 499270 }, { "epoch": 1.9300768505203258, "grad_norm": 0.15005861222743988, "learning_rate": 7.213006867015981e-06, "loss": 2.0954, "step": 499280 }, { "epoch": 1.930115507723709, "grad_norm": 0.1562095582485199, "learning_rate": 7.112647438395792e-06, "loss": 2.0881, "step": 499290 }, { "epoch": 1.9301541649270924, "grad_norm": 0.15073002874851227, "learning_rate": 7.012293063502151e-06, "loss": 2.0852, "step": 499300 }, { "epoch": 1.9301928221304758, "grad_norm": 0.15659964084625244, "learning_rate": 6.911943741571669e-06, "loss": 2.0964, "step": 499310 }, { "epoch": 1.930231479333859, "grad_norm": 0.15236833691596985, "learning_rate": 6.811599471841179e-06, "loss": 2.0645, "step": 499320 }, { "epoch": 1.9302701365372423, "grad_norm": 0.15685366094112396, "learning_rate": 6.711260253547735e-06, "loss": 2.0672, "step": 499330 }, { "epoch": 1.9303087937406256, "grad_norm": 0.15089979767799377, "learning_rate": 6.610926085928392e-06, "loss": 2.0858, "step": 499340 }, { "epoch": 1.930347450944009, "grad_norm": 0.15144990384578705, "learning_rate": 6.51059696822065e-06, "loss": 2.0766, "step": 499350 }, { "epoch": 1.9303861081473923, "grad_norm": 0.14963634312152863, "learning_rate": 6.4102728996620065e-06, "loss": 2.0737, "step": 499360 }, { "epoch": 1.9304247653507756, "grad_norm": 0.15470971167087555, "learning_rate": 6.309953879490404e-06, "loss": 2.085, "step": 499370 }, { "epoch": 1.9304634225541588, "grad_norm": 0.1591530442237854, "learning_rate": 6.2096399069435646e-06, "loss": 2.0996, "step": 499380 }, { "epoch": 1.930502079757542, "grad_norm": 0.15408135950565338, "learning_rate": 6.1093309812596534e-06, "loss": 2.0892, "step": 499390 }, { "epoch": 1.9305407369609253, "grad_norm": 0.15867233276367188, "learning_rate": 6.009027101677278e-06, "loss": 2.0822, "step": 499400 }, { "epoch": 1.9305793941643086, "grad_norm": 0.15423956513404846, "learning_rate": 5.908728267434604e-06, "loss": 2.0953, "step": 499410 }, { "epoch": 1.9306180513676918, "grad_norm": 0.16282016038894653, "learning_rate": 5.808434477770685e-06, "loss": 2.0738, "step": 499420 }, { "epoch": 1.930656708571075, "grad_norm": 0.14597930014133453, "learning_rate": 5.708145731924352e-06, "loss": 2.0794, "step": 499430 }, { "epoch": 1.9306953657744583, "grad_norm": 0.15457600355148315, "learning_rate": 5.607862029134436e-06, "loss": 2.0842, "step": 499440 }, { "epoch": 1.9307340229778416, "grad_norm": 0.15878304839134216, "learning_rate": 5.507583368640656e-06, "loss": 2.0962, "step": 499450 }, { "epoch": 1.9307726801812248, "grad_norm": 1.0090843439102173, "learning_rate": 5.407309749682288e-06, "loss": 2.0813, "step": 499460 }, { "epoch": 1.930811337384608, "grad_norm": 0.15068727731704712, "learning_rate": 5.3070411714990495e-06, "loss": 2.0946, "step": 499470 }, { "epoch": 1.9308499945879916, "grad_norm": 0.1503390669822693, "learning_rate": 5.206777633330662e-06, "loss": 2.1053, "step": 499480 }, { "epoch": 1.9308886517913748, "grad_norm": 0.14822939038276672, "learning_rate": 5.106519134417509e-06, "loss": 2.0816, "step": 499490 }, { "epoch": 1.930927308994758, "grad_norm": 0.15106622874736786, "learning_rate": 5.006265673999755e-06, "loss": 2.0756, "step": 499500 }, { "epoch": 1.9309659661981413, "grad_norm": 0.15554742515087128, "learning_rate": 4.906017251317563e-06, "loss": 2.0865, "step": 499510 }, { "epoch": 1.9310046234015248, "grad_norm": 0.14966271817684174, "learning_rate": 4.805773865611984e-06, "loss": 2.0888, "step": 499520 }, { "epoch": 1.931043280604908, "grad_norm": 0.1527647078037262, "learning_rate": 4.705535516123405e-06, "loss": 2.0945, "step": 499530 }, { "epoch": 1.9310819378082913, "grad_norm": 0.15576697885990143, "learning_rate": 4.605302202093098e-06, "loss": 2.0841, "step": 499540 }, { "epoch": 1.9311205950116745, "grad_norm": 0.14974485337734222, "learning_rate": 4.505073922762337e-06, "loss": 2.0797, "step": 499550 }, { "epoch": 1.9311592522150578, "grad_norm": 0.14997202157974243, "learning_rate": 4.404850677372396e-06, "loss": 2.0903, "step": 499560 }, { "epoch": 1.931197909418441, "grad_norm": 0.15646487474441528, "learning_rate": 4.3046324651649925e-06, "loss": 2.0843, "step": 499570 }, { "epoch": 1.9312365666218243, "grad_norm": 0.15585726499557495, "learning_rate": 4.2044192853818445e-06, "loss": 2.0945, "step": 499580 }, { "epoch": 1.9312752238252076, "grad_norm": 0.1496593952178955, "learning_rate": 4.104211137264891e-06, "loss": 2.0853, "step": 499590 }, { "epoch": 1.9313138810285908, "grad_norm": 0.15382583439350128, "learning_rate": 4.004008020056071e-06, "loss": 2.0881, "step": 499600 }, { "epoch": 1.931352538231974, "grad_norm": 0.14952684938907623, "learning_rate": 3.903809932998215e-06, "loss": 2.1048, "step": 499610 }, { "epoch": 1.9313911954353573, "grad_norm": 0.15660254657268524, "learning_rate": 3.803616875333704e-06, "loss": 2.0571, "step": 499620 }, { "epoch": 1.9314298526387406, "grad_norm": 0.15601907670497894, "learning_rate": 3.7034288463049236e-06, "loss": 2.0852, "step": 499630 }, { "epoch": 1.9314685098421238, "grad_norm": 0.15216785669326782, "learning_rate": 3.6032458451551454e-06, "loss": 2.092, "step": 499640 }, { "epoch": 1.9315071670455073, "grad_norm": 0.15855903923511505, "learning_rate": 3.5030678711274187e-06, "loss": 2.0718, "step": 499650 }, { "epoch": 1.9315458242488905, "grad_norm": 0.15953026711940765, "learning_rate": 3.4028949234650164e-06, "loss": 2.0856, "step": 499660 }, { "epoch": 1.9315844814522738, "grad_norm": 0.1549243927001953, "learning_rate": 3.302727001411432e-06, "loss": 2.0979, "step": 499670 }, { "epoch": 1.931623138655657, "grad_norm": 0.15357156097888947, "learning_rate": 3.2025641042103817e-06, "loss": 2.0899, "step": 499680 }, { "epoch": 1.9316617958590405, "grad_norm": 0.15665321052074432, "learning_rate": 3.1024062311058033e-06, "loss": 2.0889, "step": 499690 }, { "epoch": 1.9317004530624238, "grad_norm": 0.15755918622016907, "learning_rate": 3.0022533813414134e-06, "loss": 2.0704, "step": 499700 }, { "epoch": 1.931739110265807, "grad_norm": 0.1516823172569275, "learning_rate": 2.9021055541618156e-06, "loss": 2.089, "step": 499710 }, { "epoch": 1.9317777674691903, "grad_norm": 0.1494276374578476, "learning_rate": 2.801962748811393e-06, "loss": 2.095, "step": 499720 }, { "epoch": 1.9318164246725735, "grad_norm": 0.1531931608915329, "learning_rate": 2.7018249645347493e-06, "loss": 2.0815, "step": 499730 }, { "epoch": 1.9318550818759568, "grad_norm": 0.1523408740758896, "learning_rate": 2.601692200576711e-06, "loss": 2.0761, "step": 499740 }, { "epoch": 1.93189373907934, "grad_norm": 0.14666548371315002, "learning_rate": 2.5015644561821038e-06, "loss": 2.0641, "step": 499750 }, { "epoch": 1.9319323962827233, "grad_norm": 0.15638861060142517, "learning_rate": 2.401441730596421e-06, "loss": 2.0839, "step": 499760 }, { "epoch": 1.9319710534861065, "grad_norm": 0.15035462379455566, "learning_rate": 2.3013240230647102e-06, "loss": 2.0846, "step": 499770 }, { "epoch": 1.9320097106894898, "grad_norm": 0.15132653713226318, "learning_rate": 2.2012113328329085e-06, "loss": 2.0848, "step": 499780 }, { "epoch": 1.932048367892873, "grad_norm": 0.1532151997089386, "learning_rate": 2.1011036591465082e-06, "loss": 2.0807, "step": 499790 }, { "epoch": 1.9320870250962563, "grad_norm": 0.27533096075057983, "learning_rate": 2.0010010012516676e-06, "loss": 2.0879, "step": 499800 }, { "epoch": 1.9321256822996398, "grad_norm": 0.15347447991371155, "learning_rate": 1.9009033583945456e-06, "loss": 2.0861, "step": 499810 }, { "epoch": 1.932164339503023, "grad_norm": 0.15102313458919525, "learning_rate": 1.8008107298210784e-06, "loss": 2.0871, "step": 499820 }, { "epoch": 1.9322029967064063, "grad_norm": 0.1492982655763626, "learning_rate": 1.700723114778313e-06, "loss": 2.0826, "step": 499830 }, { "epoch": 1.9322416539097895, "grad_norm": 0.14892347157001495, "learning_rate": 1.60064051251263e-06, "loss": 2.086, "step": 499840 }, { "epoch": 1.9322803111131728, "grad_norm": 0.14762091636657715, "learning_rate": 1.5005629222708539e-06, "loss": 2.0986, "step": 499850 }, { "epoch": 1.9323189683165563, "grad_norm": 0.1522284299135208, "learning_rate": 1.4004903433004756e-06, "loss": 2.0931, "step": 499860 }, { "epoch": 1.9323576255199395, "grad_norm": 0.14866903424263, "learning_rate": 1.30042277484832e-06, "loss": 2.0816, "step": 499870 }, { "epoch": 1.9323962827233228, "grad_norm": 0.148747518658638, "learning_rate": 1.2003602161620997e-06, "loss": 2.0782, "step": 499880 }, { "epoch": 1.932434939926706, "grad_norm": 0.15060488879680634, "learning_rate": 1.1003026664895277e-06, "loss": 2.069, "step": 499890 }, { "epoch": 1.9324735971300893, "grad_norm": 0.14562411606311798, "learning_rate": 1.000250125078095e-06, "loss": 2.0864, "step": 499900 }, { "epoch": 1.9325122543334725, "grad_norm": 0.15331986546516418, "learning_rate": 9.002025911764023e-07, "loss": 2.077, "step": 499910 }, { "epoch": 1.9325509115368558, "grad_norm": 0.14756985008716583, "learning_rate": 8.001600640319406e-07, "loss": 2.0719, "step": 499920 }, { "epoch": 1.932589568740239, "grad_norm": 0.1471695899963379, "learning_rate": 7.001225428937552e-07, "loss": 2.0673, "step": 499930 }, { "epoch": 1.9326282259436223, "grad_norm": 0.15674185752868652, "learning_rate": 6.00090027010225e-07, "loss": 2.0643, "step": 499940 }, { "epoch": 1.9326668831470055, "grad_norm": 0.14780041575431824, "learning_rate": 5.000625156297289e-07, "loss": 2.0786, "step": 499950 }, { "epoch": 1.9327055403503888, "grad_norm": 0.1498531550168991, "learning_rate": 4.0004000800197835e-07, "loss": 2.0835, "step": 499960 }, { "epoch": 1.932744197553772, "grad_norm": 0.14761725068092346, "learning_rate": 3.0002250337557434e-07, "loss": 2.0839, "step": 499970 }, { "epoch": 1.9327828547571555, "grad_norm": 0.1464381068944931, "learning_rate": 2.000100010000061e-07, "loss": 2.0842, "step": 499980 }, { "epoch": 1.9328215119605388, "grad_norm": 0.1600688099861145, "learning_rate": 1.0000250012498491e-07, "loss": 2.0756, "step": 499990 }, { "epoch": 1.932860169163922, "grad_norm": 0.1499907523393631, "learning_rate": 0.0, "loss": 2.109, "step": 500000 } ], "logging_steps": 10, "max_steps": 500000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2721816869275194e+21, "train_batch_size": 13, "trial_name": null, "trial_params": null }